[CRIU] [PATCH] arm: Workaround shmat() page coloring alignment

Andrei Vagin avagin at virtuozzo.com
Fri Apr 14 15:20:44 PDT 2017


Applied, thanks!

On Wed, Apr 12, 2017 at 08:29:34PM +0300, Dmitry Safonov wrote:
> Please see the in-code comment about the issue.
> There was an attempt to fix this behaviour in the kernel:
>   http://www.spinics.net/lists/arm-kernel/msg258870.html
> 
> I'll send a kernel patch with a proper justification.
> Still, it's worth working around this on older kernels.
> Calling mremap() after shmat() makes it possible to C/R shmem between ARMv7 CPUs.
> Without it, C/R will fail even on the same platform.
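> 
> For illustration, the failure and the workaround can be reproduced from plain
> userspace with something like the sketch below (not part of the patch; the
> target address is picked artificially and SHMLBA is assumed to be 4 pages,
> as on ARM):
> 
> /* Sketch: on ARM, shmat() at a page-aligned but not SHMLBA-aligned address
>  * fails with EINVAL, while mremap() can still move the NULL-attached
>  * mapping to that address. */
> #define _GNU_SOURCE
> #include <stdio.h>
> #include <errno.h>
> #include <string.h>
> #include <stdint.h>
> #include <unistd.h>
> #include <sys/ipc.h>
> #include <sys/shm.h>
> #include <sys/mman.h>
> 
> int main(void)
> {
> 	long page = sysconf(_SC_PAGESIZE);
> 	size_t size = 2 * page;
> 	unsigned long shmlba = 4 * page;	/* SHMLBA on ARM */
> 	void *first, *wanted, *area, *moved;
> 	int id;
> 
> 	id = shmget(IPC_PRIVATE, size, IPC_CREAT | 0600);
> 	if (id < 0)
> 		return 1;
> 
> 	/* Attach anywhere first, as the restorer's workaround does. */
> 	first = shmat(id, NULL, 0);
> 	if (first == (void *)-1)
> 		return 1;
> 
> 	/* Reserve a free range and pick a page-aligned but SHMLBA-unaligned
> 	 * address inside it, standing in for the address seen at dump time. */
> 	area = mmap(NULL, 8 * page, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> 	if (area == MAP_FAILED)
> 		return 1;
> 	wanted = (void *)((((uintptr_t)area + shmlba - 1) & ~(shmlba - 1)) + page);
> 	munmap(area, 8 * page);
> 
> 	/* Direct attach at the unaligned address: EINVAL on ARM... */
> 	if (shmat(id, wanted, 0) == (void *)-1)
> 		printf("shmat(%p) failed: %s\n", wanted, strerror(errno));
> 
> 	/* ...but mremap() can still place the mapping there. */
> 	moved = mremap(first, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, wanted);
> 	printf("mremap() -> %p\n", moved);
> 
> 	shmdt(moved != (void *)-1 ? moved : first);
> 	return 0;
> }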
> 
> It is possible that this workaround turns a *restore failure* into
> *corruption* of shared memory after restore.
> Still, I think it's worth applying, for the following reasons:
> 1. No ARMv7 CPU has a VIPT aliasing data cache.
>    ARMv6 CPUs may have any cache type, but they are not popular because of UMP.
> 2. The SysV IPC shmem has to be writable and SHMLBA-unaligned
>    (if it's read-only, then it's OK).
> 3. For data corruption to happen, we need to migrate from a non-VIPT cached CPU
>    to a VIPT aliasing one.
> 4. As this is shmem migration between platforms, quite likely we have
>    its copy (in the case of a whole IPC namespace dump - in the images).
> 5. C/R even on the same CPU could fail before this patch (quite likely: more
>    than 50% of zdtm/transition/ipc runs failed while I was testing it).
> 
> So, I think it's very unlikely that someone uses CRIU to migrate
> unaligned writable shmem from ARMv7 to ARMv6, but if they do, there is
> a BIG WARNING on restore.
> 
> And after all: we've restored the application as it was at checkpoint time;
> we can't reliably tell whether the shmem needs aligning on the target platform,
> but we warn about it on restore.
> 
> At first I wanted to test the CPU ID for VIPT caching, but it lives in the
> cp15 coprocessor, which means it's unavailable from userspace. The only
> reliable way to tell is to check the first couple of boot lines in dmesg:
> [    0.000000] CPU: PIPT / VIPT nonaliasing data cache, VIPT aliasing instruction cache
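> 
> For illustration, that check could be automated roughly as in the sketch below
> (not part of the patch; it assumes the boot messages are still in the kernel
> ring buffer and that reading it is permitted, i.e. CAP_SYSLOG or
> dmesg_restrict=0):
> 
> /* Rough check for a VIPT aliasing data cache by scanning the kernel log. */
> #include <stdio.h>
> #include <string.h>
> #include <sys/klog.h>
> 
> int main(void)
> {
> 	static char buf[1 << 20];
> 	/* 3 == SYSLOG_ACTION_READ_ALL: read the whole ring buffer. */
> 	int n = klogctl(3, buf, sizeof(buf) - 1);
> 
> 	if (n < 0) {
> 		perror("klogctl");
> 		return 2;
> 	}
> 	buf[n] = '\0';
> 
> 	if (strstr(buf, "VIPT aliasing data cache")) {
> 		printf("VIPT aliasing data cache: SHMLBA alignment matters\n");
> 		return 1;
> 	}
> 	printf("no VIPT aliasing data cache line found\n");
> 	return 0;
> }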
> 
> Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
> ---
>  criu/arch/arm/include/asm/restorer.h |  3 ++
>  criu/arch/arm/restorer.c             | 59 ++++++++++++++++++++++++++++++++++++
>  criu/pie/restorer.c                  | 12 +++++++-
>  3 files changed, 73 insertions(+), 1 deletion(-)
> 
> diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h
> index 7e4b09c893b6..f7b963baec13 100644
> --- a/criu/arch/arm/include/asm/restorer.h
> +++ b/criu/arch/arm/include/asm/restorer.h
> @@ -57,6 +57,9 @@
>  
>  int restore_gpregs(struct rt_sigframe *f, UserArmRegsEntry *r);
>  int restore_nonsigframe_gpregs(UserArmRegsEntry *r);
> +#define ARCH_HAS_SHMAT_HOOK
> +unsigned long arch_shmat(int shmid, void *shmaddr,
> +			int shmflg, unsigned long size);
>  
>  static inline void restore_tls(tls_t *ptls) {
>  	asm (
> diff --git a/criu/arch/arm/restorer.c b/criu/arch/arm/restorer.c
> index 21234b95d33e..588c1c074fc5 100644
> --- a/criu/arch/arm/restorer.c
> +++ b/criu/arch/arm/restorer.c
> @@ -7,8 +7,67 @@
>  #include "log.h"
>  #include <compel/asm/fpu.h>
>  #include "cpu.h"
> +#include "page.h"
> +#include "common/err.h"
>  
>  int restore_nonsigframe_gpregs(UserArmRegsEntry *r)
>  {
>  	return 0;
>  }
> +
> +/*
> + * On ARMv6 CPUs with VIPT caches there are aliasing issues:
> + * if two different cache line indexes correspond to the same physical
> + * address, then changes made through one of the aliases might be lost
> + * or they can overwrite each other. To overcome these aliasing issues,
> + * the kernel introduced page coloring (SHMLBA): a 4-page alignment for
> + * shared mappings, so that every virtual alias of a page selects the
> + * same cache lines (the two index bits taken from the page address match).
> + *
> + * The problem here is in the shmat() syscall:
> + * 1. if shmaddr is NULL, then do_shmat() uses arch_get_unmapped_area()
> + *    to allocate the shared mapping, which checks whether the CPU cache
> + *    is VIPT and only then uses SHMLBA alignment.
> + * 2. if shmaddr is specified, then do_shmat() requires the address to be
> + *    SHMLBA-aligned regardless of CPU cache aliasing.
> + *
> + * All of the above means that on a non-VIPT CPU (like any ARMv7) we can
> + * get a non-SHMLBA, but page-aligned, address from shmat(shmid, NULL, shmflg),
> + * but we can't restore it with shmat(shmid, shmaddr, shmflg).
> + * As a result, we can dump e.g. an application with shmem aligned on
> + * 2 pages, but can't restore it on the very same ARMv7 CPU.
> + *
> + * To work around this kernel behaviour, use mremap() on the shmem
> + * mapping allocated with shmat(shmid, NULL, shmflg).
> + */
> +#define SHMLBA (4UL * PAGE_SIZE)
> +unsigned long arch_shmat(int shmid, void *shmaddr,
> +			int shmflg, unsigned long size)
> +{
> +	unsigned long smap;
> +
> +	/* shmaddr is SHMLBA-aligned - call shmat() directly */
> +	if (!((unsigned long)shmaddr & (SHMLBA - 1)))
> +		return sys_shmat(shmid, shmaddr, shmflg);
> +
> +	smap = sys_shmat(shmid, NULL, shmflg);
> +	if (IS_ERR_VALUE(smap)) {
> +		pr_err("shmat() with NULL shmaddr failed: %d\n", (int)smap);
> +		return smap;
> +	}
> +
> +	/* We're lucky! */
> +	if (smap == (unsigned long)shmaddr)
> +		return smap;
> +
> +	/* Warn ALOUD */
> +	pr_warn("Restoring shmem %p unaligned to SHMLBA.\n", shmaddr);
> +	pr_warn("Make sure that you don't migrate shmem from a non-VIPT cached CPU to a VIPT aliasing one (e.g., ARMv7 -> ARMv6)\n");
> +	pr_warn("Otherwise YOU HAVE A CHANCE OF DATA CORRUPTION in writable shmem\n");
> +
> +	smap = sys_mremap(smap, size, size,
> +			MREMAP_FIXED | MREMAP_MAYMOVE, (unsigned long)shmaddr);
> +	if (IS_ERR_VALUE(smap))
> +		pr_err("mremap() for shmem failed: %d\n", (int)smap);
> +	return smap;
> +}
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 53ecaa0520e2..70ebcdc18a83 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -543,6 +543,14 @@ static long restore_self_exe_late(struct task_restore_args *args)
>  	return ret;
>  }
>  
> +#ifndef ARCH_HAS_SHMAT_HOOK
> +unsigned long arch_shmat(int shmid, void *shmaddr,
> +			int shmflg, unsigned long size)
> +{
> +	return sys_shmat(shmid, shmaddr, shmflg);
> +}
> +#endif
> +
>  static unsigned long restore_mapping(VmaEntry *vma_entry)
>  {
>  	int prot	= vma_entry->prot;
> @@ -551,6 +559,8 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
>  
>  	if (vma_entry_is(vma_entry, VMA_AREA_SYSVIPC)) {
>  		int att_flags;
> +		void *shmaddr = decode_pointer(vma_entry->start);
> +		unsigned long shmsize = (vma_entry->end - vma_entry->start);
>  		/*
>  		 * See comment in open_shmem_sysv() for what SYSV_SHMEM_SKIP_FD
>  		 * means and why we check for PROT_EXEC few lines below.
> @@ -565,7 +575,7 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
>  			att_flags = SHM_RDONLY;
>  
>  		pr_info("Attach SYSV shmem %d at %"PRIx64"\n", (int)vma_entry->fd, vma_entry->start);
> -		return sys_shmat(vma_entry->fd, decode_pointer(vma_entry->start), att_flags);
> +		return arch_shmat(vma_entry->fd, shmaddr, att_flags, shmsize);
>  	}
>  
>  	/*
> -- 
> 2.12.2
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu

