[CRIU] [PATCH] kerndat: Separate per-arch kerndat

Andrei Vagin avagin at virtuozzo.com
Mon Feb 19 20:49:47 MSK 2018


Applied, thanks!
On Thu, Feb 15, 2018 at 09:08:04PM +0000, Dmitry Safonov wrote:
> x86's kerndat section in crtools.c has grown too large.
> Let's move it into a separate file to make it more readable; judging by
> the trimmed include list, it should also let the build parallelize better.
> 
> Maybe we should turn the __weak functions into 0-defines,
> or clean up the 0-defines with ifdefs in the generic file.
> I have no strong opinion on that.
> 
> Signed-off-by: Dmitry Safonov <dima at arista.com>
> ---
>  criu/arch/aarch64/include/asm/kerndat.h  |   7 +
>  criu/arch/aarch64/include/asm/restorer.h |   2 -
>  criu/arch/arm/include/asm/kerndat.h      |   7 +
>  criu/arch/arm/include/asm/restorer.h     |   2 -
>  criu/arch/ppc64/include/asm/kerndat.h    |   7 +
>  criu/arch/ppc64/include/asm/restorer.h   |   2 -
>  criu/arch/s390/include/asm/kerndat.h     |   7 +
>  criu/arch/s390/include/asm/restorer.h    |   2 -
>  criu/arch/x86/Makefile                   |   1 +
>  criu/arch/x86/crtools.c                  | 269 +------------------------------
>  criu/arch/x86/include/asm/kerndat.h      |   8 +
>  criu/arch/x86/include/asm/restorer.h     |   5 -
>  criu/arch/x86/kerndat.c                  | 249 ++++++++++++++++++++++++++++
>  criu/include/kerndat.h                   |   1 +
>  14 files changed, 295 insertions(+), 274 deletions(-)
>  create mode 100644 criu/arch/aarch64/include/asm/kerndat.h
>  create mode 100644 criu/arch/arm/include/asm/kerndat.h
>  create mode 100644 criu/arch/ppc64/include/asm/kerndat.h
>  create mode 100644 criu/arch/s390/include/asm/kerndat.h
>  create mode 100644 criu/arch/x86/include/asm/kerndat.h
>  create mode 100644 criu/arch/x86/kerndat.c
> 
> diff --git a/criu/arch/aarch64/include/asm/kerndat.h b/criu/arch/aarch64/include/asm/kerndat.h
> new file mode 100644
> index 000000000000..60956b573db9
> --- /dev/null
> +++ b/criu/arch/aarch64/include/asm/kerndat.h
> @@ -0,0 +1,7 @@
> +#ifndef __CR_ASM_KERNDAT_H__
> +#define __CR_ASM_KERNDAT_H__
> +
> +#define kdat_compatible_cr()			0
> +#define kdat_can_map_vdso()			0
> +
> +#endif /* __CR_ASM_KERNDAT_H__ */
> diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h
> index bef85f3a3210..f502cdcaf67c 100644
> --- a/criu/arch/aarch64/include/asm/restorer.h
> +++ b/criu/arch/aarch64/include/asm/restorer.h
> @@ -52,8 +52,6 @@
>  			: "sp", "x0", "memory")
>  
>  
> -#define kdat_compatible_cr()			0
> -#define kdat_can_map_vdso()			0
>  #define arch_map_vdso(map, compat)		-1
>  
>  int restore_gpregs(struct rt_sigframe *f, UserAarch64RegsEntry *r);
> diff --git a/criu/arch/arm/include/asm/kerndat.h b/criu/arch/arm/include/asm/kerndat.h
> new file mode 100644
> index 000000000000..60956b573db9
> --- /dev/null
> +++ b/criu/arch/arm/include/asm/kerndat.h
> @@ -0,0 +1,7 @@
> +#ifndef __CR_ASM_KERNDAT_H__
> +#define __CR_ASM_KERNDAT_H__
> +
> +#define kdat_compatible_cr()			0
> +#define kdat_can_map_vdso()			0
> +
> +#endif /* __CR_ASM_KERNDAT_H__ */
> diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h
> index d9208185f731..217d920e846e 100644
> --- a/criu/arch/arm/include/asm/restorer.h
> +++ b/criu/arch/arm/include/asm/restorer.h
> @@ -53,8 +53,6 @@
>  		     : "memory")
>  
>  
> -#define kdat_compatible_cr()			0
> -#define kdat_can_map_vdso()			0
>  #define arch_map_vdso(map, compat)		-1
>  
>  int restore_gpregs(struct rt_sigframe *f, UserArmRegsEntry *r);
> diff --git a/criu/arch/ppc64/include/asm/kerndat.h b/criu/arch/ppc64/include/asm/kerndat.h
> new file mode 100644
> index 000000000000..60956b573db9
> --- /dev/null
> +++ b/criu/arch/ppc64/include/asm/kerndat.h
> @@ -0,0 +1,7 @@
> +#ifndef __CR_ASM_KERNDAT_H__
> +#define __CR_ASM_KERNDAT_H__
> +
> +#define kdat_compatible_cr()			0
> +#define kdat_can_map_vdso()			0
> +
> +#endif /* __CR_ASM_KERNDAT_H__ */
> diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h
> index 3fffa833c157..d48d833d6b6c 100644
> --- a/criu/arch/ppc64/include/asm/restorer.h
> +++ b/criu/arch/ppc64/include/asm/restorer.h
> @@ -48,8 +48,6 @@
>  		  "r"(&thread_args[i])		/* %6 */		\
>  		: "memory","0","3","4","5","6","7","14","15")
>  
> -#define kdat_compatible_cr()			0
> -#define kdat_can_map_vdso()			0
>  #define arch_map_vdso(map, compat)		-1
>  
>  int restore_gpregs(struct rt_sigframe *f, UserPpc64RegsEntry *r);
> diff --git a/criu/arch/s390/include/asm/kerndat.h b/criu/arch/s390/include/asm/kerndat.h
> new file mode 100644
> index 000000000000..60956b573db9
> --- /dev/null
> +++ b/criu/arch/s390/include/asm/kerndat.h
> @@ -0,0 +1,7 @@
> +#ifndef __CR_ASM_KERNDAT_H__
> +#define __CR_ASM_KERNDAT_H__
> +
> +#define kdat_compatible_cr()			0
> +#define kdat_can_map_vdso()			0
> +
> +#endif /* __CR_ASM_KERNDAT_H__ */
> diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h
> index 63e09986339b..cfdefcab9bab 100644
> --- a/criu/arch/s390/include/asm/restorer.h
> +++ b/criu/arch/s390/include/asm/restorer.h
> @@ -39,8 +39,6 @@
>  	  "d"(&thread_args[i])						\
>  	: "0", "1", "2", "3", "4", "5", "6", "cc", "memory")
>  
> -#define kdat_compatible_cr()			0
> -#define kdat_can_map_vdso()			0
>  #define arch_map_vdso(map, compat)		-1
>  
>  int restore_gpregs(struct rt_sigframe *f, UserS390RegsEntry *r);
> diff --git a/criu/arch/x86/Makefile b/criu/arch/x86/Makefile
> index 669dc073a5be..75fa782c8279 100644
> --- a/criu/arch/x86/Makefile
> +++ b/criu/arch/x86/Makefile
> @@ -11,6 +11,7 @@ ldflags-y		+= -r -z noexecstack
>  
>  obj-y			+= cpu.o
>  obj-y			+= crtools.o
> +obj-y			+= kerndat.o
>  obj-y			+= sigframe.o
>  ifeq ($(CONFIG_COMPAT),y)
>          obj-y		+= sigaction_compat.o
> diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
> index e13b39b90076..0b5a0acd6779 100644
> --- a/criu/arch/x86/crtools.c
> +++ b/criu/arch/x86/crtools.c
> @@ -1,267 +1,14 @@
> -#include <signal.h>
> -#include <stdlib.h>
> -#include <string.h>
> -#include <unistd.h>
> -#include <elf.h>
> -#include <sched.h>
> -#include <sys/mman.h>
> -#include <sys/syscall.h>
> -#include <sys/auxv.h>
> -#include <sys/wait.h>
> -#include <sys/ptrace.h>
> -
> -#include "types.h"
> -#include "log.h"
> -#include "asm/compat.h"
> -#include "asm/parasite-syscall.h"
> -#include "asm/restorer.h"
> -#include <compel/asm/fpu.h>
> -#include "asm/dump.h"
> -
> -#include "cr_options.h"
> -#include "common/compiler.h"
> -#include "restorer.h"
> -#include "parasite-syscall.h"
> -#include "util.h"
> +#include "compel/asm/fpu.h"
> +#include "compel/compel.h"
> +#include "compel/plugins/std/syscall-codes.h"
>  #include "cpu.h"
> -#include <compel/plugins/std/syscall-codes.h>
> -#include "kerndat.h"
> -#include <compel/compel.h>
> -
> -#include "protobuf.h"
> +#include "cr_options.h"
>  #include "images/core.pb-c.h"
> -#include "images/creds.pb-c.h"
> -
> -/* XXX: Move all kerndat features to per-arch kerndat .c */
> -int kdat_can_map_vdso(void)
> -{
> -	pid_t child;
> -	int stat;
> -
> -	/*
> -	 * Running under fork so if vdso_64 is disabled - don't create
> -	 * it for criu accidentally.
> -	 */
> -	child = fork();
> -	if (child < 0) {
> -		pr_perror("%s(): failed to fork()", __func__);
> -		return -1;
> -	}
> -
> -	if (child == 0) {
> -		int ret;
> -
> -		ret = syscall(SYS_arch_prctl, ARCH_MAP_VDSO_32, 0);
> -		if (ret == 0)
> -			exit(1);
> -		/*
> -		 * Mapping vDSO while have not unmap it yet:
> -		 * this is restricted by API if ARCH_MAP_VDSO_* is supported.
> -		 */
> -		if (ret == -1 && errno == EEXIST)
> -			exit(1);
> -		exit(0);
> -	}
> -
> -	if (waitpid(child, &stat, 0) != child) {
> -		pr_err("Failed to wait for arch_prctl() test\n");
> -		kill(child, SIGKILL);
> -		return -1;
> -	}
> -
> -	if (!WIFEXITED(stat))
> -		return -1;
> -
> -	return WEXITSTATUS(stat);
> -
> -}
> -
> -#ifdef CONFIG_COMPAT
> -void *mmap_ia32(void *addr, size_t len, int prot,
> -		int flags, int fildes, off_t off)
> -{
> -	struct syscall_args32 s;
> -
> -	s.nr    = __NR32_mmap2;
> -	s.arg0  = (uint32_t)(uintptr_t)addr;
> -	s.arg1  = (uint32_t)len;
> -	s.arg2  = prot;
> -	s.arg3  = flags;
> -	s.arg4  = fildes;
> -	s.arg5  = (uint32_t)off;
> -
> -	do_full_int80(&s);
> -
> -	return (void *)(uintptr_t)s.nr;
> -}
> -
> -/*
> - * The idea of the test:
> - * From kernel's top-down allocator we assume here that
> - * 1. A = mmap(0, ...); munmap(A);
> - * 2. B = mmap(0, ...);
> - * results in A == B.
> - * ...but if we have 32-bit mmap() bug, then A will have only lower
> - * 4 bytes of 64-bit address allocated with mmap().
> - * That means, that the next mmap() will return B != A
> - * (as munmap(A) hasn't really unmapped A mapping).
> - *
> - * As mapping with lower 4 bytes of A may really exist, we run
> - * this test under fork().
> - *
> - * Another approach to test bug's presence would be to parse
> - * /proc/self/maps before and after 32-bit mmap(), but that would
> - * be soo slow.
> - */
> -static void mmap_bug_test(void)
> -{
> -	void *map1, *map2;
> -	int err;
> -
> -	map1 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
> -	/* 32-bit error, not sign-extended - can't use IS_ERR_VALUE() here */
> -	err = (uintptr_t)map1 % PAGE_SIZE;
> -	if (err) {
> -		pr_err("ia32 mmap() failed: %d\n", err);
> -		exit(1);
> -	}
> -
> -	if (munmap(map1, PAGE_SIZE)) {
> -		pr_err("Failed to unmap() 32-bit mapping: %m\n");
> -		exit(1);
> -	}
> -
> -	map2 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
> -	err = (uintptr_t)map2 % PAGE_SIZE;
> -	if (err) {
> -		pr_err("ia32 mmap() failed: %d\n", err);
> -		exit(1);
> -	}
> -
> -	if (map1 != map2)
> -		exit(1);
> -	exit(0);
> -}
> -
> -/*
> - * Pre v4.12 kernels have a bug: for a process started as 64-bit
> - * 32-bit mmap() may return 8 byte pointer.
> - * Which is fatal for us: after 32-bit C/R a task will map 64-bit
> - * addresses, cut upper 4 bytes and try to use lower 4 bytes.
> - * This is a check if the bug was fixed in the kernel.
> - */
> -static int has_32bit_mmap_bug(void)
> -{
> -	pid_t child = fork();
> -	int stat;
> -
> -	if (child < 0) {
> -		pr_perror("%s(): failed to fork()", __func__);
> -		return -1;
> -	}
> -
> -	if (child == 0)
> -		mmap_bug_test();
> -
> -	if (waitpid(child, &stat, 0) != child) {
> -		pr_err("Failed to wait for mmap test\n");
> -		kill(child, SIGKILL);
> -		return -1;
> -	}
> -
> -	if (!WIFEXITED(stat) || WEXITSTATUS(stat) != 0)
> -		return 1;
> -	return 0;
> -}
> -
> -int kdat_compatible_cr(void)
> -{
> -	if (!kdat.can_map_vdso)
> -		return 0;
> -
> -	if (has_32bit_mmap_bug())
> -		return 0;
> -
> -	return 1;
> -}
> -#else /* !CONFIG_COMPAT */
> -int kdat_compatible_cr(void)
> -{
> -	return 0;
> -}
> -#endif
> -
> -static int kdat_x86_has_ptrace_fpu_xsave_bug_child(void *arg)
> -{
> -	ptrace(PTRACE_TRACEME, 0, 0, 0);
> -	kill(getpid(), SIGSTOP);
> -	pr_err("Continue after SIGSTOP.. Urr what?\n");
> -	_exit(1);
> -}
> -
> -/*
> - * Pre v4.14 kernels have a bug on Skylake CPUs:
> - * copyout_from_xsaves() creates fpu state for
> - *   ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov)
> - * without MXCSR and MXCSR_FLAGS if there is SSE/YMM state, but no FP state.
> - * That is xfeatures had either/both XFEATURE_MASK_{SSE,YMM} set, but not
> - * XFEATURE_MASK_FP.
> - * But we *really* need to C/R MXCSR & MXCSR_FLAGS if SSE/YMM active,
> - * as mxcsr store part of the state.
> - */
> -int kdat_x86_has_ptrace_fpu_xsave_bug(void)
> -{
> -	user_fpregs_struct_t xsave = { };
> -	struct iovec iov;
> -	char stack[PAGE_SIZE];
> -	int flags = CLONE_VM | CLONE_FILES | CLONE_UNTRACED | SIGCHLD;
> -	int ret = -1;
> -	pid_t child;
> -	int stat;
> -
> -	/* OSXSAVE can't be changed during boot. */
> -	if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE))
> -		return 0;
> -
> -	child = clone(kdat_x86_has_ptrace_fpu_xsave_bug_child,
> -		stack + ARRAY_SIZE(stack), flags, 0);
> -	if (child < 0) {
> -		pr_perror("%s(): failed to clone()", __func__);
> -		return -1;
> -	}
> -
> -	if (waitpid(child, &stat, WUNTRACED) != child) {
> -		/*
> -		 * waitpid() may end with ECHILD if SIGCHLD == SIG_IGN,
> -		 * and the child has stopped already.
> -		 */
> -		pr_perror("Failed to wait for %s() test\n", __func__);
> -		goto out_kill;
> -	}
> -
> -	if (!WIFSTOPPED(stat)) {
> -		pr_err("Born child is unstoppable! (might be dead)\n");
> -		goto out_kill;
> -	}
> -
> -	iov.iov_base = &xsave;
> -	iov.iov_len = sizeof(xsave);
> -
> -	if (ptrace(PTRACE_GETREGSET, child, (unsigned)NT_X86_XSTATE, &iov) < 0) {
> -		pr_perror("Can't obtain FPU registers for %d", child);
> -		goto out_kill;
> -	}
> -	/*
> -	 * MXCSR should be never 0x0: e.g., it should contain either:
> -	 * R+/R-/RZ/RN to determine rounding model.
> -	 */
> -	ret = !xsave.i387.mxcsr;
> +#include "log.h"
> +#include "protobuf.h"
> +#include "types.h"
>  
> -out_kill:
> -	kill(child, SIGKILL);
> -	waitpid(child, &stat, 0);
> -	return ret;
> -}
> +#include "asm/compat.h"
>  
>  int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
>  {
> diff --git a/criu/arch/x86/include/asm/kerndat.h b/criu/arch/x86/include/asm/kerndat.h
> new file mode 100644
> index 000000000000..903bc80f7c44
> --- /dev/null
> +++ b/criu/arch/x86/include/asm/kerndat.h
> @@ -0,0 +1,8 @@
> +#ifndef __CR_ASM_KERNDAT_H__
> +#define __CR_ASM_KERNDAT_H__
> +
> +extern int kdat_compatible_cr(void);
> +extern int kdat_can_map_vdso(void);
> +extern int kdat_x86_has_ptrace_fpu_xsave_bug(void);
> +
> +#endif /* __CR_ASM_KERNDAT_H__ */
> diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h
> index 179f1942f9f8..15867aa1260c 100644
> --- a/criu/arch/x86/include/asm/restorer.h
> +++ b/criu/arch/x86/include/asm/restorer.h
> @@ -80,11 +80,6 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
>  # define ARCH_MAP_VDSO_64		0x2003
>  #endif
>  
> -/* XXX: Introduce per-arch kerndat header */
> -extern int kdat_compatible_cr(void);
> -extern int kdat_can_map_vdso(void);
> -extern int kdat_x86_has_ptrace_fpu_xsave_bug(void);
> -
>  static inline void
>  __setup_sas_compat(struct ucontext_ia32* uc, ThreadSasEntry *sas)
>  {
> diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c
> new file mode 100644
> index 000000000000..a67017d3456e
> --- /dev/null
> +++ b/criu/arch/x86/kerndat.c
> @@ -0,0 +1,249 @@
> +#include <elf.h>
> +#include <sched.h>
> +#include <signal.h>
> +#include <stdlib.h>
> +#include <sys/wait.h>
> +#include <sys/ptrace.h>
> +#include <sys/types.h>
> +#include <sys/uio.h>
> +#include <unistd.h>
> +
> +#include "compel/asm/fpu.h"
> +#include "compel/plugins/std/syscall-codes.h"
> +#include "cpu.h"
> +#include "kerndat.h"
> +#include "log.h"
> +#include "types.h"
> +
> +#include "asm/compat.h"
> +#include "asm/dump.h"
> +
> +int kdat_can_map_vdso(void)
> +{
> +	pid_t child;
> +	int stat;
> +
> +	/*
> +	 * Running under fork so if vdso_64 is disabled - don't create
> +	 * it for criu accidentally.
> +	 */
> +	child = fork();
> +	if (child < 0) {
> +		pr_perror("%s(): failed to fork()", __func__);
> +		return -1;
> +	}
> +
> +	if (child == 0) {
> +		int ret;
> +
> +		ret = syscall(SYS_arch_prctl, ARCH_MAP_VDSO_32, 0);
> +		if (ret == 0)
> +			exit(1);
> +		/*
> +		 * Mapping vDSO while have not unmap it yet:
> +		 * this is restricted by API if ARCH_MAP_VDSO_* is supported.
> +		 */
> +		if (ret == -1 && errno == EEXIST)
> +			exit(1);
> +		exit(0);
> +	}
> +
> +	if (waitpid(child, &stat, 0) != child) {
> +		pr_err("Failed to wait for arch_prctl() test\n");
> +		kill(child, SIGKILL);
> +		return -1;
> +	}
> +
> +	if (!WIFEXITED(stat))
> +		return -1;
> +
> +	return WEXITSTATUS(stat);
> +
> +}
> +
> +#ifdef CONFIG_COMPAT
> +void *mmap_ia32(void *addr, size_t len, int prot,
> +		int flags, int fildes, off_t off)
> +{
> +	struct syscall_args32 s;
> +
> +	s.nr    = __NR32_mmap2;
> +	s.arg0  = (uint32_t)(uintptr_t)addr;
> +	s.arg1  = (uint32_t)len;
> +	s.arg2  = prot;
> +	s.arg3  = flags;
> +	s.arg4  = fildes;
> +	s.arg5  = (uint32_t)off;
> +
> +	do_full_int80(&s);
> +
> +	return (void *)(uintptr_t)s.nr;
> +}
> +
> +/*
> + * The idea of the test:
> + * From kernel's top-down allocator we assume here that
> + * 1. A = mmap(0, ...); munmap(A);
> + * 2. B = mmap(0, ...);
> + * results in A == B.
> + * ...but if we have 32-bit mmap() bug, then A will have only lower
> + * 4 bytes of 64-bit address allocated with mmap().
> + * That means, that the next mmap() will return B != A
> + * (as munmap(A) hasn't really unmapped A mapping).
> + *
> + * As mapping with lower 4 bytes of A may really exist, we run
> + * this test under fork().
> + *
> + * Another approach to test bug's presence would be to parse
> + * /proc/self/maps before and after 32-bit mmap(), but that would
> + * be soo slow.
> + */
> +static void mmap_bug_test(void)
> +{
> +	void *map1, *map2;
> +	int err;
> +
> +	map1 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
> +	/* 32-bit error, not sign-extended - can't use IS_ERR_VALUE() here */
> +	err = (uintptr_t)map1 % PAGE_SIZE;
> +	if (err) {
> +		pr_err("ia32 mmap() failed: %d\n", err);
> +		exit(1);
> +	}
> +
> +	if (munmap(map1, PAGE_SIZE)) {
> +		pr_err("Failed to unmap() 32-bit mapping: %m\n");
> +		exit(1);
> +	}
> +
> +	map2 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
> +	err = (uintptr_t)map2 % PAGE_SIZE;
> +	if (err) {
> +		pr_err("ia32 mmap() failed: %d\n", err);
> +		exit(1);
> +	}
> +
> +	if (map1 != map2)
> +		exit(1);
> +	exit(0);
> +}
> +
> +/*
> + * Pre v4.12 kernels have a bug: for a process started as 64-bit
> + * 32-bit mmap() may return 8 byte pointer.
> + * Which is fatal for us: after 32-bit C/R a task will map 64-bit
> + * addresses, cut upper 4 bytes and try to use lower 4 bytes.
> + * This is a check if the bug was fixed in the kernel.
> + */
> +static int has_32bit_mmap_bug(void)
> +{
> +	pid_t child = fork();
> +	int stat;
> +
> +	if (child < 0) {
> +		pr_perror("%s(): failed to fork()", __func__);
> +		return -1;
> +	}
> +
> +	if (child == 0)
> +		mmap_bug_test();
> +
> +	if (waitpid(child, &stat, 0) != child) {
> +		pr_err("Failed to wait for mmap test\n");
> +		kill(child, SIGKILL);
> +		return -1;
> +	}
> +
> +	if (!WIFEXITED(stat) || WEXITSTATUS(stat) != 0)
> +		return 1;
> +	return 0;
> +}
> +
> +int kdat_compatible_cr(void)
> +{
> +	if (!kdat.can_map_vdso)
> +		return 0;
> +
> +	if (has_32bit_mmap_bug())
> +		return 0;
> +
> +	return 1;
> +}
> +#else /* !CONFIG_COMPAT */
> +int kdat_compatible_cr(void)
> +{
> +	return 0;
> +}
> +#endif
> +
> +static int kdat_x86_has_ptrace_fpu_xsave_bug_child(void *arg)
> +{
> +	ptrace(PTRACE_TRACEME, 0, 0, 0);
> +	kill(getpid(), SIGSTOP);
> +	pr_err("Continue after SIGSTOP.. Urr what?\n");
> +	_exit(1);
> +}
> +
> +/*
> + * Pre v4.14 kernels have a bug on Skylake CPUs:
> + * copyout_from_xsaves() creates fpu state for
> + *   ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov)
> + * without MXCSR and MXCSR_FLAGS if there is SSE/YMM state, but no FP state.
> + * That is xfeatures had either/both XFEATURE_MASK_{SSE,YMM} set, but not
> + * XFEATURE_MASK_FP.
> + * But we *really* need to C/R MXCSR & MXCSR_FLAGS if SSE/YMM active,
> + * as mxcsr store part of the state.
> + */
> +int kdat_x86_has_ptrace_fpu_xsave_bug(void)
> +{
> +	user_fpregs_struct_t xsave = { };
> +	struct iovec iov;
> +	char stack[PAGE_SIZE];
> +	int flags = CLONE_VM | CLONE_FILES | CLONE_UNTRACED | SIGCHLD;
> +	int ret = -1;
> +	pid_t child;
> +	int stat;
> +
> +	/* OSXSAVE can't be changed during boot. */
> +	if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE))
> +		return 0;
> +
> +	child = clone(kdat_x86_has_ptrace_fpu_xsave_bug_child,
> +		stack + ARRAY_SIZE(stack), flags, 0);
> +	if (child < 0) {
> +		pr_perror("%s(): failed to clone()", __func__);
> +		return -1;
> +	}
> +
> +	if (waitpid(child, &stat, WUNTRACED) != child) {
> +		/*
> +		 * waitpid() may end with ECHILD if SIGCHLD == SIG_IGN,
> +		 * and the child has stopped already.
> +		 */
> +		pr_perror("Failed to wait for %s() test\n", __func__);
> +		goto out_kill;
> +	}
> +
> +	if (!WIFSTOPPED(stat)) {
> +		pr_err("Born child is unstoppable! (might be dead)\n");
> +		goto out_kill;
> +	}
> +
> +	iov.iov_base = &xsave;
> +	iov.iov_len = sizeof(xsave);
> +
> +	if (ptrace(PTRACE_GETREGSET, child, (unsigned)NT_X86_XSTATE, &iov) < 0) {
> +		pr_perror("Can't obtain FPU registers for %d", child);
> +		goto out_kill;
> +	}
> +	/*
> +	 * MXCSR should be never 0x0: e.g., it should contain either:
> +	 * R+/R-/RZ/RN to determine rounding model.
> +	 */
> +	ret = !xsave.i387.mxcsr;
> +
> +out_kill:
> +	kill(child, SIGKILL);
> +	waitpid(child, &stat, 0);
> +	return ret;
> +}
> diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
> index 9e7af14a39e5..d26d7630bbd2 100644
> --- a/criu/include/kerndat.h
> +++ b/criu/include/kerndat.h
> @@ -4,6 +4,7 @@
>  #include <stdbool.h>
>  #include "int.h"
>  #include "common/config.h"
> +#include "asm/kerndat.h"
>  #ifdef CONFIG_VDSO
>  #include "util-vdso.h"
>  #endif
> -- 
> 2.13.6
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list