[CRIU] [PATCH] kerndat: Separate per-arch kerndat

Dmitry Safonov dima at arista.com
Fri Feb 16 00:08:04 MSK 2018


x86's kerndat section in crtools.c has grown too large.
Let's split it out to make it more readable; *looking at the
cleaned-up include list*, it should also parallelize the build better.

Maybe we should turn the __weak function into a 0-define,
or get rid of the 0-defines by using #ifdefs in the generic file.
I have no strong opinion on that.

Signed-off-by: Dmitry Safonov <dima at arista.com>
---
 criu/arch/aarch64/include/asm/kerndat.h  |   7 +
 criu/arch/aarch64/include/asm/restorer.h |   2 -
 criu/arch/arm/include/asm/kerndat.h      |   7 +
 criu/arch/arm/include/asm/restorer.h     |   2 -
 criu/arch/ppc64/include/asm/kerndat.h    |   7 +
 criu/arch/ppc64/include/asm/restorer.h   |   2 -
 criu/arch/s390/include/asm/kerndat.h     |   7 +
 criu/arch/s390/include/asm/restorer.h    |   2 -
 criu/arch/x86/Makefile                   |   1 +
 criu/arch/x86/crtools.c                  | 269 +------------------------------
 criu/arch/x86/include/asm/kerndat.h      |   8 +
 criu/arch/x86/include/asm/restorer.h     |   5 -
 criu/arch/x86/kerndat.c                  | 249 ++++++++++++++++++++++++++++
 criu/include/kerndat.h                   |   1 +
 14 files changed, 295 insertions(+), 274 deletions(-)
 create mode 100644 criu/arch/aarch64/include/asm/kerndat.h
 create mode 100644 criu/arch/arm/include/asm/kerndat.h
 create mode 100644 criu/arch/ppc64/include/asm/kerndat.h
 create mode 100644 criu/arch/s390/include/asm/kerndat.h
 create mode 100644 criu/arch/x86/include/asm/kerndat.h
 create mode 100644 criu/arch/x86/kerndat.c

diff --git a/criu/arch/aarch64/include/asm/kerndat.h b/criu/arch/aarch64/include/asm/kerndat.h
new file mode 100644
index 000000000000..60956b573db9
--- /dev/null
+++ b/criu/arch/aarch64/include/asm/kerndat.h
@@ -0,0 +1,7 @@
+#ifndef __CR_ASM_KERNDAT_H__
+#define __CR_ASM_KERNDAT_H__
+
+#define kdat_compatible_cr()			0
+#define kdat_can_map_vdso()			0
+
+#endif /* __CR_ASM_KERNDAT_H__ */
diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h
index bef85f3a3210..f502cdcaf67c 100644
--- a/criu/arch/aarch64/include/asm/restorer.h
+++ b/criu/arch/aarch64/include/asm/restorer.h
@@ -52,8 +52,6 @@
 			: "sp", "x0", "memory")
 
 
-#define kdat_compatible_cr()			0
-#define kdat_can_map_vdso()			0
 #define arch_map_vdso(map, compat)		-1
 
 int restore_gpregs(struct rt_sigframe *f, UserAarch64RegsEntry *r);
diff --git a/criu/arch/arm/include/asm/kerndat.h b/criu/arch/arm/include/asm/kerndat.h
new file mode 100644
index 000000000000..60956b573db9
--- /dev/null
+++ b/criu/arch/arm/include/asm/kerndat.h
@@ -0,0 +1,7 @@
+#ifndef __CR_ASM_KERNDAT_H__
+#define __CR_ASM_KERNDAT_H__
+
+#define kdat_compatible_cr()			0
+#define kdat_can_map_vdso()			0
+
+#endif /* __CR_ASM_KERNDAT_H__ */
diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h
index d9208185f731..217d920e846e 100644
--- a/criu/arch/arm/include/asm/restorer.h
+++ b/criu/arch/arm/include/asm/restorer.h
@@ -53,8 +53,6 @@
 		     : "memory")
 
 
-#define kdat_compatible_cr()			0
-#define kdat_can_map_vdso()			0
 #define arch_map_vdso(map, compat)		-1
 
 int restore_gpregs(struct rt_sigframe *f, UserArmRegsEntry *r);
diff --git a/criu/arch/ppc64/include/asm/kerndat.h b/criu/arch/ppc64/include/asm/kerndat.h
new file mode 100644
index 000000000000..60956b573db9
--- /dev/null
+++ b/criu/arch/ppc64/include/asm/kerndat.h
@@ -0,0 +1,7 @@
+#ifndef __CR_ASM_KERNDAT_H__
+#define __CR_ASM_KERNDAT_H__
+
+#define kdat_compatible_cr()			0
+#define kdat_can_map_vdso()			0
+
+#endif /* __CR_ASM_KERNDAT_H__ */
diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h
index 3fffa833c157..d48d833d6b6c 100644
--- a/criu/arch/ppc64/include/asm/restorer.h
+++ b/criu/arch/ppc64/include/asm/restorer.h
@@ -48,8 +48,6 @@
 		  "r"(&thread_args[i])		/* %6 */		\
 		: "memory","0","3","4","5","6","7","14","15")
 
-#define kdat_compatible_cr()			0
-#define kdat_can_map_vdso()			0
 #define arch_map_vdso(map, compat)		-1
 
 int restore_gpregs(struct rt_sigframe *f, UserPpc64RegsEntry *r);
diff --git a/criu/arch/s390/include/asm/kerndat.h b/criu/arch/s390/include/asm/kerndat.h
new file mode 100644
index 000000000000..60956b573db9
--- /dev/null
+++ b/criu/arch/s390/include/asm/kerndat.h
@@ -0,0 +1,7 @@
+#ifndef __CR_ASM_KERNDAT_H__
+#define __CR_ASM_KERNDAT_H__
+
+#define kdat_compatible_cr()			0
+#define kdat_can_map_vdso()			0
+
+#endif /* __CR_ASM_KERNDAT_H__ */
diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h
index 63e09986339b..cfdefcab9bab 100644
--- a/criu/arch/s390/include/asm/restorer.h
+++ b/criu/arch/s390/include/asm/restorer.h
@@ -39,8 +39,6 @@
 	  "d"(&thread_args[i])						\
 	: "0", "1", "2", "3", "4", "5", "6", "cc", "memory")
 
-#define kdat_compatible_cr()			0
-#define kdat_can_map_vdso()			0
 #define arch_map_vdso(map, compat)		-1
 
 int restore_gpregs(struct rt_sigframe *f, UserS390RegsEntry *r);
diff --git a/criu/arch/x86/Makefile b/criu/arch/x86/Makefile
index 669dc073a5be..75fa782c8279 100644
--- a/criu/arch/x86/Makefile
+++ b/criu/arch/x86/Makefile
@@ -11,6 +11,7 @@ ldflags-y		+= -r -z noexecstack
 
 obj-y			+= cpu.o
 obj-y			+= crtools.o
+obj-y			+= kerndat.o
 obj-y			+= sigframe.o
 ifeq ($(CONFIG_COMPAT),y)
         obj-y		+= sigaction_compat.o
diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
index e13b39b90076..0b5a0acd6779 100644
--- a/criu/arch/x86/crtools.c
+++ b/criu/arch/x86/crtools.c
@@ -1,267 +1,14 @@
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <elf.h>
-#include <sched.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <sys/auxv.h>
-#include <sys/wait.h>
-#include <sys/ptrace.h>
-
-#include "types.h"
-#include "log.h"
-#include "asm/compat.h"
-#include "asm/parasite-syscall.h"
-#include "asm/restorer.h"
-#include <compel/asm/fpu.h>
-#include "asm/dump.h"
-
-#include "cr_options.h"
-#include "common/compiler.h"
-#include "restorer.h"
-#include "parasite-syscall.h"
-#include "util.h"
+#include "compel/asm/fpu.h"
+#include "compel/compel.h"
+#include "compel/plugins/std/syscall-codes.h"
 #include "cpu.h"
-#include <compel/plugins/std/syscall-codes.h>
-#include "kerndat.h"
-#include <compel/compel.h>
-
-#include "protobuf.h"
+#include "cr_options.h"
 #include "images/core.pb-c.h"
-#include "images/creds.pb-c.h"
-
-/* XXX: Move all kerndat features to per-arch kerndat .c */
-int kdat_can_map_vdso(void)
-{
-	pid_t child;
-	int stat;
-
-	/*
-	 * Running under fork so if vdso_64 is disabled - don't create
-	 * it for criu accidentally.
-	 */
-	child = fork();
-	if (child < 0) {
-		pr_perror("%s(): failed to fork()", __func__);
-		return -1;
-	}
-
-	if (child == 0) {
-		int ret;
-
-		ret = syscall(SYS_arch_prctl, ARCH_MAP_VDSO_32, 0);
-		if (ret == 0)
-			exit(1);
-		/*
-		 * Mapping vDSO while have not unmap it yet:
-		 * this is restricted by API if ARCH_MAP_VDSO_* is supported.
-		 */
-		if (ret == -1 && errno == EEXIST)
-			exit(1);
-		exit(0);
-	}
-
-	if (waitpid(child, &stat, 0) != child) {
-		pr_err("Failed to wait for arch_prctl() test\n");
-		kill(child, SIGKILL);
-		return -1;
-	}
-
-	if (!WIFEXITED(stat))
-		return -1;
-
-	return WEXITSTATUS(stat);
-
-}
-
-#ifdef CONFIG_COMPAT
-void *mmap_ia32(void *addr, size_t len, int prot,
-		int flags, int fildes, off_t off)
-{
-	struct syscall_args32 s;
-
-	s.nr    = __NR32_mmap2;
-	s.arg0  = (uint32_t)(uintptr_t)addr;
-	s.arg1  = (uint32_t)len;
-	s.arg2  = prot;
-	s.arg3  = flags;
-	s.arg4  = fildes;
-	s.arg5  = (uint32_t)off;
-
-	do_full_int80(&s);
-
-	return (void *)(uintptr_t)s.nr;
-}
-
-/*
- * The idea of the test:
- * From kernel's top-down allocator we assume here that
- * 1. A = mmap(0, ...); munmap(A);
- * 2. B = mmap(0, ...);
- * results in A == B.
- * ...but if we have 32-bit mmap() bug, then A will have only lower
- * 4 bytes of 64-bit address allocated with mmap().
- * That means, that the next mmap() will return B != A
- * (as munmap(A) hasn't really unmapped A mapping).
- *
- * As mapping with lower 4 bytes of A may really exist, we run
- * this test under fork().
- *
- * Another approach to test bug's presence would be to parse
- * /proc/self/maps before and after 32-bit mmap(), but that would
- * be soo slow.
- */
-static void mmap_bug_test(void)
-{
-	void *map1, *map2;
-	int err;
-
-	map1 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	/* 32-bit error, not sign-extended - can't use IS_ERR_VALUE() here */
-	err = (uintptr_t)map1 % PAGE_SIZE;
-	if (err) {
-		pr_err("ia32 mmap() failed: %d\n", err);
-		exit(1);
-	}
-
-	if (munmap(map1, PAGE_SIZE)) {
-		pr_err("Failed to unmap() 32-bit mapping: %m\n");
-		exit(1);
-	}
-
-	map2 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	err = (uintptr_t)map2 % PAGE_SIZE;
-	if (err) {
-		pr_err("ia32 mmap() failed: %d\n", err);
-		exit(1);
-	}
-
-	if (map1 != map2)
-		exit(1);
-	exit(0);
-}
-
-/*
- * Pre v4.12 kernels have a bug: for a process started as 64-bit
- * 32-bit mmap() may return 8 byte pointer.
- * Which is fatal for us: after 32-bit C/R a task will map 64-bit
- * addresses, cut upper 4 bytes and try to use lower 4 bytes.
- * This is a check if the bug was fixed in the kernel.
- */
-static int has_32bit_mmap_bug(void)
-{
-	pid_t child = fork();
-	int stat;
-
-	if (child < 0) {
-		pr_perror("%s(): failed to fork()", __func__);
-		return -1;
-	}
-
-	if (child == 0)
-		mmap_bug_test();
-
-	if (waitpid(child, &stat, 0) != child) {
-		pr_err("Failed to wait for mmap test\n");
-		kill(child, SIGKILL);
-		return -1;
-	}
-
-	if (!WIFEXITED(stat) || WEXITSTATUS(stat) != 0)
-		return 1;
-	return 0;
-}
-
-int kdat_compatible_cr(void)
-{
-	if (!kdat.can_map_vdso)
-		return 0;
-
-	if (has_32bit_mmap_bug())
-		return 0;
-
-	return 1;
-}
-#else /* !CONFIG_COMPAT */
-int kdat_compatible_cr(void)
-{
-	return 0;
-}
-#endif
-
-static int kdat_x86_has_ptrace_fpu_xsave_bug_child(void *arg)
-{
-	ptrace(PTRACE_TRACEME, 0, 0, 0);
-	kill(getpid(), SIGSTOP);
-	pr_err("Continue after SIGSTOP.. Urr what?\n");
-	_exit(1);
-}
-
-/*
- * Pre v4.14 kernels have a bug on Skylake CPUs:
- * copyout_from_xsaves() creates fpu state for
- *   ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov)
- * without MXCSR and MXCSR_FLAGS if there is SSE/YMM state, but no FP state.
- * That is xfeatures had either/both XFEATURE_MASK_{SSE,YMM} set, but not
- * XFEATURE_MASK_FP.
- * But we *really* need to C/R MXCSR & MXCSR_FLAGS if SSE/YMM active,
- * as mxcsr store part of the state.
- */
-int kdat_x86_has_ptrace_fpu_xsave_bug(void)
-{
-	user_fpregs_struct_t xsave = { };
-	struct iovec iov;
-	char stack[PAGE_SIZE];
-	int flags = CLONE_VM | CLONE_FILES | CLONE_UNTRACED | SIGCHLD;
-	int ret = -1;
-	pid_t child;
-	int stat;
-
-	/* OSXSAVE can't be changed during boot. */
-	if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE))
-		return 0;
-
-	child = clone(kdat_x86_has_ptrace_fpu_xsave_bug_child,
-		stack + ARRAY_SIZE(stack), flags, 0);
-	if (child < 0) {
-		pr_perror("%s(): failed to clone()", __func__);
-		return -1;
-	}
-
-	if (waitpid(child, &stat, WUNTRACED) != child) {
-		/*
-		 * waitpid() may end with ECHILD if SIGCHLD == SIG_IGN,
-		 * and the child has stopped already.
-		 */
-		pr_perror("Failed to wait for %s() test\n", __func__);
-		goto out_kill;
-	}
-
-	if (!WIFSTOPPED(stat)) {
-		pr_err("Born child is unstoppable! (might be dead)\n");
-		goto out_kill;
-	}
-
-	iov.iov_base = &xsave;
-	iov.iov_len = sizeof(xsave);
-
-	if (ptrace(PTRACE_GETREGSET, child, (unsigned)NT_X86_XSTATE, &iov) < 0) {
-		pr_perror("Can't obtain FPU registers for %d", child);
-		goto out_kill;
-	}
-	/*
-	 * MXCSR should be never 0x0: e.g., it should contain either:
-	 * R+/R-/RZ/RN to determine rounding model.
-	 */
-	ret = !xsave.i387.mxcsr;
+#include "log.h"
+#include "protobuf.h"
+#include "types.h"
 
-out_kill:
-	kill(child, SIGKILL);
-	waitpid(child, &stat, 0);
-	return ret;
-}
+#include "asm/compat.h"
 
 int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
 {
diff --git a/criu/arch/x86/include/asm/kerndat.h b/criu/arch/x86/include/asm/kerndat.h
new file mode 100644
index 000000000000..903bc80f7c44
--- /dev/null
+++ b/criu/arch/x86/include/asm/kerndat.h
@@ -0,0 +1,8 @@
+#ifndef __CR_ASM_KERNDAT_H__
+#define __CR_ASM_KERNDAT_H__
+
+extern int kdat_compatible_cr(void);
+extern int kdat_can_map_vdso(void);
+extern int kdat_x86_has_ptrace_fpu_xsave_bug(void);
+
+#endif /* __CR_ASM_KERNDAT_H__ */
diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h
index 179f1942f9f8..15867aa1260c 100644
--- a/criu/arch/x86/include/asm/restorer.h
+++ b/criu/arch/x86/include/asm/restorer.h
@@ -80,11 +80,6 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
 # define ARCH_MAP_VDSO_64		0x2003
 #endif
 
-/* XXX: Introduce per-arch kerndat header */
-extern int kdat_compatible_cr(void);
-extern int kdat_can_map_vdso(void);
-extern int kdat_x86_has_ptrace_fpu_xsave_bug(void);
-
 static inline void
 __setup_sas_compat(struct ucontext_ia32* uc, ThreadSasEntry *sas)
 {
diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c
new file mode 100644
index 000000000000..a67017d3456e
--- /dev/null
+++ b/criu/arch/x86/kerndat.c
@@ -0,0 +1,249 @@
+#include <elf.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include "compel/asm/fpu.h"
+#include "compel/plugins/std/syscall-codes.h"
+#include "cpu.h"
+#include "kerndat.h"
+#include "log.h"
+#include "types.h"
+
+#include "asm/compat.h"
+#include "asm/dump.h"
+
+int kdat_can_map_vdso(void)
+{
+	pid_t child;
+	int stat;
+
+	/*
+	 * Probe in a forked child, so that if vdso_64 is disabled the test
+	 * doesn't accidentally map a vDSO into criu itself.
+	 */
+	child = fork();
+	if (child < 0) {
+		pr_perror("%s(): failed to fork()", __func__);
+		return -1;
+	}
+
+	if (child == 0) {
+		int ret;
+
+		ret = syscall(SYS_arch_prctl, ARCH_MAP_VDSO_32, 0);
+		if (ret == 0)
+			exit(1);
+		/*
+		 * The current vDSO has not been unmapped yet, so a kernel that
+		 * supports ARCH_MAP_VDSO_* refuses the request with EEXIST.
+		 */
+		if (ret == -1 && errno == EEXIST)
+			exit(1);
+		exit(0);
+	}
+
+	if (waitpid(child, &stat, 0) != child) {
+		pr_err("Failed to wait for arch_prctl() test\n");
+		kill(child, SIGKILL);
+		return -1;
+	}
+
+	if (!WIFEXITED(stat))
+		return -1;
+
+	return WEXITSTATUS(stat);	/* child's verdict: 1 if supported, else 0 */
+
+}
+
+#ifdef CONFIG_COMPAT
+void *mmap_ia32(void *addr, size_t len, int prot,
+		int flags, int fildes, off_t off)
+{
+	struct syscall_args32 s;
+
+	s.nr    = __NR32_mmap2;
+	s.arg0  = (uint32_t)(uintptr_t)addr;	/* addr, len, off: cut to 32 bits */
+	s.arg1  = (uint32_t)len;
+	s.arg2  = prot;
+	s.arg3  = flags;
+	s.arg4  = fildes;
+	s.arg5  = (uint32_t)off;
+
+	do_full_int80(&s);
+
+	return (void *)(uintptr_t)s.nr;	/* result is read back from s.nr */
+}
+
+/*
+ * The idea of the test:
+ * Relying on the kernel's top-down allocator, we assume that
+ * 1. A = mmap(0, ...); munmap(A);
+ * 2. B = mmap(0, ...);
+ * results in A == B.
+ * ...but if the kernel has the 32-bit mmap() bug, A holds only the
+ * lower 4 bytes of the 64-bit address that mmap() really allocated.
+ * That means that the next mmap() will return B != A
+ * (as munmap(A) hasn't really unmapped the mapping behind A).
+ *
+ * As a mapping at the lower 4 bytes of A may really exist, we run
+ * this test under fork(); the child exits with 0 when A == B.
+ *
+ * Another approach to test for the bug would be to parse
+ * /proc/self/maps before and after the 32-bit mmap(), but that would
+ * be too slow.
+ */
+static void mmap_bug_test(void)
+{
+	void *map1, *map2;
+	int err;
+
+	map1 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+	/* 32-bit error isn't sign-extended: test alignment, not IS_ERR_VALUE() */
+	err = (uintptr_t)map1 % PAGE_SIZE;
+	if (err) {
+		pr_err("ia32 mmap() failed: %d\n", err);
+		exit(1);
+	}
+
+	if (munmap(map1, PAGE_SIZE)) {
+		pr_err("Failed to unmap() 32-bit mapping: %m\n");
+		exit(1);
+	}
+
+	map2 = mmap_ia32(0, PAGE_SIZE, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
+	err = (uintptr_t)map2 % PAGE_SIZE;
+	if (err) {
+		pr_err("ia32 mmap() failed: %d\n", err);
+		exit(1);
+	}
+
+	if (map1 != map2)
+		exit(1);
+	exit(0);
+}
+
+/*
+ * Pre v4.12 kernels have a bug: for a process started as 64-bit, a 32-bit
+ * mmap() may return an 8-byte pointer. That is fatal for us: after 32-bit
+ * C/R a task would map 64-bit addresses, cut off the upper 4 bytes and try
+ * to use the lower 4 bytes. This checks whether the bug is fixed: returns 0
+ * if so, 1 if the child fails or sees the bug, -1 on fork()/waitpid() error.
+ */
+static int has_32bit_mmap_bug(void)
+{
+	pid_t child = fork();
+	int stat;
+
+	if (child < 0) {
+		pr_perror("%s(): failed to fork()", __func__);
+		return -1;
+	}
+
+	if (child == 0)
+		mmap_bug_test();	/* never returns: always exit()s */
+
+	if (waitpid(child, &stat, 0) != child) {
+		pr_err("Failed to wait for mmap test\n");
+		kill(child, SIGKILL);
+		return -1;
+	}
+
+	if (!WIFEXITED(stat) || WEXITSTATUS(stat) != 0)
+		return 1;
+	return 0;
+}
+
+int kdat_compatible_cr(void)
+{
+	if (!kdat.can_map_vdso)
+		return 0;
+
+	if (has_32bit_mmap_bug())	/* non-zero: bug (1) or test error (-1) */
+		return 0;
+
+	return 1;
+}
+#else /* !CONFIG_COMPAT */
+int kdat_compatible_cr(void)
+{
+	return 0;	/* built without CONFIG_COMPAT */
+}
+#endif
+
+static int kdat_x86_has_ptrace_fpu_xsave_bug_child(void *arg)
+{
+	ptrace(PTRACE_TRACEME, 0, 0, 0);
+	kill(getpid(), SIGSTOP);	/* parent inspects us while stopped */
+	pr_err("Continue after SIGSTOP.. Urr what?\n");	/* not expected */
+	_exit(1);
+}
+
+/*
+ * Pre v4.14 kernels have a bug on Skylake CPUs:
+ * copyout_from_xsaves() creates the FPU state for
+ *   ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov)
+ * without MXCSR and MXCSR_FLAGS if there is SSE/YMM state, but no FP state.
+ * That is, xfeatures has either/both of XFEATURE_MASK_{SSE,YMM} set, but
+ * not XFEATURE_MASK_FP. But we *really* need to C/R MXCSR & MXCSR_FLAGS if
+ * SSE/YMM is active, as mxcsr stores part of the state.
+ * Returns 1 if the bug is detected, 0 if not, -1 if the probe fails.
+ */
+int kdat_x86_has_ptrace_fpu_xsave_bug(void)
+{
+	user_fpregs_struct_t xsave = { };
+	struct iovec iov;
+	char stack[PAGE_SIZE];
+	int flags = CLONE_VM | CLONE_FILES | CLONE_UNTRACED | SIGCHLD;
+	int ret = -1;
+	pid_t child;
+	int stat;
+
+	/* OSXSAVE can't be changed during boot. */
+	if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE))
+		return 0;
+
+	child = clone(kdat_x86_has_ptrace_fpu_xsave_bug_child,
+		stack + ARRAY_SIZE(stack), flags, 0);	/* end of the buffer */
+	if (child < 0) {
+		pr_perror("%s(): failed to clone()", __func__);
+		return -1;
+	}
+
+	if (waitpid(child, &stat, WUNTRACED) != child) {
+		/*
+		 * waitpid() may end with ECHILD if SIGCHLD == SIG_IGN,
+		 * and the child has stopped already.
+		 */
+		pr_perror("Failed to wait for %s() test", __func__);
+		goto out_kill;
+	}
+
+	if (!WIFSTOPPED(stat)) {
+		pr_err("Born child is unstoppable! (might be dead)\n");
+		goto out_kill;
+	}
+
+	iov.iov_base = &xsave;
+	iov.iov_len = sizeof(xsave);
+
+	if (ptrace(PTRACE_GETREGSET, child, (unsigned)NT_X86_XSTATE, &iov) < 0) {
+		pr_perror("Can't obtain FPU registers for %d", child);
+		goto out_kill;
+	}
+	/*
+	 * MXCSR should never be 0x0: e.g., it should contain either of
+	 * R+/R-/RZ/RN to determine the rounding model.
+	 */
+	ret = !xsave.i387.mxcsr;
+
+out_kill:
+	kill(child, SIGKILL);
+	waitpid(child, &stat, 0);
+	return ret;
+}
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index 9e7af14a39e5..d26d7630bbd2 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -4,6 +4,7 @@
 #include <stdbool.h>
 #include "int.h"
 #include "common/config.h"
+#include "asm/kerndat.h"
 #ifdef CONFIG_VDSO
 #include "util-vdso.h"
 #endif
-- 
2.13.6



More information about the CRIU mailing list