[CRIU] [PATCHv2 2/9] x86/kerndat: Add a check for ptrace() bug on Skylake

Dmitry Safonov dima at arista.com
Wed Feb 7 15:48:47 MSK 2018


We need to know if ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, iov)
returns xsave without FP state part.

Signed-off-by: Dmitry Safonov <dima at arista.com>
---
 criu/arch/x86/crtools.c              | 75 ++++++++++++++++++++++++++++++++++++
 criu/arch/x86/include/asm/restorer.h |  2 +
 criu/include/kerndat.h               |  1 +
 criu/kerndat.c                       | 18 +++++++++
 4 files changed, 96 insertions(+)

diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
index d7b21e7c9bae..e13b39b90076 100644
--- a/criu/arch/x86/crtools.c
+++ b/criu/arch/x86/crtools.c
@@ -3,10 +3,12 @@
 #include <string.h>
 #include <unistd.h>
 #include <elf.h>
+#include <sched.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/auxv.h>
 #include <sys/wait.h>
+#include <sys/ptrace.h>
 
 #include "types.h"
 #include "log.h"
@@ -30,6 +32,7 @@
 #include "images/core.pb-c.h"
 #include "images/creds.pb-c.h"
 
+/* XXX: Move all kerndat features to per-arch kerndat .c */
 int kdat_can_map_vdso(void)
 {
 	pid_t child;
@@ -188,6 +191,78 @@ int kdat_compatible_cr(void)
 }
 #endif
 
+static int kdat_x86_has_ptrace_fpu_xsave_bug_child(void *arg)
+{
+	ptrace(PTRACE_TRACEME, 0, 0, 0);
+	kill(getpid(), SIGSTOP);
+	pr_err("Continue after SIGSTOP.. Urr what?\n");
+	_exit(1);
+}
+
+/*
+ * Pre v4.14 kernels have a bug on Skylake CPUs:
+ * copyout_from_xsaves() creates fpu state for
+ *   ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov)
+ * without MXCSR and MXCSR_FLAGS if there is SSE/YMM state, but no FP state.
+ * That is xfeatures had either/both XFEATURE_MASK_{SSE,YMM} set, but not
+ * XFEATURE_MASK_FP.
+ * But we *really* need to C/R MXCSR & MXCSR_FLAGS if SSE/YMM active,
+ * as mxcsr store part of the state.
+ */
+int kdat_x86_has_ptrace_fpu_xsave_bug(void)
+{
+	user_fpregs_struct_t xsave = { };
+	struct iovec iov;
+	char stack[PAGE_SIZE];
+	int flags = CLONE_VM | CLONE_FILES | CLONE_UNTRACED | SIGCHLD;
+	int ret = -1;
+	pid_t child;
+	int stat;
+
+	/* OSXSAVE can't be changed during boot. */
+	if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE))
+		return 0;
+
+	child = clone(kdat_x86_has_ptrace_fpu_xsave_bug_child,
+		stack + ARRAY_SIZE(stack), flags, 0);
+	if (child < 0) {
+		pr_perror("%s(): failed to clone()", __func__);
+		return -1;
+	}
+
+	if (waitpid(child, &stat, WUNTRACED) != child) {
+		/*
+		 * waitpid() may end with ECHILD if SIGCHLD == SIG_IGN,
+		 * and the child has stopped already.
+		 */
+		pr_perror("Failed to wait for %s() test\n", __func__);
+		goto out_kill;
+	}
+
+	if (!WIFSTOPPED(stat)) {
+		pr_err("Born child is unstoppable! (might be dead)\n");
+		goto out_kill;
+	}
+
+	iov.iov_base = &xsave;
+	iov.iov_len = sizeof(xsave);
+
+	if (ptrace(PTRACE_GETREGSET, child, (unsigned)NT_X86_XSTATE, &iov) < 0) {
+		pr_perror("Can't obtain FPU registers for %d", child);
+		goto out_kill;
+	}
+	/*
+	 * MXCSR should be never 0x0: e.g., it should contain either:
+	 * R+/R-/RZ/RN to determine rounding model.
+	 */
+	ret = !xsave.i387.mxcsr;
+
+out_kill:
+	kill(child, SIGKILL);
+	waitpid(child, &stat, 0);
+	return ret;
+}
+
 int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
 {
 	CoreEntry *core = x;
diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h
index e39675d957d1..179f1942f9f8 100644
--- a/criu/arch/x86/include/asm/restorer.h
+++ b/criu/arch/x86/include/asm/restorer.h
@@ -80,8 +80,10 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
 # define ARCH_MAP_VDSO_64		0x2003
 #endif
 
+/* XXX: Introduce per-arch kerndat header */
 extern int kdat_compatible_cr(void);
 extern int kdat_can_map_vdso(void);
+extern int kdat_x86_has_ptrace_fpu_xsave_bug(void);
 
 static inline void
 __setup_sas_compat(struct ucontext_ia32* uc, ThreadSasEntry *sas)
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index 5c87b2c350d8..9e7af14a39e5 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -72,6 +72,7 @@ struct kerndat_s {
 	unsigned int sysctl_nr_open;
 	unsigned long files_stat_max_files;
 	bool has_pid_for_children_ns;
+	bool x86_has_ptrace_fpu_xsave_bug;
 };
 
 extern struct kerndat_s kdat;
diff --git a/criu/kerndat.c b/criu/kerndat.c
index 68abd61805fe..b9c4fdaf402f 100644
--- a/criu/kerndat.c
+++ b/criu/kerndat.c
@@ -792,6 +792,22 @@ int kerndat_has_pid_for_children_ns(void)
 	return 0;
 }
 
+int __attribute__((weak)) kdat_x86_has_ptrace_fpu_xsave_bug(void)
+{
+	return 0;
+}
+
+static int kerndat_x86_has_ptrace_fpu_xsave_bug(void)
+{
+	int ret = kdat_x86_has_ptrace_fpu_xsave_bug();
+
+	if (ret < 0)
+		return ret;
+
+	kdat.x86_has_ptrace_fpu_xsave_bug = !!ret;
+	return 0;
+}
+
 #define KERNDAT_CACHE_FILE	KDAT_RUNDIR"/criu.kdat"
 #define KERNDAT_CACHE_FILE_TMP	KDAT_RUNDIR"/.criu.kdat"
 
@@ -1059,6 +1075,8 @@ int kerndat_init(void)
 		ret = kerndat_has_ns_get_parent();
 	if (!ret)
 		ret = kerndat_has_pid_for_children_ns();
+	if (!ret)
+		ret = kerndat_x86_has_ptrace_fpu_xsave_bug();
 
 	kerndat_lsm();
 	kerndat_mmap_min_addr();
-- 
2.13.6



More information about the CRIU mailing list