[CRIU] [PATCH 2/8] x86/kerndat: Add a check for ptrace() bug on Skylake
Andrei Vagin
avagin at virtuozzo.com
Tue Feb 6 04:40:09 MSK 2018
On Wed, Jan 31, 2018 at 04:58:09PM +0000, Dmitry Safonov wrote:
> We need to know if ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, iov)
> returns xsave without FP state part.
>
> Signed-off-by: Dmitry Safonov <dima at arista.com>
> ---
> criu/arch/x86/crtools.c | 71 ++++++++++++++++++++++++++++++++++++
> criu/arch/x86/include/asm/restorer.h | 2 +
> criu/include/kerndat.h | 1 +
> criu/kerndat.c | 18 +++++++++
> 4 files changed, 92 insertions(+)
>
> diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
> index d7b21e7c9bae..a389e563a886 100644
> --- a/criu/arch/x86/crtools.c
> +++ b/criu/arch/x86/crtools.c
> @@ -7,6 +7,7 @@
> #include <sys/syscall.h>
> #include <sys/auxv.h>
> #include <sys/wait.h>
> +#include <sys/ptrace.h>
>
> #include "types.h"
> #include "log.h"
> @@ -30,6 +31,7 @@
> #include "images/core.pb-c.h"
> #include "images/creds.pb-c.h"
>
> +/* XXX: Move all kerndat features to per-arch kerndat .c */
> int kdat_can_map_vdso(void)
> {
> pid_t child;
> @@ -188,6 +190,75 @@ int kdat_compatible_cr(void)
> }
> #endif
>
> +/*
> + * Pre v4.14 kernels have a bug on Skylake CPUs:
> + * copyout_from_xsaves() creates fpu state for
> + * ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov)
> + * without MXCSR and MXCSR_FLAGS if there is SSE/YMM state, but no FP state.
> + * That is xfeatures had either/both XFEATURE_MASK_{SSE,YMM} set, but not
> + * XFEATURE_MASK_FP.
> + * But we *really* need to C/R MXCSR & MXCSR_FLAGS if SSE/YMM active,
> + * as mxcsr store part of the state.
> + */
> +int kdat_x86_has_ptrace_fpu_xsave_bug(void)
> +{
> + user_fpregs_struct_t xsave = { };
> + struct iovec iov;
> + int ret = -1;
> + pid_t child;
> + int stat;
> +
> + /* OSXSAVE can't be changed during boot. */
> + if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE))
> + return 0;
> +
> + child = fork();
> + if (child < 0) {
> + pr_perror("%s(): failed to fork()", __func__);
> + return -1;
> + }
> +
> + if (child == 0) {
> + ptrace(PTRACE_TRACEME, 0, 0, 0);
It can fail in a case when criu is running under strace. In this case I
would like to get a warning and conside that a kernel doesn't have this
bug.
> + kill(getpid(), SIGSTOP);
> + pr_err("Continue after SIGSTOP.. Urr what?\n");
> + exit(1);
> + }
> +
> + if (waitpid(child, &stat, WUNTRACED) != child) {
> + /*
> + * waitpid() may end with ECHILD if SIGCHLD == SIG_IGN,
> + * and the child has stopped already.
> + */
> + if (!(errno == ECHILD && WIFSTOPPED(stat))) {
I don't understand this expression ^^^^
> + pr_perror("Failed to wait for %s() test\n", __func__);
> + goto out_kill;
> + }
> + }
> +
> + if (!WIFSTOPPED(stat)) {
> + pr_err("Born child is unstoppable!\n");
> + goto out_kill;
> + }
> +
> + iov.iov_base = &xsave;
> + iov.iov_len = sizeof(xsave);
> +
> + if (ptrace(PTRACE_GETREGSET, child, (unsigned)NT_X86_XSTATE, &iov) < 0) {
> + pr_perror("Can't obtain FPU registers for %d", child);
> + goto out_kill;
> + }
> + /*
> + * MXCSR should be never 0x0: e.g., it should contain either:
> + * R+/R-/RZ/RN to determine rounding model.
> + */
> + ret = !xsave.i387.mxcsr;
> +
> +out_kill:
> + kill(child, SIGKILL);
You need to wait a child here.
> + return ret;
> +}
> +
> int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
> {
> CoreEntry *core = x;
> diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h
> index e39675d957d1..179f1942f9f8 100644
> --- a/criu/arch/x86/include/asm/restorer.h
> +++ b/criu/arch/x86/include/asm/restorer.h
> @@ -80,8 +80,10 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
> # define ARCH_MAP_VDSO_64 0x2003
> #endif
>
> +/* XXX: Introduce per-arch kerndat header */
> extern int kdat_compatible_cr(void);
> extern int kdat_can_map_vdso(void);
> +extern int kdat_x86_has_ptrace_fpu_xsave_bug(void);
>
> static inline void
> __setup_sas_compat(struct ucontext_ia32* uc, ThreadSasEntry *sas)
> diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
> index 5c87b2c350d8..9e7af14a39e5 100644
> --- a/criu/include/kerndat.h
> +++ b/criu/include/kerndat.h
> @@ -72,6 +72,7 @@ struct kerndat_s {
> unsigned int sysctl_nr_open;
> unsigned long files_stat_max_files;
> bool has_pid_for_children_ns;
> + bool x86_has_ptrace_fpu_xsave_bug;
> };
>
> extern struct kerndat_s kdat;
> diff --git a/criu/kerndat.c b/criu/kerndat.c
> index 68abd61805fe..b9c4fdaf402f 100644
> --- a/criu/kerndat.c
> +++ b/criu/kerndat.c
> @@ -792,6 +792,22 @@ int kerndat_has_pid_for_children_ns(void)
> return 0;
> }
>
> +int __attribute__((weak)) kdat_x86_has_ptrace_fpu_xsave_bug(void)
> +{
> + return 0;
> +}
> +
> +static int kerndat_x86_has_ptrace_fpu_xsave_bug(void)
> +{
> + int ret = kdat_x86_has_ptrace_fpu_xsave_bug();
> +
> + if (ret < 0)
> + return ret;
> +
> + kdat.x86_has_ptrace_fpu_xsave_bug = !!ret;
> + return 0;
> +}
> +
> #define KERNDAT_CACHE_FILE KDAT_RUNDIR"/criu.kdat"
> #define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR"/.criu.kdat"
>
> @@ -1059,6 +1075,8 @@ int kerndat_init(void)
> ret = kerndat_has_ns_get_parent();
> if (!ret)
> ret = kerndat_has_pid_for_children_ns();
> + if (!ret)
> + ret = kerndat_x86_has_ptrace_fpu_xsave_bug();
>
> kerndat_lsm();
> kerndat_mmap_min_addr();
> --
> 2.13.6
>
More information about the CRIU
mailing list