[CRIU] Cleanup do_full_int80()
Dmitry Safonov
0x7f454c46 at gmail.com
Tue Oct 1 00:22:03 MSK 2019
Hi Nicolas,
On 9/30/19 9:57 PM, Nicolas Viennot wrote:
> 1) Instead of tampering with the nr argument, do_full_int80() returns
> the value of the system call. It also avoids copying all registers back
> into the syscall_args32 argument after the syscall.
>
> 2) Additionally, the registers r12-r15 were added to the list of
> clobbers, as kernels older than v4.4 do not preserve them.
>
> 3) Further, GCC relies on the 128-byte red-zone defined in the x86_64
> ABI and may not adjust %rsp for local variables in leaf functions. We
> now avoid tampering with the red-zone, fixing a SIGSEGV when running
> mmap_bug_test() in debug mode (DEBUG=1).
>
> Signed-off-by: Nicolas Viennot <Nicolas.Viennot at twosigma.com>
Thanks a lot for doing this, and I'm sorry for putting you through so
many iterations,
Reviewed-by: Dmitry Safonov <0x7f454c46 at gmail.com>
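As a side note on (1) for anyone following along: with the helper returning
%eax directly, a compat syscall wrapper collapses to a single return
statement. A minimal sketch (a hypothetical close(2) wrapper, not part of
the patch; it assumes struct syscall_args32 and do_full_int80() from
criu/arch/x86/include/asm/compat.h, and uses the i386 syscall number for
close, which is 6):

static int close32(uint32_t fd)
{
	struct syscall_args32 s = {
		.nr   = 6,	/* __NR_close on 32-bit x86 */
		.arg0 = fd,
	};

	/* do_full_int80() now returns the syscall result (%eax),
	 * so there is no need to read s.nr back afterwards. */
	return (int)do_full_int80(&s);
}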
> ---
> criu/arch/x86/crtools.c | 6 ++--
> criu/arch/x86/include/asm/compat.h | 51 ++++++++++++++++++++----------
> criu/arch/x86/kerndat.c | 4 +--
> criu/arch/x86/restorer.c | 3 +-
> criu/arch/x86/sigaction_compat.c | 6 +---
> 5 files changed, 40 insertions(+), 30 deletions(-)
>
> diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
> index efc23e5f..e4073c27 100644
> --- a/criu/arch/x86/crtools.c
> +++ b/criu/arch/x86/crtools.c
> @@ -590,8 +590,7 @@ static int get_robust_list32(pid_t pid, uintptr_t head, uintptr_t len)
> .arg2 = (uint32_t)len,
> };
>
> - do_full_int80(&s);
> - return (int)s.nr;
> + return do_full_int80(&s);
> }
>
> static int set_robust_list32(uint32_t head, uint32_t len)
> @@ -602,8 +601,7 @@ static int set_robust_list32(uint32_t head, uint32_t len)
> .arg1 = len,
> };
>
> - do_full_int80(&s);
> - return (int)s.nr;
> + return do_full_int80(&s);
> }
>
> int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info)
> diff --git a/criu/arch/x86/include/asm/compat.h b/criu/arch/x86/include/asm/compat.h
> index cd1ae472..acd552fb 100644
> --- a/criu/arch/x86/include/asm/compat.h
> +++ b/criu/arch/x86/include/asm/compat.h
> @@ -38,26 +38,45 @@ struct syscall_args32 {
> uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5;
> };
>
> -static inline void do_full_int80(struct syscall_args32 *args)
> +static inline uint32_t do_full_int80(struct syscall_args32 *args)
> {
> /*
> - * r8-r11 registers are cleared during returning to userspace
> - * from syscall - that's x86_64 ABI to avoid leaking kernel
> - * pointers.
> + * Kernels older than v4.4 do not preserve the r8-r15 registers when
> + * invoking int80, so we need to preserve them ourselves.
> *
> - * Other than that - we can't use %rbp in clobbers as GCC's inline
> - * assembly doesn't allow to do so. So, here is explicitly saving
> - * %rbp before syscall and restoring it's value afterward.
> + * Additionally, %rbp is used as the 6th syscall argument, and we need
> + * to preserve its value when returning from the syscall to avoid
> + * upsetting GCC. However, we can't use %rbp in the GCC asm clobbers
> + * due to a GCC limitation. Instead, we explicitly save %rbp on the
> + * stack before invoking the syscall and restore its value afterward.
> + *
> + * Further, GCC may not adjust the %rsp pointer when allocating the
> + * args and ret variables because 1) do_full_int80() is a leaf
> + * function, and 2) the local variables (args and ret) are in the
> + * 128-byte red-zone as defined in the x86_64 ABI. To use the stack
> + * when preserving %rbp, we must either tell GCC to a) mark the
> + * function as non-leaf, or b) move away from the red-zone when using
> + * the stack. It seems that there is no easy way to do a), so we'll go
> + * with b).
> + * Note 1: Another workaround would have been to add %rsp to the list
> + * of clobbers, but this was deprecated in GCC 9.
> + * Note 2: This red-zone bug only manifests when compiling CRIU with
> + * DEBUG=1.
> */
> - asm volatile ("pushq %%rbp\n\t"
> - "mov %6, %%ebp\n\t"
> - "int $0x80\n\t"
> - "mov %%ebp, %6\n\t"
> - "popq %%rbp\n\t"
> - : "+a" (args->nr),
> - "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
> - "+S" (args->arg3), "+D" (args->arg4), "+g" (args->arg5)
> - : : "r8", "r9", "r10", "r11");
> + uint32_t ret;
> +
> + asm volatile ("sub $128, %%rsp\n\t"
> + "pushq %%rbp\n\t"
> + "mov %7, %%ebp\n\t"
> + "int $0x80\n\t"
> + "popq %%rbp\n\t"
> + "add $128, %%rsp\n\t"
> + : "=a" (ret)
> + : "a" (args->nr),
> + "b" (args->arg0), "c" (args->arg1), "d" (args->arg2),
> + "S" (args->arg3), "D" (args->arg4), "g" (args->arg5)
> + : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");
> + return ret;
> }
>
> #ifndef CR_NOGLIBC
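The red-zone situation described in the comment above is easy to reproduce
outside of CRIU. A minimal standalone sketch (hypothetical example, not CRIU
code; whether the corruption is visible depends on where the compiler places
the local, typically reproducible with gcc -O0 on x86_64, which matches the
DEBUG=1 case):

#include <stdio.h>
#include <stdint.h>

static uint64_t leaf_with_push(void)
{
	/* In a leaf function the compiler may keep this local in the
	 * 128-byte red zone below %rsp instead of adjusting %rsp. */
	volatile uint64_t local = 0x1122334455667788ULL;

	/* pushq temporarily writes below %rsp; without first stepping
	 * over the red zone ("sub $128, %%rsp") that write can land on
	 * the local above. */
	asm volatile ("pushq %%rbp\n\t"
	              "popq %%rbp\n\t"
	              : : : "memory");

	return local;	/* may come back corrupted */
}

int main(void)
{
	printf("local = 0x%llx\n", (unsigned long long)leaf_with_push());
	return 0;
}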
> diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c
> index f7593251..94c954e1 100644
> --- a/criu/arch/x86/kerndat.c
> +++ b/criu/arch/x86/kerndat.c
> @@ -75,9 +75,7 @@ void *mmap_ia32(void *addr, size_t len, int prot,
> s.arg4 = fildes;
> s.arg5 = (uint32_t)off;
>
> - do_full_int80(&s);
> -
> - return (void *)(uintptr_t)s.nr;
> + return (void *)(uintptr_t)do_full_int80(&s);
> }
>
> /*
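For completeness, a caller-side sketch of checking the 32-bit mmap result
for errors (hypothetical, not part of the patch; it assumes the usual kernel
convention of returning -errno in the low 32 bits, i.e. values in the last
page of the 32-bit address space):

	void *addr = mmap_ia32(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if ((uintptr_t)addr >= (uintptr_t)(uint32_t)-4095)
		pr_err("compat mmap failed: %d\n", -(int32_t)(uintptr_t)addr);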
> diff --git a/criu/arch/x86/restorer.c b/criu/arch/x86/restorer.c
> index 2d335d5e..b2c3b366 100644
> --- a/criu/arch/x86/restorer.c
> +++ b/criu/arch/x86/restorer.c
> @@ -54,8 +54,7 @@ int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
> .arg1 = len,
> };
>
> - do_full_int80(&s);
> - return (int)s.nr;
> + return do_full_int80(&s);
> }
>
> static int prepare_stack32(void **stack32)
> diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c
> index b38ba801..f467da49 100644
> --- a/criu/arch/x86/sigaction_compat.c
> +++ b/criu/arch/x86/sigaction_compat.c
> @@ -28,7 +28,6 @@ extern char restore_rt_sigaction;
> */
> int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act)
> {
> - int ret;
> struct syscall_args32 arg = {};
> unsigned long act_stack = (unsigned long)stack32;
>
> @@ -49,8 +48,5 @@ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act)
> arg.arg2 = 0; /* oldact */
> arg.arg3 = (uint32_t)sizeof(act->rt_sa_mask); /* sigsetsize */
>
> - do_full_int80(&arg);
> - asm volatile ("\t movl %%eax,%0\n" : "=r"(ret));
> - return ret;
> + return do_full_int80(&arg);
> }
> -
>
Thanks,
Dmitry