[CRIU] Cleanup do_full_int80()

Dmitry Safonov 0x7f454c46 at gmail.com
Tue Oct 1 00:22:03 MSK 2019


Hi Nicolas,

On 9/30/19 9:57 PM, Nicolas Viennot wrote:
> 1) Instead of tampering with the nr argument, do_full_int80() returns
> the value of the system call. It also avoids copying all registers back
> into the syscall_args32 argument after the syscall.
> 
> 2) Additionally, the registers r12-r15 were added in the list of
> clobbers as kernels older than v4.4 do not preserve these.
> 
> 3) Further, GCC uses a 128-byte red-zone as defined in the x86_64 ABI
> optimizing away the correct position of the %rsp register in
> leaf-functions. We now avoid tampering with the red-zone, fixing a
> SIGSEGV when running mmap_bug_test() in debug mode (DEBUG=1).
> 
> Signed-off-by: Nicolas Viennot <Nicolas.Viennot at twosigma.com>

Thanks much for doing this and I feel sorry to keep you going through so
many iterations,

Reviewed-by: Dmitry Safonov <0x7f454c46 at gmail.com>

> ---
>  criu/arch/x86/crtools.c            |  6 ++--
>  criu/arch/x86/include/asm/compat.h | 51 ++++++++++++++++++++----------
>  criu/arch/x86/kerndat.c            |  4 +--
>  criu/arch/x86/restorer.c           |  3 +-
>  criu/arch/x86/sigaction_compat.c   |  6 +---
>  5 files changed, 40 insertions(+), 30 deletions(-)
> 
> diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
> index efc23e5f..e4073c27 100644
> --- a/criu/arch/x86/crtools.c
> +++ b/criu/arch/x86/crtools.c
> @@ -590,8 +590,7 @@ static int get_robust_list32(pid_t pid, uintptr_t head, uintptr_t len)
>  		.arg2	= (uint32_t)len,
>  	};
>  
> -	do_full_int80(&s);
> -	return (int)s.nr;
> +	return do_full_int80(&s);
>  }
>  
>  static int set_robust_list32(uint32_t head, uint32_t len)
> @@ -602,8 +601,7 @@ static int set_robust_list32(uint32_t head, uint32_t len)
>  		.arg1	= len,
>  	};
>  
> -	do_full_int80(&s);
> -	return (int)s.nr;
> +	return do_full_int80(&s);
>  }
>  
>  int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info)
> diff --git a/criu/arch/x86/include/asm/compat.h b/criu/arch/x86/include/asm/compat.h
> index cd1ae472..acd552fb 100644
> --- a/criu/arch/x86/include/asm/compat.h
> +++ b/criu/arch/x86/include/asm/compat.h
> @@ -38,26 +38,45 @@ struct syscall_args32 {
>  	uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5;
>  };
>  
> -static inline void do_full_int80(struct syscall_args32 *args)
> +static inline uint32_t do_full_int80(struct syscall_args32 *args)
>  {
>  	/*
> -	 * r8-r11 registers are cleared during returning to userspace
> -	 * from syscall - that's x86_64 ABI to avoid leaking kernel
> -	 * pointers.
> +	 * Kernel older than v4.4 do not preserve r8-r15 registers when
> +	 * invoking int80, so we need to preserve them.
>  	 *
> -	 * Other than that - we can't use %rbp in clobbers as GCC's inline
> -	 * assembly doesn't allow to do so. So, here is explicitly saving
> -	 * %rbp before syscall and restoring it's value afterward.
> +	 * Additionally, %rbp is used as the 6th syscall argument, and we need
> +	 * to preserve its value when returning from the syscall to avoid
> +	 * upsetting GCC. However, we can't use %rbp in the GCC asm clobbers
> +	 * due to a GCC limitation. Instead, we explicitly save %rbp on the
> +	 * stack before invoking the syscall and restore its value afterward.
> +	 *
> +	 * Further, GCC may not adjust the %rsp pointer when allocating the
> +	 * args and ret variables because 1) do_full_int80() is a leaf
> +	 * function, and 2) the local variables (args and ret) are in the
> +	 * 128-byte red-zone as defined in the x86_64 ABI. To use the stack
> +	 * when preserving %rbp, we must either tell GCC to a) mark the
> +	 * function as non-leaf, or b) move away from the red-zone when using
> +	 * the stack. It seems that there is no easy way to do a), so we'll go
> +	 * with b).
> +	 * Note 1: Another workaround would have been to add %rsp in the list
> +	 * of clobbers, but this was deprecated in GCC 9.
> +	 * Note 2: This red-zone bug only manifests when compiling CRIU with
> +	 * DEBUG=1.
>  	 */
> -	asm volatile ("pushq %%rbp\n\t"
> -			"mov %6, %%ebp\n\t"
> -			"int $0x80\n\t"
> -			"mov %%ebp, %6\n\t"
> -			"popq %%rbp\n\t"
> -		      : "+a" (args->nr),
> -			"+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
> -			"+S" (args->arg3), "+D" (args->arg4), "+g" (args->arg5)
> -			: : "r8", "r9", "r10", "r11");
> +	uint32_t ret;
> +
> +	asm volatile ("sub $128, %%rsp\n\t"
> +		      "pushq %%rbp\n\t"
> +		      "mov %7, %%ebp\n\t"
> +		      "int $0x80\n\t"
> +		      "popq %%rbp\n\t"
> +		      "add $128, %%rsp\n\t"
> +		      : "=a" (ret)
> +		      : "a" (args->nr),
> +			"b" (args->arg0), "c" (args->arg1), "d" (args->arg2),
> +			"S" (args->arg3), "D" (args->arg4), "g" (args->arg5)
> +		      : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");
> +	return ret;
>  }
>  
>  #ifndef CR_NOGLIBC
> diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c
> index f7593251..94c954e1 100644
> --- a/criu/arch/x86/kerndat.c
> +++ b/criu/arch/x86/kerndat.c
> @@ -75,9 +75,7 @@ void *mmap_ia32(void *addr, size_t len, int prot,
>  	s.arg4  = fildes;
>  	s.arg5  = (uint32_t)off;
>  
> -	do_full_int80(&s);
> -
> -	return (void *)(uintptr_t)s.nr;
> +	return (void *)(uintptr_t)do_full_int80(&s);
>  }
>  
>  /*
> diff --git a/criu/arch/x86/restorer.c b/criu/arch/x86/restorer.c
> index 2d335d5e..b2c3b366 100644
> --- a/criu/arch/x86/restorer.c
> +++ b/criu/arch/x86/restorer.c
> @@ -54,8 +54,7 @@ int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
>  		.arg1	= len,
>  	};
>  
> -	do_full_int80(&s);
> -	return (int)s.nr;
> +	return do_full_int80(&s);
>  }
>  
>  static int prepare_stack32(void **stack32)
> diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c
> index b38ba801..f467da49 100644
> --- a/criu/arch/x86/sigaction_compat.c
> +++ b/criu/arch/x86/sigaction_compat.c
> @@ -28,7 +28,6 @@ extern char restore_rt_sigaction;
>   */
>  int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act)
>  {
> -	int ret;
>  	struct syscall_args32 arg = {};
>  	unsigned long act_stack = (unsigned long)stack32;
>  
> @@ -49,8 +48,5 @@ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act)
>  	arg.arg2	= 0;					/* oldact */
>  	arg.arg3	= (uint32_t)sizeof(act->rt_sa_mask);	/* sigsetsize */
>  
> -	do_full_int80(&arg);
> -	asm volatile ("\t movl %%eax,%0\n" : "=r"(ret));
> -	return ret;
> +	return do_full_int80(&arg);
>  }
> -
> 

Thanks,
          Dmitry


More information about the CRIU mailing list