[CRIU] Cleanup do_full_int80()
Nicolas Viennot
Nicolas.Viennot at twosigma.com
Mon Sep 30 23:57:08 MSK 2019
1) Instead of tampering with the nr argument, do_full_int80() returns
the value of the system call. It also avoids copying all registers back
into the syscall_args32 argument after the syscall.
2) Additionally, the registers r12-r15 were added in the list of
clobbers as kernels older than v4.4 do not preserve these.
3) Further, GCC uses a 128-byte red-zone as defined in the x86_64 ABI
optimizing away the correct position of the %rsp register in
leaf-functions. We now avoid tampering with the red-zone, fixing a
SIGSEGV when running mmap_bug_test() in debug mode (DEBUG=1).
Signed-off-by: Nicolas Viennot <Nicolas.Viennot at twosigma.com>
---
criu/arch/x86/crtools.c | 6 ++--
criu/arch/x86/include/asm/compat.h | 51 ++++++++++++++++++++----------
criu/arch/x86/kerndat.c | 4 +--
criu/arch/x86/restorer.c | 3 +-
criu/arch/x86/sigaction_compat.c | 6 +---
5 files changed, 40 insertions(+), 30 deletions(-)
diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
index efc23e5f..e4073c27 100644
--- a/criu/arch/x86/crtools.c
+++ b/criu/arch/x86/crtools.c
@@ -590,8 +590,7 @@ static int get_robust_list32(pid_t pid, uintptr_t head, uintptr_t len)
.arg2 = (uint32_t)len,
};
- do_full_int80(&s);
- return (int)s.nr;
+ return do_full_int80(&s);
}
static int set_robust_list32(uint32_t head, uint32_t len)
@@ -602,8 +601,7 @@ static int set_robust_list32(uint32_t head, uint32_t len)
.arg1 = len,
};
- do_full_int80(&s);
- return (int)s.nr;
+ return do_full_int80(&s);
}
int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info)
diff --git a/criu/arch/x86/include/asm/compat.h b/criu/arch/x86/include/asm/compat.h
index cd1ae472..acd552fb 100644
--- a/criu/arch/x86/include/asm/compat.h
+++ b/criu/arch/x86/include/asm/compat.h
@@ -38,26 +38,45 @@ struct syscall_args32 {
uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5;
};
-static inline void do_full_int80(struct syscall_args32 *args)
+static inline uint32_t do_full_int80(struct syscall_args32 *args)
{
/*
- * r8-r11 registers are cleared during returning to userspace
- * from syscall - that's x86_64 ABI to avoid leaking kernel
- * pointers.
+ * Kernel older than v4.4 do not preserve r8-r15 registers when
+ * invoking int80, so we need to preserve them.
*
- * Other than that - we can't use %rbp in clobbers as GCC's inline
- * assembly doesn't allow to do so. So, here is explicitly saving
- * %rbp before syscall and restoring it's value afterward.
+ * Additionally, %rbp is used as the 6th syscall argument, and we need
+ * to preserve its value when returning from the syscall to avoid
+ * upsetting GCC. However, we can't use %rbp in the GCC asm clobbers
+ * due to a GCC limitation. Instead, we explicitly save %rbp on the
+ * stack before invoking the syscall and restore its value afterward.
+ *
+ * Further, GCC may not adjust the %rsp pointer when allocating the
+ * args and ret variables because 1) do_full_int80() is a leaf
+ * function, and 2) the local variables (args and ret) are in the
+ * 128-byte red-zone as defined in the x86_64 ABI. To use the stack
+ * when preserving %rbp, we must either tell GCC to a) mark the
+ * function as non-leaf, or b) move away from the red-zone when using
+ * the stack. It seems that there is no easy way to do a), so we'll go
+ * with b).
+ * Note 1: Another workaround would have been to add %rsp in the list
+ * of clobbers, but this was deprecated in GCC 9.
+ * Note 2: This red-zone bug only manifests when compiling CRIU with
+ * DEBUG=1.
*/
- asm volatile ("pushq %%rbp\n\t"
- "mov %6, %%ebp\n\t"
- "int $0x80\n\t"
- "mov %%ebp, %6\n\t"
- "popq %%rbp\n\t"
- : "+a" (args->nr),
- "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
- "+S" (args->arg3), "+D" (args->arg4), "+g" (args->arg5)
- : : "r8", "r9", "r10", "r11");
+ uint32_t ret;
+
+ asm volatile ("sub $128, %%rsp\n\t"
+ "pushq %%rbp\n\t"
+ "mov %7, %%ebp\n\t"
+ "int $0x80\n\t"
+ "popq %%rbp\n\t"
+ "add $128, %%rsp\n\t"
+ : "=a" (ret)
+ : "a" (args->nr),
+ "b" (args->arg0), "c" (args->arg1), "d" (args->arg2),
+ "S" (args->arg3), "D" (args->arg4), "g" (args->arg5)
+ : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");
+ return ret;
}
#ifndef CR_NOGLIBC
diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c
index f7593251..94c954e1 100644
--- a/criu/arch/x86/kerndat.c
+++ b/criu/arch/x86/kerndat.c
@@ -75,9 +75,7 @@ void *mmap_ia32(void *addr, size_t len, int prot,
s.arg4 = fildes;
s.arg5 = (uint32_t)off;
- do_full_int80(&s);
-
- return (void *)(uintptr_t)s.nr;
+ return (void *)(uintptr_t)do_full_int80(&s);
}
/*
diff --git a/criu/arch/x86/restorer.c b/criu/arch/x86/restorer.c
index 2d335d5e..b2c3b366 100644
--- a/criu/arch/x86/restorer.c
+++ b/criu/arch/x86/restorer.c
@@ -54,8 +54,7 @@ int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
.arg1 = len,
};
- do_full_int80(&s);
- return (int)s.nr;
+ return do_full_int80(&s);
}
static int prepare_stack32(void **stack32)
diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c
index b38ba801..f467da49 100644
--- a/criu/arch/x86/sigaction_compat.c
+++ b/criu/arch/x86/sigaction_compat.c
@@ -28,7 +28,6 @@ extern char restore_rt_sigaction;
*/
int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act)
{
- int ret;
struct syscall_args32 arg = {};
unsigned long act_stack = (unsigned long)stack32;
@@ -49,8 +48,5 @@ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act)
arg.arg2 = 0; /* oldact */
arg.arg3 = (uint32_t)sizeof(act->rt_sa_mask); /* sigsetsize */
- do_full_int80(&arg);
- asm volatile ("\t movl %%eax,%0\n" : "=r"(ret));
- return ret;
+ return do_full_int80(&arg);
}
-
--
2.19.1
More information about the CRIU
mailing list