[CRIU] [PATCHv3 12/17] core/x86: add compatible 32 register set

Dmitry Safonov dsafonov at virtuozzo.com
Mon Apr 18 11:02:33 PDT 2016


Introduce user_regs_struct32.
The other changes mainly rework existing code to use the new
register-set union.

For protobuf images, reuse user_x86_regs_entry for both compatible
and native tasks, with an enum field (gpregs_case) that describes the
register set type. That's better and simpler than introducing a new
32-bit register set for compatible tasks. I first tried to do this
with the oneof keyword:
https://github.com/0x7f454c46/criu/commit/499c93ae0e2b8ffb8c562f309bb046d77d6b07c0
But protobuf supports the oneof keyword only since the recent version
2.6.0, so I tried to rework it into an enum + 2 register sets:
https://github.com/0x7f454c46/criu/commit/aab4489bd4e0b1360b6e05614c2fce3ff2a52eb7

But that did not work either, because the restorer PIE takes gpregs as
a thread_restore_args parameter, and UserRegsEntry must contain
structure objects rather than pointers. This could be fixed by
redefining UserRegsEntry not as a typedef for UserX86RegsEntry but as
a structure containing the needed objects, and then handling it
properly in the restorer - but that's more complicated than reusing
user_x86_regs_entry.

Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
---
v2:
  To keep backward compatibility, made gpregs_case optional and the last
  member of user_x86_regs_entry.
  Made NATIVE its default value, so code like the following can be omitted:
+	if (r->has_gpregs_case == false)
+		r->gpregs_case = USER_X86_REGS_CASE_T__NATIVE;

 criu/arch/x86/crtools.c           | 187 ++++++++++++++++++++++++++++----------
 criu/arch/x86/include/asm/dump.h  |   3 +
 criu/arch/x86/include/asm/types.h | 104 +++++++++++++++------
 criu/parasite-syscall.c           |   2 +
 images/core-x86.proto             |   7 ++
 5 files changed, 227 insertions(+), 76 deletions(-)

diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
index 4a96d1d0b569..d259fc0269a5 100644
--- a/criu/arch/x86/crtools.c
+++ b/criu/arch/x86/crtools.c
@@ -26,6 +26,7 @@
 /*
  * Injected syscall instruction
  */
+/* FIXME: 32-bit syscalls */
 const char code_syscall[] = {
 	0x0f, 0x05,				/* syscall    */
 	0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc	/* int 3, ... */
@@ -40,17 +41,21 @@ static inline __always_unused void __check_code_syscall(void)
 	BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
 }
 
+/*
+ * regs must be inited when calling this function from original context
+ */
 void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
 {
-	regs->ip = new_ip;
+	set_user_reg(regs, ip, new_ip);
 	if (stack)
-		regs->sp = (unsigned long) stack;
+		set_user_reg(regs, sp, (unsigned long) stack);
 
 	/* Avoid end of syscall processing */
-	regs->orig_ax = -1;
+	set_user_reg(regs, orig_ax, -1);
 
 	/* Make sure flags are in known state */
-	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF);
+	set_user_reg(regs, flags, get_user_reg(regs, flags) &
+			~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF));
 }
 
 int arch_task_compatible(pid_t pid)
@@ -58,14 +63,18 @@ int arch_task_compatible(pid_t pid)
 	unsigned long cs, ds;
 
 	errno = 0;
-	cs = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct_t, cs), 0);
+	/*
+	 * Offset of register must be from 64-bit set even for
+	 * compatible tasks. Fix this to support native i386 tasks
+	 */
+	cs = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct64, cs), 0);
 	if (errno != 0) {
 		pr_perror("Can't get CS register for %d", pid);
 		return -1;
 	}
 
 	errno = 0;
-	ds = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct_t, ds), 0);
+	ds = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct64, ds), 0);
 	if (errno != 0) {
 		pr_perror("Can't get DS register for %d", pid);
 		return -1;
@@ -77,6 +86,7 @@ int arch_task_compatible(pid_t pid)
 
 bool arch_can_dump_task(pid_t pid)
 {
+	/* FIXME: remove it */
 	if (arch_task_compatible(pid)) {
 		pr_err("Can't dump task %d running in 32-bit mode\n", pid);
 		return false;
@@ -96,23 +106,42 @@ int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret,
 	user_regs_struct_t regs = ctl->orig.regs;
 	int err;
 
-	regs.ax  = (unsigned long)nr;
-	regs.di  = arg1;
-	regs.si  = arg2;
-	regs.dx  = arg3;
-	regs.r10 = arg4;
-	regs.r8  = arg5;
-	regs.r9  = arg6;
+	if (regs.is_native) {
+		user_regs_struct64 *r = &regs.native;
+
+		r->ax  = (uint64_t)nr;
+		r->di  = arg1;
+		r->si  = arg2;
+		r->dx  = arg3;
+		r->r10 = arg4;
+		r->r8  = arg5;
+		r->r9  = arg6;
+	} else {
+		user_regs_struct32 *r = &regs.compat;
+
+		r->ax  = (uint32_t)nr;
+		r->bx  = arg1;
+		r->cx  = arg2;
+		r->dx  = arg3;
+		r->si  = arg4;
+		r->di  = arg5;
+		r->bp  = arg6;
+	}
 
 	err = __parasite_execute_syscall(ctl, &regs, code_syscall);
 
-	*ret = regs.ax;
+	*ret = get_user_reg(&regs, ax);
 	return err;
 }
 
+#define get_signed_user_reg(pregs, name)				\
+	(((pregs)->is_native) ? (int64_t)((pregs)->native.name) :	\
+				(int32_t)((pregs)->compat.name))
+
 int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
 {
 	struct xsave_struct xsave	= {  };
+	UserX86RegsEntry *gpregs	= core->thread_info->gpregs;
 
 	struct iovec iov;
 	int ret = -1;
@@ -120,18 +149,18 @@ int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
 	pr_info("Dumping GP/FPU registers for %d\n", pid);
 
 	/* Did we come from a system call? */
-	if ((int)regs.orig_ax >= 0) {
+	if (get_signed_user_reg(&regs, orig_ax) >= 0) {
 		/* Restart the system call */
-		switch ((long)(int)regs.ax) {
+		switch (get_signed_user_reg(&regs, ax)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
-			regs.ax = regs.orig_ax;
-			regs.ip -= 2;
+			set_user_reg(&regs, ax, get_user_reg(&regs, orig_ax));
+			set_user_reg(&regs, ip, get_user_reg(&regs, ip) - 2);
 			break;
 		case -ERESTART_RESTARTBLOCK:
 			pr_warn("Will restore %d with interrupted system call\n", pid);
-			regs.ax = -EINTR;
+			set_user_reg(&regs, ax, -EINTR);
 			break;
 		}
 	}
@@ -139,33 +168,55 @@ int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
 #define assign_reg(dst, src, e)		do { dst->e = (__typeof__(dst->e))src.e; } while (0)
 #define assign_array(dst, src, e)	memcpy(dst->e, &src.e, sizeof(src.e))
 
-	assign_reg(core->thread_info->gpregs, regs, r15);
-	assign_reg(core->thread_info->gpregs, regs, r14);
-	assign_reg(core->thread_info->gpregs, regs, r13);
-	assign_reg(core->thread_info->gpregs, regs, r12);
-	assign_reg(core->thread_info->gpregs, regs, bp);
-	assign_reg(core->thread_info->gpregs, regs, bx);
-	assign_reg(core->thread_info->gpregs, regs, r11);
-	assign_reg(core->thread_info->gpregs, regs, r10);
-	assign_reg(core->thread_info->gpregs, regs, r9);
-	assign_reg(core->thread_info->gpregs, regs, r8);
-	assign_reg(core->thread_info->gpregs, regs, ax);
-	assign_reg(core->thread_info->gpregs, regs, cx);
-	assign_reg(core->thread_info->gpregs, regs, dx);
-	assign_reg(core->thread_info->gpregs, regs, si);
-	assign_reg(core->thread_info->gpregs, regs, di);
-	assign_reg(core->thread_info->gpregs, regs, orig_ax);
-	assign_reg(core->thread_info->gpregs, regs, ip);
-	assign_reg(core->thread_info->gpregs, regs, cs);
-	assign_reg(core->thread_info->gpregs, regs, flags);
-	assign_reg(core->thread_info->gpregs, regs, sp);
-	assign_reg(core->thread_info->gpregs, regs, ss);
-	assign_reg(core->thread_info->gpregs, regs, fs_base);
-	assign_reg(core->thread_info->gpregs, regs, gs_base);
-	assign_reg(core->thread_info->gpregs, regs, ds);
-	assign_reg(core->thread_info->gpregs, regs, es);
-	assign_reg(core->thread_info->gpregs, regs, fs);
-	assign_reg(core->thread_info->gpregs, regs, gs);
+	if (regs.is_native) {
+		assign_reg(gpregs, regs.native, r15);
+		assign_reg(gpregs, regs.native, r14);
+		assign_reg(gpregs, regs.native, r13);
+		assign_reg(gpregs, regs.native, r12);
+		assign_reg(gpregs, regs.native, bp);
+		assign_reg(gpregs, regs.native, bx);
+		assign_reg(gpregs, regs.native, r11);
+		assign_reg(gpregs, regs.native, r10);
+		assign_reg(gpregs, regs.native, r9);
+		assign_reg(gpregs, regs.native, r8);
+		assign_reg(gpregs, regs.native, ax);
+		assign_reg(gpregs, regs.native, cx);
+		assign_reg(gpregs, regs.native, dx);
+		assign_reg(gpregs, regs.native, si);
+		assign_reg(gpregs, regs.native, di);
+		assign_reg(gpregs, regs.native, orig_ax);
+		assign_reg(gpregs, regs.native, ip);
+		assign_reg(gpregs, regs.native, cs);
+		assign_reg(gpregs, regs.native, flags);
+		assign_reg(gpregs, regs.native, sp);
+		assign_reg(gpregs, regs.native, ss);
+		assign_reg(gpregs, regs.native, fs_base);
+		assign_reg(gpregs, regs.native, gs_base);
+		assign_reg(gpregs, regs.native, ds);
+		assign_reg(gpregs, regs.native, es);
+		assign_reg(gpregs, regs.native, fs);
+		assign_reg(gpregs, regs.native, gs);
+		gpregs->gpregs_case = USER_X86_REGS_CASE_T__NATIVE;
+	} else {
+		assign_reg(gpregs, regs.compat, bx);
+		assign_reg(gpregs, regs.compat, cx);
+		assign_reg(gpregs, regs.compat, dx);
+		assign_reg(gpregs, regs.compat, si);
+		assign_reg(gpregs, regs.compat, di);
+		assign_reg(gpregs, regs.compat, bp);
+		assign_reg(gpregs, regs.compat, ax);
+		assign_reg(gpregs, regs.compat, ds);
+		assign_reg(gpregs, regs.compat, es);
+		assign_reg(gpregs, regs.compat, fs);
+		assign_reg(gpregs, regs.compat, gs);
+		assign_reg(gpregs, regs.compat, orig_ax);
+		assign_reg(gpregs, regs.compat, ip);
+		assign_reg(gpregs, regs.compat, cs);
+		assign_reg(gpregs, regs.compat, flags);
+		assign_reg(gpregs, regs.compat, sp);
+		assign_reg(gpregs, regs.compat, ss);
+		gpregs->gpregs_case = USER_X86_REGS_CASE_T__COMPAT;
+	}
 
 #ifndef PTRACE_GETREGSET
 # define PTRACE_GETREGSET 0x4204
@@ -227,6 +278,44 @@ err:
 	return ret;
 }
 
+int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs)
+{
+	struct iovec iov;
+	int ret;
+
+	iov.iov_base = &regs->native;
+	iov.iov_len = sizeof(user_regs_struct64);
+
+	ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov);
+	if (iov.iov_len == sizeof(regs->native)) {
+		regs->is_native = true;
+		return ret;
+	}
+	if (iov.iov_len == sizeof(regs->compat)) {
+		regs->is_native = false;
+		return ret;
+	}
+
+	pr_err("PTRACE_GETREGSET read %zu bytes for pid %d, but native/compat regs sizes are %zu/%zu bytes",
+			iov.iov_len, pid,
+			sizeof(regs->native), sizeof(regs->compat));
+	return -1;
+}
+
+int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs)
+{
+	struct iovec iov;
+
+	if (regs->is_native) {
+		iov.iov_base = &regs->native;
+		iov.iov_len = sizeof(user_regs_struct64);
+	} else {
+		iov.iov_base = &regs->compat;
+		iov.iov_len = sizeof(user_regs_struct32);
+	}
+	return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
+}
+
 int arch_alloc_thread_info(CoreEntry *core)
 {
 	size_t sz;
@@ -461,6 +550,12 @@ void *mmap_seized(struct parasite_ctl *ctl,
 
 int restore_gpregs(struct rt_sigframe *f, UserX86RegsEntry *r)
 {
+	/* FIXME: rt_sigcontext for compatible tasks */
+	if (r->gpregs_case != USER_X86_REGS_CASE_T__NATIVE) {
+		pr_err("Can't prepare rt_sigframe for compatible task restore\n");
+		return -1;
+	}
+
 #define CPREG1(d)	f->uc.uc_mcontext.d = r->d
 #define CPREG2(d, s)	f->uc.uc_mcontext.d = r->s
 
diff --git a/criu/arch/x86/include/asm/dump.h b/criu/arch/x86/include/asm/dump.h
index 1505fd2983b0..02ec20042cb0 100644
--- a/criu/arch/x86/include/asm/dump.h
+++ b/criu/arch/x86/include/asm/dump.h
@@ -5,6 +5,9 @@ extern int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core);
 extern int arch_alloc_thread_info(CoreEntry *core);
 extern void arch_free_thread_info(CoreEntry *core);
 
+#define ARCH_HAS_GET_REGS
+extern int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs);
+extern int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs);
 
 #define core_put_tls(core, tls)
 
diff --git a/criu/arch/x86/include/asm/types.h b/criu/arch/x86/include/asm/types.h
index c8e76964e430..32b1c014afa1 100644
--- a/criu/arch/x86/include/asm/types.h
+++ b/criu/arch/x86/include/asm/types.h
@@ -61,34 +61,78 @@ typedef struct {
 } user_desc_t;
 
 typedef struct {
-	unsigned long	r15;
-	unsigned long	r14;
-	unsigned long	r13;
-	unsigned long	r12;
-	unsigned long	bp;
-	unsigned long	bx;
-	unsigned long	r11;
-	unsigned long	r10;
-	unsigned long	r9;
-	unsigned long	r8;
-	unsigned long	ax;
-	unsigned long	cx;
-	unsigned long	dx;
-	unsigned long	si;
-	unsigned long	di;
-	unsigned long	orig_ax;
-	unsigned long	ip;
-	unsigned long	cs;
-	unsigned long	flags;
-	unsigned long	sp;
-	unsigned long	ss;
-	unsigned long	fs_base;
-	unsigned long	gs_base;
-	unsigned long	ds;
-	unsigned long	es;
-	unsigned long	fs;
-	unsigned long	gs;
+	uint64_t	r15;
+	uint64_t	r14;
+	uint64_t	r13;
+	uint64_t	r12;
+	uint64_t	bp;
+	uint64_t	bx;
+	uint64_t	r11;
+	uint64_t	r10;
+	uint64_t	r9;
+	uint64_t	r8;
+	uint64_t	ax;
+	uint64_t	cx;
+	uint64_t	dx;
+	uint64_t	si;
+	uint64_t	di;
+	uint64_t	orig_ax;
+	uint64_t	ip;
+	uint64_t	cs;
+	uint64_t	flags;
+	uint64_t	sp;
+	uint64_t	ss;
+	uint64_t	fs_base;
+	uint64_t	gs_base;
+	uint64_t	ds;
+	uint64_t	es;
+	uint64_t	fs;
+	uint64_t	gs;
+} user_regs_struct64;
+
+typedef struct {
+	uint32_t	bx;
+	uint32_t	cx;
+	uint32_t	dx;
+	uint32_t	si;
+	uint32_t	di;
+	uint32_t	bp;
+	uint32_t	ax;
+	uint32_t	ds;
+	uint32_t	es;
+	uint32_t	fs;
+	uint32_t	gs;
+	uint32_t	orig_ax;
+	uint32_t	ip;
+	uint32_t	cs;
+	uint32_t	flags;
+	uint32_t	sp;
+	uint32_t	ss;
+} user_regs_struct32;
+
+#ifdef CONFIG_X86_64
+typedef struct {
+	union {
+		user_regs_struct64 native;
+		user_regs_struct32 compat;
+	};
+	bool is_native;
 } user_regs_struct_t;
+#define get_user_reg(pregs, name) (((pregs)->is_native) ?	\
+		((pregs)->native.name) :			\
+		((pregs)->compat.name))
+#define set_user_reg(pregs, name, val) (((pregs)->is_native) ?	\
+		((pregs)->native.name = val) :			\
+		((pregs)->compat.name = val))
+#else
+typedef struct {
+	union {
+		user_regs_struct32 native;
+	};
+} user_regs_struct_t;
+#define get_user_reg(pregs, name) ((pregs)->native.name)
+#define set_user_reg(pregs, name, val) ((pregs)->native.name = val)
+#endif
 
 typedef struct {
 	unsigned short	cwd;
@@ -123,9 +167,9 @@ static inline unsigned long task_size() { return TASK_SIZE; }
 typedef u64 auxv_t;
 typedef u32 tls_t;
 
-#define REG_RES(regs) ((regs).ax)
-#define REG_IP(regs)  ((regs).ip)
-#define REG_SYSCALL_NR(regs)	((regs).orig_ax)
+#define REG_RES(regs)		get_user_reg(&regs, ax)
+#define REG_IP(regs)		get_user_reg(&regs, ip)
+#define REG_SYSCALL_NR(regs)	get_user_reg(&regs, orig_ax)
 
 #define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__X86_64
 
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
index 845a4c3609ad..c6c600017c73 100644
--- a/criu/parasite-syscall.c
+++ b/criu/parasite-syscall.c
@@ -74,6 +74,7 @@ static struct vma_area *get_vma_by_ip(struct list_head *vma_area_list,
 	return NULL;
 }
 
+#ifndef ARCH_HAS_GET_REGS
 static inline int ptrace_get_regs(int pid, user_regs_struct_t *regs)
 {
 	struct iovec iov;
@@ -91,6 +92,7 @@ static inline int ptrace_set_regs(int pid, user_regs_struct_t *regs)
 	iov.iov_len = sizeof(user_regs_struct_t);
 	return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
 }
+#endif
 
 static int get_thread_ctx(int pid, struct thread_ctx *ctx)
 {
diff --git a/images/core-x86.proto b/images/core-x86.proto
index f88ec06a046a..0aca778c0428 100644
--- a/images/core-x86.proto
+++ b/images/core-x86.proto
@@ -1,5 +1,11 @@
 import "opts.proto";
 
+enum user_x86_regs_case_t {
+	NATIVE = 1;
+	COMPAT = 2;
+}
+
+/* Reusing entry for both 64 and 32 bits register sets */
 message user_x86_regs_entry {
 	required uint64			r15		=  1;
 	required uint64			r14		=  2;
@@ -28,6 +34,7 @@ message user_x86_regs_entry {
 	required uint64			es		= 25;
 	required uint64			fs		= 26;
 	required uint64			gs		= 27;
+	optional user_x86_regs_case_t	gpregs_case	= 28 [default = NATIVE];
 }
 
 message user_x86_xsave_entry {
-- 
2.8.0



More information about the CRIU mailing list