[CRIU] [PATCHv2 12/17] core/x86: add compatible 32 register set
Dmitry Safonov
dsafonov at virtuozzo.com
Tue Apr 12 09:10:50 PDT 2016
Introduce user_regs_struct32.
The other changes mainly rework existing code to use the new
union of register sets.
For protobuf images - reuse user_x86_regs_entry for both compatible
and native tasks, with an enum at the beginning that describes the
register set type. That's better and simpler than introducing a new
32-bit register set for compatible tasks. I first tried to do this
with the oneof keyword:
https://github.com/0x7f454c46/criu/commit/499c93ae0e2b8ffb8c562f309bb046d77d6b07c0
But protobuf supports the oneof keyword only since the recent version 2.6.0,
so I tried to rework it into an enum + 2 register sets:
https://github.com/0x7f454c46/criu/commit/aab4489bd4e0b1360b6e05614c2fce3ff2a52eb7
But that did not work either, because the restorer pie takes gpregs as
a thread_restore_args parameter, and UserRegsEntry must contain
structure objects rather than pointers. This may be fixed by redefining
UserRegsEntry not as a typedef for UserX86RegsEntry, but as a structure
containing the needed objects, and then treating it properly in the
restorer - but that's more complicated than reusing user_x86_regs_entry.
Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
Acked-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
criu/arch/x86/crtools.c | 187 ++++++++++++++++++++++++++++----------
criu/arch/x86/include/asm/dump.h | 3 +
criu/arch/x86/include/asm/types.h | 104 +++++++++++++++------
criu/parasite-syscall.c | 2 +
images/core-x86.proto | 61 +++++++------
5 files changed, 254 insertions(+), 103 deletions(-)
diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
index 4a96d1d0b569..d259fc0269a5 100644
--- a/criu/arch/x86/crtools.c
+++ b/criu/arch/x86/crtools.c
@@ -26,6 +26,7 @@
/*
* Injected syscall instruction
*/
+/* FIXME: 32-bit syscalls */
const char code_syscall[] = {
0x0f, 0x05, /* syscall */
0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */
@@ -40,17 +41,21 @@ static inline __always_unused void __check_code_syscall(void)
BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
}
+/*
+ * regs must be inited when calling this function from original context
+ */
void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
{
- regs->ip = new_ip;
+ set_user_reg(regs, ip, new_ip);
if (stack)
- regs->sp = (unsigned long) stack;
+ set_user_reg(regs, sp, (unsigned long) stack);
/* Avoid end of syscall processing */
- regs->orig_ax = -1;
+ set_user_reg(regs, orig_ax, -1);
/* Make sure flags are in known state */
- regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF);
+ set_user_reg(regs, flags, get_user_reg(regs, flags) &
+ ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF));
}
int arch_task_compatible(pid_t pid)
@@ -58,14 +63,18 @@ int arch_task_compatible(pid_t pid)
unsigned long cs, ds;
errno = 0;
- cs = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct_t, cs), 0);
+ /*
+ * Offset of register must be from 64-bit set even for
+ * compatible tasks. Fix this to support native i386 tasks
+ */
+ cs = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct64, cs), 0);
if (errno != 0) {
pr_perror("Can't get CS register for %d", pid);
return -1;
}
errno = 0;
- ds = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct_t, ds), 0);
+ ds = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct64, ds), 0);
if (errno != 0) {
pr_perror("Can't get DS register for %d", pid);
return -1;
@@ -77,6 +86,7 @@ int arch_task_compatible(pid_t pid)
bool arch_can_dump_task(pid_t pid)
{
+ /* FIXME: remove it */
if (arch_task_compatible(pid)) {
pr_err("Can't dump task %d running in 32-bit mode\n", pid);
return false;
@@ -96,23 +106,42 @@ int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret,
user_regs_struct_t regs = ctl->orig.regs;
int err;
- regs.ax = (unsigned long)nr;
- regs.di = arg1;
- regs.si = arg2;
- regs.dx = arg3;
- regs.r10 = arg4;
- regs.r8 = arg5;
- regs.r9 = arg6;
+ if (regs.is_native) {
+ user_regs_struct64 *r = &regs.native;
+
+ r->ax = (uint64_t)nr;
+ r->di = arg1;
+ r->si = arg2;
+ r->dx = arg3;
+ r->r10 = arg4;
+ r->r8 = arg5;
+ r->r9 = arg6;
+ } else {
+ user_regs_struct32 *r = &regs.compat;
+
+ r->ax = (uint32_t)nr;
+ r->bx = arg1;
+ r->cx = arg2;
+ r->dx = arg3;
+ r->si = arg4;
+ r->di = arg5;
+ r->bp = arg6;
+ }
err = __parasite_execute_syscall(ctl, &regs, code_syscall);
- *ret = regs.ax;
+ *ret = get_user_reg(&regs, ax);
return err;
}
+#define get_signed_user_reg(pregs, name) \
+ (((pregs)->is_native) ? (int64_t)((pregs)->native.name) : \
+ (int32_t)((pregs)->compat.name))
+
int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
{
struct xsave_struct xsave = { };
+ UserX86RegsEntry *gpregs = core->thread_info->gpregs;
struct iovec iov;
int ret = -1;
@@ -120,18 +149,18 @@ int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
pr_info("Dumping GP/FPU registers for %d\n", pid);
/* Did we come from a system call? */
- if ((int)regs.orig_ax >= 0) {
+ if (get_signed_user_reg(&regs, orig_ax) >= 0) {
/* Restart the system call */
- switch ((long)(int)regs.ax) {
+ switch (get_signed_user_reg(&regs, ax)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
- regs.ax = regs.orig_ax;
- regs.ip -= 2;
+ set_user_reg(&regs, ax, get_user_reg(&regs, orig_ax));
+ set_user_reg(&regs, ip, get_user_reg(&regs, ip) - 2);
break;
case -ERESTART_RESTARTBLOCK:
pr_warn("Will restore %d with interrupted system call\n", pid);
- regs.ax = -EINTR;
+ set_user_reg(&regs, ax, -EINTR);
break;
}
}
@@ -139,33 +168,55 @@ int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
#define assign_reg(dst, src, e) do { dst->e = (__typeof__(dst->e))src.e; } while (0)
#define assign_array(dst, src, e) memcpy(dst->e, &src.e, sizeof(src.e))
- assign_reg(core->thread_info->gpregs, regs, r15);
- assign_reg(core->thread_info->gpregs, regs, r14);
- assign_reg(core->thread_info->gpregs, regs, r13);
- assign_reg(core->thread_info->gpregs, regs, r12);
- assign_reg(core->thread_info->gpregs, regs, bp);
- assign_reg(core->thread_info->gpregs, regs, bx);
- assign_reg(core->thread_info->gpregs, regs, r11);
- assign_reg(core->thread_info->gpregs, regs, r10);
- assign_reg(core->thread_info->gpregs, regs, r9);
- assign_reg(core->thread_info->gpregs, regs, r8);
- assign_reg(core->thread_info->gpregs, regs, ax);
- assign_reg(core->thread_info->gpregs, regs, cx);
- assign_reg(core->thread_info->gpregs, regs, dx);
- assign_reg(core->thread_info->gpregs, regs, si);
- assign_reg(core->thread_info->gpregs, regs, di);
- assign_reg(core->thread_info->gpregs, regs, orig_ax);
- assign_reg(core->thread_info->gpregs, regs, ip);
- assign_reg(core->thread_info->gpregs, regs, cs);
- assign_reg(core->thread_info->gpregs, regs, flags);
- assign_reg(core->thread_info->gpregs, regs, sp);
- assign_reg(core->thread_info->gpregs, regs, ss);
- assign_reg(core->thread_info->gpregs, regs, fs_base);
- assign_reg(core->thread_info->gpregs, regs, gs_base);
- assign_reg(core->thread_info->gpregs, regs, ds);
- assign_reg(core->thread_info->gpregs, regs, es);
- assign_reg(core->thread_info->gpregs, regs, fs);
- assign_reg(core->thread_info->gpregs, regs, gs);
+ if (regs.is_native) {
+ assign_reg(gpregs, regs.native, r15);
+ assign_reg(gpregs, regs.native, r14);
+ assign_reg(gpregs, regs.native, r13);
+ assign_reg(gpregs, regs.native, r12);
+ assign_reg(gpregs, regs.native, bp);
+ assign_reg(gpregs, regs.native, bx);
+ assign_reg(gpregs, regs.native, r11);
+ assign_reg(gpregs, regs.native, r10);
+ assign_reg(gpregs, regs.native, r9);
+ assign_reg(gpregs, regs.native, r8);
+ assign_reg(gpregs, regs.native, ax);
+ assign_reg(gpregs, regs.native, cx);
+ assign_reg(gpregs, regs.native, dx);
+ assign_reg(gpregs, regs.native, si);
+ assign_reg(gpregs, regs.native, di);
+ assign_reg(gpregs, regs.native, orig_ax);
+ assign_reg(gpregs, regs.native, ip);
+ assign_reg(gpregs, regs.native, cs);
+ assign_reg(gpregs, regs.native, flags);
+ assign_reg(gpregs, regs.native, sp);
+ assign_reg(gpregs, regs.native, ss);
+ assign_reg(gpregs, regs.native, fs_base);
+ assign_reg(gpregs, regs.native, gs_base);
+ assign_reg(gpregs, regs.native, ds);
+ assign_reg(gpregs, regs.native, es);
+ assign_reg(gpregs, regs.native, fs);
+ assign_reg(gpregs, regs.native, gs);
+ gpregs->gpregs_case = USER_X86_REGS_CASE_T__NATIVE;
+ } else {
+ assign_reg(gpregs, regs.compat, bx);
+ assign_reg(gpregs, regs.compat, cx);
+ assign_reg(gpregs, regs.compat, dx);
+ assign_reg(gpregs, regs.compat, si);
+ assign_reg(gpregs, regs.compat, di);
+ assign_reg(gpregs, regs.compat, bp);
+ assign_reg(gpregs, regs.compat, ax);
+ assign_reg(gpregs, regs.compat, ds);
+ assign_reg(gpregs, regs.compat, es);
+ assign_reg(gpregs, regs.compat, fs);
+ assign_reg(gpregs, regs.compat, gs);
+ assign_reg(gpregs, regs.compat, orig_ax);
+ assign_reg(gpregs, regs.compat, ip);
+ assign_reg(gpregs, regs.compat, cs);
+ assign_reg(gpregs, regs.compat, flags);
+ assign_reg(gpregs, regs.compat, sp);
+ assign_reg(gpregs, regs.compat, ss);
+ gpregs->gpregs_case = USER_X86_REGS_CASE_T__COMPAT;
+ }
#ifndef PTRACE_GETREGSET
# define PTRACE_GETREGSET 0x4204
@@ -227,6 +278,44 @@ err:
return ret;
}
+int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs)
+{
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = &regs->native;
+ iov.iov_len = sizeof(user_regs_struct64);
+
+ ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov);
+ if (iov.iov_len == sizeof(regs->native)) {
+ regs->is_native = true;
+ return ret;
+ }
+ if (iov.iov_len == sizeof(regs->compat)) {
+ regs->is_native = false;
+ return ret;
+ }
+
+ pr_err("PTRACE_GETREGSET read %zu bytes for pid %d, but native/compat regs sizes are %zu/%zu bytes",
+ iov.iov_len, pid,
+ sizeof(regs->native), sizeof(regs->compat));
+ return -1;
+}
+
+int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs)
+{
+ struct iovec iov;
+
+ if (regs->is_native) {
+ iov.iov_base = &regs->native;
+ iov.iov_len = sizeof(user_regs_struct64);
+ } else {
+ iov.iov_base = &regs->compat;
+ iov.iov_len = sizeof(user_regs_struct32);
+ }
+ return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
+}
+
int arch_alloc_thread_info(CoreEntry *core)
{
size_t sz;
@@ -461,6 +550,12 @@ void *mmap_seized(struct parasite_ctl *ctl,
int restore_gpregs(struct rt_sigframe *f, UserX86RegsEntry *r)
{
+ /* FIXME: rt_sigcontext for compatible tasks */
+ if (r->gpregs_case != USER_X86_REGS_CASE_T__NATIVE) {
+ pr_err("Can't prepare rt_sigframe for compatible task restore\n");
+ return -1;
+ }
+
#define CPREG1(d) f->uc.uc_mcontext.d = r->d
#define CPREG2(d, s) f->uc.uc_mcontext.d = r->s
diff --git a/criu/arch/x86/include/asm/dump.h b/criu/arch/x86/include/asm/dump.h
index 1505fd2983b0..02ec20042cb0 100644
--- a/criu/arch/x86/include/asm/dump.h
+++ b/criu/arch/x86/include/asm/dump.h
@@ -5,6 +5,9 @@ extern int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core);
extern int arch_alloc_thread_info(CoreEntry *core);
extern void arch_free_thread_info(CoreEntry *core);
+#define ARCH_HAS_GET_REGS
+extern int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs);
+extern int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs);
#define core_put_tls(core, tls)
diff --git a/criu/arch/x86/include/asm/types.h b/criu/arch/x86/include/asm/types.h
index c8e76964e430..32b1c014afa1 100644
--- a/criu/arch/x86/include/asm/types.h
+++ b/criu/arch/x86/include/asm/types.h
@@ -61,34 +61,78 @@ typedef struct {
} user_desc_t;
typedef struct {
- unsigned long r15;
- unsigned long r14;
- unsigned long r13;
- unsigned long r12;
- unsigned long bp;
- unsigned long bx;
- unsigned long r11;
- unsigned long r10;
- unsigned long r9;
- unsigned long r8;
- unsigned long ax;
- unsigned long cx;
- unsigned long dx;
- unsigned long si;
- unsigned long di;
- unsigned long orig_ax;
- unsigned long ip;
- unsigned long cs;
- unsigned long flags;
- unsigned long sp;
- unsigned long ss;
- unsigned long fs_base;
- unsigned long gs_base;
- unsigned long ds;
- unsigned long es;
- unsigned long fs;
- unsigned long gs;
+ uint64_t r15;
+ uint64_t r14;
+ uint64_t r13;
+ uint64_t r12;
+ uint64_t bp;
+ uint64_t bx;
+ uint64_t r11;
+ uint64_t r10;
+ uint64_t r9;
+ uint64_t r8;
+ uint64_t ax;
+ uint64_t cx;
+ uint64_t dx;
+ uint64_t si;
+ uint64_t di;
+ uint64_t orig_ax;
+ uint64_t ip;
+ uint64_t cs;
+ uint64_t flags;
+ uint64_t sp;
+ uint64_t ss;
+ uint64_t fs_base;
+ uint64_t gs_base;
+ uint64_t ds;
+ uint64_t es;
+ uint64_t fs;
+ uint64_t gs;
+} user_regs_struct64;
+
+typedef struct {
+ uint32_t bx;
+ uint32_t cx;
+ uint32_t dx;
+ uint32_t si;
+ uint32_t di;
+ uint32_t bp;
+ uint32_t ax;
+ uint32_t ds;
+ uint32_t es;
+ uint32_t fs;
+ uint32_t gs;
+ uint32_t orig_ax;
+ uint32_t ip;
+ uint32_t cs;
+ uint32_t flags;
+ uint32_t sp;
+ uint32_t ss;
+} user_regs_struct32;
+
+#ifdef CONFIG_X86_64
+typedef struct {
+ union {
+ user_regs_struct64 native;
+ user_regs_struct32 compat;
+ };
+ bool is_native;
} user_regs_struct_t;
+#define get_user_reg(pregs, name) (((pregs)->is_native) ? \
+ ((pregs)->native.name) : \
+ ((pregs)->compat.name))
+#define set_user_reg(pregs, name, val) (((pregs)->is_native) ? \
+ ((pregs)->native.name = val) : \
+ ((pregs)->compat.name = val))
+#else
+typedef struct {
+ union {
+ user_regs_struct32 native;
+ };
+} user_regs_struct_t;
+#define get_user_reg(pregs, name) ((pregs)->native.name)
+#define set_user_reg(pregs, name, val) ((pregs)->native.name = val)
+#endif
typedef struct {
unsigned short cwd;
@@ -123,9 +167,9 @@ static inline unsigned long task_size() { return TASK_SIZE; }
typedef u64 auxv_t;
typedef u32 tls_t;
-#define REG_RES(regs) ((regs).ax)
-#define REG_IP(regs) ((regs).ip)
-#define REG_SYSCALL_NR(regs) ((regs).orig_ax)
+#define REG_RES(regs) get_user_reg(&regs, ax)
+#define REG_IP(regs) get_user_reg(&regs, ip)
+#define REG_SYSCALL_NR(regs) get_user_reg(&regs, orig_ax)
#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__X86_64
diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c
index 845a4c3609ad..c6c600017c73 100644
--- a/criu/parasite-syscall.c
+++ b/criu/parasite-syscall.c
@@ -74,6 +74,7 @@ static struct vma_area *get_vma_by_ip(struct list_head *vma_area_list,
return NULL;
}
+#ifndef ARCH_HAS_GET_REGS
static inline int ptrace_get_regs(int pid, user_regs_struct_t *regs)
{
struct iovec iov;
@@ -91,6 +92,7 @@ static inline int ptrace_set_regs(int pid, user_regs_struct_t *regs)
iov.iov_len = sizeof(user_regs_struct_t);
return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
}
+#endif
static int get_thread_ctx(int pid, struct thread_ctx *ctx)
{
diff --git a/images/core-x86.proto b/images/core-x86.proto
index f88ec06a046a..04cf57f15aab 100644
--- a/images/core-x86.proto
+++ b/images/core-x86.proto
@@ -1,33 +1,40 @@
import "opts.proto";
+enum user_x86_regs_case_t {
+ NATIVE = 1;
+ COMPAT = 2;
+}
+
+/* Reusing entry for both 64 and 32 bits register sets */
message user_x86_regs_entry {
- required uint64 r15 = 1;
- required uint64 r14 = 2;
- required uint64 r13 = 3;
- required uint64 r12 = 4;
- required uint64 bp = 5;
- required uint64 bx = 6;
- required uint64 r11 = 7;
- required uint64 r10 = 8;
- required uint64 r9 = 9;
- required uint64 r8 = 10;
- required uint64 ax = 11;
- required uint64 cx = 12;
- required uint64 dx = 13;
- required uint64 si = 14;
- required uint64 di = 15;
- required uint64 orig_ax = 16;
- required uint64 ip = 17;
- required uint64 cs = 18;
- required uint64 flags = 19;
- required uint64 sp = 20;
- required uint64 ss = 21;
- required uint64 fs_base = 22;
- required uint64 gs_base = 23;
- required uint64 ds = 24;
- required uint64 es = 25;
- required uint64 fs = 26;
- required uint64 gs = 27;
+ required user_x86_regs_case_t gpregs_case = 1;
+ required uint64 r15 = 2;
+ required uint64 r14 = 3;
+ required uint64 r13 = 4;
+ required uint64 r12 = 5;
+ required uint64 bp = 6;
+ required uint64 bx = 7;
+ required uint64 r11 = 8;
+ required uint64 r10 = 9;
+ required uint64 r9 = 10;
+ required uint64 r8 = 11;
+ required uint64 ax = 12;
+ required uint64 cx = 13;
+ required uint64 dx = 14;
+ required uint64 si = 15;
+ required uint64 di = 16;
+ required uint64 orig_ax = 17;
+ required uint64 ip = 18;
+ required uint64 cs = 19;
+ required uint64 flags = 20;
+ required uint64 sp = 21;
+ required uint64 ss = 22;
+ required uint64 fs_base = 23;
+ required uint64 gs_base = 24;
+ required uint64 ds = 25;
+ required uint64 es = 26;
+ required uint64 fs = 27;
+ required uint64 gs = 28;
}
message user_x86_xsave_entry {
--
2.8.0
More information about the CRIU
mailing list