[CRIU] [PATCH 08/20] restorer: introduced the multiarch support into the restorer.
alekskartashov at parallels.com
alekskartashov at parallels.com
Wed Dec 12 08:34:18 EST 2012
From: Alexander Kartashov <alekskartashov at parallels.com>
* Introduced the macro RT_SIGFRAME_UC to access the user context of a sigframe.
* Introduced the function restore_fpregs to format the FPU area in a sigframe.
* Introduced TLS restoration for ARM.
* Introduced the macro ARCH_RT_SIGRETURN that calls sys_rt_sigreturn in
a machine-dependent way.
* Don't touch a VMA that extends beyond the virtual address space area
0..TASK_SIZE: the VM above TASK_SIZE is read-only for the process, yet on ARM
some areas located there are mapped into the process address space.
* Introduced the macro RUN_CLONE_RESTORE_FN to start a thread
in a machine-dependent way.
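* Introduced the macro SIGFRAME_OFFSET, which replaces the hard-coded offset
of 8 added to the sigframe address when computing the new stack pointer, so
that the offset can be adjusted on different architectures.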
Signed-off-by: Alexander Kartashov <alekskartashov at parallels.com>
---
include/restorer.h | 114 +++++---------------------------------
pie/restorer.c | 154 +++++++++++++++-------------------------------------
2 files changed, 58 insertions(+), 210 deletions(-)
diff --git a/include/restorer.h b/include/restorer.h
index 9011830..ba569ae 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -16,10 +16,6 @@
#include "../protobuf/creds.pb-c.h"
#include "../protobuf/core.pb-c.h"
-#ifndef CONFIG_X86_64
-# error Only x86-64 is supported
-#endif
-
struct task_restore_core_args;
struct thread_restore_args;
@@ -66,7 +62,16 @@ struct thread_restore_args {
struct restore_mem_zone mem_zone;
int pid;
- UserX86RegsEntry gpregs;
+ UserRegsEntry gpregs;
+
+#ifdef ARCH_NEED_FP
+ UserFPState fpstate;
+#endif
+
+#ifdef CONFIG_HAS_TLS
+ u32 tls;
+#endif
+
u64 clear_tid_addr;
bool has_futex;
@@ -116,107 +121,12 @@ struct task_restore_core_args {
u32 mm_saved_auxv_size;
char comm[TASK_COMM_LEN];
TaskKobjIdsEntry ids;
+ uint32_t tls;
int *rst_tcp_socks;
int rst_tcp_socks_size;
} __aligned(sizeof(long));
-struct pt_regs {
- unsigned long r15;
- unsigned long r14;
- unsigned long r13;
- unsigned long r12;
- unsigned long bp;
- unsigned long bx;
-
- unsigned long r11;
- unsigned long r10;
- unsigned long r9;
- unsigned long r8;
- unsigned long ax;
- unsigned long cx;
- unsigned long dx;
- unsigned long si;
- unsigned long di;
- unsigned long orig_ax;
-
- unsigned long ip;
- unsigned long cs;
- unsigned long flags;
- unsigned long sp;
- unsigned long ss;
-};
-
-struct rt_sigcontext {
- unsigned long r8;
- unsigned long r9;
- unsigned long r10;
- unsigned long r11;
- unsigned long r12;
- unsigned long r13;
- unsigned long r14;
- unsigned long r15;
- unsigned long rdi;
- unsigned long rsi;
- unsigned long rbp;
- unsigned long rbx;
- unsigned long rdx;
- unsigned long rax;
- unsigned long rcx;
- unsigned long rsp;
- unsigned long rip;
- unsigned long eflags;
- unsigned short cs;
- unsigned short gs;
- unsigned short fs;
- unsigned short __pad0;
- unsigned long err;
- unsigned long trapno;
- unsigned long oldmask;
- unsigned long cr2;
- struct user_fpregs_entry *fpstate;
- unsigned long reserved1[8];
-};
-
-#ifndef __ARCH_SI_PREAMBLE_SIZE
-#define __ARCH_SI_PREAMBLE_SIZE (3 * sizeof(int))
-#endif
-
-#define SI_MAX_SIZE 128
-#ifndef SI_PAD_SIZE
-#define SI_PAD_SIZE ((SI_MAX_SIZE - __ARCH_SI_PREAMBLE_SIZE) / sizeof(int))
-#endif
-
-typedef struct rt_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
- int _pad[SI_PAD_SIZE];
-} rt_siginfo_t;
-
-typedef struct rt_sigaltstack {
- void *ss_sp;
- int ss_flags;
- size_t ss_size;
-} rt_stack_t;
-
-struct rt_ucontext {
- unsigned long uc_flags;
- struct rt_ucontext *uc_link;
- rt_stack_t uc_stack;
- struct rt_sigcontext uc_mcontext;
- rt_sigset_t uc_sigmask; /* mask last for extensibility */
-};
-
-struct rt_sigframe {
- char *pretcode;
- struct rt_ucontext uc;
- struct rt_siginfo info;
-
- /* fp state follows here */
-};
-
-
#define SHMEMS_SIZE 4096
/*
@@ -285,4 +195,6 @@ find_shmem(struct shmems *shmems, unsigned long shmid)
(vma_entry_is(vma, VMA_ANON_PRIVATE) || \
vma_entry_is(vma, VMA_FILE_PRIVATE)))
+#include <memcpy_64.h>
+
#endif /* CR_RESTORER_H__ */
diff --git a/pie/restorer.c b/pie/restorer.c
index b6211f1..ec1ceb2 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -27,6 +27,8 @@
#include "creds.pb-c.h"
+#include <restorer_private.h>
+
#define sys_prctl_safe(opcode, val1, val2, val3) \
({ \
long __ret = sys_prctl(opcode, val1, val2, val3, 0); \
@@ -141,56 +143,12 @@ static void restore_sched_info(struct rst_sched_param *p)
sys_sched_setscheduler(0, p->policy, &parm);
}
-static int restore_gpregs(struct rt_sigframe *f, UserX86RegsEntry *r)
-{
- long ret;
- unsigned long fsgs_base;
-
-#define CPREG1(d) f->uc.uc_mcontext.d = r->d
-#define CPREG2(d, s) f->uc.uc_mcontext.d = r->s
-
- CPREG1(r8);
- CPREG1(r9);
- CPREG1(r10);
- CPREG1(r11);
- CPREG1(r12);
- CPREG1(r13);
- CPREG1(r14);
- CPREG1(r15);
- CPREG2(rdi, di);
- CPREG2(rsi, si);
- CPREG2(rbp, bp);
- CPREG2(rbx, bx);
- CPREG2(rdx, dx);
- CPREG2(rax, ax);
- CPREG2(rcx, cx);
- CPREG2(rsp, sp);
- CPREG2(rip, ip);
- CPREG2(eflags, flags);
- CPREG1(cs);
- CPREG1(gs);
- CPREG1(fs);
-
- fsgs_base = r->fs_base;
- ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
- if (ret) {
- pr_info("SET_FS fail %ld\n", ret);
- return -1;
- }
-
- fsgs_base = r->gs_base;
- ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
- if (ret) {
- pr_info("SET_GS fail %ld\n", ret);
- return -1;
- }
-
- return 0;
-}
static int restore_thread_common(struct rt_sigframe *sigframe,
struct thread_restore_args *args)
{
+ int ret;
+
sys_set_tid_address((int *)args->clear_tid_addr);
if (args->has_futex) {
@@ -201,11 +159,20 @@ static int restore_thread_common(struct rt_sigframe *sigframe,
}
if (args->has_blk_sigset)
- sigframe->uc.uc_sigmask.sig[0] = args->blk_sigset;
+ RT_SIGFRAME_UC(sigframe).uc_sigmask.sig[0] = args->blk_sigset;
restore_sched_info(&args->sp);
- return restore_gpregs(sigframe, &args->gpregs);
+ ret = restore_gpregs(sigframe, &args->gpregs);
+ if (ret) {
+ return ret;
+ }
+
+#ifdef ARCH_NEED_FP
+ ret = restore_fpregs(sigframe, &args->fpstate);
+#endif
+
+ return ret;
}
/*
@@ -232,6 +199,9 @@ long __export_restore_thread(struct thread_restore_args *args)
restore_creds(&args->ta->creds);
+#ifdef CONFIG_HAS_TLS
+ restore_tls(args->tls);
+#endif
pr_info("%ld: Restored\n", sys_gettid());
@@ -240,15 +210,9 @@ long __export_restore_thread(struct thread_restore_args *args)
futex_dec_and_wake(&thread_inprogress);
- new_sp = (long)rt_sigframe + 8;
- asm volatile(
- "movq %0, %%rax \n"
- "movq %%rax, %%rsp \n"
- "movl $"__stringify(__NR_rt_sigreturn)", %%eax \n"
- "syscall \n"
- :
- : "r"(new_sp)
- : "rax","rsp","memory");
+ new_sp = (long)rt_sigframe + SIGFRAME_OFFSET;
+ ARCH_RT_SIGRETURN(new_sp);
+
core_restore_end:
pr_err("Restorer abnormal termination for %ld\n", sys_getpid());
sys_exit_group(1);
@@ -448,6 +412,10 @@ long __export_restore_task(struct task_restore_core_args *args)
}
}
+ if (vma_entry->end >= TASK_SIZE) {
+ continue;
+ }
+
if (vma_entry->end > premmapped_end) {
if (vma_entry->start < premmapped_end)
addr = premmapped_end;
@@ -470,6 +438,10 @@ long __export_restore_task(struct task_restore_core_args *args)
if (!vma_priv(vma_entry))
continue;
+ if (vma_entry->end >= TASK_SIZE) {
+ continue;
+ }
+
if (vma_entry->start > vma_entry->shmid)
break;
@@ -487,6 +459,10 @@ long __export_restore_task(struct task_restore_core_args *args)
if (!vma_priv(vma_entry))
continue;
+ if (vma_entry->start > TASK_SIZE) {
+ continue;
+ }
+
if (vma_entry->start < vma_entry->shmid)
break;
@@ -678,41 +654,8 @@ long __export_restore_task(struct task_restore_core_args *args)
* thread will run with own stack and we must not
* have any additional instructions... oh, dear...
*/
- asm volatile(
- "clone_emul: \n"
- "movq %2, %%rsi \n"
- "subq $16, %%rsi \n"
- "movq %6, %%rdi \n"
- "movq %%rdi, 8(%%rsi) \n"
- "movq %5, %%rdi \n"
- "movq %%rdi, 0(%%rsi) \n"
- "movq %1, %%rdi \n"
- "movq %3, %%rdx \n"
- "movq %4, %%r10 \n"
- "movl $"__stringify(__NR_clone)", %%eax \n"
- "syscall \n"
-
- "testq %%rax,%%rax \n"
- "jz thread_run \n"
-
- "movq %%rax, %0 \n"
- "jmp clone_end \n"
-
- "thread_run: \n" /* new stack here */
- "xorq %%rbp, %%rbp \n" /* clear ABI frame pointer */
- "popq %%rax \n" /* clone_restore_fn -- restore_thread */
- "popq %%rdi \n" /* arguments */
- "callq *%%rax \n"
-
- "clone_end: \n"
- : "=r"(ret)
- : "g"(clone_flags),
- "g"(new_sp),
- "g"(&parent_tid),
- "g"(&thread_args[i].pid),
- "g"(args->clone_restore_fn),
- "g"(&thread_args[i])
- : "rax", "rdi", "rsi", "rdx", "r10", "memory");
+
+ RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn);
}
ret = sys_flock(fd, LOCK_UN);
@@ -766,28 +709,26 @@ long __export_restore_task(struct task_restore_core_args *args)
ret = sys_munmap(args->task_entries, TASK_ENTRIES_SIZE);
if (ret < 0) {
- ret = ((long)__LINE__ << 32) | -ret;
+ ret = ((long)__LINE__ << 16) | ((-ret) & 0xffff);
goto core_restore_failed;
}
/*
* Sigframe stack.
*/
- new_sp = (long)rt_sigframe + 8;
+ new_sp = (long)rt_sigframe + SIGFRAME_OFFSET;
/*
* Prepare the stack and call for sigreturn,
* pure assembly since we don't need any additional
* code insns from gcc.
*/
- asm volatile(
- "movq %0, %%rax \n"
- "movq %%rax, %%rsp \n"
- "movl $"__stringify(__NR_rt_sigreturn)", %%eax \n"
- "syscall \n"
- :
- : "r"(new_sp)
- : "rax","rsp","memory");
+
+#ifdef CONFIG_HAS_TLS
+ restore_tls(args->tls);
+#endif
+
+ ARCH_RT_SIGRETURN(new_sp);
core_restore_end:
pr_err("Restorer fail %ld\n", sys_getpid());
@@ -795,12 +736,7 @@ core_restore_end:
return -1;
core_restore_failed:
- asm volatile(
- "movq %0, %%rsp \n"
- "movq 0, %%rax \n"
- "jmp *%%rax \n"
- :
- : "r"(ret)
- : "memory");
+ ARCH_FAIL_CORE_RESTORE;
+
return ret;
}
--
1.7.9.5
More information about the CRIU
mailing list