[CRIU] [PATCH 08/20] restorer: introduced multiarch support into the restorer.

alekskartashov at parallels.com alekskartashov at parallels.com
Wed Dec 12 08:34:18 EST 2012


From: Alexander Kartashov <alekskartashov at parallels.com>

* Introduced the macro RT_SIGFRAME_UC to access the user context of a sigframe.
* Introduced the function restore_fpregs to format the FPU area in a sigframe.
* Introduced TLS restoration for ARM.
* Introduced the macro ARCH_RT_SIGRETURN that calls sys_rt_sigreturn in
  a machine-dependent way.
* Don't touch a VMA that lies outside the virtual address space area
  0..TASK_SIZE, because memory above TASK_SIZE is read-only, yet on ARM
  some areas located there are mapped into the process address space.
* Introduced the macro RUN_CLONE_RESTORE_FN to start a thread
  in a machine-dependent way.
* Introduced the macro SIGFRAME_OFFSET, the architecture-specific offset
  added to the sigframe address to obtain the stack pointer passed to
  sigreturn (it replaces the hard-coded 8).

Signed-off-by: Alexander Kartashov <alekskartashov at parallels.com>
---
 include/restorer.h |  114 +++++---------------------------------
 pie/restorer.c     |  154 +++++++++++++++-------------------------------------
 2 files changed, 58 insertions(+), 210 deletions(-)

diff --git a/include/restorer.h b/include/restorer.h
index 9011830..ba569ae 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -16,10 +16,6 @@
 #include "../protobuf/creds.pb-c.h"
 #include "../protobuf/core.pb-c.h"
 
-#ifndef CONFIG_X86_64
-# error Only x86-64 is supported
-#endif
-
 struct task_restore_core_args;
 struct thread_restore_args;
 
@@ -66,7 +62,16 @@ struct thread_restore_args {
 	struct restore_mem_zone		mem_zone;
 
 	int				pid;
-	UserX86RegsEntry		gpregs;
+	UserRegsEntry		        gpregs;
+
+#ifdef ARCH_NEED_FP
+	UserFPState			fpstate;
+#endif
+
+#ifdef CONFIG_HAS_TLS
+	u32				tls;
+#endif
+
 	u64				clear_tid_addr;
 
 	bool				has_futex;
@@ -116,107 +121,12 @@ struct task_restore_core_args {
 	u32				mm_saved_auxv_size;
 	char				comm[TASK_COMM_LEN];
 	TaskKobjIdsEntry		ids;
+	uint32_t                        tls;
 
 	int				*rst_tcp_socks;
 	int				rst_tcp_socks_size;
 } __aligned(sizeof(long));
 
-struct pt_regs {
-	unsigned long	r15;
-	unsigned long	r14;
-	unsigned long	r13;
-	unsigned long	r12;
-	unsigned long	bp;
-	unsigned long	bx;
-
-	unsigned long	r11;
-	unsigned long	r10;
-	unsigned long	r9;
-	unsigned long	r8;
-	unsigned long	ax;
-	unsigned long	cx;
-	unsigned long	dx;
-	unsigned long	si;
-	unsigned long	di;
-	unsigned long	orig_ax;
-
-	unsigned long	ip;
-	unsigned long	cs;
-	unsigned long	flags;
-	unsigned long	sp;
-	unsigned long	ss;
-};
-
-struct rt_sigcontext {
-	unsigned long			r8;
-	unsigned long			r9;
-	unsigned long			r10;
-	unsigned long			r11;
-	unsigned long			r12;
-	unsigned long			r13;
-	unsigned long			r14;
-	unsigned long			r15;
-	unsigned long			rdi;
-	unsigned long			rsi;
-	unsigned long			rbp;
-	unsigned long			rbx;
-	unsigned long			rdx;
-	unsigned long			rax;
-	unsigned long			rcx;
-	unsigned long			rsp;
-	unsigned long			rip;
-	unsigned long			eflags;
-	unsigned short			cs;
-	unsigned short			gs;
-	unsigned short			fs;
-	unsigned short			__pad0;
-	unsigned long			err;
-	unsigned long			trapno;
-	unsigned long			oldmask;
-	unsigned long			cr2;
-	struct user_fpregs_entry	*fpstate;
-	unsigned long			reserved1[8];
-};
-
-#ifndef __ARCH_SI_PREAMBLE_SIZE
-#define __ARCH_SI_PREAMBLE_SIZE	(3 * sizeof(int))
-#endif
-
-#define SI_MAX_SIZE	128
-#ifndef SI_PAD_SIZE
-#define SI_PAD_SIZE	((SI_MAX_SIZE - __ARCH_SI_PREAMBLE_SIZE) / sizeof(int))
-#endif
-
-typedef struct rt_siginfo {
-	int	si_signo;
-	int	si_errno;
-	int	si_code;
-	int	_pad[SI_PAD_SIZE];
-} rt_siginfo_t;
-
-typedef struct rt_sigaltstack {
-	void	*ss_sp;
-	int	ss_flags;
-	size_t	ss_size;
-} rt_stack_t;
-
-struct rt_ucontext {
-	unsigned long		uc_flags;
-	struct rt_ucontext	*uc_link;
-	rt_stack_t		uc_stack;
-	struct rt_sigcontext	uc_mcontext;
-	rt_sigset_t		uc_sigmask;	/* mask last for extensibility */
-};
-
-struct rt_sigframe {
-	char			*pretcode;
-	struct rt_ucontext	uc;
-	struct rt_siginfo	info;
-
-	/* fp state follows here */
-};
-
-
 #define SHMEMS_SIZE	4096
 
 /*
@@ -285,4 +195,6 @@ find_shmem(struct shmems *shmems, unsigned long shmid)
 			(vma_entry_is(vma, VMA_ANON_PRIVATE) || \
 			vma_entry_is(vma, VMA_FILE_PRIVATE)))
 
+#include <memcpy_64.h>
+
 #endif /* CR_RESTORER_H__ */
diff --git a/pie/restorer.c b/pie/restorer.c
index b6211f1..ec1ceb2 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -27,6 +27,8 @@
 
 #include "creds.pb-c.h"
 
+#include <restorer_private.h>
+
 #define sys_prctl_safe(opcode, val1, val2, val3)			\
 	({								\
 		long __ret = sys_prctl(opcode, val1, val2, val3, 0);	\
@@ -141,56 +143,12 @@ static void restore_sched_info(struct rst_sched_param *p)
 	sys_sched_setscheduler(0, p->policy, &parm);
 }
 
-static int restore_gpregs(struct rt_sigframe *f, UserX86RegsEntry *r)
-{
-	long ret;
-	unsigned long fsgs_base;
-
-#define CPREG1(d)	f->uc.uc_mcontext.d = r->d
-#define CPREG2(d, s)	f->uc.uc_mcontext.d = r->s
-
-	CPREG1(r8);
-	CPREG1(r9);
-	CPREG1(r10);
-	CPREG1(r11);
-	CPREG1(r12);
-	CPREG1(r13);
-	CPREG1(r14);
-	CPREG1(r15);
-	CPREG2(rdi, di);
-	CPREG2(rsi, si);
-	CPREG2(rbp, bp);
-	CPREG2(rbx, bx);
-	CPREG2(rdx, dx);
-	CPREG2(rax, ax);
-	CPREG2(rcx, cx);
-	CPREG2(rsp, sp);
-	CPREG2(rip, ip);
-	CPREG2(eflags, flags);
-	CPREG1(cs);
-	CPREG1(gs);
-	CPREG1(fs);
-
-	fsgs_base = r->fs_base;
-	ret = sys_arch_prctl(ARCH_SET_FS, fsgs_base);
-	if (ret) {
-		pr_info("SET_FS fail %ld\n", ret);
-		return -1;
-	}
-
-	fsgs_base = r->gs_base;
-	ret = sys_arch_prctl(ARCH_SET_GS, fsgs_base);
-	if (ret) {
-		pr_info("SET_GS fail %ld\n", ret);
-		return -1;
-	}
-
-	return 0;
-}
 
 static int restore_thread_common(struct rt_sigframe *sigframe,
 		struct thread_restore_args *args)
 {
+	int ret;
+
 	sys_set_tid_address((int *)args->clear_tid_addr);
 
 	if (args->has_futex) {
@@ -201,11 +159,20 @@ static int restore_thread_common(struct rt_sigframe *sigframe,
 	}
 
 	if (args->has_blk_sigset)
-		sigframe->uc.uc_sigmask.sig[0] = args->blk_sigset;
+		RT_SIGFRAME_UC(sigframe).uc_sigmask.sig[0] = args->blk_sigset;
 
 	restore_sched_info(&args->sp);
 
-	return restore_gpregs(sigframe, &args->gpregs);
+	ret = restore_gpregs(sigframe, &args->gpregs);
+	if (ret) {
+		return ret;
+	}
+
+#ifdef ARCH_NEED_FP
+	ret = restore_fpregs(sigframe, &args->fpstate);
+#endif
+
+	return ret;
 }
 
 /*
@@ -232,6 +199,9 @@ long __export_restore_thread(struct thread_restore_args *args)
 
 	restore_creds(&args->ta->creds);
 
+#ifdef CONFIG_HAS_TLS
+	restore_tls(args->tls);
+#endif
 
 	pr_info("%ld: Restored\n", sys_gettid());
 
@@ -240,15 +210,9 @@ long __export_restore_thread(struct thread_restore_args *args)
 
 	futex_dec_and_wake(&thread_inprogress);
 
-	new_sp = (long)rt_sigframe + 8;
-	asm volatile(
-		"movq %0, %%rax					\n"
-		"movq %%rax, %%rsp				\n"
-		"movl $"__stringify(__NR_rt_sigreturn)", %%eax	\n"
-		"syscall					\n"
-		:
-		: "r"(new_sp)
-		: "rax","rsp","memory");
+	new_sp = (long)rt_sigframe + SIGFRAME_OFFSET;
+	ARCH_RT_SIGRETURN(new_sp);
+
 core_restore_end:
 	pr_err("Restorer abnormal termination for %ld\n", sys_getpid());
 	sys_exit_group(1);
@@ -448,6 +412,10 @@ long __export_restore_task(struct task_restore_core_args *args)
 			}
 		}
 
+		if (vma_entry->end >= TASK_SIZE) {
+			continue;
+		}
+
 		if (vma_entry->end > premmapped_end) {
 			if (vma_entry->start < premmapped_end)
 				addr = premmapped_end;
@@ -470,6 +438,10 @@ long __export_restore_task(struct task_restore_core_args *args)
 		if (!vma_priv(vma_entry))
 			continue;
 
+		if (vma_entry->end >= TASK_SIZE) {
+			continue;
+		}
+
 		if (vma_entry->start > vma_entry->shmid)
 			break;
 
@@ -487,6 +459,10 @@ long __export_restore_task(struct task_restore_core_args *args)
 		if (!vma_priv(vma_entry))
 			continue;
 
+		if (vma_entry->start > TASK_SIZE) {
+			continue;
+		}
+
 		if (vma_entry->start < vma_entry->shmid)
 			break;
 
@@ -678,41 +654,8 @@ long __export_restore_task(struct task_restore_core_args *args)
 			 * thread will run with own stack and we must not
 			 * have any additional instructions... oh, dear...
 			 */
-			asm volatile(
-				"clone_emul:				\n"
-				"movq %2, %%rsi				\n"
-				"subq $16, %%rsi			\n"
-				"movq %6, %%rdi				\n"
-				"movq %%rdi, 8(%%rsi)			\n"
-				"movq %5, %%rdi				\n"
-				"movq %%rdi, 0(%%rsi)			\n"
-				"movq %1, %%rdi				\n"
-				"movq %3, %%rdx				\n"
-				"movq %4, %%r10				\n"
-				"movl $"__stringify(__NR_clone)", %%eax	\n"
-				"syscall				\n"
-
-				"testq %%rax,%%rax			\n"
-				"jz thread_run				\n"
-
-				"movq %%rax, %0				\n"
-				"jmp clone_end				\n"
-
-				"thread_run:				\n"	/* new stack here */
-				"xorq %%rbp, %%rbp			\n"	/* clear ABI frame pointer */
-				"popq %%rax				\n"	/* clone_restore_fn  -- restore_thread */
-				"popq %%rdi				\n"	/* arguments */
-				"callq *%%rax				\n"
-
-				"clone_end:				\n"
-				: "=r"(ret)
-				:	"g"(clone_flags),
-					"g"(new_sp),
-					"g"(&parent_tid),
-					"g"(&thread_args[i].pid),
-					"g"(args->clone_restore_fn),
-					"g"(&thread_args[i])
-				: "rax", "rdi", "rsi", "rdx", "r10", "memory");
+
+			RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn);
 		}
 
 		ret = sys_flock(fd, LOCK_UN);
@@ -766,28 +709,26 @@ long __export_restore_task(struct task_restore_core_args *args)
 
 	ret = sys_munmap(args->task_entries, TASK_ENTRIES_SIZE);
 	if (ret < 0) {
-		ret = ((long)__LINE__ << 32) | -ret;
+		ret = ((long)__LINE__ << 16) | ((-ret) & 0xffff);
 		goto core_restore_failed;
 	}
 
 	/*
 	 * Sigframe stack.
 	 */
-	new_sp = (long)rt_sigframe + 8;
+	new_sp = (long)rt_sigframe + SIGFRAME_OFFSET;
 
 	/*
 	 * Prepare the stack and call for sigreturn,
 	 * pure assembly since we don't need any additional
 	 * code insns from gcc.
 	 */
-	asm volatile(
-		"movq %0, %%rax					\n"
-		"movq %%rax, %%rsp				\n"
-		"movl $"__stringify(__NR_rt_sigreturn)", %%eax	\n"
-		"syscall					\n"
-		:
-		: "r"(new_sp)
-		: "rax","rsp","memory");
+
+#ifdef CONFIG_HAS_TLS
+	restore_tls(args->tls);
+#endif
+
+	ARCH_RT_SIGRETURN(new_sp);
 
 core_restore_end:
 	pr_err("Restorer fail %ld\n", sys_getpid());
@@ -795,12 +736,7 @@ core_restore_end:
 	return -1;
 
 core_restore_failed:
-	asm volatile(
-		"movq %0, %%rsp				\n"
-		"movq 0, %%rax				\n"
-		"jmp *%%rax				\n"
-		:
-		: "r"(ret)
-		: "memory");
+	ARCH_FAIL_CORE_RESTORE;
+
 	return ret;
 }
-- 
1.7.9.5



More information about the CRIU mailing list