[CRIU] [PATCH 03/15] restore: Allocate task and threads args from rst-malloc

Pavel Emelyanov xemul at virtuozzo.com
Tue May 24 04:34:20 PDT 2016


These two objects are actually needed at the very early stages of
restore, much earlier than they are actually allocated. To make
life nicer, let's allocate args from private rst mem, it will get
remapped into restorer blob automatically.

The new memory layout then is

  | restorer code | memzone (stacks and sigframes) | arguments |

The last area is unmapped at the very end of restorer code. Also
this gives the opportunity not to copy many bits between static
variables and task-args, but put them directly into args. All the
next patches are about it.

Also, sigreturn_restore() is now stuffed with preparatory code
and is quite big. When a new "restoring" step is added, it is unclear
at which place in the code it should be inserted. This patch makes
a clear distinction -- the sigreturn_restore() is __only__ about
remapping the memory into restorer blob and jumping into it. All
the rest should go into restore_one_alive_task().

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
 criu/cr-restore.c | 56 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 54686c0..d6b23d7 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -113,7 +113,7 @@
 static struct pstree_item *current;
 
 static int restore_task_with_children(void *);
-static int sigreturn_restore(pid_t pid, CoreEntry *core);
+static int sigreturn_restore(pid_t pid, unsigned long ta_cp, CoreEntry *core);
 static int prepare_restorer_blob(void);
 static int prepare_rlimits(int pid, CoreEntry *core);
 static int prepare_posix_timers(int pid, CoreEntry *core);
@@ -483,10 +483,22 @@ static int prepare_proc_misc(pid_t pid, TaskCoreEntry *tc)
 
 static int restore_one_alive_task(int pid, CoreEntry *core)
 {
+	unsigned args_len;
+	unsigned long ta_cp;
+	struct task_restore_args *ta;
 	pr_info("Restoring resources\n");
 
 	rst_mem_switch_to_private();
 
+	args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) *
+			current->nr_threads, page_size());
+	ta_cp = rst_mem_align_cpos(RM_PRIVATE);
+	ta = rst_mem_alloc(args_len, RM_PRIVATE);
+	if (!ta)
+		return -1;
+
+	memzero(ta, args_len);
+
 	if (prepare_fds(current))
 		return -1;
 
@@ -523,7 +535,7 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
 	if (prepare_proc_misc(pid, core->tc))
 		return -1;
 
-	return sigreturn_restore(pid, core);
+	return sigreturn_restore(pid, ta_cp, core);
 }
 
 static void zombie_prepare_signals(void)
@@ -2672,7 +2684,7 @@ static int rst_prep_creds(pid_t pid, CoreEntry *core, unsigned long *creds_pos)
 	return 0;
 }
 
-static int sigreturn_restore(pid_t pid, CoreEntry *core)
+static int sigreturn_restore(pid_t pid, unsigned long ta_cp, CoreEntry *core)
 {
 	void *mem = MAP_FAILED;
 	void *restore_thread_exec_start;
@@ -2688,7 +2700,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	struct task_restore_args *task_args;
 	struct thread_restore_args *thread_args;
 	struct restore_mem_zone *mz;
-	long args_len;
 
 	struct vma_area *vma;
 	unsigned long tgt_vmas;
@@ -2717,10 +2728,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	BUILD_BUG_ON(sizeof(struct task_restore_args) & 1);
 	BUILD_BUG_ON(sizeof(struct thread_restore_args) & 1);
 
-	args_len = round_up(sizeof(*task_args) + sizeof(*thread_args) * current->nr_threads, page_size());
-	pr_info("%d threads require %ldK of memory\n",
-			current->nr_threads, KBYTES(args_len));
-
 	/*
 	 * Copy VMAs to private rst memory so that it's able to
 	 * walk them and m(un|re)map.
@@ -2793,8 +2800,10 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 
 	rst_mem_size = rst_mem_lock();
 	memzone_size = round_up(sizeof(struct restore_mem_zone) * current->nr_threads, page_size());
-	restore_bootstrap_len = restorer_len + memzone_size + args_len + rst_mem_size;
+	restore_bootstrap_len = restorer_len + memzone_size + rst_mem_size;
 	BUG_ON(restore_bootstrap_len & (PAGE_SIZE - 1));
+	pr_info("%d threads require %ldK of memory\n",
+			current->nr_threads, KBYTES(restore_bootstrap_len));
 
 #ifdef CONFIG_VDSO
 	/*
@@ -2843,7 +2852,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	exec_mem_hint += restorer_len;
 
 	/* VMA we need to run task_restore code */
-	mem = mmap((void *)exec_mem_hint, memzone_size + args_len,
+	mem = mmap((void *)exec_mem_hint, memzone_size,
 			PROT_READ | PROT_WRITE,
 			MAP_PRIVATE | MAP_ANON | MAP_FIXED, 0, 0);
 	if (mem != (void *)exec_mem_hint) {
@@ -2853,15 +2862,28 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 
 	exec_mem_hint -= restorer_len;
 
-	memzero(mem, memzone_size + args_len);
-	mz		= mem;
-	task_args	= mem + memzone_size;
-	thread_args	= (struct thread_restore_args *)(task_args + 1);
-	mem		+= memzone_size + args_len;
+	memzero(mem, memzone_size);
+	mz = mem;
+	mem += memzone_size;
 
 	if (rst_mem_remap(mem))
 		goto err;
 
+	task_args	= rst_mem_remap_ptr(ta_cp, RM_PRIVATE);
+	thread_args	= (struct thread_restore_args *)(task_args + 1);
+
+	/*
+	 * At this point we've found a gap in VM that fits in both -- current
+	 * and target tasks' mappings -- and its structure is
+	 *
+	 * | restorer code | memzone (stacks and sigframes) | arguments |
+	 *
+	 * Arguments is task_restore_args, thread_restore_args-s and all
+	 * the bunch of objects allocated with rst_mem_alloc().
+	 * Note, that the task_args itself is inside the 3rd section and (!)
+	 * it gets unmapped at the very end of __export_restore_task
+	 */
+
 	task_args->proc_fd = dup(get_service_fd(PROC_FD_OFF));
 	if (task_args->proc_fd < 0) {
 		pr_perror("can't dup proc fd");
@@ -2871,8 +2893,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	task_args->breakpoint = &rsti(current)->breakpoint;
 	task_args->task_entries = rst_mem_remap_ptr(task_entries_pos, RM_SHREMAP);
 
-	task_args->rst_mem = mem - args_len;
-	task_args->rst_mem_size = rst_mem_size + args_len;
+	task_args->rst_mem = mem;
+	task_args->rst_mem_size = rst_mem_size;
 
 	task_args->bootstrap_start = (void *)exec_mem_hint;
 	task_args->bootstrap_len = restore_bootstrap_len;
-- 
2.5.0



More information about the CRIU mailing list