[CRIU] [PATCH v3 5/5] restore: handle the case where zombies are reparented

Tycho Andersen tycho.andersen at canonical.com
Wed Jul 13 08:10:57 PDT 2016


For example, if a zombie has a helper that sets up its session id, the
zombie will be reparented to the init task, which will then potentially get
a SIGCHLD for a task which isn't its direct child zombie, which we didn't
handle. Instead, let's recursively find all the zombies for the init task,
in case they get reparented this way.

This commit is a little ugly: we are now passing three lists of pids
(helpers, zombies, and things that are allowed to die) to the restorer blob
for each task. We do need to have some way to distinguish, because:

* we want to wait on helpers
* we don't want to wait on zombies (i.e. we want to keep the pid dead), but
  confirm that it actually died
* we don't want to wait on anything that gets reparented, but we don't want
  to error out if we get a SIGCHLD for it

We could introduce a new struct:

struct pid_info {
  pid_t pid;
  bool  zombie;
  bool  direct_child;
}

to handle this instead of the three separate lists. I'm not sure which is
cleaner, but I'd be happy to refactor to the other way if that's better.

v2: only the zombies need to be recursively collected, helpers wait on
    their children before they exit and will never be reparented

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 criu/cr-restore.c       | 31 ++++++++++++++++++++++++++-----
 criu/include/restorer.h |  3 +++
 criu/pie/restorer.c     | 20 ++++++--------------
 3 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 977e049..d8e4cf1 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -487,14 +487,16 @@ static int prepare_sigactions(void)
 	return ret;
 }
 
-static int collect_child_pids(int state, unsigned int *n)
+static int collect_child_pids(struct pstree_item *root, int state, unsigned int *n, bool recurse)
 {
 	struct pstree_item *pi;
 
-	*n = 0;
-	list_for_each_entry(pi, &current->children, sibling) {
+	list_for_each_entry(pi, &root->children, sibling) {
 		pid_t *child;
 
+		if (recurse && collect_child_pids(pi, state, n, recurse) < 0)
+			return -1;
+
 		if (pi->pid.state != state)
 			continue;
 
@@ -512,13 +514,28 @@ static int collect_child_pids(int state, unsigned int *n)
 static int collect_helper_pids(struct task_restore_args *ta)
 {
 	ta->helpers = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
-	return collect_child_pids(TASK_HELPER, &ta->helpers_n);
+	ta->helpers_n = 0;
+	return collect_child_pids(current, TASK_HELPER, &ta->helpers_n, false);
 }
 
 static int collect_zombie_pids(struct task_restore_args *ta)
 {
 	ta->zombies = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
-	return collect_child_pids(TASK_DEAD, &ta->zombies_n);
+	ta->zombies_n = 0;
+	return collect_child_pids(current, TASK_DEAD, &ta->zombies_n, false);
+}
+
+static int collect_allowed_to_die_pids(struct task_restore_args *ta)
+{
+	ta->allowed_to_die = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
+	ta->allowed_to_die_n = 0;
+
+	if (collect_child_pids(current, TASK_DEAD, &ta->allowed_to_die_n, current == root_item) < 0)
+		return -1;
+	if (collect_child_pids(current, TASK_HELPER, &ta->allowed_to_die_n, false) < 0)
+		return -1;
+
+	return 0;
 }
 
 static int open_core(int pid, CoreEntry **pcore)
@@ -655,6 +672,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
 	if (collect_zombie_pids(ta) < 0)
 		return -1;
 
+	if (collect_allowed_to_die_pids(ta) < 0)
+		return -1;
+
 	if (inherit_fd_fini() < 0)
 		return -1;
 
@@ -3003,6 +3023,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	RST_MEM_FIXUP_PPTR(task_args->helpers);
 	RST_MEM_FIXUP_PPTR(task_args->zombies);
 	RST_MEM_FIXUP_PPTR(task_args->seccomp_filters);
+	RST_MEM_FIXUP_PPTR(task_args->allowed_to_die);
 
 	if (core->tc->has_seccomp_mode)
 		task_args->seccomp_mode = core->tc->seccomp_mode;
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index 7c0336e..ac50bf0 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -149,6 +149,9 @@ struct task_restore_args {
 	struct sock_fprog		*seccomp_filters;
 	unsigned int			seccomp_filters_n;
 
+	pid_t				*allowed_to_die;
+	unsigned int			allowed_to_die_n;
+
 	/* * * * * * * * * * * * * * * * * * * * */
 
 	unsigned long			task_size;
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 424ccf3..d3a24d8 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -57,10 +57,8 @@
 
 static struct task_entries *task_entries;
 static futex_t thread_inprogress;
-static pid_t *helpers;
-static int n_helpers;
-static pid_t *zombies;
-static int n_zombies;
+static pid_t *allowed_to_die;
+static int n_allowed_to_die;
 
 extern void cr_restore_rt (void) asm ("__cr_restore_rt")
 			__attribute__ ((visibility ("hidden")));
@@ -72,12 +70,8 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
 
 	/* We can ignore helpers that die, we expect them to after
 	 * CR_STATE_RESTORE is finished. */
-	for (i = 0; i < n_helpers; i++)
-		if (siginfo->si_pid == helpers[i])
-			return;
-
-	for (i = 0; i < n_zombies; i++)
-		if (siginfo->si_pid == zombies[i])
+	for (i = 0; i < n_allowed_to_die; i++)
+		if (siginfo->si_pid == allowed_to_die[i])
 			return;
 
 	if (siginfo->si_code & CLD_EXITED)
@@ -1106,10 +1100,8 @@ long __export_restore_task(struct task_restore_args *args)
 #endif
 
 	task_entries = args->task_entries;
-	helpers = args->helpers;
-	n_helpers = args->helpers_n;
-	zombies = args->zombies;
-	n_zombies = args->zombies_n;
+	allowed_to_die = args->allowed_to_die;
+	n_allowed_to_die = args->allowed_to_die_n;
 	*args->breakpoint = rst_sigreturn;
 
 	ksigfillset(&act.rt_sa_mask);
-- 
2.7.4



More information about the CRIU mailing list