[CRIU] [PATCH] [v4] restore: handle the case where zombies are reparented

Andrey Vagin avagin at openvz.org
Fri Jul 22 16:44:52 PDT 2016


From: Tycho Andersen <tycho.andersen at canonical.com>

For example, if a zombie has a helper that sets up its session id, the
zombie will be reparented to the init task. The init task may then get a
SIGCHLD for a zombie which isn't its direct child, a case we didn't
handle. Instead, let's find all the zombies for the init task, in case they
get reparented this way.

v2: only the zombies need to be recursively collected; helpers wait on
    their children before they exit and will never be reparented
v4: the root task waits for all zombies

Reported-by: Tycho Andersen <tycho.andersen at canonical.com>
Cc: Tycho Andersen <tycho.andersen at canonical.com>
Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
 criu/cr-restore.c   | 29 ++++++++++++++++++++++++++---
 criu/pie/restorer.c | 24 +++++++++++++++++++-----
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index d4f6bca..9bd1747 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -488,12 +488,11 @@ static int prepare_sigactions(void)
 	return ret;
 }
 
-static int collect_child_pids(int state, unsigned int *n)
+static int __collect_child_pids(struct pstree_item *p, int state, unsigned int *n)
 {
 	struct pstree_item *pi;
 
-	*n = 0;
-	list_for_each_entry(pi, &current->children, sibling) {
+	list_for_each_entry(pi, &p->children, sibling) {
 		pid_t *child;
 
 		if (pi->pid.state != state)
@@ -510,6 +509,30 @@ static int collect_child_pids(int state, unsigned int *n)
 	return 0;
 }
 
+static int collect_child_pids(int state, unsigned int *n)
+{
+	struct pstree_item *pi;
+
+	*n = 0;
+
+	/*
+	 * All children of helpers and zombies will be reparented to the init
+	 * process and they have to be collected too.
+	 */
+
+	if (current == root_item) {
+		for_each_pstree_item(pi) {
+			if (pi->pid.state != TASK_HELPER &&
+			    pi->pid.state != TASK_DEAD)
+				continue;
+			if (__collect_child_pids(pi, state, n))
+				return -1;
+		}
+	}
+
+	return __collect_child_pids(current, state, n);
+}
+
 static int collect_helper_pids(struct task_restore_args *ta)
 {
 	ta->helpers = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 366ccf0..0113586 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1072,8 +1072,23 @@ static int wait_zombies(struct task_restore_args *task_args)
 	int i;
 
 	for (i = 0; i < task_args->zombies_n; i++) {
-		if (sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL) < 0) {
-			pr_err("Wait on %d zombie failed\n", task_args->zombies[i]);
+		int ret, nr_in_progress;
+
+		nr_in_progress = futex_get(&task_entries->nr_in_progress);
+
+		ret = sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL);
+		if (ret == -ECHILD) {
+			/* A process isn't reparented to this task yet.
+			 * Let's wait when someone complete this stage
+			 * and try again.
+			 */
+			futex_wait_while_eq(&task_entries->nr_in_progress,
+								nr_in_progress);
+			i--;
+			continue;
+		}
+		if (ret < 0) {
+			pr_err("Wait on %d zombie failed: %d\n", task_args->zombies[i], ret);
 			return -1;
 		}
 		pr_debug("%ld: Collect a zombie with pid %d\n",
@@ -1451,11 +1466,10 @@ long __export_restore_task(struct task_restore_args *args)
 
 	restore_finish_stage(CR_STATE_RESTORE);
 
-	if (wait_zombies(args) < 0)
-		goto core_restore_end;
-
 	if (wait_helpers(args) < 0)
 		goto core_restore_end;
+	if (wait_zombies(args) < 0)
+		goto core_restore_end;
 
 	ksigfillset(&to_block);
 	ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
-- 
2.7.4



More information about the CRIU mailing list