[CRIU] [PATCH] [v4] restore: handle the case where zombies are reparented
Andrey Vagin
avagin at openvz.org
Fri Jul 22 16:44:52 PDT 2016
From: Tycho Andersen <tycho.andersen at canonical.com>
For example, if a zombie has a helper that sets up its session id, the
zombie will be reparented to the init task, which will then potentially get
a SIGCHLD for a task which isn't its direct child zombie, which we didn't
handle. Instead, let's find all the zombies for the init task, in case they
get reparented this way.
v2: only the zombies need to be recursively collected, helpers wait on
their children before they exit and will never be reparented
v4: the root task waits for all zombies
Reported-by: Tycho Andersen <tycho.andersen at canonical.com>
Cc: Tycho Andersen <tycho.andersen at canonical.com>
Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
criu/cr-restore.c | 29 ++++++++++++++++++++++++++---
criu/pie/restorer.c | 24 +++++++++++++++++++-----
2 files changed, 45 insertions(+), 8 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index d4f6bca..9bd1747 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -488,12 +488,11 @@ static int prepare_sigactions(void)
return ret;
}
-static int collect_child_pids(int state, unsigned int *n)
+static int __collect_child_pids(struct pstree_item *p, int state, unsigned int *n)
{
struct pstree_item *pi;
- *n = 0;
- list_for_each_entry(pi, &current->children, sibling) {
+ list_for_each_entry(pi, &p->children, sibling) {
pid_t *child;
if (pi->pid.state != state)
@@ -510,6 +509,30 @@ static int collect_child_pids(int state, unsigned int *n)
return 0;
}
+static int collect_child_pids(int state, unsigned int *n)
+{
+ struct pstree_item *pi;
+
+ *n = 0;
+
+ /*
+ * All children of helpers and zombies will be reparented to the init
+ * process and they have to be collected too.
+ */
+
+ if (current == root_item) {
+ for_each_pstree_item(pi) {
+ if (pi->pid.state != TASK_HELPER &&
+ pi->pid.state != TASK_DEAD)
+ continue;
+ if (__collect_child_pids(pi, state, n))
+ return -1;
+ }
+ }
+
+ return __collect_child_pids(current, state, n);
+}
+
static int collect_helper_pids(struct task_restore_args *ta)
{
ta->helpers = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 366ccf0..0113586 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1072,8 +1072,23 @@ static int wait_zombies(struct task_restore_args *task_args)
int i;
for (i = 0; i < task_args->zombies_n; i++) {
- if (sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL) < 0) {
- pr_err("Wait on %d zombie failed\n", task_args->zombies[i]);
+ int ret, nr_in_progress;
+
+ nr_in_progress = futex_get(&task_entries->nr_in_progress);
+
+ ret = sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL);
+ if (ret == -ECHILD) {
+ /* A process isn't reparented to this task yet.
+ * Let's wait until someone completes this stage
+ * and try again.
+ */
+ futex_wait_while_eq(&task_entries->nr_in_progress,
+ nr_in_progress);
+ i--;
+ continue;
+ }
+ if (ret < 0) {
+ pr_err("Wait on %d zombie failed: %d\n", task_args->zombies[i], ret);
return -1;
}
pr_debug("%ld: Collect a zombie with pid %d\n",
@@ -1451,11 +1466,10 @@ long __export_restore_task(struct task_restore_args *args)
restore_finish_stage(CR_STATE_RESTORE);
- if (wait_zombies(args) < 0)
- goto core_restore_end;
-
if (wait_helpers(args) < 0)
goto core_restore_end;
+ if (wait_zombies(args) < 0)
+ goto core_restore_end;
ksigfillset(&to_block);
ret = sys_sigprocmask(SIG_SETMASK, &to_block, NULL, sizeof(k_rtsigset_t));
--
2.7.4
More information about the CRIU
mailing list