[CRIU] [PATCH 13/14] restore: collect signals from zombies (v2)
Andrey Vagin
avagin at openvz.org
Fri Jan 18 03:40:32 EST 2013
Each zombie sends SIGCHLD to its parent. crtools restores all pending
signals, so all other signals should be collected.
The problem here is that SIGCHLD signals can be merged, but crtools
must be sure that all of them are collected.
For that, crtools enumerates all tasks and waits for an answer for each
one. A parent answers for its dead children from a signal handler.
This operation should be done between CR_STATE_RESTORE and
CR_STATE_RESTORE_SIGCHLD. Two new states CR_STATE_RESTORE_ZOMBIES
and CR_STATE_RESTORE_PREP_ZOMBIES were added.
CR_STATE_RESTORE_PREP_ZOMBIES is used to wait until all tasks have
installed a special signal handler.
CR_STATE_RESTORE_ZOMBIES is used to wait for all zombies.
v2: clean up
Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
cr-restore.c | 40 ++++++++++++++++++++++++++++++++++++++--
include/restorer.h | 4 ++++
pie/restorer.c | 10 ++++++++++
3 files changed, 52 insertions(+), 2 deletions(-)
diff --git a/cr-restore.c b/cr-restore.c
index e67232c..e039f5b 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -676,9 +676,12 @@ static int restore_one_zombie(int pid, int exit_code)
pr_info("Restoring zombie with %d code\n", exit_code);
if (task_entries != NULL) {
+ atomic_inc(&task_entries->nr_zombies);
+
restore_finish_stage(CR_STATE_RESTORE);
zombie_prepare_signals();
- restore_finish_stage(CR_STATE_RESTORE_SIGCHLD);
+ restore_finish_stage(CR_STATE_RESTORE_PREP_ZOMBIES);
+ futex_wait_until(&task_entries->pid, pid);
}
if (exit_code & 0x7f) {
@@ -1106,8 +1109,11 @@ static inline int stage_participants(int next_stage)
case CR_STATE_RESTORE_PGID:
return task_entries->nr_tasks;
case CR_STATE_RESTORE:
- case CR_STATE_RESTORE_SIGCHLD:
+ case CR_STATE_RESTORE_ZOMBIES:
+ case CR_STATE_RESTORE_PREP_ZOMBIES:
return task_entries->nr_threads;
+ case CR_STATE_RESTORE_SIGCHLD:
+ return task_entries->nr_threads - atomic_get(&task_entries->nr_zombies);
}
BUG();
@@ -1133,6 +1139,7 @@ static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
{
int ret;
struct sigaction act, old_act;
+ struct pstree_item *pi;
ret = sigaction(SIGCHLD, NULL, &act);
if (ret < 0) {
@@ -1188,6 +1195,33 @@ static int restore_root_task(struct pstree_item *init, struct cr_options *opts)
if (ret < 0)
goto out;
+ pr_info("Prepare for collecting SIGCHLD signals from zombies\n");
+ ret = restore_switch_stage(CR_STATE_RESTORE_PREP_ZOMBIES);
+ if (ret < 0)
+ goto out;
+
+ pr_info("Collect SIGCHLD signals from zombies\n");
+ ret = restore_switch_stage(CR_STATE_RESTORE_ZOMBIES);
+ if (ret < 0)
+ goto out;
+
+ pr_info("Wait threads %d %d\n", task_entries->nr_threads, task_entries->nr_tasks);
+ futex_wait_while_gt(&task_entries->nr_in_progress, task_entries->nr_tasks);
+ ret = (int) futex_get(&task_entries->nr_in_progress);
+ if(ret < 0)
+ goto out;
+
+ for_each_pstree_item(pi) {
+ if (pi->state == TASK_HELPER)
+ continue;
+ ret = (int) futex_get(&task_entries->nr_in_progress);
+ if (ret < 0)
+ goto out;
+ futex_set_and_wake(&task_entries->pid, pi->pid.virt);
+ pr_info("Wait %d\n", pi->pid.virt);
+ futex_wait_while(&task_entries->nr_in_progress, ret);
+ }
+
pr_info("Wait until all tasks are restored\n");
ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
if (ret < 0)
@@ -1239,6 +1273,8 @@ static int prepare_task_entries()
task_entries->nr_tasks = 0;
task_entries->nr_helpers = 0;
futex_set(&task_entries->start, CR_STATE_FORKING);
+ atomic_set(&task_entries->nr_zombies, 0);
+ futex_set(&task_entries->pid, 0);
return 0;
}
diff --git a/include/restorer.h b/include/restorer.h
index b9bb3dc..a9e6157 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -162,14 +162,18 @@ enum {
CR_STATE_FORKING,
CR_STATE_RESTORE_PGID,
CR_STATE_RESTORE,
+ CR_STATE_RESTORE_PREP_ZOMBIES,
+ CR_STATE_RESTORE_ZOMBIES,
CR_STATE_RESTORE_SIGCHLD,
CR_STATE_COMPLETE
};
struct task_entries {
int nr_threads, nr_tasks, nr_helpers;
+ atomic_t nr_zombies;
futex_t nr_in_progress;
futex_t start;
+ futex_t pid;
};
static always_inline struct shmem_info *
diff --git a/pie/restorer.c b/pie/restorer.c
index d171fef..553738b 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -53,6 +53,10 @@ extern void cr_restore_rt (void) asm ("__cr_restore_rt")
static void sigzombie_handler(int signal, siginfo_t *siginfo, void *data)
{
+ pr_debug("%ld: Collect a zombie with (pid %d, %d)\n",
+ sys_getpid(), siginfo->si_pid, (int) futex_get(&task_entries->pid));
+ BUG_ON(siginfo->si_pid != futex_get(&task_entries->pid));
+ futex_dec_and_wake(&task_entries->nr_in_progress);
}
static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
@@ -227,6 +231,8 @@ long __export_restore_thread(struct thread_restore_args *args)
pr_info("%ld: Restored\n", sys_gettid());
restore_finish_stage(CR_STATE_RESTORE);
+ restore_finish_stage(CR_STATE_RESTORE_PREP_ZOMBIES);
+ restore_finish_stage(CR_STATE_RESTORE_ZOMBIES);
if (restore_signals(args->siginfo, args->siginfo_nr, 0))
goto core_restore_end;
@@ -749,6 +755,10 @@ long __export_restore_task(struct task_restore_core_args *args)
act.rt_sa_restorer = cr_restore_rt;
sys_sigaction(SIGCHLD, &act, NULL, sizeof(rt_sigset_t));
+ restore_finish_stage(CR_STATE_RESTORE_PREP_ZOMBIES);
+ futex_wait_until(&task_entries->pid, my_pid);
+ restore_finish_stage(CR_STATE_RESTORE_ZOMBIES);
+
sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(rt_sigset_t));
ret = restore_signals(args->siginfo, args->siginfo_nr, 1);
--
1.7.11.7
More information about the CRIU
mailing list