[CRIU] [PATCH 11/14] restore: collect signals from zombies (v4)
Pavel Emelyanov
xemul at parallels.com
Mon Mar 25 12:51:46 EDT 2013
On 03/25/2013 06:37 PM, Andrey Vagin wrote:
> Each zombie sends SIGCHLD to parent. crtools restores all pending
> signals, so all other signals should be collected.
>
> The problem here is that SIGCHLD signals can be merged, but crtools
> must be sure that all of them are collected.
>
> For that a zombie locks a global zombie_lock, which is released by
> parent.
>
> This operation should be done between CR_STATE_RESTORE and
> CR_STATE_RESTORE_SIGCHLD.
>
> Here is one more state, CR_STATE_RESTORE_ZOMBIES, which is used for waiting
> for all zombies.
>
> v2: clean up
> v3: rework synchronization
> v4: rework without additional CR_STATE-s
>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> cr-restore.c | 4 +++-
> include/restorer.h | 2 ++
> pie/restorer.c | 14 ++++++++++++++
> 3 files changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 7454f59..5322f28 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -696,7 +696,7 @@ static int restore_one_zombie(int pid, int exit_code)
> if (task_entries != NULL) {
> restore_finish_stage(CR_STATE_RESTORE);
> zombie_prepare_signals();
> - restore_finish_stage(CR_STATE_RESTORE_SIGCHLD);
The stages engine relies on zombies to finish this stage. Who will update it now?
> + mutex_lock(&task_entries->zombie_lock);
> }
>
> if (exit_code & 0x7f) {
> @@ -1263,6 +1263,7 @@ static int prepare_task_entries()
> task_entries->nr_tasks = 0;
> task_entries->nr_helpers = 0;
> futex_set(&task_entries->start, CR_STATE_FORKING);
> + mutex_init(&task_entries->zombie_lock);
So, the first zombie will die right after it gets to this code in restore_one_zombie.
Will the parent be ready for that?
> return 0;
> }
> @@ -2026,6 +2027,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> * Now prepare run-time data for threads restore.
> */
> task_args->nr_threads = current->nr_threads;
> + task_args->nr_zombies = current->rst->nr_zombies;
> task_args->clone_restore_fn = (void *)restore_thread_exec_start;
> task_args->thread_args = thread_args;
>
> diff --git a/include/restorer.h b/include/restorer.h
> index 336e28d..842cc5e 100644
> --- a/include/restorer.h
> +++ b/include/restorer.h
> @@ -98,6 +98,7 @@ struct task_restore_core_args {
>
> /* threads restoration */
> int nr_threads; /* number of threads */
> + int nr_zombies;
> thread_restore_fcall_t clone_restore_fn; /* helper address for clone() call */
> struct thread_restore_args *thread_args; /* array of thread arguments */
> struct shmems *shmems;
> @@ -170,6 +171,7 @@ struct task_entries {
> int nr_threads, nr_tasks, nr_helpers;
> futex_t nr_in_progress;
> futex_t start;
> + mutex_t zombie_lock;
> };
>
> static always_inline struct shmem_info *
> diff --git a/pie/restorer.c b/pie/restorer.c
> index 17f2ac3..549f4b7 100644
> --- a/pie/restorer.c
> +++ b/pie/restorer.c
> @@ -39,6 +39,7 @@
>
> static struct task_entries *task_entries;
> static futex_t thread_inprogress;
> +static futex_t zombies_inprogress;
>
> extern void cr_restore_rt (void) asm ("__cr_restore_rt")
> __attribute__ ((visibility ("hidden")));
> @@ -47,6 +48,15 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
> {
> char *r;
>
> + if (futex_get(&task_entries->start) == CR_STATE_RESTORE_SIGCHLD) {
> + pr_debug("%ld: Collect a zombie with (pid %d, %d)\n",
> + sys_getpid(), siginfo->si_pid, siginfo->si_pid);
> + futex_dec_and_wake(&task_entries->nr_in_progress);
> + futex_dec_and_wake(&zombies_inprogress);
> + mutex_unlock(&task_entries->zombie_lock);
> + return;
> + }
> +
> if (siginfo->si_code & CLD_EXITED)
> r = " exited, status=";
> else if (siginfo->si_code & CLD_KILLED)
> @@ -732,8 +742,12 @@ long __export_restore_task(struct task_restore_core_args *args)
>
> pr_info("%ld: Restored\n", sys_getpid());
>
> + futex_set(&zombies_inprogress, args->nr_zombies);
> +
> restore_finish_stage(CR_STATE_RESTORE);
>
> + futex_wait_while_gt(&zombies_inprogress, 0);
> +
> sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t));
>
> ret = restore_signals(args->siginfo, args->siginfo_nr, 1);
>
More information about the CRIU
mailing list