[CRIU] [PATCH 11/14] restore: collect signals from zombies (v4)

Pavel Emelyanov xemul at parallels.com
Mon Mar 25 12:51:46 EDT 2013


On 03/25/2013 06:37 PM, Andrey Vagin wrote:
> Each zombie sends SIGCHLD to parent. crtools restores all pending
> signals, so all other signals should be collected.
> 
> The problem is that SIGCHLD signals can be merged, but crtools
> must be sure that all of them are collected.
> 
> For that, a zombie locks the global zombie_lock, which is released by
> the parent.
> 
> This operation should be done between CR_STATE_RESTORE and
> CR_STATE_RESTORE_SIGCHLD.
> 
> There is one more state, CR_STATE_RESTORE_ZOMBIES, which is used to wait
> for all zombies.
> 
> v2: clean up
> v3: rework synchronization
> v4: rework without additional CR_STATE-s
> 
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  cr-restore.c       |  4 +++-
>  include/restorer.h |  2 ++
>  pie/restorer.c     | 14 ++++++++++++++
>  3 files changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 7454f59..5322f28 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -696,7 +696,7 @@ static int restore_one_zombie(int pid, int exit_code)
>  	if (task_entries != NULL) {
>  		restore_finish_stage(CR_STATE_RESTORE);
>  		zombie_prepare_signals();
> -		restore_finish_stage(CR_STATE_RESTORE_SIGCHLD);

The stages engine relies on zombies to finish this stage. Who will update it now?

> +		mutex_lock(&task_entries->zombie_lock);
>  	}
>  
>  	if (exit_code & 0x7f) {
> @@ -1263,6 +1263,7 @@ static int prepare_task_entries()
>  	task_entries->nr_tasks = 0;
>  	task_entries->nr_helpers = 0;
>  	futex_set(&task_entries->start, CR_STATE_FORKING);
> +	mutex_init(&task_entries->zombie_lock);

So, the first zombie will die right after it gets to this code in restore_one_zombie.
Will the parent be ready for that?

>  	return 0;
>  }
> @@ -2026,6 +2027,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>  	 * Now prepare run-time data for threads restore.
>  	 */
>  	task_args->nr_threads		= current->nr_threads;
> +	task_args->nr_zombies		= current->rst->nr_zombies;
>  	task_args->clone_restore_fn	= (void *)restore_thread_exec_start;
>  	task_args->thread_args		= thread_args;
>  
> diff --git a/include/restorer.h b/include/restorer.h
> index 336e28d..842cc5e 100644
> --- a/include/restorer.h
> +++ b/include/restorer.h
> @@ -98,6 +98,7 @@ struct task_restore_core_args {
>  
>  	/* threads restoration */
>  	int				nr_threads;		/* number of threads */
> +	int				nr_zombies;
>  	thread_restore_fcall_t		clone_restore_fn;	/* helper address for clone() call */
>  	struct thread_restore_args	*thread_args;		/* array of thread arguments */
>  	struct shmems			*shmems;
> @@ -170,6 +171,7 @@ struct task_entries {
>  	int nr_threads, nr_tasks, nr_helpers;
>  	futex_t nr_in_progress;
>  	futex_t start;
> +	mutex_t	zombie_lock;
>  };
>  
>  static always_inline struct shmem_info *
> diff --git a/pie/restorer.c b/pie/restorer.c
> index 17f2ac3..549f4b7 100644
> --- a/pie/restorer.c
> +++ b/pie/restorer.c
> @@ -39,6 +39,7 @@
>  
>  static struct task_entries *task_entries;
>  static futex_t thread_inprogress;
> +static futex_t zombies_inprogress;
>  
>  extern void cr_restore_rt (void) asm ("__cr_restore_rt")
>  			__attribute__ ((visibility ("hidden")));
> @@ -47,6 +48,15 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
>  {
>  	char *r;
>  
> +	if (futex_get(&task_entries->start) == CR_STATE_RESTORE_SIGCHLD) {
> +		pr_debug("%ld: Collect a zombie with (pid %d, %d)\n",
> +			sys_getpid(), siginfo->si_pid, siginfo->si_pid);
> +		futex_dec_and_wake(&task_entries->nr_in_progress);
> +		futex_dec_and_wake(&zombies_inprogress);
> +		mutex_unlock(&task_entries->zombie_lock);
> +		return;
> +	}
> +
>  	if (siginfo->si_code & CLD_EXITED)
>  		r = " exited, status=";
>  	else if (siginfo->si_code & CLD_KILLED)
> @@ -732,8 +742,12 @@ long __export_restore_task(struct task_restore_core_args *args)
>  
>  	pr_info("%ld: Restored\n", sys_getpid());
>  
> +	futex_set(&zombies_inprogress, args->nr_zombies);
> +
>  	restore_finish_stage(CR_STATE_RESTORE);
>  
> +	futex_wait_while_gt(&zombies_inprogress, 0);
> +
>  	sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t));
>  
>  	ret = restore_signals(args->siginfo, args->siginfo_nr, 1);
> 




More information about the CRIU mailing list