[CRIU] [PATCH 2/2] rst: don't hang when SIGCHLD is coalesced

Andrew Vagin avagin at gmail.com
Tue Jul 21 02:42:26 PDT 2015


On Mon, Jul 20, 2015 at 11:56:43PM -0600, Tycho Andersen wrote:
> When a TASK_HELPER would exit just before a zombie, sometimes the signal
> would get coalesced, and we would miss the zombie exit, causing us to block
> forever waiting for the zombie to complete. Let's use an entirely different
> strategy for waiting on zombies: explicitly wait on them with waitid, and
> use WNOWAIT to prevent their data from actually being reaped.
>

Looks good. A few minor comments are inline. Thanks.


> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
>  cr-restore.c       | 38 ++++++++++++++++++++++++++------------
>  include/restorer.h |  4 +++-
>  include/rst_info.h |  1 -
>  pie/restorer.c     | 42 ++++++++++++++++++++++++++++--------------
>  4 files changed, 57 insertions(+), 28 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index e7bac3f..d838a9b 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -118,6 +118,8 @@ static int prepare_signals(int pid, CoreEntry *core);
>  static int root_as_sibling;
>  static unsigned long helpers_pos = 0;
>  static int n_helpers = 0;
> +static unsigned long zombies_pos = 0;
> +static int n_zombies = 0;
>  
>  static int crtools_prepare_shared(void)
>  {
> @@ -730,29 +732,40 @@ err:
>  	return ret;
>  }
>  
> -static int collect_helper_pids()
> +static int collect_child_pids(int state, int *n)
>  {
>  	struct pstree_item *pi;
>  
> -	helpers_pos = rst_mem_cpos(RM_PRIVATE);
> -
> +	*n = 0;
>  	list_for_each_entry(pi, &current->children, sibling) {
> -		static pid_t *helper;
> +		static pid_t *child;
>  
> -		if (pi->state != TASK_HELPER)
> +		if (pi->state != state)
>  			continue;
>  
> -		helper = rst_mem_alloc(sizeof(*helper), RM_PRIVATE);
> -		if (!helper)
> +		child = rst_mem_alloc(sizeof(*child), RM_PRIVATE);
> +		if (!child)
>  			return -1;
>  
> -		n_helpers++;
> -		*helper = pi->pid.virt;
> +		(*n)++;
> +		*child = pi->pid.virt;
>  	}
>  
>  	return 0;
>  }
>  
> +static int collect_helper_pids()
> +{
> +	helpers_pos = rst_mem_cpos(RM_PRIVATE);
> +	return collect_child_pids(TASK_HELPER, &n_helpers);
> +}
> +
> +static int collect_zombie_pids()
> +{
> +	zombies_pos = rst_mem_cpos(RM_PRIVATE);
> +	return collect_child_pids(TASK_DEAD, &n_zombies);
> +}
> +
>  static int open_cores(int pid, CoreEntry *leader_core)
>  {
>  	int i, tpid;
> @@ -823,6 +836,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
>  	if (collect_helper_pids() < 0)
>  		return -1;
>  
> +	if (collect_zombie_pids() < 0)
> +		return -1;
> +
>  	if (inherit_fd_fini() < 0)
>  		return -1;
>  
> @@ -896,7 +912,6 @@ static int restore_one_zombie(CoreEntry *core)
>  	if (task_entries != NULL) {
>  		restore_finish_stage(CR_STATE_RESTORE);
>  		zombie_prepare_signals();
> -		mutex_lock(&task_entries->zombie_lock);
>  	}
>  
>  	if (exit_code & 0x7f) {
> @@ -1926,7 +1941,6 @@ static int prepare_task_entries(void)
>  	task_entries->nr_tasks = 0;
>  	task_entries->nr_helpers = 0;
>  	futex_set(&task_entries->start, CR_STATE_RESTORE_NS);
> -	mutex_init(&task_entries->zombie_lock);
>  	mutex_init(&task_entries->userns_sync_lock);
>  
>  	return 0;
> @@ -2885,6 +2899,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>  	remap_array(rings,	  mm->n_aios, aio_rings);
>  	remap_array(rlims,	  rlims_nr, rlims_cpos);
>  	remap_array(helpers,	  n_helpers, helpers_pos);
> +	remap_array(zombies,	  n_zombies, zombies_pos);
>  
>  #undef remap_array
>  
> @@ -3005,7 +3020,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
>  	 * Now prepare run-time data for threads restore.
>  	 */
>  	task_args->nr_threads		= current->nr_threads;
> -	task_args->nr_zombies		= rsti(current)->nr_zombies;
>  	task_args->clone_restore_fn	= (void *)restore_thread_exec_start;
>  	task_args->thread_args		= thread_args;
>  
> diff --git a/include/restorer.h b/include/restorer.h
> index dcfa175..97a012c 100644
> --- a/include/restorer.h
> +++ b/include/restorer.h
> @@ -104,7 +104,6 @@ struct task_restore_args {
>  
>  	/* threads restoration */
>  	int				nr_threads;		/* number of threads */
> -	int				nr_zombies;
>  	thread_restore_fcall_t		clone_restore_fn;	/* helper address for clone() call */
>  	struct thread_restore_args	*thread_args;		/* array of thread arguments */
>  	struct task_entries		*task_entries;
> @@ -135,6 +134,9 @@ struct task_restore_args {
>  
>  	pid_t				*helpers /* the TASK_HELPERS to wait on at the end of restore */;
>  	unsigned int			helpers_n;
> +
> +	pid_t				*zombies;
> +	unsigned int			zombies_n;
>  	/* * * * * * * * * * * * * * * * * * * * */
>  
>  	unsigned long			premmapped_addr;
> diff --git a/include/rst_info.h b/include/rst_info.h
> index 5fab162..0e8dc97 100644
> --- a/include/rst_info.h
> +++ b/include/rst_info.h
> @@ -9,7 +9,6 @@ struct task_entries {
>  	int nr_threads, nr_tasks, nr_helpers;
>  	futex_t nr_in_progress;
>  	futex_t start;
> -	mutex_t	zombie_lock;
>  	atomic_t cr_err;
>  	mutex_t userns_sync_lock;
>  };
> diff --git a/pie/restorer.c b/pie/restorer.c
> index fa336fc..20439d6 100644
> --- a/pie/restorer.c
> +++ b/pie/restorer.c
> @@ -51,10 +51,11 @@
>  
>  static struct task_entries *task_entries;
>  static futex_t thread_inprogress;
> -static futex_t zombies_inprogress;
>  static int cap_last_cap;
>  static pid_t *helpers;
>  static int n_helpers;
> +static pid_t *zombies;
> +static int n_zombies;
>  
>  extern void cr_restore_rt (void) asm ("__cr_restore_rt")
>  			__attribute__ ((visibility ("hidden")));
> @@ -70,16 +71,9 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
>  		if (siginfo->si_pid == helpers[i])
>  			return;
>  
> -	if (futex_get(&task_entries->start) == CR_STATE_RESTORE_SIGCHLD) {
> -		pr_debug("%ld: Collect a zombie with (pid %d, %d)\n",
> -			sys_getpid(), siginfo->si_pid, siginfo->si_pid);
> -		futex_dec_and_wake(&task_entries->nr_in_progress);
> -		futex_dec_and_wake(&zombies_inprogress);
> -		task_entries->nr_threads--;
> -		task_entries->nr_tasks--;
> -		mutex_unlock(&task_entries->zombie_lock);
> -		return;
> -	}
> +	for (i = 0; i < n_zombies; i++)
> +		if (siginfo->si_pid == zombies[i])
> +			return;
>  
>  	if (siginfo->si_code & CLD_EXITED)
>  		r = " exited, status=";
> @@ -805,6 +799,25 @@ static int wait_helpers(struct task_restore_args *task_args)
>  	return 0;
>  }
>  
> +static int wait_zombies(struct task_restore_args *task_args)
> +{
> +	int i;
> +
> +	for (i = 0; i < task_args->zombies_n; i++) {
> +		if (sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL) < 0) {
> +			pr_err("Wait on %d zombie failed\n", task_args->zombies[i]);
> +			return -1;
> +		}
> +		pr_debug("%ld: Collect a zombie with pid %d\n",
> +			sys_getpid(), task_args->zombies[i]);
> +		task_entries->nr_threads--;
> +		task_entries->nr_tasks--;
> +		futex_dec_and_wake(&task_entries->nr_in_progress);
> +	}

I think we don't need to decriment counters in the loop:
	task_entries->nr_threads -= task_args->zombies_n;
	task_entries->nr_tasks   -= task_args->zombies_n;
> +
> +	return 0;
> +}
> +
>  /*
>   * The main routine to restore task via sigreturn.
>   * This one is very special, we never return there
> @@ -836,6 +849,8 @@ long __export_restore_task(struct task_restore_args *args)
>  	task_entries = args->task_entries;
>  	helpers = args->helpers;
>  	n_helpers = args->helpers_n;
> +	zombies = args->zombies;
> +	n_zombies = args->zombies_n;
>  	*args->breakpoint = rst_sigreturn;
>  
>  	ksigfillset(&act.rt_sa_mask);
> @@ -1190,11 +1205,10 @@ long __export_restore_task(struct task_restore_args *args)
>  
>  	pr_info("%ld: Restored\n", sys_getpid());
>  
> -	futex_set(&zombies_inprogress, args->nr_zombies);
> -

We can block SIGCHLD here

>  	restore_finish_stage(CR_STATE_RESTORE);
>  
> -	futex_wait_while_gt(&zombies_inprogress, 0);
> +	if (wait_zombies(args) < 0)
> +		goto core_restore_end;
>  
>  	if (wait_helpers(args) < 0)
>  		goto core_restore_end;

and unblock SIGCHLD back here

In thise case, sigchld_handler will be executed only once for all
helpers and zombies. What do you think about this?

> -- 
> 2.1.4
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list