[CRIU] [PATCH v3 5/5] restore: handle the case where zombies are reparented

Pavel Emelyanov xemul at virtuozzo.com
Fri Jul 22 04:19:36 PDT 2016


On 07/13/2016 06:10 PM, Tycho Andersen wrote:
> For example, if a zombie has a helper that sets up its session id, the
> zombie will be reparented to the init task, which will then potentially get
> a SIGCHLD for a task which isn't its direct child zombie, which we didn't
> handle. Instead, let's recursively find all the zombies for the init task,
> in case they get reparented this way.
> 
> This commit is a little ugly: we are now passing three lists of pids
> (helpers, zombies, and things that are allowed to die) to the restorer blob
> for each task. We do need to have some way to distinguish, because:
> 
> * we want to wait on helpers
> * we don't want to wait on zombies (i.e. we want to keep the pid dead), but
>   confirm that it actually died
> * we don't want to wait on anything that gets reparented, but we don't want
>   to error out if we get a SIGCHLD for it
> 
> We could introduce a new struct:
> 
> struct pid_info {
>   pid_t pid;
>   bool  zombie;
>   bool  direct_child;
> }

Let's try to go this route, but slightly ... modified.

struct restorer_pid_info {
	pid_t pid;
	unsigned type;
}

The type would be ZOMBIE, HELPER or OTHER.

Next, instead of calling collect_zombie_pids, collect_helper_pids and
collect_allowed_to_die_pids, let's call one collect_restorer_pids()
that would fill the array of restorer_pid_info-s with types and, if
the caller is root_item, would dive into recursion only collecting for
TASK_DEAD entries.

In the restorer itself we would need to merge wait_zombies() and
wait_helpers() to only walk the list once and tune the sigchild helper
to allow for sigchilds from any pid in the array.

What do you think?

-- Pavel

> to handle this instead of the three separate lists. I'm not sure which is
> cleaner, but I'd be happy to refactor to the other way if that's better.
> 
> v2: only the zombies need to be recursively collected, helpers wait on
>     their children before they exit and will never be reparented
> 
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
>  criu/cr-restore.c       | 31 ++++++++++++++++++++++++++-----
>  criu/include/restorer.h |  3 +++
>  criu/pie/restorer.c     | 20 ++++++--------------
>  3 files changed, 35 insertions(+), 19 deletions(-)
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 977e049..d8e4cf1 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -487,14 +487,16 @@ static int prepare_sigactions(void)
>  	return ret;
>  }
>  
> -static int collect_child_pids(int state, unsigned int *n)
> +static int collect_child_pids(struct pstree_item *root, int state, unsigned int *n, bool recurse)
>  {
>  	struct pstree_item *pi;
>  
> -	*n = 0;
> -	list_for_each_entry(pi, &current->children, sibling) {
> +	list_for_each_entry(pi, &root->children, sibling) {
>  		pid_t *child;
>  
> +		if (recurse && collect_child_pids(pi, state, n, recurse) < 0)
> +			return -1;
> +
>  		if (pi->pid.state != state)
>  			continue;
>  
> @@ -512,13 +514,28 @@ static int collect_child_pids(int state, unsigned int *n)
>  static int collect_helper_pids(struct task_restore_args *ta)
>  {
>  	ta->helpers = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
> -	return collect_child_pids(TASK_HELPER, &ta->helpers_n);
> +	ta->helpers_n = 0;
> +	return collect_child_pids(current, TASK_HELPER, &ta->helpers_n, false);
>  }
>  
>  static int collect_zombie_pids(struct task_restore_args *ta)
>  {
>  	ta->zombies = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
> -	return collect_child_pids(TASK_DEAD, &ta->zombies_n);
> +	ta->zombies_n = 0;
> +	return collect_child_pids(current, TASK_DEAD, &ta->zombies_n, false);
> +}
> +
> +static int collect_allowed_to_die_pids(struct task_restore_args *ta)
> +{
> +	ta->allowed_to_die = (pid_t *)rst_mem_align_cpos(RM_PRIVATE);
> +	ta->allowed_to_die_n = 0;
> +
> +	if (collect_child_pids(current, TASK_DEAD, &ta->allowed_to_die_n, current == root_item) < 0)
> +		return -1;
> +	if (collect_child_pids(current, TASK_HELPER, &ta->allowed_to_die_n, false) < 0)
> +		return -1;
> +
> +	return 0;
>  }
>  
>  static int open_core(int pid, CoreEntry **pcore)
> @@ -655,6 +672,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
>  	if (collect_zombie_pids(ta) < 0)
>  		return -1;
>  
> +	if (collect_allowed_to_die_pids(ta) < 0)
> +		return -1;
> +
>  	if (inherit_fd_fini() < 0)
>  		return -1;
>  
> @@ -3003,6 +3023,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
>  	RST_MEM_FIXUP_PPTR(task_args->helpers);
>  	RST_MEM_FIXUP_PPTR(task_args->zombies);
>  	RST_MEM_FIXUP_PPTR(task_args->seccomp_filters);
> +	RST_MEM_FIXUP_PPTR(task_args->allowed_to_die);
>  
>  	if (core->tc->has_seccomp_mode)
>  		task_args->seccomp_mode = core->tc->seccomp_mode;
> diff --git a/criu/include/restorer.h b/criu/include/restorer.h
> index 7c0336e..ac50bf0 100644
> --- a/criu/include/restorer.h
> +++ b/criu/include/restorer.h
> @@ -149,6 +149,9 @@ struct task_restore_args {
>  	struct sock_fprog		*seccomp_filters;
>  	unsigned int			seccomp_filters_n;
>  
> +	pid_t				*allowed_to_die;
> +	unsigned int			allowed_to_die_n;
> +
>  	/* * * * * * * * * * * * * * * * * * * * */
>  
>  	unsigned long			task_size;
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index 424ccf3..d3a24d8 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -57,10 +57,8 @@
>  
>  static struct task_entries *task_entries;
>  static futex_t thread_inprogress;
> -static pid_t *helpers;
> -static int n_helpers;
> -static pid_t *zombies;
> -static int n_zombies;
> +static pid_t *allowed_to_die;
> +static int n_allowed_to_die;
>  
>  extern void cr_restore_rt (void) asm ("__cr_restore_rt")
>  			__attribute__ ((visibility ("hidden")));
> @@ -72,12 +70,8 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
>  
>  	/* We can ignore helpers that die, we expect them to after
>  	 * CR_STATE_RESTORE is finished. */
> -	for (i = 0; i < n_helpers; i++)
> -		if (siginfo->si_pid == helpers[i])
> -			return;
> -
> -	for (i = 0; i < n_zombies; i++)
> -		if (siginfo->si_pid == zombies[i])
> +	for (i = 0; i < n_allowed_to_die; i++)
> +		if (siginfo->si_pid == allowed_to_die[i])
>  			return;
>  
>  	if (siginfo->si_code & CLD_EXITED)
> @@ -1106,10 +1100,8 @@ long __export_restore_task(struct task_restore_args *args)
>  #endif
>  
>  	task_entries = args->task_entries;
> -	helpers = args->helpers;
> -	n_helpers = args->helpers_n;
> -	zombies = args->zombies;
> -	n_zombies = args->zombies_n;
> +	allowed_to_die = args->allowed_to_die;
> +	n_allowed_to_die = args->allowed_to_die_n;
>  	*args->breakpoint = rst_sigreturn;
>  
>  	ksigfillset(&act.rt_sa_mask);
> 



More information about the CRIU mailing list