[CRIU] [PATCH] restore: Set CLONE_PARENT iif pdeath_sig is present, v4

Andrew Vagin avagin at parallels.com
Thu Aug 14 13:00:15 PDT 2014


On Thu, Aug 14, 2014 at 10:53:33PM +0400, Cyrill Gorcunov wrote:
> On Thu, Aug 14, 2014 at 10:46:06PM +0400, Pavel Emelyanov wrote:
> > > 
> > > yes, my bad. sorry for delay, has been testing update. attached.
> > 
> > Let's do v4, OK?
> 
> Hope this one is what's expected.

> From a3b4879c75e32f2469e3e17b3ccbb649c0244970 Mon Sep 17 00:00:00 2001
> From: Cyrill Gorcunov <gorcunov at openvz.org>
> Date: Thu, 14 Aug 2014 19:34:30 +0400
> Subject: [PATCH] restore: Set CLONE_PARENT iif pdeath_sig is present, v4
> 
> It's been discovered that on 3.11 we might fail on restore
> if we pass the @CLONE_PARENT flag into the clone() call, due to
> kernel limitations.
> 
> Because we're treating 3.11 as a base working kernel, let's
> do a trick instead:
> 
>  - setup this flag iif pdeath_sig is present
>  - if CLONE_NEWPID is passed warn a user about
>    potential consequences.
>  - because we need to carry the condition in attach_to_tasks
>    call, introduce @root_as_sibling variable for this.
> 
> CC: Tycho Andersen <tycho.andersen at canonical.com>
> CC: Pavel Emelyanov <xemul at parallels.com>
> CC: Andrey Vagin <avagin at openvz.org>

Acked-by: Andrey Vagin <avagin at openvz.org>

> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  cr-restore.c | 52 ++++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 36 insertions(+), 16 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index bd16b1d81ca7..cf7f6e1df1c5 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -93,6 +93,8 @@ static int prepare_rlimits(int pid, CoreEntry *core);
>  static int prepare_posix_timers(int pid, CoreEntry *core);
>  static int prepare_signals(int pid);
>  
> +static int root_as_sibling;
> +
>  static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
>  {
>  	void *ret;
> @@ -922,6 +924,34 @@ struct cr_clone_arg {
>  	CoreEntry *core;
>  };
>  
> +static void maybe_clone_parent(struct pstree_item *item,
> +			      struct cr_clone_arg *ca)
> +{
> +	if (opts.swrk_restore ||
> +	    (opts.restore_detach && ca->core->thread_core->pdeath_sig)) {
> +		/*
> +		 * This means we're called from lib's criu_restore_child().
> +		 * In that case create the root task as the child one to+
> +		 * the caller. This is the only way to correctly restore the
> +		 * pdeath_sig of the root task. But also looks nice.
> +		 *
> +		 * Alternatively, if we are --restore-detached, a similar trick is
> +		 * needed to correctly restore pdeath_sig and prevent processes from
> +		 * dying once restored.
> +		 *
> +		 * There were a problem in kernel 3.11 -- CLONE_PARENT can't be
> +		 * set together with CLONE_NEWPID, which has been solved in further
> +		 * versions of the kernels, but we treat 3.11 as a base, so at
> +		 * least warn a user about potential problems.
> +		 */
> +		item->rst->clone_flags |= CLONE_PARENT;
> +		root_as_sibling = 1;
> +		if (item->rst->clone_flags & CLONE_NEWPID)
> +			pr_warn("Set CLONE_PARENT | CLONE_NEWPID but it might cause restore problem,"
> +				"because not all kernels support such clone flags combinations!\n");

We stop testing CLONE_PARENT again :(. I'll miss this day :)

> +	}
> +}
> +
>  static inline int fork_with_pid(struct pstree_item *item)
>  {
>  	int ret = -1, fd;
> @@ -951,6 +981,9 @@ static inline int fork_with_pid(struct pstree_item *item)
>  			pr_err("Unknown task state %d\n", item->state);
>  			return -1;
>  		}
> +
> +		if (unlikely(item == root_item))
> +			maybe_clone_parent(item, &ca);
>  	} else {
>  		/*
>  		 * Helper entry will not get moved around and thus
> @@ -1100,7 +1133,7 @@ static int criu_signals_setup(void)
>  	}
>  
>  	act.sa_flags |= SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART;
> -	if (opts.swrk_restore || opts.restore_detach)
> +	if (root_as_sibling)
>  		/*
>  		 * Root task will be our sibling. This means, that
>  		 * we will not notice when (if) it dies in SIGCHLD
> @@ -1587,24 +1620,11 @@ static int restore_root_task(struct pstree_item *init)
>  	futex_set(&task_entries->nr_in_progress,
>  			stage_participants(CR_STATE_RESTORE_NS));
>  
> -	/*
> -	 * This means we're called from lib's criu_restore_child().
> -	 * In that case create the root task as the child one to+
> -	 * the caller. This is the only way to correctly restore the
> -	 * pdeath_sig of the root task. But also looks nice.
> -	 *
> -	 * Alternatively, if we are --restore-detached, a similar trick is
> -	 * needed to correctly restore pdeath_sig and prevent processes from
> -	 * dying once restored.
> -	 */
> -	if (opts.swrk_restore || opts.restore_detach)
> -		init->rst->clone_flags |= CLONE_PARENT;
> -
>  	ret = fork_with_pid(init);
>  	if (ret < 0)
>  		return -1;
>  
> -	if (opts.swrk_restore || opts.restore_detach) {
> +	if (root_as_sibling) {
>  		if (ptrace(PTRACE_SEIZE, init->pid.real, 0, 0)) {
>  			pr_perror("Can't attach to init");
>  			goto out;
> @@ -1663,7 +1683,7 @@ static int restore_root_task(struct pstree_item *init)
>  
>  	timing_stop(TIME_RESTORE);
>  
> -	ret = attach_to_tasks(opts.swrk_restore);
> +	ret = attach_to_tasks(root_as_sibling);
>  
>  	pr_info("Restore finished successfully. Resuming tasks.\n");
>  	futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
> -- 
> 1.9.3
> 



More information about the CRIU mailing list