[CRIU] [RFC][PATCH 3/2] criu: Restore tasks as siblings in swrk

Andrey Vagin avagin at parallels.com
Tue Jul 1 01:32:01 PDT 2014


On Mon, Jun 30, 2014 at 08:30:44PM +0400, Pavel Emelyanov wrote:
> Andrey validly pointed out that restoring pdeath_sig is not
> compatible with the criu_restore_child() call -- after criu restores
> the children, it will exit and fire the pdeath_sig into the restored
> tree root, potentially killing it.
> 
> The fix for that could be -- when started in swrk mode, criu can
> restore the tree not as children tasks, but as siblings, using the
> CLONE_PARENT flag when fork()-ing the root task.
> 
> With this we should also take care of error handling -- right
> now criu catches the SIGCHLD from dying children tasks, and
> since we plan to create them as children of the criu parent (the
> library caller) we will not be able to catch them. To do so we
> SEIZE the root task in advance, thus causing all SIGCHLD-s to go to
> criu, not to its parent.
> 
> Having this done we no longer need the SUBREAPER trick in the
> library call -- tasks get restored right as callers kids :)
> 
> 
> Some thoughts for the future -- using this trick we can finally make
> "natural" restoration of shell jobs, i.e. make criu restore
> some subtree right under bash, w/o leaving itself as an intermediate
> task and w/o re-parenting the subtree to init after restore.
> 
> Signed-off-by: Pavel Emelyanov <xemul at parallels.com>

Acked-by: Andrey Vagin <avagin at parallels.com>

> 
> ---
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 573b989..54f0d34 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1374,7 +1374,7 @@ static int restore_switch_stage(int next_stage)
>  	return restore_wait_inprogress_tasks();
>  }
>  
> -static int attach_to_tasks()
> +static int attach_to_tasks(bool root_seized)
>  {
>  	struct pstree_item *item;
>  
> @@ -1394,9 +1394,16 @@ static int attach_to_tasks()
>  		for (i = 0; i < item->nr_threads; i++) {
>  			pid = item->threads[i].real;
>  
> -			if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
> -				pr_perror("Can't attach to %d", pid);
> -				return -1;
> +			if (item != root_item || !root_seized) {
> +				if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
> +					pr_perror("Can't attach to %d", pid);
> +					return -1;
> +				}
> +			} else {
> +				if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
> +					pr_perror("Can't interrupt task");
> +					return -1;
> +				}
>  			}
>  
>  			if (wait4(pid, &status, __WALL, NULL) != pid) {
> @@ -1513,10 +1520,27 @@ static int restore_root_task(struct pstree_item *init)
>  	futex_set(&task_entries->nr_in_progress,
>  			stage_participants(CR_STATE_RESTORE_NS));
>  
> +	if (opts.swrk_restore)
> +		init->rst->clone_flags |= CLONE_PARENT;
> +
>  	ret = fork_with_pid(init);
>  	if (ret < 0)
>  		return -1;
>  
> +	if (opts.swrk_restore) {
> +		act.sa_flags &= ~SA_NOCLDSTOP;
> +		ret = sigaction(SIGCHLD, &act, NULL);
> +		if (ret < 0) {
> +			pr_perror("sigaction() failed");
> +			goto out;
> +		}
> +
> +		if (ptrace(PTRACE_SEIZE, init->pid.real, 0, 0)) {
> +			pr_perror("Can't attach to init");
> +			goto out;
> +		}
> +	}
> +
>  	pr_info("Wait until namespaces are created\n");
>  	ret = restore_wait_inprogress_tasks();
>  	if (ret)
> @@ -1570,7 +1594,7 @@ static int restore_root_task(struct pstree_item *init)
>  
>  	timing_stop(TIME_RESTORE);
>  
> -	ret = attach_to_tasks();
> +	ret = attach_to_tasks(opts.swrk_restore);
>  
>  	pr_info("Restore finished successfully. Resuming tasks.\n");
>  	futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
> diff --git a/crtools.c b/crtools.c
> index c3a2e27..b662fff 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -183,14 +183,16 @@ int main(int argc, char *argv[])
>  	if (init_service_fd())
>  		return 1;
>  
> -	if (!strcmp(argv[1], "swrk"))
> +	if (!strcmp(argv[1], "swrk")) {
>  		/*
>  		 * This is to start criu service worker from libcriu calls.
>  		 * The usage is "criu swrk <fd>" and is not for CLI/scripts.
>  		 * The arguments semantics can change at any tyme with the
>  		 * corresponding lib call change.
>  		 */
> +		opts.swrk_restore = true;
>  		return cr_service_work(atoi(argv[2]));
> +	}
>  
>  	while (1) {
>  		idx = -1;
> diff --git a/include/cr_options.h b/include/cr_options.h
> index 2732e58..55ca70b 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -34,6 +34,7 @@ struct cr_options {
>  	bool			link_remap_ok;
>  	unsigned int		rst_namespaces_flags;
>  	bool			log_file_per_pid;
> +	bool			swrk_restore;
>  	char			*output;
>  	char			*root;
>  	char			*pidfile;
> diff --git a/lib/criu.c b/lib/criu.c
> index 86256fd..7c1ac07 100644
> --- a/lib/criu.c
> +++ b/lib/criu.c
> @@ -15,10 +15,6 @@
>  #include "rpc.pb-c.h"
>  #include "cr-service-const.h"
>  
> -#ifndef PR_SET_CHILD_SUBREAPER
> -#define PR_SET_CHILD_SUBREAPER 36
> -#endif
> -
>  const char *criu_lib_version = CRIU_VERSION;
>  
>  static char *service_address = CR_DEFAULT_SERVICE_ADDRESS;
> @@ -582,15 +578,6 @@ int criu_restore_child(void)
>  	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sks))
>  		goto out;
>  
> -	/*
> -	 * Set us as child subreaper so that after the swrk
> -	 * finishes restore and exits the restored subtree
> -	 * gets reparented to us.
> -	 */
> -
> -	if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0))
> -		goto err;
> -
>  	pid = fork();
>  	if (pid < 0)
>  		goto err;
> @@ -633,8 +620,6 @@ int criu_restore_child(void)
>  
>  	close(sks[0]);
>  	waitpid(pid, NULL, 0);
> -	/* Drop the subreaper role _after_ swrk exits */
> -	prctl(PR_SET_CHILD_SUBREAPER, 0, 0, 0);
>  
>  	if (!ret) {
>  		ret = resp->success ? resp->restore->pid : -EBADE;
> 
> ---


More information about the CRIU mailing list