[CRIU] [RFC][PATCH 3/2] criu: Restore tasks as siblings in swrk
Andrey Vagin
avagin at parallels.com
Tue Jul 1 01:32:01 PDT 2014
On Mon, Jun 30, 2014 at 08:30:44PM +0400, Pavel Emelyanov wrote:
> Andrey validly pointed out, that restoring pdeath_sig is not
> compatible with criu_restore_child() call -- after criu restore
> children, it will exit and fire the pdeath_sig into restored
> tree root, potentially killing it.
>
> The fix for that could be -- when started in swrk more, criu can
> restore tree not as children tasks, but as siblings, using the
> CLONE_PARENT flag when fork()-ing the root task.
>
> With this we should also take care about errors handing -- right
> now criu catches the SIGCHILD from dying children tasks, and
> since we plan to create them be children of the criu parent (the
> library caller) we will not be able to catch them. To do so we
> SEIZE the root task in advance thus causing all SIGCHLD-s go to
> criu, not to its parent.
>
> Having this done we no longer need the SUBREAPER trick in the
> library call -- tasks get restored right as callers kids :)
>
>
> Some thoughts for future -- using this trick we can finally make
> "natural" restoration of shell jobs. I.e. -- make criu restore
> some subtree right under bash, w/o leaving itself as intermediate
> task and w/o re-parenting the subtree to init after restore.
>
> Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
Acked-by: Andrey Vagin <avagin at parallels.com>
>
> ---
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 573b989..54f0d34 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1374,7 +1374,7 @@ static int restore_switch_stage(int next_stage)
> return restore_wait_inprogress_tasks();
> }
>
> -static int attach_to_tasks()
> +static int attach_to_tasks(bool root_seized)
> {
> struct pstree_item *item;
>
> @@ -1394,9 +1394,16 @@ static int attach_to_tasks()
> for (i = 0; i < item->nr_threads; i++) {
> pid = item->threads[i].real;
>
> - if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
> - pr_perror("Can't attach to %d", pid);
> - return -1;
> + if (item != root_item || !root_seized) {
> + if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
> + pr_perror("Can't attach to %d", pid);
> + return -1;
> + }
> + } else {
> + if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
> + pr_perror("Can't interrupt task");
> + return -1;
> + }
> }
>
> if (wait4(pid, &status, __WALL, NULL) != pid) {
> @@ -1513,10 +1520,27 @@ static int restore_root_task(struct pstree_item *init)
> futex_set(&task_entries->nr_in_progress,
> stage_participants(CR_STATE_RESTORE_NS));
>
> + if (opts.swrk_restore)
> + init->rst->clone_flags |= CLONE_PARENT;
> +
> ret = fork_with_pid(init);
> if (ret < 0)
> return -1;
>
> + if (opts.swrk_restore) {
> + act.sa_flags &= ~SA_NOCLDSTOP;
> + ret = sigaction(SIGCHLD, &act, NULL);
> + if (ret < 0) {
> + pr_perror("sigaction() failed");
> + goto out;
> + }
> +
> + if (ptrace(PTRACE_SEIZE, init->pid.real, 0, 0)) {
> + pr_perror("Can't attach to init");
> + goto out;
> + }
> + }
> +
> pr_info("Wait until namespaces are created\n");
> ret = restore_wait_inprogress_tasks();
> if (ret)
> @@ -1570,7 +1594,7 @@ static int restore_root_task(struct pstree_item *init)
>
> timing_stop(TIME_RESTORE);
>
> - ret = attach_to_tasks();
> + ret = attach_to_tasks(opts.swrk_restore);
>
> pr_info("Restore finished successfully. Resuming tasks.\n");
> futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
> diff --git a/crtools.c b/crtools.c
> index c3a2e27..b662fff 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -183,14 +183,16 @@ int main(int argc, char *argv[])
> if (init_service_fd())
> return 1;
>
> - if (!strcmp(argv[1], "swrk"))
> + if (!strcmp(argv[1], "swrk")) {
> /*
> * This is to start criu service worker from libcriu calls.
> * The usage is "criu swrk <fd>" and is not for CLI/scripts.
> * The arguments semantics can change at any tyme with the
> * corresponding lib call change.
> */
> + opts.swrk_restore = true;
> return cr_service_work(atoi(argv[2]));
> + }
>
> while (1) {
> idx = -1;
> diff --git a/include/cr_options.h b/include/cr_options.h
> index 2732e58..55ca70b 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -34,6 +34,7 @@ struct cr_options {
> bool link_remap_ok;
> unsigned int rst_namespaces_flags;
> bool log_file_per_pid;
> + bool swrk_restore;
> char *output;
> char *root;
> char *pidfile;
> diff --git a/lib/criu.c b/lib/criu.c
> index 86256fd..7c1ac07 100644
> --- a/lib/criu.c
> +++ b/lib/criu.c
> @@ -15,10 +15,6 @@
> #include "rpc.pb-c.h"
> #include "cr-service-const.h"
>
> -#ifndef PR_SET_CHILD_SUBREAPER
> -#define PR_SET_CHILD_SUBREAPER 36
> -#endif
> -
> const char *criu_lib_version = CRIU_VERSION;
>
> static char *service_address = CR_DEFAULT_SERVICE_ADDRESS;
> @@ -582,15 +578,6 @@ int criu_restore_child(void)
> if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sks))
> goto out;
>
> - /*
> - * Set us as child subreaper so that after the swrk
> - * finishes restore and exits the restored subtree
> - * gets reparented to us.
> - */
> -
> - if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0))
> - goto err;
> -
> pid = fork();
> if (pid < 0)
> goto err;
> @@ -633,8 +620,6 @@ int criu_restore_child(void)
>
> close(sks[0]);
> waitpid(pid, NULL, 0);
> - /* Drop the subreaper role _after_ swrk exits */
> - prctl(PR_SET_CHILD_SUBREAPER, 0, 0, 0);
>
> if (!ret) {
> ret = resp->success ? resp->restore->pid : -EBADE;
>
> ---
More information about the CRIU
mailing list