[CRIU] [RFC][PATCH 3/2] criu: Restore tasks as siblings in swrk

Pavel Emelyanov xemul at parallels.com
Mon Jun 30 09:30:44 PDT 2014


Andrey validly pointed out that restoring pdeath_sig is not
compatible with the criu_restore_child() call -- after criu restores
the children, it will exit and fire the pdeath_sig at the restored
tree root, potentially killing it.

The fix for that could be -- when started in swrk mode, criu can
restore the tree not as its children tasks, but as siblings, using
the CLONE_PARENT flag when fork()-ing the root task.

With this we should also take care of error handling -- right
now criu catches the SIGCHLD from dying children tasks, and
since we plan to create them as children of the criu parent (the
library caller) we will not be able to catch them. To still be
able to catch them, we SEIZE the root task in advance, thus causing
all SIGCHLD-s to go to criu, not to its parent.

Having this done we no longer need the SUBREAPER trick in the
library call -- tasks get restored right as the caller's kids :)


Some thoughts for the future -- using this trick we can finally make
"natural" restoration of shell jobs, i.e. make criu restore
some subtree right under bash, w/o leaving itself as an intermediate
task and w/o re-parenting the subtree to init after restore.

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>

---

diff --git a/cr-restore.c b/cr-restore.c
index 573b989..54f0d34 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1374,7 +1374,7 @@ static int restore_switch_stage(int next_stage)
 	return restore_wait_inprogress_tasks();
 }
 
-static int attach_to_tasks()
+static int attach_to_tasks(bool root_seized)
 {
 	struct pstree_item *item;
 
@@ -1394,9 +1394,16 @@ static int attach_to_tasks()
 		for (i = 0; i < item->nr_threads; i++) {
 			pid = item->threads[i].real;
 
-			if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
-				pr_perror("Can't attach to %d", pid);
-				return -1;
+			if (item != root_item || !root_seized) {
+				if (ptrace(PTRACE_ATTACH, pid, 0, 0)) {
+					pr_perror("Can't attach to %d", pid);
+					return -1;
+				}
+			} else {
+				if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
+					pr_perror("Can't interrupt task");
+					return -1;
+				}
 			}
 
 			if (wait4(pid, &status, __WALL, NULL) != pid) {
@@ -1513,10 +1520,27 @@ static int restore_root_task(struct pstree_item *init)
 	futex_set(&task_entries->nr_in_progress,
 			stage_participants(CR_STATE_RESTORE_NS));
 
+	if (opts.swrk_restore)
+		init->rst->clone_flags |= CLONE_PARENT;
+
 	ret = fork_with_pid(init);
 	if (ret < 0)
 		return -1;
 
+	if (opts.swrk_restore) {
+		act.sa_flags &= ~SA_NOCLDSTOP;
+		ret = sigaction(SIGCHLD, &act, NULL);
+		if (ret < 0) {
+			pr_perror("sigaction() failed");
+			goto out;
+		}
+
+		if (ptrace(PTRACE_SEIZE, init->pid.real, 0, 0)) {
+			pr_perror("Can't attach to init");
+			goto out;
+		}
+	}
+
 	pr_info("Wait until namespaces are created\n");
 	ret = restore_wait_inprogress_tasks();
 	if (ret)
@@ -1570,7 +1594,7 @@ static int restore_root_task(struct pstree_item *init)
 
 	timing_stop(TIME_RESTORE);
 
-	ret = attach_to_tasks();
+	ret = attach_to_tasks(opts.swrk_restore);
 
 	pr_info("Restore finished successfully. Resuming tasks.\n");
 	futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
diff --git a/crtools.c b/crtools.c
index c3a2e27..b662fff 100644
--- a/crtools.c
+++ b/crtools.c
@@ -183,14 +183,16 @@ int main(int argc, char *argv[])
 	if (init_service_fd())
 		return 1;
 
-	if (!strcmp(argv[1], "swrk"))
+	if (!strcmp(argv[1], "swrk")) {
 		/*
 		 * This is to start criu service worker from libcriu calls.
 		 * The usage is "criu swrk <fd>" and is not for CLI/scripts.
 		 * The arguments semantics can change at any tyme with the
 		 * corresponding lib call change.
 		 */
+		opts.swrk_restore = true;
 		return cr_service_work(atoi(argv[2]));
+	}
 
 	while (1) {
 		idx = -1;
diff --git a/include/cr_options.h b/include/cr_options.h
index 2732e58..55ca70b 100644
--- a/include/cr_options.h
+++ b/include/cr_options.h
@@ -34,6 +34,7 @@ struct cr_options {
 	bool			link_remap_ok;
 	unsigned int		rst_namespaces_flags;
 	bool			log_file_per_pid;
+	bool			swrk_restore;
 	char			*output;
 	char			*root;
 	char			*pidfile;
diff --git a/lib/criu.c b/lib/criu.c
index 86256fd..7c1ac07 100644
--- a/lib/criu.c
+++ b/lib/criu.c
@@ -15,10 +15,6 @@
 #include "rpc.pb-c.h"
 #include "cr-service-const.h"
 
-#ifndef PR_SET_CHILD_SUBREAPER
-#define PR_SET_CHILD_SUBREAPER 36
-#endif
-
 const char *criu_lib_version = CRIU_VERSION;
 
 static char *service_address = CR_DEFAULT_SERVICE_ADDRESS;
@@ -582,15 +578,6 @@ int criu_restore_child(void)
 	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sks))
 		goto out;
 
-	/*
-	 * Set us as child subreaper so that after the swrk
-	 * finishes restore and exits the restored subtree
-	 * gets reparented to us.
-	 */
-
-	if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0))
-		goto err;
-
 	pid = fork();
 	if (pid < 0)
 		goto err;
@@ -633,8 +620,6 @@ int criu_restore_child(void)
 
 	close(sks[0]);
 	waitpid(pid, NULL, 0);
-	/* Drop the subreaper role _after_ swrk exits */
-	prctl(PR_SET_CHILD_SUBREAPER, 0, 0, 0);
 
 	if (!ret) {
 		ret = resp->success ? resp->restore->pid : -EBADE;

---


More information about the CRIU mailing list