[PATCH] restore: Set CLONE_PARENT iif pdeath_sig is present

Cyrill Gorcunov gorcunov at openvz.org
Thu Aug 14 08:34:30 PDT 2014


It's been discovered that on kernel 3.11 restore might fail
if we pass the @CLONE_PARENT flag to the clone() call, due to
kernel limitations.

Because we're treating 3.11 as the base working kernel, let's
do a trick instead:

 - set up this flag iff (if and only if) pdeath_sig is present
 - if CLONE_NEWPID is passed, warn the user about
   potential consequences.
 - because we need to carry the condition into the attach_to_tasks()
   call, introduce the @root_as_sibling variable for this.

CC: Tycho Andersen <tycho.andersen at canonical.com>
CC: Pavel Emelyanov <xemul at parallels.com>
CC: Andrey Vagin <avagin at openvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 cr-restore.c | 50 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index bd16b1d81ca7..4562f8c197dc 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -93,6 +93,8 @@ static int prepare_rlimits(int pid, CoreEntry *core);
 static int prepare_posix_timers(int pid, CoreEntry *core);
 static int prepare_signals(int pid);
 
+static int root_as_sibling;
+
 static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
 {
 	void *ret;
@@ -922,6 +924,34 @@ struct cr_clone_arg {
 	CoreEntry *core;
 };
 
+static void maybe_clone_parent(struct pstree_item *item,
+			      struct cr_clone_arg *ca)
+{
+	if (opts.swrk_restore ||
+	    (opts.restore_detach && ca->core->thread_core->pdeath_sig)) {
+		/*
+		 * This means we're called from lib's criu_restore_child().
+		 * In that case create the root task as the child one to+
+		 * the caller. This is the only way to correctly restore the
+		 * pdeath_sig of the root task. But also looks nice.
+		 *
+		 * Alternatively, if we are --restore-detached, a similar trick is
+		 * needed to correctly restore pdeath_sig and prevent processes from
+		 * dying once restored.
+		 *
+		 * There were a problem in kernel 3.11 -- CLONE_PARENT can't be
+		 * set together with CLONE_NEWPID, which has been solved in further
+		 * versions of the kernels, but we treat 3.11 as a base, so at
+		 * least warn a user about potential problems.
+		 */
+		item->rst->clone_flags |= CLONE_PARENT;
+		root_as_sibling = 1;
+		if (item->rst->clone_flags & CLONE_NEWPID)
+			pr_warn("Set CLONE_PARENT | CLONE_NEWPID but it might cause restore problem,"
+				"because not all kernels support such clone flags combinations!\n");
+	}
+}
+
 static inline int fork_with_pid(struct pstree_item *item)
 {
 	int ret = -1, fd;
@@ -951,6 +981,9 @@ static inline int fork_with_pid(struct pstree_item *item)
 			pr_err("Unknown task state %d\n", item->state);
 			return -1;
 		}
+
+		if (unlikely(item->pid.virt == INIT_PID))
+			maybe_clone_parent(item, &ca);
 	} else {
 		/*
 		 * Helper entry will not get moved around and thus
@@ -1100,7 +1133,7 @@ static int criu_signals_setup(void)
 	}
 
 	act.sa_flags |= SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART;
-	if (opts.swrk_restore || opts.restore_detach)
+	if (root_as_sibling)
 		/*
 		 * Root task will be our sibling. This means, that
 		 * we will not notice when (if) it dies in SIGCHLD
@@ -1587,19 +1620,6 @@ static int restore_root_task(struct pstree_item *init)
 	futex_set(&task_entries->nr_in_progress,
 			stage_participants(CR_STATE_RESTORE_NS));
 
-	/*
-	 * This means we're called from lib's criu_restore_child().
-	 * In that case create the root task as the child one to+
-	 * the caller. This is the only way to correctly restore the
-	 * pdeath_sig of the root task. But also looks nice.
-	 *
-	 * Alternatively, if we are --restore-detached, a similar trick is
-	 * needed to correctly restore pdeath_sig and prevent processes from
-	 * dying once restored.
-	 */
-	if (opts.swrk_restore || opts.restore_detach)
-		init->rst->clone_flags |= CLONE_PARENT;
-
 	ret = fork_with_pid(init);
 	if (ret < 0)
 		return -1;
@@ -1663,7 +1683,7 @@ static int restore_root_task(struct pstree_item *init)
 
 	timing_stop(TIME_RESTORE);
 
-	ret = attach_to_tasks(opts.swrk_restore);
+	ret = attach_to_tasks(root_as_sibling);
 
 	pr_info("Restore finished successfully. Resuming tasks.\n");
 	futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
-- 
1.9.3


--AkbCVLjbJ9qUtAXD--


More information about the CRIU mailing list