[CRIU] [PATCH 2/2] restore: do not kill processes if not-all of them have been created

Andrey Vagin avagin at openvz.org
Fri Aug 16 10:46:56 EDT 2013


If processes are restored in the current pidns, criu knows their pids from
the images, but some of those pids may already be in use by external processes.

In the CR_STATE_RESTORE_NS and CR_STATE_FORKING stages, tasks can be blocked
only on the task_entries->start futex. This patch adds a new stage,
CR_STATE_FAIL, which signals that something went wrong and all processes must
exit.

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 cr-restore.c       | 24 +++++++++++++++++-------
 include/restorer.h |  8 +++++---
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index 4a59c91..b570753 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1147,7 +1147,8 @@ static int restore_task_with_children(void *_arg)
 		if (mount_proc())
 			exit(1);
 
-		restore_finish_stage(CR_STATE_RESTORE_NS);
+		if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
+			exit(1);
 
 		if (root_prepare_shared())
 			exit(1);
@@ -1190,7 +1191,8 @@ static int restore_task_with_children(void *_arg)
 	if (current->pgid == current->pid.virt)
 		restore_pgid();
 
-	restore_finish_stage(CR_STATE_FORKING);
+	if (restore_finish_stage(CR_STATE_FORKING) < 0)
+		exit(1);
 
 	if (current->pgid != current->pid.virt)
 		restore_pgid();
@@ -1206,6 +1208,8 @@ static int restore_task_with_children(void *_arg)
 static inline int stage_participants(int next_stage)
 {
 	switch (next_stage) {
+	case CR_STATE_FAIL:
+		return 0;
 	case CR_STATE_RESTORE_NS:
 		return 1;
 	case CR_STATE_FORKING:
@@ -1316,21 +1320,21 @@ static int restore_root_task(struct pstree_item *init)
 
 	ret = restore_switch_stage(CR_STATE_RESTORE_PGID);
 	if (ret < 0)
-		goto out;
+		goto out_kill;
 
 	ret = restore_switch_stage(CR_STATE_RESTORE);
 	if (ret < 0)
-		goto out;
+		goto out_kill;
 
 	ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
 	if (ret < 0)
-		goto out;
+		goto out_kill;
 
 	/* Restore SIGCHLD here to skip SIGCHLD from a network sctip */
 	ret = sigaction(SIGCHLD, &old_act, NULL);
 	if (ret < 0) {
 		pr_perror("sigaction() failed");
-		goto out;
+		goto out_kill;
 	}
 
 	/* Unlock network before disabling repair mode on sockets */
@@ -1356,7 +1360,11 @@ static int restore_root_task(struct pstree_item *init)
 
 	return 0;
 
-out:
+out_kill:
+	/*
+	 * The processes can be killed only when all of them have been created,
+	 * otherwise external processes could be killed.
+	 */
 	if (current_ns_mask & CLONE_NEWPID) {
 		/* Kill init */
 		if (root_item->pid.real > 0)
@@ -1369,6 +1377,8 @@ out:
 				kill(pi->pid.virt, SIGKILL);
 	}
 
+out:
+	__restore_switch_stage(CR_STATE_FAIL);
 	pr_err("Restoring FAILED.\n");
 	return 1;
 }
diff --git a/include/restorer.h b/include/restorer.h
index 4375f9d..1ed1d81 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -180,7 +180,8 @@ struct shmems {
 #define TASK_ENTRIES_SIZE 4096
 
 enum {
-	CR_STATE_RESTORE_NS, /* is used for executing "setup-namespace" scripts */
+	CR_STATE_FAIL		= -1,
+	CR_STATE_RESTORE_NS	= 0, /* is used for executing "setup-namespace" scripts */
 	CR_STATE_FORKING,
 	CR_STATE_RESTORE_PGID,
 	CR_STATE_RESTORE,
@@ -217,10 +218,11 @@ find_shmem(struct shmems *shmems, unsigned long shmid)
 	return NULL;
 }
 
-#define restore_finish_stage(__stage) do {				\
+#define restore_finish_stage(__stage) ({				\
 		futex_dec_and_wake(&task_entries->nr_in_progress);	\
 		futex_wait_while(&task_entries->start, __stage);	\
-	} while (0)
+		(s32) futex_get(&task_entries->start);			\
+	})
 
 
 /* the restorer_blob_offset__ prefix is added by gen_offsets.sh */
-- 
1.8.3.1



More information about the CRIU mailing list