[CRIU] [PATCH 2/2] restore: do not kill processes if not-all of them have been created
Andrey Vagin
avagin at openvz.org
Fri Aug 16 10:46:56 EDT 2013
If processes are restored in the current pidns, criu knows their pids from the
images, but some of those pids may already be in use by external processes.
In the CR_STATE_RESTORE_NS and CR_STATE_FORKING stages, tasks can be blocked
only on the task_entries->start futex. This patch adds a new stage,
CR_STATE_FAIL, which signals that something went wrong and all processes must
exit.
Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
cr-restore.c | 24 +++++++++++++++++-------
include/restorer.h | 8 +++++---
2 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/cr-restore.c b/cr-restore.c
index 4a59c91..b570753 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1147,7 +1147,8 @@ static int restore_task_with_children(void *_arg)
if (mount_proc())
exit(1);
- restore_finish_stage(CR_STATE_RESTORE_NS);
+ if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
+ exit(1);
if (root_prepare_shared())
exit(1);
@@ -1190,7 +1191,8 @@ static int restore_task_with_children(void *_arg)
if (current->pgid == current->pid.virt)
restore_pgid();
- restore_finish_stage(CR_STATE_FORKING);
+ if (restore_finish_stage(CR_STATE_FORKING) < 0)
+ exit(1);
if (current->pgid != current->pid.virt)
restore_pgid();
@@ -1206,6 +1208,8 @@ static int restore_task_with_children(void *_arg)
static inline int stage_participants(int next_stage)
{
switch (next_stage) {
+ case CR_STATE_FAIL:
+ return 0;
case CR_STATE_RESTORE_NS:
return 1;
case CR_STATE_FORKING:
@@ -1316,21 +1320,21 @@ static int restore_root_task(struct pstree_item *init)
ret = restore_switch_stage(CR_STATE_RESTORE_PGID);
if (ret < 0)
- goto out;
+ goto out_kill;
ret = restore_switch_stage(CR_STATE_RESTORE);
if (ret < 0)
- goto out;
+ goto out_kill;
ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
if (ret < 0)
- goto out;
+ goto out_kill;
/* Restore SIGCHLD here to skip SIGCHLD from a network sctip */
ret = sigaction(SIGCHLD, &old_act, NULL);
if (ret < 0) {
pr_perror("sigaction() failed");
- goto out;
+ goto out_kill;
}
/* Unlock network before disabling repair mode on sockets */
@@ -1356,7 +1360,11 @@ static int restore_root_task(struct pstree_item *init)
return 0;
-out:
+out_kill:
+ /*
+ * The processes can be killed only when all of them have been created,
+ * otherwise external processes could be killed.
+ */
if (current_ns_mask & CLONE_NEWPID) {
/* Kill init */
if (root_item->pid.real > 0)
@@ -1369,6 +1377,8 @@ out:
kill(pi->pid.virt, SIGKILL);
}
+out:
+ __restore_switch_stage(CR_STATE_FAIL);
pr_err("Restoring FAILED.\n");
return 1;
}
diff --git a/include/restorer.h b/include/restorer.h
index 4375f9d..1ed1d81 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -180,7 +180,8 @@ struct shmems {
#define TASK_ENTRIES_SIZE 4096
enum {
- CR_STATE_RESTORE_NS, /* is used for executing "setup-namespace" scripts */
+ CR_STATE_FAIL = -1,
+ CR_STATE_RESTORE_NS = 0, /* is used for executing "setup-namespace" scripts */
CR_STATE_FORKING,
CR_STATE_RESTORE_PGID,
CR_STATE_RESTORE,
@@ -217,10 +218,11 @@ find_shmem(struct shmems *shmems, unsigned long shmid)
return NULL;
}
-#define restore_finish_stage(__stage) do { \
+#define restore_finish_stage(__stage) ({ \
futex_dec_and_wake(&task_entries->nr_in_progress); \
futex_wait_while(&task_entries->start, __stage); \
- } while (0)
+ (s32) futex_get(&task_entries->start); \
+ })
/* the restorer_blob_offset__ prefix is added by gen_offsets.sh */
--
1.8.3.1
More information about the CRIU
mailing list