[CRIU] [PATCH 2/2] rst: don't hang when SIGCHLD is coalesced

Tycho Andersen tycho.andersen at canonical.com
Tue Jul 21 11:26:08 PDT 2015


When a TASK_HELPER would exit just before a zombie, sometimes the signal
would get coalesced, and we would miss the zombie exit, causing us to block
forever waiting for the zombie to complete. Let's use an entirely different
strategy for waiting on zombies: explicitly wait on them with waitid, and
use WNOWAIT to prevent their data from actually being reaped.

v2: don't decrement nr_{tasks,threads} in the loop

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
Acked-by: Andrew Vagin <avagin at openvz.org>
---
 cr-restore.c       | 38 ++++++++++++++++++++++++++------------
 include/restorer.h |  4 +++-
 include/rst_info.h |  1 -
 pie/restorer.c     | 43 +++++++++++++++++++++++++++++--------------
 4 files changed, 58 insertions(+), 28 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index e7bac3f..d838a9b 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -118,6 +118,8 @@ static int prepare_signals(int pid, CoreEntry *core);
 static int root_as_sibling;
 static unsigned long helpers_pos = 0;
 static int n_helpers = 0;
+static unsigned long zombies_pos = 0;
+static int n_zombies = 0;
 
 static int crtools_prepare_shared(void)
 {
@@ -730,29 +732,40 @@ err:
 	return ret;
 }
 
-static int collect_helper_pids()
+static int collect_child_pids(int state, int *n)
 {
 	struct pstree_item *pi;
 
-	helpers_pos = rst_mem_cpos(RM_PRIVATE);
-
+	*n = 0;
 	list_for_each_entry(pi, &current->children, sibling) {
-		static pid_t *helper;
+		static pid_t *child;
 
-		if (pi->state != TASK_HELPER)
+		if (pi->state != state)
 			continue;
 
-		helper = rst_mem_alloc(sizeof(*helper), RM_PRIVATE);
-		if (!helper)
+		child = rst_mem_alloc(sizeof(*child), RM_PRIVATE);
+		if (!child)
 			return -1;
 
-		n_helpers++;
-		*helper = pi->pid.virt;
+		(*n)++;
+		*child = pi->pid.virt;
 	}
 
 	return 0;
 }
 
+static int collect_helper_pids()
+{
+	helpers_pos = rst_mem_cpos(RM_PRIVATE);
+	return collect_child_pids(TASK_HELPER, &n_helpers);
+}
+
+static int collect_zombie_pids()
+{
+	zombies_pos = rst_mem_cpos(RM_PRIVATE);
+	return collect_child_pids(TASK_DEAD, &n_zombies);
+}
+
 static int open_cores(int pid, CoreEntry *leader_core)
 {
 	int i, tpid;
@@ -823,6 +836,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
 	if (collect_helper_pids() < 0)
 		return -1;
 
+	if (collect_zombie_pids() < 0)
+		return -1;
+
 	if (inherit_fd_fini() < 0)
 		return -1;
 
@@ -896,7 +912,6 @@ static int restore_one_zombie(CoreEntry *core)
 	if (task_entries != NULL) {
 		restore_finish_stage(CR_STATE_RESTORE);
 		zombie_prepare_signals();
-		mutex_lock(&task_entries->zombie_lock);
 	}
 
 	if (exit_code & 0x7f) {
@@ -1926,7 +1941,6 @@ static int prepare_task_entries(void)
 	task_entries->nr_tasks = 0;
 	task_entries->nr_helpers = 0;
 	futex_set(&task_entries->start, CR_STATE_RESTORE_NS);
-	mutex_init(&task_entries->zombie_lock);
 	mutex_init(&task_entries->userns_sync_lock);
 
 	return 0;
@@ -2885,6 +2899,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	remap_array(rings,	  mm->n_aios, aio_rings);
 	remap_array(rlims,	  rlims_nr, rlims_cpos);
 	remap_array(helpers,	  n_helpers, helpers_pos);
+	remap_array(zombies,	  n_zombies, zombies_pos);
 
 #undef remap_array
 
@@ -3005,7 +3020,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	 * Now prepare run-time data for threads restore.
 	 */
 	task_args->nr_threads		= current->nr_threads;
-	task_args->nr_zombies		= rsti(current)->nr_zombies;
 	task_args->clone_restore_fn	= (void *)restore_thread_exec_start;
 	task_args->thread_args		= thread_args;
 
diff --git a/include/restorer.h b/include/restorer.h
index dcfa175..97a012c 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -104,7 +104,6 @@ struct task_restore_args {
 
 	/* threads restoration */
 	int				nr_threads;		/* number of threads */
-	int				nr_zombies;
 	thread_restore_fcall_t		clone_restore_fn;	/* helper address for clone() call */
 	struct thread_restore_args	*thread_args;		/* array of thread arguments */
 	struct task_entries		*task_entries;
@@ -135,6 +134,9 @@ struct task_restore_args {
 
 	pid_t				*helpers /* the TASK_HELPERS to wait on at the end of restore */;
 	unsigned int			helpers_n;
+
+	pid_t				*zombies;
+	unsigned int			zombies_n;
 	/* * * * * * * * * * * * * * * * * * * * */
 
 	unsigned long			premmapped_addr;
diff --git a/include/rst_info.h b/include/rst_info.h
index 5fab162..0e8dc97 100644
--- a/include/rst_info.h
+++ b/include/rst_info.h
@@ -9,7 +9,6 @@ struct task_entries {
 	int nr_threads, nr_tasks, nr_helpers;
 	futex_t nr_in_progress;
 	futex_t start;
-	mutex_t	zombie_lock;
 	atomic_t cr_err;
 	mutex_t userns_sync_lock;
 };
diff --git a/pie/restorer.c b/pie/restorer.c
index fa336fc..48c2844 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -51,10 +51,11 @@
 
 static struct task_entries *task_entries;
 static futex_t thread_inprogress;
-static futex_t zombies_inprogress;
 static int cap_last_cap;
 static pid_t *helpers;
 static int n_helpers;
+static pid_t *zombies;
+static int n_zombies;
 
 extern void cr_restore_rt (void) asm ("__cr_restore_rt")
 			__attribute__ ((visibility ("hidden")));
@@ -70,16 +71,9 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
 		if (siginfo->si_pid == helpers[i])
 			return;
 
-	if (futex_get(&task_entries->start) == CR_STATE_RESTORE_SIGCHLD) {
-		pr_debug("%ld: Collect a zombie with (pid %d, %d)\n",
-			sys_getpid(), siginfo->si_pid, siginfo->si_pid);
-		futex_dec_and_wake(&task_entries->nr_in_progress);
-		futex_dec_and_wake(&zombies_inprogress);
-		task_entries->nr_threads--;
-		task_entries->nr_tasks--;
-		mutex_unlock(&task_entries->zombie_lock);
-		return;
-	}
+	for (i = 0; i < n_zombies; i++)
+		if (siginfo->si_pid == zombies[i])
+			return;
 
 	if (siginfo->si_code & CLD_EXITED)
 		r = " exited, status=";
@@ -805,6 +799,26 @@ static int wait_helpers(struct task_restore_args *task_args)
 	return 0;
 }
 
+static int wait_zombies(struct task_restore_args *task_args)
+{
+	int i;
+
+	task_entries->nr_threads -= task_args->zombies_n;
+	task_entries->nr_tasks -= task_args->zombies_n;
+
+	for (i = 0; i < task_args->zombies_n; i++) {
+		if (sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL) < 0) {
+			pr_err("Wait on %d zombie failed\n", task_args->zombies[i]);
+			return -1;
+		}
+		pr_debug("%ld: Collect a zombie with pid %d\n",
+			sys_getpid(), task_args->zombies[i]);
+		futex_dec_and_wake(&task_entries->nr_in_progress);
+	}
+
+	return 0;
+}
+
 /*
  * The main routine to restore task via sigreturn.
  * This one is very special, we never return there
@@ -836,6 +850,8 @@ long __export_restore_task(struct task_restore_args *args)
 	task_entries = args->task_entries;
 	helpers = args->helpers;
 	n_helpers = args->helpers_n;
+	zombies = args->zombies;
+	n_zombies = args->zombies_n;
 	*args->breakpoint = rst_sigreturn;
 
 	ksigfillset(&act.rt_sa_mask);
@@ -1190,11 +1206,10 @@ long __export_restore_task(struct task_restore_args *args)
 
 	pr_info("%ld: Restored\n", sys_getpid());
 
-	futex_set(&zombies_inprogress, args->nr_zombies);
-
 	restore_finish_stage(CR_STATE_RESTORE);
 
-	futex_wait_while_gt(&zombies_inprogress, 0);
+	if (wait_zombies(args) < 0)
+		goto core_restore_end;
 
 	if (wait_helpers(args) < 0)
 		goto core_restore_end;
-- 
2.1.4



More information about the CRIU mailing list