[CRIU] [PATCH 2/2] rst: don't hang when SIGCHLD is coalesced
Andrew Vagin
avagin at gmail.com
Tue Jul 21 02:42:26 PDT 2015
On Mon, Jul 20, 2015 at 11:56:43PM -0600, Tycho Andersen wrote:
> When a TASK_HELPER would exit just before a zombie, sometimes the signal
> would get coalesced, and we would miss the zombie exit, causing us to block
> forever waiting for the zombie to complete. Let's use an entirely different
> strategy for waiting on zombies: explicitly wait on them with waitid, and
> use WNOWAIT to prevent their data from actually being reaped.
>
Looks good. A few minor comments are inline. Thanks.
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
> cr-restore.c | 38 ++++++++++++++++++++++++++------------
> include/restorer.h | 4 +++-
> include/rst_info.h | 1 -
> pie/restorer.c | 42 ++++++++++++++++++++++++++++--------------
> 4 files changed, 57 insertions(+), 28 deletions(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index e7bac3f..d838a9b 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -118,6 +118,8 @@ static int prepare_signals(int pid, CoreEntry *core);
> static int root_as_sibling;
> static unsigned long helpers_pos = 0;
> static int n_helpers = 0;
> +static unsigned long zombies_pos = 0;
> +static int n_zombies = 0;
>
> static int crtools_prepare_shared(void)
> {
> @@ -730,29 +732,40 @@ err:
> return ret;
> }
>
> -static int collect_helper_pids()
> +static int collect_child_pids(int state, int *n)
> {
> struct pstree_item *pi;
>
> - helpers_pos = rst_mem_cpos(RM_PRIVATE);
> -
> + *n = 0;
> list_for_each_entry(pi, ¤t->children, sibling) {
> - static pid_t *helper;
> + static pid_t *child;
>
> - if (pi->state != TASK_HELPER)
> + if (pi->state != state)
> continue;
>
> - helper = rst_mem_alloc(sizeof(*helper), RM_PRIVATE);
> - if (!helper)
> + child = rst_mem_alloc(sizeof(*child), RM_PRIVATE);
> + if (!child)
> return -1;
>
> - n_helpers++;
> - *helper = pi->pid.virt;
> + (*n)++;
> + *child = pi->pid.virt;
> }
>
> return 0;
> }
>
> +static int collect_helper_pids()
> +{
> + helpers_pos = rst_mem_cpos(RM_PRIVATE);
> + return collect_child_pids(TASK_HELPER, &n_helpers);
> +}
> +
> +static int collect_zombie_pids()
> +{
> + zombies_pos = rst_mem_cpos(RM_PRIVATE);
> + return collect_child_pids(TASK_DEAD, &n_zombies);
> +}
> +
> static int open_cores(int pid, CoreEntry *leader_core)
> {
> int i, tpid;
> @@ -823,6 +836,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
> if (collect_helper_pids() < 0)
> return -1;
>
> + if (collect_zombie_pids() < 0)
> + return -1;
> +
> if (inherit_fd_fini() < 0)
> return -1;
>
> @@ -896,7 +912,6 @@ static int restore_one_zombie(CoreEntry *core)
> if (task_entries != NULL) {
> restore_finish_stage(CR_STATE_RESTORE);
> zombie_prepare_signals();
> - mutex_lock(&task_entries->zombie_lock);
> }
>
> if (exit_code & 0x7f) {
> @@ -1926,7 +1941,6 @@ static int prepare_task_entries(void)
> task_entries->nr_tasks = 0;
> task_entries->nr_helpers = 0;
> futex_set(&task_entries->start, CR_STATE_RESTORE_NS);
> - mutex_init(&task_entries->zombie_lock);
> mutex_init(&task_entries->userns_sync_lock);
>
> return 0;
> @@ -2885,6 +2899,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> remap_array(rings, mm->n_aios, aio_rings);
> remap_array(rlims, rlims_nr, rlims_cpos);
> remap_array(helpers, n_helpers, helpers_pos);
> + remap_array(zombies, n_zombies, zombies_pos);
>
> #undef remap_array
>
> @@ -3005,7 +3020,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
> * Now prepare run-time data for threads restore.
> */
> task_args->nr_threads = current->nr_threads;
> - task_args->nr_zombies = rsti(current)->nr_zombies;
> task_args->clone_restore_fn = (void *)restore_thread_exec_start;
> task_args->thread_args = thread_args;
>
> diff --git a/include/restorer.h b/include/restorer.h
> index dcfa175..97a012c 100644
> --- a/include/restorer.h
> +++ b/include/restorer.h
> @@ -104,7 +104,6 @@ struct task_restore_args {
>
> /* threads restoration */
> int nr_threads; /* number of threads */
> - int nr_zombies;
> thread_restore_fcall_t clone_restore_fn; /* helper address for clone() call */
> struct thread_restore_args *thread_args; /* array of thread arguments */
> struct task_entries *task_entries;
> @@ -135,6 +134,9 @@ struct task_restore_args {
>
> pid_t *helpers /* the TASK_HELPERS to wait on at the end of restore */;
> unsigned int helpers_n;
> +
> + pid_t *zombies;
> + unsigned int zombies_n;
> /* * * * * * * * * * * * * * * * * * * * */
>
> unsigned long premmapped_addr;
> diff --git a/include/rst_info.h b/include/rst_info.h
> index 5fab162..0e8dc97 100644
> --- a/include/rst_info.h
> +++ b/include/rst_info.h
> @@ -9,7 +9,6 @@ struct task_entries {
> int nr_threads, nr_tasks, nr_helpers;
> futex_t nr_in_progress;
> futex_t start;
> - mutex_t zombie_lock;
> atomic_t cr_err;
> mutex_t userns_sync_lock;
> };
> diff --git a/pie/restorer.c b/pie/restorer.c
> index fa336fc..20439d6 100644
> --- a/pie/restorer.c
> +++ b/pie/restorer.c
> @@ -51,10 +51,11 @@
>
> static struct task_entries *task_entries;
> static futex_t thread_inprogress;
> -static futex_t zombies_inprogress;
> static int cap_last_cap;
> static pid_t *helpers;
> static int n_helpers;
> +static pid_t *zombies;
> +static int n_zombies;
>
> extern void cr_restore_rt (void) asm ("__cr_restore_rt")
> __attribute__ ((visibility ("hidden")));
> @@ -70,16 +71,9 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
> if (siginfo->si_pid == helpers[i])
> return;
>
> - if (futex_get(&task_entries->start) == CR_STATE_RESTORE_SIGCHLD) {
> - pr_debug("%ld: Collect a zombie with (pid %d, %d)\n",
> - sys_getpid(), siginfo->si_pid, siginfo->si_pid);
> - futex_dec_and_wake(&task_entries->nr_in_progress);
> - futex_dec_and_wake(&zombies_inprogress);
> - task_entries->nr_threads--;
> - task_entries->nr_tasks--;
> - mutex_unlock(&task_entries->zombie_lock);
> - return;
> - }
> + for (i = 0; i < n_zombies; i++)
> + if (siginfo->si_pid == zombies[i])
> + return;
>
> if (siginfo->si_code & CLD_EXITED)
> r = " exited, status=";
> @@ -805,6 +799,25 @@ static int wait_helpers(struct task_restore_args *task_args)
> return 0;
> }
>
> +static int wait_zombies(struct task_restore_args *task_args)
> +{
> + int i;
> +
> + for (i = 0; i < task_args->zombies_n; i++) {
> + if (sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL) < 0) {
> + pr_err("Wait on %d zombie failed\n", task_args->zombies[i]);
> + return -1;
> + }
> + pr_debug("%ld: Collect a zombie with pid %d\n",
> + sys_getpid(), task_args->zombies[i]);
> + task_entries->nr_threads--;
> + task_entries->nr_tasks--;
> + futex_dec_and_wake(&task_entries->nr_in_progress);
> + }
I think we don't need to decriment counters in the loop:
task_entries->nr_threads -= task_args->zombies_n;
task_entries->nr_tasks -= task_args->zombies_n;
> +
> + return 0;
> +}
> +
> /*
> * The main routine to restore task via sigreturn.
> * This one is very special, we never return there
> @@ -836,6 +849,8 @@ long __export_restore_task(struct task_restore_args *args)
> task_entries = args->task_entries;
> helpers = args->helpers;
> n_helpers = args->helpers_n;
> + zombies = args->zombies;
> + n_zombies = args->zombies_n;
> *args->breakpoint = rst_sigreturn;
>
> ksigfillset(&act.rt_sa_mask);
> @@ -1190,11 +1205,10 @@ long __export_restore_task(struct task_restore_args *args)
>
> pr_info("%ld: Restored\n", sys_getpid());
>
> - futex_set(&zombies_inprogress, args->nr_zombies);
> -
We can block SIGCHLD here
> restore_finish_stage(CR_STATE_RESTORE);
>
> - futex_wait_while_gt(&zombies_inprogress, 0);
> + if (wait_zombies(args) < 0)
> + goto core_restore_end;
>
> if (wait_helpers(args) < 0)
> goto core_restore_end;
and unblock SIGCHLD back here
In thise case, sigchld_handler will be executed only once for all
helpers and zombies. What do you think about this?
> --
> 2.1.4
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list