[CRIU] [PATCH 3/3] seccomp: Use BUG after CR_STATE_RESTORE_SIGCHLD stage if failed

Fri May 25 00:45:43 MSK 2018

On Thu, May 24, 2018 at 11:54:59AM +0300, Cyrill Gorcunov wrote:
> As avagin@ pointed we can't use exit after CR_STATE_RESTORE_SIGCHLD
> stage, thus simply remember if we've failed and kill everything
> when triggered.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
> ---
>  criu/pie/restorer.c | 31 ++++++++++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
> index fbdec0bca01c..b9dca5e99af7 100644
> --- a/criu/pie/restorer.c
> +++ b/criu/pie/restorer.c
> @@ -66,6 +66,7 @@
>  	})
>  
>  static struct task_entries *task_entries_local;
> +static atomic_t thread_aborted = ATOMIC_INIT(0);
>  static futex_t thread_inprogress;
>  static pid_t *helpers;
>  static int n_helpers;
> @@ -588,18 +589,23 @@ long __export_restore_thread(struct thread_restore_args *args)
>  	 * operation bound to uid 0 in current user ns.
>  	 */
>  	if (restore_seccomp(args))
> -		goto core_restore_end;
> +		goto thread_panic;
>  
>  	ret = restore_creds(args->creds_args, args->ta->proc_fd);
>  	ret = ret || restore_dumpable_flag(&args->ta->mm);
>  	ret = ret || restore_pdeath_sig(args);
> -	if (ret)
> -		goto core_restore_end;
>  
>  	restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
>  
>  	futex_dec_and_wake(&thread_inprogress);
>  
> +	/*
> +	 * We can't just exit in such deep stage on thread restore,
> +	 * instead notify group leader that we've failed.
> +	 */
> +	if (ret)
> +		goto thread_panic;

Why do you move this check after switching a stage?
> +
>  	if (check_only)
>  		restore_finish_stage(task_entries_local, CR_STATE_COMPLETE);
>  
> @@ -611,6 +617,11 @@ long __export_restore_thread(struct thread_restore_args *args)
>  	futex_abort_and_wake(&task_entries_local->nr_in_progress);
>  	sys_exit_group(1);
>  	return -1;
> +thread_panic:
> +	pr_err("Restorer panic for %ld\n", sys_getpid());
> +	atomic_set(&thread_aborted, 1);
> +	BUG();
> +	return -1;
>  }
>  
>  static long restore_self_exe_late(struct task_restore_args *args)
> @@ -1748,6 +1759,20 @@ long __export_restore_task(struct task_restore_args *args)
>  	/* Wait until children stop to use args->task_entries */
>  	futex_wait_while_gt(&thread_inprogress, 1);
>  
> +	/*
> +	 * If error happened somewhere insidee deep restore of
> +	 * a thread where we can't exit gracefully and have fired
> +	 * BUG already, do the same here in group leader so all
> +	 * threads get killed as well.
> +	 *
> +	 * Note that we've to complete @thread_inprogress counting
> +	 * since the group leader is still waiting and futex-abort
> +	 * flag won't work here because the form we have futexes
> +	 * doesn't allow multiple writers
> +	 */
> +	if (atomic_read(&thread_aborted))

I don't think that we need this check and BUG. I think when one thread
calls BUG(), the whole process will be killed.

> +		BUG();
> +
>  	if (args->check_only) {
>  		pr_info("Restore check was successful.\n");
>  		while (1) {
> -- 
> 2.14.3
>