[Devel] Re: [PATCH 5/5] c/r: defer restore of blocked signals mask during restart

Serge E. Hallyn serue at us.ibm.com
Thu Oct 1 10:48:32 PDT 2009


Quoting Oren Laadan (orenl at librato.com):
> When a task finished restoring its state, it calls wait_task_sync()
> to wait for all others tasks, in an interruptible wait.
> 
> If the task also restored some pending signal from its saves state,
> then the signal may become visible when the blocked mask is restored.
> In this case the sleep will fail, as will the entire restart.
> 
> (Note that it doesn't affect other threads, because the blocked mask
> is per-thread.
> 
> To avoid this, we block all signals for restarting processes until the
> _entire_ restart succeeds, and defer the setting of blocked mask to
> that point.
> 
> Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
> ---
>  checkpoint/process.c             |   22 +++++++++++++++++++++-
>  checkpoint/restart.c             |   12 ++++++++++++
>  checkpoint/signal.c              |    8 ++++++--
>  include/linux/checkpoint.h       |    2 ++
>  include/linux/checkpoint_types.h |    4 ++++
>  include/linux/sched.h            |    2 +-
>  6 files changed, 46 insertions(+), 4 deletions(-)
> 
> diff --git a/checkpoint/process.c b/checkpoint/process.c
> index 3c02f8e..5ba64f0 100644
> --- a/checkpoint/process.c
> +++ b/checkpoint/process.c
> @@ -815,10 +815,15 @@ static int restore_task_pgid(struct ckpt_ctx *ctx)
>  }
> 
>  /* pre_restore_task - prepare the task for restore */
> -static int pre_restore_task(struct ckpt_ctx *ctx)
> +int pre_restore_task(struct ckpt_ctx *ctx)
>  {
>  	sigset_t sigset;
> 
> +	/* task-specific restart data: freed from post_restore_task() */
> +	current->checkpoint_data = kzalloc(sizeof(struct ckpt_data), GFP_KERNEL);
> +	if (!current->checkpoint_data)
> +		return -ENOMEM;
> +
>  	/*
>  	 * Block task's signals to avoid interruptions due to signals,
>  	 * say, from restored timers, file descriptors etc. Signals
> @@ -828,6 +833,9 @@ static int pre_restore_task(struct ckpt_ctx *ctx)
>  	 * i/o notification may fail the restart if a signal occurs
>  	 * before that task completed its restore. FIX ?
>  	 */
> +
> +	current->checkpoint_data->blocked = current->blocked;
> +
>  	sigfillset(&sigset);
>  	sigdelset(&sigset, SIGKILL);
>  	sigdelset(&sigset, SIGSTOP);
> @@ -836,6 +844,18 @@ static int pre_restore_task(struct ckpt_ctx *ctx)
>  	return 0;
>  }
> 
> +/* pre_restore_task - prepare the task for restore */
> +void post_restore_task(struct ckpt_ctx *ctx)
> +{
> +	/* only now is it safe to unblock the restored task's signals */
> +	sigprocmask(SIG_SETMASK, &current->checkpoint_data->blocked, NULL);
> +	recalc_sigpending();

sigprocmask() does recalc_sigpending() itself.

Otherwise,

Acked-by: Serge Hallyn <serue at us.ibm.com>

> +
> +	/* task-specific restart data: allocated in pre_restore_task() */
> +	kfree(current->checkpoint_data);
> +	current->checkpoint_data = NULL;
> +}
> +
>  /* read the entire state of the current task */
>  int restore_task(struct ckpt_ctx *ctx)
>  {
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index 258e9eb..b12c8bd 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -763,6 +763,10 @@ static int do_restore_task(void)
>  	if (ret < 0)
>  		goto out;
> 
> +	ret = pre_restore_task(ctx);
> +	if (ret < 0)
> +		goto out;
> +
>  	zombie = restore_task(ctx);
>  	if (zombie < 0) {
>  		ret = zombie;
> @@ -790,6 +794,7 @@ static int do_restore_task(void)
>  	if (ret < 0)
>  		restore_notify_error(ctx, ret);
> 
> +	post_restore_task(ctx);
>  	current->flags &= ~PF_RESTARTING;
>  	set_task_ctx(current, NULL);
>  	ckpt_ctx_put(ctx);
> @@ -997,6 +1002,10 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  	 */
> 
>  	if (ctx->uflags & RESTART_TASKSELF) {
> +		ret = pre_restore_task(ctx);
> +		ckpt_debug("pre restore task: %d\n", ret);
> +		if (ret < 0)
> +			goto out;
>  		ret = restore_task(ctx);
>  		ckpt_debug("restore task: %d\n", ret);
>  		if (ret < 0)
> @@ -1029,6 +1038,9 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  		ckpt_debug("freezing restart tasks ... %d\n", ret);
>  	}
>   out:
> +	if (ctx->uflags & RESTART_TASKSELF)
> +		post_restore_task(ctx);
> +
>  	if (ret < 0) {
>  		ckpt_set_ctx_error(ctx, ret);
>  		destroy_descendants(ctx);
> diff --git a/checkpoint/signal.c b/checkpoint/signal.c
> index cd3956d..cdfb142 100644
> --- a/checkpoint/signal.c
> +++ b/checkpoint/signal.c
> @@ -712,8 +712,12 @@ int restore_task_signal(struct ckpt_ctx *ctx)
>  	sigdelset(&blocked, SIGKILL);
>  	sigdelset(&blocked, SIGSTOP);
> 
> -	sigprocmask(SIG_SETMASK, &blocked, NULL);
> -	recalc_sigpending();
> +	/*
> +	 * Unblocking signals now may affect us in wait_task_sync().
> +	 * Instead, save blocked mask in current->checkpoint_data for
> +	 * post_restore_task().
> +	 */
> +	current->checkpoint_data->blocked = blocked;
> 
>  	ckpt_hdr_put(ctx, h);
>  	return 0;
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index 12210e4..e403dd7 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -148,6 +148,8 @@ extern int ckpt_activate_next(struct ckpt_ctx *ctx);
>  extern int ckpt_collect_task(struct ckpt_ctx *ctx, struct task_struct *t);
>  extern int checkpoint_task(struct ckpt_ctx *ctx, struct task_struct *t);
>  extern int restore_task(struct ckpt_ctx *ctx);
> +extern int pre_restore_task(struct ckpt_ctx *ctx);
> +extern void post_restore_task(struct ckpt_ctx *ctx);
> 
>  /* arch hooks */
>  extern int checkpoint_write_header_arch(struct ckpt_ctx *ctx);
> diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
> index 9b7b4dd..b9393f4 100644
> --- a/include/linux/checkpoint_types.h
> +++ b/include/linux/checkpoint_types.h
> @@ -28,6 +28,10 @@ struct ckpt_stats {
>  	int user_ns;
>  };
> 
> +struct ckpt_data {
> +	sigset_t blocked;
> +};
> +
>  struct ckpt_ctx {
>  	int crid;		/* unique checkpoint id */
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 0ab9553..3448873 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1482,7 +1482,7 @@ struct task_struct {
>  #endif /* CONFIG_TRACING */
>  #ifdef CONFIG_CHECKPOINT
>  	struct ckpt_ctx *checkpoint_ctx;
> -	unsigned long required_id;
> +	struct ckpt_data *checkpoint_data;
>  #endif
>  };
> 
> -- 
> 1.6.0.4
> 
> _______________________________________________
> Containers mailing list
> Containers at lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list