[Devel] [PATCH 1/3] restart: make sure all tasks are in sys_restart
Serge E. Hallyn
serue at us.ibm.com
Tue Sep 29 09:53:42 PDT 2009
Make sure that all prepared tasks are in sys_restart before the
coordinator proceeds. Otherwise, it was possible for the last
task actually in sys_restart to sync before the straggler was in
sys_restart, sending that task a signal and causing that task to
exit. Then since there are not enough tasks completing restart,
the restart operation ends up hanging, waiting for the killed task.
Signed-off-by: Serge E. Hallyn <serue at us.ibm.com>
---
checkpoint/restart.c | 53 ++++++++++++++++++++++++++++++++++++++
checkpoint/sys.c | 1 +
include/linux/checkpoint_types.h | 2 +
3 files changed, 56 insertions(+), 0 deletions(-)
diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index 543b380..849bda5 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -655,13 +655,35 @@ static struct ckpt_ctx *wait_checkpoint_ctx(void)
static int do_ghost_task(void)
{
struct ckpt_ctx *ctx;
+ int ret;
ctx = wait_checkpoint_ctx();
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ /*
+ * Wait until coordinator task has completed prepare_descendants().
+ * Note that prepare_descendants() wakes each task one a time
+ * finishing the wait_checkpoint_ctx() above. We may want to change
+ * that flow so tasks only sleep once, but this works for now.
+ */
+ ret = wait_event_interruptible(ctx->waitq,
+ atomic_read(&ctx->nr_running));
+ if (ret) {
+ ckpt_debug("wait for coordinator prep returned %d\n", ret);
+ return ret;
+ }
+ /*
+ * Now each prepared task decrements nr_running, and, when that
+ * hits zero, wakes the coordinator. That way the coordinator
+ * knows that all tasks are in sys_restart(0
+ */
+ if (atomic_dec_and_test(&ctx->nr_running))
+ complete(&ctx->all_ready);
+
current->flags |= PF_RESTARTING;
+ /* Finally wait for all ghosts to be woken up - to die */
wait_event_interruptible(ctx->ghostq,
all_tasks_activated(ctx) ||
ckpt_test_ctx_error(ctx));
@@ -682,6 +704,26 @@ static int do_restore_task(void)
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ /*
+ * Wait until coordinator task has completed prepare_descendants().
+ * Note that prepare_descendants() wakes each task one a time
+ * finishing the wait_checkpoint_ctx() above. We may want to change
+ * that flow so tasks only sleep once, but this works for now.
+ */
+ ret = wait_event_interruptible(ctx->waitq,
+ atomic_read(&ctx->nr_running));
+ if (ret) {
+ ckpt_debug("wait for coordinator prep returned %d\n", ret);
+ return ret;
+ }
+ /*
+ * Now each prepared task decrements nr_running, and, when that
+ * hits zero, wakes the coordinator. That way the coordinator
+ * knows that all tasks are in sys_restart(0
+ */
+ if (atomic_dec_and_test(&ctx->nr_running))
+ complete(&ctx->all_ready);
+
current->flags |= PF_RESTARTING;
/* wait for our turn, do the restore, and tell next task in line */
@@ -814,6 +856,7 @@ static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
ret = -ESRCH;
atomic_set(&ctx->nr_total, nr_pids);
+ atomic_set(&ctx->nr_running, nr_pids);
return ret;
}
@@ -948,6 +991,16 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
ret = prepare_descendants(ctx, ctx->root_task);
if (ret < 0)
goto out;
+
+ /* tell tasks we're ready for them to dec ctx->nr_running */
+ wake_up_all(&ctx->waitq);
+ /* ctx->nr_running hit 0, all our tasks are in sys_restart */
+ ret = wait_for_completion_interruptible(&ctx->all_ready);
+ if (ret) {
+ ckpt_debug("waiting on all_ready returned %d\n", ret);
+ goto out;
+ }
+
/* wait for all other tasks to complete do_restore_task() */
ret = wait_all_tasks_finish(ctx);
if (ret < 0)
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index 76a3fa9..d48e261 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -233,6 +233,7 @@ static struct ckpt_ctx *ckpt_ctx_alloc(int fd, unsigned long uflags,
ctx->uflags = uflags;
ctx->kflags = kflags;
ctx->ktime_begin = ktime_get();
+ init_completion(&ctx->all_ready);
atomic_set(&ctx->refcount, 0);
INIT_LIST_HEAD(&ctx->pgarr_list);
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index 9b7b4dd..2a854e0 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -69,8 +69,10 @@ struct ckpt_ctx {
struct ckpt_pids *pids_arr; /* array of all pids [restart] */
int nr_pids; /* size of pids array */
atomic_t nr_total; /* total tasks count (with ghosts) */
+ atomic_t nr_running; /* countdown of tasks in sys_restart */
int active_pid; /* (next) position in pids array */
struct completion complete; /* completion for container root */
+ struct completion all_ready; /* all tasks are in sys_restart */
wait_queue_head_t waitq; /* waitqueue for restarting tasks */
wait_queue_head_t ghostq; /* waitqueue for ghost tasks */
struct cred *realcred, *ecred; /* tmp storage for cred at restart */
--
1.6.1
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list