[Devel] Re: [PATCH 3/5] c/r: introduce walk_task_subtree() to iterate through descendants
Serge E. Hallyn
serue at us.ibm.com
Thu Oct 1 09:41:09 PDT 2009
Quoting Oren Laadan (orenl at librato.com):
> Introduce a helper to iterate over the descendants of a given
> task. The prototype is:
>
> int walk_task_subtree(struct task_struct *root,
> int (*func)(struct task_struct *, void *),
> void *data)
>
> The function will start with @root, and iterate through all the
> descendants, including threads, in a DFS manner. Children of a task
> are traversed before proceeding to the next thread of that task.
>
> For each task, the callback @func will be called providing the task
> pointer and the @data. The callback is invoked while holding the
> tasklist_lock for reading. If the callback fails it should return a
> negative error, and the traversal ends. If the callback succeeds, it
> returns a non-negative number, and these values are summed.
>
> On success, walk_task_subtree() returns the total summed. On failure,
> it returns a negative value.
>
> The code in checkpoint/checkpoint.c and checkpoint/restart.c has
> been converted to use this helper.
>
> Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
Acked-by: Serge Hallyn <serue at us.ibm.com>
> ---
> checkpoint/checkpoint.c | 97 ++++++++++++++--------------------------
> checkpoint/restart.c | 105 +++++++++++++++++++-------------------------
> checkpoint/sys.c | 55 +++++++++++++++++++++++
> include/linux/checkpoint.h | 3 +
> 4 files changed, 137 insertions(+), 123 deletions(-)
>
> diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
> index dbe9e10..1eeb557 100644
> --- a/checkpoint/checkpoint.c
> +++ b/checkpoint/checkpoint.c
> @@ -520,80 +520,51 @@ static int collect_objects(struct ckpt_ctx *ctx)
> return ret;
> }
>
> +struct ckpt_cnt_tasks {
> + struct ckpt_ctx *ctx;
> + int nr;
> +};
> +
> /* count number of tasks in tree (and optionally fill pid's in array) */
> -static int tree_count_tasks(struct ckpt_ctx *ctx)
> +static int __tree_count_tasks(struct task_struct *task, void *data)
> {
> - struct task_struct *root;
> - struct task_struct *task;
> - struct task_struct *parent;
> - struct task_struct **tasks_arr = ctx->tasks_arr;
> - int nr_tasks = ctx->nr_tasks;
> - int nr = 0;
> + struct ckpt_cnt_tasks *d = (struct ckpt_cnt_tasks *) data;
> + struct ckpt_ctx *ctx = d->ctx;
> int ret;
>
> - read_lock(&tasklist_lock);
> -
> - /* we hold the lock, so root_task->real_parent can't change */
> - task = ctx->root_task;
> - if (ctx->root_init) {
> - /* container-init: start from container parent */
> - parent = task->real_parent;
> - root = parent;
> - } else {
> - /* non-container-init: start from root task and down */
> - parent = NULL;
> - root = task;
> - }
> -
> - /* count tasks via DFS scan of the tree */
> - while (1) {
> - ctx->tsk = task; /* (for ckpt_write_err) */
> + ctx->tsk = task; /* (for ckpt_write_err) */
>
> - /* is this task cool ? */
> - ret = may_checkpoint_task(ctx, task);
> - if (ret < 0) {
> - nr = ret;
> - break;
> - }
> - if (tasks_arr) {
> - /* unlikely... but if so then try again later */
> - if (nr == nr_tasks) {
> - nr = -EBUSY; /* cleanup in ckpt_ctx_free() */
> - break;
> - }
> - tasks_arr[nr] = task;
> - get_task_struct(task);
> - }
> - nr++;
> - /* if has children - proceed with child */
> - if (!list_empty(&task->children)) {
> - parent = task;
> - task = list_entry(task->children.next,
> - struct task_struct, sibling);
> - continue;
> - }
> - while (task != root) {
> - /* if has sibling - proceed with sibling */
> - if (!list_is_last(&task->sibling, &parent->children)) {
> - task = list_entry(task->sibling.next,
> - struct task_struct, sibling);
> - break;
> - }
> + /* is this task cool ? */
> + ret = may_checkpoint_task(ctx, task);
> + if (ret < 0)
> + goto out;
>
> - /* else, trace back to parent and proceed */
> - task = parent;
> - parent = parent->real_parent;
> + if (ctx->tasks_arr) {
> + if (d->nr == ctx->nr_tasks) { /* unlikely... try again later */
> + __ckpt_write_err(ctx, "TE", "bad task count\n", -EBUSY);
> + ret = -EBUSY;
> + goto out;
> }
> - if (task == root)
> - break;
> + ctx->tasks_arr[d->nr++] = task;
> + get_task_struct(task);
> }
> +
> + ret = 1;
> + out:
> + if (ret < 0)
> + ckpt_write_err(ctx, "", NULL);
> ctx->tsk = NULL;
> + return ret;
> +}
>
> - read_unlock(&tasklist_lock);
> +static int tree_count_tasks(struct ckpt_ctx *ctx)
> +{
> + struct ckpt_cnt_tasks data;
>
> - if (nr < 0)
> - ckpt_write_err(ctx, "", NULL);
> - return nr;
> + data.ctx = ctx;
> + data.nr = 0;
> +
> + return walk_task_subtree(ctx->root_task, __tree_count_tasks, &data);
> }
>
> /*
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index 37454c5..c021eaf 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -804,77 +804,56 @@ static int do_restore_task(void)
> * Called by the coodinator to set the ->checkpoint_ctx pointer of the
> * root task and all its descendants.
> */
> -static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
> +static int __prepare_descendants(struct task_struct *task, void *data)
> {
> - struct task_struct *leader = root;
> - struct task_struct *parent = NULL;
> - struct task_struct *task = root;
> - int nr_pids = 0;
> - int ret = -EBUSY;
> + struct ckpt_ctx *ctx = (struct ckpt_ctx *) data;
>
> - read_lock(&tasklist_lock);
> - while (1) {
> - ckpt_debug("consider task %d\n", task_pid_vnr(task));
> - if (task_ptrace(task) & PT_PTRACED)
> - break;
> - /*
> - * Set task->checkpoint_ctx of all non-zombie descendants.
> - * If a descendant already has a ->checkpoint_ctx, it
> - * must be a coordinator (for a different restart ?) so
> - * we fail.
> - *
> - * Note that own ancestors cannot interfere since they
> - * won't descend past us, as own ->checkpoint_ctx must
> - * already be set.
> - */
> - if (!task->exit_state) {
> - if (!set_task_ctx(task, ctx))
> - break;
> - ckpt_debug("prepare task %d\n", task_pid_vnr(task));
> - wake_up_process(task);
> - nr_pids++;
> - }
> + ckpt_debug("consider task %d\n", task_pid_vnr(task));
>
> - /* if has children - proceed with child */
> - if (!list_empty(&task->children)) {
> - parent = task;
> - task = list_entry(task->children.next,
> - struct task_struct, sibling);
> - continue;
> - }
> - while (task != root) {
> - /* if has sibling - proceed with sibling */
> - if (!list_is_last(&task->sibling, &parent->children)) {
> - task = list_entry(task->sibling.next,
> - struct task_struct, sibling);
> - break;
> - }
> -
> - /* else, trace back to parent and proceed */
> - task = parent;
> - parent = parent->real_parent;
> - }
> - if (task == root) {
> - /* in case root task is multi-threaded */
> - root = task = next_thread(task);
> - if (root == leader) {
> - ret = 0;
> - break;
> - }
> - }
> + if (task_ptrace(task) & PT_PTRACED) {
> + ckpt_debug("ptraced task %d\n", task_pid_vnr(task));
> + return -EBUSY;
> }
> - read_unlock(&tasklist_lock);
> - ckpt_debug("nr %d/%d ret %d\n", ctx->nr_pids, nr_pids, ret);
> +
> + /*
> + * Set task->checkpoint_ctx of all non-zombie descendants.
> + * If a descendant already has a ->checkpoint_ctx, it
> + * must be a coordinator (for a different restart ?) so
> + * we fail.
> + *
> + * Note that own ancestors cannot interfere since they
> + * won't descend past us, as own ->checkpoint_ctx must
> + * already be set.
> + */
> + if (!task->exit_state) {
> + if (!set_task_ctx(task, ctx))
> + return -EBUSY;
> + ckpt_debug("prepare task %d\n", task_pid_vnr(task));
> + wake_up_process(task);
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
> +{
> + int nr_pids;
> +
> + nr_pids = walk_task_subtree(root, __prepare_descendants, ctx);
> + ckpt_debug("nr %d/%d\n", ctx->nr_pids, nr_pids);
> + if (nr_pids < 0)
> + return nr_pids;
>
> /*
> * Actual tasks count may exceed ctx->nr_pids due of 'dead'
> * tasks used as place-holders for PGIDs, but not fall short.
> */
> - if (!ret && (nr_pids < ctx->nr_pids))
> - ret = -ESRCH;
> + if (nr_pids < ctx->nr_pids)
> + return -ESRCH;
>
> atomic_set(&ctx->nr_total, nr_pids);
> - return ret;
> + return nr_pids;
> }
>
> static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
> @@ -991,6 +970,12 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
> return -EBUSY;
> }
>
> + /*
> + * From now on we are committed to the restart. If anything
> + * fails, we'll wipe out the entire subtree below us, to
> + * ensure proper cleanup.
> + */
> +
> if (ctx->uflags & RESTART_TASKSELF) {
> ret = restore_task(ctx);
> ckpt_debug("restore task: %d\n", ret);
> diff --git a/checkpoint/sys.c b/checkpoint/sys.c
> index 77613d7..7604089 100644
> --- a/checkpoint/sys.c
> +++ b/checkpoint/sys.c
> @@ -276,6 +276,61 @@ void ckpt_ctx_put(struct ckpt_ctx *ctx)
> ckpt_ctx_free(ctx);
> }
>
> +int walk_task_subtree(struct task_struct *root,
> + int (*func)(struct task_struct *, void *),
> + void *data)
> +{
> +
> + struct task_struct *leader = root;
> + struct task_struct *parent = NULL;
> + struct task_struct *task = root;
> + int total = 0;
> + int ret;
> +
> + read_lock(&tasklist_lock);
> + while (1) {
> + /* invoke callback on this task */
> + ret = func(task, data);
> + if (ret < 0)
> + break;
> +
> + total += ret;
> +
> + /* if has children - proceed with child */
> + if (!list_empty(&task->children)) {
> + parent = task;
> + task = list_entry(task->children.next,
> + struct task_struct, sibling);
> + continue;
> + }
> +
> + while (task != root) {
> + /* if has sibling - proceed with sibling */
> + if (!list_is_last(&task->sibling, &parent->children)) {
> + task = list_entry(task->sibling.next,
> + struct task_struct, sibling);
> + break;
> + }
> +
> + /* else, trace back to parent and proceed */
> + task = parent;
> + parent = parent->real_parent;
> + }
> +
> + if (task == root) {
> + /* in case root task is multi-threaded */
> + root = task = next_thread(task);
> + if (root == leader)
> + break;
> + }
> + }
> + read_unlock(&tasklist_lock);
> +
> + ckpt_debug("total %d ret %d\n", total, ret);
> + return (ret < 0 ? ret : total);
> +}
> +
> +
> /**
> * sys_checkpoint - checkpoint a container
> * @pid: pid of the container init(1) process
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index b7f1796..12210e4 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -50,6 +50,9 @@
> RESTART_FROZEN | \
> RESTART_GHOST)
>
> +extern int walk_task_subtree(struct task_struct *task,
> + int (*func)(struct task_struct *, void *),
> + void *data);
> extern void exit_checkpoint(struct task_struct *tsk);
>
> extern int ckpt_kwrite(struct ckpt_ctx *ctx, void *buf, int count);
> --
> 1.6.0.4
>
> _______________________________________________
> Containers mailing list
> Containers at lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list