[Devel] [PATCH 1/2] mktree: add support to ghost tasks (TASK_GHOST)
Oren Laadan
orenl at librato.com
Mon Aug 24 23:09:26 PDT 2009
Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
mktree.c | 123 ++++++++++++++++++++++++++++++++++++++++----------------------
1 files changed, 80 insertions(+), 43 deletions(-)
diff --git a/mktree.c b/mktree.c
index e42407f..6c6eb98 100644
--- a/mktree.c
+++ b/mktree.c
@@ -144,12 +144,13 @@ struct task {
pid_t real_parent; /* pid of task's real parent */
};
-#define TASK_ROOT 0x1 /* */
-#define TASK_DEAD 0x2 /* */
-#define TASK_THREAD 0x4 /* */
-#define TASK_SIBLING 0x8 /* */
-#define TASK_SESSION 0x10 /* */
-#define TASK_NEWPID 0x20 /* */
+#define TASK_ROOT 0x1 /* root task */
+#define TASK_GHOST 0x2 /* dead task (pid used as sid/pgid) */
+#define TASK_THREAD 0x4 /* thread (non leader) */
+#define TASK_SIBLING 0x8 /* creator's sibling (use CLONE_PARENT) */
+#define TASK_SESSION 0x10 /* inherits creator's original sid */
+#define TASK_NEWPID 0x20 /* starts a new pid namespace */
+#define TASK_DEAD 0x40 /* dead task (dummy) */
struct ckpt_ctx {
pid_t init_pid;
@@ -843,10 +844,12 @@ static int ckpt_build_tree(struct ckpt_ctx *ctx)
ckpt_dbg_cont(" prev %d", task->prev_sib->pid);
if (task->phantom)
ckpt_dbg_cont(" placeholder %d", task->phantom->pid);
- ckpt_dbg_cont(" %c%c%c%c",
+ ckpt_dbg_cont(" %c%c%c%c%c%c",
(task->flags & TASK_THREAD) ? 'T' : ' ',
(task->flags & TASK_SIBLING) ? 'P' : ' ',
(task->flags & TASK_SESSION) ? 'S' : ' ',
+ (task->flags & TASK_NEWPID) ? 'N' : ' ',
+ (task->flags & TASK_GHOST) ? 'G' : ' ',
(task->flags & TASK_DEAD) ? 'D' : ' ');
ckpt_dbg_cont("\n");
}
@@ -855,7 +858,7 @@ static int ckpt_build_tree(struct ckpt_ctx *ctx)
return 0;
}
-static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid)
+static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid, pid_t ppid)
{
struct task *task;
@@ -864,12 +867,13 @@ static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid)
task = &ctx->tasks_arr[ctx->tasks_nr++];
- task->flags = TASK_DEAD;
+ task->flags = TASK_GHOST;
+ /* ghost: no checkpointed task behind it; pid is only used as a sid/pgid */
task->pid = pid;
- task->ppid = ckpt_init_task(ctx)->pid;
+ task->ppid = ppid;
task->tgid = pid;
- task->sid = pid;
+ task->sid = ppid;
task->children = NULL;
task->next_sib = NULL;
@@ -892,36 +896,38 @@ static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid)
static int ckpt_init_tree(struct ckpt_ctx *ctx)
{
+ struct ckpt_hdr_pids *pids_arr = ctx->pids_arr;
+ int pids_nr = ctx->pids_nr;
struct task *task;
- pid_t init_sid;
- pid_t init_pid;
- pid_t init_pgid;
+ pid_t root_sid;
+ pid_t root_pid;
+ pid_t root_pgid;
int i;
- init_pid = ctx->pids_arr[0].vpid;
- init_sid = ctx->pids_arr[0].vsid;
- init_pgid = ctx->pids_arr[0].vpgid;
+ root_pid = pids_arr[0].vpid;
+ root_sid = pids_arr[0].vsid;
+ root_pgid = pids_arr[0].vpgid;
/* XXX for out-of-container subtrees */
- for (i = 0; i < ctx->pids_nr; i++) {
- if (ctx->pids_arr[i].vsid == init_sid)
- ctx->pids_arr[i].vsid = init_pid;
- if (ctx->pids_arr[i].vpgid == init_sid)
- ctx->pids_arr[i].vpgid = init_pid;
- if (ctx->pids_arr[i].vpgid == init_pgid)
- ctx->pids_arr[i].vpgid = init_pid;
+ for (i = 0; i < pids_nr; i++) {
+ if (pids_arr[i].vsid == root_sid)
+ pids_arr[i].vsid = root_pid;
+ if (pids_arr[i].vpgid == root_sid)
+ pids_arr[i].vpgid = root_pid;
+ if (pids_arr[i].vpgid == root_pgid)
+ pids_arr[i].vpgid = root_pid;
}
/* populate with known tasks */
- for (i = 0; i < ctx->pids_nr; i++) {
+ for (i = 0; i < pids_nr; i++) {
task = &ctx->tasks_arr[i];
task->flags = 0;
- task->pid = ctx->pids_arr[i].vpid;
- task->ppid = ctx->pids_arr[i].vppid;
- task->tgid = ctx->pids_arr[i].vtgid;
- task->sid = ctx->pids_arr[i].vsid;
+ task->pid = pids_arr[i].vpid;
+ task->ppid = pids_arr[i].vppid;
+ task->tgid = pids_arr[i].vtgid;
+ task->sid = pids_arr[i].vsid;
task->children = NULL;
task->next_sib = NULL;
@@ -936,19 +942,32 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
return -1;
}
- ctx->tasks_nr = ctx->pids_nr;
+ ctx->tasks_nr = pids_nr;
/* add pids unaccounted for (no tasks) */
- for (i = 0; i < ctx->pids_nr; i++) {
- if (ckpt_setup_task(ctx, ctx->pids_arr[i].vsid) < 0)
+ for (i = 0; i < pids_nr; i++) {
+ /* session leader's parent is root task */
+ if (ckpt_setup_task(ctx, pids_arr->vsid, root_pid) < 0)
return -1;
- if (ckpt_setup_task(ctx, ctx->pids_arr[i].vpgid) < 0)
+
+ /*
+ * If pgrp != sid, pgrp owner's parent is sid. Other
+ * tasks with same pgrp will need to have their sid
+ * matching, too, when the kernel restores their pgrp.
+ * If pgrp == sid, then the call above would have
+ * ensured that the pid is hashed: ckpt_setup_task()
+ * will return promptly.
+ */
+ if (ckpt_setup_task(ctx, pids_arr->vpgid, pids_arr->vsid) < 0)
return -1;
+
+ pids_arr++;
}
/* mark root task(s) */
ctx->tasks_arr[0].flags |= TASK_ROOT;
+ ckpt_dbg("total tasks (including ghosts): %d\n", ctx->tasks_nr);
return 0;
}
@@ -970,10 +989,11 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
* flags, pid, tgid, sid, pgid, and pointers to the a creator, next
* and previous sibling, and first child task. Note that the creator
* may not necessarily correspond to the parent. The possible flags
- * are TASK_DEAD, TASK_THREAD, TASK_SESSION (that asks inherit a
- * session id), and TASK_SIBLING (that asks to inherit the parent via
- * CLONE_PARENT). The algorithm loops through all the entries in the
- * table:
+ * are TASK_ROOT, TASK_GHOST, TASK_THREAD, TASK_SIBLING (that asks to
+ * inherit the parent via CLONE_PARENT), TASK_SESSION (that asks to
+ * inherit a session id), TASK_NEWPID (that asks to start a new pid
+ * namespace), and TASK_DEAD. The algorithm loops through all the
+ * entries in the table:
*
* If the entry is a thread and not the thread group leader, we set
* the creator to be the thread group leader and set TASK_THREAD.
@@ -1023,7 +1043,7 @@ static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task)
struct task *creator;
if (task == ckpt_init_task(ctx)) {
- ckpt_err("pid %d: init - no creator\n", ckpt_init_task(ctx)->pid);
+ ckpt_err("pid %d: no init creator\n", ckpt_init_task(ctx)->pid);
return -1;
}
@@ -1230,6 +1250,7 @@ static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task)
{
struct task *child;
struct pid_swap swap;
+ unsigned long flags = 0;
pid_t newpid;
int ret;
@@ -1307,9 +1328,20 @@ static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task)
}
close(ctx->pipe_out);
+ /*
+ * Ghost tasks are not restarted and end up dead, but their
+ * pids are referred to by other tasks' pgids (also sids, that
+ * are already properly set by now). Therefore, they stick
+ * around until those tasks actually restore their pgrp, and
+ * then exit (more precisely, are killed). The RESTART_GHOST flag
+ * tells the kernel that they are not to be restored.
+ */
+ if (task->flags & TASK_GHOST)
+ flags |= RESTART_GHOST;
+
/* on success this doesn't return */
- ckpt_dbg("about to call sys_restart()\n");
- ret = restart(0, STDIN_FILENO, 0);
+ ckpt_dbg("about to call sys_restart(), flags %#lx\n", flags);
+ ret = restart(0, STDIN_FILENO, flags);
if (ret < 0)
perror("task restore failed");
return ret;
@@ -1561,7 +1593,10 @@ static int ckpt_adjust_pids(struct ckpt_ctx *ctx)
memcpy(ctx->copy_arr, ctx->pids_arr, len);
/* read in 'pid_swap' data and adjust ctx->pids_arr */
- for (n = 0; n < ctx->pids_nr; n++) {
+ for (n = 0; n < ctx->tasks_nr; n++) {
+ /* don't expect data from dead tasks */
+ if (ctx->tasks_arr[n].flags & TASK_DEAD)
+ continue;
ret = read(ctx->pipe_in, &swap, sizeof(swap));
if (ret < 0)
ckpt_abort(ctx, "read pipe");
@@ -1576,8 +1611,10 @@ static int ckpt_adjust_pids(struct ckpt_ctx *ctx)
ctx->copy_arr[m].vpid = swap.new;
if (ctx->pids_arr[m].vtgid == swap.old)
ctx->copy_arr[m].vtgid = swap.new;
- if (ctx->pids_arr[m].vppid == swap.old)
- ctx->copy_arr[m].vppid = swap.new;
+ if (ctx->pids_arr[m].vpgid == swap.old)
+ ctx->copy_arr[m].vpgid = swap.new;
+ else if (ctx->pids_arr[m].vpgid == -swap.old)
+ ctx->copy_arr[m].vpgid = -swap.new;
}
}
--
1.6.0.4
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list