[CRIU] [PATCH v2 13/57] pid: Make pgid and sid be allocated dynamically
Kirill Tkhai
ktkhai at virtuozzo.com
Fri Apr 7 02:24:40 PDT 2017
On 07.04.2017 03:40, Andrei Vagin wrote:
> On Tue, Mar 28, 2017 at 06:36:06PM +0300, Kirill Tkhai wrote:
>> They may contain several levels like task's pid,
>> so they must be struct pid type, not a scalar.
>>
>> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
>> ---
>> criu/cr-dump.c | 13 ++++---
>> criu/cr-restore.c | 14 ++++---
>> criu/files-reg.c | 4 +-
>> criu/include/pstree.h | 4 +-
>> criu/pstree.c | 95 +++++++++++++++++++++++++++----------------------
>> criu/tty.c | 6 ++-
>> 6 files changed, 74 insertions(+), 62 deletions(-)
>>
>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>> index 5f1f668b..c61bb263 100644
>> --- a/criu/cr-dump.c
>> +++ b/criu/cr-dump.c
>> @@ -1095,8 +1095,8 @@ static int dump_zombies(void)
>> if (parse_pid_stat(vpid(item), &pps_buf) < 0)
>> goto err;
>>
>> - item->sid = pps_buf.sid;
>> - item->pgid = pps_buf.pgid;
>> + item->sid->ns[0].virt = pps_buf.sid;
>> + item->pgid->ns[0].virt = pps_buf.pgid;
>>
>> BUG_ON(!list_empty(&item->children));
>> if (dump_one_zombie(item, &pps_buf) < 0)
>> @@ -1297,14 +1297,15 @@ static int dump_one_task(struct pstree_item *item)
>> }
>>
>> item->pid->ns[0].virt = misc.pid;
>> + item->sid->ns[0].virt = misc.sid;
>> + item->pgid->ns[0].virt = misc.pgid;
>> pstree_insert_pid(item->pid);
>> - item->sid = misc.sid;
>> - item->pgid = misc.pgid;
>>
>> pr_info("sid=%d pgid=%d pid=%d\n",
>> - item->sid, item->pgid, vpid(item));
>> + item->sid->ns[0].virt, item->pgid->ns[0].virt, vpid(item));
>>
>> - if (item->sid == 0) {
>> +
>> + if (item->sid->ns[0].virt == 0) {
>> pr_err("A session leader of %d(%d) is outside of its pid namespace\n",
>> item->pid->real, vpid(item));
>> goto err_cure;
>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>> index 863753b5..d806391e 100644
>> --- a/criu/cr-restore.c
>> +++ b/criu/cr-restore.c
>> @@ -1192,21 +1192,21 @@ static void restore_sid(void)
>> * we can call setpgid() on custom values.
>> */
>>
>> - if (vpid(current) == current->sid) {
>> - pr_info("Restoring %d to %d sid\n", vpid(current), current->sid);
>> + if (equal_pid(current->pid, current->sid)) {
>> + pr_info("Restoring %d to %d sid\n", vpid(current), current->sid->ns[0].virt);
>> sid = setsid();
>> - if (sid != current->sid) {
>> + if (sid != last_level_pid(current->sid)) {
>> pr_perror("Can't restore sid (%d)", sid);
>> exit(1);
>> }
>> } else {
>> sid = getsid(getpid());
>> - if (sid != current->sid) {
>> + if (sid != last_level_pid(current->sid)) {
>
> sid may be from a parent pid namespace, so the level will be different
> and you can't use last_level_pid() here, can you?
On the stage of this patch we don't have more than one pid_ns. Nested pid namespaces
become allowed in [57/57] "ns: Nested pid_ns support", and this function is turned off there,
if more then one pid_ns present.
>> /* Skip the root task if it's not init */
>> if (current == root_item && vpid(root_item) != INIT_PID)
>> return;
>> pr_err("Requested sid %d doesn't match inherited %d\n",
>> - current->sid, sid);
>> + last_level_pid(current->sid), sid);
>> exit(1);
>> }
>> }
>> @@ -1224,7 +1224,7 @@ static void restore_pgid(void)
>> * helpers are still with us.
>> */
>>
>> - pid_t pgid, my_pgid = current->pgid;
>> + pid_t pgid, my_pgid = last_level_pid(current->pgid);
>>
>> pr_info("Restoring %d to %d pgid\n", vpid(current), my_pgid);
>>
>> @@ -1250,7 +1250,7 @@ static void restore_pgid(void)
>>
>> pr_info("\twill call setpgid, mine pgid is %d\n", pgid);
>> if (setpgid(0, my_pgid) != 0) {
>> - pr_perror("Can't restore pgid (%d/%d->%d)", vpid(current), pgid, current->pgid);
>> + pr_perror("Can't restore pgid (%d/%d->%d)", vpid(current), pgid, current->pgid->ns[0].virt);
>> exit(1);
>> }
>>
>> diff --git a/criu/files-reg.c b/criu/files-reg.c
>> index 45142758..ede2744e 100644
>> --- a/criu/files-reg.c
>> +++ b/criu/files-reg.c
>> @@ -390,8 +390,8 @@ static int open_remap_dead_process(struct reg_file_info *rfi,
>>
>> init_pstree_helper(helper);
>>
>> - helper->sid = root_item->sid;
>> - helper->pgid = root_item->pgid;
>> + helper->sid->ns[0].virt = root_item->sid->ns[0].virt;
>> + helper->pgid->ns[0].virt = root_item->pgid->ns[0].virt;
>> helper->pid->ns[0].virt = rfe->remap_id;
>> helper->parent = root_item;
>> helper->ids = root_item->ids;
>> diff --git a/criu/include/pstree.h b/criu/include/pstree.h
>> index 769dcf7d..6edd04d7 100644
>> --- a/criu/include/pstree.h
>> +++ b/criu/include/pstree.h
>> @@ -17,8 +17,8 @@ struct pstree_item {
>> struct list_head sibling; /* linkage in my parent's children list */
>>
>> struct pid *pid;
>> - pid_t pgid;
>> - pid_t sid;
>> + struct pid *pgid;
>> + struct pid *sid;
>> pid_t born_sid;
>>
>> int nr_threads; /* number of threads */
>> diff --git a/criu/pstree.c b/criu/pstree.c
>> index c02ddb21..0dffabcf 100644
>> --- a/criu/pstree.c
>> +++ b/criu/pstree.c
>> @@ -209,7 +209,12 @@ struct pstree_item *__alloc_pstree_item(bool rst, int level)
>> if (!item)
>> return NULL;
>> item->pid = xmalloc(p_sz);
>> - if (!item->pid) {
>> + item->pgid = xmalloc(p_sz);
>> + item->sid = xmalloc(p_sz);
>
> You have a separate patch about this, maybe it has to be there?
These two strings are the main goal of this patch :)
What patch do you mean?
> Why do you think that pid, pgid and sid has the same number of levels?
Common case is they have the same number of levels. I think people create
a pid_ns and they don't set sid and pgid very rare. So, the patchset is
optimized for this case. When sid of pgid have not set, they just have
their values 0 in such namespaces. And this allows to simplify code in
some places.
> Could you create a wiki page about C/R-ing pid namespaces and
> describe all restrictions?
>
>> + if (!item->pid || !item->pgid || !item->sid) {
>> + xfree(item->pid);
>> + xfree(item->pgid);
>> + xfree(item->sid);
>> xfree(item);
>> return NULL;
>> }
>> @@ -220,12 +225,17 @@ struct pstree_item *__alloc_pstree_item(bool rst, int level)
>> return NULL;
>> memset(item, 0, sz);
>> vm_area_list_init(&rsti(item)->vmas);
>> -
>> - item->pid = shmalloc(p_sz);
>> + /*
>> + * On restore we never expand pid level,
>> + * so allocate them all at once.
>> + */
>> + item->pid = shmalloc(3 * p_sz);
>> if (!item->pid) {
>> shfree_last(item);
>> return NULL;
>> }
>> + item->pgid = (void *)item->pid + p_sz;
>> + item->sid = (void *)item->pgid + p_sz;
>> }
>>
>> INIT_LIST_HEAD(&item->children);
>> @@ -237,7 +247,7 @@ struct pstree_item *__alloc_pstree_item(bool rst, int level)
>> item->born_sid = -1;
>> item->pid->item = item;
>> futex_init(&item->task_st);
>> - item->pid->level = level;
>> + item->pid->level = item->sid->level = item->pgid->level = level;
>>
>> return item;
>> }
>> @@ -300,7 +310,7 @@ int dump_pstree(struct pstree_item *root_item)
>> * deeper in process tree, thus top-level checking for
>> * leader is enough.
>> */
>> - if (vpid(root_item) != root_item->sid) {
>> + if (!equal_pid(root_item->pid, root_item->sid)) {
>> if (!opts.shell_job) {
>> pr_err("The root process %d is not a session leader. "
>> "Consider using --" OPT_SHELL_JOB " option\n", vpid(item));
>> @@ -317,8 +327,8 @@ int dump_pstree(struct pstree_item *root_item)
>>
>> e.pid = vpid(item);
>> e.ppid = item->parent ? vpid(item->parent) : 0;
>> - e.pgid = item->pgid;
>> - e.sid = item->sid;
>> + e.pgid = item->pgid->ns[0].virt;
>> + e.sid = item->sid->ns[0].virt;
>> e.n_threads = item->nr_threads;
>>
>> e.threads = xmalloc(sizeof(e.threads[0]) * e.n_threads);
>> @@ -355,7 +365,7 @@ static int prepare_pstree_for_shell_job(void)
>> if (!opts.shell_job)
>> return 0;
>>
>> - if (root_item->sid == vpid(root_item))
>> + if (equal_pid(root_item->sid, root_item->pid))
>> return 0;
>>
>> /*
>> @@ -375,17 +385,17 @@ static int prepare_pstree_for_shell_job(void)
>> * Not that clever solution but at least it works.
>> */
>>
>> - old_sid = root_item->sid;
>> - old_gid = root_item->pgid;
>> + old_sid = root_item->sid->ns[0].virt;
>> + old_gid = root_item->pgid->ns[0].virt;
>>
>> pr_info("Migrating process tree (GID %d->%d SID %d->%d)\n",
>> old_gid, current_gid, old_sid, current_sid);
>>
>> for_each_pstree_item(pi) {
>> - if (pi->pgid == old_gid)
>> - pi->pgid = current_gid;
>> - if (pi->sid == old_sid)
>> - pi->sid = current_sid;
>> + if (pi->pgid->ns[0].virt == old_gid)
>> + pi->pgid->ns[0].virt = current_gid;
>> + if (pi->sid->ns[0].virt == old_sid)
>> + pi->sid->ns[0].virt = current_sid;
>> }
>>
>> if (lookup_create_item(current_sid) == NULL)
>> @@ -558,10 +568,10 @@ static int read_pstree_image(pid_t *pid_max)
>> pi->pid->ns[0].virt = e->pid;
>> if (e->pid > *pid_max)
>> *pid_max = e->pid;
>> - pi->pgid = e->pgid;
>> + pi->pgid->ns[0].virt = e->pgid;
>> if (e->pgid > *pid_max)
>> *pid_max = e->pgid;
>> - pi->sid = e->sid;
>> + pi->sid->ns[0].virt = e->sid;
>> if (e->sid > *pid_max)
>> *pid_max = e->sid;
>> pi->pid->state = TASK_ALIVE;
>> @@ -599,6 +609,7 @@ static int read_pstree_image(pid_t *pid_max)
>> for (i = 0; i < e->n_threads; i++) {
>> struct pid *node;
>> pi->threads[i].real = -1;
>> + pi->threads[i].level = pi->pid->level;
>> pi->threads[i].ns[0].virt = e->threads[i];
>> pi->threads[i].state = TASK_THREAD;
>> pi->threads[i].item = NULL;
>> @@ -678,10 +689,10 @@ static int prepare_pstree_ids(void)
>> * a session leader himself -- this is a simple case, we
>> * just proceed in a normal way.
>> */
>> - if (item->sid == root_item->sid || item->sid == vpid(item))
>> + if (equal_pid(item->sid, root_item->sid) || equal_pid(item->sid, item->pid))
>> continue;
>>
>> - leader = pstree_item_by_virt(item->sid);
>> + leader = pstree_item_by_virt(item->sid->ns[0].virt);
>> BUG_ON(leader == NULL);
>> if (leader->pid->state != TASK_UNDEF) {
>> pid_t pid;
>> @@ -693,10 +704,10 @@ static int prepare_pstree_ids(void)
>> if (helper == NULL)
>> return -1;
>>
>> - pr_info("Session leader %d\n", item->sid);
>> + pr_info("Session leader %d\n", item->sid->ns[0].virt);
>>
>> - helper->sid = item->sid;
>> - helper->pgid = leader->pgid;
>> + helper->sid->ns[0].virt = item->sid->ns[0].virt;
>> + helper->pgid->ns[0].virt = leader->pgid->ns[0].virt;
>> helper->ids = leader->ids;
>> helper->parent = leader;
>> list_add(&helper->sibling, &leader->children);
>> @@ -705,8 +716,8 @@ static int prepare_pstree_ids(void)
>> vpid(helper), vpid(leader));
>> } else {
>> helper = leader;
>> - helper->sid = item->sid;
>> - helper->pgid = item->sid;
>> + helper->sid->ns[0].virt = item->sid->ns[0].virt;
>> + helper->pgid->ns[0].virt = item->sid->ns[0].virt;
>> helper->parent = root_item;
>> helper->ids = root_item->ids;
>> list_add_tail(&helper->sibling, &helpers);
>> @@ -714,7 +725,7 @@ static int prepare_pstree_ids(void)
>> init_pstree_helper(helper);
>>
>> pr_info("Add a helper %d for restoring SID %d\n",
>> - vpid(helper), helper->sid);
>> + vpid(helper), helper->sid->ns[0].virt);
>>
>> child = list_entry(item->sibling.prev, struct pstree_item, sibling);
>> item = child;
>> @@ -723,9 +734,9 @@ static int prepare_pstree_ids(void)
>> * Stack on helper task all children with target sid.
>> */
>> list_for_each_entry_safe_continue(child, tmp, &root_item->children, sibling) {
>> - if (child->sid != helper->sid)
>> + if (!equal_pid(child->sid, helper->sid))
>> continue;
>> - if (child->sid == vpid(child))
>> + if (equal_pid(child->sid, child->pid))
>> continue;
>>
>> pr_info("Attach %d to the temporary task %d\n",
>> @@ -744,28 +755,28 @@ static int prepare_pstree_ids(void)
>> if (item->pid->state == TASK_HELPER)
>> continue;
>>
>> - if (item->sid != vpid(item)) {
>> + if (!equal_pid(item->sid, item->pid)) {
>> struct pstree_item *parent;
>>
>> - if (item->parent->sid == item->sid)
>> + if (equal_pid(item->parent->sid, item->sid))
>> continue;
>>
>> /* the task could fork a child before and after setsid() */
>> parent = item->parent;
>> - while (parent && vpid(parent) != item->sid) {
>> - if (parent->born_sid != -1 && parent->born_sid != item->sid) {
>> + while (parent && !equal_pid(parent->pid, item->sid)) {
>> + if (parent->born_sid != -1 && parent->born_sid != item->sid->ns[0].virt) {
>> pr_err("Can't figure out which sid (%d or %d)"
>> "the process %d was born with\n",
>> - parent->born_sid, item->sid, vpid(parent));
>> + parent->born_sid, item->sid->ns[0].virt, vpid(parent));
>> return -1;
>> }
>> - parent->born_sid = item->sid;
>> - pr_info("%d was born with sid %d\n", vpid(parent), item->sid);
>> + parent->born_sid = item->sid->ns[0].virt;
>> + pr_info("%d was born with sid %d\n", vpid(parent), item->sid->ns[0].virt);
>> parent = parent->parent;
>> }
>>
>> if (parent == NULL) {
>> - pr_err("Can't find a session leader for %d\n", item->sid);
>> + pr_err("Can't find a session leader for %d\n", item->sid->ns[0].virt);
>> return -1;
>> }
>>
>> @@ -780,10 +791,10 @@ static int prepare_pstree_ids(void)
>> for_each_pstree_item(item) {
>> struct pid *pid;
>>
>> - if (!item->pgid || vpid(item) == item->pgid)
>> + if (!item->pgid || equal_pid(item->pid, item->pgid))
>> continue;
>>
>> - pid = pstree_pid_by_virt(item->pgid);
>> + pid = pstree_pid_by_virt(item->pgid->ns[0].virt);
>> if (pid->state != TASK_UNDEF) {
>> BUG_ON(pid->state == TASK_THREAD);
>> rsti(item)->pgrp_leader = pid->item;
>> @@ -795,22 +806,22 @@ static int prepare_pstree_ids(void)
>> * means we're inheriting group from the current
>> * task so we need to escape creating a helper here.
>> */
>> - if (current_pgid == item->pgid)
>> + if (current_pgid == item->pgid->ns[0].virt)
>> continue;
>>
>> helper = pid->item;
>> init_pstree_helper(helper);
>>
>> - helper->sid = item->sid;
>> - helper->pgid = item->pgid;
>> - helper->pid->ns[0].virt = item->pgid;
>> + helper->sid->ns[0].virt = item->sid->ns[0].virt;
>> + helper->pgid->ns[0].virt = item->pgid->ns[0].virt;
>> + helper->pid->ns[0].virt = item->pgid->ns[0].virt;
>> helper->parent = item;
>> helper->ids = item->ids;
>> list_add(&helper->sibling, &item->children);
>> rsti(item)->pgrp_leader = helper;
>>
>> pr_info("Add a helper %d for restoring PGID %d\n",
>> - vpid(helper), helper->pgid);
>> + vpid(helper), helper->pgid->ns[0].virt);
>> }
>>
>> return 0;
>> @@ -1027,7 +1038,7 @@ int prepare_dummy_pstree(void)
>>
>> bool restore_before_setsid(struct pstree_item *child)
>> {
>> - int csid = child->born_sid == -1 ? child->sid : child->born_sid;
>> + int csid = child->born_sid == -1 ? child->sid->ns[0].virt : child->born_sid;
>>
>> if (child->parent->born_sid == csid)
>> return true;
>> diff --git a/criu/tty.c b/criu/tty.c
>> index d49aafd6..ad719f7b 100644
>> --- a/criu/tty.c
>> +++ b/criu/tty.c
>> @@ -1038,7 +1038,7 @@ static int pty_open_unpaired_slave(struct file_desc *d, struct tty_info *slave)
>> * checkpoint complete process tree together with
>> * the process which keeps the master peer.
>> */
>> - if (root_item->sid != vpid(root_item)) {
>> + if (!equal_pid(root_item->sid, root_item->pid)) {
>> pr_debug("Restore inherited group %d\n",
>> getpgid(getppid()));
>> if (tty_set_prgp(fd, getpgid(getppid())))
>> @@ -1208,7 +1208,7 @@ static struct pstree_item *find_first_sid(int sid)
>> struct pstree_item *item;
>>
>> for_each_pstree_item(item) {
>> - if (item->sid == sid)
>> + if (item->sid->ns[0].virt == sid)
>> return item;
>> }
>>
>> @@ -1289,7 +1289,7 @@ static int tty_find_restoring_task(struct tty_info *info)
>> * for us.
>> */
>> item = find_first_sid(info->tie->sid);
>> - if (item && vpid(item) == item->sid) {
>> + if (item && equal_pid(item->pid, item->sid)) {
>> pr_info("Set a control terminal %#x to %d\n",
>> info->tfe->id, info->tie->sid);
>> return prepare_ctl_tty(vpid(item),
>>
More information about the CRIU
mailing list