[CRIU] [PATCH v3 49/55] pid_ns: Set user_ns before creation of pid_ns
Kirill Tkhai
ktkhai at virtuozzo.com
Tue Apr 25 05:11:32 PDT 2017
On 25.04.2017 08:26, Andrei Vagin wrote:
> On Mon, Apr 10, 2017 at 11:23:13AM +0300, Kirill Tkhai wrote:
>> Since child's pid_ns may have user_ns not equal
>> to parent's, and we do not want to lose parent's
>> user_ns (as it's not impossible to restore it back),
>> create the child from a sub-process.
>>
>
> I don't understand what you do here. Could you elaborate and add
> comments in code?
>
> Thanks,
> Andrei
>
>> v3: New
>>
>> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
>> ---
>> criu/cr-restore.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++------
>> 1 file changed, 115 insertions(+), 16 deletions(-)
>>
>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>> index 780efed18..a73545fbd 100644
>> --- a/criu/cr-restore.c
>> +++ b/criu/cr-restore.c
>> @@ -1046,6 +1046,120 @@ static void maybe_clone_parent(struct pstree_item *item,
>> }
>> }
>>
>> +static int call_clone_fn(void *arg)
>> +{
>> + struct cr_clone_arg *ca = arg;
>> + struct ns_id *pid_ns;
>> + pid_t pid;
>> + int fd;
>> +
>> + pid_ns = lookup_ns_by_id(ca->item->ids->pid_ns_id, &pid_ns_desc);
>> + BUG_ON(!pid_ns);
>> +
>> + if (set_next_pid(pid_ns, ca->item->pid) < 0) {
>> + pr_err("Can't set next pid\n");
>> + return -1;
>> + }
>> +
>> + fd = fdstore_get(pid_ns->user_ns->user.nsfd_id);
>> + if (fd < 0) {
>> + pr_err("Can't get ns fd\n");
>> + return -1;
>> + }
>> +
Lower user_ns:
>> + if (setns(fd, CLONE_NEWUSER) < 0) {
>> + pr_perror("Can't set user ns");
>> + close(fd);
>> + return -1;
>> + }
>> + close(fd);
>> + ca->item->user_ns = pid_ns->user_ns;
>> +
>> + if (ca->clone_flags & CLONE_FILES)
>> + close_pid_proc();
>> +
Create children with CLONE_PARENT:
>> + pid = clone_noasan(restore_task_with_children, ca->clone_flags | CLONE_PARENT | SIGCHLD, ca);
>> +
>> + if (ca->item == root_item) {
>> + ca->item->pid->real = pid;
>> + pr_debug("PID: real %d virt %d\n", pid, vpid(ca->item));
>> + }
>> +
Exit:
>> + return pid > 0 ? 0 : -1;
>> +}
>> +
>> +static int do_fork_with_pid(struct pstree_item *item, struct ns_id *pid_ns, struct cr_clone_arg *ca)
>> +{
>> + int status, i, sz, ret = 0;
>> + struct pid *hlp_pid;
>> + sigset_t sig_mask;
>> + pid_t pid;
>> +
>> + if (!(ca->clone_flags & CLONE_NEWPID) || !current || current->user_ns == pid_ns->user_ns) {
If we do not create a pid_ns, or user_ns of the pid_ns is similar to current's user_ns,
then create the pid_ns without any tricks.
>> + if (set_next_pid(pid_ns, item->pid) < 0) {
>> + pr_err("Can't set next pid\n");
>> + return -1;
>> + }
>> + if (ca->clone_flags & CLONE_FILES)
>> + close_pid_proc();
>> + pid = clone_noasan(restore_task_with_children, ca->clone_flags | SIGCHLD, ca);
>> + if (item == root_item) {
>> + item->pid->real = pid;
>> + pr_debug("PID: real %d virt %d\n", pid, vpid(item));
>> + }
>> + return pid > 0 ? 0 : -1;
>> + }
Otherwise, create a helper, lower user_ns from current->user_ns to pid_ns->user_ns
and create the pid_ns.
Alloc memory for helper pid:
>> +
>> + sz = sizeof(struct pid) + (item->pid->level - 2) * sizeof(((struct pid *)NULL)->ns[0]);
>> + hlp_pid = xmalloc(sz);
>> + if (!hlp_pid)
>> + return -1;
>> + memcpy(hlp_pid, item->pid, sz);
>> + hlp_pid->level--;
>> +
Choose helper pid (see comment, how it's choosed):
>> + for (i = 0; i < hlp_pid->level; i++) {
>> + /*
>> + * Choose helper's pid[] as child's pid[] + 1.
>> + * This should guarantee the helper will not
>> + * occupy child's pids.
>> + */
>> + hlp_pid->ns[i].virt++;
>> + if (hlp_pid->ns[i].virt < 0)
>> + hlp_pid->ns[i].virt = INIT_PID + 1;
>> + }
Set helper pid as next:
>> + if (set_next_pid(pid_ns->parent, hlp_pid) < 0) {
>> + pr_err("Can't set next pid\n");
>> + xfree(hlp_pid);
>> + return -1;
>> + }
>> + xfree(hlp_pid);
>> +
>> + if (ca->clone_flags & CLONE_FILES)
>> + close_pid_proc();
>> +
>> + if (block_sigmask(&sig_mask, SIGCHLD) < 0)
>> + return -1;
>> +
Create helper:
>> + pid = clone_noasan(call_clone_fn, SIGCHLD, ca);
>> + if (pid < 0) {
>> + pr_perror("Can't clone()");
>> + ret = -1;
>> + goto unblock;
>> + }
>> +
Wait till helper dies:
>> + errno = 0;
>> + if (waitpid(pid, &status, 0) < 0 || !WIFEXITED(status) || WEXITSTATUS(status)) {
>> + pr_perror("Child process waiting: %d", status);
>> + ret = -1;
>> + goto unblock;
>> + }
>> +
>> +unblock:
>> + if (restore_sigmask(&sig_mask))
>> + ret = -1;
>> + return ret;
>> +}
>> +
>> static inline int fork_with_pid(struct pstree_item *item)
>> {
>> struct cr_clone_arg ca;
>> @@ -1114,27 +1228,12 @@ static inline int fork_with_pid(struct pstree_item *item)
>> goto err_close;
>> }
>>
>> - if (set_next_pid(pid_ns, item->pid) < 0) {
>> - pr_err("Can't set next pid\n");
>> - goto err_unlock;
>> - }
>> -
>> - if (ca.clone_flags & CLONE_FILES)
>> - close_pid_proc();
>> -
>> - ret = clone_noasan(restore_task_with_children, ca.clone_flags | SIGCHLD, &ca);
>> + ret = do_fork_with_pid(item, pid_ns, &ca);
>> if (ret < 0) {
>> pr_perror("Can't fork for %d", pid);
>> goto err_unlock;
>> }
>>
>> -
>> - if (item == root_item) {
>> - item->pid->real = ret;
>> - pr_debug("PID: real %d virt %d\n",
>> - item->pid->real, vpid(item));
>> - }
>> -
>> err_unlock:
>> if (flock(ca.fd, LOCK_UN))
>> pr_perror("%d: Can't unlock %s", pid, LAST_PID_PATH);
>>
More information about the CRIU
mailing list