[CRIU] [PATCH v3 49/55] pid_ns: Set user_ns before creation of pid_ns

Andrei Vagin avagin at virtuozzo.com
Wed Apr 26 00:53:02 PDT 2017


On Tue, Apr 25, 2017 at 03:11:32PM +0300, Kirill Tkhai wrote:
> On 25.04.2017 08:26, Andrei Vagin wrote:
> > On Mon, Apr 10, 2017 at 11:23:13AM +0300, Kirill Tkhai wrote:
> >> Since child's pid_ns may have user_ns not equal
> >> to parent's, and we do not want to lose parent's
> >> user_ns (as it's not impossible to restore it back),
> >> create the child from a sub-process.
> >>
> > 
> > I don't understand what you do here. Could you elaborate and add
> > comments in code?

Ok, now I understand the idea. And I have one more question. Now we
restore userns for processes before restoring files, so we can meet a
situation whe a process will not be able to restore a file due to lack
of permissions. How are you going to handle this case?

> > 
> > Thanks,
> > Andrei
> >  
> >> v3: New
> >>
> >> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> >> ---
> >>  criu/cr-restore.c |  131 +++++++++++++++++++++++++++++++++++++++++++++++------
> >>  1 file changed, 115 insertions(+), 16 deletions(-)
> >>
> >> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> >> index 780efed18..a73545fbd 100644
> >> --- a/criu/cr-restore.c
> >> +++ b/criu/cr-restore.c
> >> @@ -1046,6 +1046,120 @@ static void maybe_clone_parent(struct pstree_item *item,
> >>  	}
> >>  }
> >>  
> >> +static int call_clone_fn(void *arg)
> >> +{
> >> +	struct cr_clone_arg *ca = arg;
> >> +	struct ns_id *pid_ns;
> >> +	pid_t pid;
> >> +	int fd;
> >> +
> >> +	pid_ns = lookup_ns_by_id(ca->item->ids->pid_ns_id, &pid_ns_desc);
> >> +	BUG_ON(!pid_ns);
> >> +
> >> +	if (set_next_pid(pid_ns, ca->item->pid) < 0) {
> >> +		pr_err("Can't set next pid\n");
> >> +		return -1;
> >> +	}
> >> +
> >> +	fd = fdstore_get(pid_ns->user_ns->user.nsfd_id);
> >> +	if (fd < 0) {
> >> +		pr_err("Can't get ns fd\n");
> >> +		return -1;
> >> +	}
> >> +
> 
> Lower user_ns:
> 
> >> +	if (setns(fd, CLONE_NEWUSER) < 0) {
> >> +		pr_perror("Can't set user ns");
> >> +		close(fd);
> >> +		return -1;
> >> +	}
> >> +	close(fd);
> >> +	ca->item->user_ns = pid_ns->user_ns;
> >> +
> >> +	if (ca->clone_flags & CLONE_FILES)
> >> +		close_pid_proc();
> >> +
> 
> Create children with CLONE_PARENT:
> 
> >> +	pid = clone_noasan(restore_task_with_children, ca->clone_flags | CLONE_PARENT | SIGCHLD, ca);
> >> +
> >> +	if (ca->item == root_item) {
> >> +		ca->item->pid->real = pid;
> >> +		pr_debug("PID: real %d virt %d\n", pid, vpid(ca->item));
> >> +	}
> >> +
> 
> Exit:
> 
> >> +	return pid > 0 ? 0 : -1;
> >> +}
> >> +
> >> +static int do_fork_with_pid(struct pstree_item *item, struct ns_id *pid_ns, struct cr_clone_arg *ca)
> >> +{
> >> +	int status, i, sz, ret = 0;
> >> +	struct pid *hlp_pid;
> >> +	sigset_t sig_mask;
> >> +	pid_t pid;
> >> +
> >> +	if (!(ca->clone_flags & CLONE_NEWPID) || !current || current->user_ns == pid_ns->user_ns) {
> 
> If we do not create a pid_ns, or user_ns of the pid_ns is similar to current's user_ns,
> then create the pid_ns without any tricks.
> 
> >> +		if (set_next_pid(pid_ns, item->pid) < 0) {
> >> +			pr_err("Can't set next pid\n");
> >> +			return -1;
> >> +		}
> >> +		if (ca->clone_flags & CLONE_FILES)
> >> +			close_pid_proc();
> >> +		pid = clone_noasan(restore_task_with_children, ca->clone_flags | SIGCHLD, ca);
> >> +		if (item == root_item) {
> >> +			item->pid->real = pid;
> >> +			pr_debug("PID: real %d virt %d\n", pid, vpid(item));
> >> +		}
> >> +		return pid > 0 ? 0 : -1;
> >> +	}
> 
> Otherwise, create a helper, lower user_ns from current->user_ns to pid_ns->user_ns
> and create the pid_ns.
> 
> Alloc memory for helper pid:
> >> +
> >> +	sz = sizeof(struct pid) + (item->pid->level - 2) * sizeof(((struct pid *)NULL)->ns[0]);
> >> +	hlp_pid = xmalloc(sz);
> >> +	if (!hlp_pid)
> >> +		return -1;
> >> +	memcpy(hlp_pid, item->pid, sz);
> >> +	hlp_pid->level--;
> >> +
> 
> Choose helper pid (see comment, how it's choosed):
> 
> >> +	for (i = 0; i < hlp_pid->level; i++) {
> >> +		/*
> >> +		 * Choose helper's pid[] as child's pid[] + 1.
> >> +		 * This should guarantee the helper will not
> >> +		 * occupy child's pids.
> >> +		 */
> >> +		hlp_pid->ns[i].virt++;
> >> +		if (hlp_pid->ns[i].virt < 0)
> >> +			hlp_pid->ns[i].virt = INIT_PID + 1;
> >> +	}
> 
> Set helper pid as next:
> 
> >> +	if (set_next_pid(pid_ns->parent, hlp_pid) < 0) {
> >> +		pr_err("Can't set next pid\n");
> >> +		xfree(hlp_pid);
> >> +		return -1;
> >> +	}
> >> +	xfree(hlp_pid);
> >> +
> >> +	if (ca->clone_flags & CLONE_FILES)
> >> +		close_pid_proc();
> >> +
> >> +	if (block_sigmask(&sig_mask, SIGCHLD) < 0)
> >> +		return -1;
> >> +
> 
> Create helper:
> 
> >> +	pid = clone_noasan(call_clone_fn, SIGCHLD, ca);
> >> +	if (pid < 0) {
> >> +		pr_perror("Can't clone()");
> >> +		ret = -1;
> >> +		goto unblock;
> >> +	}
> >> +
> 
> Wait till helper dies:
> 
> >> +	errno = 0;
> >> +	if (waitpid(pid, &status, 0) < 0 || !WIFEXITED(status) || WEXITSTATUS(status)) {
> >> +		pr_perror("Child process waiting: %d", status);
> >> +		ret = -1;
> >> +		goto unblock;
> >> +	}
> >> +
> >> +unblock:
> >> +	if (restore_sigmask(&sig_mask))
> >> +		ret = -1;
> >> +	return ret;
> >> +}
> >> +
> >>  static inline int fork_with_pid(struct pstree_item *item)
> >>  {
> >>  	struct cr_clone_arg ca;
> >> @@ -1114,27 +1228,12 @@ static inline int fork_with_pid(struct pstree_item *item)
> >>  		goto err_close;
> >>  	}
> >>  
> >> -	if (set_next_pid(pid_ns, item->pid) < 0) {
> >> -		pr_err("Can't set next pid\n");
> >> -		goto err_unlock;
> >> -	}
> >> -
> >> -	if (ca.clone_flags & CLONE_FILES)
> >> -		close_pid_proc();
> >> -
> >> -	ret = clone_noasan(restore_task_with_children, ca.clone_flags | SIGCHLD, &ca);
> >> +	ret = do_fork_with_pid(item, pid_ns, &ca);
> >>  	if (ret < 0) {
> >>  		pr_perror("Can't fork for %d", pid);
> >>  		goto err_unlock;
> >>  	}
> >>  
> >> -
> >> -	if (item == root_item) {
> >> -		item->pid->real = ret;
> >> -		pr_debug("PID: real %d virt %d\n",
> >> -				item->pid->real, vpid(item));
> >> -	}
> >> -
> >>  err_unlock:
> >>  	if (flock(ca.fd, LOCK_UN))
> >>  		pr_perror("%d: Can't unlock %s", pid, LAST_PID_PATH);
> >>


More information about the CRIU mailing list