[CRIU] [PATCH v3 49/55] pid_ns: Set user_ns before creation of pid_ns

Kirill Tkhai ktkhai at virtuozzo.com
Wed Apr 26 03:13:46 PDT 2017


On 26.04.2017 10:53, Andrei Vagin wrote:
> On Tue, Apr 25, 2017 at 03:11:32PM +0300, Kirill Tkhai wrote:
>> On 25.04.2017 08:26, Andrei Vagin wrote:
>>> On Mon, Apr 10, 2017 at 11:23:13AM +0300, Kirill Tkhai wrote:
>>>> Since child's pid_ns may have user_ns not equal
>>>> to parent's, and we do not want to lose parent's
>>>> user_ns (as it's not impossible to restore it back),
>>>> create the child from a sub-process.
>>>>
>>>
>>> I don't understand what you do here. Could you elaborate and add
>>> comments in code?
> 
> Ok, now I understand the idea. And I have one more question. Now we
> restore userns for processes before restoring files, so we can meet a
> situation whe a process will not be able to restore a file due to lack
> of permissions. How are you going to handle this case?

There are two ways:
1)use usernsd to open the file
2)restore such files in processes, who have the permissions (name their fle master).

It does not contradict the current ns restore scheme, but now I'm far from choosing
a variant (since even ns-support sceleton is not officially experimental supported yet)
 
>>>
>>> Thanks,
>>> Andrei
>>>  
>>>> v3: New
>>>>
>>>> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
>>>> ---
>>>>  criu/cr-restore.c |  131 +++++++++++++++++++++++++++++++++++++++++++++++------
>>>>  1 file changed, 115 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
>>>> index 780efed18..a73545fbd 100644
>>>> --- a/criu/cr-restore.c
>>>> +++ b/criu/cr-restore.c
>>>> @@ -1046,6 +1046,120 @@ static void maybe_clone_parent(struct pstree_item *item,
>>>>  	}
>>>>  }
>>>>  
>>>> +static int call_clone_fn(void *arg)
>>>> +{
>>>> +	struct cr_clone_arg *ca = arg;
>>>> +	struct ns_id *pid_ns;
>>>> +	pid_t pid;
>>>> +	int fd;
>>>> +
>>>> +	pid_ns = lookup_ns_by_id(ca->item->ids->pid_ns_id, &pid_ns_desc);
>>>> +	BUG_ON(!pid_ns);
>>>> +
>>>> +	if (set_next_pid(pid_ns, ca->item->pid) < 0) {
>>>> +		pr_err("Can't set next pid\n");
>>>> +		return -1;
>>>> +	}
>>>> +
>>>> +	fd = fdstore_get(pid_ns->user_ns->user.nsfd_id);
>>>> +	if (fd < 0) {
>>>> +		pr_err("Can't get ns fd\n");
>>>> +		return -1;
>>>> +	}
>>>> +
>>
>> Lower user_ns:
>>
>>>> +	if (setns(fd, CLONE_NEWUSER) < 0) {
>>>> +		pr_perror("Can't set user ns");
>>>> +		close(fd);
>>>> +		return -1;
>>>> +	}
>>>> +	close(fd);
>>>> +	ca->item->user_ns = pid_ns->user_ns;
>>>> +
>>>> +	if (ca->clone_flags & CLONE_FILES)
>>>> +		close_pid_proc();
>>>> +
>>
>> Create children with CLONE_PARENT:
>>
>>>> +	pid = clone_noasan(restore_task_with_children, ca->clone_flags | CLONE_PARENT | SIGCHLD, ca);
>>>> +
>>>> +	if (ca->item == root_item) {
>>>> +		ca->item->pid->real = pid;
>>>> +		pr_debug("PID: real %d virt %d\n", pid, vpid(ca->item));
>>>> +	}
>>>> +
>>
>> Exit:
>>
>>>> +	return pid > 0 ? 0 : -1;
>>>> +}
>>>> +
>>>> +static int do_fork_with_pid(struct pstree_item *item, struct ns_id *pid_ns, struct cr_clone_arg *ca)
>>>> +{
>>>> +	int status, i, sz, ret = 0;
>>>> +	struct pid *hlp_pid;
>>>> +	sigset_t sig_mask;
>>>> +	pid_t pid;
>>>> +
>>>> +	if (!(ca->clone_flags & CLONE_NEWPID) || !current || current->user_ns == pid_ns->user_ns) {
>>
>> If we do not create a pid_ns, or user_ns of the pid_ns is similar to current's user_ns,
>> then create the pid_ns without any tricks.
>>
>>>> +		if (set_next_pid(pid_ns, item->pid) < 0) {
>>>> +			pr_err("Can't set next pid\n");
>>>> +			return -1;
>>>> +		}
>>>> +		if (ca->clone_flags & CLONE_FILES)
>>>> +			close_pid_proc();
>>>> +		pid = clone_noasan(restore_task_with_children, ca->clone_flags | SIGCHLD, ca);
>>>> +		if (item == root_item) {
>>>> +			item->pid->real = pid;
>>>> +			pr_debug("PID: real %d virt %d\n", pid, vpid(item));
>>>> +		}
>>>> +		return pid > 0 ? 0 : -1;
>>>> +	}
>>
>> Otherwise, create a helper, lower user_ns from current->user_ns to pid_ns->user_ns
>> and create the pid_ns.
>>
>> Alloc memory for helper pid:
>>>> +
>>>> +	sz = sizeof(struct pid) + (item->pid->level - 2) * sizeof(((struct pid *)NULL)->ns[0]);
>>>> +	hlp_pid = xmalloc(sz);
>>>> +	if (!hlp_pid)
>>>> +		return -1;
>>>> +	memcpy(hlp_pid, item->pid, sz);
>>>> +	hlp_pid->level--;
>>>> +
>>
>> Choose helper pid (see comment, how it's choosed):
>>
>>>> +	for (i = 0; i < hlp_pid->level; i++) {
>>>> +		/*
>>>> +		 * Choose helper's pid[] as child's pid[] + 1.
>>>> +		 * This should guarantee the helper will not
>>>> +		 * occupy child's pids.
>>>> +		 */
>>>> +		hlp_pid->ns[i].virt++;
>>>> +		if (hlp_pid->ns[i].virt < 0)
>>>> +			hlp_pid->ns[i].virt = INIT_PID + 1;
>>>> +	}
>>
>> Set helper pid as next:
>>
>>>> +	if (set_next_pid(pid_ns->parent, hlp_pid) < 0) {
>>>> +		pr_err("Can't set next pid\n");
>>>> +		xfree(hlp_pid);
>>>> +		return -1;
>>>> +	}
>>>> +	xfree(hlp_pid);
>>>> +
>>>> +	if (ca->clone_flags & CLONE_FILES)
>>>> +		close_pid_proc();
>>>> +
>>>> +	if (block_sigmask(&sig_mask, SIGCHLD) < 0)
>>>> +		return -1;
>>>> +
>>
>> Create helper:
>>
>>>> +	pid = clone_noasan(call_clone_fn, SIGCHLD, ca);
>>>> +	if (pid < 0) {
>>>> +		pr_perror("Can't clone()");
>>>> +		ret = -1;
>>>> +		goto unblock;
>>>> +	}
>>>> +
>>
>> Wait till helper dies:
>>
>>>> +	errno = 0;
>>>> +	if (waitpid(pid, &status, 0) < 0 || !WIFEXITED(status) || WEXITSTATUS(status)) {
>>>> +		pr_perror("Child process waiting: %d", status);
>>>> +		ret = -1;
>>>> +		goto unblock;
>>>> +	}
>>>> +
>>>> +unblock:
>>>> +	if (restore_sigmask(&sig_mask))
>>>> +		ret = -1;
>>>> +	return ret;
>>>> +}
>>>> +
>>>>  static inline int fork_with_pid(struct pstree_item *item)
>>>>  {
>>>>  	struct cr_clone_arg ca;
>>>> @@ -1114,27 +1228,12 @@ static inline int fork_with_pid(struct pstree_item *item)
>>>>  		goto err_close;
>>>>  	}
>>>>  
>>>> -	if (set_next_pid(pid_ns, item->pid) < 0) {
>>>> -		pr_err("Can't set next pid\n");
>>>> -		goto err_unlock;
>>>> -	}
>>>> -
>>>> -	if (ca.clone_flags & CLONE_FILES)
>>>> -		close_pid_proc();
>>>> -
>>>> -	ret = clone_noasan(restore_task_with_children, ca.clone_flags | SIGCHLD, &ca);
>>>> +	ret = do_fork_with_pid(item, pid_ns, &ca);
>>>>  	if (ret < 0) {
>>>>  		pr_perror("Can't fork for %d", pid);
>>>>  		goto err_unlock;
>>>>  	}
>>>>  
>>>> -
>>>> -	if (item == root_item) {
>>>> -		item->pid->real = ret;
>>>> -		pr_debug("PID: real %d virt %d\n",
>>>> -				item->pid->real, vpid(item));
>>>> -	}
>>>> -
>>>>  err_unlock:
>>>>  	if (flock(ca.fd, LOCK_UN))
>>>>  		pr_perror("%d: Can't unlock %s", pid, LAST_PID_PATH);
>>>>


More information about the CRIU mailing list