[CRIU] [PATCH v2 36/36] ns: Allow nested user namespaces

Kirill Tkhai ktkhai at virtuozzo.com
Tue Feb 7 00:14:07 PST 2017


On 07.02.2017 03:51, Andrei Vagin wrote:
> On Fri, Feb 03, 2017 at 07:16:55PM +0300, Kirill Tkhai wrote:
>> Everything is prepared for nested user namespaces support.
>> The only thing, we should do more, is to enter to dumped
>> user namespace's parent before the dump.
>> We use CLONE_VM for child tasks, so they may populate
>> user_ns maps in parent memory without any tricks.
>>
>> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
>> ---
>>  criu/include/namespaces.h |    2 +
>>  criu/namespaces.c         |   65 ++++++++++++++++++++++++++++++++++++++++++---
>>  2 files changed, 62 insertions(+), 5 deletions(-)
>>
>> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
>> index 67a96d6b3..c7fc0a05e 100644
>> --- a/criu/include/namespaces.h
>> +++ b/criu/include/namespaces.h
>> @@ -39,7 +39,7 @@
>>  #define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
>>  
>>  /* Nested namespaces are supported only for these types */
>> -#define CLONE_SUBNS	(CLONE_NEWNS)
>> +#define CLONE_SUBNS	(CLONE_NEWNS | CLONE_NEWUSER)
>>  #define EXTRA_SIZE	20
>>  
>>  #ifndef NSIO
>> diff --git a/criu/namespaces.c b/criu/namespaces.c
>> index 0bbd57e96..5cf46ed8f 100644
>> --- a/criu/namespaces.c
>> +++ b/criu/namespaces.c
>> @@ -31,6 +31,7 @@
>>  #include "util.h"
>>  #include "images/ns.pb-c.h"
>>  #include "common/scm.h"
>> +#include "proc_parse.h"
>>  
>>  static struct ns_desc *ns_desc_array[] = {
>>  	&net_ns_desc,
>> @@ -970,11 +971,29 @@ static int parse_id_map(pid_t pid, char *name, UidGidExtent ***pb_exts)
>>  	return -1;
>>  }
>>  
>> -static int dump_user_ns(struct ns_id *ns);
>> +static int __dump_user_ns(struct ns_id *ns);
>> +
>> +static int dump_user_ns(void *arg)
>> +{
>> +	struct ns_id *ns = arg;
>> +
>> +	if (switch_ns(ns->parent->ns_pid, &user_ns_desc, NULL) < 0) {
>> +		pr_err("Can't enter user namespace\n");
>> +		return -1;
>> +	}
>> +
>> +	return __dump_user_ns(ns);
>> +}
>>  
>>  int collect_user_ns(struct ns_id *ns, void *oarg)
>>  {
>> +	int status, stack_size;
>> +	struct ns_id *p_ns;
>> +	pid_t pid = -1;
>>  	UsernsEntry *e;
>> +	char *stack;
>> +
>> +	p_ns = ns->parent;
>>  
>>  	e = xmalloc(sizeof(*e));
>>  	if (!e)
>> @@ -990,8 +1009,43 @@ int collect_user_ns(struct ns_id *ns, void *oarg)
>>  	 * mappings, which are used for convirting local id-s to
>>  	 * userns id-s (userns_uid(), userns_gid())
>>  	 */
>> -	if (dump_user_ns(ns))
>> -		return -1;
>> +	if (p_ns) {
>> +		/*
>> +		 * Currently, we are in NS_CRIU. To dump a NS_OTHER ns,
>> +		 * we need to enter its parent ns. As entered to user_ns
>> +		 * task has no a way back, we create a child for that.
>> +		 * NS_ROOT is dumped w/o clone(), it's xids maps is relatively
>> +		 * to NS_CRIU. We use CLONE_VM to make child share our memory,
>> +		 * and to allow us see allocated maps, he do. Child's open_proc()
>> +		 * may do changes about CRIU's internal files states in memory,
>> +		 * so pass CLONE_FILES to reflect that.
>> +		 */
>> +		stack_size = PAGE_SIZE;
>> +		stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN, -1, 0);
> 
> Maybe it is easier to allocate a big enough stack without MAP_GROWSDOWN.
> In this case we will not need to read /proc/self/maps to get its size,
> we will not need to care about guard pages, etc.

I don't think so. We call several nested functions there, and they are also
used in generic code. It would be impractical to have to walk the whole
function call tree every time we modify one of these functions, just to
determine whether places like this need to enlarge their stack allocation.
A "big enough" stack is not something we can reliably quantify.
 
>> +		if (stack == MAP_FAILED) {
>> +			pr_perror("Can't allocate stack");
>> +			return -1;
>> +		}
>> +		pid = clone(dump_user_ns, stack + stack_size - 1, CLONE_VM | CLONE_FILES | SIGCHLD, ns);
>> +		if (pid == -1) {
>> +			pr_perror("Can't clone");
>> +			return -1;
>> +		}
>> +		if (waitpid(pid, &status, 0) != pid) {
>> +			pr_perror("Unable to wait the %d process", pid);
>> +			return -1;
>> +		}
>> +		if (status) {
>> +			pr_err("Can't dump nested user_ns\n");
>> +			return -1;
>> +		}
>> +		stack_size = find_vma_size((unsigned long *)&stack);
>> +		munmap(stack, stack_size);
>> +		return 0;
>> +	} else {
>> +		if (__dump_user_ns(ns))
>> +			return -1;
>> +	}
>>  
>>  	return 0;
>>  }
>> @@ -1030,6 +1084,9 @@ static int check_user_ns(struct ns_id *ns)
>>  	int status;
>>  	pid_t chld;
>>  
>> +	if (ns->type != NS_ROOT)
>> +		return 0;
>> +
>>  	chld = fork();
>>  	if (chld == -1) {
>>  		pr_perror("Unable to fork a process");
>> @@ -1125,7 +1182,7 @@ static int check_user_ns(struct ns_id *ns)
>>  	return 0;
>>  }
>>  
>> -static int dump_user_ns(struct ns_id *ns)
>> +static int __dump_user_ns(struct ns_id *ns)
>>  {
>>  	int ret, exit_code = -1;
>>  	pid_t pid = ns->ns_pid;
>>
>> _______________________________________________
>> CRIU mailing list
>> CRIU at openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list