[CRIU] [PATCH v2] ns: Do not reuse PROC_SELF after CLONE_VM child

Andrei Vagin avagin at virtuozzo.com
Thu Mar 23 15:50:58 PDT 2017


Applied

On Thu, Mar 23, 2017 at 01:25:23PM +0300, Kirill Tkhai wrote:
> Child opens PROC_SELF, populates open_proc_self_pid and exits. If parent creates
> one more child with the same pid later, the new child will try to reuse PROC_SELF,
> set by exited child. So, we need to close PROC_SELF after the first child has finished.
> 
> We have this issue in two places, which have the same code. Let's move the code
> into new function call_in_child_process() and fix the issue there using close_pid_proc().
> 
> https://travis-ci.org/tkhai/criu/builds/214182862
> 
> v2: Introduce the helper call_in_child_process() and fix issue there.
> 
> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
>  criu/include/util.h |    2 ++
>  criu/namespaces.c   |   30 ++----------------------------
>  criu/net.c          |   25 ++++---------------------
>  criu/util.c         |   34 ++++++++++++++++++++++++++++++++++
>  4 files changed, 42 insertions(+), 49 deletions(-)
> 
> diff --git a/criu/include/util.h b/criu/include/util.h
> index c1d38c93..0428490b 100644
> --- a/criu/include/util.h
> +++ b/criu/include/util.h
> @@ -309,4 +309,6 @@ extern int epoll_prepare(int nr_events, struct epoll_event **evs);
>  
>  extern int open_fd_of_real_pid(pid_t pid, int fd, int flags);
>  
> +extern int call_in_child_process(int (*fn)(void *), void *arg);
> +
>  #endif /* __CR_UTIL_H__ */
> diff --git a/criu/namespaces.c b/criu/namespaces.c
> index 77308ccb..7637c34e 100644
> --- a/criu/namespaces.c
> +++ b/criu/namespaces.c
> @@ -993,11 +993,8 @@ static int dump_user_ns(void *arg)
>  
>  int collect_user_ns(struct ns_id *ns, void *oarg)
>  {
> -	int status, stack_size;
>  	struct ns_id *p_ns;
> -	pid_t pid = -1;
>  	UsernsEntry *e;
> -	char *stack;
>  
>  	p_ns = ns->parent;
>  
> @@ -1021,32 +1018,9 @@ int collect_user_ns(struct ns_id *ns, void *oarg)
>  		 * we need to enter its parent ns. As entered to user_ns
>  		 * task has no a way back, we create a child for that.
>  		 * NS_ROOT is dumped w/o clone(), it's xids maps is relatively
> -		 * to NS_CRIU. We use CLONE_VM to make child share our memory,
> -		 * and to allow us see allocated maps, he do. Child's open_proc()
> -		 * may do changes about CRIU's internal files states in memory,
> -		 * so pass CLONE_FILES to reflect that.
> +		 * to NS_CRIU.
>  		 */
> -		stack_size = 2 * 1024 * 1024;
> -		stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> -		if (stack == MAP_FAILED) {
> -			pr_perror("Can't allocate stack");
> -			return -1;
> -		}
> -		pid = clone(dump_user_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, ns);
> -		if (pid == -1) {
> -			pr_perror("Can't clone");
> -			return -1;
> -		}
> -		if (waitpid(pid, &status, 0) != pid) {
> -			pr_perror("Unable to wait the %d process", pid);
> -			return -1;
> -		}
> -		if (!WIFEXITED(status) || WEXITSTATUS(status)) {
> -			pr_err("Can't dump nested user_ns: %x\n", status);
> -			return -1;
> -		}
> -		munmap(stack, stack_size);
> -		return 0;
> +		return call_in_child_process(dump_user_ns, ns);
>  	} else {
>  		if (__dump_user_ns(ns))
>  			return -1;
> diff --git a/criu/net.c b/criu/net.c
> index 45f6fcee..0737a61f 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1760,47 +1760,30 @@ static int create_net_ns(void *arg)
>  
>  int prepare_net_namespaces()
>  {
> -	int status, stack_size, ret = -1;
>  	struct ns_id *nsid;
> -	char *stack;
> -	pid_t pid;
> +	int ret = -1;
>  
>  	if (!(root_ns_mask & CLONE_NEWNET))
>  		return 0;
>  
> -	stack_size = 2 * 1024 * 1024;
> -	stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> -	if (stack == MAP_FAILED) {
> -		pr_perror("Can't allocate stack");
> -		return -1;
> -	}
> -
>  	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>  		if (nsid->nd != &net_ns_desc)
>  			continue;
>  
>  		if (root_user_ns && nsid->user_ns != root_user_ns) {
> -			pid = clone(create_net_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, nsid);
> -			if (pid < 0) {
> -				pr_perror("Can't clone");
> -				goto err;
> -			}
> -			if (waitpid(pid, &status, 0) != pid || !WIFEXITED(status) || WEXITSTATUS(status)) {
> -				pr_perror("Child process waiting %d", status);
> +			if (call_in_child_process(create_net_ns, nsid) < 0)
>  				goto err;
> -			}
>  		} else {
>  			if (do_create_net_ns(nsid))
>  				goto err;
> -
>  		}
> -
>  	}
>  
>  	close_service_fd(NS_FD_OFF);
>  	ret = 0;
>  err:
> -	munmap(stack, stack_size);
> +	if (ret)
> +		pr_err("Can't create net_ns\n");
>  
>  	return ret;
>  }
> diff --git a/criu/util.c b/criu/util.c
> index bbe8f200..cbbafeae 100644
> --- a/criu/util.c
> +++ b/criu/util.c
> @@ -1367,3 +1367,37 @@ int open_fd_of_real_pid(pid_t pid, int fd, int flags)
>  		BUG();
>  	return ret;
>  }
> +
> +int call_in_child_process(int (*fn)(void *), void *arg)
> +{
> +	int size, status, ret = -1;
> +	char *stack;
> +	pid_t pid;
> +
> +	size = 2 * 1024 * 1024; /* 2Mb */
> +	stack = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +	if (stack == MAP_FAILED) {
> +		pr_perror("Can't allocate stack");
> +		return -1;
> +	}
> +	pid = clone(fn, stack + size, CLONE_VM | CLONE_FILES | SIGCHLD, arg);
> +	if (pid == -1) {
> +		pr_perror("Can't clone");
> +		goto out_munmap;
> +	}
> +	errno = 0;
> +	if (waitpid(pid, &status, 0) != pid || !WIFEXITED(status) || WEXITSTATUS(status)) {
> +		pr_err("Can't wait or bad status: errno=%d, status=%d", errno, status);
> +		goto out_close;
> +	}
> +	ret = 0;
> +out_close:
> +	/*
> +	 * Child opened PROC_SELF for pid. If we create one more child
> +	 * with the same pid later, it will try to reuse this /proc/self.
> +	 */
> +	close_pid_proc();
> +out_munmap:
> +	munmap(stack, size);
> +	return ret;
> +}
> 


More information about the CRIU mailing list