[CRIU] [PATCH 2/3] restore: restore mntns before creating private vma-s (v2)

Pavel Emelyanov xemul at parallels.com
Fri Nov 13 08:57:55 PST 2015


On 11/13/2015 05:56 PM, Andrey Vagin wrote:
> From: Andrew Vagin <avagin at virtuozzo.com>
> 
> We need to open a file to restore a file mapping and this file
> can be from a current mntns.
> 
> v2: All namespaces are resotred from the root task and then
> other tasks calls setns() to set a proper mntns.
> 
> Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> ---
>  cr-restore.c         | 19 ++++++--------
>  include/namespaces.h |  1 +
>  mount.c              | 73 ++++++++++++++++++++++++++++++++--------------------
>  3 files changed, 54 insertions(+), 39 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 0350a23..c132588 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1520,9 +1520,6 @@ static int restore_task_with_children(void *_arg)
>  		if (mount_proc())
>  			goto err_fini_mnt;
>  
> -		if (close_old_fds(current))
> -			goto err_fini_mnt;
> -
>  		if (root_prepare_shared())
>  			goto err_fini_mnt;
>  
> @@ -1530,14 +1527,11 @@ static int restore_task_with_children(void *_arg)
>  			goto err_fini_mnt;
>  	}
>  
> -	if (prepare_mappings())
> +	if (restore_task_mnt_ns(current))
>  		goto err_fini_mnt;
>  
> -	if (!(ca->clone_flags & CLONE_FILES)) {
> -		ret = close_old_fds(current);
> -		if (ret)
> -			goto err_fini_mnt;
> -	}
> +	if (prepare_mappings())
> +		goto err_fini_mnt;
>  
>  	/*
>  	 * Call this _before_ forking to optimize cgroups
> @@ -1559,8 +1553,11 @@ static int restore_task_with_children(void *_arg)
>  	if (create_children_and_session())
>  		goto err_fini_mnt;
>  
> -	if (restore_task_mnt_ns(current))
> -		goto err_fini_mnt;
> +	if (!(ca->clone_flags & CLONE_FILES)) {
> +		ret = close_old_fds(current);
> +		if (ret)
> +			goto err_fini_mnt;
> +	}
>  
>  	if (unmap_guard_pages())
>  		goto err_fini_mnt;
> diff --git a/include/namespaces.h b/include/namespaces.h
> index f511290..9b7dca3 100644
> --- a/include/namespaces.h
> +++ b/include/namespaces.h
> @@ -37,6 +37,7 @@ struct ns_id {
>  		struct {
>  			struct mount_info *mntinfo_list;
>  			struct mount_info *mntinfo_tree;
> +			int ns_fd;
>  		} mnt;
>  
>  		struct {
> diff --git a/mount.c b/mount.c
> index 5a5b90a..9666e5a 100644
> --- a/mount.c
> +++ b/mount.c
> @@ -2590,36 +2590,13 @@ int mntns_maybe_create_roots(void)
>  
>  static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *current)
>  {
> -	char path[PATH_MAX];
> -
> -	if (nsid->ns_pid != current->pid.virt) {
> -		int fd;
> -
> -		futex_wait_while_eq(&nsid->ns_created, 0);
> -		fd = open_proc(nsid->ns_pid, "ns/mnt");
> -		if (fd < 0)
> -			return -1;
> -
> -		if (setns(fd, CLONE_NEWNS)) {
> -			pr_perror("Unable to change mount namespace");
> -			return -1;
> -		}
> -
> -		close(fd);
> -		return 0;
> -	}
> -
> -	if (unshare(CLONE_NEWNS)) {
> -		pr_perror("Unable to unshare mount namespace");
> +	if (setns(nsid->mnt.ns_fd, CLONE_NEWNS)) {
> +		pr_perror("Can't restore mntns");
>  		return -1;
>  	}
>  
> -	path[0] = '/';
> -	print_ns_root(nsid, path + 1, sizeof(path) - 1);
> -	if (cr_pivot_root(path))
> -		return -1;
> -
> -	futex_set_and_wake(&nsid->ns_created, 1);
> +	if (nsid->ns_pid == current->pid.virt)
> +		futex_set_and_wake(&nsid->ns_created, 1);
>  
>  	return 0;
>  }
> @@ -2642,6 +2619,9 @@ int restore_task_mnt_ns(struct pstree_item *current)
>  		if (root_item->ids->mnt_ns_id == id)
>  			return 0;
>  
> +		if (current->ids->mnt_ns_id == current->parent->ids->mnt_ns_id)
> +			return 0;

This check and the above one can be both merged into

if (!current->parent || id == current->parent->ids->mnt_ns_id)
	return 0;

doesn't it?

> +
>  		nsid = lookup_ns_by_id(id, &mnt_ns_desc);
>  		if (nsid == NULL) {
>  			pr_err("Can't find mount namespace %d\n", id);
> @@ -2762,9 +2742,10 @@ void cleanup_mnt_ns(void)
>  
>  int prepare_mnt_ns(void)
>  {
> -	int ret = -1;
> +	int ret = -1, rst = -1;
>  	struct mount_info *old;
>  	struct ns_id ns = { .type = NS_CRIU, .ns_pid = PROC_SELF, .nd = &mnt_ns_desc };
> +	struct ns_id *nsid;
>  
>  	if (!(root_ns_mask & CLONE_NEWNS))
>  		return rst_collect_local_mntns();
> @@ -2842,7 +2823,43 @@ int prepare_mnt_ns(void)
>  	if (!ret && opts.root)
>  		ret = cr_pivot_root(NULL);
>  
> +	rst = open_proc(PROC_SELF, "ns/mnt");
> +	if (rst < 0)
> +		return -1;
> +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> +		char path[PATH_MAX];
> +
> +		if (nsid->nd != &mnt_ns_desc)
> +			continue;
> +		if (root_item->ids->mnt_ns_id == nsid->id)
> +			continue;
> +
> +		if (unshare(CLONE_NEWNS)) {

What for? Please, add a comment describing what's going on here.

> +			pr_perror("Unable to create a new mntns");
> +			goto err;
> +		}
> +
> +		path[0] = '/';
> +		print_ns_root(nsid, path + 1, sizeof(path) - 1);
> +		if (cr_pivot_root(path))
> +			goto err;
> +
> +		nsid->mnt.ns_fd = open_proc(PROC_SELF, "ns/mnt");
> +		if (nsid->mnt.ns_fd < 0)
> +			goto err;
> +
> +		if (setns(rst, CLONE_NEWNS)) {
> +			pr_perror("Can't restore mntns back");
> +			goto err;
> +		}

Why do we always need to get back to original mntns? Add comment here too.

> +	}
> +	close(rst);
> +
>  	return ret;
> +err:
> +	if (rst)
> +		restore_ns(rst, &mnt_ns_desc);
> +	return -1;
>  }
>  
>  int __mntns_get_root_fd(pid_t pid)
> 



More information about the CRIU mailing list