[CRIU] [PATCH 2/3] restore: restore mntns before creating private vma-s (v2)

Andrew Vagin avagin at virtuozzo.com
Fri Nov 13 09:52:01 PST 2015


On Fri, Nov 13, 2015 at 07:57:55PM +0300, Pavel Emelyanov wrote:
> On 11/13/2015 05:56 PM, Andrey Vagin wrote:
> > From: Andrew Vagin <avagin at virtuozzo.com>
> > 
> > We need to open a file to restore a file mapping and this file
> > can be from a current mntns.
> > 
> > v2: All namespaces are resotred from the root task and then
> > other tasks calls setns() to set a proper mntns.
> > 
> > Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
> > ---
> >  cr-restore.c         | 19 ++++++--------
> >  include/namespaces.h |  1 +
> >  mount.c              | 73 ++++++++++++++++++++++++++++++++--------------------
> >  3 files changed, 54 insertions(+), 39 deletions(-)
> > 
> > diff --git a/cr-restore.c b/cr-restore.c
> > index 0350a23..c132588 100644
> > --- a/cr-restore.c
> > +++ b/cr-restore.c
> > @@ -1520,9 +1520,6 @@ static int restore_task_with_children(void *_arg)
> >  		if (mount_proc())
> >  			goto err_fini_mnt;
> >  
> > -		if (close_old_fds(current))
> > -			goto err_fini_mnt;
> > -
> >  		if (root_prepare_shared())
> >  			goto err_fini_mnt;
> >  
> > @@ -1530,14 +1527,11 @@ static int restore_task_with_children(void *_arg)
> >  			goto err_fini_mnt;
> >  	}
> >  
> > -	if (prepare_mappings())
> > +	if (restore_task_mnt_ns(current))
> >  		goto err_fini_mnt;
> >  
> > -	if (!(ca->clone_flags & CLONE_FILES)) {
> > -		ret = close_old_fds(current);
> > -		if (ret)
> > -			goto err_fini_mnt;
> > -	}
> > +	if (prepare_mappings())
> > +		goto err_fini_mnt;
> >  
> >  	/*
> >  	 * Call this _before_ forking to optimize cgroups
> > @@ -1559,8 +1553,11 @@ static int restore_task_with_children(void *_arg)
> >  	if (create_children_and_session())
> >  		goto err_fini_mnt;
> >  
> > -	if (restore_task_mnt_ns(current))
> > -		goto err_fini_mnt;
> > +	if (!(ca->clone_flags & CLONE_FILES)) {
> > +		ret = close_old_fds(current);
> > +		if (ret)
> > +			goto err_fini_mnt;
> > +	}
> >  
> >  	if (unmap_guard_pages())
> >  		goto err_fini_mnt;
> > diff --git a/include/namespaces.h b/include/namespaces.h
> > index f511290..9b7dca3 100644
> > --- a/include/namespaces.h
> > +++ b/include/namespaces.h
> > @@ -37,6 +37,7 @@ struct ns_id {
> >  		struct {
> >  			struct mount_info *mntinfo_list;
> >  			struct mount_info *mntinfo_tree;
> > +			int ns_fd;
> >  		} mnt;
> >  
> >  		struct {
> > diff --git a/mount.c b/mount.c
> > index 5a5b90a..9666e5a 100644
> > --- a/mount.c
> > +++ b/mount.c
> > @@ -2590,36 +2590,13 @@ int mntns_maybe_create_roots(void)
> >  
> >  static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *current)
> >  {
> > -	char path[PATH_MAX];
> > -
> > -	if (nsid->ns_pid != current->pid.virt) {
> > -		int fd;
> > -
> > -		futex_wait_while_eq(&nsid->ns_created, 0);
> > -		fd = open_proc(nsid->ns_pid, "ns/mnt");
> > -		if (fd < 0)
> > -			return -1;
> > -
> > -		if (setns(fd, CLONE_NEWNS)) {
> > -			pr_perror("Unable to change mount namespace");
> > -			return -1;
> > -		}
> > -
> > -		close(fd);
> > -		return 0;
> > -	}
> > -
> > -	if (unshare(CLONE_NEWNS)) {
> > -		pr_perror("Unable to unshare mount namespace");
> > +	if (setns(nsid->mnt.ns_fd, CLONE_NEWNS)) {
> > +		pr_perror("Can't restore mntns");
> >  		return -1;
> >  	}
> >  
> > -	path[0] = '/';
> > -	print_ns_root(nsid, path + 1, sizeof(path) - 1);
> > -	if (cr_pivot_root(path))
> > -		return -1;
> > -
> > -	futex_set_and_wake(&nsid->ns_created, 1);
> > +	if (nsid->ns_pid == current->pid.virt)
> > +		futex_set_and_wake(&nsid->ns_created, 1);
> >  
> >  	return 0;
> >  }
> > @@ -2642,6 +2619,9 @@ int restore_task_mnt_ns(struct pstree_item *current)
> >  		if (root_item->ids->mnt_ns_id == id)
> >  			return 0;
> >  
> > +		if (current->ids->mnt_ns_id == current->parent->ids->mnt_ns_id)
> > +			return 0;
> 
> This check and the above one can be both merged into
> 
> if (!current->parent || id == current->parent->ids->mnt_ns_id)
> 	return 0;
> 
> doesn't it?

Yes. will do
> 
> > +
> >  		nsid = lookup_ns_by_id(id, &mnt_ns_desc);
> >  		if (nsid == NULL) {
> >  			pr_err("Can't find mount namespace %d\n", id);
> > @@ -2762,9 +2742,10 @@ void cleanup_mnt_ns(void)
> >  
> >  int prepare_mnt_ns(void)
> >  {
> > -	int ret = -1;
> > +	int ret = -1, rst = -1;
> >  	struct mount_info *old;
> >  	struct ns_id ns = { .type = NS_CRIU, .ns_pid = PROC_SELF, .nd = &mnt_ns_desc };
> > +	struct ns_id *nsid;
> >  
> >  	if (!(root_ns_mask & CLONE_NEWNS))
> >  		return rst_collect_local_mntns();
> > @@ -2842,7 +2823,43 @@ int prepare_mnt_ns(void)
> >  	if (!ret && opts.root)
> >  		ret = cr_pivot_root(NULL);
> >  
> > +	rst = open_proc(PROC_SELF, "ns/mnt");
> > +	if (rst < 0)
> > +		return -1;
> > +	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> > +		char path[PATH_MAX];
> > +
> > +		if (nsid->nd != &mnt_ns_desc)
> > +			continue;
> > +		if (root_item->ids->mnt_ns_id == nsid->id)
> > +			continue;
> > +
> > +		if (unshare(CLONE_NEWNS)) {
> 
> What for? Please, add a comment describing what's going on here.

How can we restore a namespace without creating it?;)

> 
> > +			pr_perror("Unable to create a new mntns");
> > +			goto err;
> > +		}
> > +
> > +		path[0] = '/';
> > +		print_ns_root(nsid, path + 1, sizeof(path) - 1);
> > +		if (cr_pivot_root(path))
> > +			goto err;
> > +
> > +		nsid->mnt.ns_fd = open_proc(PROC_SELF, "ns/mnt");
> > +		if (nsid->mnt.ns_fd < 0)
> > +			goto err;
> > +
> > +		if (setns(rst, CLONE_NEWNS)) {
> > +			pr_perror("Can't restore mntns back");
> > +			goto err;
> > +		}
> 
> Why do we always need to get back to original mntns? Add comment here too.

to get access to the roots yard

> 
> > +	}
> > +	close(rst);
> > +
> >  	return ret;
> > +err:
> > +	if (rst)
> > +		restore_ns(rst, &mnt_ns_desc);
> > +	return -1;
> >  }
> >  
> >  int __mntns_get_root_fd(pid_t pid)
> > 
> 


More information about the CRIU mailing list