[CRIU] [PATCH 10/13] crtools: restore nested mount namespaces

Pavel Emelyanov xemul at parallels.com
Wed Apr 9 05:31:43 PDT 2014


On 04/09/2014 04:13 AM, Andrey Vagin wrote:
> Known issues:
> * currently only namespaces with the same root are supported
> * nested namespaces can be dumped and restored only if the root task
>   has its own mount namespace.
> 
> All nested namespaces are restored in the root namespace in temporary
> directories. All mount points are restored in one tree and then they are
> divided into namespaces.
> For each namespace, the task with the minimal pid unshares its mntns and
> then does pivot_root into the proper temporary directory. All other
> tasks call setns to enter the mount namespace of the task with the
> minimal pid.
> 
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  cr-restore.c         |  12 +++++
>  include/mount.h      |   2 +
>  include/namespaces.h |   1 +
>  mount.c              | 139 ++++++++++++++++++++++++++++++++++++++++-----------
>  namespaces.c         |   1 +
>  5 files changed, 126 insertions(+), 29 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 2611bb5..e18ac55 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1281,6 +1281,18 @@ static int restore_task_with_children(void *_arg)
>  	if (create_children_and_session())
>  		goto err;
>  
> +	if (current->ids && current->ids->has_mnt_ns_id) {
> +		struct ns_id *nsid;
> +
> +		nsid = lookup_ns_by_id(current->ids->mnt_ns_id);
> +		if (nsid == NULL) {
> +			pr_err("Can't find mount namespace %d\n", current->ids->mnt_ns_id);
> +			goto err;
> +		}
> +		if (restore_task_mnt_ns(nsid, current->pid.real))
> +			goto err;
> +	}
> +
>  	if (unmap_guard_pages())
>  		goto err;
>  
> diff --git a/include/mount.h b/include/mount.h
> index c71f6e0..2b61811 100644
> --- a/include/mount.h
> +++ b/include/mount.h
> @@ -25,6 +25,8 @@ extern struct ns_desc mnt_ns_desc;
>  extern dev_t phys_stat_resolve_dev(dev_t st_dev, const char *path);
>  extern bool phys_stat_dev_match(dev_t st_dev, dev_t phys_dev, const char *path);
>  
> +struct ns_id;
> +extern int restore_task_mnt_ns(struct ns_id *nsid, pid_t pid);
>  extern int fini_mnt_ns(void);
>  
>  #endif /* __CR_MOUNT_H__ */
> diff --git a/include/namespaces.h b/include/namespaces.h
> index d574438..a908332 100644
> --- a/include/namespaces.h
> +++ b/include/namespaces.h
> @@ -15,6 +15,7 @@ struct ns_id {
>  	pid_t pid;
>  	struct ns_desc *nd;
>  	struct ns_id *next;
> +	futex_t created; /* boolean */
>  };
>  extern struct ns_id *ns_ids;
>  
> diff --git a/mount.c b/mount.c
> index d8b3ded..a90a104 100644
> --- a/mount.c
> +++ b/mount.c
> @@ -1392,17 +1392,14 @@ static char *get_mnt_roots(bool create)
>  
>  }
>  
> -static struct mount_info *read_mnt_ns_img(int ns_pid)
> +static int collect_mnt_from_image(struct mount_info **pms, struct ns_id *nsid)
>  {
>  	MntEntry *me = NULL;
>  	int img, ret;
> -	struct mount_info *pms = NULL;
> -
> -	pr_info("Populating mount namespace\n");
>  
> -	img = open_image(CR_FD_MNTS, O_RSTR, ns_pid);
> +	img = open_image(CR_FD_MNTS, O_RSTR, nsid->id);
>  	if (img < 0)
> -		return NULL;
> +		return -1;
>  
>  	pr_debug("Reading mountpoint images\n");
>  
> @@ -1418,8 +1415,8 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
>  		if (!pm)
>  			goto err;
>  
> -		pm->next = pms;
> -		pms = pm;
> +		pm->next = *pms;
> +		*pms = pm;
>  
>  		pm->mnt_id		= me->mnt_id;
>  		pm->parent_mnt_id	= me->parent_mnt_id;
> @@ -1438,20 +1435,43 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
>  		if (!pm->root)
>  			goto err;
>  
> -		pr_debug("\t\tGetting mpt for %d:%s\n", pm->mnt_id, me->mountpoint);
> -		len  = strlen(me->mountpoint) + 2;
> -		pm->mountpoint = xmalloc(len);
> -		if (!pm->mountpoint)
> -			goto err;
> -		/*
> -		 * For bind-mounts we would also fix the root here
> -		 * too, but bind-mounts restore merges mountpoint
> -		 * and root paths together, so there's no need in
> -		 * that.
> -		 */
> +		if (nsid->id == root_item->ids->mnt_ns_id) {
> +			len  = strlen(me->mountpoint) + 2;
> +			pm->mountpoint = xmalloc(len);
> +			if (!pm->mountpoint)
> +				goto err;
> +			/*
> +			 * For bind-mounts we would also fix the root here
> +			 * too, but bind-mounts restore merges mountpoint
> +			 * and root paths together, so there's no need in
> +			 * that.
> +			 */
> +
> +			pm->mountpoint[0] = '.';
> +			strcpy(pm->mountpoint + 1, me->mountpoint);
> +		} else {
> +			char *mnt_roots = get_mnt_roots(false);
> +
> +			BUG_ON(mnt_roots == NULL);
> +
> +			/* All non-root mount namespaces are restored in
> +			 * a separate temporary directory, then a process with
> +			 * minimal pid will creates a new mount namespace and
> +			 * changes the root filesystem (pivot_root).
> +			 */
> +
> +			len = snprintf(NULL, 0, "%s/%d%s",
> +					mnt_roots, nsid->id, me->mountpoint);
> +
> +			pm->mountpoint = xmalloc(len + 1);
> +			if (pm->mountpoint == NULL)
> +				goto err;
> +
> +			snprintf(pm->mountpoint, len + 1,
> +					"%s/%d%s", mnt_roots, nsid->id, me->mountpoint);

Can we have the prepending of "." and of "%s/%d" done by the same code?

> +		}
>  
> -		pm->mountpoint[0] = '.';
> -		strcpy(pm->mountpoint + 1, me->mountpoint);
> +		pr_debug("\t\tGetting mpt for %d %s\n", pm->mnt_id, pm->mountpoint);
>  
>  		pr_debug("\t\tGetting source for %d\n", pm->mnt_id);
>  		pm->source = xstrdup(me->source);
> @@ -1470,18 +1490,79 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
>  		mnt_entry__free_unpacked(me, NULL);
>  
>  	close(img);
> -	return pms;
>  
> +	return 0;
>  err:
> -	while (pms) {
> -		struct mount_info *pm = pms;
> -		pms = pm->next;
> -		mnt_entry_free(pm);
> -	}
>  	close_safe(&img);
> +	return -1;
> +}
> +
> +static struct mount_info *read_mnt_ns_img()

Collecting several mount namespaces should be done in a separate patch.

> +{
> +	struct mount_info *pms = NULL;
> +	struct ns_id *nsid;
> +	char *mnt_roots;
> +
> +	nsid = ns_ids;
> +	while (nsid) {
> +		if (nsid->nd != &mnt_ns_desc) {
> +			nsid = nsid->next;
> +			continue;
> +		}
> +
> +		if (nsid->id != root_item->ids->mnt_ns_id) {
> +			mnt_roots = get_mnt_roots(true);
> +			if (mnt_roots == NULL)
> +				return NULL;
> +		}
> +
> +		if (collect_mnt_from_image(&pms, nsid))
> +			goto err;
> +
> +		nsid = nsid->next;
> +	}
> +	return pms;
> +err:
>  	return NULL;
>  }
>  
> +int restore_task_mnt_ns(struct ns_id *nsid, pid_t pid)
> +{
> +	char path[PATH_MAX];
> +
> +	if (root_item->ids->mnt_ns_id == nsid->id)
> +		return 0;
> +
> +	if (nsid->pid != getpid()) {
> +		int fd;
> +
> +		futex_wait_while_eq(&nsid->created, 0);
> +		fd = open_proc(nsid->pid, "ns/mnt");
> +		if (fd < 0)
> +			return -1;
> +
> +		if (setns(fd, CLONE_NEWNS)) {
> +			pr_perror("Unable to change mount namespace");
> +			return -1;
> +		}

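Missing close(fd); here -- the descriptor returned by open_proc() is leaked
after setns(), both on this success path and on the setns() error path above.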

> +		return 0;
> +	}
> +
> +	if (unshare(CLONE_NEWNS)) {
> +		pr_perror("Unable to unshare mount namespace");
> +		return -1;
> +	}
> +
> +	snprintf(path, sizeof(path), "%s/%d/", get_mnt_roots(false), nsid->id);
> +
> +	if (cr_pivot_root(path))
> +		return -1;
> +
> +	futex_set_and_wake(&nsid->created, 1);
> +
> +	return 0;
> +}
> +
>  /*
>   * All nested mount namespaces are restore as sub-trees of the root namespace.
>   */
> @@ -1521,7 +1602,7 @@ static int prepare_temporary_roots()
>  	return 0;
>  }
>  
> -static int populate_mnt_ns(int ns_pid, struct mount_info *mis)
> +static int populate_mnt_ns(struct mount_info *mis)

Trash

>  {
>  	struct mount_info *pms;
>  
> @@ -1626,7 +1707,7 @@ int prepare_mnt_ns(int ns_pid)
>  
>  	free_mounts();
>  
> -	ret = populate_mnt_ns(ns_pid, mis);
> +	ret = populate_mnt_ns(mis);

Trash

>  	if (ret)
>  		goto out;
>  
> diff --git a/namespaces.c b/namespaces.c
> index 120a5a1..2797de2 100644
> --- a/namespaces.c
> +++ b/namespaces.c
> @@ -136,6 +136,7 @@ int rst_add_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd)
>  	nsid->nd = nd;
>  	nsid->id = id;
>  	nsid->pid = pid;
> +	futex_set(&nsid->created, 0);
>  
>  	nsid->next = ns_ids;
>  	ns_ids = nsid;
> 




More information about the CRIU mailing list