[CRIU] [PATCH 10/13] crtools: restore nested mount namespaces
Pavel Emelyanov
xemul at parallels.com
Wed Apr 9 05:31:43 PDT 2014
On 04/09/2014 04:13 AM, Andrey Vagin wrote:
> Known issues:
> * currently only namespaces with the same root are supported
> * nested namespaces can be dumped and restored only if the root task
> has its own mount namespace.
>
> All nested namespaces are restored in the root namespace in temporary
> directories. All mount points are restored in one tree and then they are
> divided into namespaces.
> The task with the minimal pid in each namespace unshares its mntns and
> then does pivot_root into the proper temporary directory. All other
> tasks call setns to enter the mount namespace of the task with the
> minimal pid.
>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> cr-restore.c | 12 +++++
> include/mount.h | 2 +
> include/namespaces.h | 1 +
> mount.c | 139 ++++++++++++++++++++++++++++++++++++++++-----------
> namespaces.c | 1 +
> 5 files changed, 126 insertions(+), 29 deletions(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 2611bb5..e18ac55 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1281,6 +1281,18 @@ static int restore_task_with_children(void *_arg)
> if (create_children_and_session())
> goto err;
>
> + if (current->ids && current->ids->has_mnt_ns_id) {
> + struct ns_id *nsid;
> +
> + nsid = lookup_ns_by_id(current->ids->mnt_ns_id);
> + if (nsid == NULL) {
> + pr_err("Can't find mount namespace %d\n", current->ids->mnt_ns_id);
> + goto err;
> + }
> + if (restore_task_mnt_ns(nsid, current->pid.real))
> + goto err;
> + }
> +
> if (unmap_guard_pages())
> goto err;
>
> diff --git a/include/mount.h b/include/mount.h
> index c71f6e0..2b61811 100644
> --- a/include/mount.h
> +++ b/include/mount.h
> @@ -25,6 +25,8 @@ extern struct ns_desc mnt_ns_desc;
> extern dev_t phys_stat_resolve_dev(dev_t st_dev, const char *path);
> extern bool phys_stat_dev_match(dev_t st_dev, dev_t phys_dev, const char *path);
>
> +struct ns_id;
> +extern int restore_task_mnt_ns(struct ns_id *nsid, pid_t pid);
> extern int fini_mnt_ns(void);
>
> #endif /* __CR_MOUNT_H__ */
> diff --git a/include/namespaces.h b/include/namespaces.h
> index d574438..a908332 100644
> --- a/include/namespaces.h
> +++ b/include/namespaces.h
> @@ -15,6 +15,7 @@ struct ns_id {
> pid_t pid;
> struct ns_desc *nd;
> struct ns_id *next;
> + futex_t created; /* boolean */
> };
> extern struct ns_id *ns_ids;
>
> diff --git a/mount.c b/mount.c
> index d8b3ded..a90a104 100644
> --- a/mount.c
> +++ b/mount.c
> @@ -1392,17 +1392,14 @@ static char *get_mnt_roots(bool create)
>
> }
>
> -static struct mount_info *read_mnt_ns_img(int ns_pid)
> +static int collect_mnt_from_image(struct mount_info **pms, struct ns_id *nsid)
> {
> MntEntry *me = NULL;
> int img, ret;
> - struct mount_info *pms = NULL;
> -
> - pr_info("Populating mount namespace\n");
>
> - img = open_image(CR_FD_MNTS, O_RSTR, ns_pid);
> + img = open_image(CR_FD_MNTS, O_RSTR, nsid->id);
> if (img < 0)
> - return NULL;
> + return -1;
>
> pr_debug("Reading mountpoint images\n");
>
> @@ -1418,8 +1415,8 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
> if (!pm)
> goto err;
>
> - pm->next = pms;
> - pms = pm;
> + pm->next = *pms;
> + *pms = pm;
>
> pm->mnt_id = me->mnt_id;
> pm->parent_mnt_id = me->parent_mnt_id;
> @@ -1438,20 +1435,43 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
> if (!pm->root)
> goto err;
>
> - pr_debug("\t\tGetting mpt for %d:%s\n", pm->mnt_id, me->mountpoint);
> - len = strlen(me->mountpoint) + 2;
> - pm->mountpoint = xmalloc(len);
> - if (!pm->mountpoint)
> - goto err;
> - /*
> - * For bind-mounts we would also fix the root here
> - * too, but bind-mounts restore merges mountpoint
> - * and root paths together, so there's no need in
> - * that.
> - */
> + if (nsid->id == root_item->ids->mnt_ns_id) {
> + len = strlen(me->mountpoint) + 2;
> + pm->mountpoint = xmalloc(len);
> + if (!pm->mountpoint)
> + goto err;
> + /*
> + * For bind-mounts we would also fix the root here
> + * too, but bind-mounts restore merges mountpoint
> + * and root paths together, so there's no need in
> + * that.
> + */
> +
> + pm->mountpoint[0] = '.';
> + strcpy(pm->mountpoint + 1, me->mountpoint);
> + } else {
> + char *mnt_roots = get_mnt_roots(false);
> +
> + BUG_ON(mnt_roots == NULL);
> +
> + /* All non-root mount namespaces are restored in
> + * a separate temporary directory, then the process with
> + * the minimal pid creates a new mount namespace and
> + * changes the root filesystem (pivot_root).
> + */
> +
> + len = snprintf(NULL, 0, "%s/%d%s",
> + mnt_roots, nsid->id, me->mountpoint);
> +
> + pm->mountpoint = xmalloc(len + 1);
> + if (pm->mountpoint == NULL)
> + goto err;
> +
> + snprintf(pm->mountpoint, len + 1,
> + "%s/%d%s", mnt_roots, nsid->id, me->mountpoint);
Can the prepending of "." and of "%s/%d" be done by the same code?
> + }
>
> - pm->mountpoint[0] = '.';
> - strcpy(pm->mountpoint + 1, me->mountpoint);
> + pr_debug("\t\tGetting mpt for %d %s\n", pm->mnt_id, pm->mountpoint);
>
> pr_debug("\t\tGetting source for %d\n", pm->mnt_id);
> pm->source = xstrdup(me->source);
> @@ -1470,18 +1490,79 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
> mnt_entry__free_unpacked(me, NULL);
>
> close(img);
> - return pms;
>
> + return 0;
> err:
> - while (pms) {
> - struct mount_info *pm = pms;
> - pms = pm->next;
> - mnt_entry_free(pm);
> - }
> close_safe(&img);
> + return -1;
> +}
> +
> +static struct mount_info *read_mnt_ns_img()
Collecting several mount namespaces should be done in a separate patch.
> +{
> + struct mount_info *pms = NULL;
> + struct ns_id *nsid;
> + char *mnt_roots;
> +
> + nsid = ns_ids;
> + while (nsid) {
> + if (nsid->nd != &mnt_ns_desc) {
> + nsid = nsid->next;
> + continue;
> + }
> +
> + if (nsid->id != root_item->ids->mnt_ns_id) {
> + mnt_roots = get_mnt_roots(true);
> + if (mnt_roots == NULL)
> + return NULL;
> + }
> +
> + if (collect_mnt_from_image(&pms, nsid))
> + goto err;
> +
> + nsid = nsid->next;
> + }
> + return pms;
> +err:
> return NULL;
> }
>
> +int restore_task_mnt_ns(struct ns_id *nsid, pid_t pid)
> +{
> + char path[PATH_MAX];
> +
> + if (root_item->ids->mnt_ns_id == nsid->id)
> + return 0;
> +
> + if (nsid->pid != getpid()) {
> + int fd;
> +
> + futex_wait_while_eq(&nsid->created, 0);
> + fd = open_proc(nsid->pid, "ns/mnt");
> + if (fd < 0)
> + return -1;
> +
> + if (setns(fd, CLONE_NEWNS)) {
> + pr_perror("Unable to change mount namespace");
> + return -1;
> + }
A close(fd); is missing here: fd is never closed after setns().
> + return 0;
> + }
> +
> + if (unshare(CLONE_NEWNS)) {
> + pr_perror("Unable to unshare mount namespace");
> + return -1;
> + }
> +
> + snprintf(path, sizeof(path), "%s/%d/", get_mnt_roots(false), nsid->id);
> +
> + if (cr_pivot_root(path))
> + return -1;
> +
> + futex_set_and_wake(&nsid->created, 1);
> +
> + return 0;
> +}
> +
> /*
> * All nested mount namespaces are restore as sub-trees of the root namespace.
> */
> @@ -1521,7 +1602,7 @@ static int prepare_temporary_roots()
> return 0;
> }
>
> -static int populate_mnt_ns(int ns_pid, struct mount_info *mis)
> +static int populate_mnt_ns(struct mount_info *mis)
Trash
> {
> struct mount_info *pms;
>
> @@ -1626,7 +1707,7 @@ int prepare_mnt_ns(int ns_pid)
>
> free_mounts();
>
> - ret = populate_mnt_ns(ns_pid, mis);
> + ret = populate_mnt_ns(mis);
Trash
> if (ret)
> goto out;
>
> diff --git a/namespaces.c b/namespaces.c
> index 120a5a1..2797de2 100644
> --- a/namespaces.c
> +++ b/namespaces.c
> @@ -136,6 +136,7 @@ int rst_add_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd)
> nsid->nd = nd;
> nsid->id = id;
> nsid->pid = pid;
> + futex_set(&nsid->created, 0);
>
> nsid->next = ns_ids;
> ns_ids = nsid;
>
More information about the CRIU
mailing list