[CRIU] [PATCH 10/27] crtools: restore nested mount namespaces (v2)

Andrey Vagin avagin at openvz.org
Thu Apr 10 04:03:53 PDT 2014


Known issues:
* currently only namespaces with the same root are supported
* nested namespaces can be dumped and restored only if the root task
  has its own mount namespace.

All nested namespaces are restored in the root namespace, in temporary
directories. All mount points are restored in one tree and are then
divided into namespaces.
For each namespace, the task with the minimal pid unshares its mount
namespace and then calls pivot_root into the corresponding temporary
directory. All other tasks call setns to enter the mount namespace of
the task with the minimal pid.

v2: clean up

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 cr-restore.c         |  12 ++++++
 include/mount.h      |   2 +
 include/namespaces.h |   1 +
 mount.c              | 101 +++++++++++++++++++++++++++++++++++++++++----------
 namespaces.c         |   1 +
 5 files changed, 98 insertions(+), 19 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index 2953c93..7066f47 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1286,6 +1286,18 @@ static int restore_task_with_children(void *_arg)
 	if (create_children_and_session())
 		goto err;
 
+	if (current->ids && current->ids->has_mnt_ns_id) {
+		struct ns_id *nsid;
+
+		nsid = lookup_ns_by_id(current->ids->mnt_ns_id, &mnt_ns_desc);
+		if (nsid == NULL) {
+			pr_err("Can't find mount namespace %d\n", current->ids->mnt_ns_id);
+			goto err;
+		}
+		if (restore_task_mnt_ns(nsid, current->pid.real))
+			goto err;
+	}
+
 	if (unmap_guard_pages())
 		goto err;
 
diff --git a/include/mount.h b/include/mount.h
index c71f6e0..2b61811 100644
--- a/include/mount.h
+++ b/include/mount.h
@@ -25,6 +25,8 @@ extern struct ns_desc mnt_ns_desc;
 extern dev_t phys_stat_resolve_dev(dev_t st_dev, const char *path);
 extern bool phys_stat_dev_match(dev_t st_dev, dev_t phys_dev, const char *path);
 
+struct ns_id;
+extern int restore_task_mnt_ns(struct ns_id *nsid, pid_t pid);
 extern int fini_mnt_ns(void);
 
 #endif /* __CR_MOUNT_H__ */
diff --git a/include/namespaces.h b/include/namespaces.h
index ec410e2..a1fc714 100644
--- a/include/namespaces.h
+++ b/include/namespaces.h
@@ -15,6 +15,7 @@ struct ns_id {
 	pid_t pid;
 	struct ns_desc *nd;
 	struct ns_id *next;
+	futex_t created; /* boolean */
 };
 extern struct ns_id *ns_ids;
 
diff --git a/mount.c b/mount.c
index 9214daf..092fa86 100644
--- a/mount.c
+++ b/mount.c
@@ -1414,23 +1414,21 @@ static char *get_mnt_roots(bool create)
 
 }
 
-static struct mount_info *read_mnt_ns_img(int ns_pid)
+static int collect_mnt_from_image(struct mount_info **pms, struct ns_id *nsid)
 {
 	MntEntry *me = NULL;
 	int img, ret;
-	struct mount_info *pms = NULL;
-
-	pr_info("Populating mount namespace\n");
 
-	img = open_image(CR_FD_MNTS, O_RSTR, ns_pid);
+	img = open_image(CR_FD_MNTS, O_RSTR, nsid->id);
 	if (img < 0)
-		return NULL;
+		return -1;
 
 	pr_debug("Reading mountpoint images\n");
 
 	while (1) {
 		struct mount_info *pm;
-		int len;
+		char root[PATH_MAX] = ".";
+		int len, root_len = 1;
 
 		ret = pb_read_one_eof(img, &me, PB_MNT);
 		if (ret <= 0)
@@ -1440,8 +1438,8 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
 		if (!pm)
 			goto err;
 
-		pm->next = pms;
-		pms = pm;
+		pm->next = *pms;
+		*pms = pm;
 
 		pm->mnt_id		= me->mnt_id;
 		pm->parent_mnt_id	= me->parent_mnt_id;
@@ -1460,8 +1458,10 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
 		if (!pm->root)
 			goto err;
 
-		pr_debug("\t\tGetting mpt for %d:%s\n", pm->mnt_id, me->mountpoint);
-		len  = strlen(me->mountpoint) + 2;
+		if (nsid->id != root_item->ids->mnt_ns_id)
+			root_len = snprintf(root, sizeof(root), "%s/%d/",
+						get_mnt_roots(false), nsid->id);
+		len  = strlen(me->mountpoint) + root_len + 1;
 		pm->mountpoint = xmalloc(len);
 		if (!pm->mountpoint)
 			goto err;
@@ -1472,8 +1472,10 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
 		 * that.
 		 */
 
-		pm->mountpoint[0] = '.';
-		strcpy(pm->mountpoint + 1, me->mountpoint);
+		strcpy(pm->mountpoint, root);
+		strcpy(pm->mountpoint + root_len, me->mountpoint);
+
+		pr_debug("\t\tGetting mpt for %d %s\n", pm->mnt_id, pm->mountpoint);
 
 		pr_debug("\t\tGetting source for %d\n", pm->mnt_id);
 		pm->source = xstrdup(me->source);
@@ -1492,18 +1494,79 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
 		mnt_entry__free_unpacked(me, NULL);
 
 	close(img);
-	return pms;
 
+	return 0;
 err:
-	while (pms) {
-		struct mount_info *pm = pms;
-		pms = pm->next;
-		mnt_entry_free(pm);
-	}
 	close_safe(&img);
+	return -1;
+}
+
+static struct mount_info *read_mnt_ns_img()
+{
+	struct mount_info *pms = NULL;
+	struct ns_id *nsid;
+	char *mnt_roots;
+
+	nsid = ns_ids;
+	while (nsid) {
+		if (nsid->nd != &mnt_ns_desc) {
+			nsid = nsid->next;
+			continue;
+		}
+
+		if (nsid->id != root_item->ids->mnt_ns_id) {
+			mnt_roots = get_mnt_roots(true);
+			if (mnt_roots == NULL)
+				return NULL;
+		}
+
+		if (collect_mnt_from_image(&pms, nsid))
+			goto err;
+
+		nsid = nsid->next;
+	}
+	return pms;
+err:
 	return NULL;
 }
 
+int restore_task_mnt_ns(struct ns_id *nsid, pid_t pid)
+{
+	char path[PATH_MAX];
+
+	if (root_item->ids->mnt_ns_id == nsid->id)
+		return 0;
+
+	if (nsid->pid != getpid()) {
+		int fd;
+
+		futex_wait_while_eq(&nsid->created, 0);
+		fd = open_proc(nsid->pid, "ns/mnt");
+		if (fd < 0)
+			return -1;
+
+		if (setns(fd, CLONE_NEWNS)) {
+			pr_perror("Unable to change mount namespace");
+			return -1;
+		}
+		return 0;
+	}
+
+	if (unshare(CLONE_NEWNS)) {
+		pr_perror("Unable to unshare mount namespace");
+		return -1;
+	}
+
+	snprintf(path, sizeof(path), "%s/%d/", get_mnt_roots(false), nsid->id);
+
+	if (cr_pivot_root(path))
+		return -1;
+
+	futex_set_and_wake(&nsid->created, 1);
+
+	return 0;
+}
+
 /*
  * All nested mount namespaces are restore as sub-trees of the root namespace.
  */
diff --git a/namespaces.c b/namespaces.c
index 31ca7c9..6597bcc 100644
--- a/namespaces.c
+++ b/namespaces.c
@@ -136,6 +136,7 @@ int rst_add_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd)
 	nsid->nd = nd;
 	nsid->id = id;
 	nsid->pid = pid;
+	futex_set(&nsid->created, 0);
 
 	nsid->next = ns_ids;
 	ns_ids = nsid;
-- 
1.8.5.3



More information about the CRIU mailing list