[CRIU] [PATCH 11/13] crtools: restore nested mount namespaces

Andrey Vagin avagin at openvz.org
Tue Mar 11 08:18:27 PDT 2014


Known issues:
* currently only namespaces with the same root are supported
* nested namespaces can be dumped and restored only if the root task
  has its own mount namespace.

All nested namespaces are restored in the root namespace in temporary
directories. All mount points are restored in one tree and then they are
divided into namespaces.
For each namespace, the task with the minimal pid unshares the mount
namespace and then does pivot_root into the proper temporary directory.
All other tasks call setns to enter the mount namespace of the task with
the minimal pid.

Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 cr-restore.c         |  12 +++++
 include/mount.h      |   2 +
 include/namespaces.h |   1 +
 mount.c              | 139 ++++++++++++++++++++++++++++++++++++++++-----------
 namespaces.c         |   1 +
 5 files changed, 126 insertions(+), 29 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index 6430531..880f531 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1282,6 +1282,18 @@ static int restore_task_with_children(void *_arg)
 	if (create_children_and_session())
 		goto err;
 
+	if (current->ids && current->ids->has_mnt_ns_id) {
+		struct ns_id *nsid;
+
+		nsid = lookup_ns_by_id(current->ids->mnt_ns_id);
+		if (nsid == NULL) {
+			pr_err("Can't find mount namespace %d\n", current->ids->mnt_ns_id);
+			goto err;
+		}
+		if (restore_task_mnt_ns(nsid, current->pid.real))
+			goto err;
+	}
+
 	if (unmap_guard_pages())
 		goto err;
 
diff --git a/include/mount.h b/include/mount.h
index c71f6e0..2b61811 100644
--- a/include/mount.h
+++ b/include/mount.h
@@ -25,6 +25,8 @@ extern struct ns_desc mnt_ns_desc;
 extern dev_t phys_stat_resolve_dev(dev_t st_dev, const char *path);
 extern bool phys_stat_dev_match(dev_t st_dev, dev_t phys_dev, const char *path);
 
+struct ns_id;
+extern int restore_task_mnt_ns(struct ns_id *nsid, pid_t pid);
 extern int fini_mnt_ns(void);
 
 #endif /* __CR_MOUNT_H__ */
diff --git a/include/namespaces.h b/include/namespaces.h
index 065c874..3faf223 100644
--- a/include/namespaces.h
+++ b/include/namespaces.h
@@ -15,6 +15,7 @@ struct ns_id {
 	pid_t pid;
 	struct ns_desc *nd;
 	struct ns_id *next;
+	futex_t created; /* boolean */
 };
 extern struct ns_id *ns_ids;
 
diff --git a/mount.c b/mount.c
index 29da4c7..d4db092 100644
--- a/mount.c
+++ b/mount.c
@@ -1333,17 +1333,14 @@ static char *get_mnt_roots(bool create)
 
 }
 
-static struct mount_info *read_mnt_ns_img(int ns_pid)
+static int collect_mnt_from_image(struct mount_info **pms, struct ns_id *nsid)
 {
 	MntEntry *me = NULL;
 	int img, ret;
-	struct mount_info *pms = NULL;
-
-	pr_info("Populating mount namespace\n");
 
-	img = open_image(CR_FD_MNTS, O_RSTR, ns_pid);
+	img = open_image(CR_FD_MNTS, O_RSTR, nsid->id);
 	if (img < 0)
-		return NULL;
+		return -1;
 
 	pr_debug("Reading mountpoint images\n");
 
@@ -1359,8 +1356,8 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
 		if (!pm)
 			goto err;
 
-		pm->next = pms;
-		pms = pm;
+		pm->next = *pms;
+		*pms = pm;
 
 		pm->mnt_id		= me->mnt_id;
 		pm->parent_mnt_id	= me->parent_mnt_id;
@@ -1379,20 +1376,43 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
 		if (!pm->root)
 			goto err;
 
-		pr_debug("\t\tGetting mpt for %d:%s\n", pm->mnt_id, me->mountpoint);
-		len  = strlen(me->mountpoint) + 2;
-		pm->mountpoint = xmalloc(len);
-		if (!pm->mountpoint)
-			goto err;
-		/*
-		 * For bind-mounts we would also fix the root here
-		 * too, but bind-mounts restore merges mountpoint
-		 * and root paths together, so there's no need in
-		 * that.
-		 */
+		if (nsid->id == root_item->ids->mnt_ns_id) {
+			len  = strlen(me->mountpoint) + 2;
+			pm->mountpoint = xmalloc(len);
+			if (!pm->mountpoint)
+				goto err;
+			/*
+			 * For bind-mounts we would also fix the root here
+			 * too, but bind-mounts restore merges mountpoint
+			 * and root paths together, so there's no need in
+			 * that.
+			 */
+
+			pm->mountpoint[0] = '.';
+			strcpy(pm->mountpoint + 1, me->mountpoint);
+		} else {
+			char *mnt_roots = get_mnt_roots(false);
+
+			BUG_ON(mnt_roots == NULL);
+
+			/* All non-root mount namespaces are restored in
+			 * a separate temporary directory, then a process with
+			 * minimal pid creates a new mount namespace and
+			 * changes the root filesystem (pivot_root).
+			 */
+
+			len = snprintf(NULL, 0, "%s/%d%s",
+					mnt_roots, nsid->id, me->mountpoint);
+
+			pm->mountpoint = xmalloc(len + 1);
+			if (pm->mountpoint == NULL)
+				goto err;
+
+			snprintf(pm->mountpoint, len + 1,
+					"%s/%d%s", mnt_roots, nsid->id, me->mountpoint);
+		}
 
-		pm->mountpoint[0] = '.';
-		strcpy(pm->mountpoint + 1, me->mountpoint);
+		pr_debug("\t\tGetting mpt for %d %s\n", pm->mnt_id, pm->mountpoint);
 
 		pr_debug("\t\tGetting source for %d\n", pm->mnt_id);
 		pm->source = xstrdup(me->source);
@@ -1411,18 +1431,79 @@ static struct mount_info *read_mnt_ns_img(int ns_pid)
 		mnt_entry__free_unpacked(me, NULL);
 
 	close(img);
-	return pms;
 
+	return 0;
 err:
-	while (pms) {
-		struct mount_info *pm = pms;
-		pms = pm->next;
-		mnt_entry_free(pm);
-	}
 	close_safe(&img);
+	return -1;
+}
+
+static struct mount_info *read_mnt_ns_img()
+{
+	struct mount_info *pms = NULL;
+	struct ns_id *nsid;
+	char *mnt_roots;
+
+	nsid = ns_ids;
+	while (nsid) {
+		if (nsid->nd != &mnt_ns_desc) {
+			nsid = nsid->next;
+			continue;
+		}
+
+		if (nsid->id != root_item->ids->mnt_ns_id) {
+			mnt_roots = get_mnt_roots(true);
+			if (mnt_roots == NULL)
+				return NULL;
+		}
+
+		if (collect_mnt_from_image(&pms, nsid))
+			goto err;
+
+		nsid = nsid->next;
+	}
+	return pms;
+err:
 	return NULL;
 }
 
+int restore_task_mnt_ns(struct ns_id *nsid, pid_t pid)
+{
+	char path[PATH_MAX];
+
+	if (root_item->ids->mnt_ns_id == nsid->id)
+		return 0;
+
+	if (nsid->pid != getpid()) {
+		int fd;
+
+		futex_wait_while_eq(&nsid->created, 0);
+		fd = open_proc(nsid->pid, "ns/mnt");
+		if (fd < 0)
+			return -1;
+
+		if (setns(fd, CLONE_NEWNS)) {
+			pr_perror("Unable to change mount namespace");
+			return -1;
+		}
+		return 0;
+	}
+
+	if (unshare(CLONE_NEWNS)) {
+		pr_perror("Unable to unshare mount namespace");
+		return -1;
+	}
+
+	snprintf(path, sizeof(path), "%s/%d/", get_mnt_roots(false), nsid->id);
+
+	if (cr_pivot_root(path))
+		return -1;
+
+	futex_set_and_wake(&nsid->created, 1);
+
+	return 0;
+}
+
 /*
  * All nested mount namespaces are restore as sub-trees of the root namespace.
  */
@@ -1462,7 +1543,7 @@ static int prepare_temporary_roots()
 	return 0;
 }
 
-static int populate_mnt_ns(int ns_pid, struct mount_info *mis)
+static int populate_mnt_ns(struct mount_info *mis)
 {
 	struct mount_info *pms;
 
@@ -1544,7 +1625,7 @@ int prepare_mnt_ns(int ns_pid)
 
 	free_mounts();
 
-	ret = populate_mnt_ns(ns_pid, mis);
+	ret = populate_mnt_ns(mis);
 	if (ret)
 		goto out;
 
diff --git a/namespaces.c b/namespaces.c
index 145730b..f45e088 100644
--- a/namespaces.c
+++ b/namespaces.c
@@ -136,6 +136,7 @@ int add_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd)
 	nsid->nd = nd;
 	nsid->id = id;
 	nsid->pid = pid;
+	futex_set(&nsid->created, 0);
 
 	nsid->next = ns_ids;
 	ns_ids = nsid;
-- 
1.8.5.3



More information about the CRIU mailing list