[CRIU] [PATCH 1/3] mount: don't inherit mount namespace descriptors to each process

Andrey Vagin avagin at openvz.org
Sat Nov 14 22:07:03 PST 2015


From: Andrew Vagin <avagin at virtuozzo.com>

close_old_fds() knows nothing about more than one set of service file
descriptors, so it's better to call it before forking children, as it was
before 9d60724eca71 ("restore: restore mntns before creating private vma-s")

The root task restores all processes and pins them with file descriptors;
a task then restores its mount namespace by opening the corresponding file
descriptor of the root task via /proc/<pid>/fd/<X>.

Reported-by: Mr Jenkins
Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
 cr-restore.c    | 14 +++++++++-----
 include/mount.h |  1 +
 mount.c         | 25 ++++++++++++++++++++++---
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/cr-restore.c b/cr-restore.c
index c132588..c53cb13 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1509,9 +1509,6 @@ static int restore_task_with_children(void *_arg)
 		if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
 			goto err;
 
-		if (prepare_namespace(current, ca->clone_flags))
-			goto err_fini_mnt;
-
 		/*
 		 * We need non /proc proc mount for restoring pid and mount
 		 * namespaces and do not care for the rest of the cases.
@@ -1520,6 +1517,9 @@ static int restore_task_with_children(void *_arg)
 		if (mount_proc())
 			goto err_fini_mnt;
 
+		if (prepare_namespace(current, ca->clone_flags))
+			goto err_fini_mnt;
+
 		if (root_prepare_shared())
 			goto err_fini_mnt;
 
@@ -1567,8 +1567,12 @@ static int restore_task_with_children(void *_arg)
 	if (restore_finish_stage(CR_STATE_FORKING) < 0)
 		goto err_fini_mnt;
 
-	if (current->parent == NULL && depopulate_roots_yard())
-		goto err;
+	if (current->parent == NULL) {
+		if (depopulate_roots_yard())
+			goto err;
+
+		fini_restore_mntns();
+	}
 
 	if (restore_one_task(current->pid.virt, ca->core))
 		goto err;
diff --git a/include/mount.h b/include/mount.h
index 3b4d472..1228137 100644
--- a/include/mount.h
+++ b/include/mount.h
@@ -114,6 +114,7 @@ extern bool phys_stat_dev_match(dev_t st_dev, dev_t phys_dev,
 				struct ns_id *, const char *path);
 
 extern int restore_task_mnt_ns(struct pstree_item *current);
+extern void fini_restore_mntns(void);
 extern int depopulate_roots_yard(void);
 
 extern int rst_get_mnt_root(int mnt_id, char *path, int plen);
diff --git a/mount.c b/mount.c
index 0670a24..c7e2dde 100644
--- a/mount.c
+++ b/mount.c
@@ -2590,10 +2590,18 @@ int mntns_maybe_create_roots(void)
 
 static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *current)
 {
-	if (setns(nsid->mnt.ns_fd, CLONE_NEWNS)) {
+	int fd;
+
+	fd = open_proc(root_item->pid.virt, "fd/%d", nsid->mnt.ns_fd);
+	if (fd < 0)
+		return -1;
+
+	if (setns(fd, CLONE_NEWNS)) {
 		pr_perror("Can't restore mntns");
+		close(fd);
 		return -1;
 	}
+	close(fd);
 
 	if (nsid->ns_pid == current->pid.virt)
 		futex_set_and_wake(&nsid->ns_created, 1);
@@ -2634,6 +2642,19 @@ int restore_task_mnt_ns(struct pstree_item *current)
 	return 0;
 }
 
+void fini_restore_mntns(void)
+{
+	struct ns_id *nsid;
+
+	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+		if (nsid->nd != &mnt_ns_desc)
+			continue;
+		if (root_item->ids->mnt_ns_id == nsid->id)
+			continue;
+		close(nsid->mnt.ns_fd);
+	}
+}
+
 /*
  * All nested mount namespaces are restore as sub-trees of the root namespace.
  */
@@ -2753,8 +2774,6 @@ int prepare_mnt_ns(void)
 	if (old == NULL)
 		return -1;
 
-	close_proc();
-
 	if (!opts.root) {
 		if (chdir("/")) {
 			pr_perror("chdir(\"/\") failed");
-- 
2.4.3



More information about the CRIU mailing list