[CRIU] [PATCH 3/3] mount: open a root directory for mount namesapces via the root task

Andrey Vagin avagin at openvz.org
Wed Jul 27 11:21:07 PDT 2016


From: Andrew Vagin <avagin at virtuozzo.com>

We use openat() syscall to open files in various mount namespaces.
Recently we found that if a path contains absolute symlinks, openat()
syscall resolves them relative to the current root, but they has to be
resolved relative to the root of the target namespace, so we need to
change root before restoring a file descriprot.

But for that we need to rework a method how we open a root directory
for a specified mount namespace. Currently we open /proc/pid/root for
one of processes from this namespace. If a process will change a root
directory, this way will not work.

In this patch we open all namespaces in the root task and then any
process will be able to open one of these descriptors via /proc/pid/fd.

Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
 criu/cr-restore.c | 32 ++++++++++++++------------------
 criu/mount.c      | 48 ++++++++++++++++++++++++++++++++----------------
 criu/pstree.c     |  1 +
 3 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 3437b76..1de2682 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -1469,16 +1469,6 @@ static int restore_task_with_children(void *_arg)
 
 	restore_pgid();
 
-	if (current->parent == NULL) {
-		/*
-		 * Wait when all tasks passed the CR_STATE_FORKING stage.
-		 * It means that all tasks entered into their namespaces.
-		 */
-		futex_wait_while_gt(&task_entries->nr_in_progress, 1);
-
-		fini_restore_mntns();
-	}
-
 	if (restore_finish_stage(CR_STATE_FORKING) < 0)
 		goto err;
 
@@ -2907,6 +2897,20 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 		goto err_nv;
 
 	/*
+	 * Make root and cwd restore _that_ late not to break any
+	 * attempts to open files by paths above (e.g. /proc).
+	 */
+	if (restore_fs(current))
+		goto err;
+
+	if (current->parent == NULL) {
+		/* Wait when all tasks restored all files */
+		futex_wait_while_gt(&task_entries->nr_in_progress,
+						current->nr_threads);
+		fini_restore_mntns();
+	}
+
+	/*
 	 * We're about to search for free VM area and inject the restorer blob
 	 * into it. No irrelevent mmaps/mremaps beyond this point, otherwise
 	 * this unwanted mapping might get overlapped by the restorer.
@@ -3151,14 +3155,6 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	task_args->nr_threads		= current->nr_threads;
 	task_args->thread_args		= thread_args;
 
-	/*
-	 * Make root and cwd restore _that_ late not to break any
-	 * attempts to open files by paths above (e.g. /proc).
-	 */
-
-	if (restore_fs(current))
-		goto err;
-
 	close_image_dir();
 	close_proc();
 	close_service_fd(ROOT_FD_OFF);
diff --git a/criu/mount.c b/criu/mount.c
index 7c280c0..5a167b1 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -3160,9 +3160,7 @@ void fini_restore_mntns(void)
 		if (nsid->nd != &mnt_ns_desc)
 			continue;
 		close_safe(&nsid->mnt.ns_fd);
-		if (nsid->type != NS_ROOT)
-			close_safe(&nsid->mnt.root_fd);
-		nsid->ns_populated = true;
+		close_safe(&nsid->mnt.root_fd);
 	}
 }
 
@@ -3350,6 +3348,34 @@ void cleanup_mnt_ns(void)
 		pr_perror("Can't remove the directory %s", mnt_roots);
 }
 
+static int open_mnt_ns(struct ns_id *nsid, struct rst_info *rst)
+{
+	int fd, tfd;
+
+	/* Pin one with a file descriptor */
+	fd = open_proc(PROC_SELF, "ns/mnt");
+	if (fd < 0)
+		return -1;
+	tfd = reopen_as_unused_fd(fd, rst);
+	if (tfd < 0) {
+		close(fd);
+		return -1;
+	}
+	nsid->mnt.ns_fd = tfd;
+
+	fd = open_proc(PROC_SELF, "root");
+	if (fd < 0)
+		return -1;
+	tfd = reopen_as_unused_fd(fd, rst);
+	if (tfd < 0) {
+		close(fd);
+		return -1;
+	}
+	nsid->mnt.root_fd = tfd;
+
+	return 0;
+}
+
 int prepare_mnt_ns(void)
 {
 	int ret = -1, rst = -1;
@@ -3483,12 +3509,8 @@ ns_created:
 		if (nsid->nd != &mnt_ns_desc)
 			continue;
 		if (nsid->type == NS_ROOT) {
-			/* Pin one with a file descriptor */
-			nsid->mnt.ns_fd = open_proc(PROC_SELF, "ns/mnt");
-			if (nsid->mnt.ns_fd < 0)
+			if (open_mnt_ns(nsid, rsti(root_item)))
 				goto err;
-			/* we set ns_populated so we don't need to open root_fd */
-			nsid->ns_populated = true;
 			continue;
 		}
 
@@ -3504,14 +3526,7 @@ ns_created:
 		if (cr_pivot_root(path))
 			goto err;
 
-		/* Pin one with a file descriptor */
-		nsid->mnt.ns_fd = open_proc(PROC_SELF, "ns/mnt");
-		if (nsid->mnt.ns_fd < 0)
-			goto err;
-
-		/* root_fd is used to restore file mappings */
-		nsid->mnt.root_fd = open_proc(PROC_SELF, "root");
-		if (nsid->mnt.root_fd < 0)
+		if (open_mnt_ns(nsid, rsti(root_item)))
 			goto err;
 
 		/* And return back to regain the access to the roots yard */
@@ -3628,6 +3643,7 @@ int mntns_get_root_fd(struct ns_id *mntns)
 		fd = open_proc(root_item->pid.virt, "fd/%d", mntns->mnt.root_fd);
 		if (fd < 0)
 			return -1;
+		close_pid_proc();
 
 		return mntns_set_root_fd(mntns->ns_pid, fd);
 	}
diff --git a/criu/pstree.c b/criu/pstree.c
index af89dbb..94b8021 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -943,6 +943,7 @@ static int prepare_pstree_for_unshare(void)
 		fake_root->threads->virt = INIT_PID;
 		fake_root->ids = root_ids;
 		rsti(fake_root)->clone_flags = opts.unshare_flags | rsti(root_item)->clone_flags;
+		INIT_LIST_HEAD(&rsti(fake_root)->used);
 
 		rsti(fake_root)->helper_cb = do_fake_init;
 
-- 
2.7.4



More information about the CRIU mailing list