[CRIU] [PATCH 1/3] mount: don't inherit mount namespace descriptors to each process
Andrey Vagin
avagin at openvz.org
Sat Nov 14 22:07:03 PST 2015
From: Andrew Vagin <avagin at virtuozzo.com>
close_olds_fds() knows nothing about more than one set of service file
descriptors, so it's better to call it before forking children, as it was
before 9d60724eca71 ("restore: restore mntns before creating private vma-s").
The root task restores all processes and pins them with file descriptors,
then a task restores a mount namespace by opening the file descriptor of
the root task via /proc/pid/fd/X.
Reported-by: Mr Jenkins
Signed-off-by: Andrew Vagin <avagin at virtuozzo.com>
---
cr-restore.c | 14 +++++++++-----
include/mount.h | 1 +
mount.c | 25 ++++++++++++++++++++++---
3 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/cr-restore.c b/cr-restore.c
index c132588..c53cb13 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -1509,9 +1509,6 @@ static int restore_task_with_children(void *_arg)
if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
goto err;
- if (prepare_namespace(current, ca->clone_flags))
- goto err_fini_mnt;
-
/*
* We need non /proc proc mount for restoring pid and mount
* namespaces and do not care for the rest of the cases.
@@ -1520,6 +1517,9 @@ static int restore_task_with_children(void *_arg)
if (mount_proc())
goto err_fini_mnt;
+ if (prepare_namespace(current, ca->clone_flags))
+ goto err_fini_mnt;
+
if (root_prepare_shared())
goto err_fini_mnt;
@@ -1567,8 +1567,12 @@ static int restore_task_with_children(void *_arg)
if (restore_finish_stage(CR_STATE_FORKING) < 0)
goto err_fini_mnt;
- if (current->parent == NULL && depopulate_roots_yard())
- goto err;
+ if (current->parent == NULL) {
+ if (depopulate_roots_yard())
+ goto err;
+
+ fini_restore_mntns();
+ }
if (restore_one_task(current->pid.virt, ca->core))
goto err;
diff --git a/include/mount.h b/include/mount.h
index 3b4d472..1228137 100644
--- a/include/mount.h
+++ b/include/mount.h
@@ -114,6 +114,7 @@ extern bool phys_stat_dev_match(dev_t st_dev, dev_t phys_dev,
struct ns_id *, const char *path);
extern int restore_task_mnt_ns(struct pstree_item *current);
+extern void fini_restore_mntns(void);
extern int depopulate_roots_yard(void);
extern int rst_get_mnt_root(int mnt_id, char *path, int plen);
diff --git a/mount.c b/mount.c
index 0670a24..c7e2dde 100644
--- a/mount.c
+++ b/mount.c
@@ -2590,10 +2590,18 @@ int mntns_maybe_create_roots(void)
static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *current)
{
- if (setns(nsid->mnt.ns_fd, CLONE_NEWNS)) {
+ int fd;
+
+ fd = open_proc(root_item->pid.virt, "fd/%d", nsid->mnt.ns_fd);
+ if (fd < 0)
+ return -1;
+
+ if (setns(fd, CLONE_NEWNS)) {
pr_perror("Can't restore mntns");
+ close(fd);
return -1;
}
+ close(fd);
if (nsid->ns_pid == current->pid.virt)
futex_set_and_wake(&nsid->ns_created, 1);
@@ -2634,6 +2642,19 @@ int restore_task_mnt_ns(struct pstree_item *current)
return 0;
}
+void fini_restore_mntns(void)
+{
+ struct ns_id *nsid;
+
+ for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
+ if (nsid->nd != &mnt_ns_desc)
+ continue;
+ if (root_item->ids->mnt_ns_id == nsid->id)
+ continue;
+ close(nsid->mnt.ns_fd);
+ }
+}
+
/*
* All nested mount namespaces are restore as sub-trees of the root namespace.
*/
@@ -2753,8 +2774,6 @@ int prepare_mnt_ns(void)
if (old == NULL)
return -1;
- close_proc();
-
if (!opts.root) {
if (chdir("/")) {
pr_perror("chdir(\"/\") failed");
--
2.4.3
More information about the CRIU
mailing list