[CRIU] [PATCH v5 4/5] mount: Forced mount unmounted binfmt_misc to do not lost its content

Kirill Tkhai ktkhai at virtuozzo.com
Thu Aug 4 06:49:39 PDT 2016


Umount does not remove binfmt_misc content. If it's mounted once again,
the same entries remain registered.

Criu does not dump content of umounted binfmt_misc. So, after C/R we
lose it at all.

This patch forces mounting of unmounted binfmt_misc after we collected
mountpoints. If it's unmounted, we mount it back and add this mount
to the tree of collected mounted mountpoints. Further, binfmt_misc
content is dumped in usual way with the only difference, that mount
point itself is not dumped.

v2: Print error in case of umount() fail.
    Move add_forced_mount() to another patch.
v3: Close binfmt_misc dir before its umount().
v4: Do not dump forced mounted mountpoint.
v5: Do not search for binfmt_misc mounted: use opts.has_binfmt_misc.
    Do not count number of entries in binfmt_misc directory.

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 criu/cr-dump.c       |    1 
 criu/include/mount.h |    1 
 criu/mount.c         |  128 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 130 insertions(+)

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 06ff2d7..5761c14 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1643,6 +1643,7 @@ static int cr_dump_finish(int ret)
 	if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
 		network_unlock();
 		delete_link_remaps();
+		clean_cr_time_mounts();
 	}
 
 	if (opts.lazy_pages)
diff --git a/criu/include/mount.h b/criu/include/mount.h
index d3f472d..3332c53 100644
--- a/criu/include/mount.h
+++ b/criu/include/mount.h
@@ -127,5 +127,6 @@ extern int ext_mount_add(char *key, char *val);
 extern int mntns_maybe_create_roots(void);
 extern int read_mnt_ns_img(void);
 extern void cleanup_mnt_ns(void);
+extern void clean_cr_time_mounts(void);
 
 #endif /* __CR_MOUNT_H__ */
diff --git a/criu/mount.c b/criu/mount.c
index 43ea96b..926817a 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -41,6 +41,7 @@
 #undef	LOG_PREFIX
 #define LOG_PREFIX "mnt: "
 
+#define BINFMT_MISC_HOME "/proc/sys/fs/binfmt_misc"
 static struct fstype fstypes[];
 
 int ext_mount_add(char *key, char *val)
@@ -1136,6 +1137,53 @@ static int attach_option(struct mount_info *pm, char *opt)
 	return pm->options ? 0 : -1;
 }
 
+static int add_cr_time_mount(struct mount_info *root, char *fsname, const char *path, unsigned int s_dev)
+{
+	struct mount_info *mi, *t, *parent;
+
+	mi = mnt_entry_alloc();
+	if (!mi)
+		return -1;
+	mi->mountpoint = xmalloc(strlen(path) + 2);
+	if (!mi->mountpoint)
+		return -1;
+	mi->ns_mountpoint = mi->mountpoint;
+	sprintf(mi->mountpoint, ".%s", path);
+	mi->mnt_id = mi->flags = mi->sb_flags = 0;
+	mi->root = xstrdup("/");
+	mi->fsname = xstrdup(fsname);
+	mi->source = xstrdup(fsname);
+	mi->options = xstrdup("");
+	if (!mi->root || !mi->fsname || !mi->source || !mi->options)
+		return -1;
+	mi->fstype = find_fstype_by_name(fsname);
+
+	mi->s_dev = mi->s_dev_rt = s_dev;
+
+	parent = root;
+	while (1) {
+		list_for_each_entry(t, &parent->children, siblings) {
+			if (strstartswith(mi->mountpoint, t->mountpoint)) {
+				parent = t;
+				break;
+			}
+		}
+		if (&t->siblings == &parent->children)
+			break;
+	}
+
+	mi->nsid = parent->nsid;
+	mi->parent = parent;
+	mi->parent_mnt_id = parent->mnt_id;
+	mi->next = parent->next;
+	parent->next = mi;
+	list_add(&mi->siblings, &parent->children);
+	pr_info("Add cr-time mountpoint %s with parent %s(%u)\n",
+		mi->mountpoint, parent->mountpoint, parent->mnt_id);
+	return 0;
+}
+
+
 /* Is it mounted w or w/o the newinstance option */
 static int devpts_parse(struct mount_info *pm)
 {
@@ -1281,6 +1329,46 @@ static int binfmt_misc_parse(struct mount_info *pm)
 
 }
 
+static int try_mount_binfmt_misc(struct ns_id *ns, unsigned int *s_dev)
+{
+	int mnt_fd, ret, exit_code = -1;
+	struct stat st;
+
+	ret = switch_ns(ns->ns_pid, &mnt_ns_desc, &mnt_fd);
+	if (ret < 0) {
+		pr_err("Can't switch mnt_ns\n");
+		goto out;
+	}
+
+	ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
+	if (ret < 0) {
+		if (errno == EPERM) {
+			pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
+			exit_code = 0;
+			goto restore_ns;
+		}
+		if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
+			pr_perror("Can't mount binfmt_misc");
+			goto restore_ns;
+		}
+		pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
+	} else {
+		if (stat(BINFMT_MISC_HOME, &st) < 0)
+			 pr_perror("Can't stat on binfmt_misc path\n");
+		else {
+			*s_dev = st.st_dev;
+			exit_code = 1;
+		}
+			goto restore_ns;
+	}
+
+	exit_code = 0;
+restore_ns:
+	ret = restore_ns(mnt_fd, &mnt_ns_desc);
+out:
+	return ret ? -1 : exit_code;
+}
+
 static int binfmt_misc_virtual(struct mount_info *pm)
 {
 	return kerndat_fs_virtualized(KERNDAT_FS_STAT_BINFMT_MISC, pm->s_dev);
@@ -1907,6 +1995,11 @@ static int dump_one_mountpoint(struct mount_info *pm, struct cr_img *img)
 	if (!pm->dumped && dump_one_fs(pm))
 		return -1;
 
+	if (pm->mnt_id == 0) {
+		pr_info("Skip dumping cr-time mountpoint: %s\n", pm->mountpoint);
+		return 0;
+	}
+
 	me.mnt_id		= pm->mnt_id;
 	me.root_dev		= pm->s_dev;
 	me.parent_mnt_id	= pm->parent_mnt_id;
@@ -1966,6 +2059,7 @@ static void free_mntinfo(struct mount_info *pms)
 struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
 {
 	struct mount_info *pm;
+	int ret;
 
 	pm = parse_mountinfo(ns->ns_pid, ns, for_dump);
 	if (!pm) {
@@ -1977,6 +2071,16 @@ struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
 	if (ns->mnt.mntinfo_tree == NULL)
 		goto err;
 
+	if (for_dump && ns->type == NS_ROOT && !opts.has_binfmt_misc) {
+		unsigned int s_dev = 0;
+		ret = try_mount_binfmt_misc(ns, &s_dev);
+		if (ret < 0)
+			return NULL;
+		if (ret > 0 && add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc",
+						 BINFMT_MISC_HOME, s_dev) < 0)
+			return NULL;
+	}
+
 	ns->mnt.mntinfo_list = pm;
 	return pm;
 err:
@@ -3786,4 +3890,28 @@ int dump_mnt_namespaces(void)
 	return 0;
 }
 
+void clean_cr_time_mounts(void)
+{
+	struct mount_info *mi;
+	int mnt_fd, ret;
+
+	for (mi = mntinfo; mi; mi = mi->next) {
+		if (mi->mnt_id)
+			continue;
+		ret = switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &mnt_fd);
+		if (ret) {
+			pr_err("Can't switch to pid's %u mnt_ns\n", mi->nsid->ns_pid);
+			continue;
+		}
+
+		if (umount(mi->mountpoint) < 0)
+			pr_perror("Can't umount forced mount %s\n", mi->mountpoint);
+
+		if (restore_ns(mnt_fd, &mnt_ns_desc)) {
+			pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
+			return;
+		}
+	}
+}
+
 struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");



More information about the CRIU mailing list