[CRIU] [PATCH v4 2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content
Kirill Tkhai
ktkhai at virtuozzo.com
Wed Jul 20 03:13:11 PDT 2016
Umount does not remove binfmt_misc content. If it's mounted once again,
the same entries remain registered.
Criu does not dump content of umounted binfmt_misc. So, after C/R we
lose it at all.
This patch forces mounting of unmounted binfmt_misc after we collected
mountpoints. If it's unmounted, we mount it back and add this mount
to the tree of collected mounted mountpoints. Further, binfmt_misc
content is dumped in usual way with the only difference, that mount
point itself is not dumped.
v2: Print error in case of umount() fail.
Move add_forced_mount() to another patch.
v3: Close binfmt_misc dir before its umount().
v4: Do not dump forced mounted mountpoint.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
criu/cr-dump.c | 1
criu/include/mount.h | 1
criu/mount.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 156 insertions(+), 1 deletion(-)
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index febef16..b53e45f 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1616,6 +1616,7 @@ static int cr_dump_finish(int ret)
if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
network_unlock();
delete_link_remaps();
+ clean_cr_time_mounts();
}
pstree_switch_state(root_item,
(ret || post_dump_ret) ?
diff --git a/criu/include/mount.h b/criu/include/mount.h
index c7992ac..c8668c9 100644
--- a/criu/include/mount.h
+++ b/criu/include/mount.h
@@ -127,5 +127,6 @@ extern int ext_mount_add(char *key, char *val);
extern int mntns_maybe_create_roots(void);
extern int read_mnt_ns_img(void);
extern void cleanup_mnt_ns(void);
+extern void clean_cr_time_mounts(void);
#endif /* __CR_MOUNT_H__ */
diff --git a/criu/mount.c b/criu/mount.c
index 6512f51..d583429 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -40,6 +40,7 @@
#undef LOG_PREFIX
#define LOG_PREFIX "mnt: "
+#define BINFMT_MISC_HOME "/proc/sys/fs/binfmt_misc"
static struct fstype fstypes[];
int ext_mount_add(char *key, char *val)
@@ -1135,6 +1136,53 @@ static int attach_option(struct mount_info *pm, char *opt)
return pm->options ? 0 : -1;
}
+static int add_cr_time_mount(struct mount_info *root, char *fsname, const char *path, unsigned int s_dev)
+{
+ struct mount_info *mi, *t, *parent;
+
+ mi = mnt_entry_alloc();
+ if (!mi)
+ return -1;
+ mi->mountpoint = xmalloc(strlen(path) + 2);
+ if (!mi->mountpoint)
+ return -1;
+ mi->ns_mountpoint = mi->mountpoint;
+ sprintf(mi->mountpoint, ".%s", path);
+ mi->mnt_id = mi->flags = mi->sb_flags = 0;
+ mi->root = xstrdup("/");
+ mi->fsname = xstrdup(fsname);
+ mi->source = xstrdup(fsname);
+ mi->options = xstrdup("");
+ if (!mi->root || !mi->fsname || !mi->source || !mi->options)
+ return -1;
+ mi->fstype = find_fstype_by_name(fsname);
+
+ mi->s_dev = mi->s_dev_rt = s_dev;
+
+ parent = root;
+ while (1) {
+ list_for_each_entry(t, &parent->children, siblings) {
+ if (strstr(mi->mountpoint, t->mountpoint) == mi->mountpoint) {
+ parent = t;
+ break;
+ }
+ }
+ if (&t->siblings == &parent->children)
+ break;
+ }
+
+ mi->nsid = parent->nsid;
+ mi->parent = parent;
+ mi->parent_mnt_id = parent->mnt_id;
+ mi->next = parent->next;
+ parent->next = mi;
+ list_add(&mi->siblings, &parent->children);
+ pr_info("Add cr-time mountpoint %s with parent %s(%u)\n",
+ mi->mountpoint, parent->mountpoint, parent->mnt_id);
+ return 0;
+}
+
+
/* Is it mounted w or w/o the newinstance option */
static int devpts_parse(struct mount_info *pm)
{
@@ -1272,6 +1320,65 @@ static int devtmpfs_restore(struct mount_info *pm)
return ret;
}
+static int try_mount_binfmt_misc(struct ns_id *ns, unsigned int *s_dev)
+{
+ int num, mnt_fd, ret, exit_code = -1;
+ struct dirent *de;
+ struct stat st;
+ DIR *dir;
+
+ ret = switch_ns(ns->ns_pid, &mnt_ns_desc, &mnt_fd);
+ if (ret < 0) {
+ pr_err("Can't switch mnt_ns\n");
+ goto out;
+ }
+
+ ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
+ if (ret < 0) {
+ if (errno == EPERM) {
+ pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
+ exit_code = 0;
+ goto restore_ns;
+ }
+ if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
+ pr_perror("Can't mount binfmt_misc");
+ goto restore_ns;
+ }
+ pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
+ } else {
+ dir = opendir(BINFMT_MISC_HOME);
+ if (!dir) {
+ pr_perror("Can't read binfmt_misc dir");
+ goto restore_ns;
+ }
+
+ num = 0;
+ /* ".", "..", "register", "status" */
+ while (num <= 4 && (de = readdir(dir)) != NULL)
+ num++;
+ closedir(dir);
+ if (num <= 4) {
+ /* No entries */
+ if (umount(BINFMT_MISC_HOME) < 0)
+ pr_perror("Can't umount "BINFMT_MISC_HOME"\n");
+ } else {
+ if (stat(BINFMT_MISC_HOME, &st) < 0)
+ pr_perror("Can't stat on binfmt_misc path\n");
+ else {
+ *s_dev = st.st_dev;
+ exit_code = 1;
+ }
+ goto restore_ns;
+ }
+ }
+
+ exit_code = 0;
+restore_ns:
+ ret = restore_ns(mnt_fd, &mnt_ns_desc);
+out:
+ return ret ? -1 : exit_code;
+}
+
static int binfmt_misc_virtual(struct mount_info *pm)
{
return kerndat_fs_virtualized(KERNDAT_FS_STAT_BINFMT_MISC, pm->s_dev);
@@ -1873,6 +1980,11 @@ static int dump_one_mountpoint(struct mount_info *pm, struct cr_img *img)
if (!pm->dumped && dump_one_fs(pm))
return -1;
+ if (pm->mnt_id == 0) {
+ pr_info("Skip dumping cr-time mountpoint: %s\n", pm->mountpoint);
+ return 0;
+ }
+
me.mnt_id = pm->mnt_id;
me.root_dev = pm->s_dev;
me.parent_mnt_id = pm->parent_mnt_id;
@@ -1931,7 +2043,8 @@ static void free_mntinfo(struct mount_info *pms)
struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
{
- struct mount_info *pm;
+ struct mount_info *pm, *mi;
+ int ret;
pm = parse_mountinfo(ns->ns_pid, ns, for_dump);
if (!pm) {
@@ -1943,6 +2056,22 @@ struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
if (ns->mnt.mntinfo_tree == NULL)
goto err;
+ if (for_dump && ns->type == NS_ROOT) {
+ for (mi = pm; mi != NULL; mi = mi->next) {
+ if (strcmp(mi->fstype->name, "binfmt_misc") == 0)
+ break;
+ }
+ if (!mi) {
+ unsigned int s_dev = 0;
+ ret = try_mount_binfmt_misc(ns, &s_dev);
+ if (ret < 0)
+ return NULL;
+ if (ret > 0 && add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc",
+ BINFMT_MISC_HOME, s_dev) < 0)
+ return NULL;
+ }
+ }
+
ns->mnt.mntinfo_list = pm;
return pm;
err:
@@ -3683,4 +3812,28 @@ int dump_mnt_namespaces(void)
return 0;
}
+void clean_cr_time_mounts(void)
+{
+ struct mount_info *mi;
+ int mnt_fd, ret;
+
+ for (mi = mntinfo; mi; mi = mi->next) {
+ if (mi->mnt_id)
+ continue;
+ ret = switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &mnt_fd);
+ if (ret) {
+ pr_err("Can't switch to pid's %u mnt_ns\n", mi->nsid->ns_pid);
+ continue;
+ }
+
+ if (umount(mi->mountpoint) < 0)
+ pr_perror("Can't umount forced mount %s\n", mi->mountpoint);
+
+ if (restore_ns(mnt_fd, &mnt_ns_desc)) {
+ pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
+ return;
+ }
+ }
+}
+
struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
More information about the CRIU
mailing list