[CRIU] [PATCH v5 4/5] mount: Forced mount unmounted binfmt_misc to do not lost its content
Kirill Tkhai
ktkhai at virtuozzo.com
Mon Aug 22 06:32:52 PDT 2016
On 22.08.2016 16:29, Pavel Emelyanov wrote:
> On 08/15/2016 11:44 AM, Kirill Tkhai wrote:
>>
>>
>> On 05.08.2016 18:38, Pavel Emelyanov wrote:
>>> On 08/04/2016 04:49 PM, Kirill Tkhai wrote:
>>>> Umount does not remove binfmt_misc content. If it's mounted once again,
>>>> the same entries remain registered.
>>>>
>>>> Criu does not dump content of umounted binfmt_misc. So, after C/R we
>>>> lose it at all.
>>>>
>>>> This patch forces mounting of unmounted binfmt_misc after we collected
>>>> mountpoints. If it's unmounted, we mount it back and add this mount
>>>> to the tree of collected mounted mountpoints. Further, binfmt_misc
>>>> content is dumped in usual way with the only difference, that mount
>>>> point itself is not dumped.
>>>>
>>>> v2: Print error in case of umount() fail.
>>>> Move add_forced_mount() to another patch.
>>>> v3: Close binfmt_misc dir before its umount().
>>>> v4: Do not dump forced mounted mountpoint.
>>>> v5: Do not search for binfmt_misc mounted: use opts.has_binfmt_misc.
>>>> Do not count number of entries in binfmt_misc directory.
>>>>
>>>> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
>>>> ---
>>>> criu/cr-dump.c | 1
>>>> criu/include/mount.h | 1
>>>> criu/mount.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>> 3 files changed, 130 insertions(+)
>>>>
>>>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>>>> index 06ff2d7..5761c14 100644
>>>> --- a/criu/cr-dump.c
>>>> +++ b/criu/cr-dump.c
>>>> @@ -1643,6 +1643,7 @@ static int cr_dump_finish(int ret)
>>>> if (ret || post_dump_ret || opts.final_state == TASK_ALIVE) {
>>>> network_unlock();
>>>> delete_link_remaps();
>>>> + clean_cr_time_mounts();
>>>> }
>>>>
>>>> if (opts.lazy_pages)
>>>> diff --git a/criu/include/mount.h b/criu/include/mount.h
>>>> index d3f472d..3332c53 100644
>>>> --- a/criu/include/mount.h
>>>> +++ b/criu/include/mount.h
>>>> @@ -127,5 +127,6 @@ extern int ext_mount_add(char *key, char *val);
>>>> extern int mntns_maybe_create_roots(void);
>>>> extern int read_mnt_ns_img(void);
>>>> extern void cleanup_mnt_ns(void);
>>>> +extern void clean_cr_time_mounts(void);
>>>>
>>>> #endif /* __CR_MOUNT_H__ */
>>>> diff --git a/criu/mount.c b/criu/mount.c
>>>> index 43ea96b..926817a 100644
>>>> --- a/criu/mount.c
>>>> +++ b/criu/mount.c
>>>> @@ -41,6 +41,7 @@
>>>> #undef LOG_PREFIX
>>>> #define LOG_PREFIX "mnt: "
>>>>
>>>> +#define BINFMT_MISC_HOME "/proc/sys/fs/binfmt_misc"
>>>> static struct fstype fstypes[];
>>>>
>>>> int ext_mount_add(char *key, char *val)
>>>> @@ -1136,6 +1137,53 @@ static int attach_option(struct mount_info *pm, char *opt)
>>>> return pm->options ? 0 : -1;
>>>> }
>>>>
>>>> +static int add_cr_time_mount(struct mount_info *root, char *fsname, const char *path, unsigned int s_dev)
>>>> +{
>>>> + struct mount_info *mi, *t, *parent;
>>>> +
>>>> + mi = mnt_entry_alloc();
>>>> + if (!mi)
>>>> + return -1;
>>>> + mi->mountpoint = xmalloc(strlen(path) + 2);
>>>> + if (!mi->mountpoint)
>>>> + return -1;
>>>> + mi->ns_mountpoint = mi->mountpoint;
>>>> + sprintf(mi->mountpoint, ".%s", path);
>>>> + mi->mnt_id = mi->flags = mi->sb_flags = 0;
>>>> + mi->root = xstrdup("/");
>>>> + mi->fsname = xstrdup(fsname);
>>>> + mi->source = xstrdup(fsname);
>>>> + mi->options = xstrdup("");
>>>> + if (!mi->root || !mi->fsname || !mi->source || !mi->options)
>>>> + return -1;
>>>> + mi->fstype = find_fstype_by_name(fsname);
>>>> +
>>>> + mi->s_dev = mi->s_dev_rt = s_dev;
>>>> +
>>>> + parent = root;
>>>> + while (1) {
>>>> + list_for_each_entry(t, &parent->children, siblings) {
>>>> + if (strstartswith(mi->mountpoint, t->mountpoint)) {
>>>> + parent = t;
>>>> + break;
>>>> + }
>>>> + }
>>>> + if (&t->siblings == &parent->children)
>>>> + break;
>>>> + }
>>>> +
>>>> + mi->nsid = parent->nsid;
>>>> + mi->parent = parent;
>>>> + mi->parent_mnt_id = parent->mnt_id;
>>>> + mi->next = parent->next;
>>>> + parent->next = mi;
>>>> + list_add(&mi->siblings, &parent->children);
>>>> + pr_info("Add cr-time mountpoint %s with parent %s(%u)\n",
>>>> + mi->mountpoint, parent->mountpoint, parent->mnt_id);
>>>> + return 0;
>>>> +}
>>>> +
>>>> +
>>>> /* Is it mounted w or w/o the newinstance option */
>>>> static int devpts_parse(struct mount_info *pm)
>>>> {
>>>> @@ -1281,6 +1329,46 @@ static int binfmt_misc_parse(struct mount_info *pm)
>>>>
>>>> }
>>>>
>>>> +static int try_mount_binfmt_misc(struct ns_id *ns, unsigned int *s_dev)
>>>> +{
>>>
>>> What's here in this function that deserves _binfmt_ in its name?
>>
>> Hm... Mount device type, doesn't it?
>
> Mount device type?
I don't understand what you are asking.
Name has _binfmt_misc_ prefix, because it mounts a pseudo device of binfmt_misc type.
If you see a problem here, please, explain it more clearly.
>>> Why not move all this code into add_cr_time_mount() and let is
>>> just call switch_ns() and mount(fstype->name, path, ...) ?
>>
>> Because add_cr_time_mount() is used on restore too (patch 5), and it's not necessary
>> to "try mount" it there.
>
> So there's add_cr_time_mount() and mount_cr_time_mount(). OK.
>
> -- Pavel
>
>>>> + int mnt_fd, ret, exit_code = -1;
>>>> + struct stat st;
>>>> +
>>>> + ret = switch_ns(ns->ns_pid, &mnt_ns_desc, &mnt_fd);
>>>> + if (ret < 0) {
>>>> + pr_err("Can't switch mnt_ns\n");
>>>> + goto out;
>>>> + }
>>>> +
>>>> + ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
>>>> + if (ret < 0) {
>>>> + if (errno == EPERM) {
>>>> + pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
>>>> + exit_code = 0;
>>>> + goto restore_ns;
>>>> + }
>>>> + if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
>>>> + pr_perror("Can't mount binfmt_misc");
>>>> + goto restore_ns;
>>>> + }
>>>> + pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
>>>> + } else {
>>>> + if (stat(BINFMT_MISC_HOME, &st) < 0)
>>>> + pr_perror("Can't stat on binfmt_misc path\n");
>>>> + else {
>>>> + *s_dev = st.st_dev;
>>>> + exit_code = 1;
>>>> + }
>>>> + goto restore_ns;
>>>> + }
>>>> +
>>>> + exit_code = 0;
>>>> +restore_ns:
>>>> + ret = restore_ns(mnt_fd, &mnt_ns_desc);
>>>> +out:
>>>> + return ret ? -1 : exit_code;
>>>> +}
>>>> +
>>>> static int binfmt_misc_virtual(struct mount_info *pm)
>>>> {
>>>> return kerndat_fs_virtualized(KERNDAT_FS_STAT_BINFMT_MISC, pm->s_dev);
>>>> @@ -1907,6 +1995,11 @@ static int dump_one_mountpoint(struct mount_info *pm, struct cr_img *img)
>>>> if (!pm->dumped && dump_one_fs(pm))
>>>> return -1;
>>>>
>>>> + if (pm->mnt_id == 0) {
>>>> + pr_info("Skip dumping cr-time mountpoint: %s\n", pm->mountpoint);
>>>> + return 0;
>>>> + }
>>>> +
>>>> me.mnt_id = pm->mnt_id;
>>>> me.root_dev = pm->s_dev;
>>>> me.parent_mnt_id = pm->parent_mnt_id;
>>>> @@ -1966,6 +2059,7 @@ static void free_mntinfo(struct mount_info *pms)
>>>> struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
>>>> {
>>>> struct mount_info *pm;
>>>> + int ret;
>>>>
>>>> pm = parse_mountinfo(ns->ns_pid, ns, for_dump);
>>>> if (!pm) {
>>>> @@ -1977,6 +2071,16 @@ struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
>>>> if (ns->mnt.mntinfo_tree == NULL)
>>>> goto err;
>>>>
>>>> + if (for_dump && ns->type == NS_ROOT && !opts.has_binfmt_misc) {
>>>> + unsigned int s_dev = 0;
>>>> + ret = try_mount_binfmt_misc(ns, &s_dev);
>>>> + if (ret < 0)
>>>> + return NULL;
>>>> + if (ret > 0 && add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc",
>>>> + BINFMT_MISC_HOME, s_dev) < 0)
>>>> + return NULL;
>>>> + }
>>>> +
>>>> ns->mnt.mntinfo_list = pm;
>>>> return pm;
>>>> err:
>>>> @@ -3786,4 +3890,28 @@ int dump_mnt_namespaces(void)
>>>> return 0;
>>>> }
>>>>
>>>> +void clean_cr_time_mounts(void)
>>>> +{
>>>> + struct mount_info *mi;
>>>> + int mnt_fd, ret;
>>>> +
>>>> + for (mi = mntinfo; mi; mi = mi->next) {
>>>> + if (mi->mnt_id)
>>>> + continue;
>>>> + ret = switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &mnt_fd);
>>>> + if (ret) {
>>>> + pr_err("Can't switch to pid's %u mnt_ns\n", mi->nsid->ns_pid);
>>>> + continue;
>>>> + }
>>>> +
>>>> + if (umount(mi->mountpoint) < 0)
>>>> + pr_perror("Can't umount forced mount %s\n", mi->mountpoint);
>>>> +
>>>> + if (restore_ns(mnt_fd, &mnt_ns_desc)) {
>>>> + pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
>>>> + return;
>>>> + }
>>>> + }
>>>> +}
>>>> +
>>>> struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>>>
>>>> _______________________________________________
>>>> CRIU mailing list
>>>> CRIU at openvz.org
>>>> https://lists.openvz.org/mailman/listinfo/criu
>>>> .
>>>>
>>>
>> .
>>
>
More information about the CRIU
mailing list