[CRIU] [PATCH v4 2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content
Kirill Tkhai
ktkhai at virtuozzo.com
Tue Aug 2 06:34:40 PDT 2016
On 02.08.2016 16:31, Pavel Emelyanov wrote:
> On 08/02/2016 04:25 PM, Kirill Tkhai wrote:
>
>>>
>>>> + parent = t;
>>>> + break;
>>>> + }
>>>> + }
>>>> + if (&t->siblings == &parent->children)
>>>> + break;
>>>> + }
>>>> +
>>>> + mi->nsid = parent->nsid;
>>>> + mi->parent = parent;
>>>> + mi->parent_mnt_id = parent->mnt_id;
>>>> + mi->next = parent->next;
>>>> + parent->next = mi;
>>>> + list_add(&mi->siblings, &parent->children);
>>>> + pr_info("Add cr-time mountpoint %s with parent %s(%u)\n",
>>>> + mi->mountpoint, parent->mountpoint, parent->mnt_id);
>>>> + return 0;
>>>> +}
>>>> +
>>>> +
>>>> /* Is it mounted w or w/o the newinstance option */
>>>> static int devpts_parse(struct mount_info *pm)
>>>> {
>>>> @@ -1272,6 +1320,65 @@ static int devtmpfs_restore(struct mount_info *pm)
>>>> return ret;
>>>> }
>>>>
>>>> +static int try_mount_binfmt_misc(struct ns_id *ns, unsigned int *s_dev)
>>>> +{
>>>> + int num, mnt_fd, ret, exit_code = -1;
>>>> + struct dirent *de;
>>>> + struct stat st;
>>>> + DIR *dir;
>>>> +
>>>> + ret = switch_ns(ns->ns_pid, &mnt_ns_desc, &mnt_fd);
>>>> + if (ret < 0) {
>>>> + pr_err("Can't switch mnt_ns\n");
>>>> + goto out;
>>>> + }
>>>> +
>>>> + ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
>>>> + if (ret < 0) {
>>>> + if (errno == EPERM) {
>>>> + pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
>>>> + exit_code = 0;
>>>> + goto restore_ns;
>>>> + }
>>>> + if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
>>>> + pr_perror("Can't mount binfmt_misc");
>>>> + goto restore_ns;
>>>> + }
>>>> + pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
>>>> + } else {
>>>> + dir = opendir(BINFMT_MISC_HOME);
>>>> + if (!dir) {
>>>> + pr_perror("Can't read binfmt_misc dir");
>>>> + goto restore_ns;
>>>> + }
>>>> +
>>>> + num = 0;
>>>> + /* ".", "..", "register", "status" */
>>>> + while (num <= 4 && (de = readdir(dir)) != NULL)
>>>> + num++;
>>>> + closedir(dir);
>>>> + if (num <= 4) {
>>>> + /* No entries */
>>>> + if (umount(BINFMT_MISC_HOME) < 0)
>>>> + pr_perror("Can't umount "BINFMT_MISC_HOME"\n");
>>>> + } else {
>>>> + if (stat(BINFMT_MISC_HOME, &st) < 0)
>>>> + pr_perror("Can't stat on binfmt_misc path\n");
>>>> + else {
>>>> + *s_dev = st.st_dev;
>>>> + exit_code = 1;
>>>> + }
>>>> + goto restore_ns;
>>>> + }
>>>
>>> What does this else branch do? Why can't we just add the entry and
>>> let ->dump callback do whatever is appropriate?
>>
>> This branch searches for dev of newly mounted binfmt_misc, because
>
> It also calls readdir(). What for?
There is a commentary. If no entries is registered, then we don't want to have forced mounted binfmt_misc.
>> ->dump callback checks for mount is virtual or not. I can pass 0 as
>> dev here if you want. Do you?
>
> No, I don't. Getting the mount's dev is needed.
>
>>>> + }
>>>> +
>>>> + exit_code = 0;
>>>> +restore_ns:
>>>> + ret = restore_ns(mnt_fd, &mnt_ns_desc);
>>>> +out:
>>>> + return ret ? -1 : exit_code;
>>>> +}
>>>> +
>>>> static int binfmt_misc_virtual(struct mount_info *pm)
>>>> {
>>>> return kerndat_fs_virtualized(KERNDAT_FS_STAT_BINFMT_MISC, pm->s_dev);
>>>> @@ -1873,6 +1980,11 @@ static int dump_one_mountpoint(struct mount_info *pm, struct cr_img *img)
>>>> if (!pm->dumped && dump_one_fs(pm))
>>>> return -1;
>>>>
>>>> + if (pm->mnt_id == 0) {
>>>> + pr_info("Skip dumping cr-time mountpoint: %s\n", pm->mountpoint);
>>>> + return 0;
>>>> + }
>>>> +
>>>> me.mnt_id = pm->mnt_id;
>>>> me.root_dev = pm->s_dev;
>>>> me.parent_mnt_id = pm->parent_mnt_id;
>>>> @@ -1931,7 +2043,8 @@ static void free_mntinfo(struct mount_info *pms)
>>>>
>>>> struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
>>>> {
>>>> - struct mount_info *pm;
>>>> + struct mount_info *pm, *mi;
>>>> + int ret;
>>>>
>>>> pm = parse_mountinfo(ns->ns_pid, ns, for_dump);
>>>> if (!pm) {
>>>> @@ -1943,6 +2056,22 @@ struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
>>>> if (ns->mnt.mntinfo_tree == NULL)
>>>> goto err;
>>>>
>>>> + if (for_dump && ns->type == NS_ROOT) {
>>>> + for (mi = pm; mi != NULL; mi = mi->next) {
>>>> + if (strcmp(mi->fstype->name, "binfmt_misc") == 0)
>>>> + break;
>>>> + }
>>>
>>> You don't have to search for the entry, just declare the ->parse callback
>>> and set the boolean flag. Or add flag (or count) on fstype struct and count
>>> for mountpoints of desired fstype.
>>
>> What is "just declare the ->parse callback"?
>
> criu/mount.c, line 1672
>
> }, {
> .name = "devpts",
> .parse = devpts_parse, <<<< this thing gets called when reading an entry in /proc/pid/mountinfo
> .code = FSTYPE__DEVPTS,
> }, {
And global boolean variable "binfmt_misc_collected"?
>
>>>> + if (!mi) {
>>>> + unsigned int s_dev = 0;
>>>> + ret = try_mount_binfmt_misc(ns, &s_dev);
>>>> + if (ret < 0)
>>>> + return NULL;
>>>> + if (ret > 0 && add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc",
>>>> + BINFMT_MISC_HOME, s_dev) < 0)
>>>> + return NULL;
>>>> + }
>>>> + }
>>>> +
>>>> ns->mnt.mntinfo_list = pm;
>>>> return pm;
>>>> err:
>>>> @@ -3683,4 +3812,28 @@ int dump_mnt_namespaces(void)
>>>> return 0;
>>>> }
>>>>
>>>> +void clean_cr_time_mounts(void)
>>>> +{
>>>> + struct mount_info *mi;
>>>> + int mnt_fd, ret;
>>>> +
>>>> + for (mi = mntinfo; mi; mi = mi->next) {
>>>> + if (mi->mnt_id)
>>>> + continue;
>>>> + ret = switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &mnt_fd);
>>>> + if (ret) {
>>>> + pr_err("Can't switch to pid's %u mnt_ns\n", mi->nsid->ns_pid);
>>>> + continue;
>>>> + }
>>>> +
>>>> + if (umount(mi->mountpoint) < 0)
>>>> + pr_perror("Can't umount forced mount %s\n", mi->mountpoint);
>>>> +
>>>> + if (restore_ns(mnt_fd, &mnt_ns_desc)) {
>>>
>>> Can we restore it once outside the loop?
>>
>> How can we restore hypothetical several forced mounts without a loop?
>
> I mean restore_ns() :\ Switch ns-s one by one and return to original one once.
Will open_proc() called in switch_ns() work in this case?
>>>> + pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
>>>> + return;
>>>> + }
>>>> + }
>>>> +}
>>>> +
>>>> struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>>>
>>>> _______________________________________________
>>>> CRIU mailing list
>>>> CRIU at openvz.org
>>>> https://lists.openvz.org/mailman/listinfo/criu
>>>> .
>>>>
>>>
>> .
>>
>
More information about the CRIU
mailing list