[CRIU] [PATCH 2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content
Kirill Tkhai
ktkhai at virtuozzo.com
Thu Jun 23 02:22:09 PDT 2016
On 23.06.2016 07:59, Andrew Vagin wrote:
> On Wed, Jun 22, 2016 at 05:19:15PM +0300, Kirill Tkhai wrote:
>> Umount does not remove binfmt_misc content. If it's mounted once again,
>> the same entries remain registered.
>>
>> CRIU does not dump the content of an unmounted binfmt_misc. So, after C/R,
>> we lose it entirely.
>>
>> This patch forces mounting of unmounted binfmt_misc before we collect
>> mountpoints. If it's unmounted, we mount it back and add this mount
>> to the list of force-mounted mountpoints. The next patch needs this
>> list to mark the mount in a special way in the dump image.
>>
>> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
>> ---
>> criu/cr-dump.c | 13 +++++
>> criu/include/mount.h | 1
>> criu/mount.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++
>> 3 files changed, 143 insertions(+)
>>
>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>> index 00d28e9..dbda4b7 100644
>> --- a/criu/cr-dump.c
>> +++ b/criu/cr-dump.c
>> @@ -747,6 +747,16 @@ static int dump_task_core_all(struct parasite_ctl *ctl,
>> return ret;
>> }
>>
>> +static int prepare_dump__tasks_freezed(void)
>> +{
>> + int ret;
>> +
>> + /* Tasks freezed, so we do not race with systemd's autofs unmounter */
>> + ret = try_mount_binfmt_misc(root_item->pid.real);
>
> Can we do this only when binfmt_misc is virtualized?
>> +
>> + return ret;
>> +}
>> +
>> static int collect_pstree_ids_predump(void)
>> {
>> struct pstree_item *item;
>> @@ -1681,6 +1691,9 @@ int cr_dump_tasks(pid_t pid)
>> if (collect_pstree())
>> goto err;
>>
>> + if (prepare_dump__tasks_freezed())
>> + goto err;
>> +
>> if (collect_pstree_ids())
>> goto err;
>>
>> diff --git a/criu/include/mount.h b/criu/include/mount.h
>> index c7992ac..191f6a3 100644
>> --- a/criu/include/mount.h
>> +++ b/criu/include/mount.h
>> @@ -128,4 +128,5 @@ extern int mntns_maybe_create_roots(void);
>> extern int read_mnt_ns_img(void);
>> extern void cleanup_mnt_ns(void);
>>
>> +extern int try_mount_binfmt_misc(pid_t pid);
>> #endif /* __CR_MOUNT_H__ */
>> diff --git a/criu/mount.c b/criu/mount.c
>> index e891c92..f91654c 100644
>> --- a/criu/mount.c
>> +++ b/criu/mount.c
>> @@ -41,6 +41,13 @@
>> #define LOG_PREFIX "mnt: "
>>
>> static struct fstype fstypes[];
>> +static LIST_HEAD(forced_mounts_list);
>> +
>> +struct forced_mount {
>> + struct list_head list;
>> + unsigned int ns_id;
>> + unsigned int mnt_id;
>> +};
>>
>> int ext_mount_add(char *key, char *val)
>> {
>> @@ -3685,4 +3692,126 @@ int dump_mnt_namespaces(void)
>> return 0;
>> }
>>
>> +static int add_forced_mount(pid_t pid, const char *path)
>> +{
>> + unsigned int ns_id, mnt_id = 0;
>> + struct forced_mount *fm;
>> + int i, len, ret = 0;
>> + char *str, *p;
>> + struct bfd f;
>> +
>> + if (read_ns_id(pid, &mnt_ns_desc, &ns_id) < 0 || !ns_id) {
>> + pr_err("Can't read mnt_ns id\n");
>> + return -1;
>> + }
>> + f.fd = open_proc(pid, "mountinfo");
>> + if (f.fd < 0) {
>> + pr_perror("Can't open mountinfo to parse");
>> + return -1;
>> + }
>> + if (bfdopenr(&f))
>> + return -1;
>> + len = strlen(path);
>> +
>> + while (1) {
>> + p = str = breadline(&f);
>> + if (!p)
>> + break;
>> + i = 0;
>> + while (i < 4) {
>> + p = strchr(p, ' ');
>> + if (!str)
>> + break;
>> + i++;
>> + p++;
>> + }
>> +
>> + if (i != 4) {
>> + pr_err("Can't parse mountinfo\n");
>> + ret = -1;
>> + break;
>> + }
>> +
>> + if (strncmp(p, path, len))
>> + continue;
>> + if (sscanf(str, "%u", &mnt_id) != 1) {
>> + pr_err("Can't parse mountinfo\n");
>> + ret = -1;
>> + }
>> + /* Do not break as we're interested in the last entry */
>> + }
>
> If you want to get mnt_id, you can open path and then parse
> /proc/pid/fdinfo/X. I think we already have a helper to get mnt_id for a
> file descriptor.
>
>> +
>> + bclose(&f);
>> +
>> + if (ret || !mnt_id) {
>> + if (!ret)
>> + pr_err("Can't find %s mounted\n", path);
>> + return -1;
>> + }
>> +
>> + fm = xmalloc(sizeof(*fm));
>> + if (!fm)
>> + return -1;
>> + fm->ns_id = ns_id;
>> + fm->mnt_id = mnt_id;
>> + list_add(&fm->list, &forced_mounts_list);
>> +
>> + return 0;
>> +}
>> +
>> +#define BINFMT_MISC_HOME "/proc/sys/fs/binfmt_misc"
>> +
>> +int try_mount_binfmt_misc(pid_t pid)
>> +{
>> + int num, mnt_fd, ret, exit_code = -1;
>> + struct dirent *de;
>> + DIR *dir;
>> +
>> + ret = switch_ns(pid, &mnt_ns_desc, &mnt_fd);
>> + if (ret < 0) {
>> + pr_err("Can't switch mnt_ns\n");
>> + return -1;
>> + }
>> +
>> + ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
>
> Are you sure that this will work if the BINFMT_MISC_HOME path doesn't exist?
See below. ENOENT is not interpreted as an error.
>> + if (ret < 0) {
>> + if (errno == EPERM) {
>> + pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
>> + exit_code = 0;
>> + goto restore_ns;
>> + }
>> + if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
>> + pr_perror("Can't mount binfmt_misc");
>> + goto restore_ns;
>> + }
>> + pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
>> + } else {
>> + dir = opendir(BINFMT_MISC_HOME);
>> + if (!dir) {
>> + pr_perror("Can't read binfmt_misc dir");
>> + goto restore_ns;
>> + }
>> +
>> + num = 0;
>> + /* ".", "..", "register", "status" */
>> + while (num <= 4 && (de = readdir(dir)) != NULL)
>> + num++;
>> + if (num <= 4) {
>> + /* No entries */
>> + umount(BINFMT_MISC_HOME);
>
> Please call pr_perror here in case of any error.
>
>> + } else {
>> + ret = add_forced_mount(pid, BINFMT_MISC_HOME);
>> + if (ret)
>> + goto restore_ns;
>> + }
>> + closedir(dir);
>> + }
>> +
>> + exit_code = 0;
>> +restore_ns:
>> + ret = restore_ns(mnt_fd, &mnt_ns_desc);
>> +
>> + return ret ? -1 : exit_code;
>> +}
>> +
>> struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>
>> _______________________________________________
>> CRIU mailing list
>> CRIU at openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list