[CRIU] [PATCH v4 2/3] mount: Forced mount unmounted binfmt_misc to do not lost its content

Kirill Tkhai ktkhai at virtuozzo.com
Tue Aug 2 06:34:40 PDT 2016


On 02.08.2016 16:31, Pavel Emelyanov wrote:
> On 08/02/2016 04:25 PM, Kirill Tkhai wrote:
> 
>>>
>>>> +				parent = t;
>>>> +				break;
>>>> +			}
>>>> +		}
>>>> +		if (&t->siblings == &parent->children)
>>>> +			break;
>>>> +	}
>>>> +
>>>> +	mi->nsid = parent->nsid;
>>>> +	mi->parent = parent;
>>>> +	mi->parent_mnt_id = parent->mnt_id;
>>>> +	mi->next = parent->next;
>>>> +	parent->next = mi;
>>>> +	list_add(&mi->siblings, &parent->children);
>>>> +	pr_info("Add cr-time mountpoint %s with parent %s(%u)\n",
>>>> +		mi->mountpoint, parent->mountpoint, parent->mnt_id);
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +
>>>>  /* Is it mounted w or w/o the newinstance option */
>>>>  static int devpts_parse(struct mount_info *pm)
>>>>  {
>>>> @@ -1272,6 +1320,65 @@ static int devtmpfs_restore(struct mount_info *pm)
>>>>  	return ret;
>>>>  }
>>>>  
>>>> +static int try_mount_binfmt_misc(struct ns_id *ns, unsigned int *s_dev)
>>>> +{
>>>> +	int num, mnt_fd, ret, exit_code = -1;
>>>> +	struct dirent *de;
>>>> +	struct stat st;
>>>> +	DIR *dir;
>>>> +
>>>> +	ret = switch_ns(ns->ns_pid, &mnt_ns_desc, &mnt_fd);
>>>> +	if (ret < 0) {
>>>> +		pr_err("Can't switch mnt_ns\n");
>>>> +		goto out;
>>>> +	}
>>>> +
>>>> +	ret = mount("binfmt_misc", BINFMT_MISC_HOME, "binfmt_misc", 0, NULL);
>>>> +	if (ret < 0) {
>>>> +		if (errno == EPERM) {
>>>> +			pr_info("Can't mount binfmt_misc: EPERM. Running in user_ns?\n");
>>>> +			exit_code = 0;
>>>> +			goto restore_ns;
>>>> +		}
>>>> +		if (errno != EBUSY && errno != ENODEV && errno != ENOENT) {
>>>> +			pr_perror("Can't mount binfmt_misc");
>>>> +			goto restore_ns;
>>>> +		}
>>>> +		pr_info("Prepare binfmt_misc: skipping(%d)\n", errno);
>>>> +	} else {
>>>> +		dir = opendir(BINFMT_MISC_HOME);
>>>> +		if (!dir) {
>>>> +			pr_perror("Can't read binfmt_misc dir");
>>>> +			goto restore_ns;
>>>> +		}
>>>> +
>>>> +		num = 0;
>>>> +		/* ".", "..", "register", "status" */
>>>> +		while (num <= 4 && (de = readdir(dir)) != NULL)
>>>> +			num++;
>>>> +		closedir(dir);
>>>> +		if (num <= 4) {
>>>> +			/* No entries */
>>>> +			if (umount(BINFMT_MISC_HOME) < 0)
>>>> +				pr_perror("Can't umount "BINFMT_MISC_HOME"\n");
>>>> +		} else {
>>>> +			if (stat(BINFMT_MISC_HOME, &st) < 0)
>>>> +				pr_perror("Can't stat on binfmt_misc path\n");
>>>> +			else {
>>>> +				*s_dev = st.st_dev;
>>>> +				exit_code = 1;
>>>> +			}
>>>> +			goto restore_ns;
>>>> +		}
>>>
>>> What does this else branch do? Why can't we just add the entry and
>>> let ->dump callback do whatever is appropriate?
>>
>> This branch searches for dev of newly mounted binfmt_misc, because
> 
> It also calls readdir(). What for?

There is a commentary. If no entries is registered, then we don't want to have forced mounted binfmt_misc.
 
>> ->dump callback checks for mount is virtual or not. I can pass 0 as
>> dev here if you want. Do you?
> 
> No, I don't. Getting the mount's dev is needed.
> 
>>>> +	}
>>>> +
>>>> +	exit_code = 0;
>>>> +restore_ns:
>>>> +	ret = restore_ns(mnt_fd, &mnt_ns_desc);
>>>> +out:
>>>> +	return ret ? -1 : exit_code;
>>>> +}
>>>> +
>>>>  static int binfmt_misc_virtual(struct mount_info *pm)
>>>>  {
>>>>  	return kerndat_fs_virtualized(KERNDAT_FS_STAT_BINFMT_MISC, pm->s_dev);
>>>> @@ -1873,6 +1980,11 @@ static int dump_one_mountpoint(struct mount_info *pm, struct cr_img *img)
>>>>  	if (!pm->dumped && dump_one_fs(pm))
>>>>  		return -1;
>>>>  
>>>> +	if (pm->mnt_id == 0) {
>>>> +		pr_info("Skip dumping cr-time mountpoint: %s\n", pm->mountpoint);
>>>> +		return 0;
>>>> +	}
>>>> +
>>>>  	me.mnt_id		= pm->mnt_id;
>>>>  	me.root_dev		= pm->s_dev;
>>>>  	me.parent_mnt_id	= pm->parent_mnt_id;
>>>> @@ -1931,7 +2043,8 @@ static void free_mntinfo(struct mount_info *pms)
>>>>  
>>>>  struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
>>>>  {
>>>> -	struct mount_info *pm;
>>>> +	struct mount_info *pm, *mi;
>>>> +	int ret;
>>>>  
>>>>  	pm = parse_mountinfo(ns->ns_pid, ns, for_dump);
>>>>  	if (!pm) {
>>>> @@ -1943,6 +2056,22 @@ struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump)
>>>>  	if (ns->mnt.mntinfo_tree == NULL)
>>>>  		goto err;
>>>>  
>>>> +	if (for_dump && ns->type == NS_ROOT) {
>>>> +		for (mi = pm; mi != NULL; mi = mi->next) {
>>>> +			if (strcmp(mi->fstype->name, "binfmt_misc") == 0)
>>>> +				break;
>>>> +		}
>>>
>>> You don't have to search for the entry, just declare the ->parse callback
>>> and set the boolean flag. Or add flag (or count) on fstype struct and count
>>> for mountpoints of desired fstype.
>>
>> What is "just declare the ->parse callback"? 
> 
> criu/mount.c, line 1672
> 
>         }, {
>                 .name = "devpts",
>                 .parse = devpts_parse,      <<<< this thing gets called when reading an entry in /proc/pid/mountinfo
>                 .code = FSTYPE__DEVPTS,
>         }, {

And global boolean variable "binfmt_misc_collected"?
 
> 
>>>> +		if (!mi) {
>>>> +			unsigned int s_dev = 0;
>>>> +			ret = try_mount_binfmt_misc(ns, &s_dev);
>>>> +			if (ret < 0)
>>>> +				return NULL;
>>>> +			if (ret > 0 && add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc",
>>>> +							 BINFMT_MISC_HOME, s_dev) < 0)
>>>> +				return NULL;
>>>> +		}
>>>> +	}
>>>> +
>>>>  	ns->mnt.mntinfo_list = pm;
>>>>  	return pm;
>>>>  err:
>>>> @@ -3683,4 +3812,28 @@ int dump_mnt_namespaces(void)
>>>>  	return 0;
>>>>  }
>>>>  
>>>> +void clean_cr_time_mounts(void)
>>>> +{
>>>> +	struct mount_info *mi;
>>>> +	int mnt_fd, ret;
>>>> +
>>>> +	for (mi = mntinfo; mi; mi = mi->next) {
>>>> +		if (mi->mnt_id)
>>>> +			continue;
>>>> +		ret = switch_ns(mi->nsid->ns_pid, &mnt_ns_desc, &mnt_fd);
>>>> +		if (ret) {
>>>> +			pr_err("Can't switch to pid's %u mnt_ns\n", mi->nsid->ns_pid);
>>>> +			continue;
>>>> +		}
>>>> +
>>>> +		if (umount(mi->mountpoint) < 0)
>>>> +			pr_perror("Can't umount forced mount %s\n", mi->mountpoint);
>>>> +
>>>> +		if (restore_ns(mnt_fd, &mnt_ns_desc)) {
>>>
>>> Can we restore it once outside the loop?
>>
>> How can we restore hypothetical several forced mounts without a loop?
> 
> I mean restore_ns() :\ Switch ns-s one by one and return to original one once.

Will open_proc() called in switch_ns() work in this case?
 
>>>> +			pr_err("cleanup_forced_mounts exiting with wrong mnt_ns\n");
>>>> +			return;
>>>> +		}
>>>> +	}
>>>> +}
>>>> +
>>>>  struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt");
>>>>
>>>> _______________________________________________
>>>> CRIU mailing list
>>>> CRIU at openvz.org
>>>> https://lists.openvz.org/mailman/listinfo/criu
>>>> .
>>>>
>>>
>> .
>>
> 


More information about the CRIU mailing list