[CRIU] [RFC PATCH 4/5] autofs: restore procedure introduced

Stanislav Kinsburskiy skinsbursky at odin.com
Mon Nov 23 13:04:03 PST 2015



23.11.2015 20:54, Pavel Emelyanov пишет:
> On 11/23/2015 08:22 PM, Stanislav Kinsburskiy wrote:
>> To restore AutoFS we will need a bunch of tricks:
>>
>> 1) Autofs have to be mounted by a specialized routine, because we need to
>> create a context (see below), remove "timeout=" from mount options and set
>> proper "pgrp=" and "fd=" options. Also we need to set mount point as catatonic,
>> if it was so, and adjust timeout. Which is done via mount point ioctls.
>> To isolate all these from generic code, "mount" callback in fstype operations
>> was introduced.
>>
>> 2) When all mounts restored, we need replace AutoFS kernel pipe end with the
>> right one (belonging to the autmount/systemd/etc process). This is performed
>> via "post_open" callback for pipes. If process belongs to one of automount
>> contexts, and pipe fd is the right one, then AutoFS mount is set ctatonic and
>> it's pgrp and fd are changes to right ones via /dev/autofs ioctl.
>>
>> Note: This patch contains header file auto_dev-ioctl.h, which is used to
>> easy ioctl calls.
> Is this header exported to the user-space somehow?

I don't think it's a part of some package. Though I haven't tried to find.
Autofs utils sources carry this file with them. It's also available in 
kernel includes.

>> Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
>> ---
>>   auto_dev-ioctl.h     |  229 +++++++++++++++++++++++++++++++++++++++
>>   include/proc_parse.h |    3 +
>>   mount.c              |  291 ++++++++++++++++++++++++++++++++++++++++++++++++--
>>   pipes.c              |   18 +++
>>   4 files changed, 527 insertions(+), 14 deletions(-)
>>   create mode 100644 auto_dev-ioctl.h
>>
>> diff --git a/auto_dev-ioctl.h b/auto_dev-ioctl.h
>> new file mode 100644
>> index 0000000..850f39b
>> --- /dev/null
>> +++ b/auto_dev-ioctl.h
>> diff --git a/include/proc_parse.h b/include/proc_parse.h
> ...
>> index 123f5db..80fda92 100644
>> --- a/include/proc_parse.h
>> +++ b/include/proc_parse.h
>> @@ -106,6 +106,9 @@ struct fstype {
>>   	int (*dump)(struct mount_info *pm);
>>   	int (*restore)(struct mount_info *pm);
>>   	int (*parse)(struct mount_info *pm);
>> +	int (*mount)(const char *source, const char *target,
>> +		     const char *filesystemtype, unsigned long mountflags,
>> +		     const void *data);
> Introducing .mount callback should go with separate patch.

Ok.

>
>>   };
>>   
>>   struct vm_area_list;
>> diff --git a/mount.c b/mount.c
>> index c07d2d1..7a391ca 100644
>> --- a/mount.c
>> +++ b/mount.c
>> @@ -30,12 +30,18 @@
>>   
>>   #include "protobuf/mnt.pb-c.h"
>>   
>> +#include "auto_dev-ioctl.h"
>> +
>>   #define AUTODETECTED_MOUNT "CRIU:AUTOGENERATED"
>>   #define MS_PROPAGATE (MS_SHARED | MS_PRIVATE | MS_UNBINDABLE | MS_SLAVE)
>>   
>>   #undef	LOG_PREFIX
>>   #define LOG_PREFIX "mnt: "
>>   
>> +typedef int (*mount_fn_t)(const char *source, const char *target,
>> +		const char *filesystemtype, unsigned long mountflags,
>> +		const void *data);
>> +
>>   int ext_mount_add(char *key, char *val)
>>   {
>>   	struct ext_mount *em;
>> @@ -1130,26 +1136,30 @@ out:
>>   	return -1;
>>   }
>>   
>> -static int attach_option(struct mount_info *pm, char *opt)
>> +static char *attach_token(char *string, const char *token, char separator)
>>   {
>>   	char *buf;
>>   	int len, olen;
>>   
>> -	len = strlen(pm->options);
>> -	olen = strlen(opt);
>> -	buf = xrealloc(pm->options, len + olen + 2);
>> +	len = strlen(string);
>> +	olen = strlen(token);
>> +	buf = xrealloc(string, len + olen + 2);
>>   	if (buf == NULL)
>> -		return -1;
>> +		return NULL;
>>   
>> -	if (len && buf[len - 1] != ',') {
>> -		buf[len] = ',';
>> +	if (len && buf[len - 1] != separator) {
>> +		buf[len] = separator;
>>   		len++;
>>   	}
>>   
>> -	memcpy(buf + len, opt, olen + 1);
>> -	pm->options = buf;
>> +	memcpy(buf + len, token, olen + 1);
>> +	return buf;
>> +}
> The attach_token() should come with separate patch.

Ditto.

>
>> -	return 0;
>> +static int attach_option(struct mount_info *pm, char *opt)
>> +{
>> +	pm->options  = attach_token(pm->options, opt, ',');
>> +	return (pm->options) ? 0 : -1;
>>   }
>>   
>>   /* Is it mounted w or w/o the newinstance option */
>> @@ -1405,8 +1415,6 @@ static int autofs_fixup_options(struct mount_info *new)
>>   	for (i = 0; i < nr_opts; i++) {
>>   		char *opt = options[i];
>>   
>> -		if (!strncmp(opt, "timeout=", strlen("timeout=")))
>> -			continue;
>>   		if (!strncmp(opt, "pgrp=", strlen("pgrp="))) {
>>   			int real_pid, virt_pid = -1;
>>   			struct pstree_item *item;
>> @@ -1460,6 +1468,7 @@ static int autofs_dump(struct mount_info *pm)
>>   	int pipe_fd, pipe_size, steal_pipe[2];
>>   	ssize_t bytes;
>>   	int err;
>> +	struct mount_info *t;
>>   
>>   	/* TODO: we are called wthin proper pid namespace. We can't use blobal
>>   	 * proc and have to mount proc somewhere */
>> @@ -1512,6 +1521,13 @@ static int autofs_dump(struct mount_info *pm)
>>   	if (err)
>>   		goto close_steal;
>>   
>> +	list_for_each_entry(t, &pm->mnt_bind, mnt_bind) {
>> +		xfree(t->options);
>> +		t->options = xstrdup(pm->options);
>> +		if (!t->options)
>> +			goto close_steal;
>> +	}
>> +
> Runaway from patch #2?

Yes, it is. Will fix,

>>   	err = 0;
>>   
>>   close_steal:
>> @@ -1522,6 +1538,252 @@ close_pipe:
>>   	return err;
>>   }
>>   
>> +struct autofs_info_s {
>> +	int pipe_fd;
>> +	const char *mnt_path;
>> +	struct autofs_info_s *next;
>> +};
>> +
>> +struct autofs_service_s {
>> +	int pgrp;
>> +	struct autofs_info_s *mnt_info;
>> +	struct autofs_service_s *next;
>> +} *automount;
>> +
>> +int autofs_fixup_owner(struct autofs_info_s *info)
>> +{
>> +	int autofs_fd, mnt_fd, err = -1;
>> +	struct autofs_dev_ioctl param;
>> +
>> +	autofs_fd = open("/dev/autofs", O_RDONLY);
>> +	if (autofs_fd == -1) {
>> +		pr_perror("failed to open /dev/autofs");
>> +		return -1;
>> +	}
>> +
>> +	mnt_fd = open(info->mnt_path, O_RDONLY | O_DIRECTORY);
>> +	if (mnt_fd == -1) {
>> +		pr_perror("failed to open %s", info->mnt_path);
>> +		goto close_fd;
>> +	}
>> +
>> +	init_autofs_dev_ioctl(&param);
>> +	err = ioctl(mnt_fd, AUTOFS_IOC_CATATONIC, 0);
>> +	if (err == -1) {
>> +		pr_perror("failed to set mount point catatonic");
>> +		goto close_mnt_fd;
>> +
>> +	}
>> +
>> +	init_autofs_dev_ioctl(&param);
>> +	param.ioctlfd = mnt_fd;
>> +	param.setpipefd.pipefd = info->pipe_fd;
> The info->pipe_fd is set by autofs_mount() and the fd value is purely
> parsed from options. Who and when creates this very fd?

The pipe itself belongs to automount process and restored as any other fd.
Yes, we are probably lucky, that automount doesn't close write end of 
the pipe, which is sent to the kernel.
And I don't know yet, what to do, if user space process doesn't have 
this descriptor with pipe write end installed.
I think, it makes sense to forbid migration of such a pipe for a while. 
IOW, check on dump, that desired pipe end is available in the proper 
process on the right place, or something like this.
If migration of semi-pipe with only one end is supported, then, 
probably, this case to be handled somehow on top of this series.

>
>> +
>> +	err = ioctl(autofs_fd, AUTOFS_DEV_IOCTL_SETPIPEFD, &param);
>> +	if (err == -1) {
>> +		pr_perror("failed to send ioctl to /dev/autofs");
>> +		goto close_mnt_fd;
>> +
>> +	}
>> +
>> +	pr_info("autofs mount \"%s\" owner restored: pgrp=%d, fd=%d\n",
>> +			info->mnt_path, getpid(), mnt_fd);
>> +
>> +	err = 0;
>> +
>> +close_fd:
>> +	close(autofs_fd);
>> +close_mnt_fd:
>> +	close(mnt_fd);
>> +	return err;
>> +}
>> +
> ...
>> +
>> +static int autofs_mount(const char *source, const char *target,
>> +		const char *filesystemtype, unsigned long mountflags,
>> +		const void *data)
>> +{
>> +	char *mount_ops, **options;
>> +	int nr_opts, i, err = -1;
>> +	int pgrp = 0, fd = INT_MIN, timeout = 0;
>> +	int automount_pipe[2];
>> +
>> +	mount_ops = xstrdup(data);
>> +	if (!mount_ops)
>> +		return -1;
>> +
>> +	split(mount_ops, ',', &options, &nr_opts);
>> +	if (!options)
>> +		return -1;
>> +
>> +	memset(mount_ops, 0, strlen(mount_ops));
>> +
>> +	if (pipe(automount_pipe) < 0) {
>> +		pr_perror("Can't create pipe");
>> +		return -1;
>> +	}
>> +
>> +	sprintf(mount_ops, "fd=%d,pgrp=%d", automount_pipe[1], getpgrp());
>> +
>> +	for (i = 0; i < nr_opts; i++) {
>> +		char *opt = options[i];
>> +
>> +		if (!strncmp(opt, "timeout=", strlen("timeout="))) {
>> +			timeout = atoi(opt + strlen("timeout="));
>> +			continue;
>> +		}
>> +
>> +		if (!strncmp(opt, "pgrp=", strlen("pgrp="))) {
>> +			pgrp = atoi(opt + strlen("pgrp="));
>> +			continue;
>> +		}
>> +
>> +		if (!strncmp(opt, "fd=", strlen("fd="))) {
>> +			fd = atoi(opt + strlen("fd="));
>> +			continue;
>> +		}
>> +
>> +		strcat(mount_ops, ",");
>> +		strcat(mount_ops, opt);
>> +	}
>> +
>> +	if (fd == INT_MIN) {
>> +		pr_err("failed to find \"fd=\" option\n");
>> +		goto close_pipe;
>> +	}
>> +
>> +	if (pgrp == 0) {
>> +		pr_err("failed to find \"pgrp=\" option\n");
>> +		goto close_pipe;
>> +	}
>> +
>> +	pr_info("autofs mount: target: %s, pgrp: %d, fd: %d\n", target, pgrp, fd);
>> +	pr_info("autofs mount: adjusted options: \"%s\"\n", mount_ops);
>> +
>> +	if (mount(source, target, filesystemtype, mountflags, mount_ops) < 0) {
>> +		pr_perror("Can't mount at %s", target);
>> +		goto close_pipe;
>> +	}
>> +
>> +	if (autofs_set_timeout(target, timeout))
>> +		goto close_pipe;
>> +
>> +	if (fd == -1)
>> +		err = autofs_make_catatonic(target);
>> +	else
>> +		err = autofs_add_info(pgrp, fd, target);
> You will turn this into catatonic mode in autofs_fixup_owner() anyway,
> why not put here it into this mode always and fixup (if required) in
> autofs_fixup_owner()?

We can't put it to catatonic mode here, because we won't be able to 
create dentries for nested mounts. We could do it only after all mounts 
are created ,but then we will need one more callback or something like this.

>
>> +
>> +close_pipe:
>> +	close(automount_pipe[1]);
>> +	close(automount_pipe[0]);
>> +	return err;
>> +}
>> +
>>   static struct fstype fstypes[32] = {
>>   	{
>>   		.name = "unsupported",
>> @@ -1594,7 +1856,7 @@ static struct fstype fstypes[32] = {
>>   		.name = "autofs",
>>   		.code = FSTYPE__AUTOFS,
>>   		.dump = autofs_dump,
>> -		.restore = always_fail,
>> +		.mount = autofs_mount,
>>   	},
>>   };
>>   
>> @@ -2096,6 +2358,7 @@ static int do_new_mount(struct mount_info *mi)
>>   	char *src;
>>   	struct fstype *tp = mi->fstype;
>>   	bool remount_ro = (tp->restore && mi->sb_flags & MS_RDONLY);
>> +	mount_fn_t do_mount = (tp->mount) ? tp->mount: mount;
>>   
>>   	src = resolve_source(mi);
>>   	if (!src)
>> @@ -2110,7 +2373,7 @@ static int do_new_mount(struct mount_info *mi)
>>   	if (remount_ro)
>>   		sflags &= ~MS_RDONLY;
>>   
>> -	if (mount(src, mi->mountpoint, tp->name, sflags, mi->options) < 0) {
>> +	if (do_mount(src, mi->mountpoint, tp->name, sflags, mi->options) < 0) {
>>   		pr_perror("Can't mount at %s", mi->mountpoint);
>>   		return -1;
>>   	}
>> diff --git a/pipes.c b/pipes.c
>> index d4f70db..bda1c1d 100644
>> --- a/pipes.c
>> +++ b/pipes.c
>> @@ -380,11 +380,29 @@ static int want_transport(FdinfoEntry *fe, struct file_desc *d)
>>   	return !pi->create;
>>   }
>>   
>> +struct autofs_info_s;
>> +struct autofs_info_s *find_automount_info(int pgrp, int pipe_fd);
>> +int autofs_fixup_owner(struct autofs_info_s *info);
>> +
>> +static int pipe_post_open(struct file_desc *d, int fd)
>> +{
>> +	struct autofs_info_s *info;
>> +
>> +	pr_info("%s: searching info by pid %d and fd %d\n", __func__, getpid(), fd);
>> +	info = find_automount_info(getpid(), fd);
> The find_automount_info() is used only here. Why not merge this call into
> autofs_fixup_owner()?

Ok, will merge.

>
>> +	if (!info)
>> +		return 0;
>> +
>> +	pr_info("%s: found info by pid %d and fd %d\n", __func__, getpid(), fd);
>> +	return autofs_fixup_owner(info);
>> +}
>> +
>>   static struct file_desc_ops pipe_desc_ops = {
>>   	.type		= FD_TYPES__PIPE,
>>   	.open		= open_pipe,
>>   	.want_transport	= want_transport,
>>   	.name		= pipe_d_name,
>> +	.post_open	= pipe_post_open,
> This is way too slow to lookup autofs info for every single pipe in a system.
> I don't have any exact solution ATM, but doing it vice-versa, i.e. -- finding
> a pipe when collecting/creating autofs mountpoint and marking it with a bit,
> saying this should be done, is something I'd think about.

It this information is already loaded, and I can locate then the pipe 
via pid and ff, then yes, I can do so.

>
>>   };
>>   
>>   static int collect_one_pipe(void *o, ProtobufCMessage *base)
>>
>> _______________________________________________
>> CRIU mailing list
>> CRIU at openvz.org
>> https://lists.openvz.org/mailman/listinfo/criu
>> .
>>



More information about the CRIU mailing list