[CRIU] [PATCH 15/15] autofs: restore stage introduced

Stanislav Kinsburskiy skinsbursky at odin.com
Fri Dec 4 02:19:35 PST 2015


Please ignore this one.

03.12.2015 14:30, Stanislav Kinsburskiy пишет:
> An AutoFS mount point is managed by a user-space daemon, which receives
> requests from the kernel via a pipe passed in during the mount operation.
> The kernel holds the write end; the user-space process holds the read end.
> Thus, for successful AutoFS migration, this connection has to be restored.
>
> The mount point is created by the init process. A per-mount AutoFS context
> is created to fix up the actual pipe connection once all the mount
> points are restored.
>
> The problems with restoring the pipe connection are:
>
> 1) It has to be done from the user-space process context, because the pipe
> and pgrp values are taken from current.
> 2) Actual mounts are created by the "init" process, and the mount point
> owner (master) has to be set later.
> 3) To update the mount point master, one requires a mount point fd and an
> open write end of the pipe.
>
> To provide the AutoFS mount point master with the resources mentioned above,
> the following was implemented:
>
> 1) The mount point is opened in the "init" process. This is because it can
> be overmounted.
> 2) An artificial master file descriptor is created for the mount point descriptor.
> 3) The mount point descriptor is passed to the AutoFS mount point master as
> an artificial slave file descriptor. Thus the master will receive the mount
> point file descriptor.
> 4) If the AutoFS master doesn't have the write pipe end open, it is created as
> a clone of the read pipe end file descriptor and added to the master's fds list.
> 5) An artificial file descriptor is created for the write pipe end file descriptor
> to fix up the mount point (and to close the write pipe descriptor, if it was opened).
>
> Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
> ---
>   autofs.c         |  521 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   include/autofs.h |    3
>   mount.c          |    2
>   3 files changed, 525 insertions(+), 1 deletion(-)
>
> diff --git a/autofs.c b/autofs.c
> index 589e87a..6c44741 100644
> --- a/autofs.c
> +++ b/autofs.c
> @@ -1,12 +1,16 @@
>   #include <unistd.h>
>   #include <sys/stat.h>
>   #include <stdarg.h>
> +#include <sys/mount.h>
>   
>   #include "util.h"
>   #include "pstree.h"
>   #include "proc_parse.h"
>   #include "namespaces.h"
>   #include "autofs.h"
> +#include "pstree.h"
> +
> +#include "auto_dev-ioctl.h"
>   
>   static char *add_to_string_vargs(char *str, const char *fmt, va_list args)
>   {
> @@ -320,3 +324,520 @@ fix_it:
>   				    read_fd);
>   }
>   
> +typedef struct autofs_mount_s {
> +	struct file_desc pipe_desc;
> +	struct file_desc mnt_desc;
> +	char *mnt_path;
> +	int mnt_fd;
> +	int fd;
> +	bool close_fd;
> +	int timeout;
> +} autofs_mount_t;
> +
> +static int autofs_ioctl(const char *path, int fd, int cmd, const void *param)
> +{
> +	int err;
> +
> +	err = ioctl(fd, cmd, param);
> +	if (err)
> +		pr_perror("%s ioctl failed", path);
> +
> +	return err;
> +}
> +
> +static int autofs_mnt_ioctl(const autofs_mount_t *m, int cmd, const void *param)
> +{
> +	return autofs_ioctl(m->mnt_path, m->mnt_fd, cmd, param);
> +}
> +
> +static int autofs_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
> +{
> +	char *path = "/dev/autofs";
> +	int fd, err;
> +
> +	fd = open(path, O_RDONLY);
> +	if (fd == -1) {
> +		pr_perror("failed to open %s", path);
> +		return -1;
> +	}
> +
> +	err = autofs_ioctl(path, fd, cmd, param);
> +
> +	close(fd);
> +	return err;
> +}
> +
> +static int autofs_mnt_make_catatonic(const autofs_mount_t *m)
> +{
> +	pr_info("%s: set %s catatonic\n", __func__, m->mnt_path);
> +	return autofs_mnt_ioctl(m, AUTOFS_IOC_CATATONIC, NULL);
> +}
> +
> +static int autofs_mnt_set_timeout(const autofs_mount_t *m)
> +{
> +	time_t timeout = m->timeout;
> +
> +	pr_info("%s: set timeout %ld for %s\n", __func__, timeout, m->mnt_path);
> +	return autofs_mnt_ioctl(m, AUTOFS_IOC_SETTIMEOUT, &timeout);
> +}
> +
> +static int autofs_mnt_set_pipefd(const autofs_mount_t *m)
> +{
> +	struct autofs_dev_ioctl param;
> +
> +	/* Restore pipe and pgrp only for non-catatonic mounts */
> +	if (m->fd == -1)
> +		return 0;
> +
> +	pr_info("%s: set pipe fd %d (pgrp %d) for mount %s\n", __func__,
> +			m->fd, getpgrp(), m->mnt_path);
> +
> +	init_autofs_dev_ioctl(&param);
> +	param.ioctlfd = m->mnt_fd;
> +	param.setpipefd.pipefd = m->fd;
> +
> +	return autofs_dev_ioctl(AUTOFS_DEV_IOCTL_SETPIPEFD, &param);
> +}
> +
> +/* Here to fixup Autofs mount */
> +static int autofs_post_open(struct file_desc *d, int fd)
> +{
> +	autofs_mount_t *m = container_of(d, autofs_mount_t, pipe_desc);
> +
> +	pr_info("%s: restoring %s\n", __func__, m->mnt_path);
> +
> +	if (autofs_mnt_set_timeout(m)) {
> +		pr_err("Failed to set timeout %d for %s\n",
> +				m->timeout, m->mnt_path);
> +		return -1;
> +	}
> +	if (autofs_mnt_make_catatonic(m)) {
> +		pr_err("Failed to set %s catatonic\n", m->mnt_path);
> +		return -1;
> +	}
> +
> +	if (autofs_mnt_set_pipefd(m)) {
> +		pr_err("Failed to set %s owner\n", m->mnt_path);
> +		return -1;
> +	}
> +
> +	pr_info("autofs mount %s owner restored: pgrp=%d, fd=%d\n",
> +			m->mnt_path, getpgrp(), m->fd);
> +
> +	if (m->close_fd) {
> +		pr_info("%s: pid %d, closing write end %d\n", __func__,
> +				getpid(), m->fd);
> +		close(m->fd);
> +	}
> +
> +	pr_info("%s: pid %d, closing artificial pipe end %d\n", __func__,
> +					getpid(), fd);
> +	close(fd);
> +	return 0;
> +}
> +
> +static struct file_desc_ops autofs_pipe_desc_ops = {
> +	.type = FD_TYPES__ARTIFICIAL,
> +	.post_open = autofs_post_open,
> +};
> +
> +static struct fdinfo_list_entry *create_new_fdinfo(struct file_desc *desc,
> +						   int pid, int fd,
> +						   int type, int mode)
> +{
> +	struct fdinfo_list_entry *le;
> +	FdinfoEntry *fe;
> +
> +	fe = shmalloc(sizeof(*fe));
> +	if (!fe)
> +		return NULL;
> +
> +	fe->fd = fd;
> +	fe->type = type;
> +	fe->flags = mode;
> +
> +	le = shmalloc(sizeof(*le) + sizeof(u32));
> +	if (!le)
> +		return NULL;
> +
> +	le = (struct fdinfo_list_entry *)ALIGN((unsigned long)le, sizeof(u32));
> +
> +	futex_init(&le->real_pid);
> +	le->pid = pid;
> +	le->fe = fe;
> +
> +	le->desc = desc;
> +	list_add_tail(&le->desc_list, &desc->fd_info_head);
> +	return le;
> +}
> +
> +static int autofs_mount_post_open(struct file_desc *d, int fd)
> +{
> +	pr_info("%s: pid %d, closing %d\n", __func__,
> +			getpid(), fd);
> +	return close(fd);
> +}
> +
> +static int autofs_mount_open(struct file_desc *d)
> +{
> +	struct fdinfo_list_entry *fle;
> +
> +	fle = list_first_entry(&d->fd_info_head,
> +			struct fdinfo_list_entry, desc_list);
> +
> +	pr_info("%s: pid %d, mnt_fd: %d\n", __func__, getpid(), fle->fe->fd);
> +	return fle->fe->fd;
> +}
> +
> +static struct file_desc_ops autofs_mount_desc_ops = {
> +	.type = FD_TYPES__ARTIFICIAL,
> +	.open = autofs_mount_open,
> +	.post_open = autofs_mount_post_open,
> +};
> +
> +static autofs_mount_t *autofs_create_mount_info(const char *mnt_path,
> +						int timeout)
> +{
> +	autofs_mount_t *m;
> +
> +	m = shmalloc(sizeof(*m));
> +	if (!m)
> +		return NULL;
> +
> +	m->mnt_path = shmalloc(strlen(mnt_path) + 1);
> +	if (!m->mnt_path)
> +		return NULL;
> +	strcpy(m->mnt_path, mnt_path);
> +
> +	file_desc_init(&m->pipe_desc, 0, &autofs_pipe_desc_ops);
> +	file_desc_init(&m->mnt_desc, 0, &autofs_mount_desc_ops);
> +
> +	m->timeout = timeout;
> +
> +	return m;
> +}
> +
> +static struct fdinfo_list_entry *create_artificial_fle(struct rst_info *rsti,
> +			struct file_desc *desc, int pid, int fd, int mode)
> +{
> +	struct fdinfo_list_entry *fle;
> +
> +	fle = create_new_fdinfo(desc, pid, fd, FD_TYPES__ARTIFICIAL, mode);
> +	if (!fle)
> +		return NULL;
> +
> +	collect_used_fd(fle, rsti);
> +	return fle;
> +}
> +
> +static int autofs_add_mount_fle(struct pstree_item *task,
> +				autofs_mount_t *m, int mnt_fd)
> +{
> +	struct fdinfo_list_entry *fle;
> +	struct rst_info *rst_info;
> +
> +	rst_info = rsti(task);
> +
> +	fle = create_artificial_fle(rst_info, &m->mnt_desc,
> +				    task->pid.virt, mnt_fd,
> +				    O_RDONLY | O_DIRECTORY);
> +	if (!fle) {
> +		pr_err("Failed to create mnt fle for %s in process %d\n",
> +				m->mnt_path, task->pid.virt);
> +		return -1;
> +	}
> +	list_add_tail(&fle->ps_list, &rst_info->artificial);
> +
> +	pr_info("autofs: added mount fle: pid %d, fd %d, path: %s\n",
> +			fle->pid, fle->fe->fd, m->mnt_path);
> +	return 0;
> +}
> +
> +static int autofs_create_mount_fle(struct pstree_item *task, autofs_mount_t *m)
> +{
> +	int mnt_fd;
> +
> +	/* we have to pass mnt_fd to master process */
> +	mnt_fd = find_unused_fd(&rsti(task)->used, -1);
> +	if (mnt_fd < 0) {
> +		pr_err("Failed to find free fd in process %d\n",
> +				task->pid.virt);
> +		return -1;
> +	}
> +
> +	/* We need to add an artificial file as a handler for mnt_fd close */
> +	if (autofs_add_mount_fle(task, m, mnt_fd) < 0) {
> +		pr_err("Failed to create mount point fd %s\n", m->mnt_path);
> +		return -1;
> +	}
> +
> +	return mnt_fd;
> +}
> +
> +static int autofs_open_mount(struct pstree_item *task, autofs_mount_t *m)
> +{
> +	int mnt_fd, fd;
> +
> +	/* Mount point is opened here to handle overmount cases (like systemd
> +	 * does) */
> +	fd = open(m->mnt_path, O_RDONLY | O_DIRECTORY);
> +	if (fd < 0) {
> +		pr_perror("Failed to open %s", m->mnt_path);
> +		return -1;
> +	}
> +
> +	mnt_fd = find_unused_fd(&rsti(task)->used, -1);
> +
> +	if (dup2(fd, mnt_fd) < 0) {
> +		pr_perror("Failed to dup2 %d to %d\n", fd, mnt_fd);
> +		return -1;
> +	}
> +	close(fd);
> +
> +	if (autofs_add_mount_fle(task, m, mnt_fd) < 0) {
> +		pr_err("Failed to create mount point fd %s\n", m->mnt_path);
> +		return -1;
> +	}
> +
> +	return mnt_fd;
> +}
> +
> +static int autofs_create_pipe_fle(struct pstree_item *task, autofs_mount_t *m)
> +{
> +	struct fdinfo_list_entry *fle;
> +	struct rst_info *rst_info;
> +	int pipe_fd;
> +
> +	rst_info = rsti(task);
> +	pipe_fd = find_unused_fd(&rst_info->used, -1);
> +
> +	fle = create_artificial_fle(rst_info, &m->pipe_desc,
> +				    task->pid.virt, pipe_fd, O_WRONLY);
> +	if (!fle) {
> +		pr_err("Failed to create pipe fle for fd %d in process %d\n",
> +				pipe_fd, task->pid.virt);
> +		return -1;
> +	}
> +	list_add(&fle->ps_list, &rst_info->artificial);
> +
> +	pr_info("autofs: added pipe fle: pid %d, fd %d\n",
> +			fle->pid, fle->fe->fd);
> +	return 0;
> +}
> +
> +static struct fdinfo_list_entry *find_fle_by_fd(struct list_head *head, int fd)
> +{
> +	struct fdinfo_list_entry *fle;
> +
> +	list_for_each_entry(fle, head, ps_list) {
> +		if (fle->fe->fd == fd)
> +			return fle;
> +	}
> +	return NULL;
> +}
> +
> +static int create_pipe_fle(struct rst_info *rsti, struct file_desc *desc,
> +				 int pid, int fd, int mode)
> +{
> +	struct fdinfo_list_entry *fle;
> +
> +	fle = create_new_fdinfo(desc, pid, fd, FD_TYPES__PIPE, mode);
> +	if (!fle)
> +		return -1;
> +
> +	list_add_tail(&fle->ps_list, &rsti->fds);
> +	collect_used_fd(fle, rsti);
> +	return 0;
> +}
> +
> +static int autofs_open_write_pipe(struct pstree_item *task,
> +			      int read_fd, int write_fd)
> +{
> +	struct fdinfo_list_entry *read_fle;
> +	struct rst_info *rst_info;
> +	int err;
> +
> +	rst_info = rsti(task);
> +	write_fd = find_unused_fd(&rst_info->used, write_fd);
> +
> +	read_fle = find_fle_by_fd(&rst_info->fds, read_fd);
> +	if (!read_fle) {
> +		pr_err("Failed to find fd %d in process %d\n", read_fd,
> +				task->pid.virt);
> +		return -1;
> +	}
> +	if (read_fle->fe->type != FD_TYPES__PIPE) {
> +		pr_err("Fd %d in process %d is not a pipe: %d\n", read_fd,
> +				task->pid.virt, read_fle->fe->type);
> +		return -1;
> +	}
> +
> +	err = create_pipe_fle(rst_info, read_fle->desc, read_fle->pid,
> +				    write_fd, O_WRONLY);
> +	if (err < 0) {
> +		pr_err("Failed to create pipe_fle for %d in process %d\n",
> +				write_fd, read_fle->pid);
> +		return -1;
> +	}
> +
> +	pr_info("autofs: added write pipe end fle: pid %d, fd %d\n",
> +			read_fle->pid, write_fd);
> +	return write_fd;
> +}
> +
> +static int autofs_add_mount(const char *mnt_path, int pgrp, int write_fd,
> +			    int read_fd, int timeout)
> +{
> +	int mnt_fd;
> +	bool close_write_fd = false;
> +	struct pstree_item *current, *master;
> +	autofs_mount_t *m;
> +
> +	m = autofs_create_mount_info(mnt_path, timeout);
> +	if (!m) {
> +		pr_err("Failed to allocate mount info\n");
> +		return -1;
> +	}
> +
> +	current = pstree_item_by_virt(getpid());
> +
> +	BUG_ON(current == NULL);
> +
> +	/* We need to open mount point fd, because it can be overmounted. */
> +	mnt_fd = autofs_open_mount(current, m);
> +	if (mnt_fd < 0) {
> +		pr_err("Failed to open mount point %s on fd %d\n", mnt_path,
> +				mnt_fd);
> +		return -1;
> +	}
> +
> +	if (write_fd == -1)
> +		/* Catatonic mounts are always init's */
> +		master = current;
> +	else
> +		master = pstree_item_by_virt(pgrp);
> +
> +	BUG_ON(master == NULL);
> +
> +	if (read_fd != INT_MIN) {
> +		/* Original pipe write end was closed.
> +		 * We need to create one to be able to fix up the AutoFS mount. */
> +
> +		write_fd = autofs_open_write_pipe(master, read_fd, write_fd);
> +		if (write_fd < 0) {
> +			pr_err("Failed to find free fd in process %d\n",
> +					master->pid.virt);
> +			return -1;
> +		}
> +
> +		close_write_fd = true;
> +	}
> +
> +	/* Now we need to add an artificial file, which will be used as a handler
> +	 * for the AutoFS mount fixup */
> +	if (autofs_create_pipe_fle(master, m) < 0) {
> +		pr_err("Failed to create mount point fd %s\n", mnt_path);
> +		return -1;
> +	}
> +
> +	if (master != current) {
> +		/* We need to create artificial file, as a handler for mnt_fd close */
> +		mnt_fd = autofs_create_mount_fle(master, m);
> +		if (mnt_fd < 0) {
> +			pr_err("Failed to create mount point fd %s\n", mnt_path);
> +			return -1;
> +		}
> +	}
> +
> +	m->mnt_fd = mnt_fd;
> +	m->fd = write_fd;
> +	m->close_fd = close_write_fd;
> +	return 0;
> +}
> +
> +int autofs_mount(const char *source, const char *target,
> +		 const char *filesystemtype, unsigned long mountflags,
> +		 const void *data)
> +{
> +	char *mount_ops, **options;
> +	int i, control_pipe[2] = { -1, -1 }, nr_opts;
> +	int err = -1, timeout = INT_MIN, pgrp = INT_MIN;
> +	int fd = INT_MIN, read_fd = INT_MIN;
> +
> +	mount_ops = xstrdup(data);
> +	if (!mount_ops)
> +		return -1;
> +
> +	pr_info("autofs ===================================\n");
> +	pr_info("autofs original mount options: \"%s\"\n", mount_ops);
> +
> +	split(mount_ops, ',', &options, &nr_opts);
> +	if (!options)
> +		return -1;
> +
> +	xfree(mount_ops);
> +
> +	if (pipe(control_pipe) < 0) {
> +		pr_perror("Can't create pipe");
> +		return -1;
> +	}
> +
> +	mount_ops = construct_string("fd=%d,pgrp=%d", control_pipe[1],
> +						      getpgrp());
> +	if (!mount_ops)
> +		goto close_pipe;
> +
> +	for (i = 0; i < nr_opts; i++) {
> +		char *opt = options[i];
> +
> +		if (!strncmp(opt, "timeout=", strlen("timeout="))) {
> +			timeout = atoi(opt + strlen("timeout="));
> +			continue;
> +		} else if (!strncmp(opt, "pgrp=", strlen("pgrp="))) {
> +			pgrp = atoi(opt + strlen("pgrp="));
> +			continue;
> +		} else if (!strncmp(opt, "fd=", strlen("fd="))) {
> +			fd = atoi(opt + strlen("fd="));
> +			continue;
> +		} else if (!strncmp(opt, "read_fd=", strlen("readfd="))) {
> +			read_fd = atoi(opt + strlen("read_fd="));
> +			continue;
> +		} else if (!strncmp(opt, "pipe_ino=", strlen("pipe_ino=")))
> +			continue;
> +
> +		mount_ops = add_to_string(mount_ops, ",%s", opt);
> +		if (!mount_ops)
> +			goto close_pipe;
> +	}
> +
> +	if (fd == INT_MIN) {
> +		pr_err("Mount options doesn't have \"fd=\"\n");
> +		goto close_pipe;
> +	}
> +	if (pgrp == INT_MIN) {
> +		pr_err("Mount options doesn't have \"pgrp=\"\n");
> +		goto close_pipe;
> +	}
> +	if (timeout == INT_MIN) {
> +		pr_err("Mount options doesn't have \"timeout=\"\n");
> +		goto close_pipe;
> +	}
> +
> +	pr_info("autofs fixed mount options: \"%s\"\n", mount_ops);
> +
> +	if (mount(source, target, filesystemtype, mountflags, mount_ops) < 0) {
> +		pr_perror("Failed to mount AutoFS to %s", target);
> +		goto close_pipe;
> +	}
> +
> +	err = autofs_add_mount(target, pgrp, fd, read_fd, timeout);
> +	if (err < 0) {
> +		pr_err("Failed to collect AutoFS mount data\n");
> +		umount(target);
> +	}
> +
> +close_pipe:
> +	close(control_pipe[1]);
> +	close(control_pipe[0]);
> +	return err;
> +}
> diff --git a/include/autofs.h b/include/autofs.h
> index 3b9e46c..27808ad 100644
> --- a/include/autofs.h
> +++ b/include/autofs.h
> @@ -7,5 +7,8 @@
>   
>   struct mount_info;
>   int autofs_dump(struct mount_info *pm);
> +int autofs_mount(const char *source, const char *target,
> +		 const char *filesystemtype, unsigned long mountflags,
> +		 const void *data);
>   
>   #endif
> diff --git a/mount.c b/mount.c
> index a1dccfe..2f9ecb1 100644
> --- a/mount.c
> +++ b/mount.c
> @@ -1460,7 +1460,7 @@ static struct fstype fstypes[32] = {
>   		.name = "autofs",
>   		.code = FSTYPE__AUTOFS,
>   		.dump = autofs_dump,
> -		.restore = always_fail,
> +		.mount = autofs_mount,
>   	},
>   };
>   
>



More information about the CRIU mailing list