[CRIU] [PATCH v2] autofs: restore stage introduced

Stanislav Kinsburskiy skinsbursky at virtuozzo.com
Fri Dec 4 02:18:38 PST 2015


An AutoFS mount point is managed by a user-space daemon, which receives
requests from the kernel via a pipe passed in the mount operation.
The kernel holds the write end, while the user-space process holds the read end.
Thus, for successful AutoFS migration, this connection has to be restored.

The mount point is created by the init process. A per-mount AutoFS context is
created to fix up the actual pipe connection once all the mount
points are restored.

The problems with restoring the pipe connection are:

1) It has to be done from the user-space process context, because the pipe and
pgrp values are taken from current.
2) Actual mounts are created by the "init" process, so the mount point owner
(master) has to be set later.
3) To update the mount point master, one needs the mount point fd and an opened
write end of the pipe.

To provide the AutoFS mount point master with the resources mentioned above,
the following was implemented:

1) The mount point is opened in the "init" process. This is because it can be
overmounted.
2) An artificial master file descriptor is created for the mount point
descriptor.
3) The mount point descriptor is passed to the AutoFS mount point master as an
artificial slave file descriptor. Thus the master will receive the mount point
file descriptor.
4) If the AutoFS master doesn't have the write pipe end opened, it is created
as a clone of the read pipe end file descriptor and added to the master's fds
list.
5) An artificial file descriptor is created for the write pipe end file
descriptor to fix up the mount point (and close the write pipe descriptor, if
it was opened).

v2: An AutoFS mount point can be reopened via ioctl. Thus, the mount point
artificial file descriptor is not required. Related code was removed.

Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
---
 autofs.c         |  464 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/autofs.h |    3 
 mount.c          |    2 
 3 files changed, 468 insertions(+), 1 deletion(-)

diff --git a/autofs.c b/autofs.c
index 589e87a..debdda0 100644
--- a/autofs.c
+++ b/autofs.c
@@ -1,12 +1,16 @@
 #include <unistd.h>
 #include <sys/stat.h>
 #include <stdarg.h>
+#include <sys/mount.h>
 
 #include "util.h"
 #include "pstree.h"
 #include "proc_parse.h"
 #include "namespaces.h"
 #include "autofs.h"
+#include "pstree.h"
+
+#include "auto_dev-ioctl.h"
 
 static char *add_to_string_vargs(char *str, const char *fmt, va_list args)
 {
@@ -320,3 +324,463 @@ fix_it:
 				    read_fd);
 }
 
+typedef struct autofs_mount_s {	/* per-mount state used to fix up an AutoFS mount on restore */
+	struct file_desc pipe_desc;	/* artificial descriptor; autofs_post_open() maps it back to this struct via container_of() */
+	char *mnt_path;	/* mount point path */
+	int fd;	/* pipe fd handed to the mount with SETPIPEFD; -1 means the pipe fixup is skipped (catatonic mount) */
+	bool close_fd;	/* when true, autofs_post_open() closes fd after the fixup */
+	int timeout;	/* value applied with AUTOFS_IOC_SETTIMEOUT */
+	dev_t mnt_dev;	/* st_dev of mnt_path, used as openmount.devid */
+} autofs_mount_t;
+
+/*
+ * Issue @cmd with @param on @fd. A non-zero result is logged with @path as
+ * the message prefix. Returns whatever ioctl() returned.
+ */
+static int autofs_ioctl(const char *path, int fd, int cmd, const void *param)
+{
+	int ret = ioctl(fd, cmd, param);
+
+	if (ret != 0)
+		pr_perror("%s ioctl failed", path);
+	return ret;
+}
+
+static int autofs_mnt_ioctl(const autofs_mount_t *m, int mnt_fd,
+			    int cmd, const void *param)
+{
+	return autofs_ioctl(m->mnt_path, mnt_fd, cmd, param);
+}
+
+/*
+ * Open /dev/autofs, issue @cmd with @param on it and close the device again.
+ * Returns -1 when the device cannot be opened, the ioctl result otherwise.
+ */
+static int autofs_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
+{
+	const char *dev_path = "/dev/autofs";
+	int dev_fd, ret;
+
+	dev_fd = open(dev_path, O_RDONLY);
+	if (dev_fd == -1) {
+		pr_perror("failed to open %s", dev_path);
+		return -1;
+	}
+
+	ret = autofs_ioctl(dev_path, dev_fd, cmd, param);
+	close(dev_fd);
+
+	return ret;
+}
+
+/* Send AUTOFS_IOC_CATATONIC to the mount opened as @mnt_fd. */
+static int autofs_mnt_make_catatonic(const autofs_mount_t *m, int mnt_fd)
+{
+	const char *mnt_path = m->mnt_path;
+
+	pr_info("%s: set %s catatonic\n", __func__, mnt_path);
+
+	return autofs_ioctl(mnt_path, mnt_fd, AUTOFS_IOC_CATATONIC, NULL);
+}
+
+/* Apply the timeout stored in @m to the mount opened as @mnt_fd. */
+static int autofs_mnt_set_timeout(const autofs_mount_t *m, int mnt_fd)
+{
+	time_t tmo;
+
+	tmo = m->timeout;
+	pr_info("%s: set timeout %ld for %s\n", __func__, tmo, m->mnt_path);
+
+	return autofs_ioctl(m->mnt_path, mnt_fd, AUTOFS_IOC_SETTIMEOUT, &tmo);
+}
+
+/*
+ * Hand the pipe descriptor stored in @m to the mount opened as @mnt_fd using
+ * AUTOFS_DEV_IOCTL_SETPIPEFD. Does nothing and returns 0 when m->fd is -1.
+ */
+static int autofs_mnt_set_pipefd(const autofs_mount_t *m, int mnt_fd)
+{
+	struct autofs_dev_ioctl req;
+
+	/* Only non-catatonic mounts get their pipe and pgrp restored */
+	if (m->fd != -1) {
+		pr_info("%s: set pipe fd %d (pgrp %d) for mount %s\n", __func__,
+				m->fd, getpgrp(), m->mnt_path);
+
+		init_autofs_dev_ioctl(&req);
+		req.ioctlfd = mnt_fd;
+		req.setpipefd.pipefd = m->fd;
+
+		return autofs_dev_ioctl(AUTOFS_DEV_IOCTL_SETPIPEFD, &req);
+	}
+
+	return 0;
+}
+
+/*
+ * Ask the autofs control device to close the mount descriptor @mnt_fd
+ * (AUTOFS_DEV_IOCTL_CLOSEMOUNT). Returns the result of that request.
+ */
+static int autofs_mnt_close(const autofs_mount_t *m, int mnt_fd)
+{
+	struct autofs_dev_ioctl req;
+
+	pr_info("%s: closing fd %d for mount %s\n", __func__, mnt_fd,
+			m->mnt_path);
+
+	init_autofs_dev_ioctl(&req);
+	req.ioctlfd = mnt_fd;
+	return autofs_dev_ioctl(AUTOFS_DEV_IOCTL_CLOSEMOUNT, &req);
+}
+
+/*
+ * Open the AutoFS mount described by @m with the AUTOFS_DEV_IOCTL_OPENMOUNT
+ * request, passing the mount path and its device id.
+ *
+ * Returns the descriptor reported back in param->ioctlfd, or -1 on failure.
+ */
+static int autofs_mnt_open(const autofs_mount_t *m)
+{
+	struct autofs_dev_ioctl *param;
+	size_t size;
+	/*
+	 * These must be signed: autofs_dev_ioctl() reports failure with a
+	 * negative value, and an unsigned variable never compares below zero,
+	 * which would turn the error check below into dead code.
+	 */
+	int err, fd;
+
+	pr_info("%s: open mount %s\n", __func__, m->mnt_path);
+
+	/* The request carries the path in its trailing variable-size part */
+	size = sizeof(*param) + strlen(m->mnt_path) + 1;
+	param = xmalloc(size);
+	if (!param)
+		return -1;
+
+	init_autofs_dev_ioctl(param);
+	param->size = size;
+	strcpy(param->path, m->mnt_path);
+	param->openmount.devid = m->mnt_dev;
+
+	err = autofs_dev_ioctl(AUTOFS_DEV_IOCTL_OPENMOUNT, param);
+	/* Read the result before the request buffer is released */
+	fd = param->ioctlfd;
+	free(param);
+	if (err < 0) {
+		pr_err("Failed to get %s fd\n", m->mnt_path);
+		return -1;
+	}
+	return fd;
+}
+
+/*
+ * post_open handler of the artificial pipe descriptor: fixes up the AutoFS
+ * mount that owns @d.
+ *
+ * The mount is reopened through the autofs control device, its timeout is
+ * set, it is made catatonic and then (unless m->fd is -1) bound to the pipe
+ * descriptor m->fd. Afterwards the mount descriptor, the temporary write end
+ * (when m->close_fd is set) and the artificial descriptor @fd are closed.
+ *
+ * Returns 0 on success, -1 on failure. On failure after the mount was opened
+ * the mount descriptor is closed so that it does not leak into the task.
+ */
+static int autofs_post_open(struct file_desc *d, int fd)
+{
+	autofs_mount_t *m = container_of(d, autofs_mount_t, pipe_desc);
+	int mnt_fd;
+
+	pr_info("%s: restoring %s\n", __func__, m->mnt_path);
+
+	mnt_fd = autofs_mnt_open(m);
+	if (mnt_fd < 0) {
+		pr_err("Failed to open %s\n", m->mnt_path);
+		return -1;
+	}
+
+	if (autofs_mnt_set_timeout(m, mnt_fd)) {
+		pr_err("Failed to set timeout %d for %s\n",
+				m->timeout, m->mnt_path);
+		goto err_close_mnt;
+	}
+	if (autofs_mnt_make_catatonic(m, mnt_fd)) {
+		pr_err("Failed to set %s catatonic\n", m->mnt_path);
+		goto err_close_mnt;
+	}
+
+	if (autofs_mnt_set_pipefd(m, mnt_fd)) {
+		pr_err("Failed to set %s owner\n", m->mnt_path);
+		goto err_close_mnt;
+	}
+
+	if (autofs_mnt_close(m, mnt_fd) < 0) {
+		pr_err("Failed to close %s\n", m->mnt_path);
+		return -1;
+	}
+
+	pr_info("autofs mount %s owner restored: pgrp=%d, fd=%d\n",
+			m->mnt_path, getpgrp(), m->fd);
+
+	if (m->close_fd) {
+		pr_info("%s: pid %d, closing write end %d\n", __func__,
+				getpid(), m->fd);
+		close(m->fd);
+	}
+
+	pr_info("%s: pid %d, closing artificial pipe end %d\n", __func__,
+					getpid(), fd);
+	close(fd);
+	return 0;
+
+err_close_mnt:
+	/* Plain close() is enough here and avoids a second error message */
+	close(mnt_fd);
+	return -1;
+}
+
+static struct file_desc_ops autofs_pipe_desc_ops = {	/* ops installed on autofs_mount_t.pipe_desc by autofs_create_mount_info() */
+	.type = FD_TYPES__ARTIFICIAL,
+	.post_open = autofs_post_open,	/* performs the AutoFS mount fixup */
+};
+
+/*
+ * Allocate (with shmalloc) an FdinfoEntry and the list entry wrapping it
+ * for descriptor @fd of process @pid, then append the list entry to
+ * @desc->fd_info_head.
+ *
+ * @type and @mode are stored as the entry type and flags.
+ * Returns the new list entry, or NULL when an allocation fails.
+ */
+static struct fdinfo_list_entry *create_new_fdinfo(struct file_desc *desc,
+						   int pid, int fd,
+						   int type, int mode)
+{
+	struct fdinfo_list_entry *entry;
+	FdinfoEntry *info;
+
+	info = shmalloc(sizeof(*info));
+	if (info == NULL)
+		return NULL;
+
+	info->fd = fd;
+	info->type = type;
+	info->flags = mode;
+
+	/* Spare room so the entry can be moved up to a u32 boundary */
+	entry = shmalloc(sizeof(*entry) + sizeof(u32));
+	if (entry == NULL)
+		return NULL;
+
+	entry = (struct fdinfo_list_entry *)ALIGN((unsigned long)entry,
+						  sizeof(u32));
+
+	futex_init(&entry->real_pid);
+	entry->pid = pid;
+	entry->fe = info;
+	entry->desc = desc;
+
+	list_add_tail(&entry->desc_list, &desc->fd_info_head);
+	return entry;
+}
+
+/*
+ * Build the per-mount context for the AutoFS mount at @mnt_path.
+ *
+ * The context and a private copy of the path are allocated with shmalloc;
+ * the device id is taken from stat() of the path. The fd and close_fd
+ * members are not set here.
+ *
+ * Returns the new context, or NULL on allocation or stat() failure.
+ */
+static autofs_mount_t *autofs_create_mount_info(const char *mnt_path,
+						int timeout)
+{
+	autofs_mount_t *mnt;
+	struct stat st;
+
+	mnt = shmalloc(sizeof(*mnt));
+	if (mnt == NULL)
+		return NULL;
+
+	mnt->mnt_path = shmalloc(strlen(mnt_path) + 1);
+	if (mnt->mnt_path == NULL)
+		return NULL;
+	strcpy(mnt->mnt_path, mnt_path);
+
+	if (stat(mnt->mnt_path, &st) < 0) {
+		pr_perror("Failed to stat %s", mnt->mnt_path);
+		return NULL;
+	}
+
+	mnt->timeout = timeout;
+	mnt->mnt_dev = st.st_dev;
+
+	file_desc_init(&mnt->pipe_desc, 0, &autofs_pipe_desc_ops);
+
+	return mnt;
+}
+
+/*
+ * Create an FD_TYPES__ARTIFICIAL list entry for descriptor @fd of process
+ * @pid and register it with collect_used_fd() in @rsti.
+ * Returns the entry, or NULL when it could not be created.
+ */
+static struct fdinfo_list_entry *create_artificial_fle(struct rst_info *rsti,
+			struct file_desc *desc, int pid, int fd, int mode)
+{
+	struct fdinfo_list_entry *entry;
+
+	entry = create_new_fdinfo(desc, pid, fd, FD_TYPES__ARTIFICIAL, mode);
+	if (entry != NULL)
+		collect_used_fd(entry, rsti);
+
+	return entry;
+}
+
+/*
+ * Give @task an artificial write-only descriptor backed by @m->pipe_desc and
+ * queue it on the task's artificial list.
+ * Returns 0 on success, -1 when the list entry cannot be created.
+ */
+static int autofs_create_pipe_fle(struct pstree_item *task, autofs_mount_t *m)
+{
+	struct rst_info *ri = rsti(task);
+	struct fdinfo_list_entry *entry;
+	int new_fd;
+
+	new_fd = find_unused_fd(&ri->used, -1);
+
+	entry = create_artificial_fle(ri, &m->pipe_desc,
+				      task->pid.virt, new_fd, O_WRONLY);
+	if (entry == NULL) {
+		pr_err("Failed to create pipe fle for fd %d in process %d\n",
+				new_fd, task->pid.virt);
+		return -1;
+	}
+
+	list_add(&entry->ps_list, &ri->artificial);
+	pr_info("autofs: added pipe fle: pid %d, fd %d\n",
+			entry->pid, entry->fe->fd);
+
+	return 0;
+}
+
+/*
+ * Walk the ps_list-linked entries on @head and return the first one whose
+ * descriptor number equals @fd, or NULL when there is none.
+ */
+static struct fdinfo_list_entry *find_fle_by_fd(struct list_head *head, int fd)
+{
+	struct fdinfo_list_entry *pos, *found = NULL;
+
+	list_for_each_entry(pos, head, ps_list) {
+		if (pos->fe->fd != fd)
+			continue;
+
+		found = pos;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * Create an FD_TYPES__PIPE list entry for descriptor @fd of process @pid,
+ * append it to @rsti->fds and register it with collect_used_fd().
+ * Returns 0 on success, -1 when the entry cannot be created.
+ */
+static int create_pipe_fle(struct rst_info *rsti, struct file_desc *desc,
+				 int pid, int fd, int mode)
+{
+	struct fdinfo_list_entry *entry;
+
+	entry = create_new_fdinfo(desc, pid, fd, FD_TYPES__PIPE, mode);
+	if (entry == NULL)
+		return -1;
+
+	list_add_tail(&entry->ps_list, &rsti->fds);
+	collect_used_fd(entry, rsti);
+
+	return 0;
+}
+
+/*
+ * Add a write-only pipe entry to @task that uses the same file descriptor
+ * description as the pipe entry already collected for @read_fd.
+ *
+ * @write_fd is passed to find_unused_fd() to pick the new descriptor number.
+ * Returns the chosen descriptor number, or -1 when @read_fd is unknown, is
+ * not a pipe, or the new entry cannot be created.
+ */
+static int autofs_open_write_pipe(struct pstree_item *task,
+			      int read_fd, int write_fd)
+{
+	struct rst_info *ri = rsti(task);
+	struct fdinfo_list_entry *rd;
+
+	write_fd = find_unused_fd(&ri->used, write_fd);
+
+	rd = find_fle_by_fd(&ri->fds, read_fd);
+	if (rd == NULL) {
+		pr_err("Failed to find fd %d in process %d\n", read_fd,
+				task->pid.virt);
+		return -1;
+	}
+
+	if (rd->fe->type != FD_TYPES__PIPE) {
+		pr_err("Fd %d in process %d is not a pipe: %d\n", read_fd,
+				task->pid.virt, rd->fe->type);
+		return -1;
+	}
+
+	if (create_pipe_fle(ri, rd->desc, rd->pid, write_fd, O_WRONLY) < 0) {
+		pr_err("Failed to create pipe_fle for %d in process %d\n",
+				write_fd, rd->pid);
+		return -1;
+	}
+
+	pr_info("autofs: added write pipe end fle: pid %d, fd %d\n",
+			rd->pid, write_fd);
+	return write_fd;
+}
+
+/*
+ * Record everything needed to fix up the freshly created AutoFS mount at
+ * @mnt_path once the owning task restores its files.
+ *
+ * The owner is the calling task when @write_fd is -1, otherwise the task
+ * whose virtual pid equals @pgrp. When @read_fd was supplied (i.e. is not
+ * INT_MIN) a write pipe end is added to the owner and marked to be closed
+ * after the fixup. An artificial descriptor carrying the mount context is
+ * then added to the owner.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int autofs_add_mount(const char *mnt_path, int pgrp, int write_fd,
+			    int read_fd, int timeout)
+{
+	struct pstree_item *self, *owner;
+	bool temporary_write_end = false;
+	autofs_mount_t *mnt;
+
+	mnt = autofs_create_mount_info(mnt_path, timeout);
+	if (mnt == NULL) {
+		pr_err("Failed to allocate mount info\n");
+		return -1;
+	}
+
+	self = pstree_item_by_virt(getpid());
+	BUG_ON(self == NULL);
+
+	/* Catatonic mounts are always init's */
+	owner = (write_fd == -1) ? self : pstree_item_by_virt(pgrp);
+	BUG_ON(owner == NULL);
+
+	if (read_fd != INT_MIN) {
+		/*
+		 * The original write end of the pipe was closed, so one has
+		 * to be created to be able to fix up the AutoFS mount.
+		 */
+		write_fd = autofs_open_write_pipe(owner, read_fd, write_fd);
+		if (write_fd < 0) {
+			pr_err("Failed to find free fd in process %d\n",
+					owner->pid.virt);
+			return -1;
+		}
+
+		temporary_write_end = true;
+	}
+
+	/*
+	 * Add the artificial file which serves as the handle for the AutoFS
+	 * mount fixup.
+	 */
+	if (autofs_create_pipe_fle(owner, mnt) < 0) {
+		pr_err("Failed to create mount point fd %s\n", mnt_path);
+		return -1;
+	}
+
+	mnt->fd = write_fd;
+	mnt->close_fd = temporary_write_end;
+	return 0;
+}
+
+/*
+ * Mount an AutoFS file system at @target on restore.
+ *
+ * @data holds the dumped, comma-separated mount options. The "fd=", "pgrp=",
+ * "timeout=", "read_fd=" and "pipe_ino=" options are stripped from it: the
+ * first four are parsed, the last is dropped. The mount itself is performed
+ * with "fd=" pointing at the write end of a temporary pipe and "pgrp=" set to
+ * the caller's process group, followed by all remaining options. The parsed
+ * values are then recorded with autofs_add_mount() for the later fixup.
+ *
+ * Returns 0 on success and a negative value on failure; when recording the
+ * mount data fails, the new mount is unmounted again.
+ */
+int autofs_mount(const char *source, const char *target,
+		 const char *filesystemtype, unsigned long mountflags,
+		 const void *data)
+{
+	char *mount_ops, **options;
+	int i, control_pipe[2] = { -1, -1 }, nr_opts;
+	int err = -1, timeout = INT_MIN, pgrp = INT_MIN;
+	int fd = INT_MIN, read_fd = INT_MIN;
+
+	mount_ops = xstrdup(data);
+	if (!mount_ops)
+		return -1;
+
+	pr_info("autofs ===================================\n");
+	pr_info("autofs original mount options: \"%s\"\n", mount_ops);
+
+	split(mount_ops, ',', &options, &nr_opts);
+	/* The copy is not needed any more, whether or not split() succeeded */
+	xfree(mount_ops);
+	mount_ops = NULL;
+	if (!options)
+		return -1;
+
+	if (pipe(control_pipe) < 0) {
+		pr_perror("Can't create pipe");
+		goto free_options;
+	}
+
+	mount_ops = construct_string("fd=%d,pgrp=%d", control_pipe[1],
+						      getpgrp());
+	if (!mount_ops)
+		goto close_pipe;
+
+	for (i = 0; i < nr_opts; i++) {
+		char *opt = options[i];
+
+		if (!strncmp(opt, "timeout=", strlen("timeout="))) {
+			timeout = atoi(opt + strlen("timeout="));
+			continue;
+		} else if (!strncmp(opt, "pgrp=", strlen("pgrp="))) {
+			pgrp = atoi(opt + strlen("pgrp="));
+			continue;
+		} else if (!strncmp(opt, "fd=", strlen("fd="))) {
+			fd = atoi(opt + strlen("fd="));
+			continue;
+		} else if (!strncmp(opt, "read_fd=", strlen("read_fd="))) {
+			/* Compare the whole key, including the '=' sign */
+			read_fd = atoi(opt + strlen("read_fd="));
+			continue;
+		} else if (!strncmp(opt, "pipe_ino=", strlen("pipe_ino=")))
+			continue;
+
+		mount_ops = add_to_string(mount_ops, ",%s", opt);
+		if (!mount_ops)
+			goto close_pipe;
+	}
+
+	if (fd == INT_MIN) {
+		pr_err("Mount options doesn't have \"fd=\"\n");
+		goto close_pipe;
+	}
+	if (pgrp == INT_MIN) {
+		pr_err("Mount options doesn't have \"pgrp=\"\n");
+		goto close_pipe;
+	}
+	if (timeout == INT_MIN) {
+		pr_err("Mount options doesn't have \"timeout=\"\n");
+		goto close_pipe;
+	}
+
+	pr_info("autofs fixed mount options: \"%s\"\n", mount_ops);
+
+	if (mount(source, target, filesystemtype, mountflags, mount_ops) < 0) {
+		pr_perror("Failed to mount AutoFS to %s", target);
+		goto close_pipe;
+	}
+
+	err = autofs_add_mount(target, pgrp, fd, read_fd, timeout);
+	if (err < 0) {
+		pr_err("Failed to collect AutoFS mount data\n");
+		umount(target);
+	}
+
+close_pipe:
+	close(control_pipe[1]);
+	close(control_pipe[0]);
+	xfree(mount_ops);
+free_options:
+	/*
+	 * NOTE(review): assumes split() hands back an xmalloc'ed array of
+	 * individually allocated strings (they stay valid after the source
+	 * string is freed above) - confirm against split() in util.c.
+	 */
+	for (i = 0; i < nr_opts; i++)
+		xfree(options[i]);
+	xfree(options);
+	return err;
+}
diff --git a/include/autofs.h b/include/autofs.h
index 3b9e46c..27808ad 100644
--- a/include/autofs.h
+++ b/include/autofs.h
@@ -7,5 +7,8 @@
 
 struct mount_info;
 int autofs_dump(struct mount_info *pm);
+int autofs_mount(const char *source, const char *target,
+		 const char *filesystemtype, unsigned long mountflags,
+		 const void *data);
 
 #endif
diff --git a/mount.c b/mount.c
index a1dccfe..2f9ecb1 100644
--- a/mount.c
+++ b/mount.c
@@ -1460,7 +1460,7 @@ static struct fstype fstypes[32] = {
 		.name = "autofs",
 		.code = FSTYPE__AUTOFS,
 		.dump = autofs_dump,
-		.restore = always_fail,
+		.mount = autofs_mount,
 	},
 };
 



More information about the CRIU mailing list