[CRIU] [PATCH 15/15] autofs: restore stage introduced

Stanislav Kinsburskiy skinsbursky at virtuozzo.com
Thu Dec 3 05:30:11 PST 2015


AutoFS mount point is managed by user space daemon, which receives requests
from kernel via pipe, passed on mount operation.
Kernel holds the write end, while the user space process holds the read end.
Thus, for successful AutoFS migration, this connection has to be restored.

Mount point is created by the init process. Per-mount AutoFS context is
created to fix up the actual pipe connection, when all the mount
points are restored.

The problems with restoring pipe connection are:

1) It has to be done from user space process context, because pipe and pgrp
values are taken from current.
2) Actual mounts are created by the "init" process, and the mount point owner
(master) has to be set later.
3) To update mount point master one requires: mountpoint fd and opened write end
of pipe.

To provide AutoFS mount point master with resources, mentioned above, the
following was implemented:

1) Mount point is opened in the "init" process. This is because it can be
overmounted.
2) Artificial master file descriptor is created for mount point descriptor.
3) Mount point descriptor is passed to AutoFS mount point master as an
artificial slave file descriptor. Thus master will receive mount point file
descriptor.
4) If Autofs master doesn't have write pipe end opened, it is created as a
clone of read pipe end file descriptor and added to master's fds list.
5) Artificial file descriptor is created for write pipe end file descriptor to
fix up the mount point (and close write pipe descriptor, if it was opened).

Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
---
 autofs.c         |  521 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/autofs.h |    3 
 mount.c          |    2 
 3 files changed, 525 insertions(+), 1 deletion(-)

diff --git a/autofs.c b/autofs.c
index 589e87a..6c44741 100644
--- a/autofs.c
+++ b/autofs.c
@@ -1,12 +1,16 @@
 #include <unistd.h>
 #include <sys/stat.h>
 #include <stdarg.h>
+#include <sys/mount.h>
 
 #include "util.h"
 #include "pstree.h"
 #include "proc_parse.h"
 #include "namespaces.h"
 #include "autofs.h"
+#include "pstree.h"
+
+#include "auto_dev-ioctl.h"
 
 static char *add_to_string_vargs(char *str, const char *fmt, va_list args)
 {
@@ -320,3 +324,520 @@ fix_it:
 				    read_fd);
 }
 
+/*
+ * Per-mount restore context for one AutoFS mount point.
+ */
+typedef struct autofs_mount_s {
+	/* Artificial descriptor; its post_open handler fixes up the mount */
+	struct file_desc pipe_desc;
+	/* Artificial descriptor used to carry the mount point fd */
+	struct file_desc mnt_desc;
+	/* Copy of the mount point path */
+	char *mnt_path;
+	/* Mount point fd number the fixup ioctls are issued on */
+	int mnt_fd;
+	/* Write end of the daemon pipe; -1 means the pipe is not restored */
+	int fd;
+	/* Close @fd once the mount has been fixed up */
+	bool close_fd;
+	/* Value passed to AUTOFS_IOC_SETTIMEOUT */
+	int timeout;
+} autofs_mount_t;
+
+/*
+ * Issue @cmd with argument @param on @fd. @path is only used to label the
+ * error message. The ioctl() result is returned unchanged.
+ */
+static int autofs_ioctl(const char *path, int fd, int cmd, const void *param)
+{
+	int ret = ioctl(fd, cmd, param);
+
+	if (ret != 0)
+		pr_perror("%s ioctl failed", path);
+
+	return ret;
+}
+
+/* Run @cmd with @param against the mount point descriptor stored in @m. */
+static int autofs_mnt_ioctl(const autofs_mount_t *m, int cmd, const void *param)
+{
+	const char *path = m->mnt_path;
+	int mnt_fd = m->mnt_fd;
+
+	return autofs_ioctl(path, mnt_fd, cmd, param);
+}
+
+/*
+ * Open /dev/autofs, issue @cmd with @param on it and close the device again.
+ * Returns -1 if the device can't be opened, the ioctl result otherwise.
+ */
+static int autofs_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
+{
+	char *dev = "/dev/autofs";
+	int dev_fd, ret;
+
+	dev_fd = open(dev, O_RDONLY);
+	if (dev_fd != -1) {
+		ret = autofs_ioctl(dev, dev_fd, cmd, param);
+		close(dev_fd);
+		return ret;
+	}
+
+	pr_perror("failed to open %s", dev);
+	return -1;
+}
+
+/* Issue AUTOFS_IOC_CATATONIC on the mount point of @m. */
+static int autofs_mnt_make_catatonic(const autofs_mount_t *m)
+{
+	int ret;
+
+	pr_info("%s: set %s catatonic\n", __func__, m->mnt_path);
+
+	ret = autofs_mnt_ioctl(m, AUTOFS_IOC_CATATONIC, NULL);
+	return ret;
+}
+
+/* Apply m->timeout to the mount point with AUTOFS_IOC_SETTIMEOUT. */
+static int autofs_mnt_set_timeout(const autofs_mount_t *m)
+{
+	/* The ioctl takes a pointer, so the value goes through a local copy */
+	time_t value;
+
+	value = m->timeout;
+	pr_info("%s: set timeout %ld for %s\n", __func__, value, m->mnt_path);
+
+	return autofs_mnt_ioctl(m, AUTOFS_IOC_SETTIMEOUT, &value);
+}
+
+/*
+ * Pass the pipe write end m->fd for the mount behind m->mnt_fd to the
+ * AUTOFS_DEV_IOCTL_SETPIPEFD ioctl on /dev/autofs.
+ * Nothing is done, and 0 is returned, when m->fd is -1.
+ */
+static int autofs_mnt_set_pipefd(const autofs_mount_t *m)
+{
+	struct autofs_dev_ioctl req;
+
+	/* Pipe and pgrp are restored for non-catatonic mounts only */
+	if (m->fd != -1) {
+		pr_info("%s: set pipe fd %d (pgrp %d) for mount %s\n", __func__,
+				m->fd, getpgrp(), m->mnt_path);
+
+		init_autofs_dev_ioctl(&req);
+		req.ioctlfd = m->mnt_fd;
+		req.setpipefd.pipefd = m->fd;
+
+		return autofs_dev_ioctl(AUTOFS_DEV_IOCTL_SETPIPEFD, &req);
+	}
+
+	return 0;
+}
+
+/*
+ * post_open handler of the artificial pipe descriptor: this is where the
+ * AutoFS mount embedding @d gets fixed up.
+ *
+ * The timeout is set, the mount is made catatonic and then the pipe fd is
+ * set, in that order. Afterwards the write end is closed if close_fd is set,
+ * and the artificial descriptor @fd is closed as well.
+ *
+ * Returns 0 on success, -1 as soon as one of the steps fails.
+ */
+static int autofs_post_open(struct file_desc *d, int fd)
+{
+	autofs_mount_t *mnt = container_of(d, autofs_mount_t, pipe_desc);
+	const char *path = mnt->mnt_path;
+
+	pr_info("%s: restoring %s\n", __func__, path);
+
+	if (autofs_mnt_set_timeout(mnt) != 0) {
+		pr_err("Failed to set timeout %d for %s\n",
+				mnt->timeout, path);
+		return -1;
+	}
+
+	if (autofs_mnt_make_catatonic(mnt) != 0) {
+		pr_err("Failed to set %s catatonic\n", path);
+		return -1;
+	}
+
+	if (autofs_mnt_set_pipefd(mnt) != 0) {
+		pr_err("Failed to set %s owner\n", path);
+		return -1;
+	}
+
+	pr_info("autofs mount %s owner restored: pgrp=%d, fd=%d\n",
+			path, getpgrp(), mnt->fd);
+
+	/* The write end is only dropped when the mount info asks for it */
+	if (mnt->close_fd) {
+		pr_info("%s: pid %d, closing write end %d\n", __func__,
+				getpid(), mnt->fd);
+		close(mnt->fd);
+	}
+
+	pr_info("%s: pid %d, closing artificial pipe end %d\n", __func__,
+					getpid(), fd);
+	close(fd);
+
+	return 0;
+}
+
+/*
+ * Operations of the artificial pipe descriptor: no .open is provided, only
+ * the post_open hook that performs the AutoFS mount fixup.
+ */
+static struct file_desc_ops autofs_pipe_desc_ops = {
+	.type = FD_TYPES__ARTIFICIAL,
+	.post_open = autofs_post_open,
+};
+
+/*
+ * Allocate (with shmalloc) a new fdinfo entry plus its list entry for @fd of
+ * type @type with flags @mode in process @pid, and queue the list entry at
+ * the tail of @desc's fd list.
+ *
+ * Returns the list entry, or NULL if an allocation fails.
+ * NOTE(review): only fd, type and flags of the FdinfoEntry are filled in
+ * here - confirm that no other field is expected to be initialised.
+ */
+static struct fdinfo_list_entry *create_new_fdinfo(struct file_desc *desc,
+						   int pid, int fd,
+						   int type, int mode)
+{
+	struct fdinfo_list_entry *entry;
+	FdinfoEntry *info;
+	void *raw;
+
+	info = shmalloc(sizeof(*info));
+	if (info == NULL)
+		return NULL;
+
+	info->fd = fd;
+	info->type = type;
+	info->flags = mode;
+
+	/* One spare u32 leaves room for the alignment fixup below */
+	raw = shmalloc(sizeof(*entry) + sizeof(u32));
+	if (raw == NULL)
+		return NULL;
+
+	entry = (struct fdinfo_list_entry *)ALIGN((unsigned long)raw,
+						  sizeof(u32));
+
+	futex_init(&entry->real_pid);
+	entry->pid = pid;
+	entry->fe = info;
+	entry->desc = desc;
+
+	list_add_tail(&entry->desc_list, &desc->fd_info_head);
+
+	return entry;
+}
+
+/*
+ * post_open handler of the artificial mount point descriptor: logs and
+ * closes @fd. Returns the close() result.
+ */
+static int autofs_mount_post_open(struct file_desc *d, int fd)
+{
+	int ret;
+
+	pr_info("%s: pid %d, closing %d\n", __func__,
+			getpid(), fd);
+
+	ret = close(fd);
+	return ret;
+}
+
+/*
+ * open handler of the artificial mount point descriptor. Nothing is opened
+ * here: the fd number recorded in the first entry of @d's fd list is
+ * returned.
+ */
+static int autofs_mount_open(struct file_desc *d)
+{
+	struct fdinfo_list_entry *first;
+	int mnt_fd;
+
+	first = list_first_entry(&d->fd_info_head,
+			struct fdinfo_list_entry, desc_list);
+	mnt_fd = first->fe->fd;
+
+	pr_info("%s: pid %d, mnt_fd: %d\n", __func__, getpid(), mnt_fd);
+
+	return mnt_fd;
+}
+
+/*
+ * Operations of the artificial mount point descriptor: .open hands out the
+ * recorded mount point fd, .post_open closes it.
+ */
+static struct file_desc_ops autofs_mount_desc_ops = {
+	.type = FD_TYPES__ARTIFICIAL,
+	.open = autofs_mount_open,
+	.post_open = autofs_mount_post_open,
+};
+
+/*
+ * Allocate (with shmalloc) and initialise the restore context for the AutoFS
+ * mount at @mnt_path: the path is copied, both artificial descriptors are
+ * initialised and @timeout is stored. mnt_fd, fd and close_fd are not set
+ * here.
+ *
+ * Returns the new context, or NULL if an allocation fails.
+ */
+static autofs_mount_t *autofs_create_mount_info(const char *mnt_path,
+						int timeout)
+{
+	autofs_mount_t *info;
+	size_t path_size;
+
+	info = shmalloc(sizeof(*info));
+	if (info == NULL)
+		return NULL;
+
+	/* Include the terminating NUL in the copy */
+	path_size = strlen(mnt_path) + 1;
+	info->mnt_path = shmalloc(path_size);
+	if (info->mnt_path == NULL)
+		return NULL;
+	memcpy(info->mnt_path, mnt_path, path_size);
+
+	file_desc_init(&info->pipe_desc, 0, &autofs_pipe_desc_ops);
+	file_desc_init(&info->mnt_desc, 0, &autofs_mount_desc_ops);
+	info->timeout = timeout;
+
+	return info;
+}
+
+/*
+ * Create a list entry of type FD_TYPES__ARTIFICIAL for @fd in process @pid
+ * and record the fd as used in @rsti. Returns the entry or NULL on failure.
+ */
+static struct fdinfo_list_entry *create_artificial_fle(struct rst_info *rsti,
+			struct file_desc *desc, int pid, int fd, int mode)
+{
+	struct fdinfo_list_entry *entry;
+
+	entry = create_new_fdinfo(desc, pid, fd, FD_TYPES__ARTIFICIAL, mode);
+	if (entry)
+		collect_used_fd(entry, rsti);
+
+	return entry;
+}
+
+/*
+ * Register @mnt_fd as an artificial file of @task, backed by m->mnt_desc,
+ * and append it to the task's list of artificial files.
+ * Returns 0 on success, -1 on failure.
+ */
+static int autofs_add_mount_fle(struct pstree_item *task,
+				autofs_mount_t *m, int mnt_fd)
+{
+	struct rst_info *ri = rsti(task);
+	struct fdinfo_list_entry *entry;
+
+	entry = create_artificial_fle(ri, &m->mnt_desc,
+				      task->pid.virt, mnt_fd,
+				      O_RDONLY | O_DIRECTORY);
+	if (entry) {
+		list_add_tail(&entry->ps_list, &ri->artificial);
+
+		pr_info("autofs: added mount fle: pid %d, fd %d, path: %s\n",
+				entry->pid, entry->fe->fd, m->mnt_path);
+		return 0;
+	}
+
+	pr_err("Failed to create mnt fle for %s in process %d\n",
+			m->mnt_path, task->pid.virt);
+	return -1;
+}
+
+/*
+ * Pick an fd number that is unused in @task and register it there as the
+ * artificial mount point file of @m.
+ * Returns the chosen fd number, or -1 on failure.
+ */
+static int autofs_create_mount_fle(struct pstree_item *task, autofs_mount_t *m)
+{
+	/* The mount point fd has to be passed to the master process */
+	int new_fd = find_unused_fd(&rsti(task)->used, -1);
+
+	if (new_fd < 0) {
+		pr_err("Failed to find free fd in process %d\n",
+				task->pid.virt);
+		return -1;
+	}
+
+	/* The artificial file serves as the handler that closes the fd */
+	if (autofs_add_mount_fle(task, m, new_fd) >= 0)
+		return new_fd;
+
+	pr_err("Failed to create mount point fd %s\n", m->mnt_path);
+	return -1;
+}
+
+/*
+ * Open the mount point m->mnt_path in the calling process, move the
+ * descriptor to a number that is unused in @task's fd set and register it
+ * as an artificial file of @task.
+ *
+ * Returns the resulting mount point fd, or -1 on error. No descriptor opened
+ * here is left open on the error paths.
+ */
+static int autofs_open_mount(struct pstree_item *task, autofs_mount_t *m)
+{
+	int mnt_fd, fd;
+
+	/* Mount point is opened here to handle overmount cases (like systemd
+	 * does) */
+	fd = open(m->mnt_path, O_RDONLY | O_DIRECTORY);
+	if (fd < 0) {
+		pr_perror("Failed to open %s", m->mnt_path);
+		return -1;
+	}
+
+	mnt_fd = find_unused_fd(&rsti(task)->used, -1);
+	if (mnt_fd < 0) {
+		pr_err("Failed to find free fd in process %d\n",
+				task->pid.virt);
+		goto err_close;
+	}
+
+	/* dup2() of a descriptor onto itself does nothing, so closing fd
+	 * afterwards would drop the only reference to the mount point. */
+	if (fd != mnt_fd) {
+		if (dup2(fd, mnt_fd) < 0) {
+			pr_perror("Failed to dup2 %d to %d", fd, mnt_fd);
+			goto err_close;
+		}
+		close(fd);
+	}
+
+	if (autofs_add_mount_fle(task, m, mnt_fd) < 0) {
+		pr_err("Failed to create mount point fd %s\n", m->mnt_path);
+		close(mnt_fd);
+		return -1;
+	}
+
+	return mnt_fd;
+
+err_close:
+	close(fd);
+	return -1;
+}
+
+/*
+ * Register an artificial file backed by m->pipe_desc on an fd number that is
+ * unused in @task, and put it at the head of the task's list of artificial
+ * files.
+ * Returns 0 on success, -1 on failure.
+ */
+static int autofs_create_pipe_fle(struct pstree_item *task, autofs_mount_t *m)
+{
+	struct rst_info *ri = rsti(task);
+	struct fdinfo_list_entry *entry;
+	int new_fd;
+
+	new_fd = find_unused_fd(&ri->used, -1);
+
+	entry = create_artificial_fle(ri, &m->pipe_desc,
+				      task->pid.virt, new_fd, O_WRONLY);
+	if (entry) {
+		list_add(&entry->ps_list, &ri->artificial);
+
+		pr_info("autofs: added pipe fle: pid %d, fd %d\n",
+				entry->pid, entry->fe->fd);
+		return 0;
+	}
+
+	pr_err("Failed to create pipe fle for fd %d in process %d\n",
+			new_fd, task->pid.virt);
+	return -1;
+}
+
+/*
+ * Walk @head (linked through ps_list) and return the first entry whose fd
+ * number equals @fd, or NULL if there is none.
+ */
+static struct fdinfo_list_entry *find_fle_by_fd(struct list_head *head, int fd)
+{
+	struct fdinfo_list_entry *pos;
+	struct fdinfo_list_entry *match = NULL;
+
+	list_for_each_entry(pos, head, ps_list) {
+		if (pos->fe->fd != fd)
+			continue;
+
+		match = pos;
+		break;
+	}
+
+	return match;
+}
+
+/*
+ * Create a list entry of type FD_TYPES__PIPE for @fd in process @pid, append
+ * it to the fds list of @rsti and record the fd as used.
+ * Returns 0 on success, -1 on failure.
+ */
+static int create_pipe_fle(struct rst_info *rsti, struct file_desc *desc,
+				 int pid, int fd, int mode)
+{
+	struct fdinfo_list_entry *entry;
+
+	entry = create_new_fdinfo(desc, pid, fd, FD_TYPES__PIPE, mode);
+	if (entry == NULL)
+		return -1;
+
+	list_add_tail(&entry->ps_list, &rsti->fds);
+	collect_used_fd(entry, rsti);
+
+	return 0;
+}
+
+/*
+ * Add a write pipe end to @task that shares the file descriptor object of
+ * the pipe found on @read_fd. @write_fd is passed to find_unused_fd() as the
+ * preferred fd number.
+ *
+ * Returns the fd number chosen for the write end, or -1 if @read_fd is not
+ * found among the task's fds, is not a pipe, or the entry can't be created.
+ */
+static int autofs_open_write_pipe(struct pstree_item *task,
+			      int read_fd, int write_fd)
+{
+	struct rst_info *ri = rsti(task);
+	struct fdinfo_list_entry *rd;
+
+	write_fd = find_unused_fd(&ri->used, write_fd);
+
+	rd = find_fle_by_fd(&ri->fds, read_fd);
+	if (rd == NULL) {
+		pr_err("Failed to find fd %d in process %d\n", read_fd,
+				task->pid.virt);
+		return -1;
+	}
+
+	if (rd->fe->type != FD_TYPES__PIPE) {
+		pr_err("Fd %d in process %d is not a pipe: %d\n", read_fd,
+				task->pid.virt, rd->fe->type);
+		return -1;
+	}
+
+	/* The new entry reuses the descriptor of the read end */
+	if (create_pipe_fle(ri, rd->desc, rd->pid, write_fd, O_WRONLY) < 0) {
+		pr_err("Failed to create pipe_fle for %d in process %d\n",
+				write_fd, rd->pid);
+		return -1;
+	}
+
+	pr_info("autofs: added write pipe end fle: pid %d, fd %d\n",
+			rd->pid, write_fd);
+
+	return write_fd;
+}
+
+/*
+ * Collect everything needed to fix up the AutoFS mount at @mnt_path later.
+ *
+ * @pgrp is looked up as the pid of the master process, @write_fd is the
+ * dumped "fd=" value (-1 selects the calling process as master), @read_fd is
+ * the dumped "read_fd=" value (INT_MIN when the option was absent) and
+ * @timeout is stored in the mount info.
+ *
+ * Returns 0 on success, -1 on failure. BUG_ON() fires if the calling process
+ * or the master is not found in the process tree.
+ */
+static int autofs_add_mount(const char *mnt_path, int pgrp, int write_fd,
+			    int read_fd, int timeout)
+{
+	int mnt_fd;
+	bool close_write_fd = false;
+	struct pstree_item *current, *master;
+	autofs_mount_t *m;
+
+	m = autofs_create_mount_info(mnt_path, timeout);
+	if (!m) {
+		pr_err("Failed to allocate mount info\n");
+		return -1;
+	}
+
+	/* The process tree item of the process performing the mount */
+	current = pstree_item_by_virt(getpid());
+
+	BUG_ON(current == NULL);
+
+	/* We need to open mount point fd, because it can be overmounted. */
+	mnt_fd = autofs_open_mount(current, m);
+	if (mnt_fd < 0) {
+		pr_err("Failed to open mount point %s on fd %d\n", mnt_path,
+				mnt_fd);
+		return -1;
+	}
+
+	if (write_fd == -1)
+		/* Catatonic mounts are always init's */
+		master = current;
+	else
+		master = pstree_item_by_virt(pgrp);
+
+	BUG_ON(master == NULL);
+
+	if (read_fd != INT_MIN) {
+		/* Original pipe write end was closed.
+		 * We need to create one to be able to fix up the AutoFS
+		 * mount; it is closed again after the fixup. */
+
+		write_fd = autofs_open_write_pipe(master, read_fd, write_fd);
+		if (write_fd < 0) {
+			pr_err("Failed to find free fd in process %d\n",
+					master->pid.virt);
+			return -1;
+		}
+
+		close_write_fd = true;
+	}
+
+	/* Now we need to add an artificial file, which will be used as a
+	 * handler for the AutoFS mount fixup */
+	if (autofs_create_pipe_fle(master, m) < 0) {
+		pr_err("Failed to create mount point fd %s\n", mnt_path);
+		return -1;
+	}
+
+	if (master != current) {
+		/* We need to create artificial file, as a handler for mnt_fd close.
+		 * From here on mnt_fd is the fd number inside the master. */
+		mnt_fd = autofs_create_mount_fle(master, m);
+		if (mnt_fd < 0) {
+			pr_err("Failed to create mount point fd %s\n", mnt_path);
+			return -1;
+		}
+	}
+
+	/* Values consumed by autofs_post_open() */
+	m->mnt_fd = mnt_fd;
+	m->fd = write_fd;
+	m->close_fd = close_write_fd;
+	return 0;
+}
+
+/*
+ * Restore an AutoFS mount at @target.
+ *
+ * @data carries the dumped mount options as a comma separated string.
+ * "timeout=", "pgrp=", "fd=" and "read_fd=" are parsed out, "pipe_ino=" is
+ * dropped and every other option is passed through unchanged. The mount
+ * itself is done with the write end of a temporary pipe and the pgrp of the
+ * calling process; the parsed values are handed to autofs_add_mount() so
+ * that the mount can be fixed up later. The temporary pipe is closed before
+ * returning.
+ *
+ * Returns 0 on success, -1 on error. The mount is undone with umount() if
+ * collecting the restore data fails.
+ */
+int autofs_mount(const char *source, const char *target,
+		 const char *filesystemtype, unsigned long mountflags,
+		 const void *data)
+{
+	char *mount_ops, **options;
+	int i, control_pipe[2] = { -1, -1 }, nr_opts;
+	int err = -1, timeout = INT_MIN, pgrp = INT_MIN;
+	int fd = INT_MIN, read_fd = INT_MIN;
+
+	mount_ops = xstrdup(data);
+	if (!mount_ops)
+		return -1;
+
+	pr_info("autofs ===================================\n");
+	pr_info("autofs original mount options: \"%s\"\n", mount_ops);
+
+	split(mount_ops, ',', &options, &nr_opts);
+
+	/* The copy is not needed any more, whether split() worked or not */
+	xfree(mount_ops);
+	mount_ops = NULL;
+
+	if (!options)
+		return -1;
+
+	/* NOTE(review): options is not released on any path; how it has to
+	 * be freed depends on split(), which is not visible here. */
+
+	if (pipe(control_pipe) < 0) {
+		pr_perror("Can't create pipe");
+		return -1;
+	}
+
+	mount_ops = construct_string("fd=%d,pgrp=%d", control_pipe[1],
+						      getpgrp());
+	if (!mount_ops)
+		goto close_pipe;
+
+	for (i = 0; i < nr_opts; i++) {
+		char *opt = options[i];
+
+		if (!strncmp(opt, "timeout=", strlen("timeout="))) {
+			timeout = atoi(opt + strlen("timeout="));
+			continue;
+		} else if (!strncmp(opt, "pgrp=", strlen("pgrp="))) {
+			pgrp = atoi(opt + strlen("pgrp="));
+			continue;
+		} else if (!strncmp(opt, "fd=", strlen("fd="))) {
+			fd = atoi(opt + strlen("fd="));
+			continue;
+		} else if (!strncmp(opt, "read_fd=", strlen("read_fd="))) {
+			/* The compared length must cover the whole key,
+			 * including '=', as the value is read right after */
+			read_fd = atoi(opt + strlen("read_fd="));
+			continue;
+		} else if (!strncmp(opt, "pipe_ino=", strlen("pipe_ino=")))
+			continue;
+
+		mount_ops = add_to_string(mount_ops, ",%s", opt);
+		if (!mount_ops)
+			goto close_pipe;
+	}
+
+	if (fd == INT_MIN) {
+		pr_err("Mount options doesn't have \"fd=\"\n");
+		goto close_pipe;
+	}
+	if (pgrp == INT_MIN) {
+		pr_err("Mount options doesn't have \"pgrp=\"\n");
+		goto close_pipe;
+	}
+	if (timeout == INT_MIN) {
+		pr_err("Mount options doesn't have \"timeout=\"\n");
+		goto close_pipe;
+	}
+
+	pr_info("autofs fixed mount options: \"%s\"\n", mount_ops);
+
+	if (mount(source, target, filesystemtype, mountflags, mount_ops) < 0) {
+		pr_perror("Failed to mount AutoFS to %s", target);
+		goto close_pipe;
+	}
+
+	err = autofs_add_mount(target, pgrp, fd, read_fd, timeout);
+	if (err < 0) {
+		pr_err("Failed to collect AutoFS mount data\n");
+		umount(target);
+	}
+
+close_pipe:
+	/* mount_ops is either NULL or the constructed option string here */
+	xfree(mount_ops);
+	close(control_pipe[1]);
+	close(control_pipe[0]);
+	return err;
+}
diff --git a/include/autofs.h b/include/autofs.h
index 3b9e46c..27808ad 100644
--- a/include/autofs.h
+++ b/include/autofs.h
@@ -7,5 +7,8 @@
 
 struct mount_info;
 int autofs_dump(struct mount_info *pm);
+int autofs_mount(const char *source, const char *target,
+		 const char *filesystemtype, unsigned long mountflags,
+		 const void *data);
 
 #endif
diff --git a/mount.c b/mount.c
index a1dccfe..2f9ecb1 100644
--- a/mount.c
+++ b/mount.c
@@ -1460,7 +1460,7 @@ static struct fstype fstypes[32] = {
 		.name = "autofs",
 		.code = FSTYPE__AUTOFS,
 		.dump = autofs_dump,
-		.restore = always_fail,
+		.mount = autofs_mount,
 	},
 };
 



More information about the CRIU mailing list