[CRIU] [PATCH 04/15] autofs: dump stage introduced

Stanislav Kinsburskiy skinsbursky at virtuozzo.com
Thu Dec 3 05:28:59 PST 2015


An AutoFS mount point is managed by a user-space daemon, which receives requests
from the kernel via a pipe that is passed in at mount time.
The kernel holds the write end, while the user-space process holds the read end.
Thus, for a successful AutoFS migration, this connection has to be restored.

Below are major tricks to be performed to dump AutoFS mount:

1) The read end of the pipe in the process has to be discovered, because if the
write end of the pipe is closed in the process, the read end will be used to
restore the pipe.
Note: migration of AutoFS mounts whose process group leader does not have the
read end of the pipe open is not supported.

2) The read end of the pipe has to be empty. If it's not empty, then the kernel
and the process are somewhere in the middle of mounting or unmounting some
nested autofs mount, and carrying over such context doesn't make any sense.

3) Mount points are collected from the initial pid namespace, which leads to
the real pgrp value appearing in the AutoFS options instead of the virtual one.
This has to be fixed.

Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
---
 Makefile.crtools |    1 
 autofs.c         |  322 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/autofs.h |    3 +
 mount.c          |    6 +
 4 files changed, 332 insertions(+)
 create mode 100644 autofs.c

diff --git a/Makefile.crtools b/Makefile.crtools
index 254a7a2..3926538 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -80,6 +80,7 @@ obj-y	+= fault-injection.o
 obj-y	+= pie/util-fd.o
 obj-y	+= pie/util.o
 obj-y	+= seccomp.o
+obj-y	+= autofs.o
 
 ifneq ($(MAKECMDGOALS),clean)
 incdeps := y
diff --git a/autofs.c b/autofs.c
new file mode 100644
index 0000000..589e87a
--- /dev/null
+++ b/autofs.c
@@ -0,0 +1,322 @@
+#include <unistd.h>
+#include <sys/stat.h>
+#include <stdarg.h>
+
+#include "util.h"
+#include "pstree.h"
+#include "proc_parse.h"
+#include "namespaces.h"
+#include "autofs.h"
+
+/*
+ * Append printf-style formatted text to a heap-allocated string.
+ *
+ * @str:  string to extend, or NULL to start a new one. Ownership is taken:
+ *        on success the returned pointer supersedes it, on failure it is
+ *        released.
+ * @fmt:  printf-style format.
+ * @args: arguments for @fmt. A private copy is consumed for every
+ *        formatting attempt, so the buffer can be grown and the print
+ *        retried.
+ *
+ * Returns the (possibly relocated) string, or NULL on failure.
+ */
+static char *add_to_string_vargs(char *str, const char *fmt, va_list args)
+{
+	size_t offset = 0, delta;
+	int ret;
+	char *new;
+	va_list tmp;
+
+	if (str)
+		offset = strlen(str);
+	/* First guess; always leave room for the terminating NUL */
+	delta = strlen(fmt) * 2 + 1;
+
+	for (;;) {
+		new = xrealloc(str, offset + delta);
+		if (!new) {
+			/* realloc failed. We must release former string */
+			pr_err("Failed to allocate string\n");
+			xfree(str);
+			return NULL;
+		}
+		/* From now on only the reallocated buffer is valid */
+		str = new;
+
+		va_copy(tmp, args);
+		ret = vsnprintf(str + offset, delta, fmt, tmp);
+		va_end(tmp);
+
+		if (ret < 0) {
+			/* vsnprintf failed */
+			pr_err("Failed to print string\n");
+			xfree(str);
+			return NULL;
+		}
+
+		/* Everything, including the NUL, fitted into the buffer */
+		if ((size_t)ret < delta)
+			return str;
+
+		/* Output was truncated: retry with exactly the size needed */
+		delta = (size_t)ret + 1;
+	}
+}
+
+/*
+ * Variadic front-end of add_to_string_vargs(): append formatted text to
+ * @str and return whatever add_to_string_vargs() returns.
+ */
+static char *add_to_string(char *str, const char *fmt, ...)
+{
+	char *res;
+	va_list ap;
+
+	va_start(ap, fmt);
+	res = add_to_string_vargs(str, fmt, ap);
+	va_end(ap);
+
+	return res;
+}
+
+/*
+ * Build a brand new string from a printf-style format by handing a NULL
+ * base string to add_to_string_vargs(); its result is returned as is.
+ */
+static char *construct_string(const char *fmt, ...)
+{
+	char *res;
+	va_list ap;
+
+	va_start(ap, fmt);
+	res = add_to_string_vargs(NULL, fmt, ap);
+	va_end(ap);
+
+	return res;
+}
+
+/*
+ * Check whether an already stat()-ed descriptor @fd of process @prgp is
+ * the pipe with inode @ino.
+ *
+ * Returns 0 when it is not that pipe, 1 when it is (then *@mode is filled
+ * from the flags reported by parse_fdinfo_pid()), and -1 when
+ * parse_fdinfo_pid() returns non-zero.
+ */
+static int autofs_check_fd_stat(struct stat *stat, int prgp, int fd,
+				long ino, int *mode)
+{
+	struct fdinfo_common fdinfo;
+	bool wanted_pipe = S_ISFIFO(stat->st_mode) && stat->st_ino == ino;
+
+	if (!wanted_pipe)
+		return 0;
+
+	if (parse_fdinfo_pid(prgp, fd, FD_TYPES__UND, NULL, &fdinfo) != 0)
+		return -1;
+
+	/*
+	 * Only the O_WRONLY bit is kept.
+	 * NOTE(review): a descriptor opened O_RDWR would yield 0 here as
+	 * well - confirm that this is intended.
+	 */
+	*mode = fdinfo.flags & O_WRONLY;
+	return 1;
+}
+
+/*
+ * Check whether descriptor @fd of process @pgrp is still the write end of
+ * the pipe with inode @ino.
+ *
+ * Returns 1 if it is, 0 if the descriptor doesn't exist or is something
+ * else, and -1 on error.
+ */
+static int autofs_kernel_pipe_alive(int pgrp, int fd, long ino)
+{
+	struct stat buf;
+	char *path;
+	int ret, fd_mode;
+
+	path = construct_string("/proc/%d/fd/%d", pgrp, fd);
+	if (!path)
+		return -1;
+
+	if (stat(path, &buf) < 0) {
+		/* ENOENT only means that the descriptor is gone */
+		ret = (errno == ENOENT) ? 0 : -1;
+		if (ret)
+			pr_perror("Failed to stat %s", path);
+		/* path must be released on the failure paths too */
+		xfree(path);
+		return ret;
+	}
+
+	xfree(path);
+
+	ret = autofs_check_fd_stat(&buf, pgrp, fd, ino, &fd_mode);
+	if (ret <= 0)
+		return ret;
+
+	return O_WRONLY == fd_mode;
+}
+
+/*
+ * Look through the descriptors of process @pgrp for the read end of the
+ * pipe with inode @ino.
+ *
+ * Returns 0 when the scan completed: *@read_fd then holds the descriptor
+ * number, or -1 if the process has no such descriptor. Returns -1 on error
+ * (the fd directory can't be opened, fstatat() fails or the descriptor
+ * check fails).
+ */
+static int autofs_find_pipe_read_end(int pgrp, long ino, int *read_fd)
+{
+	DIR *dir;
+	struct dirent *de;
+	int ret = -1;
+
+	dir = opendir_proc(pgrp, "fd");
+	if (dir == NULL)
+		return -1;
+
+	/* "Not found" is reported via *read_fd, not via the return code */
+	*read_fd = -1;
+
+	while ((de = readdir(dir))) {
+		struct stat buf;
+		int found, mode, fd;
+
+		if (dir_dots(de))
+			continue;
+
+		if (fstatat(dirfd(dir), de->d_name, &buf, 0) < 0) {
+			pr_perror("Failed to fstatat");
+			goto out;
+		}
+
+		fd = atoi(de->d_name);
+
+		found = autofs_check_fd_stat(&buf, pgrp, fd, ino, &mode);
+		if (found < 0)
+			goto out;
+		if (found && (mode == O_RDONLY)) {
+			*read_fd = fd;
+			break;
+		}
+	}
+
+	ret = 0;
+out:
+	closedir(dir);
+	close_pid_proc();
+
+	return ret;
+}
+
+/*
+ * Replace the options of AutoFS mount @pm and of all its bind-mounts.
+ *
+ * The new option string starts with "pgrp=<pgrp>,fd=<kernel_fd>". When the
+ * mount is not catatonic and the kernel descriptor is not alive,
+ * ",read_fd=<read_fd>" is added. All the original options follow, except
+ * "pgrp=", "fd=" and "pipe_ino=".
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int autofs_fixup_options(struct mount_info *pm, int pgrp, bool catatonic,
+				int kernel_fd, bool kernel_fd_alive,
+				int read_fd)
+{
+	char **options;
+	int nr_opts, i, err = -1;
+	char *new_opt;
+	struct mount_info *t;
+
+	new_opt = construct_string("pgrp=%d,fd=%d", pgrp, kernel_fd);
+	if (!new_opt)
+		return -1;
+
+	if (!catatonic && !kernel_fd_alive) {
+		/* Write end is closed or invalid
+		 * Let's introduce new options only for us to carry pipe
+		 * information. */
+		new_opt = add_to_string(new_opt, ",read_fd=%d", read_fd);
+	}
+	if (!new_opt)
+		return -1;
+
+	split(pm->options, ',', &options, &nr_opts);
+	if (!options) {
+		/* Nobody else owns the new string yet */
+		xfree(new_opt);
+		return -1;
+	}
+
+	for (i = 0; i < nr_opts; i++) {
+		char *opt = options[i];
+
+		/* These ones are replaced by the values passed in */
+		if (!strncmp(opt, "pgrp=", strlen("pgrp=")) ||
+		    !strncmp(opt, "fd=", strlen("fd=")) ||
+		    !strncmp(opt, "pipe_ino=", strlen("pipe_ino=")))
+			continue;
+
+		/* On failure add_to_string() has already released new_opt */
+		new_opt = add_to_string(new_opt, ",%s", opt);
+		if (!new_opt)
+			goto out;
+	}
+
+	/* The mount takes ownership of the new string */
+	xfree(pm->options);
+	pm->options = new_opt;
+
+	/* Change options for all bind-mounts */
+	list_for_each_entry(t, &pm->mnt_bind, mnt_bind) {
+		xfree(t->options);
+		t->options = xstrdup(pm->options);
+		if (!t->options)
+			goto out;
+	}
+
+	err = 0;
+
+	pr_info("autofs fixed options: \"%s\"\n", pm->options);
+
+out:
+	for (i = 0; i < nr_opts; i++)
+		xfree(options[i]);
+	xfree(options);
+	return err;
+}
+
+/*
+ * Dump-stage handler for an AutoFS mount.
+ *
+ * Parses "pgrp=", "fd=" and "pipe_ino=" out of the mount options. Unless
+ * the mount is catatonic (fd=-1) it then:
+ *  - converts pgrp to its virtual value,
+ *  - checks whether the kernel descriptor is still the pipe write end,
+ *  - locates the pipe read end in the process and makes sure it's empty.
+ * Finally the mount options are rewritten by autofs_fixup_options().
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int autofs_dump(struct mount_info *pm)
+{
+	char *pgrp_opt, *fd_opt, *pipe_ino_opt;
+	int pgrp, vpgrp = 0;
+	int fd, kernel_fd, read_fd = -1;
+	long pipe_ino;
+	bool catatonic = false, kpipe_alive = false;
+
+	pr_info("autofs \"%s\" options: %s\n", pm->mountpoint, pm->options);
+
+	pgrp_opt = strstr(pm->options, "pgrp=");
+	if (!pgrp_opt) {
+		pr_err("Failed to find pgrp option\n");
+		return -1;
+	}
+
+	fd_opt = strstr(pm->options, "fd=");
+	if (!fd_opt) {
+		pr_err("Failed to find fd option\n");
+		return -1;
+	}
+
+	pipe_ino_opt = strstr(pm->options, "pipe_ino=");
+	if (!pipe_ino_opt) {
+		pr_err("Failed to find pipe_ino option\n");
+		return -1;
+	}
+
+	if (sscanf(pgrp_opt, "pgrp=%d", &pgrp) != 1) {
+		pr_err("Failed to get pgrp: %s\n", pgrp_opt);
+		return -1;
+	}
+
+	if (sscanf(fd_opt, "fd=%d", &kernel_fd) != 1) {
+		pr_err("Failed to get fd: %s\n", fd_opt);
+		return -1;
+	}
+
+	if (sscanf(pipe_ino_opt, "pipe_ino=%ld", &pipe_ino) != 1) {
+		pr_err("Failed to get pipe_ino: %s\n", pipe_ino_opt);
+		return -1;
+	}
+
+	/* When fd is equal to -1, it's a catatonic mount.
+	 * In this case fd, pipe_ino, and pgrp don't mean anything. */
+	if (kernel_fd == -1) {
+		catatonic = true;
+		goto fix_it;
+	}
+
+	/* We need to get virtual pgrp to restore mount */
+	vpgrp = pid_to_virt(pgrp);
+	if (!vpgrp) {
+		pr_err("failed to find pstree item with pid %d\n",
+				pgrp);
+		pr_err("Non-catatonic mount without master?\n");
+		return -1;
+	}
+
+	/* Let's check whether write end is still open */
+	switch (autofs_kernel_pipe_alive(pgrp, kernel_fd, pipe_ino)) {
+		case 1:
+			/* This is the kernel pipe end fd.
+			 * We don't need to search for write end. */
+			kpipe_alive = true;
+			/* fallthrough */
+		case 0:
+			break;
+		default:
+			pr_err("Failed to check fd %d in process %d\n",
+					kernel_fd, pgrp);
+			return -1;
+	}
+
+	/* Let's try to find process file descriptors, which corresponds to
+	 * desired pipe. */
+	if (autofs_find_pipe_read_end(pgrp, pipe_ino, &read_fd) < 0) {
+		pr_err("Failed to find read pipe fd (ino %ld) in process %d\n",
+				pipe_ino, pgrp);
+		return -1;
+	}
+
+	if (read_fd == -1) {
+		pr_err("Master %d doesn't have a read end of the pipe with "
+			"inode %ld opened\n", pgrp, pipe_ino);
+		pr_err("Abandoned mount or control was delegated to child?\n");
+		return -1;
+	}
+
+	/* Let's check, that read end is empty */
+	fd = open_proc(pgrp, "fd/%d", read_fd);
+	if (fd < 0)
+		return -1;
+
+	if (fd_has_data(fd)) {
+		pr_err("Process %d autofs pipe fd %d is not empty.\n", pgrp,
+				read_fd);
+		pr_err("Try again later.\n");
+		/* Don't leak our own descriptor on the error path */
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+fix_it:
+	return autofs_fixup_options(pm, vpgrp, catatonic,
+				    kernel_fd, kpipe_alive,
+				    read_fd);
+}
+
diff --git a/include/autofs.h b/include/autofs.h
index b30d915..3b9e46c 100644
--- a/include/autofs.h
+++ b/include/autofs.h
@@ -5,4 +5,7 @@
 #define AUTOFS_MINOR	235
 #endif
 
+struct mount_info;
+int autofs_dump(struct mount_info *pm);
+
 #endif
diff --git a/mount.c b/mount.c
index ca10d43..8b92538 100644
--- a/mount.c
+++ b/mount.c
@@ -27,6 +27,7 @@
 #include "kerndat.h"
 #include "fs-magic.h"
 #include "sysfs_parse.h"
+#include "autofs.h"
 
 #include "protobuf/mnt.pb-c.h"
 
@@ -1455,6 +1456,11 @@ static struct fstype fstypes[32] = {
 		.name = "overlay",
 		.code = FSTYPE__OVERLAYFS,
 		.parse = overlayfs_parse,
+	}, {
+		.name = "autofs",
+		.code = FSTYPE__AUTOFS,
+		.dump = autofs_dump,
+		.restore = always_fail,
 	},
 };
 



More information about the CRIU mailing list