[CRIU] [PATCH v4 05/17] autofs: parse fsinfo stage introduced

Stanislav Kinsburskiy skinsbursky at virtuozzo.com
Thu Jan 7 08:09:41 PST 2016


An AutoFS mount point is managed by a user space daemon, which receives
requests from the kernel via a pipe passed on the mount operation.
The kernel holds the write end, while the user space process holds the read end.
Thus, for successful AutoFS migration, this connection has to be
restored.

Below are major tricks to be performed to dump AutoFS mount:

1) The read end of the pipe in the process has to be discovered, because if
the write end of the pipe is closed in the process, the read end will be used
to restore the pipe.
Note: migration of AutoFS mounts with a process group leader without the read
pipe end opened is not supported.

2) Read pipe end has to be empty. If it's not empty, then kernel and
process are somewhere in the middle of mounting or unmounting of an autofs mount
and carrying of such context doesn't make any sense.

During this stage, all the autofs parameters are parsed and collected in
a per-mount autofs structure. It also checks that the autofs mount is not in
an interim state.

Note: migration of autofs mount point in interim state is not supported. Interim
state is determined by control pipe contents: if it's not empty, then it
means, that kernel requested some operation (mount/umount), which is in
process right now.
If the pipe is not empty, the dump is aborted.

Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
---
 Makefile.crtools |    1 
 autofs.c         |  269 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/autofs.h |    3 +
 mount.c          |    6 +
 4 files changed, 279 insertions(+)
 create mode 100644 autofs.c

diff --git a/Makefile.crtools b/Makefile.crtools
index 5788ef0..8a38fd1 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -78,6 +78,7 @@ obj-y	+= fault-injection.o
 obj-y	+= pie/util-fd.o
 obj-y	+= pie/util.o
 obj-y	+= seccomp.o
+obj-y	+= autofs.o
 
 ifneq ($(MAKECMDGOALS),clean)
 incdeps := y
diff --git a/autofs.c b/autofs.c
new file mode 100644
index 0000000..4133ebf
--- /dev/null
+++ b/autofs.c
@@ -0,0 +1,269 @@
+#include <unistd.h>
+#include <sys/stat.h>
+#include <stdarg.h>
+
+#include "proc_parse.h"
+#include "autofs.h"
+#include "util.h"
+#include "pstree.h"
+#include "namespaces.h"
+
+
+#define AUTOFS_OPT_UNKNOWN	INT_MIN
+
+#define AUTOFS_MODE_DIRECT	0
+#define AUTOFS_MODE_INDIRECT	1
+#define AUTOFS_MODE_OFFSET	2
+
+#define AUTOFS_CATATONIC_FD	-1
+
+/* Match an fd of process prgp against the pipe with inode number ino.
+ * Returns 0 on mismatch, -1 if fdinfo can't be parsed, 1 on match (*mode set). */
+static int autofs_check_fd_stat(struct stat *stat, int prgp, int fd,
+				long ino, int *mode)
+{
+	struct fdinfo_common fdinfo;
+
+	if (!S_ISFIFO(stat->st_mode) || stat->st_ino != ino)
+		return 0;
+	if (parse_fdinfo_pid(prgp, fd, FD_TYPES__UND, NULL, &fdinfo) != 0)
+		return -1;
+	/* Non-zero only when the O_WRONLY bit is set in the fd flags */
+	*mode = fdinfo.flags & O_WRONLY;
+	return 1;
+}
+
+/* Tell whether fd of process pgrp is the write end of the pipe with inode ino:
+ * 1 - it is, 0 - it is not (or there is no such fd), -1 - error. */
+static int autofs_kernel_pipe_alive(int pgrp, int fd, int ino)
+{
+	struct stat buf;
+	char *path;
+	int ret, fd_mode;
+
+	path = xsprintf("/proc/%d/fd/%d", pgrp, fd);
+	if (!path)
+		return -1;
+	if (stat(path, &buf) < 0) {
+		/* ENOENT: the fd is not opened, which is not an error here */
+		ret = (errno == ENOENT) ? 0 : -1;
+		if (ret)
+			pr_perror("Failed to stat %s", path);
+		xfree(path);	/* release the path on the failure paths as well */
+		return ret;
+	}
+	xfree(path);
+	ret = autofs_check_fd_stat(&buf, pgrp, fd, ino, &fd_mode);
+	if (ret <= 0)
+		return ret;
+	return O_WRONLY == fd_mode;
+}
+
+/* Look for a read-only fd of the pipe with inode ino among the fds of pgrp.
+ * Returns 0 with the fd (or -1 if there is none) in *read_fd, -1 on error. */
+static int autofs_find_pipe_read_end(int pgrp, long ino, int *read_fd)
+{
+	DIR *dir;
+	struct dirent *de;
+	int ret = -1;
+
+	dir = opendir_proc(pgrp, "fd");
+	if (dir == NULL)
+		return -1;
+
+	*read_fd = -1;
+	while ((de = readdir(dir))) {
+		struct stat buf;
+		int found, mode, fd;
+
+		if (dir_dots(de))
+			continue;
+
+		if (fstatat(dirfd(dir), de->d_name, &buf, 0) < 0) {
+			pr_perror("Failed to fstatat");
+			goto out;
+		}
+
+		fd = atoi(de->d_name);
+		found = autofs_check_fd_stat(&buf, pgrp, fd, ino, &mode);
+		if (found < 0)
+			goto out;
+		if (found && (mode == O_RDONLY)) {
+			*read_fd = fd;
+			break;
+		}
+	}
+	/* The scan itself went fine: "no read end" is reported via *read_fd */
+	ret = 0;
+out:
+	closedir(dir);
+	close_pid_proc();
+	return ret;
+}
+
+/* Find the read end of the autofs pipe (inode pipe_ino) in process pgrp and
+ * make sure it holds no data. Returns its fd number, or -1 on any failure. */
+static int autofs_find_read_fd(int pgrp, long pipe_ino)
+{
+	int read_fd, fd, has_data;
+
+	if (autofs_find_pipe_read_end(pgrp, pipe_ino, &read_fd) < 0) {
+		pr_err("Failed to find read pipe fd (ino %ld) "
+			"in process %d\n", pipe_ino, pgrp);
+		return -1;
+	}
+
+	if (read_fd == -1) {
+		pr_err("Master %d doesn't have a read end of the pipe with "
+			"inode %ld opened\n", pgrp, pipe_ino);
+		pr_err("Abandoned mount or control was delegated to child?\n");
+		return -1;
+	}
+
+	/* Let's check, that read end is empty */
+	fd = open_proc(pgrp, "fd/%d", read_fd);
+	if (fd < 0)
+		return -1;
+	has_data = fd_has_data(fd);
+	close(fd);	/* the probe fd is not needed whatever the answer is */
+	if (has_data) {
+		pr_err("Process %d autofs pipe fd %d is not empty.\n", pgrp, read_fd);
+		pr_err("Try again later.\n");
+		return -1;
+	}
+	return read_fd;
+}
+
+/* Split comma-separated autofs mount options and store the recognized ones.
+ * All but uid/gid are mandatory. pipe_ino is read with strtol(): it is a long
+ * and may not fit what atoi() handles. Returns 0 on success, -1 on failure. */
+static int parse_options(char *options, int32_t *fd, int32_t *timeout,
+			 int32_t *minproto, int32_t *maxproto, int32_t *mode,
+			 int32_t *pgrp, int32_t *uid, int32_t *gid,
+			 long *pipe_ino)
+{
+	char **opts;
+	int nr_opts, i;
+
+	*fd = AUTOFS_OPT_UNKNOWN;
+	*timeout = AUTOFS_OPT_UNKNOWN;
+	*minproto = AUTOFS_OPT_UNKNOWN;
+	*maxproto = AUTOFS_OPT_UNKNOWN;
+	*mode = AUTOFS_OPT_UNKNOWN;
+	*pgrp = AUTOFS_OPT_UNKNOWN;
+	*uid = AUTOFS_OPT_UNKNOWN;
+	*gid = AUTOFS_OPT_UNKNOWN;
+	*pipe_ino = AUTOFS_OPT_UNKNOWN;
+	split(options, ',', &opts, &nr_opts);
+	if (!opts)
+		return -1;
+
+	for (i = 0; i < nr_opts; i++) {
+		char *opt = opts[i];
+
+		if (!strncmp(opt, "fd=", strlen("fd=")))
+			*fd = atoi(opt + strlen("fd="));
+		else if (!strncmp(opt, "pipe_ino=", strlen("pipe_ino=")))
+			*pipe_ino = strtol(opt + strlen("pipe_ino="), NULL, 10);
+		else if (!strncmp(opt, "pgrp=", strlen("pgrp=")))
+			*pgrp = atoi(opt + strlen("pgrp="));
+		else if (!strncmp(opt, "timeout=", strlen("timeout=")))
+			*timeout = atoi(opt + strlen("timeout="));
+		else if (!strncmp(opt, "minproto=", strlen("minproto=")))
+			*minproto = atoi(opt + strlen("minproto="));
+		else if (!strncmp(opt, "maxproto=", strlen("maxproto=")))
+			*maxproto = atoi(opt + strlen("maxproto="));
+		else if (!strcmp(opt, "indirect"))
+			*mode = AUTOFS_MODE_INDIRECT;
+		else if (!strcmp(opt, "offset"))
+			*mode = AUTOFS_MODE_OFFSET;
+		else if (!strcmp(opt, "direct"))
+			*mode = AUTOFS_MODE_DIRECT;
+		else if (!strncmp(opt, "uid=", strlen("uid=")))
+			*uid = atoi(opt + strlen("uid="));
+		else if (!strncmp(opt, "gid=", strlen("gid=")))
+			*gid = atoi(opt + strlen("gid="));
+	}
+	for (i = 0; i < nr_opts; i++)
+		xfree(opts[i]);
+	xfree(opts);
+
+	if (*fd == AUTOFS_OPT_UNKNOWN) {
+		pr_err("Failed to find fd option\n");
+		return -1;
+	}
+	if (*pgrp == AUTOFS_OPT_UNKNOWN) {
+		pr_err("Failed to find pgrp option\n");
+		return -1;
+	}
+	if (*timeout == AUTOFS_OPT_UNKNOWN) {
+		pr_err("Failed to find timeout option\n");
+		return -1;
+	}
+	if (*minproto == AUTOFS_OPT_UNKNOWN) {
+		pr_err("Failed to find minproto option\n");
+		return -1;
+	}
+	if (*maxproto == AUTOFS_OPT_UNKNOWN) {
+		pr_err("Failed to find maxproto option\n");
+		return -1;
+	}
+	if (*mode == AUTOFS_OPT_UNKNOWN) {
+		pr_err("Failed to find mode (direct,indirect,offset) option\n");
+		return -1;
+	}
+	if (*pipe_ino == AUTOFS_OPT_UNKNOWN) {
+		pr_err("Failed to find pipe_ino option (old kernel?)\n");
+		return -1;
+	}
+	return 0;
+}
+
+/* On dump, gather the autofs options of pm into a new pm->autofs entry and
+ * check the control pipe state. Returns 0 on success, -1 on failure. */
+int autofs_parse(struct mount_info *pm, bool for_dump)
+{
+	AutofsEntry *entry;
+	long pipe_ino;
+
+	if (!for_dump)
+		return 0;
+
+	entry = xmalloc(sizeof(*entry));
+	if (!entry)
+		return -1;
+	autofs_entry__init(entry);
+
+	if (parse_options(pm->options, &entry->fd, &entry->timeout,
+			  &entry->minproto, &entry->maxproto, &entry->mode,
+			  &entry->pgrp, &entry->uid, &entry->gid, &pipe_ino))
+		goto err;
+	if (entry->uid != AUTOFS_OPT_UNKNOWN)
+		entry->has_uid = true;
+	if (entry->gid != AUTOFS_OPT_UNKNOWN)
+		entry->has_gid = true;
+
+	if (entry->fd != AUTOFS_CATATONIC_FD) {
+		int found, read_fd;
+
+		read_fd = autofs_find_read_fd(entry->pgrp, pipe_ino);
+		if (read_fd < 0)
+			goto err;
+		/* Let's check whether write end is still open */
+		found = autofs_kernel_pipe_alive(entry->pgrp, entry->fd, pipe_ino);
+		if (found < 0) {
+			pr_err("Failed to check fd %d in process %d\n", entry->fd, entry->pgrp);
+			goto err;
+		}
+		/* Write end is absent. We need to carry read end to restore. */
+		if (!found) {
+			entry->has_read_fd = true;
+			entry->read_fd = read_fd;
+		}
+	}
+	pm->autofs = entry;
+	return 0;
+err:
+	xfree(entry);	/* not attached to pm yet: free it here to avoid a leak */
+	return -1;
+}
diff --git a/include/autofs.h b/include/autofs.h
index b30d915..9770835 100644
--- a/include/autofs.h
+++ b/include/autofs.h
@@ -5,4 +5,7 @@
 #define AUTOFS_MINOR	235
 #endif
 
+struct mount_info;
+int autofs_parse(struct mount_info *pm, bool for_dump);
+
 #endif
diff --git a/mount.c b/mount.c
index 645c4b4..69c69e1 100644
--- a/mount.c
+++ b/mount.c
@@ -27,6 +27,7 @@
 #include "kerndat.h"
 #include "fs-magic.h"
 #include "sysfs_parse.h"
+#include "autofs.h"
 
 #include "protobuf/mnt.pb-c.h"
 #include "protobuf/binfmt-misc.pb-c.h"
@@ -1694,6 +1695,11 @@ static struct fstype fstypes[32] = {
 		.name = "overlay",
 		.code = FSTYPE__OVERLAYFS,
 		.parse = overlayfs_parse,
+	}, {
+		.name = "autofs",
+		.code = FSTYPE__AUTOFS,
+		.parse = autofs_parse,
+		.restore = always_fail,
 	},
 };
 



More information about the CRIU mailing list