[CRIU] [RFC PATCH 4/5] autofs: restore procedure introduced
Pavel Emelyanov
xemul at parallels.com
Mon Nov 23 11:54:12 PST 2015
On 11/23/2015 08:22 PM, Stanislav Kinsburskiy wrote:
> To restore AutoFS we will need a bunch of tricks:
>
> 1) Autofs have to be mounted by a specialized routine, because we need to
> create a context (see below), remove "timeout=" from mount options and set
> proper "pgrp=" and "fd=" options. Also we need to set mount point as catatonic,
> if it was so, and adjust timeout. Which is done via mount point ioctls.
> To isolate all these from generic code, "mount" callback in fstype operations
> was introduced.
>
> 2) When all mounts restored, we need replace AutoFS kernel pipe end with the
> right one (belonging to the autmount/systemd/etc process). This is performed
> via "post_open" callback for pipes. If process belongs to one of automount
> contexts, and pipe fd is the right one, then AutoFS mount is set ctatonic and
> it's pgrp and fd are changes to right ones via /dev/autofs ioctl.
>
> Note: This patch contains header file auto_dev-ioctl.h, which is used to
> easy ioctl calls.
Is this header exported to the user-space somehow?
> Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
> ---
> auto_dev-ioctl.h | 229 +++++++++++++++++++++++++++++++++++++++
> include/proc_parse.h | 3 +
> mount.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++++--
> pipes.c | 18 +++
> 4 files changed, 527 insertions(+), 14 deletions(-)
> create mode 100644 auto_dev-ioctl.h
>
> diff --git a/auto_dev-ioctl.h b/auto_dev-ioctl.h
> new file mode 100644
> index 0000000..850f39b
> --- /dev/null
> +++ b/auto_dev-ioctl.h
> diff --git a/include/proc_parse.h b/include/proc_parse.h
...
> index 123f5db..80fda92 100644
> --- a/include/proc_parse.h
> +++ b/include/proc_parse.h
> @@ -106,6 +106,9 @@ struct fstype {
> int (*dump)(struct mount_info *pm);
> int (*restore)(struct mount_info *pm);
> int (*parse)(struct mount_info *pm);
> + int (*mount)(const char *source, const char *target,
> + const char *filesystemtype, unsigned long mountflags,
> + const void *data);
Introducing .mount callback should go with separate patch.
> };
>
> struct vm_area_list;
> diff --git a/mount.c b/mount.c
> index c07d2d1..7a391ca 100644
> --- a/mount.c
> +++ b/mount.c
> @@ -30,12 +30,18 @@
>
> #include "protobuf/mnt.pb-c.h"
>
> +#include "auto_dev-ioctl.h"
> +
> #define AUTODETECTED_MOUNT "CRIU:AUTOGENERATED"
> #define MS_PROPAGATE (MS_SHARED | MS_PRIVATE | MS_UNBINDABLE | MS_SLAVE)
>
> #undef LOG_PREFIX
> #define LOG_PREFIX "mnt: "
>
> +typedef int (*mount_fn_t)(const char *source, const char *target,
> + const char *filesystemtype, unsigned long mountflags,
> + const void *data);
> +
> int ext_mount_add(char *key, char *val)
> {
> struct ext_mount *em;
> @@ -1130,26 +1136,30 @@ out:
> return -1;
> }
>
> -static int attach_option(struct mount_info *pm, char *opt)
> +static char *attach_token(char *string, const char *token, char separator)
> {
> char *buf;
> int len, olen;
>
> - len = strlen(pm->options);
> - olen = strlen(opt);
> - buf = xrealloc(pm->options, len + olen + 2);
> + len = strlen(string);
> + olen = strlen(token);
> + buf = xrealloc(string, len + olen + 2);
> if (buf == NULL)
> - return -1;
> + return NULL;
>
> - if (len && buf[len - 1] != ',') {
> - buf[len] = ',';
> + if (len && buf[len - 1] != separator) {
> + buf[len] = separator;
> len++;
> }
>
> - memcpy(buf + len, opt, olen + 1);
> - pm->options = buf;
> + memcpy(buf + len, token, olen + 1);
> + return buf;
> +}
The attach_token() should come with separate patch.
> - return 0;
> +static int attach_option(struct mount_info *pm, char *opt)
> +{
> + pm->options = attach_token(pm->options, opt, ',');
> + return (pm->options) ? 0 : -1;
> }
>
> /* Is it mounted w or w/o the newinstance option */
> @@ -1405,8 +1415,6 @@ static int autofs_fixup_options(struct mount_info *new)
> for (i = 0; i < nr_opts; i++) {
> char *opt = options[i];
>
> - if (!strncmp(opt, "timeout=", strlen("timeout=")))
> - continue;
> if (!strncmp(opt, "pgrp=", strlen("pgrp="))) {
> int real_pid, virt_pid = -1;
> struct pstree_item *item;
> @@ -1460,6 +1468,7 @@ static int autofs_dump(struct mount_info *pm)
> int pipe_fd, pipe_size, steal_pipe[2];
> ssize_t bytes;
> int err;
> + struct mount_info *t;
>
> /* TODO: we are called wthin proper pid namespace. We can't use blobal
> * proc and have to mount proc somewhere */
> @@ -1512,6 +1521,13 @@ static int autofs_dump(struct mount_info *pm)
> if (err)
> goto close_steal;
>
> + list_for_each_entry(t, &pm->mnt_bind, mnt_bind) {
> + xfree(t->options);
> + t->options = xstrdup(pm->options);
> + if (!t->options)
> + goto close_steal;
> + }
> +
Runaway from patch #2?
> err = 0;
>
> close_steal:
> @@ -1522,6 +1538,252 @@ close_pipe:
> return err;
> }
>
> +struct autofs_info_s {
> + int pipe_fd;
> + const char *mnt_path;
> + struct autofs_info_s *next;
> +};
> +
> +struct autofs_service_s {
> + int pgrp;
> + struct autofs_info_s *mnt_info;
> + struct autofs_service_s *next;
> +} *automount;
> +
> +int autofs_fixup_owner(struct autofs_info_s *info)
> +{
> + int autofs_fd, mnt_fd, err = -1;
> + struct autofs_dev_ioctl param;
> +
> + autofs_fd = open("/dev/autofs", O_RDONLY);
> + if (autofs_fd == -1) {
> + pr_perror("failed to open /dev/autofs");
> + return -1;
> + }
> +
> + mnt_fd = open(info->mnt_path, O_RDONLY | O_DIRECTORY);
> + if (mnt_fd == -1) {
> + pr_perror("failed to open %s", info->mnt_path);
> + goto close_fd;
> + }
> +
> + init_autofs_dev_ioctl(¶m);
> + err = ioctl(mnt_fd, AUTOFS_IOC_CATATONIC, 0);
> + if (err == -1) {
> + pr_perror("failed to set mount point catatonic");
> + goto close_mnt_fd;
> +
> + }
> +
> + init_autofs_dev_ioctl(¶m);
> + param.ioctlfd = mnt_fd;
> + param.setpipefd.pipefd = info->pipe_fd;
The info->pipe_fd is set by autofs_mount() and the fd value is purely
parsed from options. Who and when creates this very fd?
> +
> + err = ioctl(autofs_fd, AUTOFS_DEV_IOCTL_SETPIPEFD, ¶m);
> + if (err == -1) {
> + pr_perror("failed to send ioctl to /dev/autofs");
> + goto close_mnt_fd;
> +
> + }
> +
> + pr_info("autofs mount \"%s\" owner restored: pgrp=%d, fd=%d\n",
> + info->mnt_path, getpid(), mnt_fd);
> +
> + err = 0;
> +
> +close_fd:
> + close(autofs_fd);
> +close_mnt_fd:
> + close(mnt_fd);
> + return err;
> +}
> +
...
> +
> +static int autofs_mount(const char *source, const char *target,
> + const char *filesystemtype, unsigned long mountflags,
> + const void *data)
> +{
> + char *mount_ops, **options;
> + int nr_opts, i, err = -1;
> + int pgrp = 0, fd = INT_MIN, timeout = 0;
> + int automount_pipe[2];
> +
> + mount_ops = xstrdup(data);
> + if (!mount_ops)
> + return -1;
> +
> + split(mount_ops, ',', &options, &nr_opts);
> + if (!options)
> + return -1;
> +
> + memset(mount_ops, 0, strlen(mount_ops));
> +
> + if (pipe(automount_pipe) < 0) {
> + pr_perror("Can't create pipe");
> + return -1;
> + }
> +
> + sprintf(mount_ops, "fd=%d,pgrp=%d", automount_pipe[1], getpgrp());
> +
> + for (i = 0; i < nr_opts; i++) {
> + char *opt = options[i];
> +
> + if (!strncmp(opt, "timeout=", strlen("timeout="))) {
> + timeout = atoi(opt + strlen("timeout="));
> + continue;
> + }
> +
> + if (!strncmp(opt, "pgrp=", strlen("pgrp="))) {
> + pgrp = atoi(opt + strlen("pgrp="));
> + continue;
> + }
> +
> + if (!strncmp(opt, "fd=", strlen("fd="))) {
> + fd = atoi(opt + strlen("fd="));
> + continue;
> + }
> +
> + strcat(mount_ops, ",");
> + strcat(mount_ops, opt);
> + }
> +
> + if (fd == INT_MIN) {
> + pr_err("failed to find \"fd=\" option\n");
> + goto close_pipe;
> + }
> +
> + if (pgrp == 0) {
> + pr_err("failed to find \"pgrp=\" option\n");
> + goto close_pipe;
> + }
> +
> + pr_info("autofs mount: target: %s, pgrp: %d, fd: %d\n", target, pgrp, fd);
> + pr_info("autofs mount: adjusted options: \"%s\"\n", mount_ops);
> +
> + if (mount(source, target, filesystemtype, mountflags, mount_ops) < 0) {
> + pr_perror("Can't mount at %s", target);
> + goto close_pipe;
> + }
> +
> + if (autofs_set_timeout(target, timeout))
> + goto close_pipe;
> +
> + if (fd == -1)
> + err = autofs_make_catatonic(target);
> + else
> + err = autofs_add_info(pgrp, fd, target);
You will turn this into catatonic mode in autofs_fixup_owner() anyway,
why not put here it into this mode always and fixup (if required) in
autofs_fixup_owner()?
> +
> +close_pipe:
> + close(automount_pipe[1]);
> + close(automount_pipe[0]);
> + return err;
> +}
> +
> static struct fstype fstypes[32] = {
> {
> .name = "unsupported",
> @@ -1594,7 +1856,7 @@ static struct fstype fstypes[32] = {
> .name = "autofs",
> .code = FSTYPE__AUTOFS,
> .dump = autofs_dump,
> - .restore = always_fail,
> + .mount = autofs_mount,
> },
> };
>
> @@ -2096,6 +2358,7 @@ static int do_new_mount(struct mount_info *mi)
> char *src;
> struct fstype *tp = mi->fstype;
> bool remount_ro = (tp->restore && mi->sb_flags & MS_RDONLY);
> + mount_fn_t do_mount = (tp->mount) ? tp->mount: mount;
>
> src = resolve_source(mi);
> if (!src)
> @@ -2110,7 +2373,7 @@ static int do_new_mount(struct mount_info *mi)
> if (remount_ro)
> sflags &= ~MS_RDONLY;
>
> - if (mount(src, mi->mountpoint, tp->name, sflags, mi->options) < 0) {
> + if (do_mount(src, mi->mountpoint, tp->name, sflags, mi->options) < 0) {
> pr_perror("Can't mount at %s", mi->mountpoint);
> return -1;
> }
> diff --git a/pipes.c b/pipes.c
> index d4f70db..bda1c1d 100644
> --- a/pipes.c
> +++ b/pipes.c
> @@ -380,11 +380,29 @@ static int want_transport(FdinfoEntry *fe, struct file_desc *d)
> return !pi->create;
> }
>
> +struct autofs_info_s;
> +struct autofs_info_s *find_automount_info(int pgrp, int pipe_fd);
> +int autofs_fixup_owner(struct autofs_info_s *info);
> +
> +static int pipe_post_open(struct file_desc *d, int fd)
> +{
> + struct autofs_info_s *info;
> +
> + pr_info("%s: searching info by pid %d and fd %d\n", __func__, getpid(), fd);
> + info = find_automount_info(getpid(), fd);
The find_automount_info() is used only here. Why not merge this call into
autofs_fixup_owner()?
> + if (!info)
> + return 0;
> +
> + pr_info("%s: found info by pid %d and fd %d\n", __func__, getpid(), fd);
> + return autofs_fixup_owner(info);
> +}
> +
> static struct file_desc_ops pipe_desc_ops = {
> .type = FD_TYPES__PIPE,
> .open = open_pipe,
> .want_transport = want_transport,
> .name = pipe_d_name,
> + .post_open = pipe_post_open,
This is way too slow to lookup autofs info for every single pipe in a system.
I don't have any exact solution ATM, but doing it vice-versa, i.e. -- finding
a pipe when collecting/creating autofs mountpoint and marking it with a bit,
saying this should be done, is something I'd think about.
> };
>
> static int collect_one_pipe(void *o, ProtobufCMessage *base)
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
> .
>
More information about the CRIU
mailing list