[CRIU] [PATCH v3 2/2] restore: correctly restore cgroup mounts inside a container
Pavel Emelyanov
xemul at virtuozzo.com
Mon Mar 28 03:42:55 PDT 2016
On 03/25/2016 10:26 PM, Tycho Andersen wrote:
> Before the nsroot= mount option, we were just getting lucky because the
> cgroup superblocks "matched" when inspecting them from userspace, so we
> were actually getting a bind mount from the host when migrating from within
> cgroup namespaces.
>
> Instead, let's actually do a new mount (i.e. not a bind mount) for cgroup
> namespaces. For this, we need two things:
>
> 1. to prepare the cgroup namespace (and thus the cgroups) before the mount
> ns, so when the mount() occurs it is relative to the right cgroup path.
>
> 2. not reject cgroup filesystems with no root. A cgroup ns mount looks
> like:
>
> 223 222 0:22 /lxc/unpriv /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd,nsroot=/lxc/unpriv
>
> i.e. it has /lxc/unpriv as its root, and thus doesn't look rooted to CRIU.
> We introduce the fstype->munge hook to rewrite this root to /, since it
> is handled by the cgroup ns infrastructure.
>
> v2: add new fstype->munge hook, allowing fstypes to munge their parsed
> mountinfo entries if they want to. this allows us to get rid of the
> ugly hacks with FSTYPE__CGROUP everywhere in the patch.
>
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
> criu/cr-restore.c | 18 +++++++++---------
> criu/include/proc_parse.h | 1 +
> criu/mount.c | 14 ++++++++++++++
> criu/proc_parse.c | 3 +++
> 4 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index ffd2f01..967d5fa 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -1608,6 +1608,15 @@ static int restore_task_with_children(void *_arg)
> goto err;
> }
>
> + /*
> + * Call this _before_ forking to optimize cgroups
> + * restore -- if all tasks live in one set of cgroups
> + * we will only move the root one there, others will
> + * just have it inherited.
> + */
> + if (prepare_task_cgroup(current) < 0)
> + goto err;
> +
> /* Restore root task */
> if (current->parent == NULL) {
> if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
> @@ -1640,15 +1649,6 @@ static int restore_task_with_children(void *_arg)
> if (prepare_mappings())
> goto err;
>
> - /*
> - * Call this _before_ forking to optimize cgroups
> - * restore -- if all tasks live in one set of cgroups
> - * we will only move the root one there, others will
> - * just have it inherited.
> - */
> - if (prepare_task_cgroup(current) < 0)
> - goto err;
> -
> if (prepare_sigactions() < 0)
> goto err;
>
> diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h
> index 5de5c86..b9b44aa 100644
> --- a/criu/include/proc_parse.h
> +++ b/criu/include/proc_parse.h
> @@ -114,6 +114,7 @@ struct fstype {
> int (*dump)(struct mount_info *pm);
> int (*restore)(struct mount_info *pm);
> int (*parse)(struct mount_info *pm);
> + int (*munge)(struct mount_info *pm);
> mount_fn_t mount;
> };
>
> diff --git a/criu/mount.c b/criu/mount.c
> index eb8d058..427ccc1 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -1631,6 +1631,19 @@ out:
> return ret;
> }
>
> +static int cgroup_munge(struct mount_info *pm)
> +{
> + if (!(root_ns_mask & CLONE_NEWCGROUP))
> + return 0;
> +
> + xfree(pm->root);
> + pm->root = xstrdup("/");
> + if (!pm->root)
> + return -1;
> +
> + return 0;
> +}
> +
> static int dump_empty_fs(struct mount_info *pm)
> {
> int fd, ret = -1;
> @@ -1716,6 +1729,7 @@ static struct fstype fstypes[32] = {
> }, {
> .name = "cgroup",
> .code = FSTYPE__CGROUP,
> + .munge = cgroup_munge,
> }, {
> .name = "aufs",
> .code = FSTYPE__AUFS,
> diff --git a/criu/proc_parse.c b/criu/proc_parse.c
> index 24a9154..d39bd96 100644
> --- a/criu/proc_parse.c
> +++ b/criu/proc_parse.c
> @@ -1295,6 +1295,9 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
> if (parse_sb_opt(opt, &new->sb_flags, new->options))
> goto err;
>
> + if (new->fstype->munge && new->fstype->munge(new) < 0)
> + goto err;
> +
Hm... A few lines below there's already a call to the ->parse() callback :)
> ret = 0;
> ret:
> xfree(opt);
>
More information about the CRIU
mailing list