[CRIU] [PATCH v3 2/2] restore: correctly restore cgroup mounts inside a container

Pavel Emelyanov xemul at virtuozzo.com
Mon Mar 28 03:42:55 PDT 2016


On 03/25/2016 10:26 PM, Tycho Andersen wrote:
> Before the nsroot= mount option, we were just getting lucky because the
> cgroup superblocks "matched" when inspecting them from userspace, so we
> were actually getting a bind mount from the host when migrating from within
> cgroup namespaces.
> 
> Instead, let's actually do a new (i.e. not a bind mount) for cgroup
> namespaces. For this, we need two things:
> 
> 1. to prepare the cgroup namespace (and thus the cgroups) before the mount
>    ns, so when the mount() occurrs it is relative to the right cgroup path.
> 
> 2. not reject cgroup filesystems with no root. A cgroup ns mount looks
>    like:
> 
> 	 223 222 0:22 /lxc/unpriv /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd,nsroot=/lxc/unpriv
> 
>    i.e. it has /lxc/unpriv as its root, and thus doesn't look rooted to CRIU.
>    We introduce the fstype->munge hook to rewrite this root to /, since it
>    is handled by the cgroup ns infrastructure.
> 
> v2: add new fstype->munge hook, allowing fstypes to munge their parsed
>     mountinfo entries if they want to. this allows us to get rid of the
>     ugly hacks with FSTYPE__CGROUP everywhere in teh patch.
> 
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
>  criu/cr-restore.c         | 18 +++++++++---------
>  criu/include/proc_parse.h |  1 +
>  criu/mount.c              | 14 ++++++++++++++
>  criu/proc_parse.c         |  3 +++
>  4 files changed, 27 insertions(+), 9 deletions(-)
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index ffd2f01..967d5fa 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -1608,6 +1608,15 @@ static int restore_task_with_children(void *_arg)
>  			goto err;
>  	}
>  
> +	/*
> +	 * Call this _before_ forking to optimize cgroups
> +	 * restore -- if all tasks live in one set of cgroups
> +	 * we will only move the root one there, others will
> +	 * just have it inherited.
> +	 */
> +	if (prepare_task_cgroup(current) < 0)
> +		goto err;
> +
>  	/* Restore root task */
>  	if (current->parent == NULL) {
>  		if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
> @@ -1640,15 +1649,6 @@ static int restore_task_with_children(void *_arg)
>  	if (prepare_mappings())
>  		goto err;
>  
> -	/*
> -	 * Call this _before_ forking to optimize cgroups
> -	 * restore -- if all tasks live in one set of cgroups
> -	 * we will only move the root one there, others will
> -	 * just have it inherited.
> -	 */
> -	if (prepare_task_cgroup(current) < 0)
> -		goto err;
> -
>  	if (prepare_sigactions() < 0)
>  		goto err;
>  
> diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h
> index 5de5c86..b9b44aa 100644
> --- a/criu/include/proc_parse.h
> +++ b/criu/include/proc_parse.h
> @@ -114,6 +114,7 @@ struct fstype {
>  	int (*dump)(struct mount_info *pm);
>  	int (*restore)(struct mount_info *pm);
>  	int (*parse)(struct mount_info *pm);
> +	int (*munge)(struct mount_info *pm);
>  	mount_fn_t mount;
>  };
>  
> diff --git a/criu/mount.c b/criu/mount.c
> index eb8d058..427ccc1 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -1631,6 +1631,19 @@ out:
>  	return ret;
>  }
>  
> +static int cgroup_munge(struct mount_info *pm)
> +{
> +	if (!(root_ns_mask & CLONE_NEWCGROUP))
> +		return 0;
> +
> +	xfree(pm->root);
> +	pm->root = xstrdup("/");
> +	if (!pm->root)
> +		return -1;
> +
> +	return 0;
> +}
> +
>  static int dump_empty_fs(struct mount_info *pm)
>  {
>  	int fd, ret = -1;
> @@ -1716,6 +1729,7 @@ static struct fstype fstypes[32] = {
>  	}, {
>  		.name = "cgroup",
>  		.code = FSTYPE__CGROUP,
> +		.munge = cgroup_munge,
>  	}, {
>  		.name = "aufs",
>  		.code = FSTYPE__AUFS,
> diff --git a/criu/proc_parse.c b/criu/proc_parse.c
> index 24a9154..d39bd96 100644
> --- a/criu/proc_parse.c
> +++ b/criu/proc_parse.c
> @@ -1295,6 +1295,9 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
>  	if (parse_sb_opt(opt, &new->sb_flags, new->options))
>  		goto err;
>  
> +	if (new->fstype->munge && new->fstype->munge(new) < 0)
> +		goto err;
> +

Hm... Few lines below there's already a call to ->parse() callback :)

>  	ret = 0;
>  ret:
>  	xfree(opt);
> 



More information about the CRIU mailing list