[CRIU] [PATCH v3 2/2] restore: correctly restore cgroup mounts inside a container

Tycho Andersen tycho.andersen at canonical.com
Fri Mar 25 12:26:26 PDT 2016


Before the nsroot= mount option, we were just getting lucky because the
cgroup superblocks "matched" when inspecting them from userspace, so we
were actually getting a bind mount from the host when migrating from within
cgroup namespaces.

Instead, let's actually do a new (i.e. not a bind mount) for cgroup
namespaces. For this, we need two things:

1. to prepare the cgroup namespace (and thus the cgroups) before the mount
   ns, so when the mount() occurrs it is relative to the right cgroup path.

2. not reject cgroup filesystems with no root. A cgroup ns mount looks
   like:

	 223 222 0:22 /lxc/unpriv /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd,nsroot=/lxc/unpriv

   i.e. it has /lxc/unpriv as its root, and thus doesn't look rooted to CRIU.
   We introduce the fstype->munge hook to rewrite this root to /, since it
   is handled by the cgroup ns infrastructure.

v2: add new fstype->munge hook, allowing fstypes to munge their parsed
    mountinfo entries if they want to. this allows us to get rid of the
    ugly hacks with FSTYPE__CGROUP everywhere in teh patch.

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 criu/cr-restore.c         | 18 +++++++++---------
 criu/include/proc_parse.h |  1 +
 criu/mount.c              | 14 ++++++++++++++
 criu/proc_parse.c         |  3 +++
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index ffd2f01..967d5fa 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -1608,6 +1608,15 @@ static int restore_task_with_children(void *_arg)
 			goto err;
 	}
 
+	/*
+	 * Call this _before_ forking to optimize cgroups
+	 * restore -- if all tasks live in one set of cgroups
+	 * we will only move the root one there, others will
+	 * just have it inherited.
+	 */
+	if (prepare_task_cgroup(current) < 0)
+		goto err;
+
 	/* Restore root task */
 	if (current->parent == NULL) {
 		if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
@@ -1640,15 +1649,6 @@ static int restore_task_with_children(void *_arg)
 	if (prepare_mappings())
 		goto err;
 
-	/*
-	 * Call this _before_ forking to optimize cgroups
-	 * restore -- if all tasks live in one set of cgroups
-	 * we will only move the root one there, others will
-	 * just have it inherited.
-	 */
-	if (prepare_task_cgroup(current) < 0)
-		goto err;
-
 	if (prepare_sigactions() < 0)
 		goto err;
 
diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h
index 5de5c86..b9b44aa 100644
--- a/criu/include/proc_parse.h
+++ b/criu/include/proc_parse.h
@@ -114,6 +114,7 @@ struct fstype {
 	int (*dump)(struct mount_info *pm);
 	int (*restore)(struct mount_info *pm);
 	int (*parse)(struct mount_info *pm);
+	int (*munge)(struct mount_info *pm);
 	mount_fn_t mount;
 };
 
diff --git a/criu/mount.c b/criu/mount.c
index eb8d058..427ccc1 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -1631,6 +1631,19 @@ out:
 	return ret;
 }
 
+static int cgroup_munge(struct mount_info *pm)
+{
+	if (!(root_ns_mask & CLONE_NEWCGROUP))
+		return 0;
+
+	xfree(pm->root);
+	pm->root = xstrdup("/");
+	if (!pm->root)
+		return -1;
+
+	return 0;
+}
+
 static int dump_empty_fs(struct mount_info *pm)
 {
 	int fd, ret = -1;
@@ -1716,6 +1729,7 @@ static struct fstype fstypes[32] = {
 	}, {
 		.name = "cgroup",
 		.code = FSTYPE__CGROUP,
+		.munge = cgroup_munge,
 	}, {
 		.name = "aufs",
 		.code = FSTYPE__AUFS,
diff --git a/criu/proc_parse.c b/criu/proc_parse.c
index 24a9154..d39bd96 100644
--- a/criu/proc_parse.c
+++ b/criu/proc_parse.c
@@ -1295,6 +1295,9 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
 	if (parse_sb_opt(opt, &new->sb_flags, new->options))
 		goto err;
 
+	if (new->fstype->munge && new->fstype->munge(new) < 0)
+		goto err;
+
 	ret = 0;
 ret:
 	xfree(opt);
-- 
2.7.3



More information about the CRIU mailing list