[CRIU] [PATCH] Attempt to restore cgroups

Tycho Andersen tycho.andersen at canonical.com
Wed Jul 9 12:23:24 PDT 2014


Hi Pavel,

On Wed, Jul 09, 2014 at 06:04:30PM +0400, Pavel Emelyanov wrote:
>
> Here we have some matching problem. I've applied the patch and run cgroup01 test.
> The result is:
> 
> Error (cgroup.c:382): cg: controller cpu,cpuacct not found
> 
> My cgroups are
> 
> # cat /proc/self/cgroup 
> ...
> 3:cpu,cpuacct:/
> ...
> 
> # cat /proc/cgroups 
> #subsys_name	hierarchy	num_cgroups	enabled
> ...
> cpu	3	1	1
> cpuacct	3	1	1
> ...
> 
> One of cg objects correctly has the cg->controllers[0] == "cpu" and
> cg->controllers[1] == "cpuacct", but the cc->name is "cpu,cpuacct"
> which doesn't match (with strcmp()) with them.

Ah, yes, my mistake. It looks like this requires a fix in two places; the
revised patch is below.

Tycho




During the dump phase, /proc/cgroups is parsed to find co-mounted controllers.
Then, for each task, /proc/<pid>/cgroup is parsed for the cgroups that it is a
member of, and each of those cgroups is traversed to find any child cgroups
which may also need restoring. Any cgroups not currently mounted will be
temporarily mounted and traversed. All of this information is persisted along
with the original cg_sets, which indicate which cgroups a task is a member of.

On restore, an initial phase creates all the cgroups which were saved. Tasks
are then restored into these cgroups via cg_sets as usual.

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 cgroup.c                         | 496 +++++++++++++++++++++++++++++++++++++--
 cr-dump.c                        |   3 +
 include/cgroup.h                 |  39 +++
 include/proc_parse.h             |   3 +
 include/util.h                   |  14 +-
 mount.c                          |   3 +
 proc_parse.c                     |  75 +++++-
 protobuf/cgroup.proto            |  14 +-
 protobuf/mnt.proto               |   1 +
 test/zdtm.sh                     |   2 +
 test/zdtm/live/static/Makefile   |   1 +
 test/zdtm/live/static/cgroup01.c | 111 +++++++++
 util.c                           |  77 ++++++
 13 files changed, 816 insertions(+), 23 deletions(-)
 create mode 100644 test/zdtm/live/static/cgroup01.c

diff --git a/cgroup.c b/cgroup.c
index 1fe5e6d..5114935 100644
--- a/cgroup.c
+++ b/cgroup.c
@@ -5,6 +5,9 @@
 #include <unistd.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
+#include <ftw.h>
+#include <libgen.h>
+#include "list.h"
 #include "xmalloc.h"
 #include "cgroup.h"
 #include "pstree.h"
@@ -18,7 +21,8 @@
 /*
  * This structure describes set of controller groups
  * a task lives in. The cg_ctl entries are stored in
- * the @ctls list sorted by the .name field.
+ * the @ctls list sorted by the .name field and then
+ * by the .path field.
  */
 
 struct cg_set {
@@ -31,11 +35,17 @@ struct cg_set {
 static LIST_HEAD(cg_sets);
 static unsigned int n_sets;
 static CgSetEntry **rst_sets;
+static unsigned int n_controllers;
+static CgControllerEntry **controllers;
 static char *cg_yard;
 static struct cg_set *root_cgset; /* Set root item lives in */
 static struct cg_set *criu_cgset; /* Set criu process lives in */
 static u32 cg_set_ids = 1;
 
+static LIST_HEAD(cgroups);
+static unsigned int n_cgroups;
+static struct mount_info *cg_mntinfo;
+
 static CgSetEntry *find_rst_set_by_id(u32 id)
 {
 	int i;
@@ -118,6 +128,310 @@ static struct cg_set *get_cg_set(struct list_head *ctls, unsigned int n_ctls)
 	return cs;
 }
 
+/* Allocate a cg_controller named xstrdup(@name); NULL on allocation failure. */
+struct cg_controller *new_controller(const char *name, int heirarchy)
+{
+	struct cg_controller *ctl = xmalloc(sizeof(*ctl));
+
+	if (!ctl)
+		return NULL;
+
+	ctl->controllers = xmalloc(sizeof(char *));
+	if (!ctl->controllers)
+		goto free_ctl;
+
+	ctl->controllers[0] = xstrdup(name);
+	if (!ctl->controllers[0])
+		goto free_names;
+
+	ctl->heirarchy = heirarchy;
+	ctl->n_controllers = 1;
+	INIT_LIST_HEAD(&ctl->heads);
+	ctl->n_heads = 0;
+	return ctl;
+free_names:
+	xfree(ctl->controllers);
+free_ctl:
+	xfree(ctl);
+	return NULL;
+}
+
+/* Load the global controller list and cache parse_mountinfo() of this process. */
+int parse_cg_info(void)
+{
+	int err = parse_cgroups(&cgroups, &n_cgroups);
+
+	if (err < 0)
+		return -1;
+
+	cg_mntinfo = parse_mountinfo(getpid(), NULL);
+	return cg_mntinfo ? 0 : -1;
+}
+
+/* Find the mounted cgroup fs whose "name=" option (or, lacking one, the last
+ * component of its mountpoint) equals @controller and copy the mountpoint,
+ * minus its leading '.', to @path, which must be big enough to hold it.
+ * Returns 0 on success, -1 otherwise. */
+static int get_cgroup_mount_point(const char *controller, char *path)
+{
+	struct mount_info *m;
+	size_t clen = strlen(controller);
+
+	for (m = cg_mntinfo; m != NULL; m = m->next) {
+		const char *start;
+		size_t len;
+
+		if (strcmp(m->fstype->name, "cgroup") != 0)
+			continue;
+
+		start = strstr(m->options, "name=");
+		if (start) {
+			start += strlen("name=");
+			len = strcspn(start, ",");
+		} else {
+			start = strrchr(m->mountpoint, '/');
+			if (!start) {
+				pr_err("bad path %s\n", m->mountpoint);
+				return -1;
+			}
+			len = strlen(++start);
+		}
+
+		/* Compare in place: no fixed-size copy that could overflow. */
+		if (len == clen && strncmp(start, controller, len) == 0) {
+			/* skip the leading '.' in mountpoint */
+			strcpy(path, m->mountpoint + 1);
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/* Check that every co-mounted controller from /proc/cgroups (e.g. cpu and
+ * cpuacct) appears as a complete comma-separated element of a name from
+ * /proc/self/cgroup (e.g. cpu,cpuacct). Empty controller lists never match. */
+bool cgroup_contains(char **controllers, unsigned int n_controllers, char *name)
+{
+	unsigned int i;
+
+	for (i = 0; i < n_controllers; i++) {
+		size_t len = strlen(controllers[i]);
+		const char *tok = name;
+		bool found = false;
+
+		/* Walk the elements of @name; a substring hit is not enough,
+		 * otherwise "acct" would be found inside "cpuacct". */
+		while (!found) {
+			size_t tok_len = strcspn(tok, ",");
+			found = tok_len == len && !strncmp(tok, controllers[i], len);
+			if (tok[tok_len] == '\0')
+				break;
+			tok += tok_len + 1;
+		}
+		if (!found)
+			return false;
+	}
+	return n_controllers > 0;
+}
+
+/* This is for use in add_cgroup() as additional arguments for the ftw()
+ * callback */
+static struct cg_controller	*current_controller;
+
+#define EXACT_MATCH	0
+#define PARENT_MATCH	1
+#define NO_MATCH	2
+
+/* Look @path up in @dirs: EXACT_MATCH if present, PARENT_MATCH if only an
+ * ancestor is (the deepest one); both set *@rdir. NO_MATCH otherwise. */
+static int find_dir(const char *path, struct list_head *dirs, struct cgroup_dir **rdir)
+{
+	struct cgroup_dir *d;
+	list_for_each_entry(d, dirs, siblings) {
+		size_t len = strlen(d->path);
+		int ret;
+
+		if (strncmp(path, d->path, len) != 0)
+			continue;
+		/* "/foo" is a sibling of "/foobar", not its parent */
+		if (path[len] && path[len] != '/' && len && d->path[len - 1] != '/')
+			continue;
+		*rdir = d;
+		if (path[len] == '\0')
+			return EXACT_MATCH;
+		ret = find_dir(path, &d->children, rdir);
+		return ret == NO_MATCH ? PARENT_MATCH : ret;
+	}
+	return NO_MATCH;
+}
+
+/* ftw() callback used by collect_cgroups(): record the directory @fpath as a
+ * cgroup_dir of current_controller.
+ *
+ * ftw() offers no user argument, so the target controller is passed through
+ * that file-scope variable.
+ *
+ * The stored path is @fpath with the mount point it was reached through
+ * stripped off. Non-directories and paths that are already recorded (i.e.
+ * co-mounted cgroups) are skipped. @sb is unused.
+ * Returns 0 to keep walking, -1 if memory could not be allocated. */
+static int add_cgroup(const char *fpath, const struct stat *sb, int typeflag)
+{
+	struct cgroup_dir *ncd, *match;
+	struct mount_info *mi;
+	size_t off;
+	size_t flen;
+
+	/* only directories describe cgroups */
+	if (typeflag != FTW_D)
+		return 0;
+
+	pr_info("adding cgroup %s\n", fpath);
+
+	/* If fpath isn't found in mountinfo we mounted it ourselves, so chop
+	 * off the temporary mount directory name instead. */
+	off = strlen(".criu.cgmounts.XXXXXX");
+	for (mi = cg_mntinfo; mi != NULL; mi = mi->next) {
+		if (is_path_prefix(fpath, mi->mountpoint + 1)) {
+			off = strlen(mi->mountpoint);
+			break;
+		}
+	}
+
+	/* mi->mountpoint carries a leading '.', so @off is one more than the
+	 * prefix length; clamp it so the mount root itself yields "" rather
+	 * than a read past the end of fpath. */
+	flen = strlen(fpath);
+	if (off > flen)
+		off = flen;
+
+	ncd = xmalloc(sizeof(*ncd));
+	if (!ncd)
+		return -1;
+
+	ncd->path = xstrdup(fpath + off);
+	if (!ncd->path) {
+		xfree(ncd);
+		return -1;
+	}
+
+	/* Fully initialize the entry before it becomes reachable from a list. */
+	INIT_LIST_HEAD(&ncd->children);
+	ncd->n_children = 0;
+	ncd->controller = current_controller;
+
+	switch (find_dir(ncd->path, &current_controller->heads, &match)) {
+	case EXACT_MATCH:
+		/* ignore co-mounted cgroups */
+		xfree(ncd->path);
+		xfree(ncd);
+		break;
+	case PARENT_MATCH:
+		/* nest under the deepest ancestor already recorded */
+		list_add_tail(&ncd->siblings, &match->children);
+		match->n_children++;
+		break;
+	case NO_MATCH:
+		/* no ancestor recorded: start a new top-level entry */
+		list_add_tail(&ncd->siblings, &current_controller->heads);
+		current_controller->n_heads++;
+		break;
+	}
+
+	return 0;
+}
+
+/* Record every cgroup directory found below the cgroups listed in @ctls (the
+ * cg_ctl entries parsed from a task's cgroup file) in the global controller
+ * list. A controller that is not mounted is mounted on a temporary directory
+ * for the duration of the walk. Returns 0 on success, negative on failure. */
+static int collect_cgroups(struct list_head *ctls)
+{
+	struct cg_ctl *cc;
+	int ret = 0;
+
+	list_for_each_entry(cc, ctls, l) {
+		char path[PATH_MAX];
+		char *name, mount_point[PATH_MAX], prefix[] = ".criu.cgmounts.XXXXXX";
+		bool temp_mount = false;
+		struct cg_controller *cg;
+
+		name = strstartswith(cc->name, "name=") ? cc->name + 5 : cc->name;
+
+		if (get_cgroup_mount_point(name, mount_point) < 0) {
+			/* Someone is trying to dump a process that is in a
+			 * controller that isn't mounted, so we mount it for them. */
+			char opts[1024];
+			temp_mount = true;
+
+			if (mkdtemp(prefix) == NULL) {
+				pr_perror("can't make dir for cg mounts\n");
+				return -1;
+			}
+
+			if (name == cc->name)
+				snprintf(opts, sizeof(opts), "%s", name);
+			else
+				snprintf(opts, sizeof(opts), "none,%s", cc->name);
+
+			if (mount("none", prefix, "cgroup", 0, opts) < 0) {
+				pr_perror("couldn't mount %s\n", opts);
+				rmdir(prefix);
+				return -1;
+			}
+
+			strcpy(mount_point, prefix);
+		}
+
+		snprintf(path, PATH_MAX, "%s/%s", mount_point, cc->path);
+		current_controller = NULL;
+
+		/* We should get all the "real" (i.e. not name=systemd type)
+		 * controller from parse_cgroups(), so find that controller if
+		 * it exists. */
+		list_for_each_entry(cg, &cgroups, l) {
+			if (cgroup_contains(cg->controllers, cg->n_controllers, cc->name)) {
+				current_controller = cg;
+				break;
+			}
+		}
+
+		if (!current_controller) {
+			/* only allow "fake" controllers to be created this way */
+			if (name == cc->name) {
+				pr_err("controller %s not found\n", cc->name);
+				ret = -1;
+				goto out;
+			}
+
+			/* new_controller() can fail: don't link a NULL entry */
+			current_controller = new_controller(cc->name, -1);
+			if (!current_controller) {
+				ret = -1;
+				goto out;
+			}
+			list_add_tail(&current_controller->l, &cgroups);
+			n_cgroups++;
+		}
+
+		ret = ftw(path, add_cgroup, 4);
+		if (ret < 0)
+			pr_perror("failed walking %s for empty cgroups\n", path);
+
+out:
+		if (temp_mount) {
+			umount(prefix);
+			rmdir(prefix);
+		}
+
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
 {
 	int pid;
@@ -134,6 +448,9 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
 	if (parse_task_cgroup(pid, &ctls, &n_ctls))
 		return -1;
 
+	if (item == root_item && collect_cgroups(&ctls) < 0)
+		return -1;
+
 	cs = get_cg_set(&ctls, n_ctls);
 	if (!cs)
 		return -1;
@@ -152,6 +469,70 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
 	return 0;
 }
 
+/* Flatten the cgroup_dir list @dirs (@n_dirs entries) and, recursively, its
+ * children into a new array at *@ents. Returns 0, or -1 on allocation failure. */
+static int dump_cg_dirs(struct list_head *dirs, size_t n_dirs, CgroupDirEntry ***ents)
+{
+	struct cgroup_dir *dir;
+	CgroupDirEntry *entry;
+	void *mem;
+	int idx = 0;
+
+	mem = xmalloc(n_dirs * (sizeof(CgroupDirEntry *) + sizeof(CgroupDirEntry)));
+	*ents = mem;
+	if (!mem)
+		return -1;
+
+	entry = mem + n_dirs * sizeof(CgroupDirEntry *);
+
+	list_for_each_entry(dir, dirs, siblings) {
+		cgroup_dir_entry__init(entry);
+		entry->path = dir->path;
+		entry->n_children = dir->n_children;
+		if (dir->n_children > 0 &&
+		    dump_cg_dirs(&dir->children, dir->n_children, &entry->children) < 0) {
+			xfree(*ents);
+			return -1;
+		}
+		(*ents)[idx++] = entry++;
+	}
+
+	return 0;
+}
+
+/* Fill @cg with one CgControllerEntry per element of the global cgroups list.
+ * Returns 0 on success, -1 if an allocation fails. */
+static int dump_controllers(CgroupEntry *cg)
+{
+	struct cg_controller *cur;
+	CgControllerEntry *ce;
+	void *m;
+	int i = 0;
+
+	cg->n_controllers = n_cgroups;
+	m = xmalloc(n_cgroups * (sizeof(CgControllerEntry *) + sizeof(CgControllerEntry)));
+	cg->controllers = m;
+	if (!m)
+		return -1;
+	/* m is valid here, so the entry area can be derived from it */
+	ce = m + cg->n_controllers * sizeof(CgControllerEntry *);
+
+	list_for_each_entry(cur, &cgroups, l) {
+		cg_controller_entry__init(ce);
+		ce->controllers = cur->controllers;
+		ce->n_controllers = cur->n_controllers;
+		ce->n_dirs = cur->n_heads;
+		if (ce->n_dirs && dump_cg_dirs(&cur->heads, cur->n_heads, &ce->dirs) < 0) {
+			xfree(cg->controllers);
+			return -1;
+		}
+		cg->controllers[i++] = ce++;
+	}
+
+	return 0;
+}
+
+
 static int dump_sets(CgroupEntry *cg)
 {
 	struct cg_set *set;
@@ -242,6 +623,8 @@ int dump_cgroups(void)
 
 	if (dump_sets(&cg))
 		return -1;
+	if (dump_controllers(&cg))
+		return -1;
 
 	pr_info("Writing CG image\n");
 	return pb_write_one(fdset_fd(glob_fdset, CR_FD_CGROUP), &cg, PB_CGROUP);
@@ -255,13 +638,38 @@ static int move_in_cgroup(CgSetEntry *se)
 	cg = get_service_fd(CGROUP_YARD);
 	for (i = 0; i < se->n_ctls; i++) {
 		char aux[1024];
-		int fd, err;
+		int fd, err, j, aux_off;
 		CgMemberEntry *ce = se->ctls[i];
+		CgControllerEntry *ctrl = NULL;
+
+		for (j = 0; j < n_controllers; j++) {
+			CgControllerEntry *cur = controllers[j];
+			if (cgroup_contains(cur->controllers, cur->n_controllers, ce->name)) {
+				ctrl = cur;
+				break;
+			}
+		}
+
+		if (!ctrl) {
+			pr_err("No cg_controller_entry found for %s/%s\n", ce->name, ce->path);
+			return -1;
+		}
+
+		aux_off = 0;
+		for (j = 0; j < ctrl->n_controllers; j++) {
+			char *name;
+			if (strstartswith(ce->name, "name="))
+				name = ctrl->controllers[j] + 5;
+			else
+				name = ctrl->controllers[j];
+			aux_off += sprintf(aux + aux_off, "%s,", name);
+		}
+
+		/* Chop off the last ','. */
+		aux_off -= 1;
+
+		sprintf(aux + aux_off, "/%s/tasks", ce->path);
 
-		if (strstartswith(ce->name, "name="))
-			sprintf(aux, "%s/%s/tasks", ce->name + 5, ce->path);
-		else
-			sprintf(aux, "%s/%s/tasks", ce->name, ce->path);
 		pr_debug("  `-> %s\n", aux);
 		err = fd = openat(cg, aux, O_WRONLY);
 		if (fd >= 0) {
@@ -323,6 +731,27 @@ void fini_cgroup(void)
 	xfree(cg_yard);
 }
 
+/* Create the directories of @ents and all their descendants below the path in
+ * the first @off bytes of @paux. Returns 0, or -1 on the first failure. */
+static int prepare_cgroup_dirs(char *paux, size_t off, CgroupDirEntry **ents, size_t n_ents)
+{
+	size_t i;
+
+	for (i = 0; i < n_ents; i++) {
+		CgroupDirEntry *e = ents[i];
+
+		/* children carry full paths too, so always write at @off */
+		sprintf(paux + off, "/%s", e->path);
+		if (mkdirp(paux)) {
+			pr_perror("Can't make cgroup dir %s", paux);
+			return -1;
+		}
+		if (prepare_cgroup_dirs(paux, off, e->children, e->n_children))
+			return -1;
+	}
+	return 0;
+}
+
 /*
  * Prepare the CGROUP_YARD service descriptor. This guy is
  * tmpfs mount with the set of ctl->name directories each
@@ -341,10 +770,10 @@ void fini_cgroup(void)
  * them in advance.
  */
 
-static int prepare_cgroup_sfd(CgSetEntry *root_set)
+static int prepare_cgroup_sfd(CgroupEntry *ce)
 {
 	int off, i;
-	char paux[PATH_MAX], aux[128];
+	char paux[PATH_MAX];
 
 	pr_info("Preparing cgroups yard\n");
 
@@ -370,26 +799,50 @@ static int prepare_cgroup_sfd(CgSetEntry *root_set)
 		goto err;
 	}
 
-	for (i = 0; i < root_set->n_ctls; i++) {
-		CgMemberEntry *ce = root_set->ctls[i];
-		char *opt = ce->name;
+	for (i = 0; i < ce->n_controllers; i++) {
+		CgControllerEntry *ctrl = ce->controllers[i];
+		int j, name_off, opt_off;
+		char name[1024], opt[1024];
+
+		if (ctrl->n_controllers < 1) {
+			pr_err("Each cg_controller_entry must have at least 1 controller");
+			goto err;
+		}
+
+		opt_off = 0;
+		if (strstartswith(ctrl->controllers[0], "name="))
+			opt_off = sprintf(opt, "none,");
 
-		if (strstartswith(ce->name, "name=")) {
-			sprintf(paux + off, "/%s", ce->name + 5);
-			sprintf(aux, "none,%s", ce->name);
-			opt = aux;
-		} else
-			sprintf(paux + off, "/%s", ce->name);
+		name_off = 0;
+		for (j = 0; j < ctrl->n_controllers; j++) {
+			char *n = ctrl->controllers[j];
+
+			if (strstartswith(ctrl->controllers[j], "name="))
+				n += 5;
+
+			name_off += sprintf(name + name_off, "%s,", n);
+			opt_off += sprintf(opt + opt_off, "%s,", ctrl->controllers[j]);
+		}
+
+		/* Chop off the last ',' to keep mount() happy. */
+		opt[strlen(opt) - 1] = '\0';
+		name[strlen(name) - 1] = '\0';
+
+		name_off = sprintf(paux + off, "/%s", name);
 
 		if (mkdir(paux, 0700)) {
-			pr_perror("Can't make cgyard subdir");
+			pr_perror("Can't make cgyard subdir %s", paux);
 			goto err;
 		}
 
 		if (mount("none", paux, "cgroup", 0, opt) < 0) {
-			pr_perror("Can't mount %s cgyard", ce->name);
+			pr_perror("Can't mount %s cgyard", paux);
 			goto err;
 		}
+
+		if (prepare_cgroup_dirs(paux, off + name_off, ctrl->dirs, ctrl->n_dirs))
+			goto err;
+
 	}
 
 	pr_debug("Opening %s as cg yard\n", cg_yard);
@@ -431,13 +884,16 @@ int prepare_cgroup(void)
 
 	n_sets = ce->n_sets;
 	rst_sets = ce->sets;
+	n_controllers = ce->n_controllers;
+	controllers = ce->controllers;
+
 	if (n_sets)
 		/*
 		 * We rely on the fact that all sets contain the same
 		 * set of controllers. This is checked during dump
 		 * with cg_set_compare(CGCMP_ISSUB) call.
 		 */
-		ret = prepare_cgroup_sfd(rst_sets[0]);
+		ret = prepare_cgroup_sfd(ce);
 	else
 		ret = 0;
 
diff --git a/cr-dump.c b/cr-dump.c
index 45f1f5f..d8ad0fc 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -1776,6 +1776,9 @@ int cr_dump_tasks(pid_t pid)
 	if (vdso_init())
 		goto err;
 
+	if (parse_cg_info())
+		goto err;
+
 	if (write_img_inventory())
 		goto err;
 
diff --git a/include/cgroup.h b/include/cgroup.h
index 148b26f..283f38f 100644
--- a/include/cgroup.h
+++ b/include/cgroup.h
@@ -8,4 +8,43 @@ int dump_cgroups(void);
 int prepare_task_cgroup(struct pstree_item *);
 int prepare_cgroup(void);
 void fini_cgroup(void);
+
+struct cg_controller;
+
+/* This describes a particular cgroup path, e.g. the '/lxc/u1' part of
+ * 'blkio/lxc/u1'. Entries form a tree mirroring the directory hierarchy.
+ */
+struct cgroup_dir {
+	char			*path;		/* path with the mount point stripped */
+
+	/* links this entry into its parent's @children (or the controller's @heads) */
+	struct list_head	siblings;
+
+	/* sub-directories (more cgroup_dirs) and how many there are */
+	struct list_head	children;
+	unsigned int		n_children;
+
+	struct cg_controller	*controller;	/* controller this directory belongs to */
+};
+
+/* This describes one group of co-mounted cgroup controllers: a single one,
+ * e.g. blkio, or several sharing a hierarchy, e.g. cpu and cpuacct.
+ * The heads are subdirectories organized in their tree format. */
+struct cg_controller {
+	int			heirarchy;	/* id from /proc/cgroups, -1 for "name=" ones */
+	unsigned int		n_controllers;	/* number of names in @controllers */
+	char			**controllers;	/* controller names */
+
+	/* top-level cgroup_dirs of this controller and how many there are */
+	struct list_head 	heads;
+	unsigned int		n_heads;
+
+	/* for cgroup list in cgroup.c */
+	struct list_head	l;
+};
+struct cg_controller *new_controller(const char *name, int heirarchy);
+
+/* parse all global cgroup information into structures */
+int parse_cg_info(void);
+
 #endif /* __CR_CGROUP_H__ */
diff --git a/include/proc_parse.h b/include/proc_parse.h
index b153328..ff1ea5d 100644
--- a/include/proc_parse.h
+++ b/include/proc_parse.h
@@ -5,6 +5,7 @@
 #include "asm/types.h"
 #include "image.h"
 #include "list.h"
+#include "cgroup.h"
 
 #include "protobuf/eventfd.pb-c.h"
 #include "protobuf/eventpoll.pb-c.h"
@@ -203,4 +204,6 @@ struct cg_ctl {
 extern int parse_task_cgroup(int pid, struct list_head *l, unsigned int *n);
 extern void put_ctls(struct list_head *);
 
+int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups);
+
 #endif /* __CR_PROC_PARSE_H__ */
diff --git a/include/util.h b/include/util.h
index 22a0f3d..522fc33 100644
--- a/include/util.h
+++ b/include/util.h
@@ -288,7 +288,7 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn);
 /*
  * Check whether @str starts with @sub
  */
-static inline bool strstartswith(char *str, char *sub)
+static inline bool strstartswith(const char *str, const char *sub)
 {
 	while (1) {
 		if (*sub == '\0') /* end of sub -- match */
@@ -303,4 +303,16 @@ static inline bool strstartswith(char *str, char *sub)
 	}
 }
 
+/*
+ * mkdir -p
+ */
+int mkdirp(const char *path);
+
+/*
+ * Tests whether a path is a prefix of another path. This is different than
+ * strstartswith because "/foo" is _not_ a path prefix of "/foobar", since they
+ * refer to different directories.
+ */
+bool is_path_prefix(const char *path, const char *prefix);
+FILE *fopenat(int dirfd, char *path, char *cflags);
 #endif /* __CR_UTIL_H__ */
diff --git a/mount.c b/mount.c
index 4d84f48..32410eb 100644
--- a/mount.c
+++ b/mount.c
@@ -861,6 +861,9 @@ static struct fstype fstypes[] = {
 	}, {
 		.name = "debugfs",
 		.code = FSTYPE__DEBUGFS,
+	}, {
+		.name = "cgroup",
+		.code = FSTYPE__CGROUP,
 	}
 };
 
diff --git a/proc_parse.c b/proc_parse.c
index f2ea897..88f3c0a 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -1547,7 +1547,7 @@ int parse_task_cgroup(int pid, struct list_head *retl, unsigned int *n)
 		}
 
 		list_for_each_entry(cc, retl, l)
-			if (strcmp(cc->name, name) >= 0)
+			if (strcmp(cc->name, name) >= 0 && strcmp(cc->path, path) >= 0)
 				break;
 
 		list_add_tail(&ncc->l, &cc->l);
@@ -1573,3 +1573,76 @@ void put_ctls(struct list_head *l)
 		xfree(c);
 	}
 }
+
+
+/* Parse and create all the real controllers. This does not include things with
+ * the "name=" prefix, e.g. systemd. Controllers sharing a hierarchy id become
+ * one cg_controller. Appends to @cgroups, counts in *@n_cgroups; 0 or -1.
+ */
+int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups)
+{
+	FILE *f;
+	char buf[1024], name[1024];
+	int heirarchy, ret = 0;
+	struct cg_controller *cur = NULL;
+
+	f = fopen("/proc/cgroups", "r");
+	if (!f) {
+		pr_perror("failed opening /proc/cgroups");
+		return -1;
+	}
+
+	/* throw away the header */
+	if (!fgets(buf, 1024, f)) {
+		ret = -1;
+		goto out;
+	}
+
+	while (fgets(buf, 1024, f)) {
+		char *n;
+		void *m;
+		char found = 0;
+
+		if (sscanf(buf, "%s %d", name, &heirarchy) != 2) {
+			pr_err("can't parse /proc/cgroups line: %s", buf);
+			ret = -1;
+			goto out;
+		}
+
+		list_for_each_entry(cur, cgroups, l) {
+			if (cur->heirarchy != heirarchy)
+				continue;
+
+			found = 1;
+			n = xstrdup(name);
+			if (!n) {
+				ret = -1;
+				goto out;
+			}
+			/* grow the array first; bump the count only on success */
+			m = xrealloc(cur->controllers, sizeof(char *) * (cur->n_controllers + 1));
+			if (!m) {
+				xfree(n);
+				ret = -1;
+				goto out;
+			}
+			cur->controllers = m;
+			cur->controllers[cur->n_controllers++] = n;
+			break;
+		}
+
+		if (!found) {
+			struct cg_controller *nc = new_controller(name, heirarchy);
+			if (!nc) {
+				ret = -1;
+				goto out;
+			}
+			list_add_tail(&nc->l, cgroups);
+			(*n_cgroups)++;
+		}
+	}
+
+out:
+	fclose(f);
+	return ret;
+}
diff --git a/protobuf/cgroup.proto b/protobuf/cgroup.proto
index 139a3ad..f026683 100644
--- a/protobuf/cgroup.proto
+++ b/protobuf/cgroup.proto
@@ -1,3 +1,14 @@
+message cgroup_dir_entry {
+	required string 		path		= 1;	// cgroup path with the mount point stripped
+	repeated cgroup_dir_entry	children 	= 4;	// nested sub-directories
+}
+
+message cg_controller_entry {
+	required uint32			id		= 1;	// NOTE(review): dump_controllers() never assigns this -- confirm
+	repeated string			controllers	= 2;	// names of the co-mounted controllers
+	repeated cgroup_dir_entry	dirs		= 3;	// top-level cgroup directories
+}
+
 message cg_member_entry {
 	required string name	= 1;
 	required string path	= 2;
@@ -9,5 +20,6 @@ message cg_set_entry {
 }
 
 message cgroup_entry {
-	repeated cg_set_entry	sets	= 1;
+	repeated cg_set_entry		sets		= 1;
+	repeated cg_controller_entry	controllers	= 2;
 }
diff --git a/protobuf/mnt.proto b/protobuf/mnt.proto
index 63532ee..603bb37 100644
--- a/protobuf/mnt.proto
+++ b/protobuf/mnt.proto
@@ -11,6 +11,7 @@ enum fstype {
 	SECURITYFS		= 9;
 	FUSECTL			= 10;
 	DEBUGFS			= 11;
+	CGROUP			= 12;
 };
 
 message mnt_entry {
diff --git a/test/zdtm.sh b/test/zdtm.sh
index 479427a..eccbfaf 100755
--- a/test/zdtm.sh
+++ b/test/zdtm.sh
@@ -172,6 +172,7 @@ ns/static/tun
 static/netns-nf
 static/netns
 static/cgroup00
+static/cgroup01
 ns/static/clean_mntns
 "
 
@@ -203,6 +204,7 @@ bind-mount
 mountpoints
 inotify_irmap
 cgroup00
+cgroup01
 clean_mntns
 deleted_dev
 "
diff --git a/test/zdtm/live/static/Makefile b/test/zdtm/live/static/Makefile
index 34982bc..b944964 100644
--- a/test/zdtm/live/static/Makefile
+++ b/test/zdtm/live/static/Makefile
@@ -163,6 +163,7 @@ TST_DIR		=				\
 		bind-mount			\
 		cgroup00			\
 		rmdir_open			\
+		cgroup01			\
 
 TST_DIR_FILE	=				\
 		chroot				\
diff --git a/test/zdtm/live/static/cgroup01.c b/test/zdtm/live/static/cgroup01.c
new file mode 100644
index 0000000..f6a082f
--- /dev/null
+++ b/test/zdtm/live/static/cgroup01.c
@@ -0,0 +1,111 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include "zdtmtst.h"
+
+const char *test_doc	= "Check that empty cgroups are preserved";
+const char *test_author	= "Tycho Andersen <tycho.andersen at canonical.com>";
+
+char *dirname;
+TEST_OPTION(dirname, string, "cgroup directory name", 1);
+static const char *cgname = "zdtmtst";
+static const char *subname = "subcg";
+static const char *empty = "empty";
+
+/* Mount a named cgroup hierarchy on @dirname, move into its "subcg" cgroup and
+ * create an "empty" child there. Once test_waitsig() returns, look the mount
+ * up in mountinfo and check that the empty child is still a directory. */
+int main(int argc, char **argv)
+{
+	int cgfd, l, ret = 1;
+	char aux[1024], paux[1024];
+	FILE *cgf;
+	struct stat st;
+
+	test_init(argc, argv);
+
+	if (mkdir(dirname, 0700) < 0) {
+		err("Can't make dir");
+		goto out;
+	}
+
+	sprintf(aux, "none,name=%s", cgname);
+	if (mount("none", dirname, "cgroup", 0, aux)) {
+		err("Can't mount cgroups");
+		goto out_rd;
+	}
+
+	sprintf(paux, "%s/%s", dirname, subname);
+	mkdir(paux, 0600);
+
+	l = sprintf(aux, "%d", getpid());
+	sprintf(paux, "%s/%s/tasks", dirname, subname);
+
+	cgfd = open(paux, O_WRONLY);
+	if (cgfd < 0) {
+		err("Can't open tasks");
+		goto out_rs;
+	}
+
+	l = write(cgfd, aux, l);
+	close(cgfd);
+
+	if (l < 0) {
+		err("Can't move self to subcg");
+		goto out_rs;
+	}
+
+	sprintf(paux, "%s/%s/%s", dirname, subname, empty);
+	mkdir(paux, 0600);
+
+	test_daemon();
+	test_waitsig();
+
+	cgf = fopen("/proc/self/mountinfo", "r");
+	if (cgf == NULL) {
+		fail("No mountinfo file");
+		goto out_rs;
+	}
+
+	while (fgets(paux, sizeof(paux), cgf)) {
+		if (!strstr(paux, cgname))
+			continue;
+
+		/* the fifth mountinfo field is the mount point */
+		ret = -1;
+		sscanf(paux, "%*d %*d %*d:%*d %*s %s", aux);
+		test_msg("found cgroup at %s\n", aux);
+		sprintf(paux, "%s/%s/%s", aux, subname, empty);
+		if (stat(paux, &st)) {
+			fail("couldn't stat %s\n", paux);
+			goto out_close;
+		}
+		if (!S_ISDIR(st.st_mode)) {
+			fail("%s is not a directory\n", paux);
+			goto out_close;
+		}
+
+		pass();
+		ret = 0;
+		goto out_close;
+	}
+
+	fail("empty cgroup not found!\n");
+
+out_close:
+	fclose(cgf);
+
+	sprintf(paux, "%s/%s/%s", dirname, subname, empty);
+	rmdir(paux);
+out_rs:
+	sprintf(paux, "%s/%s", dirname, subname);
+	rmdir(paux);
+	umount(dirname);
+out_rd:
+	rmdir(dirname);
+out:
+	return ret;
+}
diff --git a/util.c b/util.c
index d697f7a..2553adc 100644
--- a/util.c
+++ b/util.c
@@ -678,3 +678,80 @@ struct vma_area *alloc_vma_area(void)
 
 	return p;
 }
+
+/* Create @path and any missing parent directories, like "mkdir -p".
+ * Components that already exist are not an error; new ones get mode 0755.
+ * Returns 0 on success, -1 if @path does not fit in PATH_MAX or a mkdir()
+ * fails. */
+int mkdirp(const char *path)
+{
+	char made_path[PATH_MAX], *pos;
+
+	if (strlen(path) >= PATH_MAX) {
+		pr_err("path %s is longer than PATH_MAX\n", path);
+		return -1;
+	}
+
+	strcpy(made_path, path);
+
+	/* Cut the copy at each '/' in turn (never at a leading one), create
+	 * what precedes the cut, then restore the '/'. The final round has no
+	 * '/' left and creates the full path. */
+	pos = made_path + (made_path[0] == '/');
+	while (pos && *pos) {
+		pos = strchr(pos, '/');
+		if (pos)
+			*pos = '\0';
+		if (mkdir(made_path, 0755) < 0 && errno != EEXIST) {
+			pr_perror("couldn't mkdir %s", made_path);
+			return -1;
+		}
+		if (pos)
+			*pos++ = '/';
+	}
+
+	return 0;
+}
+
+/* Report whether @prefix is @path itself or one of its parent directories:
+ * @path must start with @prefix and then either end or continue with '/'. */
+bool is_path_prefix(const char *path, const char *prefix)
+{
+	char next;
+
+	if (!strstartswith(path, prefix))
+		return false;
+
+	/* "/foo" must not count as a prefix of "/foobar" */
+	next = path[strlen(prefix)];
+	return next == '\0' || next == '/';
+}
+
+/* fopen() relative to @dirfd (see openat()). @cflags starts with 'r', 'w' or
+ * 'a'; a '+' anywhere in it requests read-write access. New files get mode
+ * 0600. NOTE(review): unlike fopen(), "w" does not truncate -- confirm intent.
+ * Returns NULL on failure. */
+FILE *fopenat(int dirfd, char *path, char *cflags)
+{
+	int tmp, flags;
+	FILE *f;
+
+	if (cflags[0] == 'r')
+		flags = 0;
+	else if (cflags[0] == 'w')
+		flags = O_CREAT;
+	else if (cflags[0] == 'a')
+		flags = O_CREAT | O_APPEND;
+	else
+		return NULL;
+	/* '+' only widens access: no O_CREAT for "r+", O_APPEND kept for "a+" */
+	flags |= strchr(cflags, '+') ? O_RDWR : (cflags[0] == 'r' ? O_RDONLY : O_WRONLY);
+
+	tmp = openat(dirfd, path, flags, S_IRUSR | S_IWUSR);
+	if (tmp < 0)
+		return NULL;
+	f = fdopen(tmp, cflags);
+	if (!f)
+		close(tmp);	/* fdopen() failed: don't leak the descriptor */
+	return f;
+}
-- 
1.9.1



More information about the CRIU mailing list