[CRIU] [PATCH] Attempt to restore cgroups

Tycho Andersen tycho.andersen at canonical.com
Mon Jul 7 14:28:00 PDT 2014


During the dump phase, /proc/cgroups is parsed to find co-mounted cgroups.
Then, for each task /proc/self/cgroup is parsed for the cgroups that it is a
member of, and that cgroup is traversed to find any child cgroups which may
also need restoring. All of this information is persisted along with the
original cg_sets, which indicate which cgroups a task is a member of.

On restore, an initial phase creates all the cgroups which were saved and
attempts to restore any properties they had. Then the tasks are restored into
their respective cgroups via cg_sets as usual.

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 cgroup.c                         | 469 +++++++++++++++++++++++++++++++++++++--
 cr-dump.c                        |   3 +
 include/cgroup.h                 |  45 ++++
 include/proc_parse.h             |   3 +
 include/util.h                   |  13 +-
 mount.c                          |   3 +
 proc_parse.c                     |  75 ++++++-
 protobuf/cgroup.proto            |  16 +-
 protobuf/mnt.proto               |   1 +
 test/zdtm.sh                     |   2 +
 test/zdtm/live/static/Makefile   |   1 +
 test/zdtm/live/static/cgroup01.c | 111 +++++++++
 util.c                           |  48 ++++
 13 files changed, 772 insertions(+), 18 deletions(-)
 create mode 100644 test/zdtm/live/static/cgroup01.c

diff --git a/cgroup.c b/cgroup.c
index 1fe5e6d..e8fa7e6 100644
--- a/cgroup.c
+++ b/cgroup.c
@@ -5,6 +5,9 @@
 #include <unistd.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
+#include <ftw.h>
+#include <libgen.h>
+#include "list.h"
 #include "xmalloc.h"
 #include "cgroup.h"
 #include "pstree.h"
@@ -18,7 +21,8 @@
 /*
  * This structure describes set of controller groups
  * a task lives in. The cg_ctl entries are stored in
- * the @ctls list sorted by the .name field.
+ * the @ctls list sorted by the .name field and then
+ * by the .path field.
  */
 
 struct cg_set {
@@ -36,6 +40,10 @@ static struct cg_set *root_cgset; /* Set root item lives in */
 static struct cg_set *criu_cgset; /* Set criu process lives in */
 static u32 cg_set_ids = 1;
 
+static LIST_HEAD(cgroups);
+static unsigned int n_cgroups;
+static struct mount_info *cg_mntinfo;
+
 static CgSetEntry *find_rst_set_by_id(u32 id)
 {
 	int i;
@@ -118,6 +126,295 @@ static struct cg_set *get_cg_set(struct list_head *ctls, unsigned int n_ctls)
 	return cs;
 }
 
+/* Allocate a cg_controller holding the single name @name; NULL if an allocation fails. */
+struct cg_controller *new_controller(const char *name, int heirarchy)
+{
+	struct cg_controller *ctl = xmalloc(sizeof(*ctl));
+	if (!ctl)
+		return NULL;
+
+	ctl->controllers = xmalloc(sizeof(*ctl->controllers));
+	if (!ctl->controllers)
+		goto free_ctl;
+
+	ctl->controllers[0] = xstrdup(name);
+	if (!ctl->controllers[0])
+		goto free_names;
+
+	/* one name so far, and no cgroup directories collected yet */
+	INIT_LIST_HEAD(&ctl->heads);
+	ctl->n_heads = 0;
+	ctl->n_controllers = 1;
+	ctl->heirarchy = heirarchy;
+	return ctl;
+free_names:
+	xfree(ctl->controllers);
+free_ctl:
+	xfree(ctl);
+	return NULL;
+}
+
+/* Load the controller list and this process's mountinfo into the file globals. */
+int parse_cg_info(void)
+{
+	int err = parse_cgroups(&cgroups, &n_cgroups);
+
+	if (err < 0)
+		return -1;
+
+	cg_mntinfo = parse_mountinfo(getpid(), NULL);
+	return cg_mntinfo ? 0 : -1;
+}
+
+/* Find the cgroup mount serving @controller and copy its mount point, minus
+ * the leading '.', into @path (room for PATH_MAX bytes is assumed, as the
+ * caller in this file provides). Returns 0 if found, -1 otherwise. */
+static int get_cgroup_mount_point(const char *controller, char *path)
+{
+	struct mount_info *m;
+	char name[1024];
+
+	for (m = cg_mntinfo; m != NULL; m = m->next) {
+		const char *start;
+
+		if (strcmp(m->fstype->name, "cgroup") != 0)
+			continue;
+
+		start = strstr(m->options, "name=");
+		if (start) {
+			/* named hierarchy: the name runs up to the next ',' */
+			start += strlen("name=");
+			snprintf(name, sizeof(name), "%.*s",
+				 (int)strcspn(start, ","), start);
+		} else {
+			/* otherwise go by the last mount point component */
+			start = strrchr(m->mountpoint, '/');
+			if (!start) {
+				pr_err("bad path %s\n", m->mountpoint);
+				return -1;
+			}
+			snprintf(name, sizeof(name), "%s", start + 1);
+		}
+
+		if (strcmp(name, controller) == 0) {
+			/* skip the leading '.' in mountpoint */
+			snprintf(path, PATH_MAX, "%s", m->mountpoint + 1);
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+/* This is for use in add_cgroup() as additional arguments for the ftw()
+ * callback */
+static struct cg_controller	*current_controller;
+
+#define EXACT_MATCH	0
+#define PARENT_MATCH	1
+#define NO_MATCH	2
+
+/* Search the @dirs tree for @path; *rdir gets the exact or closest prefix entry. */
+static int find_dir(const char *path, struct list_head *dirs, struct cgroup_dir **rdir)
+{
+	struct cgroup_dir *pos;
+	int sub;
+
+	list_for_each_entry(pos, dirs, siblings) {
+		if (!strcmp(pos->path, path)) {
+			*rdir = pos;
+			return EXACT_MATCH;
+		}
+		if (!strstartswith(path, pos->path))
+			continue;
+		/* look for something closer below; else pos is the parent */
+		sub = find_dir(path, &pos->children, rdir);
+		if (sub != NO_MATCH)
+			return sub;
+		*rdir = pos;
+		return PARENT_MATCH;
+	}
+	return NO_MATCH;
+}
+
+/* ftw() callback: record the directory @fpath as a cgroup of
+ * current_controller, together with its memory limit and cpu shares when the
+ * corresponding files can be opened. Anything that is not a directory is
+ * ignored; @sb is unused. Returns 0 to continue the walk, -1 to abort it. */
+static int add_cgroup(const char *fpath, const struct stat *sb, int typeflag)
+{
+	struct cgroup_dir *ncd, *match;
+	struct mount_info *mi;
+	char pbuf[PATH_MAX];
+	int mtype;
+	FILE *f;
+
+	if (typeflag != FTW_D)
+		return 0;
+
+	pr_info("adding cgroup %s\n", fpath);
+
+	ncd = xmalloc(sizeof(*ncd));
+	if (!ncd)
+		return -1;
+
+	ncd->path = NULL;
+
+	/* store the path relative to the cgroup mount that contains it */
+	for (mi = cg_mntinfo; mi != NULL; mi = mi->next) {
+		if (is_path_prefix(fpath, mi->mountpoint + 1)) {
+			ncd->path = xstrdup(fpath + strlen(mi->mountpoint));
+			if (!ncd->path)
+				goto err;
+			break;
+		}
+	}
+
+	if (!ncd->path) {
+		pr_err("couldn't find %s in mountinfo\n", fpath);
+		goto err;
+	}
+
+	/* where does this path sit relative to what was collected so far? */
+	mtype = find_dir(ncd->path, &current_controller->heads, &match);
+	if (mtype == EXACT_MATCH) {
+		/* ignore co-mounted cgroups */
+		xfree(ncd->path);
+		xfree(ncd);
+		return 0;
+	}
+
+	INIT_LIST_HEAD(&ncd->children);
+	ncd->n_children = 0;
+	ncd->controller = current_controller;
+	ncd->flags = 0;
+
+	/*
+	 * Read the properties before linking ncd into the tree, so that a
+	 * failure below can free it without leaving a dangling list entry.
+	 */
+	snprintf(pbuf, PATH_MAX, "%s/memory.limit_in_bytes", fpath);
+	f = fopen(pbuf, "r");
+	if (f) {
+		if (fscanf(f, "%" SCNu64, &ncd->mem_limit) != 1) {
+			pr_err("Failed scanning %s\n", pbuf);
+			fclose(f);
+			goto err;
+		}
+		ncd->flags |= HAS_MEM_LIMIT;
+		fclose(f);
+	}
+
+	snprintf(pbuf, PATH_MAX, "%s/cpu.shares", fpath);
+	f = fopen(pbuf, "r");
+	if (f) {
+		if (fscanf(f, "%" SCNu32, &ncd->cpu_shares) != 1) {
+			pr_err("Failed scanning %s for u32\n", pbuf);
+			fclose(f);
+			goto err;
+		}
+		ncd->flags |= HAS_CPU_SHARES;
+		fclose(f);
+	}
+
+	/* everything was read: hang ncd under its parent or at the top level */
+	if (mtype == PARENT_MATCH) {
+		list_add_tail(&ncd->siblings, &match->children);
+		match->n_children++;
+	} else {
+		list_add_tail(&ncd->siblings, &current_controller->heads);
+		current_controller->n_heads++;
+	}
+
+	return 0;
+
+err:
+	if (ncd->path)
+		xfree(ncd->path);
+	xfree(ncd);
+	return -1;
+}
+
+/* For every controller in @ctls, walk its cgroup directory and record what
+ * add_cgroup() finds there, mounting the controller on a temporary directory
+ * first if no mount point is known. Returns 0 or a negative value on error. */
+static int collect_cgroups(struct list_head *ctls)
+{
+	struct cg_ctl *cc;
+	int ret = 0;
+
+	list_for_each_entry(cc, ctls, l) {
+		char *name, path[PATH_MAX], mount_point[PATH_MAX];
+		struct cg_controller *cg;
+		int i;
+
+		if (strstartswith(cc->name, "name="))
+			name = cc->name + 5;
+		else
+			name = cc->name;
+
+		if (get_cgroup_mount_point(name, mount_point) < 0) {
+			/* Someone is trying to dump a process that is in a
+			 * controller that isn't mounted, so we mount it for them. */
+			char opts[1024], prefix[] = ".criu.cgmounts.XXXXXX";
+
+			if (mkdtemp(prefix) == NULL) {
+				pr_perror("can't make dir for cg mounts\n");
+				return -1;
+			}
+
+			if (name == cc->name)
+				snprintf(opts, sizeof(opts), "%s", name);
+			else
+				snprintf(opts, sizeof(opts), "none,%s", cc->name);
+
+			if (mount("none", prefix, "cgroup", 0, opts) < 0) {
+				pr_perror("couldn't mount %s\n", opts);
+				return -1;
+			}
+
+			strcpy(mount_point, prefix);
+		}
+
+		snprintf(path, PATH_MAX, "%s/%s", mount_point, cc->path);
+
+		current_controller = NULL;
+
+		/* We should get all the "real" (i.e. not name=systemd type)
+		 * controller from parse_cgroups(), so find it if it exists. */
+		list_for_each_entry(cg, &cgroups, l) {
+			for (i = 0; i < cg->n_controllers; i++) {
+				if (strcmp(cg->controllers[i], cc->name) == 0) {
+					current_controller = cg;
+					break;
+				}
+			}
+		}
+
+		if (!current_controller) {
+			/* only allow "fake" controllers to be created this way */
+			if (!strstartswith(cc->name, "name=")) {
+				pr_err("controller %s not found\n", cc->name);
+				return -1;
+			}
+			/* new_controller() can fail; don't link a NULL entry */
+			current_controller = new_controller(cc->name, -1);
+			if (!current_controller)
+				return -1;
+			list_add_tail(&current_controller->l, &cgroups);
+			n_cgroups++;
+		}
+
+		ret = ftw(path, add_cgroup, 4);
+		if (ret < 0) {
+			pr_perror("failed walking %s for empty cgroups\n", path);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
 {
 	int pid;
@@ -134,6 +431,9 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
 	if (parse_task_cgroup(pid, &ctls, &n_ctls))
 		return -1;
 
+	if (item == root_item && collect_cgroups(&ctls) < 0)
+		return -1;
+
 	cs = get_cg_set(&ctls, n_ctls);
 	if (!cs)
 		return -1;
@@ -152,6 +452,74 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
 	return 0;
 }
 
+/* Convert the sibling list @dirs (@n_dirs entries) and, recursively, their
+ * children into a newly allocated CgroupDirEntry pointer array in *ents. */
+static int dump_cg_dirs(struct list_head *dirs, size_t n_dirs, CgroupDirEntry ***ents)
+{
+	struct cgroup_dir *dir;
+	CgroupDirEntry *ent;
+	void *mem;
+	int idx = 0;
+
+	/* one block: the pointer array first, the entries right behind it */
+	mem = xmalloc(n_dirs * (sizeof(CgroupDirEntry *) + sizeof(CgroupDirEntry)));
+	*ents = mem;
+	if (!mem)
+		return -1;
+	ent = mem + n_dirs * sizeof(CgroupDirEntry *);
+
+	list_for_each_entry(dir, dirs, siblings) {
+		cgroup_dir_entry__init(ent);
+		ent->path = dir->path;
+		ent->mem_limit = dir->mem_limit;
+		ent->has_mem_limit = dir->flags & HAS_MEM_LIMIT;
+		ent->cpu_shares = dir->cpu_shares;
+		ent->has_cpu_shares = dir->flags & HAS_CPU_SHARES;
+		ent->n_children = dir->n_children;
+		if (dir->n_children > 0 &&
+		    dump_cg_dirs(&dir->children, dir->n_children, &ent->children) < 0) {
+			xfree(*ents);
+			return -1;
+		}
+		(*ents)[idx++] = ent++;
+	}
+
+	return 0;
+}
+
+/* Fill cg->controllers with one CgControllerEntry per entry of the cgroups list. */
+static int dump_controllers(CgroupEntry *cg)
+{
+	struct cg_controller *ctl;
+	CgControllerEntry *ent;
+	void *mem;
+	int idx = 0;
+
+	/* one block: the pointer array first, the entries right behind it */
+	mem = xmalloc(n_cgroups * (sizeof(CgControllerEntry *) + sizeof(CgControllerEntry)));
+	cg->n_controllers = n_cgroups;
+	cg->controllers = mem;
+	if (!mem)
+		return -1;
+	ent = mem + cg->n_controllers * sizeof(CgControllerEntry *);
+
+	list_for_each_entry(ctl, &cgroups, l) {
+		cg_controller_entry__init(ent);
+		ent->n_controllers = ctl->n_controllers;
+		ent->controllers = ctl->controllers;
+		ent->n_dirs = ctl->n_heads;
+		if (ent->n_dirs > 0 &&
+		    dump_cg_dirs(&ctl->heads, ctl->n_heads, &ent->dirs) < 0) {
+			xfree(cg->controllers);
+			return -1;
+		}
+		cg->controllers[idx++] = ent++;
+	}
+
+	return 0;
+}
+
+
 static int dump_sets(CgroupEntry *cg)
 {
 	struct cg_set *set;
@@ -242,6 +610,8 @@ int dump_cgroups(void)
 
 	if (dump_sets(&cg))
 		return -1;
+	if (dump_controllers(&cg))
+		return -1;
 
 	pr_info("Writing CG image\n");
 	return pb_write_one(fdset_fd(glob_fdset, CR_FD_CGROUP), &cg, PB_CGROUP);
@@ -323,6 +693,57 @@ void fini_cgroup(void)
 	xfree(cg_yard);
 }
 
+/* Create every cgroup directory in @ents (and, recursively, its children)
+ * below the path held in the first @off bytes of @paux, restoring the saved
+ * memory limit and cpu shares where present. @paux is used as scratch space.
+ * Returns 0 on success, -1 on failure. */
+static int prepare_cgroup_dirs(char *paux, size_t off, CgroupDirEntry **ents, size_t n_ents)
+{
+	size_t i, my_off;
+	CgroupDirEntry *e;
+	FILE *f;
+
+	for (i = 0; i < n_ents; i++) {
+		e = ents[i];
+		my_off = sprintf(paux + off, "/%s", e->path);
+		if (mkdirp(paux)) {
+			pr_perror("Can't make cgroup dir %s", paux);
+			return -1;
+		}
+
+		if (e->has_mem_limit) {
+			sprintf(paux + my_off + off, "/memory.limit_in_bytes");
+			f = fopen(paux, "w+");
+			if (!f) {
+				pr_perror("Couldn't open %s for writing\n", paux);
+				return -1;
+			}
+
+			/* PRI*, not SCN*: this is printf-family formatting */
+			fprintf(f, "%" PRIu64, e->mem_limit);
+			fclose(f);
+		}
+
+		if (e->has_cpu_shares) {
+			sprintf(paux + my_off + off, "/cpu.shares");
+			f = fopen(paux, "w+");
+			if (!f) {
+				pr_perror("Couldn't open %s for writing\n", paux);
+				return -1;
+			}
+
+			fprintf(f, "%" PRIu32, e->cpu_shares);
+			fclose(f);
+		}
+
+		/* a failure further down the tree must fail the restore too */
+		if (prepare_cgroup_dirs(paux, off, e->children, e->n_children))
+			return -1;
+	}
+
+	return 0;
+}
+
 /*
  * Prepare the CGROUP_YARD service descriptor. This guy is
  * tmpfs mount with the set of ctl->name directories each
@@ -341,10 +762,10 @@ void fini_cgroup(void)
  * them in advance.
  */
 
-static int prepare_cgroup_sfd(CgSetEntry *root_set)
+static int prepare_cgroup_sfd(CgroupEntry *ce)
 {
 	int off, i;
-	char paux[PATH_MAX], aux[128];
+	char paux[PATH_MAX];
 
 	pr_info("Preparing cgroups yard\n");
 
@@ -370,26 +791,44 @@ static int prepare_cgroup_sfd(CgSetEntry *root_set)
 		goto err;
 	}
 
-	for (i = 0; i < root_set->n_ctls; i++) {
-		CgMemberEntry *ce = root_set->ctls[i];
-		char *opt = ce->name;
+	for (i = 0; i < ce->n_controllers; i++) {
+		CgControllerEntry *ctrl = ce->controllers[i];
+		int j, name_off, opt_off;
+		char *name, opt[1024];
+
+		if (ctrl->n_controllers < 1) {
+			pr_err("Each cg_controller_entry must have at least 1 controller");
+			goto err;
+		}
 
-		if (strstartswith(ce->name, "name=")) {
-			sprintf(paux + off, "/%s", ce->name + 5);
-			sprintf(aux, "none,%s", ce->name);
-			opt = aux;
-		} else
-			sprintf(paux + off, "/%s", ce->name);
+		if (strstartswith(ctrl->controllers[0], "name=")) {
+			name = ctrl->controllers[0] + 5;
+			opt_off = sprintf(opt, "none,%s", ctrl->controllers[0]);
+		} else {
+			name = ctrl->controllers[0];
+			opt_off = sprintf(opt, "%s", ctrl->controllers[0]);
+		}
+
+		for (j = 1; j < ctrl->n_controllers; j++) {	/* co-mounted names; index with j, not i */
+			name = ctrl->controllers[j];
+			opt_off += sprintf(opt + opt_off, ",%s", ctrl->controllers[j]);
+		}
+
+		name_off = sprintf(paux + off, "/%s", name);
 
 		if (mkdir(paux, 0700)) {
-			pr_perror("Can't make cgyard subdir");
+			pr_perror("Can't make cgyard subdir %s", paux);
 			goto err;
 		}
 
 		if (mount("none", paux, "cgroup", 0, opt) < 0) {
-			pr_perror("Can't mount %s cgyard", ce->name);
+			pr_perror("Can't mount %s cgyard", paux);
 			goto err;
 		}
+
+		if (prepare_cgroup_dirs(paux, off + name_off, ctrl->dirs, ctrl->n_dirs))
+			goto err;
+
 	}
 
 	pr_debug("Opening %s as cg yard\n", cg_yard);
@@ -437,7 +876,7 @@ int prepare_cgroup(void)
 		 * set of controllers. This is checked during dump
 		 * with cg_set_compare(CGCMP_ISSUB) call.
 		 */
-		ret = prepare_cgroup_sfd(rst_sets[0]);
+		ret = prepare_cgroup_sfd(ce);
 	else
 		ret = 0;
 
diff --git a/cr-dump.c b/cr-dump.c
index 45f1f5f..d8ad0fc 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -1776,6 +1776,9 @@ int cr_dump_tasks(pid_t pid)
 	if (vdso_init())
 		goto err;
 
+	if (parse_cg_info())
+		goto err;
+
 	if (write_img_inventory())
 		goto err;
 
diff --git a/include/cgroup.h b/include/cgroup.h
index 148b26f..822743e 100644
--- a/include/cgroup.h
+++ b/include/cgroup.h
@@ -8,4 +8,49 @@ int dump_cgroups(void);
 int prepare_task_cgroup(struct pstree_item *);
 int prepare_cgroup(void);
 void fini_cgroup(void);
+
+#define HAS_MEM_LIMIT	(1 << 0)
+#define HAS_CPU_SHARES	(1 << 1)
+
+struct cg_controller;
+
+/* This describes a particular cgroup path, e.g. the '/lxc/u1' part of
+ * 'blkio/lxc/u1' and any properties it has.
+ */
+struct cgroup_dir {
+	char			*path;
+	u64			mem_limit;	/* meaningful only with HAS_MEM_LIMIT in flags */
+	u32			cpu_shares;	/* meaningful only with HAS_CPU_SHARES in flags */
+	unsigned int		flags;		/* HAS_* bits */
+
+	/* links this dir into its parent's children list or the controller's heads */
+	struct list_head	siblings;
+
+	/* child cgroup_dirs, linked through their siblings member */
+	struct list_head	children;
+	unsigned int		n_children;	/* number of entries on children */
+
+	struct cg_controller	*controller;	/* controller this dir was collected for */
+};
+
+/* This describes a particular cgroup controller, e.g. blkio or cpuset.
+ * The heads are subdirectories organized in their tree format.
+ */
+struct cg_controller {
+	int			heirarchy;	/* id from /proc/cgroups; -1 for name= entries made in collect_cgroups() */
+	unsigned int		n_controllers;	/* number of names in controllers */
+	char			**controllers;	/* names sharing this hierarchy */
+
+	/* top-level cgroup_dirs, linked through their siblings member */
+	struct list_head 	heads;
+	unsigned int		n_heads;	/* number of entries on heads */
+
+	/* for cgroup list in cgroup.c */
+	struct list_head	l;
+};
+struct cg_controller *new_controller(const char *name, int heirarchy);
+
+/* parse all global cgroup information into structures */
+int parse_cg_info(void);
+
 #endif /* __CR_CGROUP_H__ */
diff --git a/include/proc_parse.h b/include/proc_parse.h
index b153328..ff1ea5d 100644
--- a/include/proc_parse.h
+++ b/include/proc_parse.h
@@ -5,6 +5,7 @@
 #include "asm/types.h"
 #include "image.h"
 #include "list.h"
+#include "cgroup.h"
 
 #include "protobuf/eventfd.pb-c.h"
 #include "protobuf/eventpoll.pb-c.h"
@@ -203,4 +204,6 @@ struct cg_ctl {
 extern int parse_task_cgroup(int pid, struct list_head *l, unsigned int *n);
 extern void put_ctls(struct list_head *);
 
+int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups);
+
 #endif /* __CR_PROC_PARSE_H__ */
diff --git a/include/util.h b/include/util.h
index 22a0f3d..a7d612f 100644
--- a/include/util.h
+++ b/include/util.h
@@ -288,7 +288,7 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn);
 /*
  * Check whether @str starts with @sub
  */
-static inline bool strstartswith(char *str, char *sub)
+static inline bool strstartswith(const char *str, const char *sub)
 {
 	while (1) {
 		if (*sub == '\0') /* end of sub -- match */
@@ -303,4 +303,15 @@ static inline bool strstartswith(char *str, char *sub)
 	}
 }
 
+/*
+ * mkdir -p
+ */
+int mkdirp(const char *path);
+
+/*
+ * Tests whether a path is a prefix of another path. This is different than
+ * strstartswith because "/foo" is _not_ a path prefix of "/foobar", since they
+ * refer to different directories.
+ */
+bool is_path_prefix(const char *path, const char *prefix);
 #endif /* __CR_UTIL_H__ */
diff --git a/mount.c b/mount.c
index 4d84f48..32410eb 100644
--- a/mount.c
+++ b/mount.c
@@ -861,6 +861,9 @@ static struct fstype fstypes[] = {
 	}, {
 		.name = "debugfs",
 		.code = FSTYPE__DEBUGFS,
+	}, {
+		.name = "cgroup",
+		.code = FSTYPE__CGROUP,
 	}
 };
 
diff --git a/proc_parse.c b/proc_parse.c
index f2ea897..88f3c0a 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -1547,7 +1547,7 @@ int parse_task_cgroup(int pid, struct list_head *retl, unsigned int *n)
 		}
 
 		list_for_each_entry(cc, retl, l)
-			if (strcmp(cc->name, name) >= 0)
+			if (strcmp(cc->name, name) >= 0 && strcmp(cc->path, path) >= 0)
 				break;
 
 		list_add_tail(&ncc->l, &cc->l);
@@ -1573,3 +1573,76 @@ void put_ctls(struct list_head *l)
 		xfree(c);
 	}
 }
+
+
+/* Parse /proc/cgroups and create all the real controllers, appending them to
+ * @cgroups and counting them in @n_cgroups. Names that share a hierarchy id
+ * are gathered in one cg_controller. This does not include things with the
+ * "name=" prefix, e.g. systemd. Returns 0 on success, -1 on error.
+ */
+int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups)
+{
+	FILE *f;
+	char buf[1024], name[1024];
+	int heirarchy, ret = -1;
+	struct cg_controller *cur, *nc;
+
+	f = fopen("/proc/cgroups", "r");
+	if (!f) {
+		pr_perror("failed opening /proc/cgroups");
+		return -1;
+	}
+
+	/* throw away the header */
+	if (!fgets(buf, sizeof(buf), f))
+		goto out;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		struct cg_controller *owner = NULL;
+		char **names, *n;
+
+		/* never reuse name/heirarchy left over from the previous line */
+		if (sscanf(buf, "%1023s %d", name, &heirarchy) != 2) {
+			pr_err("Unable to parse /proc/cgroups line: %s", buf);
+			goto out;
+		}
+
+		/* is this hierarchy already known? */
+		list_for_each_entry(cur, cgroups, l) {
+			if (cur->heirarchy == heirarchy) {
+				owner = cur;
+				break;
+			}
+		}
+
+		/* no: this is the first controller seen on it */
+		if (!owner) {
+			nc = new_controller(name, heirarchy);
+			if (!nc)
+				goto out;
+			list_add_tail(&nc->l, cgroups);
+			(*n_cgroups)++;
+			continue;
+		}
+
+		/* yes: append the name to the existing entry */
+		names = xrealloc(owner->controllers,
+				 sizeof(char *) * (owner->n_controllers + 1));
+		if (!names)
+			goto out;
+		owner->controllers = names;
+
+		n = xstrdup(name);
+		if (!n)
+			goto out;
+
+		/* bump the count only once the slot is really filled */
+		owner->controllers[owner->n_controllers++] = n;
+	}
+
+	ret = 0;
+
+out:
+	fclose(f);
+	return ret;
+}
diff --git a/protobuf/cgroup.proto b/protobuf/cgroup.proto
index 139a3ad..4be2249 100644
--- a/protobuf/cgroup.proto
+++ b/protobuf/cgroup.proto
@@ -1,3 +1,16 @@
+message cgroup_dir_entry {
+	required string 		path		= 1;	// cgroup path as kept in struct cgroup_dir
+	optional uint64 		mem_limit 	= 2;	// value read from memory.limit_in_bytes
+	optional uint32 		cpu_shares	= 3;	// value read from cpu.shares
+	repeated cgroup_dir_entry	children 	= 4;	// nested cgroups
+}
+
+message cg_controller_entry {
+	required uint32			id		= 1;	// NOTE(review): dump_controllers() never assigns this - confirm intent
+	repeated string			controllers	= 2;	// controller names of this entry
+	repeated cgroup_dir_entry	dirs		= 3;	// top-level cgroup dirs
+}
+
 message cg_member_entry {
 	required string name	= 1;
 	required string path	= 2;
@@ -9,5 +22,6 @@ message cg_set_entry {
 }
 
 message cgroup_entry {
-	repeated cg_set_entry	sets	= 1;
+	repeated cg_set_entry		sets		= 1;
+	repeated cg_controller_entry	controllers	= 2;
 }
diff --git a/protobuf/mnt.proto b/protobuf/mnt.proto
index 63532ee..603bb37 100644
--- a/protobuf/mnt.proto
+++ b/protobuf/mnt.proto
@@ -11,6 +11,7 @@ enum fstype {
 	SECURITYFS		= 9;
 	FUSECTL			= 10;
 	DEBUGFS			= 11;
+	CGROUP			= 12;
 };
 
 message mnt_entry {
diff --git a/test/zdtm.sh b/test/zdtm.sh
index 4c3f2d2..38e7484 100755
--- a/test/zdtm.sh
+++ b/test/zdtm.sh
@@ -167,6 +167,7 @@ ns/static/tun
 static/netns-nf
 static/netns
 static/cgroup00
+static/cgroup01
 ns/static/clean_mntns
 "
 
@@ -198,6 +199,7 @@ bind-mount
 mountpoints
 inotify_irmap
 cgroup00
+cgroup01
 clean_mntns
 "
 
diff --git a/test/zdtm/live/static/Makefile b/test/zdtm/live/static/Makefile
index f41fd80..893a250 100644
--- a/test/zdtm/live/static/Makefile
+++ b/test/zdtm/live/static/Makefile
@@ -160,6 +160,7 @@ TST_DIR		=				\
 		tempfs				\
 		bind-mount			\
 		cgroup00			\
+		cgroup01			\
 
 TST_DIR_FILE	=				\
 		chroot				\
diff --git a/test/zdtm/live/static/cgroup01.c b/test/zdtm/live/static/cgroup01.c
new file mode 100644
index 0000000..f6a082f
--- /dev/null
+++ b/test/zdtm/live/static/cgroup01.c
@@ -0,0 +1,111 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include "zdtmtst.h"
+
+const char *test_doc	= "Check that empty cgroups are preserved";
+const char *test_author	= "Tycho Andersen <tycho.andersen at canonical.com>";
+
+char *dirname;
+TEST_OPTION(dirname, string, "cgroup directory name", 1);
+static const char *cgname = "zdtmtst";
+static const char *subname = "subcg";
+static const char *empty = "empty";
+
+/* Put ourselves into a sub-cgroup that has an empty child, then check after C/R that the child exists. */
+int main(int argc, char **argv)
+{
+	int cgfd, l, ret = 1;
+	char aux[1024], paux[1024];
+	FILE *cgf;
+	struct stat st;
+
+	test_init(argc, argv);
+
+	if (mkdir(dirname, 0700) < 0) {
+		err("Can't make dir");
+		goto out;
+	}
+
+	sprintf(aux, "none,name=%s", cgname);
+	if (mount("none", dirname, "cgroup", 0, aux)) {
+		err("Can't mount cgroups");
+		goto out_rd;
+	}
+
+	sprintf(paux, "%s/%s", dirname, subname);
+	mkdir(paux, 0600);
+
+	l = sprintf(aux, "%d", getpid());
+	sprintf(paux, "%s/%s/tasks", dirname, subname);
+
+	cgfd = open(paux, O_WRONLY);
+	if (cgfd < 0) {
+		err("Can't open tasks");
+		goto out_rs;
+	}
+
+	l = write(cgfd, aux, l);
+	close(cgfd);
+
+	if (l < 0) {
+		err("Can't move self to subcg");
+		goto out_rs;
+	}
+
+	sprintf(paux, "%s/%s/%s", dirname, subname, empty);
+	mkdir(paux, 0600);
+
+	test_daemon();
+	test_waitsig();
+
+	cgf = fopen("/proc/self/mountinfo", "r");
+	if (cgf == NULL) {
+		fail("No mountinfo file");
+		goto out_re;
+	}
+
+	while (fgets(paux, sizeof(paux), cgf)) {
+		/* the fifth mountinfo field is the mount point */
+		if (!strstr(paux, cgname) ||
+		    sscanf(paux, "%*d %*d %*d:%*d %*s %1023s", aux) != 1)
+			continue;
+
+		test_msg("found cgroup at %s\n", aux);
+		snprintf(paux, sizeof(paux), "%s/%s/%s", aux, subname, empty);
+		if (stat(paux, &st)) {
+			fail("couldn't stat %s\n", paux);
+			ret = -1;
+			goto out_close;
+		}
+
+		if (!S_ISDIR(st.st_mode)) {
+			fail("%s is not a directory\n", paux);
+			ret = -1;
+			goto out_close;
+		}
+
+		pass();
+		ret = 0;
+		goto out_close;
+	}
+
+	fail("empty cgroup not found!\n");
+
+out_close:
+	fclose(cgf);
+out_re:
+	sprintf(paux, "%s/%s/%s", dirname, subname, empty);
+	rmdir(paux);
+out_rs:
+	sprintf(paux, "%s/%s", dirname, subname);
+	rmdir(paux);
+	umount(dirname);
+out_rd:
+	rmdir(dirname);
+out:
+	return ret;
+}
diff --git a/util.c b/util.c
index d697f7a..8c2e9a1 100644
--- a/util.c
+++ b/util.c
@@ -678,3 +678,51 @@ struct vma_area *alloc_vma_area(void)
 
 	return p;
 }
+
+/* mkdir -p: create @path and every missing parent directory (mode 0755).
+ * Components that already exist are not an error. Returns 0 or -1. */
+int mkdirp(const char *path)
+{
+	char made_path[PATH_MAX], *pos;
+
+	if (strlen(path) >= PATH_MAX) {
+		pr_err("path %s is longer than PATH_MAX\n", path);
+		return -1;
+	}
+
+	strcpy(made_path, path);
+	/* a leading '/' is the root itself, not a component separator */
+	pos = made_path;
+	if (*pos == '/')
+		pos++;
+
+	while (*pos) {
+		/* cut the string after the next component, if any */
+		pos = strchr(pos, '/');
+		if (pos)
+			*pos = '\0';
+		if (mkdir(made_path, 0755) < 0 && errno != EEXIST) {
+			pr_perror("couldn't create directory %s", made_path);
+			return -1;
+		}
+		if (!pos)
+			break;
+		*pos++ = '/';
+	}
+
+	return 0;
+}
+
+/* Tell whether @prefix leads @path and ends on a component boundary of
+ * @path, i.e. at its end or right before a '/'. */
+bool is_path_prefix(const char *path, const char *prefix)
+{
+	char next;
+
+	if (!strstartswith(path, prefix))
+		return false;
+
+	/* the character following the matched prefix decides */
+	next = path[strlen(prefix)];
+	return next == '\0' || next == '/';
+}
-- 
1.9.1



More information about the CRIU mailing list