[CRIU] [PATCH] Attempt to restore cgroups
Andrew Vagin
avagin at parallels.com
Mon Jul 14 14:17:12 PDT 2014
Hi Tycho,
Jenkins is unhappy with this patch. All jobs that execute tests in parallel
started to fail after this patch. Tycho, could you investigate where the problem is?
$ bash -x test/jenkins/criu-dump.sh
...
Test: zdtm/live/static/sock_opts01, Result: FAIL
==================================== ERROR ====================================
Test: zdtm/live/static/sock_opts01, Namespace: 1
Dump log : /root/criu/test/dump/sock_opts01/28469/1/dump.log
--------------------------------- grep Error ---------------------------------
(00.021290) Error (cgroup.c:418): cg: failed walking /root/criu/test/dump/sock_opts00/28471/1/.criu.cgmounts.JocWZf// for empty cgroups
(00.021303) Error (cr-dump.c:1601): Dump core (pid: 28469) failed with -1
(00.025737) Error (cr-dump.c:1914): Dumping FAILED.
------------------------------------- END -------------------------------------
================================= ERROR OVER =================================
On Tue, Jul 08, 2014 at 12:36:41PM -0500, Tycho Andersen wrote:
> During the dump phase, /proc/cgroups is parsed to find co-mounted cgroups.
> Then, for each task /proc/self/cgroup is parsed for the cgroups that it is a
> member of, and that cgroup is traversed to find any child cgroups which may
> also need restoring. All of this information is persisted along with the
> original cg_sets, which indicate which cgroups a task is a member of.
>
> On restore, an initial phase creates all the cgroups which were saved and
> attempts to restore any properties they had. Then the tasks are restored into
> their respective cgroups via cg_sets as usual.
>
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
> cgroup.c | 534 +++++++++++++++++++++++++++++++++++++--
> cr-dump.c | 3 +
> cr-restore.c | 6 +-
> include/cgroup.h | 47 ++++
> include/proc_parse.h | 3 +
> include/util.h | 14 +-
> mount.c | 3 +
> proc_parse.c | 75 +++++-
> protobuf/cgroup.proto | 16 +-
> protobuf/mnt.proto | 1 +
> test/zdtm.sh | 2 +
> test/zdtm/live/static/Makefile | 1 +
> test/zdtm/live/static/cgroup01.c | 111 ++++++++
> util.c | 77 ++++++
> 14 files changed, 874 insertions(+), 19 deletions(-)
> create mode 100644 test/zdtm/live/static/cgroup01.c
>
> diff --git a/cgroup.c b/cgroup.c
> index 1fe5e6d..bbd14ae 100644
> --- a/cgroup.c
> +++ b/cgroup.c
> @@ -5,6 +5,9 @@
> #include <unistd.h>
> #include <sys/mount.h>
> #include <sys/stat.h>
> +#include <ftw.h>
> +#include <libgen.h>
> +#include "list.h"
> #include "xmalloc.h"
> #include "cgroup.h"
> #include "pstree.h"
> @@ -18,7 +21,8 @@
> /*
> * This structure describes set of controller groups
> * a task lives in. The cg_ctl entries are stored in
> - * the @ctls list sorted by the .name field.
> + * the @ctls list sorted by the .name field and then
> + * by the .path field.
> */
>
> struct cg_set {
> @@ -36,6 +40,13 @@ static struct cg_set *root_cgset; /* Set root item lives in */
> static struct cg_set *criu_cgset; /* Set criu process lives in */
> static u32 cg_set_ids = 1;
>
> +static LIST_HEAD(cgroups);
> +static unsigned int n_cgroups;
> +static struct mount_info *cg_mntinfo;
> +
> +static CgControllerEntry **cg_controllers;
> +static unsigned int n_controllers;
> +
> static CgSetEntry *find_rst_set_by_id(u32 id)
> {
> int i;
> @@ -118,6 +129,314 @@ static struct cg_set *get_cg_set(struct list_head *ctls, unsigned int n_ctls)
> return cs;
> }
>
> +struct cg_controller *new_controller(const char *name, int heirarchy)
> +{
> + struct cg_controller *nc = xmalloc(sizeof(*nc));
> + if (!nc)
> + return NULL;
> +
> + nc->controllers = xmalloc(sizeof(char *));
> + if (!nc->controllers) {
> + xfree(nc);
> + return NULL;
> + }
> +
> + nc->controllers[0] = xstrdup(name);
> + if (!nc->controllers[0]) {
> + xfree(nc->controllers);
> + xfree(nc);
> + return NULL;
> + }
> +
> + nc->n_controllers = 1;
> + nc->heirarchy = heirarchy;
> +
> + nc->n_heads = 0;
> + INIT_LIST_HEAD(&nc->heads);
> +
> + return nc;
> +}
> +
> +int parse_cg_info(void)
> +{
> + if (parse_cgroups(&cgroups, &n_cgroups) < 0)
> + return -1;
> +
> + cg_mntinfo = parse_mountinfo(getpid(), NULL);
> +
> + if (!cg_mntinfo)
> + return -1;
> + return 0;
> +}
> +
> +static int get_cgroup_mount_point(const char *controller, char *path)
> +{
> + struct mount_info *m;
> + char name[1024];
> +
> + for (m = cg_mntinfo; m != NULL; m = m->next) {
> + if (strcmp(m->fstype->name, "cgroup") == 0) {
> + char *start, *end;
> +
> + start = strstr(m->options, "name=");
> + if (start) {
> + /* strlen("name=") == 5 */
> + start = start + 5;
> +
> + end = strstr(start, ",");
> + if (end) {
> + strncpy(name, start, end - start);
> + name[end - start] = '\0';
> + } else
> + strcpy(name, start);
> + } else {
> + start = strrchr(m->mountpoint, '/');
> + if (!start) {
> + pr_err("bad path %s\n", m->mountpoint);
> + return -1;
> + }
> + strcpy(name, start+1);
> + }
> +
> + if (strcmp(name, controller) == 0) {
> + /* skip the leading '.' in mountpoint */
> + strcpy(path, m->mountpoint + 1);
> + return 0;
> + }
> + }
> + }
> +
> + return -1;
> +}
> +
> +/* This is for use in add_cgroup() as additional arguments for the ftw()
> + * callback */
> +static struct cg_controller *current_controller;
> +
> +#define EXACT_MATCH 0
> +#define PARENT_MATCH 1
> +#define NO_MATCH 2
> +
> +static int find_dir(const char *path, struct list_head *dirs, struct cgroup_dir **rdir)
> +{
> + struct cgroup_dir *d;
> + list_for_each_entry(d, dirs, siblings) {
> + if (strcmp(d->path, path) == 0) {
> + *rdir = d;
> + return EXACT_MATCH;
> + }
> +
> + if (strstartswith(path, d->path)) {
> + int ret = find_dir(path, &d->children, rdir);
> + if (ret == NO_MATCH) {
> + *rdir = d;
> + return PARENT_MATCH;
> + }
> + return ret;
> +
> + }
> + }
> +
> + return NO_MATCH;
> +}
> +
> +static int add_cgroup(const char *fpath, const struct stat *sb, int typeflag)
> +{
> + struct cgroup_dir *ncd = NULL, *match;
> + int ret = 0;
> + char pbuf[PATH_MAX];
> +
> + if (typeflag == FTW_D) {
> + FILE *f;
> + int mtype;
> + struct mount_info *mi;
> +
> + strncpy(pbuf, fpath, PATH_MAX);
> +
> + pr_info("adding cgroup %s\n", fpath);
> +
> + ncd = xmalloc(sizeof(*ncd));
> + if (!ncd) {
> + ret = -1;
> + goto out;
> + }
> + ncd->path = NULL;
> +
> + for (mi = cg_mntinfo; mi != NULL; mi = mi->next) {
> + if (is_path_prefix(fpath, mi->mountpoint + 1)) {
> + ncd->path = xstrdup(fpath + strlen(mi->mountpoint));
> + if (!ncd->path) {
> + ret = -1;
> + goto out;
> + }
> + break;
> + }
> + }
> +
> + if (!ncd->path) {
> + /* We couldn't find fpath in mountinfo, which means we
> + * mounted it ourselves, so we just chop off the first
> + * strlen(".criu.cgmounts.XXXXXX").
> + */
> + ncd->path = xstrdup(fpath + 21);
> + if (!ncd->path) {
> + ret = -1;
> + goto out;
> + }
> + }
> +
> + mtype = find_dir(ncd->path, ¤t_controller->heads, &match);
> +
> + switch (mtype) {
> + /* ignore co-mounted cgroups */
> + case EXACT_MATCH:
> + goto out;
> + case PARENT_MATCH:
> + list_add_tail(&ncd->siblings, &match->children);
> + match->n_children++;
> + break;
> + case NO_MATCH:
> + list_add_tail(&ncd->siblings, ¤t_controller->heads);
> + current_controller->n_heads++;
> + break;
> + }
> +
> + INIT_LIST_HEAD(&ncd->children);
> + ncd->n_children = 0;
> + ncd->controller = current_controller;
> +
> + ncd->flags = 0;
> +
> + snprintf(pbuf, PATH_MAX, "%s/memory.limit_in_bytes", fpath);
> + f = fopen(pbuf, "r");
> + if (f) {
> + if (fscanf(f, "%" SCNu64, &ncd->mem_limit) != 1) {
> + pr_err("Failed scanning %s\n", pbuf);
> + ret = -1;
> + goto out;
> + }
> + ncd->flags |= HAS_MEM_LIMIT;
> + fclose(f);
> + }
> +
> + snprintf(pbuf, PATH_MAX, "%s/cpu.shares", fpath);
> + f = fopen(pbuf, "r");
> + if (f) {
> + if (fscanf(f, "%" SCNu32, &ncd->cpu_shares) != 1) {
> + pr_err("Failed scanning %s for u32\n", pbuf);
> + ret = -1;
> + goto out;
> + }
> + ncd->flags |= HAS_CPU_SHARES;
> + fclose(f);
> + }
> +
> + return 0;
> + }
> +
> +out:
> + if (ncd) {
> + if (ncd->path)
> + xfree(ncd->path);
> + xfree(ncd);
> + }
> +
> + return ret;
> +}
> +
> +static int collect_cgroups(struct list_head *ctls)
> +{
> + struct cg_ctl *cc;
> + int ret = 0;
> +
> + list_for_each_entry(cc, ctls, l) {
> + char path[PATH_MAX];
> + char *name, mount_point[PATH_MAX], prefix[] = ".criu.cgmounts.XXXXXX";
> + bool temp_mount = false;
> + struct cg_controller *cg;
> + int i;
> +
> + if (strstartswith(cc->name, "name="))
> + name = cc->name + 5;
> + else
> + name = cc->name;
> +
> + if (get_cgroup_mount_point(name, mount_point) < 0) {
> + /* Someone is trying to dump a process that is in
> + * a controller that isn't mounted, so we mount it for
> + * them.
> + */
> + char opts[1024];
> + temp_mount = true;
> +
> + if (mkdtemp(prefix) == NULL) {
> + pr_perror("can't make dir for cg mounts\n");
> + return -1;
> + }
> +
> + if (name == cc->name)
> + sprintf(opts, "%s", name);
> + else
> + sprintf(opts, "none,%s", cc->name);
> +
> + if (mount("none", prefix, "cgroup", 0, opts) < 0) {
> + pr_perror("couldn't mount %s\n", opts);
> + rmdir(prefix);
> + return -1;
> + }
> +
> + strcpy(mount_point, prefix);
> + }
> +
> + snprintf(path, PATH_MAX, "%s/%s", mount_point, cc->path);
> +
> + current_controller = NULL;
> +
> + /* We should get all the "real" (i.e. not name=systemd type)
> + * controller from parse_cgroups(), so find that controller if
> + * it exists. */
> + list_for_each_entry(cg, &cgroups, l) {
> + for (i = 0; i < cg->n_controllers; i++) {
> + if (strcmp(cg->controllers[i], cc->name) == 0) {
> + current_controller = cg;
> + break;
> + }
> + }
> + }
> +
> + if (!current_controller) {
> + /* only allow "fake" controllers to be created this way */
> + if (!strstartswith(cc->name, "name=")) {
> + pr_err("controller %s not found\n", cc->name);
> + ret = -1;
> + goto out;
> + } else {
> + struct cg_controller *nc = new_controller(cc->name, -1);
> + list_add_tail(&nc->l, &cg->l);
> + n_cgroups++;
> + current_controller = nc;
> + }
> + }
> +
> + ret = ftw(path, add_cgroup, 4);
> + if (ret < 0) {
> + pr_perror("failed walking %s for empty cgroups\n", path);
> + goto out;
> + }
> +
> +out:
> + if (temp_mount) {
> + umount(prefix);
> + rmdir(prefix);
> + }
> +
> + if (ret < 0)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
> {
> int pid;
> @@ -134,6 +453,9 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
> if (parse_task_cgroup(pid, &ctls, &n_ctls))
> return -1;
>
> + if (item == root_item && collect_cgroups(&ctls) < 0)
> + return -1;
> +
> cs = get_cg_set(&ctls, n_ctls);
> if (!cs)
> return -1;
> @@ -152,6 +474,74 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
> return 0;
> }
>
> +static int dump_cg_dirs(struct list_head *dirs, size_t n_dirs, CgroupDirEntry ***ents)
> +{
> + struct cgroup_dir *cur;
> + CgroupDirEntry *cde;
> + void *m;
> + int i = 0;
> +
> + m = xmalloc(n_dirs * (sizeof(CgroupDirEntry *) + sizeof(CgroupDirEntry)));
> + *ents = m;
> + if (!m)
> + return -1;
> +
> + cde = m + n_dirs * sizeof(CgroupDirEntry *);
> +
> + list_for_each_entry(cur, dirs, siblings) {
> + cgroup_dir_entry__init(cde);
> +
> + cde->path = cur->path;
> + cde->has_mem_limit = cur->flags & HAS_MEM_LIMIT;
> + cde->mem_limit = cur->mem_limit;
> + cde->has_cpu_shares = cur->flags & HAS_CPU_SHARES;
> + cde->cpu_shares = cur->cpu_shares;
> +
> + cde->n_children = cur->n_children;
> + if (cur->n_children > 0)
> + if (dump_cg_dirs(&cur->children, cur->n_children, &cde->children) < 0) {
> + xfree(*ents);
> + return -1;
> + }
> + (*ents)[i++] = cde++;
> + }
> +
> + return 0;
> +}
> +
> +static int dump_controllers(CgroupEntry *cg)
> +{
> + struct cg_controller *cur;
> + CgControllerEntry *ce;
> + void *m;
> + int i;
> +
> + cg->n_controllers = n_cgroups;
> + m = xmalloc(n_cgroups * (sizeof(CgControllerEntry *) + sizeof(CgControllerEntry)));
> + cg->controllers = m;
> + ce = m + cg->n_controllers * sizeof(CgControllerEntry *);
> + if (!m)
> + return -1;
> +
> + i = 0;
> + list_for_each_entry(cur, &cgroups, l) {
> + cg_controller_entry__init(ce);
> +
> + ce->controllers = cur->controllers;
> + ce->n_controllers = cur->n_controllers;
> + ce->n_dirs = cur->n_heads;
> + if (ce->n_dirs > 0)
> + if (dump_cg_dirs(&cur->heads, cur->n_heads, &ce->dirs) < 0) {
> + xfree(cg->controllers);
> + return -1;
> + }
> + cg->controllers[i++] = ce++;
> + }
> +
> + return 0;
> +}
> +
> +
> static int dump_sets(CgroupEntry *cg)
> {
> struct cg_set *set;
> @@ -242,6 +632,8 @@ int dump_cgroups(void)
>
> if (dump_sets(&cg))
> return -1;
> + if (dump_controllers(&cg))
> + return -1;
>
> pr_info("Writing CG image\n");
> return pb_write_one(fdset_fd(glob_fdset, CR_FD_CGROUP), &cg, PB_CGROUP);
> @@ -323,6 +715,97 @@ void fini_cgroup(void)
> xfree(cg_yard);
> }
>
> +static int prepare_cgroup_dir_properties(char *controller, CgroupDirEntry **ents, unsigned int n_ents)
> +{
> + size_t i;
> + int cg;
> +
> + cg = get_service_fd(CGROUP_YARD);
> +
> + for (i = 0; i < n_ents; i++) {
> + CgroupDirEntry *e = ents[i];
> + char path[PATH_MAX];
> +
> + if (e->has_mem_limit) {
> + FILE *f;
> +
> + sprintf(path, "%s/%s/memory.limit_in_bytes", controller, e->path);
> +
> + f = fopenat(cg, path, "w+");
> + if (!f) {
> + pr_perror("Couldn't open %s for writing\n", path);
> + return -1;
> + }
> +
> + fprintf(f, "%" SCNu64, e->mem_limit);
> + fclose(f);
> + }
> +
> + if (e->has_cpu_shares) {
> + FILE *f;
> +
> + sprintf(path, "%s/%s/cpu.shares", controller, e->path);
> +
> + f = fopenat(cg, path, "w+");
> + if (!f) {
> + pr_perror("Couldn't open %s for writing\n", path);
> + return -1;
> + }
> +
> + fprintf(f, "%" SCNu32, e->cpu_shares);
> + fclose(f);
> + }
> +
> + if (prepare_cgroup_dir_properties(controller, e->children, e->n_children) < 0)
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +int prepare_cgroup_properties(void)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < n_controllers; i++) {
> + CgControllerEntry *c = cg_controllers[i];
> +
> + if (c->n_controllers < 1) {
> + pr_err("Each CgControllerEntry should have at least 1 contrller\n");
> + return -1;
> + }
> +
> + /* Here we just restore properties of the first controller.
> + * Since they are co-mounted everything will propagate.
> + */
> + if (prepare_cgroup_dir_properties(c->controllers[0], c->dirs, c->n_dirs) < 0)
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int prepare_cgroup_dirs(char *paux, size_t off, CgroupDirEntry **ents, size_t n_ents)
> +{
> + size_t i;
> + CgroupDirEntry *e;
> +
> + for (i = 0; i < n_ents; i++) {
> + e = ents[i];
> +
> + sprintf(paux + off, "/%s", e->path);
> +
> + if (mkdirp(paux)) {
> + pr_perror("Can't make cgroup dir %s", paux);
> + return -1;
> + }
> +
> + prepare_cgroup_dirs(paux, off, e->children, e->n_children);
> + }
> +
> + return 0;
> +}
> +
> /*
> * Prepare the CGROUP_YARD service descriptor. This guy is
> * tmpfs mount with the set of ctl->name directories each
> @@ -341,10 +824,10 @@ void fini_cgroup(void)
> * them in advance.
> */
>
> -static int prepare_cgroup_sfd(CgSetEntry *root_set)
> +static int prepare_cgroup_sfd(CgroupEntry *ce)
> {
> int off, i;
> - char paux[PATH_MAX], aux[128];
> + char paux[PATH_MAX];
>
> pr_info("Preparing cgroups yard\n");
>
> @@ -370,26 +853,44 @@ static int prepare_cgroup_sfd(CgSetEntry *root_set)
> goto err;
> }
>
> - for (i = 0; i < root_set->n_ctls; i++) {
> - CgMemberEntry *ce = root_set->ctls[i];
> - char *opt = ce->name;
> + for (i = 0; i < ce->n_controllers; i++) {
> + CgControllerEntry *ctrl = ce->controllers[i];
> + int j, name_off, opt_off;
> + char *name, opt[1024];
>
> - if (strstartswith(ce->name, "name=")) {
> - sprintf(paux + off, "/%s", ce->name + 5);
> - sprintf(aux, "none,%s", ce->name);
> - opt = aux;
> - } else
> - sprintf(paux + off, "/%s", ce->name);
> + if (ctrl->n_controllers < 1) {
> + pr_err("Each cg_controller_entry must have at least 1 controller");
> + goto err;
> + }
> +
> + if (strstartswith(ctrl->controllers[0], "name=")) {
> + name = ctrl->controllers[0] + 5;
> + opt_off = sprintf(opt, "none,%s", ctrl->controllers[0]);
> + } else {
> + name = ctrl->controllers[0];
> + opt_off = sprintf(opt, "%s", ctrl->controllers[0]);
> + }
> +
> + for (j = 1; j < ctrl->n_controllers; j++) {
> + name = ctrl->controllers[i];
> + opt_off += sprintf(opt + opt_off, ",%s", ctrl->controllers[i]);
> + }
> +
> + name_off = sprintf(paux + off, "/%s", name);
>
> if (mkdir(paux, 0700)) {
> - pr_perror("Can't make cgyard subdir");
> + pr_perror("Can't make cgyard subdir %s", paux);
> goto err;
> }
>
> if (mount("none", paux, "cgroup", 0, opt) < 0) {
> - pr_perror("Can't mount %s cgyard", ce->name);
> + pr_perror("Can't mount %s cgyard", paux);
> goto err;
> }
> +
> + if (prepare_cgroup_dirs(paux, off + name_off, ctrl->dirs, ctrl->n_dirs))
> + goto err;
> +
> }
>
> pr_debug("Opening %s as cg yard\n", cg_yard);
> @@ -431,13 +932,16 @@ int prepare_cgroup(void)
>
> n_sets = ce->n_sets;
> rst_sets = ce->sets;
> +
> + n_controllers = ce->n_controllers;
> + cg_controllers = ce->controllers;
> if (n_sets)
> /*
> * We rely on the fact that all sets contain the same
> * set of controllers. This is checked during dump
> * with cg_set_compare(CGCMP_ISSUB) call.
> */
> - ret = prepare_cgroup_sfd(rst_sets[0]);
> + ret = prepare_cgroup_sfd(ce);
> else
> ret = 0;
>
> diff --git a/cr-dump.c b/cr-dump.c
> index 45f1f5f..d8ad0fc 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -1776,6 +1776,9 @@ int cr_dump_tasks(pid_t pid)
> if (vdso_init())
> goto err;
>
> + if (parse_cg_info())
> + goto err;
> +
> if (write_img_inventory())
> goto err;
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 7d43aab..f9068fc 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1708,8 +1708,12 @@ int cr_restore_tasks(void)
> if (crtools_prepare_shared() < 0)
> goto err;
>
> - ret = restore_root_task(root_item);
> + if (restore_root_task(root_item) < 0)
> + goto err_fc;
>
> + ret = prepare_cgroup_properties();
> +
> +err_fc:
> fini_cgroup();
> err:
> cr_plugin_fini();
> diff --git a/include/cgroup.h b/include/cgroup.h
> index 148b26f..fc386fa 100644
> --- a/include/cgroup.h
> +++ b/include/cgroup.h
> @@ -7,5 +7,52 @@ int dump_task_cgroup(struct pstree_item *, u32 *);
> int dump_cgroups(void);
> int prepare_task_cgroup(struct pstree_item *);
> int prepare_cgroup(void);
> +/* Restore things like cpu_limit in known cgroups. */
> +int prepare_cgroup_properties(void);
> void fini_cgroup(void);
> +
> +#define HAS_MEM_LIMIT (1 << 0)
> +#define HAS_CPU_SHARES (1 << 1)
> +
> +struct cg_controller;
> +
> +/* This describes a particular cgroup path, e.g. the '/lxc/u1' part of
> + * 'blkio/lxc/u1' and any properties it has.
> + */
> +struct cgroup_dir {
> + char *path;
> + u64 mem_limit;
> + u32 cpu_shares;
> + unsigned int flags;
> +
> + /* this is how children are linked together */
> + struct list_head siblings;
> +
> + /* more cgroup_dirs */
> + struct list_head children;
> + unsigned int n_children;
> +
> + struct cg_controller *controller;
> +};
> +
> +/* This describes a particular cgroup controller, e.g. blkio or cpuset.
> + * The heads are subdirectories organized in their tree format.
> + */
> +struct cg_controller {
> + int heirarchy;
> + unsigned int n_controllers;
> + char **controllers;
> +
> + /* cgroup_dirs */
> + struct list_head heads;
> + unsigned int n_heads;
> +
> + /* for cgroup list in cgroup.c */
> + struct list_head l;
> +};
> +struct cg_controller *new_controller(const char *name, int heirarchy);
> +
> +/* parse all global cgroup information into structures */
> +int parse_cg_info(void);
> +
> #endif /* __CR_CGROUP_H__ */
> diff --git a/include/proc_parse.h b/include/proc_parse.h
> index b153328..ff1ea5d 100644
> --- a/include/proc_parse.h
> +++ b/include/proc_parse.h
> @@ -5,6 +5,7 @@
> #include "asm/types.h"
> #include "image.h"
> #include "list.h"
> +#include "cgroup.h"
>
> #include "protobuf/eventfd.pb-c.h"
> #include "protobuf/eventpoll.pb-c.h"
> @@ -203,4 +204,6 @@ struct cg_ctl {
> extern int parse_task_cgroup(int pid, struct list_head *l, unsigned int *n);
> extern void put_ctls(struct list_head *);
>
> +int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups);
> +
> #endif /* __CR_PROC_PARSE_H__ */
> diff --git a/include/util.h b/include/util.h
> index 22a0f3d..522fc33 100644
> --- a/include/util.h
> +++ b/include/util.h
> @@ -288,7 +288,7 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn);
> /*
> * Check whether @str starts with @sub
> */
> -static inline bool strstartswith(char *str, char *sub)
> +static inline bool strstartswith(const char *str, const char *sub)
> {
> while (1) {
> if (*sub == '\0') /* end of sub -- match */
> @@ -303,4 +303,16 @@ static inline bool strstartswith(char *str, char *sub)
> }
> }
>
> +/*
> + * mkdir -p
> + */
> +int mkdirp(const char *path);
> +
> +/*
> + * Tests whether a path is a prefix of another path. This is different than
> + * strstartswith because "/foo" is _not_ a path prefix of "/foobar", since they
> + * refer to different directories.
> + */
> +bool is_path_prefix(const char *path, const char *prefix);
> +FILE *fopenat(int dirfd, char *path, char *cflags);
> #endif /* __CR_UTIL_H__ */
> diff --git a/mount.c b/mount.c
> index 4d84f48..32410eb 100644
> --- a/mount.c
> +++ b/mount.c
> @@ -861,6 +861,9 @@ static struct fstype fstypes[] = {
> }, {
> .name = "debugfs",
> .code = FSTYPE__DEBUGFS,
> + }, {
> + .name = "cgroup",
> + .code = FSTYPE__CGROUP,
> }
> };
>
> diff --git a/proc_parse.c b/proc_parse.c
> index f2ea897..88f3c0a 100644
> --- a/proc_parse.c
> +++ b/proc_parse.c
> @@ -1547,7 +1547,7 @@ int parse_task_cgroup(int pid, struct list_head *retl, unsigned int *n)
> }
>
> list_for_each_entry(cc, retl, l)
> - if (strcmp(cc->name, name) >= 0)
> + if (strcmp(cc->name, name) >= 0 && strcmp(cc->path, path) >= 0)
> break;
>
> list_add_tail(&ncc->l, &cc->l);
> @@ -1573,3 +1573,76 @@ void put_ctls(struct list_head *l)
> xfree(c);
> }
> }
> +
> +
> +/* Parse and create all the real controllers. This does not include things with
> + * the "name=" prefix, e.g. systemd.
> + */
> +int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups)
> +{
> + FILE *f;
> + char buf[1024], name[1024];
> + int heirarchy, ret = 0;
> + struct cg_controller *cur = NULL;
> +
> + f = fopen("/proc/cgroups", "r");
> + if (!f) {
> + pr_perror("failed opening /proc/cgroups");
> + return -1;
> + }
> +
> + /* throw away the header */
> + if (!fgets(buf, 1024, f)) {
> + ret = -1;
> + goto out;
> + }
> +
> + while (fgets(buf, 1024, f)) {
> + char *n;
> + char found = 0;
> +
> + sscanf(buf, "%s %d", name, &heirarchy);
> + list_for_each_entry(cur, cgroups, l) {
> + if (cur->heirarchy == heirarchy) {
> + void *m;
> +
> + found = 1;
> + cur->n_controllers++;
> + m = xrealloc(cur->controllers, sizeof(char *) * cur->n_controllers);
> + if (!m) {
> + ret = -1;
> + goto out;
> + }
> +
> + cur->controllers = m;
> + if (!cur->controllers) {
> + ret = -1;
> + goto out;
> + }
> +
> + n = xstrdup(name);
> + if (!n) {
> + ret = -1;
> + goto out;
> + }
> +
> + cur->controllers[cur->n_controllers-1] = n;
> + break;
> + }
> + }
> +
> + if (!found) {
> + struct cg_controller *nc = new_controller(name, heirarchy);
> + if (!nc) {
> + ret = -1;
> + goto out;
> + }
> + list_add_tail(&nc->l, &cur->l);
> + (*n_cgroups)++;
> + }
> + }
> +
> +out:
> + fclose(f);
> + return ret;
> +}
> diff --git a/protobuf/cgroup.proto b/protobuf/cgroup.proto
> index 139a3ad..4be2249 100644
> --- a/protobuf/cgroup.proto
> +++ b/protobuf/cgroup.proto
> @@ -1,3 +1,16 @@
> +message cgroup_dir_entry {
> + required string path = 1;
> + optional uint64 mem_limit = 2;
> + optional uint32 cpu_shares = 3;
> + repeated cgroup_dir_entry children = 4;
> +}
> +
> +message cg_controller_entry {
> + required uint32 id = 1;
> + repeated string controllers = 2;
> + repeated cgroup_dir_entry dirs = 3;
> +}
> +
> message cg_member_entry {
> required string name = 1;
> required string path = 2;
> @@ -9,5 +22,6 @@ message cg_set_entry {
> }
>
> message cgroup_entry {
> - repeated cg_set_entry sets = 1;
> + repeated cg_set_entry sets = 1;
> + repeated cg_controller_entry controllers = 2;
> }
> diff --git a/protobuf/mnt.proto b/protobuf/mnt.proto
> index 63532ee..603bb37 100644
> --- a/protobuf/mnt.proto
> +++ b/protobuf/mnt.proto
> @@ -11,6 +11,7 @@ enum fstype {
> SECURITYFS = 9;
> FUSECTL = 10;
> DEBUGFS = 11;
> + CGROUP = 12;
> };
>
> message mnt_entry {
> diff --git a/test/zdtm.sh b/test/zdtm.sh
> index 4c3f2d2..38e7484 100755
> --- a/test/zdtm.sh
> +++ b/test/zdtm.sh
> @@ -167,6 +167,7 @@ ns/static/tun
> static/netns-nf
> static/netns
> static/cgroup00
> +static/cgroup01
> ns/static/clean_mntns
> "
>
> @@ -198,6 +199,7 @@ bind-mount
> mountpoints
> inotify_irmap
> cgroup00
> +cgroup01
> clean_mntns
> "
>
> diff --git a/test/zdtm/live/static/Makefile b/test/zdtm/live/static/Makefile
> index f41fd80..893a250 100644
> --- a/test/zdtm/live/static/Makefile
> +++ b/test/zdtm/live/static/Makefile
> @@ -160,6 +160,7 @@ TST_DIR = \
> tempfs \
> bind-mount \
> cgroup00 \
> + cgroup01 \
>
> TST_DIR_FILE = \
> chroot \
> diff --git a/test/zdtm/live/static/cgroup01.c b/test/zdtm/live/static/cgroup01.c
> new file mode 100644
> index 0000000..f6a082f
> --- /dev/null
> +++ b/test/zdtm/live/static/cgroup01.c
> @@ -0,0 +1,111 @@
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <sys/stat.h>
> +#include <sys/mount.h>
> +#include "zdtmtst.h"
> +
> +const char *test_doc = "Check that empty cgroups are preserved";
> +const char *test_author = "Tycho Andersen <tycho.andersen at canonical.com>";
> +
> +char *dirname;
> +TEST_OPTION(dirname, string, "cgroup directory name", 1);
> +static const char *cgname = "zdtmtst";
> +static const char *subname = "subcg";
> +static const char *empty = "empty";
> +
> +int main(int argc, char **argv)
> +{
> + int cgfd, l, ret = 1;
> + char aux[1024], paux[1024];
> + FILE *cgf;
> + struct stat st;
> +
> + test_init(argc, argv);
> +
> + if (mkdir(dirname, 0700) < 0) {
> + err("Can't make dir");
> + goto out;
> + }
> +
> + sprintf(aux, "none,name=%s", cgname);
> + if (mount("none", dirname, "cgroup", 0, aux)) {
> + err("Can't mount cgroups");
> + goto out_rd;
> + }
> +
> + sprintf(paux, "%s/%s", dirname, subname);
> + mkdir(paux, 0600);
> +
> + l = sprintf(aux, "%d", getpid());
> + sprintf(paux, "%s/%s/tasks", dirname, subname);
> +
> + cgfd = open(paux, O_WRONLY);
> + if (cgfd < 0) {
> + err("Can't open tasks");
> + goto out_rs;
> + }
> +
> + l = write(cgfd, aux, l);
> + close(cgfd);
> +
> + if (l < 0) {
> + err("Can't move self to subcg");
> + goto out_rs;
> + }
> +
> + sprintf(paux, "%s/%s/%s", dirname, subname, empty);
> + mkdir(paux, 0600);
> +
> + test_daemon();
> + test_waitsig();
> +
> + cgf = fopen("/proc/self/mountinfo", "r");
> + if (cgf == NULL) {
> + fail("No mountinfo file");
> + goto out_rs;
> + }
> +
> + while (fgets(paux, sizeof(paux), cgf)) {
> + char *s;
> +
> + s = strstr(paux, cgname);
> + if (s) {
> + sscanf(paux, "%*d %*d %*d:%*d %*s %s", aux);
> + test_msg("found cgroup at %s\n", aux);
> + sprintf(paux, "%s/%s/%s", aux, subname, empty);
> + if (stat(paux, &st)) {
> + fail("couldn't stat %s\n", paux);
> + ret = -1;
> + goto out_close;
> + }
> +
> + if (!S_ISDIR(st.st_mode)) {
> + fail("%s is not a directory\n", paux);
> + ret = -1;
> + goto out_close;
> + }
> +
> + pass();
> + ret = 0;
> + goto out_close;
> + }
> + }
> +
> + fail("empty cgroup not found!\n");
> +
> +out_close:
> + fclose(cgf);
> +
> + sprintf(paux, "%s/%s/%s", dirname, subname, empty);
> + rmdir(paux);
> +out_rs:
> + sprintf(paux, "%s/%s", dirname, subname);
> + rmdir(paux);
> + umount(dirname);
> +out_rd:
> + rmdir(dirname);
> +out:
> + return ret;
> +}
> diff --git a/util.c b/util.c
> index d697f7a..2553adc 100644
> --- a/util.c
> +++ b/util.c
> @@ -678,3 +678,80 @@ struct vma_area *alloc_vma_area(void)
>
> return p;
> }
> +
> +int mkdirp(const char *path)
> +{
> + size_t i;
> + char made_path[PATH_MAX], *pos;
> +
> + if (strlen(path) >= PATH_MAX) {
> + pr_err("path %s is longer than PATH_MAX", path);
> + return -1;
> + }
> +
> + strcpy(made_path, path);
> +
> + i = 0;
> + if (made_path[0] == '/')
> + i++;
> +
> + for (; i < strlen(made_path); i++) {
> + pos = strchr(made_path + i, '/');
> + if (pos)
> + *pos = '\0';
> + if (mkdir(made_path, 0755) < 0 && errno != EEXIST) {
> + pr_perror("couldn't mkdirpat directory\n");
> + return -1;
> + }
> + if (pos) {
> + *pos = '/';
> + i = pos - made_path;
> + } else
> + break;
> + }
> +
> + return 0;
> +}
> +
> +bool is_path_prefix(const char *path, const char *prefix)
> +{
> + if (strstartswith(path, prefix)) {
> + size_t len = strlen(prefix);
> + switch (path[len]) {
> + case '\0':
> + case '/':
> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> +FILE *fopenat(int dirfd, char *path, char *cflags)
> +{
> + int tmp, flags = 0;
> + char *iter;
> +
> + for (iter = cflags; *iter; iter++) {
> + switch (*iter) {
> + case 'r':
> + flags |= O_RDONLY;
> + break;
> + case 'a':
> + flags |= O_APPEND;
> + break;
> + case 'w':
> + flags |= O_WRONLY | O_CREAT;
> + break;
> + case '+':
> + flags = O_RDWR | O_CREAT;
> + break;
> + }
> + }
> +
> + tmp = openat(dirfd, path, flags, S_IRUSR | S_IWUSR);
> + if (tmp < 0)
> + return NULL;
> +
> + return fdopen(tmp, cflags);
> +}
> --
> 1.9.1
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU mailing list