[CRIU] [PATCH 2/2] Attempt to restore cgroups
Pavel Emelyanov
xemul at parallels.com
Fri Jul 4 03:56:14 PDT 2014
On 07/03/2014 06:23 PM, Tycho Andersen wrote:
> Ugh, and here's one without the stupid debug statement again, sorry
> about that.
Would you also run this patch through linux kernel's scripts/checkpatch.pl
script. It finds ~20 coding style errors. Warnings can be ignored :)
And some more comments inline.
> During the dump phase, /proc/cgroups is parsed to find co-mounted cgroups.
> Then, for each task /proc/self/cgroup is parsed for the cgroups that it is a
> member of, and that cgroup is traversed to find any child cgroups which may
> also need restoring. All of this information is persisted along with the
> original cg_sets, which indicate which cgroups a task is a member of.
>
> On restore, an initial phase creates all the cgroups which were saved and
> attempts to restore any peroperties they had. Then the tasks are restored into
> their respective cgroups via cg_sets as usual.
Can you also add test into zdtm/live/static/ suite? These are run on 24x7 manner
and make sure nothing gets broken accidentally.
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
> cgroup.c | 449 +++++++++++++++++++++++++++++++++++++++++++++++---
> cr-dump.c | 3 +
> include/cgroup.h | 46 ++++++
> include/proc_parse.h | 3 +
> include/util.h | 7 +-
> mount.c | 3 +
> proc_parse.c | 75 ++++++++-
> protobuf/cgroup.proto | 16 +-
> protobuf/mnt.proto | 1 +
> util.c | 34 ++++
> 10 files changed, 609 insertions(+), 28 deletions(-)
>
> diff --git a/cgroup.c b/cgroup.c
> index 1fe5e6d..306bfe3 100644
> --- a/cgroup.c
> +++ b/cgroup.c
> @@ -18,7 +20,8 @@
> /*
> * This structure describes set of controller groups
> * a task lives in. The cg_ctl entries are stored in
> - * the @ctls list sorted by the .name field.
> + * the @ctls list sorted by the .name field and then
> + * by the .path field.
Why do we need the .path sorting as well?
> */
>
> struct cg_set {
> @@ -118,6 +125,265 @@ static struct cg_set *get_cg_set(struct list_head *ctls, unsigned int n_ctls)
> return cs;
> }
>
> +int parse_cg_info()
> +{
> + if (parse_cgroups(&cgroups, &n_cgroups) < 0)
> + return -1;
> +
> + cg_mntinfo = parse_mountinfo(getpid(), NULL);
> +
> + if (!cg_mntinfo)
> + return -1;
> + return 0;
> +}
> +
> +char *get_cgroup_mount_point(const char *controller)
This one can be static.
> +{
> + struct mount_info *m;
> + char path[PATH_MAX];
> +
> + for (m = cg_mntinfo; m != NULL; m = m->next) {
> + if (strcmp(m->fstype->name, "cgroup") == 0) {
> + char *slash;
> +
> + slash = strrchr(m->mountpoint, '/');
> + if (!slash) {
> + pr_err("bad path %s\n", path);
> + return NULL;
> + }
> +
> + if (strcmp(slash + 1, controller) == 0) {
> + /* skip the leading '.' in mountpoint */
> + strncpy(path, m->mountpoint + 1, slash - m->mountpoint + 1);
> + path[slash - m->mountpoint] = '\0';
> +
> + return xstrdup(path);
> + }
> + }
> + }
> +
> + return NULL;
> +}
> +
Please, put here comment saying that these two are arguments for the ftw() callback.
> +static struct cg_controller *current_controller;
> +static char *current_controller_name;
> +
> +#define EXACT_MATCH 0
> +#define PARENT_MATCH 1
> +#define NO_MATCH 2
> +
> +static int add_cgroup(const char *fpath, const struct stat *sb, int typeflag)
> +{
> + struct cgroup_dir *ncd = NULL, *match;
> + int ret = 0;
> + char pbuf[PATH_MAX], *name, *path;
> +
> +
> + if (typeflag == FTW_D) {
> + FILE *f;
> + int mtype;
> +
> + strncpy(pbuf, fpath, PATH_MAX);
> +
> + pr_info("adding cgroup %s\n", fpath);
> +
> + ncd = xmalloc(sizeof(*ncd));
> + if(!ncd) {
> + ret = -1;
> + goto out;
> + }
> + ncd->path = NULL;
> +
> + name = strstr(fpath, current_controller_name);
> + if (!name) {
> + pr_err("%s has no name component (%s)\n", fpath, current_controller_name);
> + ret = -1;
> + goto out;
> + }
> +
> + path = strchr(name, '/');
> + if (!path) {
> + pr_err("Bad path %s\n", name);
> + ret = -1;
> + goto out;
> + }
> +
> + ncd->path = xstrdup(path);
> + if (!ncd->path) {
> + ret = -1;
> + goto out;
> + }
> +
> + mtype = find_dir(path, ¤t_controller->heads, &match);
Taking into account that a) now we call the collect_cgroups() for root item only
and b) ftw guarantees to descend the file tree, do we still need this _MATCH-ing
engine?
> +
> + switch(mtype) {
> + /* ignore co-mounted cgroups */
> + case EXACT_MATCH :
> + goto out;
> + case PARENT_MATCH :
> + list_add_tail(&ncd->siblings, &match->children);
> + match->n_children++;
> + break;
> + case NO_MATCH :
> + list_add_tail(&ncd->siblings, ¤t_controller->heads);
> + current_controller->n_heads++;
> + break;
> + }
> +
> + INIT_LIST_HEAD(&ncd->children);
> + ncd->n_children = 0;
> + ncd->controller = current_controller;
> +
> + ncd->flags = 0;
> +
> + snprintf(pbuf, PATH_MAX, "%s/memory.limit_in_bytes", fpath);
> + f = fopen(pbuf, "r");
> + if (f) {
> + if (fscanf(f, "%" SCNu64, &ncd->mem_limit) != 1) {
> + pr_err("Failed scanning %s\n", pbuf);
> + ret = -1;
> + goto out;
> + }
> + ncd->flags |= HAS_MEM_LIMIT;
> + fclose(f);
> + }
> +
> + snprintf(pbuf, PATH_MAX, "%s/cpu.shares", fpath);
> + f = fopen(pbuf, "r");
> + if (f) {
> + if (fscanf(f, "%" SCNu32, &ncd->cpu_shares) != 1) {
> + pr_err("Failed scanning %s for u32\n", pbuf);
> + ret = -1;
> + goto out;
> + }
> + ncd->flags |= HAS_CPU_SHARES;
> + fclose(f);
> + }
> +
> + return 0;
> + }
> +
> +out:
> + if (ncd) {
> + if (ncd->path)
> + xfree(ncd->path);
> + xfree(ncd);
> + }
> +
> + return ret;
> +}
> +
> +static int collect_cgroups(struct list_head *ctls)
> +{
> + struct cg_ctl *cc;
> + int ret = 0;
> +
> + list_for_each_entry(cc, ctls, l) {
> + char path[PATH_MAX];
> + char *name, *mount_point;
> + struct cg_controller *cg;
> + int i;
> +
> + if (strstartswith(cc->name, "name="))
> + name = cc->name + 5;
> + else
> + name = cc->name;
> +
> + mount_point = get_cgroup_mount_point(name);
> + if (!mount_point) {
> + pr_err("cgroup mount point for %s not found!\n", name);
> + return -1;
> + }
> +
> + snprintf(path, PATH_MAX, "%s/%s%s", mount_point, name, cc->path);
> +
> + current_controller = NULL;
> + current_controller_name = name;
> +
> + /* Use the previously allocated struct for this controller if
> + * there is one */
I'm not sure I understand this comment. Can you qualify one, please?
> + list_for_each_entry(cg, &cgroups, l) {
> + for (i = 0; i < cg->n_controllers; i++) {
> + if (strcmp(cg->controllers[i], cc->name) == 0) {
> + current_controller = cg;
> + break;
> + }
> + }
> + }
> +
> + if (!current_controller) {
> + /* only allow "fake" controllers to be created this way */
> + if (!strstartswith(cc->name, "name=")) {
> + pr_err("controller %s not found\n", cc->name);
> + return -1;
> + } else {
> + struct cg_controller *nc = new_controller(cc->name, -1);
> + list_add_tail(&nc->l, &cg->l);
> + n_cgroups++;
> + current_controller = nc;
> + }
> + }
> +
> + if ((ret = ftw(path, add_cgroup, 4)) < 0) {
> + pr_perror("failed walking %s for empty cgroups\n", path);
> + return ret;
> + }
> + }
> +
> + return 0;
> +}
> +
> int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
> {
> int pid;
> @@ -431,15 +840,5 @@ int prepare_cgroup(void)
>
> n_sets = ce->n_sets;
> rst_sets = ce->sets;
> - if (n_sets)
> - /*
> - * We rely on the fact that all sets contain the same
> - * set of controllers. This is checked during dump
> - * with cg_set_compare(CGCMP_ISSUB) call.
> - */
> - ret = prepare_cgroup_sfd(rst_sets[0]);
> - else
> - ret = 0;
> -
> - return ret;
> + return prepare_cgroup_sfd(ce);
This if (n_sets) handled the case when we didn't have cgroups to dump.
E.g. -- when we just dump the tasks that live in the same cgroup set as
criu does. Will this hunk break this?
> }
> diff --git a/mount.c b/mount.c
> index 4d84f48..32410eb 100644
> --- a/mount.c
> +++ b/mount.c
> @@ -861,6 +861,9 @@ static struct fstype fstypes[] = {
> }, {
> .name = "debugfs",
> .code = FSTYPE__DEBUGFS,
> + }, {
> + .name = "cgroup",
> + .code = FSTYPE__CGROUP,
This would only handle the case when cgroup root is mounted inside container.
AFAIK cgroups are typically bind-mounted into container from the directory
where the container lives in. Maybe it makes sense to implement full cgroup
mount support separately?
> }
> };
>
> diff --git a/util.c b/util.c
> index d697f7a..d4033c9 100644
> --- a/util.c
> +++ b/util.c
> @@ -678,3 +678,37 @@ struct vma_area *alloc_vma_area(void)
>
> return p;
> }
> +
> +int mkdirp(const char* path)
> +{
> + size_t i;
> + char made_path[PATH_MAX], *pos;
> +
> + if (strlen(path) >= PATH_MAX) {
> + pr_err("path %s is longer than PATH_MAX", path);
> + return -1;
> + }
> +
> + strcpy(made_path, path);
> +
> + i = 0;
> + if (strchr(made_path, '/') == made_path)
It's equivalent to made_path[0] == '/' :)
> + i++;
> +
> + for (; i < strlen(made_path); i++) {
> + pos = strchr(made_path + i, '/');
> + if (pos)
> + *pos = '\0';
> + if (mkdir(made_path, 0755) < 0 && errno != EEXIST) {
> + pr_perror("couldn't mkdirpat directory\n");
> + return -1;
> + }
> + if (pos) {
> + *pos = '/';
> + i = pos - made_path;
> + } else
> + break;
> + }
> +
> + return 0;
> +}
>
Thanks,
Pavel
More information about the CRIU
mailing list