[CRIU] [PATCH 2/2] Attempt to restore cgroups

Pavel Emelyanov xemul at parallels.com
Fri Jul 4 03:56:14 PDT 2014


On 07/03/2014 06:23 PM, Tycho Andersen wrote:
> Ugh, and here's one without the stupid debug statement again, sorry
> about that.

Would you also run this patch through linux kernel's scripts/checkpatch.pl
script. It finds ~20 coding style errors. Warnings can be ignored :)

And some more comments inline.

> During the dump phase, /proc/cgroups is parsed to find co-mounted cgroups.
> Then, for each task /proc/self/cgroup is parsed for the cgroups that it is a
> member of, and that cgroup is traversed to find any child cgroups which may
> also need restoring. All of this information is persisted along with the
> original cg_sets, which indicate which cgroups a task is a member of.
> 
> On restore, an initial phase creates all the cgroups which were saved and
> attempts to restore any peroperties they had. Then the tasks are restored into
> their respective cgroups via cg_sets as usual.

Can you also add test into zdtm/live/static/ suite? These are run on 24x7 manner
and make sure nothing gets broken accidentally.

> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
>  cgroup.c              | 449 +++++++++++++++++++++++++++++++++++++++++++++++---
>  cr-dump.c             |   3 +
>  include/cgroup.h      |  46 ++++++
>  include/proc_parse.h  |   3 +
>  include/util.h        |   7 +-
>  mount.c               |   3 +
>  proc_parse.c          |  75 ++++++++-
>  protobuf/cgroup.proto |  16 +-
>  protobuf/mnt.proto    |   1 +
>  util.c                |  34 ++++
>  10 files changed, 609 insertions(+), 28 deletions(-)
> 
> diff --git a/cgroup.c b/cgroup.c
> index 1fe5e6d..306bfe3 100644
> --- a/cgroup.c
> +++ b/cgroup.c
> @@ -18,7 +20,8 @@
>  /*
>   * This structure describes set of controller groups
>   * a task lives in. The cg_ctl entries are stored in
> - * the @ctls list sorted by the .name field.
> + * the @ctls list sorted by the .name field and then
> + * by the .path field.

Why do we need the .path sorting as well?

>   */
>  
>  struct cg_set {
> @@ -118,6 +125,265 @@ static struct cg_set *get_cg_set(struct list_head *ctls, unsigned int n_ctls)
>  	return cs;
>  }
>  

> +int parse_cg_info()
> +{
> +	if (parse_cgroups(&cgroups, &n_cgroups) < 0)
> +		return -1;
> +
> +	cg_mntinfo = parse_mountinfo(getpid(), NULL);
> +
> +	if (!cg_mntinfo)
> +		return -1;
> +	return 0;
> +}
> +
> +char *get_cgroup_mount_point(const char *controller)

This one can be static.

> +{
> +	struct mount_info *m;
> +	char path[PATH_MAX];
> +
> +	for (m = cg_mntinfo; m != NULL; m = m->next) {
> +		if (strcmp(m->fstype->name, "cgroup") == 0) {
> +			char *slash;
> +
> +			slash = strrchr(m->mountpoint, '/');
> +			if (!slash) {
> +				pr_err("bad path %s\n", path);
> +				return NULL;
> +			}
> +
> +			if (strcmp(slash + 1, controller) == 0) {
> +				/* skip the leading '.' in mountpoint */
> +				strncpy(path, m->mountpoint + 1, slash - m->mountpoint + 1);
> +				path[slash - m->mountpoint] = '\0';
> +
> +				return xstrdup(path);
> +			}
> +		}
> +	}
> +
> +	return NULL;
> +}
> +

Please, put here comment saying that these two are arguments for the ftw() callback.

> +static struct cg_controller	*current_controller;
> +static char			*current_controller_name;
> +
> +#define EXACT_MATCH	0
> +#define PARENT_MATCH	1
> +#define NO_MATCH	2
> +

> +static int add_cgroup(const char *fpath, const struct stat *sb, int typeflag)
> +{
> +	struct cgroup_dir *ncd = NULL, *match;
> +	int ret = 0;
> +	char pbuf[PATH_MAX], *name, *path;
> +
> +
> +	if (typeflag == FTW_D) {
> +		FILE *f;
> +		int mtype;
> +
> +		strncpy(pbuf, fpath, PATH_MAX);
> +
> +		pr_info("adding cgroup %s\n", fpath);
> +
> +		ncd = xmalloc(sizeof(*ncd));
> +		if(!ncd) {
> +			ret = -1;
> +			goto out;
> +		}
> +		ncd->path = NULL;
> +
> +		name = strstr(fpath, current_controller_name);
> +		if (!name) {
> +			pr_err("%s has no name component (%s)\n", fpath, current_controller_name);
> +			ret = -1;
> +			goto out;
> +		}
> +
> +		path = strchr(name, '/');
> +		if (!path) {
> +			pr_err("Bad path %s\n", name);
> +			ret = -1;
> +			goto out;
> +		}
> +
> +		ncd->path = xstrdup(path);
> +		if (!ncd->path) {
> +			ret = -1;
> +			goto out;
> +		}
> +
> +		mtype = find_dir(path, &current_controller->heads, &match);

Taking into account that a) now we call the collect_cgroups() for root item only
and b) ftw guarantees to descend the file tree, do we still need this _MATCH-ing
engine?

> +
> +		switch(mtype) {
> +			/* ignore co-mounted cgroups */
> +			case EXACT_MATCH :
> +				goto out;
> +			case PARENT_MATCH :
> +				list_add_tail(&ncd->siblings, &match->children);
> +				match->n_children++;
> +				break;
> +			case NO_MATCH :
> +				list_add_tail(&ncd->siblings, &current_controller->heads);
> +				current_controller->n_heads++;
> +				break;
> +		}
> +
> +		INIT_LIST_HEAD(&ncd->children);
> +		ncd->n_children = 0;
> +		ncd->controller = current_controller;
> +
> +		ncd->flags = 0;
> +
> +		snprintf(pbuf, PATH_MAX, "%s/memory.limit_in_bytes", fpath);
> +		f = fopen(pbuf, "r");
> +		if (f) {
> +			if (fscanf(f, "%" SCNu64, &ncd->mem_limit) != 1) {
> +				pr_err("Failed scanning %s\n", pbuf);
> +				ret = -1;
> +				goto out;
> +			}
> +			ncd->flags |= HAS_MEM_LIMIT;
> +			fclose(f);
> +		}
> +
> +		snprintf(pbuf, PATH_MAX, "%s/cpu.shares", fpath);
> +		f = fopen(pbuf, "r");
> +		if (f) {
> +			if (fscanf(f, "%" SCNu32, &ncd->cpu_shares) != 1) {
> +				pr_err("Failed scanning %s for u32\n", pbuf);
> +				ret = -1;
> +				goto out;
> +			}
> +			ncd->flags |= HAS_CPU_SHARES;
> +			fclose(f);
> +		}
> +
> +		return 0;
> +	}
> +
> +out:
> +	if (ncd) {
> +		if (ncd->path)
> +			xfree(ncd->path);
> +		xfree(ncd);
> +	}
> +
> +	return ret;
> +}
> +
> +static int collect_cgroups(struct list_head *ctls)
> +{
> +	struct cg_ctl *cc;
> +	int ret = 0;
> +
> +	list_for_each_entry(cc, ctls, l) {
> +		char path[PATH_MAX];
> +		char *name, *mount_point;
> +		struct cg_controller *cg;
> +		int i;
> +
> +		if (strstartswith(cc->name, "name="))
> +			name = cc->name + 5;
> +		else
> +			name = cc->name;
> +
> +		mount_point = get_cgroup_mount_point(name);
> +		if (!mount_point) {
> +			pr_err("cgroup mount point for %s not found!\n", name);
> +			return -1;
> +		}
> +
> +		snprintf(path, PATH_MAX, "%s/%s%s", mount_point, name, cc->path);
> +
> +		current_controller = NULL;
> +		current_controller_name = name;
> +
> +		/* Use the previously allocated struct for this controller if
> +		 * there is one */

I'm not sure I understand this comment. Can you qualify one, please?

> +		list_for_each_entry(cg, &cgroups, l) {
> +			for (i = 0; i < cg->n_controllers; i++) {
> +				if (strcmp(cg->controllers[i], cc->name) == 0) {
> +					current_controller = cg;
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (!current_controller) {
> +			/* only allow "fake" controllers to be created this way */
> +			if (!strstartswith(cc->name, "name=")) {
> +				pr_err("controller %s not found\n", cc->name);
> +				return -1;
> +			} else {
> +				struct cg_controller *nc = new_controller(cc->name, -1);
> +				list_add_tail(&nc->l, &cg->l);
> +				n_cgroups++;
> +				current_controller = nc;
> +			}
> +		}
> +
> +		if ((ret = ftw(path, add_cgroup, 4)) < 0) {
> +			pr_perror("failed walking %s for empty cgroups\n", path);
> +			return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  int dump_task_cgroup(struct pstree_item *item, u32 *cg_id)
>  {
>  	int pid;

> @@ -431,15 +840,5 @@ int prepare_cgroup(void)
>  
>  	n_sets = ce->n_sets;
>  	rst_sets = ce->sets;
> -	if (n_sets)
> -		/*
> -		 * We rely on the fact that all sets contain the same
> -		 * set of controllers. This is checked during dump
> -		 * with cg_set_compare(CGCMP_ISSUB) call.
> -		 */
> -		ret = prepare_cgroup_sfd(rst_sets[0]);
> -	else
> -		ret = 0;
> -
> -	return ret;
> +	return prepare_cgroup_sfd(ce);

This if (n_sets) handled the case when we didn't have cgroups to dump.
E.g. -- when we just dump the tasks that live in the same cgroup set as
criu does. Will this hunk break this?

>  }

> diff --git a/mount.c b/mount.c
> index 4d84f48..32410eb 100644
> --- a/mount.c
> +++ b/mount.c
> @@ -861,6 +861,9 @@ static struct fstype fstypes[] = {
>  	}, {
>  		.name = "debugfs",
>  		.code = FSTYPE__DEBUGFS,
> +	}, {
> +		.name = "cgroup",
> +		.code = FSTYPE__CGROUP,

This would only handle the case when cgroup root is mounted inside container.
AFAIK cgroups are typically bind-mounted into container from the directory
where the container lives in. Maybe it makes sense to implement full cgroup
mount support separately?

>  	}
>  };
>  

> diff --git a/util.c b/util.c
> index d697f7a..d4033c9 100644
> --- a/util.c
> +++ b/util.c
> @@ -678,3 +678,37 @@ struct vma_area *alloc_vma_area(void)
>  
>  	return p;
>  }
> +
> +int mkdirp(const char* path)
> +{
> +	size_t i;
> +	char made_path[PATH_MAX], *pos;
> +
> +	if (strlen(path) >= PATH_MAX) {
> +		pr_err("path %s is longer than PATH_MAX", path);
> +		return -1;
> +	}
> +
> +	strcpy(made_path, path);
> +
> +	i = 0;
> +	if (strchr(made_path, '/') == made_path)

It's equivalent to made_path[0] == '/' :)

> +		i++;
> +
> +	for (; i < strlen(made_path); i++) {
> +		pos = strchr(made_path + i, '/');
> +		if (pos)
> +			*pos = '\0';
> +		if (mkdir(made_path, 0755) < 0 && errno != EEXIST) {
> +			pr_perror("couldn't mkdirpat directory\n");
> +			return -1;
> +		}
> +		if (pos) {
> +			*pos = '/';
> +			i = pos - made_path;
> +		} else
> +			break;
> +	}
> +
> +	return 0;
> +}
> 

Thanks,
Pavel



More information about the CRIU mailing list