[Devel] Re: [PATCH 3/7] [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup

Li Zefan lizf at cn.fujitsu.com
Mon Mar 16 23:44:50 PDT 2009


Paul Menage wrote:
> [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup
> 
> Currently the cgroups code makes the assumption that the subsystem
> pointers in a struct css_set uniquely identify the hierarchy->cgroup
> mappings associated with the css_set; and there's no way to directly
> identify the associated set of cgroups other than by indirecting
> through the appropriate subsystem state pointers.
> 
> This patch removes the need for that assumption by adding a
> back-pointer from struct cg_cgroup_link object to its associated
> cgroup; this allows the set of cgroups to be determined by traversing
> the cg_links list in the struct css_set.
> 
> Signed-off-by: Paul Menage <menage at google.com>
> 
> ---
> 
>  kernel/cgroup.c |  218 +++++++++++++++++++++++++++++++++++++++++++------------
>  1 files changed, 169 insertions(+), 49 deletions(-)
> 
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index ecf00ad..9cdbbac 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -202,6 +202,7 @@ struct cg_cgroup_link {
>  	 * cgroup, anchored on cgroup->css_sets
>  	 */
>  	struct list_head cgrp_link_list;
> +	struct cgroup *cgrp;
>  	/*
>  	 * List running through cg_cgroup_links pointing at a
>  	 * single css_set object, anchored on css_set->cg_links
> @@ -228,8 +229,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
>  static DEFINE_RWLOCK(css_set_lock);
>  static int css_set_count;
>  
> -/* hash table for cgroup groups. This improves the performance to
> - * find an existing css_set */
> +/*
> + * hash table for cgroup groups. This improves the performance to find
> + * an existing css_set. This hash doesn't (currently) take into
> + * account cgroups in empty hierarchies.
> + */
>  #define CSS_SET_HASH_BITS	7
>  #define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
>  static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
> @@ -339,6 +343,77 @@ static inline void put_css_set_taskexit(struct css_set *cg)
>  }
>  
>  /*
> + * compare_css_sets - helper function for find_existing_css_set().
> + * @cg: candidate css_set being tested
> + * @old_cg: existing css_set for a task
> + * @new_cgrp: cgroup that's being entered by the task
> + * @template: desired set of css pointers in css_set (pre-calculated)
> + *
> + * Returns true if "cg" matches "old_cg" except for the hierarchy
> + * which "new_cgrp" belongs to, for which it should match "new_cgrp".
> + */
> +static bool compare_css_sets(struct css_set *cg,
> +			     struct css_set *old_cg,
> +			     struct cgroup *new_cgrp,
> +			     struct cgroup_subsys_state *template[])
> +{
> +	struct list_head *l1, *l2;

a blank line is needed. this comment can apply to some other places
in this patchset.

> +	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
> +		/* Not all subsystems matched */
> +		return false;
> +	}
> +
> +	/*
> +	 * Compare cgroup pointers in order to distinguish between
> +	 * different cgroups in heirarchies with no subsystems. We
> +	 * could get by with just this check alone (and skip the
> +	 * memcmp above) but on most setups the memcmp check will
> +	 * avoid the need for this more expensive check on almost all
> +	 * candidates.
> +	 */
> +
> +	l1 = &cg->cg_links;
> +	l2 = &old_cg->cg_links;
> +	while (1) {
> +		struct cg_cgroup_link *cgl1, *cgl2;
> +		struct cgroup *cg1, *cg2;
> +
> +		l1 = l1->next;
> +		l2 = l2->next;
> +		/* See if we reached the end - both lists are equal length. */
> +		if (l1 == &cg->cg_links) {
> +			BUG_ON(l2 != &old_cg->cg_links);
> +			break;
> +		} else {
> +			BUG_ON(l2 == &old_cg->cg_links);
> +		}
> +		/* Locate the cgroups associated with these links. */
> +		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
> +		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
> +		cg1 = cgl1->cgrp;
> +		cg2 = cgl2->cgrp;
> +		/* Hierarchies should be linked in the same order. */
> +		BUG_ON(cg1->root != cg2->root);
> +
> +		/*
> +		 * If this hierarchy is the hierarchy of the cgroup
> +		 * that's changing, then we need to check that this
> +		 * css_set points to the new cgroup; if it's any other
> +		 * hierarchy, then this css_set should point to the
> +		 * same cgroup as the old css_set.
> +		 */
> +		if (cg1->root == new_cgrp->root) {
> +			if (cg1 != new_cgrp)
> +				return false;
> +		} else {
> +			if (cg1 != cg2)
> +				return false;
> +		}
> +	}
> +	return true;
> +}
> +
> +/*
>   * find_existing_css_set() is a helper for
>   * find_css_set(), and checks to see whether an existing
>   * css_set is suitable.
> @@ -379,10 +454,11 @@ static struct css_set *find_existing_css_set(
>  
>  	hhead = css_set_hash(template);
>  	hlist_for_each_entry(cg, node, hhead, hlist) {
> -		if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
> -			/* All subsystems matched */
> -			return cg;
> -		}
> +		if (!compare_css_sets(cg, oldcg, cgrp, template))
> +			continue;
> +
> +		/* This css_set matches what we need */
> +		return cg;
>  	}
>  
>  	/* No existing cgroup group matched */
> @@ -436,8 +512,13 @@ static void link_css_set(struct list_head *tmp_cg_links,
>  	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
>  				cgrp_link_list);
>  	link->cg = cg;
> +	link->cgrp = cgrp;
>  	list_move(&link->cgrp_link_list, &cgrp->css_sets);
> -	list_add(&link->cg_link_list, &cg->cg_links);
> +	/*
> +	 * Always add links to the tail of the list so that the list
> +	 * is sorted by order of hierarchy creation
> +	 */
> +	list_add_tail(&link->cg_link_list, &cg->cg_links);
>  }
>  
>  /*
> @@ -457,6 +538,7 @@ static struct css_set *find_css_set(
>  	struct list_head tmp_cg_links;
>  
>  	struct hlist_head *hhead;
> +	struct cg_cgroup_link *link, *saved_link;
>  
>  	/* First see if we already have a cgroup group that matches
>  	 * the desired set */
> @@ -492,18 +574,15 @@ static struct css_set *find_css_set(
>  	/* Add reference counts and links from the new css_set. */
>  	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
>  		struct cgroup *cgrp = res->subsys[i]->cgroup;
> -		struct cgroup_subsys *ss = subsys[i];
>  		atomic_inc(&cgrp->count);
> -		/*
> -		 * We want to add a link once per cgroup, so we
> -		 * only do it for the first subsystem in each
> -		 * hierarchy
> -		 */
> -		if (ss->root->subsys_list.next == &ss->sibling)
> -			link_css_set(&tmp_cg_links, res, cgrp);
>  	}
> -	if (list_empty(&rootnode.subsys_list))
> -		link_css_set(&tmp_cg_links, res, dummytop);
> +	list_for_each_entry_safe(link, saved_link, &oldcg->cg_links,
> +				 cg_link_list) {

list_for_each_entry() is ok. this comment can also apply to some
other places in this patchset.

> +		struct cgroup *c = link->cgrp;
> +		if (c->root == cgrp->root)
> +			c = cgrp;
> +		link_css_set(&tmp_cg_links, res, c);
> +	}
>  
>  	BUG_ON(!list_empty(&tmp_cg_links));
>  
> @@ -519,6 +598,42 @@ static struct css_set *find_css_set(
>  }
>  
>  /*
> + * Return the cgroup for "task" from the given hierarchy. Must be
> + * called with cgroup_mutex held.
> + */
> +struct cgroup *task_cgroup_from_root(struct task_struct *task,
> +				     struct cgroupfs_root *root)

static

> +{
> +	struct css_set *css;
> +	struct cgroup *res = NULL;
> +
> +	BUG_ON(!mutex_is_locked(&cgroup_mutex));
> +	read_lock(&css_set_lock);
> +	/*
> +	 * No need to lock the task - since we hold cgroup_mutex the
> +	 * task can't change groups, so the only thing that can happen
> +	 * is that it exits and its css is set back to init_css_set.
> +	 */
> +	css = task->cgroups;
> +	if (css == &init_css_set) {
> +		res = &root->top_cgroup;
> +	} else {
> +		struct cg_cgroup_link *link, *saved_link;
> +		list_for_each_entry_safe(link, saved_link, &css->cg_links,
> +					 cg_link_list) {
> +			struct cgroup *c = link->cgrp;
> +			if (c->root == root) {
> +				res = c;
> +				break;
> +			}
> +		}
> +	}
> +	read_unlock(&css_set_lock);
> +	BUG_ON(!res);
> +	return res;
> +}
> +
> +/*
>   * There is one global cgroup mutex. We also require taking
>   * task_lock() when dereferencing a task's cgroup subsys pointers.
>   * See "The task_lock() exception", at the end of this comment.
> @@ -1281,27 +1396,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
>  	return 0;
>  }
>  
> -/*
> - * Return the first subsystem attached to a cgroup's hierarchy, and
> - * its subsystem id.
> - */
> -
> -static void get_first_subsys(const struct cgroup *cgrp,
> -			struct cgroup_subsys_state **css, int *subsys_id)
> -{
> -	const struct cgroupfs_root *root = cgrp->root;
> -	const struct cgroup_subsys *test_ss;
> -	BUG_ON(list_empty(&root->subsys_list));
> -	test_ss = list_entry(root->subsys_list.next,
> -			     struct cgroup_subsys, sibling);
> -	if (css) {
> -		*css = cgrp->subsys[test_ss->subsys_id];
> -		BUG_ON(!*css);
> -	}
> -	if (subsys_id)
> -		*subsys_id = test_ss->subsys_id;
> -}
> -
>  /**
>   * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
>   * @cgrp: the cgroup the task is attaching to
> @@ -1318,12 +1412,9 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
>  	struct css_set *cg;
>  	struct css_set *newcg;
>  	struct cgroupfs_root *root = cgrp->root;
> -	int subsys_id;
> -
> -	get_first_subsys(cgrp, NULL, &subsys_id);
>  
>  	/* Nothing to do if the task is already in that cgroup */
> -	oldcgrp = task_cgroup(tsk, subsys_id);
> +	oldcgrp = task_cgroup_from_root(tsk, root);
>  	if (cgrp == oldcgrp)
>  		return 0;
>  
> @@ -1881,7 +1972,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
>   * the start of a css_set
>   */
>  static void cgroup_advance_iter(struct cgroup *cgrp,
> -					  struct cgroup_iter *it)
> +				struct cgroup_iter *it)
>  {
>  	struct list_head *l = it->cg_link;
>  	struct cg_cgroup_link *link;
> @@ -2829,6 +2920,7 @@ int __init cgroup_init_early(void)
>  	init_task.cgroups = &init_css_set;
>  
>  	init_css_set_link.cg = &init_css_set;
> +	init_css_set_link.cgrp = dummytop;
>  	list_add(&init_css_set_link.cgrp_link_list,
>  		 &rootnode.top_cgroup.css_sets);
>  	list_add(&init_css_set_link.cg_link_list,
> @@ -2936,7 +3028,6 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
>  	for_each_active_root(root) {
>  		struct cgroup_subsys *ss;
>  		struct cgroup *cgrp;
> -		int subsys_id;
>  		int count = 0;
>  
>  		seq_printf(m, "%lu:", root->subsys_bits);
> @@ -2946,8 +3037,7 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
>  			seq_printf(m, "%sname=%s",
>  				   count++ ? "," : "", root->name);
>  		seq_putc(m, ':');
> -		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
> -		cgrp = task_cgroup(tsk, subsys_id);
> +		cgrp = task_cgroup_from_root(tsk, root);
>  		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
>  		if (retval < 0)
>  			goto out_unlock;
> @@ -3273,13 +3363,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
>  {
>  	int ret;
>  	struct cgroup *target;
> -	int subsys_id;
>  
>  	if (cgrp == dummytop)
>  		return 1;
>  
> -	get_first_subsys(cgrp, NULL, &subsys_id);
> -	target = task_cgroup(task, subsys_id);
> +	target = task_cgroup_from_root(task, cgrp->root);
>  	while (cgrp != target && cgrp!= cgrp->top_cgroup)
>  		cgrp = cgrp->parent;
>  	ret = (cgrp == target);
> @@ -3694,6 +3782,33 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
>  	return count;
>  }
>  
> +static int current_css_set_cg_links_read(struct cgroup *cont,
> +					 struct cftype *cft,
> +					 struct seq_file *seq)
> +{
> +	struct cg_cgroup_link *link, *saved_link;
> +	struct css_set *cg;
> +	write_lock_irq(&css_set_lock);
> +	task_lock(current);
> +	cg = current->cgroups;
> +	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
> +				 cg_link_list) {
> +		struct cgroup *c = link->cgrp;
> +		const char *name;
> +		rcu_read_lock();
> +		if (c->dentry)
> +			name = c->dentry->d_name.name;
> +		else
> +			name = "?";
> +		seq_printf(seq, "Root %lu group %s\n",
> +			   c->root->subsys_bits, name);
> +		rcu_read_unlock();
> +	}
> +	task_unlock(current);
> +	write_unlock_irq(&css_set_lock);
> +	return 0;
> +}
> +
>  static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
>  {
>  	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
> @@ -3720,6 +3835,11 @@ static struct cftype debug_files[] =  {
>  	},
>  
>  	{
> +		.name = "current_css_set_cg_links",
> +		.read_seq_string = current_css_set_cg_links_read,
> +	},
> +
> +	{
>  		.name = "releasable",
>  		.read_u64 = releasable_read,
>  	},
> 
> 
> 


_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list