[Devel] Re: [PATCH 3/7] [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup
Li Zefan
lizf at cn.fujitsu.com
Mon Mar 16 23:44:50 PDT 2009
Paul Menage wrote:
> [RFC] Add a back-pointer from struct cg_cgroup_link to struct cgroup
>
> Currently the cgroups code makes the assumption that the subsystem
> pointers in a struct css_set uniquely identify the hierarchy->cgroup
> mappings associated with the css_set; and there's no way to directly
> identify the associated set of cgroups other than by indirecting
> through the appropriate subsystem state pointers.
>
> This patch removes the need for that assumption by adding a
> back-pointer from struct cg_cgroup_link object to its associated
> cgroup; this allows the set of cgroups to be determined by traversing
> the cg_links list in the struct css_set.
>
> Signed-off-by: Paul Menage <menage at google.com>
>
> ---
>
> kernel/cgroup.c | 218 +++++++++++++++++++++++++++++++++++++++++++------------
> 1 files changed, 169 insertions(+), 49 deletions(-)
>
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index ecf00ad..9cdbbac 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -202,6 +202,7 @@ struct cg_cgroup_link {
> * cgroup, anchored on cgroup->css_sets
> */
> struct list_head cgrp_link_list;
> + struct cgroup *cgrp;
> /*
> * List running through cg_cgroup_links pointing at a
> * single css_set object, anchored on css_set->cg_links
> @@ -228,8 +229,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
> static DEFINE_RWLOCK(css_set_lock);
> static int css_set_count;
>
> -/* hash table for cgroup groups. This improves the performance to
> - * find an existing css_set */
> +/*
> + * hash table for cgroup groups. This improves the performance to find
> + * an existing css_set. This hash doesn't (currently) take into
> + * account cgroups in empty hierarchies.
> + */
> #define CSS_SET_HASH_BITS 7
> #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
> static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
> @@ -339,6 +343,77 @@ static inline void put_css_set_taskexit(struct css_set *cg)
> }
>
> /*
> + * compare_css_sets - helper function for find_existing_css_set().
> + * @cg: candidate css_set being tested
> + * @old_cg: existing css_set for a task
> + * @new_cgrp: cgroup that's being entered by the task
> + * @template: desired set of css pointers in css_set (pre-calculated)
> + *
> + * Returns true if "cg" matches "old_cg" except for the hierarchy
> + * which "new_cgrp" belongs to, for which it should match "new_cgrp".
> + */
> +static bool compare_css_sets(struct css_set *cg,
> + struct css_set *old_cg,
> + struct cgroup *new_cgrp,
> + struct cgroup_subsys_state *template[])
> +{
> + struct list_head *l1, *l2;
a blank line is needed. this comment can apply to some other places
in this patchset.
> + if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
> + /* Not all subsystems matched */
> + return false;
> + }
> +
> + /*
> + * Compare cgroup pointers in order to distinguish between
> + * different cgroups in heirarchies with no subsystems. We
> + * could get by with just this check alone (and skip the
> + * memcmp above) but on most setups the memcmp check will
> + * avoid the need for this more expensive check on almost all
> + * candidates.
> + */
> +
> + l1 = &cg->cg_links;
> + l2 = &old_cg->cg_links;
> + while (1) {
> + struct cg_cgroup_link *cgl1, *cgl2;
> + struct cgroup *cg1, *cg2;
> +
> + l1 = l1->next;
> + l2 = l2->next;
> + /* See if we reached the end - both lists are equal length. */
> + if (l1 == &cg->cg_links) {
> + BUG_ON(l2 != &old_cg->cg_links);
> + break;
> + } else {
> + BUG_ON(l2 == &old_cg->cg_links);
> + }
> + /* Locate the cgroups associated with these links. */
> + cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
> + cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
> + cg1 = cgl1->cgrp;
> + cg2 = cgl2->cgrp;
> + /* Hierarchies should be linked in the same order. */
> + BUG_ON(cg1->root != cg2->root);
> +
> + /*
> + * If this hierarchy is the hierarchy of the cgroup
> + * that's changing, then we need to check that this
> + * css_set points to the new cgroup; if it's any other
> + * hierarchy, then this css_set should point to the
> + * same cgroup as the old css_set.
> + */
> + if (cg1->root == new_cgrp->root) {
> + if (cg1 != new_cgrp)
> + return false;
> + } else {
> + if (cg1 != cg2)
> + return false;
> + }
> + }
> + return true;
> +}
> +
> +/*
> * find_existing_css_set() is a helper for
> * find_css_set(), and checks to see whether an existing
> * css_set is suitable.
> @@ -379,10 +454,11 @@ static struct css_set *find_existing_css_set(
>
> hhead = css_set_hash(template);
> hlist_for_each_entry(cg, node, hhead, hlist) {
> - if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
> - /* All subsystems matched */
> - return cg;
> - }
> + if (!compare_css_sets(cg, oldcg, cgrp, template))
> + continue;
> +
> + /* This css_set matches what we need */
> + return cg;
> }
>
> /* No existing cgroup group matched */
> @@ -436,8 +512,13 @@ static void link_css_set(struct list_head *tmp_cg_links,
> link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
> cgrp_link_list);
> link->cg = cg;
> + link->cgrp = cgrp;
> list_move(&link->cgrp_link_list, &cgrp->css_sets);
> - list_add(&link->cg_link_list, &cg->cg_links);
> + /*
> + * Always add links to the tail of the list so that the list
> + * is sorted by order of hierarchy creation
> + */
> + list_add_tail(&link->cg_link_list, &cg->cg_links);
> }
>
> /*
> @@ -457,6 +538,7 @@ static struct css_set *find_css_set(
> struct list_head tmp_cg_links;
>
> struct hlist_head *hhead;
> + struct cg_cgroup_link *link, *saved_link;
>
> /* First see if we already have a cgroup group that matches
> * the desired set */
> @@ -492,18 +574,15 @@ static struct css_set *find_css_set(
> /* Add reference counts and links from the new css_set. */
> for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
> struct cgroup *cgrp = res->subsys[i]->cgroup;
> - struct cgroup_subsys *ss = subsys[i];
> atomic_inc(&cgrp->count);
> - /*
> - * We want to add a link once per cgroup, so we
> - * only do it for the first subsystem in each
> - * hierarchy
> - */
> - if (ss->root->subsys_list.next == &ss->sibling)
> - link_css_set(&tmp_cg_links, res, cgrp);
> }
> - if (list_empty(&rootnode.subsys_list))
> - link_css_set(&tmp_cg_links, res, dummytop);
> + list_for_each_entry_safe(link, saved_link, &oldcg->cg_links,
> + cg_link_list) {
list_for_each_entry() is ok. this comment can also apply to some
other places in this patchset.
> + struct cgroup *c = link->cgrp;
> + if (c->root == cgrp->root)
> + c = cgrp;
> + link_css_set(&tmp_cg_links, res, c);
> + }
>
> BUG_ON(!list_empty(&tmp_cg_links));
>
> @@ -519,6 +598,42 @@ static struct css_set *find_css_set(
> }
>
> /*
> + * Return the cgroup for "task" from the given hierarchy. Must be
> + * called with cgroup_mutex held.
> + */
> +struct cgroup *task_cgroup_from_root(struct task_struct *task,
> + struct cgroupfs_root *root)
static
> +{
> + struct css_set *css;
> + struct cgroup *res = NULL;
> +
> + BUG_ON(!mutex_is_locked(&cgroup_mutex));
> + read_lock(&css_set_lock);
> + /*
> + * No need to lock the task - since we hold cgroup_mutex the
> + * task can't change groups, so the only thing that can happen
> + * is that it exits and its css is set back to init_css_set.
> + */
> + css = task->cgroups;
> + if (css == &init_css_set) {
> + res = &root->top_cgroup;
> + } else {
> + struct cg_cgroup_link *link, *saved_link;
> + list_for_each_entry_safe(link, saved_link, &css->cg_links,
> + cg_link_list) {
> + struct cgroup *c = link->cgrp;
> + if (c->root == root) {
> + res = c;
> + break;
> + }
> + }
> + }
> + read_unlock(&css_set_lock);
> + BUG_ON(!res);
> + return res;
> +}
> +
> +/*
> * There is one global cgroup mutex. We also require taking
> * task_lock() when dereferencing a task's cgroup subsys pointers.
> * See "The task_lock() exception", at the end of this comment.
> @@ -1281,27 +1396,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
> return 0;
> }
>
> -/*
> - * Return the first subsystem attached to a cgroup's hierarchy, and
> - * its subsystem id.
> - */
> -
> -static void get_first_subsys(const struct cgroup *cgrp,
> - struct cgroup_subsys_state **css, int *subsys_id)
> -{
> - const struct cgroupfs_root *root = cgrp->root;
> - const struct cgroup_subsys *test_ss;
> - BUG_ON(list_empty(&root->subsys_list));
> - test_ss = list_entry(root->subsys_list.next,
> - struct cgroup_subsys, sibling);
> - if (css) {
> - *css = cgrp->subsys[test_ss->subsys_id];
> - BUG_ON(!*css);
> - }
> - if (subsys_id)
> - *subsys_id = test_ss->subsys_id;
> -}
> -
> /**
> * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
> * @cgrp: the cgroup the task is attaching to
> @@ -1318,12 +1412,9 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
> struct css_set *cg;
> struct css_set *newcg;
> struct cgroupfs_root *root = cgrp->root;
> - int subsys_id;
> -
> - get_first_subsys(cgrp, NULL, &subsys_id);
>
> /* Nothing to do if the task is already in that cgroup */
> - oldcgrp = task_cgroup(tsk, subsys_id);
> + oldcgrp = task_cgroup_from_root(tsk, root);
> if (cgrp == oldcgrp)
> return 0;
>
> @@ -1881,7 +1972,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
> * the start of a css_set
> */
> static void cgroup_advance_iter(struct cgroup *cgrp,
> - struct cgroup_iter *it)
> + struct cgroup_iter *it)
> {
> struct list_head *l = it->cg_link;
> struct cg_cgroup_link *link;
> @@ -2829,6 +2920,7 @@ int __init cgroup_init_early(void)
> init_task.cgroups = &init_css_set;
>
> init_css_set_link.cg = &init_css_set;
> + init_css_set_link.cgrp = dummytop;
> list_add(&init_css_set_link.cgrp_link_list,
> &rootnode.top_cgroup.css_sets);
> list_add(&init_css_set_link.cg_link_list,
> @@ -2936,7 +3028,6 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
> for_each_active_root(root) {
> struct cgroup_subsys *ss;
> struct cgroup *cgrp;
> - int subsys_id;
> int count = 0;
>
> seq_printf(m, "%lu:", root->subsys_bits);
> @@ -2946,8 +3037,7 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
> seq_printf(m, "%sname=%s",
> count++ ? "," : "", root->name);
> seq_putc(m, ':');
> - get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
> - cgrp = task_cgroup(tsk, subsys_id);
> + cgrp = task_cgroup_from_root(tsk, root);
> retval = cgroup_path(cgrp, buf, PAGE_SIZE);
> if (retval < 0)
> goto out_unlock;
> @@ -3273,13 +3363,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
> {
> int ret;
> struct cgroup *target;
> - int subsys_id;
>
> if (cgrp == dummytop)
> return 1;
>
> - get_first_subsys(cgrp, NULL, &subsys_id);
> - target = task_cgroup(task, subsys_id);
> + target = task_cgroup_from_root(task, cgrp->root);
> while (cgrp != target && cgrp!= cgrp->top_cgroup)
> cgrp = cgrp->parent;
> ret = (cgrp == target);
> @@ -3694,6 +3782,33 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
> return count;
> }
>
> +static int current_css_set_cg_links_read(struct cgroup *cont,
> + struct cftype *cft,
> + struct seq_file *seq)
> +{
> + struct cg_cgroup_link *link, *saved_link;
> + struct css_set *cg;
> + write_lock_irq(&css_set_lock);
> + task_lock(current);
> + cg = current->cgroups;
> + list_for_each_entry_safe(link, saved_link, &cg->cg_links,
> + cg_link_list) {
> + struct cgroup *c = link->cgrp;
> + const char *name;
> + rcu_read_lock();
> + if (c->dentry)
> + name = c->dentry->d_name.name;
> + else
> + name = "?";
> + seq_printf(seq, "Root %lu group %s\n",
> + c->root->subsys_bits, name);
> + rcu_read_unlock();
> + }
> + task_unlock(current);
> + write_unlock_irq(&css_set_lock);
> + return 0;
> +}
> +
> static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
> {
> return test_bit(CGRP_RELEASABLE, &cgrp->flags);
> @@ -3720,6 +3835,11 @@ static struct cftype debug_files[] = {
> },
>
> {
> + .name = "current_css_set_cg_links",
> + .read_seq_string = current_css_set_cg_links_read,
> + },
> +
> + {
> .name = "releasable",
> .read_u64 = releasable_read,
> },
>
>
>
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list