[Devel] [PATCH] cgroup: make sure a parent css isn't offlined before its children
Kirill Tkhai
ktkhai at virtuozzo.com
Tue May 19 11:32:51 MSK 2020
On 15.05.2020 19:13, Andrey Ryabinin wrote:
> From: Tejun Heo <tj at kernel.org>
>
> There are three subsystem callbacks in css shutdown path -
> css_offline(), css_released() and css_free(). Except for
> css_released(), cgroup core didn't guarantee the order of invocation.
> css_offline() or css_free() could be called on a parent css before its
> children. This behavior is unexpected and led to bugs in cpu and
> memory controller.
>
> This patch updates offline path so that a parent css is never offlined
> before its children. Each css keeps online_cnt which reaches zero iff
> itself and all its children are offline and offline_css() is invoked
> only after online_cnt reaches zero.
>
> This fixes the memory controller bug and allows the fix for cpu
> controller.
>
> Signed-off-by: Tejun Heo <tj at kernel.org>
> Reported-and-tested-by: Christian Borntraeger <borntraeger at de.ibm.com>
> Reported-by: Brian Christiansen <brian.o.christiansen at gmail.com>
> Link: http://lkml.kernel.org/g/5698A023.9070703@de.ibm.com
> Link: http://lkml.kernel.org/g/CAKB58ikDkzc8REt31WBkD99+hxNzjK4+FBmhkgS+NVrC9vjMSg@mail.gmail.com
> Cc: Heiko Carstens <heiko.carstens at de.ibm.com>
> Cc: Peter Zijlstra <peterz at infradead.org>
> Cc: stable at vger.kernel.org
>
> https://jira.sw.ru/browse/PSBM-104029
> (cherry picked from commit aa226ff4a1ce79f229c6b7a4c0a14e17fececd01)
> [aryabinin: User refcounts instead of atomics]
> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
> include/linux/cgroup.h | 6 ++++
> kernel/cgroup.c | 66 +++++++++++++++++++++++++-----------------
> 2 files changed, 45 insertions(+), 27 deletions(-)
>
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index bd48216e7ac2..1c1ee0c458e0 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -242,6 +242,12 @@ struct cgroup {
> */
> atomic_t count;
>
> + /*
> + * Incremented by online self and children. Used to guarantee that
> + * parents are not offlined before their children.
> + */
> + refcount_t online_cnt;
> +
> int id; /* ida allocated in-hierarchy ID */
>
> /*
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 50d4b3df0f0a..17ba1ab9c7b0 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1422,6 +1422,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
> INIT_LIST_HEAD(&root->subsys_list);
> INIT_LIST_HEAD(&root->root_list);
> INIT_LIST_HEAD(&root->allcg_list);
> + refcount_set(&cgrp->online_cnt, 1);
> root->number_of_cgroups = 1;
> cgrp->root = root;
> cgrp->name = &root_cgroup_name;
> @@ -4238,8 +4239,13 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
>
> if (ss->css_online)
> ret = ss->css_online(cgrp);
> - if (!ret)
> + if (!ret) {
> cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
> +
> + refcount_inc(&cgrp->online_cnt);
> + if (cgrp->parent)
> + refcount_inc(&cgrp->parent->online_cnt);
> + }
> return ret;
> }
>
> @@ -4509,9 +4515,11 @@ static void cgroup_css_killed(struct cgroup *cgrp)
> if (!atomic_dec_and_test(&cgrp->css_kill_cnt))
> return;
>
> - /* percpu ref's of all css's are killed, kick off the next step */
> - INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
> - queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
> + if (refcount_dec_and_test(&cgrp->online_cnt)) {
> + /* percpu ref's of all css's are killed, kick off the next step */
> + INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
> + queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
> + }
> }
>
> static void css_ref_killed_fn(struct percpu_ref *ref)
> @@ -4649,37 +4657,41 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
> static void cgroup_offline_fn(struct work_struct *work)
> {
> struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
> - struct cgroup *parent = cgrp->parent;
> - struct dentry *d = cgrp->dentry;
> struct cgroup_subsys *ss;
>
> mutex_lock(&cgroup_mutex);
>
> - /*
> - * css_tryget() is guaranteed to fail now. Tell subsystems to
> - * initate destruction.
> - */
> - for_each_subsys(cgrp->root, ss)
> - offline_css(ss, cgrp);
> + do {
> + struct cgroup *parent = cgrp->parent;
> + struct dentry *d = cgrp->dentry;
>
> - /*
> - * Put the css refs from cgroup_destroy_locked(). Each css holds
> - * an extra reference to the cgroup's dentry and cgroup removal
> - * proceeds regardless of css refs. On the last put of each css,
> - * whenever that may be, the extra dentry ref is put so that dentry
> - * destruction happens only after all css's are released.
> - */
> - for_each_subsys(cgrp->root, ss)
> - css_put(cgrp->subsys[ss->subsys_id]);
> + /*
> + * css_tryget() is guaranteed to fail now. Tell subsystems to
> + * initate destruction.
> + */
> + for_each_subsys(cgrp->root, ss)
> + offline_css(ss, cgrp);
>
> - /* delete this cgroup from parent->children */
> - list_del_rcu(&cgrp->sibling);
> - list_del_init(&cgrp->allcg_node);
> + /*
> + * Put the css refs from cgroup_destroy_locked(). Each css holds
> + * an extra reference to the cgroup's dentry and cgroup removal
> + * proceeds regardless of css refs. On the last put of each css,
> + * whenever that may be, the extra dentry ref is put so that dentry
> + * destruction happens only after all css's are released.
> + */
> + for_each_subsys(cgrp->root, ss)
> + css_put(cgrp->subsys[ss->subsys_id]);
>
> - dput(d);
> + /* delete this cgroup from parent->children */
> + list_del_rcu(&cgrp->sibling);
> + list_del_init(&cgrp->allcg_node);
> +
> + dput(d);
>
> - set_bit(CGRP_RELEASABLE, &parent->flags);
> - check_for_release(parent);
> + set_bit(CGRP_RELEASABLE, &parent->flags);
> + check_for_release(parent);
> + cgrp = parent;
> + } while (cgrp && refcount_dec_and_test(&cgrp->online_cnt));
>
> mutex_unlock(&cgroup_mutex);
> }
>
More information about the Devel
mailing list