[Devel] [PATCH 2/2] cgroup, memcg: Rework css_stacks debugging.
Kirill Tkhai
ktkhai at virtuozzo.com
Thu May 21 16:34:46 MSK 2020
On 18.05.2020 18:14, Andrey Ryabinin wrote:
> Current css_stacks debugging isn't very usefull, it records only ~128 stacks
> which is not enough and doesn't allow us to identify the problem.
> Instead of recording full stacks, record only instruction pointer
> and the counter of how often we hit it.
>
> Example of output:
> css_relese: css_get/put ips
> 1, [ffffffffb9713c68] __ub_set_css+0x98/0xe0
> 851675, [ffffffffb985a208] get_mem_cgroup_from_mm+0x58/0xb0
> 776, [ffffffffb985a3e3] __memcg_kmem_get_cache+0x183/0x1a0
> 776, [ffffffffb985a3b8] __memcg_kmem_get_cache+0x158/0x1a0
> 70664, [ffffffffb985f678] __memcg_kmem_newpage_charge+0x148/0x1c0
> 166228, [ffffffffb985f15b] __mem_cgroup_try_charge+0x12b/0x190
> 776, [ffffffffb9859c00] memcg_kmem_cache_create_func+0x50/0x80
> 614007, [ffffffffb985f508] __memcg_kmem_put_cache+0x48/0x70
> 116613, [ffffffffb9713f48] __ub_get_css+0x68/0x190
> 116137, [ffffffffb9716648] ub_enough_memory+0xe8/0x110
> 352, [ffffffffb9714764] ub_total_pages+0x64/0xa0
> 51, [ffffffffb9715f88] bc_fill_sysinfo.part.0+0x58/0x80
> 51, [ffffffffb97161c8] bc_mem_notify+0x218/0x270
> 22, [ffffffffb9714538] ub_sync_memcg+0x58/0x80
> 1, [ffffffffb9859b80] mem_cgroup_force_empty_write+0x40/0x70
> 1, [ffffffffb985bfb4] mem_cgroup_css_offline+0x264/0x2a0
> 1, [ffffffffb97369e0] cgroup_offline_fn+0x170/0x1a0
> 1, [ffffffffb9713c78] __ub_set_css+0xa8/0xe0
>
> https://jira.sw.ru/browse/PSBM-98148
> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
> include/linux/cgroup.h | 22 ++++++++++++++--------
> kernel/cgroup.c | 37 ++++++++++++++++++-------------------
> mm/memcontrol.c | 18 ++++++++++--------
> 3 files changed, 42 insertions(+), 35 deletions(-)
>
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 1c1ee0c458e0..919cddd78168 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -117,9 +117,11 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
>
> extern int proc_cgroup_show(struct seq_file *, void *);
>
> +#define CSS_IPS_COUNT 512
> +/* Instruction pointer and count */
> struct css_stacks {
> - atomic_t offset;
> - unsigned long stacks[(PAGE_SIZE*2)/8 - 1];
> + unsigned long ips[CSS_IPS_COUNT];
> + atomic_t count[CSS_IPS_COUNT];
> };
>
> /* Per-subsystem/per-cgroup state maintained by the system. */
> @@ -154,7 +156,7 @@ enum {
> extern struct static_key css_stacks_on;
> void __save_css_stack(struct cgroup_subsys_state *css);
>
> -static inline void save_css_stack(struct cgroup_subsys_state *css)
> +static __always_inline void save_css_stack(struct cgroup_subsys_state *css)
> {
> if (static_key_false(&css_stacks_on))
> __save_css_stack(css);
> @@ -166,7 +168,7 @@ static inline void save_css_stack(struct cgroup_subsys_state *css)
> * - an existing ref-counted reference to the css
> * - task->cgroups for a locked task
> */
> -static inline void css_get(struct cgroup_subsys_state *css)
> +static __always_inline void css_get(struct cgroup_subsys_state *css)
> {
> /* We don't need to reference count the root state */
> if (!(css->flags & CSS_ROOT)) {
> @@ -182,12 +184,16 @@ static inline void css_get(struct cgroup_subsys_state *css)
> * the css has been destroyed.
> */
>
> -static inline bool css_tryget(struct cgroup_subsys_state *css)
> +static __always_inline bool css_tryget(struct cgroup_subsys_state *css)
> {
> + bool ret;
> +
> if (css->flags & CSS_ROOT)
> return true;
> - save_css_stack(css);
> - return percpu_ref_tryget_live(&css->refcnt);
> + ret = percpu_ref_tryget_live(&css->refcnt);
> + if (ret)
> + save_css_stack(css);
> + return ret;
> }
>
> /*
> @@ -195,7 +201,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
> * css_get() or css_tryget()
> */
>
> -static inline void css_put(struct cgroup_subsys_state *css)
> +static __always_inline void css_put(struct cgroup_subsys_state *css)
> {
> if (!(css->flags & CSS_ROOT)) {
> save_css_stack(css);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index bf218e88ac9d..389128d37334 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4150,7 +4150,7 @@ static void css_dput_fn(struct work_struct *work)
>
> css_stacks = css->stacks;
> if (css_stacks)
> - free_pages((unsigned long)css_stacks, 1);
> + kfree(css_stacks);
>
> percpu_ref_exit(&css->refcnt);
> cgroup_dput(css->cgroup);
> @@ -4221,9 +4221,9 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
> {
> css->cgroup = cgrp;
> css->flags = 0;
> - if (static_key_false(&css_stacks_on) && slab_is_available())
> - css->stacks = (struct css_stacks *)
> - __get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1);
> + if (static_key_false(&css_stacks_on) && slab_is_available() &&
> + ss == &mem_cgroup_subsys)
> + css->stacks = kzalloc(sizeof(*css->stacks), GFP_KERNEL);
> else
> css->stacks = 0;
> if (cgrp == dummytop)
> @@ -4261,28 +4261,27 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
>
> void __save_css_stack(struct cgroup_subsys_state *css)
> {
> - unsigned long entries[8];
> - unsigned int offset;
> + int i;
> struct css_stacks *css_stacks;
> - struct stack_trace trace = {
> - .nr_entries = 0,
> - .entries = entries,
> - .max_entries = 8,
> - .skip = 0
> - };
> + unsigned long ip = _RET_IP_;
>
> css_stacks = css->stacks;
> if (!css_stacks)
> return;
>
> - memset(entries, 0, sizeof(entries));
> - offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2);
> - if (offset == 0) {
> - offset += 8;
> - trace.max_entries = 7;
> + for (i = 0; i < CSS_IPS_COUNT; i++) {
I hope this won't too slow.
> + if (css_stacks->ips[i] == 0) {
> + if (cmpxchg(&css_stacks->ips[i], 0, ip) != 0)
> + continue;
> + atomic_inc(&css_stacks->count[i]);
> + break;
> + }
> + if (css_stacks->ips[i] == ip) {
> + atomic_inc(&css_stacks->count[i]);
> + break;
> + }
> }
> - save_stack_trace(&trace);
> - memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8);
> + WARN(i == CSS_IPS_COUNT, "css_ips overflow %p %pS\n", css, (void *)ip);
> }
> EXPORT_SYMBOL(__save_css_stack);
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c3586e8e27ca..6c8989f1260d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3002,17 +3002,19 @@ void memcg_css_release_check_kmem(struct cgroup_subsys_state *css)
>
> css_stacks = css->stacks;
> if (css_stacks) {
> - pr_err("css_relese: css_stacks offset %d\n",
> - atomic_read(&css_stacks->offset));
> - for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) {
> - if (css_stacks->stacks[i])
> - pr_err("\t%pS\n",
> - (void *)css_stacks->stacks[i]);
> + pr_err("css_relese: css_get/put ips\n");
> + for (i = 0; i < CSS_IPS_COUNT; i++) {
> + if (css_stacks->ips[i])
> + pr_err("\t%d, [%lx] %pS\n",
> + atomic_read(&css_stacks->count[i]),
> + css_stacks->ips[i],
> + (void *)css_stacks->ips[i]);
> else
> - continue;
> + break;
> }
> + BUG();
> }
> - BUG();
> +
> }
>
> }
>
More information about the Devel
mailing list