[Devel] [PATCH 2/2] cgroup, memcg: Rework css_stacks debugging.

Kirill Tkhai ktkhai at virtuozzo.com
Thu May 21 16:34:46 MSK 2020


On 18.05.2020 18:14, Andrey Ryabinin wrote:
> Current css_stacks debugging isn't very usefull, it records only ~128 stacks
> which is not enough and doesn't allow us to identify the problem.
> Instead of recording full stacks, record only instruction pointer
> and the counter of how often we hit it.
> 
> Example of output:
>  css_relese: css_get/put ips
>   1, [ffffffffb9713c68] __ub_set_css+0x98/0xe0
>   851675, [ffffffffb985a208] get_mem_cgroup_from_mm+0x58/0xb0
>   776, [ffffffffb985a3e3] __memcg_kmem_get_cache+0x183/0x1a0
>   776, [ffffffffb985a3b8] __memcg_kmem_get_cache+0x158/0x1a0
>   70664, [ffffffffb985f678] __memcg_kmem_newpage_charge+0x148/0x1c0
>   166228, [ffffffffb985f15b] __mem_cgroup_try_charge+0x12b/0x190
>   776, [ffffffffb9859c00] memcg_kmem_cache_create_func+0x50/0x80
>   614007, [ffffffffb985f508] __memcg_kmem_put_cache+0x48/0x70
>   116613, [ffffffffb9713f48] __ub_get_css+0x68/0x190
>   116137, [ffffffffb9716648] ub_enough_memory+0xe8/0x110
>   352, [ffffffffb9714764] ub_total_pages+0x64/0xa0
>   51, [ffffffffb9715f88] bc_fill_sysinfo.part.0+0x58/0x80
>   51, [ffffffffb97161c8] bc_mem_notify+0x218/0x270
>   22, [ffffffffb9714538] ub_sync_memcg+0x58/0x80
>   1, [ffffffffb9859b80] mem_cgroup_force_empty_write+0x40/0x70
>   1, [ffffffffb985bfb4] mem_cgroup_css_offline+0x264/0x2a0
>   1, [ffffffffb97369e0] cgroup_offline_fn+0x170/0x1a0
>   1, [ffffffffb9713c78] __ub_set_css+0xa8/0xe0
> 
> https://jira.sw.ru/browse/PSBM-98148
> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>

Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>

> ---
>  include/linux/cgroup.h | 22 ++++++++++++++--------
>  kernel/cgroup.c        | 37 ++++++++++++++++++-------------------
>  mm/memcontrol.c        | 18 ++++++++++--------
>  3 files changed, 42 insertions(+), 35 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 1c1ee0c458e0..919cddd78168 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -117,9 +117,11 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
>  
>  extern int proc_cgroup_show(struct seq_file *, void *);
>  
> +#define CSS_IPS_COUNT 512
> +/* Instruction pointer and count */
>  struct css_stacks {
> -	atomic_t offset;
> -	unsigned long stacks[(PAGE_SIZE*2)/8 - 1];
> +	unsigned long ips[CSS_IPS_COUNT];
> +	atomic_t count[CSS_IPS_COUNT];
>  };
>  
>  /* Per-subsystem/per-cgroup state maintained by the system. */
> @@ -154,7 +156,7 @@ enum {
>  extern struct static_key css_stacks_on;
>  void __save_css_stack(struct cgroup_subsys_state *css);
>  
> -static inline void save_css_stack(struct cgroup_subsys_state *css)
> +static __always_inline void save_css_stack(struct cgroup_subsys_state *css)
>  {
>  	if (static_key_false(&css_stacks_on))
>  		__save_css_stack(css);
> @@ -166,7 +168,7 @@ static inline void save_css_stack(struct cgroup_subsys_state *css)
>   * - an existing ref-counted reference to the css
>   * - task->cgroups for a locked task
>   */
> -static inline void css_get(struct cgroup_subsys_state *css)
> +static __always_inline void css_get(struct cgroup_subsys_state *css)
>  {
>  	/* We don't need to reference count the root state */
>  	if (!(css->flags & CSS_ROOT)) {
> @@ -182,12 +184,16 @@ static inline void css_get(struct cgroup_subsys_state *css)
>   * the css has been destroyed.
>   */
>  
> -static inline bool css_tryget(struct cgroup_subsys_state *css)
> +static __always_inline bool css_tryget(struct cgroup_subsys_state *css)
>  {
> +	bool ret;
> +
>  	if (css->flags & CSS_ROOT)
>  		return true;
> -	save_css_stack(css);
> -	return percpu_ref_tryget_live(&css->refcnt);
> +	ret = percpu_ref_tryget_live(&css->refcnt);
> +	if (ret)
> +		save_css_stack(css);
> +	return ret;
>  }
>  
>  /*
> @@ -195,7 +201,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
>   * css_get() or css_tryget()
>   */
>  
> -static inline void css_put(struct cgroup_subsys_state *css)
> +static __always_inline void css_put(struct cgroup_subsys_state *css)
>  {
>  	if (!(css->flags & CSS_ROOT)) {
>  		save_css_stack(css);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index bf218e88ac9d..389128d37334 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4150,7 +4150,7 @@ static void css_dput_fn(struct work_struct *work)
>  
>  	css_stacks = css->stacks;
>  	if (css_stacks)
> -		free_pages((unsigned long)css_stacks, 1);
> +		kfree(css_stacks);
>  
>  	percpu_ref_exit(&css->refcnt);
>  	cgroup_dput(css->cgroup);
> @@ -4221,9 +4221,9 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
>  {
>  	css->cgroup = cgrp;
>  	css->flags = 0;
> -	if (static_key_false(&css_stacks_on) && slab_is_available())
> -		css->stacks = (struct css_stacks *)
> -			__get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1);
> +	if (static_key_false(&css_stacks_on) && slab_is_available() &&
> +	    ss == &mem_cgroup_subsys)
> +		css->stacks = kzalloc(sizeof(*css->stacks), GFP_KERNEL);
>  	else
>  		css->stacks = 0;
>  	if (cgrp == dummytop)
> @@ -4261,28 +4261,27 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
>  
>  void __save_css_stack(struct cgroup_subsys_state *css)
>  {
> -	unsigned long entries[8];
> -	unsigned int offset;
> +	int i;
>  	struct css_stacks *css_stacks;
> -	struct stack_trace trace = {
> -		.nr_entries = 0,
> -		.entries = entries,
> -		.max_entries = 8,
> -		.skip = 0
> -	};
> +	unsigned long ip = _RET_IP_;
>  
>  	css_stacks = css->stacks;
>  	if (!css_stacks)
>  		return;
>  
> -	memset(entries, 0, sizeof(entries));
> -	offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2);
> -	if (offset == 0) {
> -		offset += 8;
> -		trace.max_entries = 7;
> +	for (i = 0; i < CSS_IPS_COUNT; i++) {

I hope this won't too slow.

> +		if (css_stacks->ips[i] == 0) {
> +			if (cmpxchg(&css_stacks->ips[i], 0, ip) != 0)
> +				continue;
> +			atomic_inc(&css_stacks->count[i]);
> +			break;
> +		}
> +		if (css_stacks->ips[i] == ip) {
> +			atomic_inc(&css_stacks->count[i]);
> +			break;
> +		}
>  	}
> -	save_stack_trace(&trace);
> -	memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8);
> +	WARN(i == CSS_IPS_COUNT, "css_ips overflow %p %pS\n", css, (void *)ip);
>  }
>  EXPORT_SYMBOL(__save_css_stack);
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c3586e8e27ca..6c8989f1260d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3002,17 +3002,19 @@ void memcg_css_release_check_kmem(struct cgroup_subsys_state *css)
>  
>  		css_stacks = css->stacks;
>  		if (css_stacks) {
> -			pr_err("css_relese: css_stacks offset %d\n",
> -			       atomic_read(&css_stacks->offset));
> -			for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) {
> -				if (css_stacks->stacks[i])
> -					pr_err("\t%pS\n",
> -					       (void *)css_stacks->stacks[i]);
> +			pr_err("css_relese: css_get/put ips\n");
> +			for (i = 0; i < CSS_IPS_COUNT; i++) {
> +				if (css_stacks->ips[i])
> +					pr_err("\t%d, [%lx] %pS\n",
> +						atomic_read(&css_stacks->count[i]),
> +						css_stacks->ips[i],
> +						(void *)css_stacks->ips[i]);
>  				else
> -					continue;
> +					break;
>  			}
> +			BUG();
>  		}
> -		BUG();
> +
>  	}
>  
>  }
> 



More information about the Devel mailing list