[Devel] [PATCH RHEL7 COMMIT] cgroup, memcg: Rework css_stacks debugging.
Konstantin Khorenko
khorenko at virtuozzo.com
Tue May 19 21:31:35 MSK 2020
The commit is pushed to "branch-rh7-3.10.0-1127.8.2.vz7.151.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.8.2.vz7.151.2
------>
commit 3b47ac65d9e06bdbeae9234253641d229f53ff39
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date: Tue May 19 21:31:35 2020 +0300
cgroup, memcg: Rework css_stacks debugging.
The current css_stacks debugging isn't very useful: it records only ~128 stacks,
which is not enough and doesn't allow us to identify the problem.
Instead of recording full stacks, record only the caller's instruction pointer
and a counter of how often that instruction pointer was hit.
Example of output:
css_relese: css_get/put ips
1, [ffffffffb9713c68] __ub_set_css+0x98/0xe0
851675, [ffffffffb985a208] get_mem_cgroup_from_mm+0x58/0xb0
776, [ffffffffb985a3e3] __memcg_kmem_get_cache+0x183/0x1a0
776, [ffffffffb985a3b8] __memcg_kmem_get_cache+0x158/0x1a0
70664, [ffffffffb985f678] __memcg_kmem_newpage_charge+0x148/0x1c0
166228, [ffffffffb985f15b] __mem_cgroup_try_charge+0x12b/0x190
776, [ffffffffb9859c00] memcg_kmem_cache_create_func+0x50/0x80
614007, [ffffffffb985f508] __memcg_kmem_put_cache+0x48/0x70
116613, [ffffffffb9713f48] __ub_get_css+0x68/0x190
116137, [ffffffffb9716648] ub_enough_memory+0xe8/0x110
352, [ffffffffb9714764] ub_total_pages+0x64/0xa0
51, [ffffffffb9715f88] bc_fill_sysinfo.part.0+0x58/0x80
51, [ffffffffb97161c8] bc_mem_notify+0x218/0x270
22, [ffffffffb9714538] ub_sync_memcg+0x58/0x80
1, [ffffffffb9859b80] mem_cgroup_force_empty_write+0x40/0x70
1, [ffffffffb985bfb4] mem_cgroup_css_offline+0x264/0x2a0
1, [ffffffffb97369e0] cgroup_offline_fn+0x170/0x1a0
1, [ffffffffb9713c78] __ub_set_css+0xa8/0xe0
https://jira.sw.ru/browse/PSBM-98148
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
include/linux/cgroup.h | 22 ++++++++++++++--------
kernel/cgroup.c | 41 ++++++++++++++++++++++-------------------
mm/memcontrol.c | 18 ++++++++++--------
3 files changed, 46 insertions(+), 35 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1c1ee0c458e06..919cddd781684 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -117,9 +117,11 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
extern int proc_cgroup_show(struct seq_file *, void *);
+#define CSS_IPS_COUNT 512
+/* Instruction pointer and count */
struct css_stacks {
- atomic_t offset;
- unsigned long stacks[(PAGE_SIZE*2)/8 - 1];
+ unsigned long ips[CSS_IPS_COUNT];
+ atomic_t count[CSS_IPS_COUNT];
};
/* Per-subsystem/per-cgroup state maintained by the system. */
@@ -154,7 +156,7 @@ enum {
extern struct static_key css_stacks_on;
void __save_css_stack(struct cgroup_subsys_state *css);
-static inline void save_css_stack(struct cgroup_subsys_state *css)
+static __always_inline void save_css_stack(struct cgroup_subsys_state *css)
{
if (static_key_false(&css_stacks_on))
__save_css_stack(css);
@@ -166,7 +168,7 @@ static inline void save_css_stack(struct cgroup_subsys_state *css)
* - an existing ref-counted reference to the css
* - task->cgroups for a locked task
*/
-static inline void css_get(struct cgroup_subsys_state *css)
+static __always_inline void css_get(struct cgroup_subsys_state *css)
{
/* We don't need to reference count the root state */
if (!(css->flags & CSS_ROOT)) {
@@ -182,12 +184,16 @@ static inline void css_get(struct cgroup_subsys_state *css)
* the css has been destroyed.
*/
-static inline bool css_tryget(struct cgroup_subsys_state *css)
+static __always_inline bool css_tryget(struct cgroup_subsys_state *css)
{
+ bool ret;
+
if (css->flags & CSS_ROOT)
return true;
- save_css_stack(css);
- return percpu_ref_tryget_live(&css->refcnt);
+ ret = percpu_ref_tryget_live(&css->refcnt);
+ if (ret)
+ save_css_stack(css);
+ return ret;
}
/*
@@ -195,7 +201,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
* css_get() or css_tryget()
*/
-static inline void css_put(struct cgroup_subsys_state *css)
+static __always_inline void css_put(struct cgroup_subsys_state *css)
{
if (!(css->flags & CSS_ROOT)) {
save_css_stack(css);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bf218e88ac9dc..48853175b5a4b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4150,7 +4150,7 @@ static void css_dput_fn(struct work_struct *work)
css_stacks = css->stacks;
if (css_stacks)
- free_pages((unsigned long)css_stacks, 1);
+ kfree(css_stacks);
percpu_ref_exit(&css->refcnt);
cgroup_dput(css->cgroup);
@@ -4221,9 +4221,9 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
{
css->cgroup = cgrp;
css->flags = 0;
- if (static_key_false(&css_stacks_on) && slab_is_available())
- css->stacks = (struct css_stacks *)
- __get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1);
+ if (static_key_false(&css_stacks_on) && slab_is_available() &&
+ ss == &mem_cgroup_subsys)
+ css->stacks = kzalloc(sizeof(*css->stacks), GFP_KERNEL);
else
css->stacks = 0;
if (cgrp == dummytop)
@@ -4261,28 +4261,31 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
void __save_css_stack(struct cgroup_subsys_state *css)
{
- unsigned long entries[8];
- unsigned int offset;
+ int i;
struct css_stacks *css_stacks;
- struct stack_trace trace = {
- .nr_entries = 0,
- .entries = entries,
- .max_entries = 8,
- .skip = 0
- };
+ unsigned long ip = _RET_IP_;
css_stacks = css->stacks;
if (!css_stacks)
return;
- memset(entries, 0, sizeof(entries));
- offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2);
- if (offset == 0) {
- offset += 8;
- trace.max_entries = 7;
+ for (i = 0; i < CSS_IPS_COUNT; i++) {
+ if (css_stacks->ips[i] == 0) {
+ unsigned long old_ip;
+
+ old_ip = cmpxchg(&css_stacks->ips[i], 0, ip);
+ if (old_ip != 0 && old_ip != ip)
+ continue;
+
+ atomic_inc(&css_stacks->count[i]);
+ break;
+ }
+ if (css_stacks->ips[i] == ip) {
+ atomic_inc(&css_stacks->count[i]);
+ break;
+ }
}
- save_stack_trace(&trace);
- memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8);
+ WARN(i == CSS_IPS_COUNT, "css_ips overflow %p %pS\n", css, (void *)ip);
}
EXPORT_SYMBOL(__save_css_stack);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 010d580f39ccc..6492f7d15b563 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3002,17 +3002,19 @@ void memcg_css_release_check_kmem(struct cgroup_subsys_state *css)
css_stacks = css->stacks;
if (css_stacks) {
- pr_err("css_relese: css_stacks offset %d\n",
- atomic_read(&css_stacks->offset));
- for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) {
- if (css_stacks->stacks[i])
- pr_err("\t%pS\n",
- (void *)css_stacks->stacks[i]);
+ pr_err("css_relese: css_get/put ips\n");
+ for (i = 0; i < CSS_IPS_COUNT; i++) {
+ if (css_stacks->ips[i])
+ pr_err("\t%d, [%lx] %pS\n",
+ atomic_read(&css_stacks->count[i]),
+ css_stacks->ips[i],
+ (void *)css_stacks->ips[i]);
else
- continue;
+ break;
}
+ BUG();
}
- BUG();
+
}
}
More information about the Devel
mailing list