[Devel] [PATCH rh7 v3] mm/memcg: save css_get/put stacks

Konstantin Khorenko khorenko at virtuozzo.com
Tue Feb 11 18:17:06 MSK 2020


From: Andrey Ryabinin <aryabinin at virtuozzo.com>

Save stack traces on css_get()/css_put() and spit them into
dmesg when memcg_css_release_check_kmem() discovers a problem.
This is supposed to help us find css_get/css_put imbalances.

This patch is for debug purposes only.

khorenko@: note that css_tryget() saves the stack unconditionally,
so stacks may also be recorded for failed tryget-s.

https://jira.sw.ru/browse/PSBM-98148
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>

v3: add a new "stacks" field in css instead of reusing "flags"
---
 Makefile               |  2 +-
 include/linux/cgroup.h | 19 +++++++++++++++++--
 kernel/cgroup.c        | 38 ++++++++++++++++++++++++++++++++++++++
 mm/memcontrol.c        | 21 +++++++++++++++++++--
 4 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 848a20a8da8a3..065f6098b4a08 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ RHEL_DRM_VERSION = 5
 RHEL_DRM_PATCHLEVEL = 0
 RHEL_DRM_SUBLEVEL = 10
 # VZVERSION = ovz.131.2
-VZVERSION = ovz.custom
+VZVERSION = ovz.finist
 
 ifeq ($(VZVERSION), ovz.custom)
   GIT_DIR := .git
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index aa91e476b912e..a2c5f07170725 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -117,6 +117,11 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
 
 extern int proc_cgroup_show(struct seq_file *, void *);
 
+struct css_stacks {
+	atomic_t offset;
+	unsigned long stacks[(PAGE_SIZE*2)/8 - 1];
+};
+
 /* Per-subsystem/per-cgroup state maintained by the system. */
 struct cgroup_subsys_state {
 	/*
@@ -130,6 +135,8 @@ struct cgroup_subsys_state {
 	struct percpu_ref refcnt;
 
 	unsigned long flags;
+	/* saving stacks for css get/put - debug */
+	struct css_stacks *stacks;
 	/* ID for this css, if possible */
 	struct css_id __rcu *id;
 
@@ -149,12 +156,16 @@ enum {
  * - an existing ref-counted reference to the css
  * - task->cgroups for a locked task
  */
+void save_css_stack(struct cgroup_subsys_state *css);
 
 static inline void css_get(struct cgroup_subsys_state *css)
 {
 	/* We don't need to reference count the root state */
-	if (!(css->flags & CSS_ROOT))
+	if (!(css->flags & CSS_ROOT)) {
+		save_css_stack(css);
 		percpu_ref_get(&css->refcnt);
+	}
+
 }
 
 /*
@@ -167,6 +178,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
 {
 	if (css->flags & CSS_ROOT)
 		return true;
+	save_css_stack(css);
 	return percpu_ref_tryget_live(&css->refcnt);
 }
 
@@ -177,8 +189,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
 
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_ROOT))
+	if (!(css->flags & CSS_ROOT)) {
+		save_css_stack(css);
 		percpu_ref_put(&css->refcnt);
+
+	}
 }
 
 /* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9ca8af9f73d91..f46ef8de43ee6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -60,6 +60,7 @@
 #include <linux/flex_array.h> /* used in cgroup_attach_task */
 #include <linux/kthread.h>
 #include <linux/ve.h>
+#include <linux/stacktrace.h>
 
 #include <linux/atomic.h>
 
@@ -4143,6 +4144,11 @@ static void css_dput_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, dput_work);
+	struct css_stacks *css_stacks;
+
+	css_stacks = css->stacks;
+	if (css_stacks)
+		free_pages((unsigned long)css_stacks, 1);
 
 	percpu_ref_exit(&css->refcnt);
 	cgroup_dput(css->cgroup);
@@ -4165,6 +4171,11 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 {
 	css->cgroup = cgrp;
 	css->flags = 0;
+	if (slab_is_available())
+		css->stacks = (struct css_stacks *)
+			__get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1);
+	else
+		css->stacks = 0;
 	if (cgrp == dummytop)
 		css->flags |= CSS_ROOT;
 	BUG_ON(cgrp->subsys[ss->subsys_id]);
@@ -4193,6 +4204,33 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	return ret;
 }
 
+void save_css_stack(struct cgroup_subsys_state *css)
+{
+	unsigned long entries[8];
+	unsigned int offset;
+	struct css_stacks *css_stacks;
+	struct stack_trace trace = {
+		.nr_entries = 0,
+		.entries = entries,
+		.max_entries = 8,
+		.skip = 0
+	};
+
+	css_stacks = css->stacks;
+	if (!css_stacks)
+		return;
+
+	memset(entries, 0, sizeof(entries));
+	offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2);
+	if (offset == 0) {
+		offset += 8;
+		trace.max_entries = 7;
+	}
+	save_stack_trace(&trace);
+	memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8);
+}
+EXPORT_SYMBOL(save_css_stack);
+
 /* if the CSS is online, invoke ->pre_destory() on it and mark it offline */
 static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0514f9b2b2308..c8d3c151ff99b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -621,6 +621,7 @@ void sock_release_memcg(struct sock *sk)
 		struct mem_cgroup *memcg;
 		WARN_ON(!sk->sk_cgrp->memcg);
 		memcg = sk->sk_cgrp->memcg;
+		save_css_stack(&sk->sk_cgrp->memcg->css);
 		css_put(&sk->sk_cgrp->memcg->css);
 	}
 }
@@ -2990,9 +2991,25 @@ void memcg_css_release_check_kmem(struct cgroup_subsys_state *css)
 
 	memcg = mem_cgroup_from_cont(css->cgroup);
 	kmem_counter = page_counter_read(&memcg->kmem);
-	WARN_ONCE(kmem_counter,
+	if(WARN_ONCE(kmem_counter,
 		  "Last put on memcg %p kmem=%lu css->flags=%#lx",
-		  memcg, kmem_counter, css->flags);
+		  memcg, kmem_counter, css->flags)) {
+		struct css_stacks *css_stacks;
+		int i;
+
+		css_stacks = css->stacks;
+		if (css_stacks) {
+			pr_err("css_relese: css_stacks offset %d\n", atomic_read(&css_stacks->offset));
+			for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) {
+				if (css_stacks->stacks[i])
+					pr_err("\t%pS\n", (void *)css_stacks->stacks[i]);
+				else
+					continue;
+			}
+		}
+		BUG();
+	}
+
 }
 EXPORT_SYMBOL(memcg_css_release_check_kmem);
 
-- 
2.15.1



More information about the Devel mailing list