[Devel] [PATCH rh7 2/2] memcg: introduce dcache age stats

Vladimir Davydov vdavydov at virtuozzo.com
Tue Oct 27 08:08:06 PDT 2015


With the aid of the idle page tracking feature we can estimate a working
set size that consists of user pages, but it does not give us a clue
about how many dentries are actually unused and can be dropped. We can
assume that all dentries can be safely reclaimed, but this can result in
performance degradation if the workload does need dcache. Alternatively,
we can count all dentries as used, but this will result in sub-optimal
memory distribution if there are a lot of dentries accessed only once.

To take into account dcache age, this patch adds a new file to the memory
cgroup controller, memory.kmem.unused_dcache_stat. It contains 32
numeric values separated by spaces. Value #i shows the number of unused
dentries that have not been touched for the last i periods. Naturally,
the first value (i = 0) shows the total number of unused dentries. A
period lasts for 10 seconds by default, but it can be tuned using sysctl
vm.dcache_age_delay.

The stats are updated at run time. To achieve that, we store in each
dentry the time when it became unused. When a dentry is touched we
update the counters. When a period expires we shift the counters array.
To reduce contention on the counters we gather the statistics per NUMA
node.

Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 fs/dcache.c                |  10 +++
 include/linux/dcache.h     |   8 +-
 include/linux/memcontrol.h |  16 ++++
 kernel/sysctl.c            |  12 +++
 mm/memcontrol.c            | 194 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 237 insertions(+), 3 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index a4f60d1f2aaf..01e660a1063a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -39,6 +39,7 @@
 #include <linux/prefetch.h>
 #include <linux/ratelimit.h>
 #include <linux/list_lru.h>
+#include <linux/memcontrol.h>
 #include <linux/vzstat.h>
 #include <linux/ve.h>
 #include "internal.h"
@@ -352,6 +353,7 @@ static void d_lru_add(struct dentry *dentry)
 	D_FLAG_VERIFY(dentry, 0);
 	dentry->d_flags |= DCACHE_LRU_LIST;
 	this_cpu_inc(nr_dentry_unused);
+	memcg_dcache_inc_unused(dentry);
 	WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
 }
 
@@ -360,6 +362,7 @@ static void d_lru_del(struct dentry *dentry)
 	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
 	dentry->d_flags &= ~DCACHE_LRU_LIST;
 	this_cpu_dec(nr_dentry_unused);
+	memcg_dcache_dec_unused(dentry);
 	WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
 }
 
@@ -369,6 +372,7 @@ static void d_shrink_del(struct dentry *dentry)
 	list_del_init(&dentry->d_lru);
 	dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
 	this_cpu_dec(nr_dentry_unused);
+	memcg_dcache_dec_unused(dentry);
 }
 
 static void d_shrink_add(struct dentry *dentry, struct list_head *list)
@@ -377,6 +381,7 @@ static void d_shrink_add(struct dentry *dentry, struct list_head *list)
 	list_add(&dentry->d_lru, list);
 	dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
 	this_cpu_inc(nr_dentry_unused);
+	memcg_dcache_inc_unused(dentry);
 }
 
 /*
@@ -390,6 +395,7 @@ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
 	D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
 	dentry->d_flags &= ~DCACHE_LRU_LIST;
 	this_cpu_dec(nr_dentry_unused);
+	memcg_dcache_dec_unused(dentry);
 	list_lru_isolate(lru, &dentry->d_lru);
 }
 
@@ -408,6 +414,8 @@ static void dentry_lru_add(struct dentry *dentry)
 {
 	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
 		d_lru_add(dentry);
+	else
+		memcg_dcache_update_unused(dentry);
 }
 
 /*
@@ -1024,6 +1032,8 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
 	d_lru_shrink_move(lru, dentry, freeable);
 	spin_unlock(&dentry->d_lock);
 
+	memcg_dcache_dec_unused(dentry);
+
 	return LRU_REMOVED;
 }
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 56766a1a7b2f..c5e4f859beae 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -92,12 +92,12 @@ extern unsigned int full_name_hash(const unsigned char *, unsigned int);
  * large memory footprint increase).
  */
 #ifdef CONFIG_64BIT
-# define DNAME_INLINE_LEN 32 /* 192 bytes */
+# define DNAME_INLINE_LEN 24 /* 192 bytes */
 #else
 # ifdef CONFIG_SMP
-#  define DNAME_INLINE_LEN 36 /* 128 bytes */
+#  define DNAME_INLINE_LEN 32 /* 128 bytes */
 # else
-#  define DNAME_INLINE_LEN 40 /* 128 bytes */
+#  define DNAME_INLINE_LEN 36 /* 128 bytes */
 # endif
 #endif
 
@@ -114,6 +114,8 @@ struct dentry {
 					 * negative */
 	unsigned char d_iname[DNAME_INLINE_LEN];	/* small names */
 
+	unsigned long d_unused_timestamp;
+
 	/* Ref lookup also touches following */
 	struct lockref d_lockref;	/* per-dentry lock and refcount */
 	const struct dentry_operations *d_op;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8a159c43ee7f..6eaeb565f884 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -624,6 +624,10 @@ static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
 		return NULL;
 	return __mem_cgroup_from_kmem(ptr);
 }
+
+void memcg_dcache_inc_unused(struct dentry *dentry);
+void memcg_dcache_dec_unused(struct dentry *dentry);
+void memcg_dcache_update_unused(struct dentry *dentry);
 #else
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
@@ -680,6 +684,18 @@ static inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
 {
 	return NULL;
 }
+
+static inline void memcg_dcache_inc_unused(struct dentry *dentry)
+{
+}
+
+static inline void memcg_dcache_dec_unused(struct dentry *dentry)
+{
+}
+
+static inline void memcg_dcache_update_unused(struct dentry *dentry)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9c081e3f350f..db5af297c68c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -116,6 +116,9 @@ extern int sysctl_nr_trim_pages;
 #ifdef CONFIG_BLOCK
 extern int blk_iopoll_enabled;
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+extern int sysctl_dcache_age_delay;
+#endif
 
 int ve_allow_module_load = 1;
 EXPORT_SYMBOL(ve_allow_module_load);
@@ -1553,6 +1556,15 @@ static struct ctl_table vm_table[] = {
 		.extra2		= &one_hundred,
 	},
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	{
+		.procname	= "dcache_age_delay",
+		.data		= &sysctl_dcache_age_delay,
+		.maxlen		= sizeof(sysctl_dcache_age_delay),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+	},
+#endif
 	{ }
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 11a4c41fc079..856fc8e1ffd2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -184,8 +184,20 @@ struct mem_cgroup_per_zone {
 						/* use container_of	   */
 };
 
+#define DCACHE_AGE_HISTORY	32
+
 struct mem_cgroup_per_node {
 	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
+	/* Protects dcache_timestamp and nr_dentry_unused. NOTE(review): no
+	 * spin_lock_init() for it is visible in this patch - confirm. */
+	spinlock_t dcache_stat_lock;
+
+	/* Stored in a dentry when it becomes unused. Increased periodically. */
+	unsigned long dcache_timestamp;
+
+	/* Element i counts the dentries that have been unused for exactly i
+	 * periods; the last element also collects all older ones. */
+	unsigned long nr_dentry_unused[DCACHE_AGE_HISTORY];
 };
 
 struct mem_cgroup_lru_info {
@@ -3634,6 +3646,184 @@ struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr)
 
 	return memcg;
 }
+
+int sysctl_dcache_age_delay = 10 * HZ;	/* aging period, in jiffies */
+
+static void age_dcache_memcg_node(struct mem_cgroup *memcg, int nid)
+{
+	struct mem_cgroup_per_node *node = memcg->info.nodeinfo[nid];
+	unsigned long *cnt = node->nr_dentry_unused;
+	int age = DCACHE_AGE_HISTORY - 1;
+
+	/*
+	 * A period is over: bump the node timestamp and move every counter one
+	 * slot up, so a dentry's age (timestamp difference) keeps indexing the
+	 * bucket it was counted in. The last bucket absorbs its predecessor.
+	 */
+	spin_lock(&node->dcache_stat_lock);
+	node->dcache_timestamp++;
+	cnt[age] += cnt[age - 1];
+	while (--age >= 1)
+		cnt[age] = cnt[age - 1];
+	cnt[0] = 0;
+	spin_unlock(&node->dcache_stat_lock);
+}
+
+static void age_dcache_memcg(struct mem_cgroup *memcg)
+{
+	int node;	/* NUMA node id; the counters are kept per node */
+
+	for_each_online_node(node)
+		age_dcache_memcg_node(memcg, node);
+}
+
+static void age_dcache(struct work_struct *w)
+{
+	/* Re-arm with at least one jiffy: the sysctl has no lower bound. */
+	int delay = max(sysctl_dcache_age_delay, 1);
+	struct mem_cgroup *memcg;
+
+	for_each_kmemcg(memcg)
+		age_dcache_memcg(memcg);
+	schedule_delayed_work(to_delayed_work(w), delay);
+}
+
+static int __init dcache_aging_init(void)
+{
+	static DECLARE_DELAYED_WORK(dcache_aging_work, age_dcache);
+
+	schedule_delayed_work(&dcache_aging_work, sysctl_dcache_age_delay);
+	return 0;	/* later runs are queued by age_dcache() itself */
+}
+module_init(dcache_aging_init);
+
+static void memcg_dcache_stat_node(struct mem_cgroup *memcg, int nid,
+				   unsigned long *nr_dentry_unused)
+{
+	struct mem_cgroup_per_node *node = memcg->info.nodeinfo[nid];
+	unsigned long older = 0;
+	int age = DCACHE_AGE_HISTORY;
+
+	/*
+	 * The node keeps exact-age buckets, while slot i of the output must
+	 * cover every dentry unused for at least i periods. Walk from the
+	 * oldest bucket down, adding the running sum into the caller's array.
+	 */
+	spin_lock(&node->dcache_stat_lock);
+	while (age-- > 0) {
+		older += node->nr_dentry_unused[age];
+		nr_dentry_unused[age] += older;
+	}
+	spin_unlock(&node->dcache_stat_lock);
+}
+
+static void memcg_dcache_stat(struct mem_cgroup *memcg,
+			      unsigned long *nr_dentry_unused)
+{
+	int node;	/* every online node adds into the same array */
+
+	for_each_online_node(node)
+		memcg_dcache_stat_node(memcg, node, nr_dentry_unused);
+}
+
+static int memcg_unused_dcache_show(struct cgroup *cont, struct cftype *cft,
+				    struct seq_file *m)
+{
+	static DEFINE_MUTEX(mutex);
+	static unsigned long nr_dentry_unused[DCACHE_AGE_HISTORY];
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *mi;
+	int i;
+
+	if (!memcg_kmem_is_active(memcg))
+		return -EIO;
+
+	/* Readers share the static array, so print it before unlocking. */
+	mutex_lock(&mutex);
+	memset(nr_dentry_unused, 0, sizeof(nr_dentry_unused));
+	for_each_kmemcg_tree(mi, memcg)
+		memcg_dcache_stat(mi, nr_dentry_unused);
+	for (i = 0; i < DCACHE_AGE_HISTORY; i++)
+		seq_printf(m, "%lu%c", nr_dentry_unused[i],
+			   i < DCACHE_AGE_HISTORY - 1 ? ' ' : '\n');
+	mutex_unlock(&mutex);
+	return 0;
+}
+
+static void __memcg_dcache_inc_unused(struct mem_cgroup_per_node *pn,
+				      struct dentry *dentry)
+{
+	pn->nr_dentry_unused[0]++;	/* a fresh dentry has age 0 */
+	dentry->d_unused_timestamp = pn->dcache_timestamp;
+}
+
+static void __memcg_dcache_dec_unused(struct mem_cgroup_per_node *pn,
+				      struct dentry *dentry)
+{
+	long periods = pn->dcache_timestamp - dentry->d_unused_timestamp;
+
+	/* Clamp to a valid bucket; the last one holds all older dentries. */
+	if (WARN_ON_ONCE(periods < 0))
+		periods = 0;
+	else if (periods > DCACHE_AGE_HISTORY - 1)
+		periods = DCACHE_AGE_HISTORY - 1;
+	pn->nr_dentry_unused[periods]--;
+}
+
+/*
+ * Called when a dentry is put on the LRU or on a shrink list.
+ */
+void memcg_dcache_inc_unused(struct dentry *dentry)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_kmem(dentry);
+	struct mem_cgroup_per_node *node;
+	int nid;
+
+	if (!memcg)
+		return;
+	nid = page_to_nid(virt_to_page(dentry));
+	node = memcg->info.nodeinfo[nid];
+	spin_lock(&node->dcache_stat_lock);
+	__memcg_dcache_inc_unused(node, dentry);
+	spin_unlock(&node->dcache_stat_lock);
+}
+
+/*
+ * Called when a dentry is taken off the LRU or a shrink list.
+ */
+void memcg_dcache_dec_unused(struct dentry *dentry)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_kmem(dentry);
+	struct mem_cgroup_per_node *node;
+	int nid;
+
+	if (!memcg)
+		return;
+	nid = page_to_nid(virt_to_page(dentry));
+	node = memcg->info.nodeinfo[nid];
+	spin_lock(&node->dcache_stat_lock);
+	__memcg_dcache_dec_unused(node, dentry);
+	spin_unlock(&node->dcache_stat_lock);
+}
+
+/*
+ * Called when dentry_lru_add() finds the dentry already on the LRU: restart
+ * its age by moving it from its current bucket back to bucket 0.
+ */
+void memcg_dcache_update_unused(struct dentry *dentry)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_kmem(dentry);
+	struct mem_cgroup_per_node *node;
+
+	if (!memcg)
+		return;
+
+	node = memcg->info.nodeinfo[page_to_nid(virt_to_page(dentry))];
+	spin_lock(&node->dcache_stat_lock);
+	__memcg_dcache_dec_unused(node, dentry);
+	__memcg_dcache_inc_unused(node, dentry);
+	spin_unlock(&node->dcache_stat_lock);
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -6299,6 +6489,10 @@ static struct cftype mem_cgroup_files[] = {
 		.read_seq_string = mem_cgroup_slabinfo_read,
 	},
 #endif
+	{
+		.name = "kmem.unused_dcache_stat",
+		.read_seq_string = memcg_unused_dcache_show,
+	},
 #endif
 	{ },	/* terminate */
 };
-- 
2.1.4




More information about the Devel mailing list