[Devel] [PATCH RHEL7 COMMIT] ve/mm: introduce min threshold for dcache

Konstantin Khorenko khorenko at virtuozzo.com
Thu May 19 04:23:42 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.4
------>
commit 5075f1c8310f1494cdaef014a6aa589445e6be7b
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Thu May 19 15:23:42 2016 +0400

    ve/mm: introduce min threshold for dcache
    
    This patch adds a new sysctl, vm.vfs_cache_min_ratio. If the ratio of
    reclaimable slabs (i.e. dcache and icache) to the total memory usage
    of a cgroup (or of the whole system, for global reclaim) is less than
    the value of this sysctl (2% by default), slabs won't be reclaimed
    from this cgroup under memory pressure.
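
    The threshold can be tuned at runtime via
    /proc/sys/vm/vfs_cache_min_ratio; setting it to 0 disables the check.
    For example, with the default ratio of 2, a cgroup using 1000 MB in
    total (anon + file + reclaimable slab) will keep its dcache and
    icache intact as long as reclaimable slab usage stays below 20 MB.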
    
    https://jira.sw.ru/browse/PSBM-34161
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 fs/dcache.c                |  2 ++
 fs/super.c                 | 22 ++++++++++++++++++++++
 include/linux/dcache.h     |  1 +
 include/linux/memcontrol.h |  6 ++++++
 include/linux/shrinker.h   |  2 ++
 kernel/sysctl.c            |  9 +++++++++
 mm/memcontrol.c            | 12 ++++++++++++
 mm/vmscan.c                | 10 ++++++----
 8 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 1ea5081..0c2826d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -86,6 +86,8 @@
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
+int sysctl_vfs_cache_min_ratio __read_mostly = 2;
+
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(rename_lock);
diff --git a/fs/super.c b/fs/super.c
index 4d6ce6d..50ac293 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -34,6 +34,7 @@
 #include <linux/cleancache.h>
 #include <linux/fsnotify.h>
 #include <linux/lockdep.h>
+#include <linux/memcontrol.h>
 #include "internal.h"
 
 const unsigned super_block_wrapper_version = 0;
@@ -47,6 +48,24 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
 	"sb_internal",
 };
 
+static bool dcache_is_low(struct mem_cgroup *memcg)
+{
+	unsigned long anon, file, dcache;
+
+	if (sysctl_vfs_cache_min_ratio <= 0)
+		return false;
+
+	if (memcg)
+		return mem_cgroup_dcache_is_low(memcg);
+
+	anon = global_page_state(NR_ANON_PAGES);
+	file = global_page_state(NR_FILE_PAGES);
+	dcache = global_page_state(NR_SLAB_RECLAIMABLE);
+
+	return dcache / sysctl_vfs_cache_min_ratio <
+			(anon + file + dcache) / 100;
+}
+
 /*
  * One thing we have to be careful of with a per-sb shrinker is that we don't
  * drop the last active reference to the superblock from within the shrinker.
@@ -112,6 +131,9 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	struct super_block *sb;
 	long	total_objects = 0;
 
+	if (!sc->for_drop_caches && dcache_is_low(sc->memcg))
+		return 0;
+
 	sb = container_of(shrink, struct super_block, s_shrink);
 
 	/*
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 51dffd2..b55fb2e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -464,6 +464,7 @@ static inline bool d_is_positive(const struct dentry *dentry)
 }
 
 extern int sysctl_vfs_cache_pressure;
+extern int sysctl_vfs_cache_min_ratio;
 
 static inline unsigned long vfs_pressure_ratio(unsigned long val)
 {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0dbb653..1c5f916 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -121,6 +121,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
  * For memory reclaim.
  */
 int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
+bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg);
 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
 bool mem_cgroup_cleancache_disabled(struct page *page);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
@@ -368,6 +369,11 @@ mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
 	return 1;
 }
 
+static inline bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg)
+{
+	return false;
+}
+
 static inline bool mem_cgroup_low(struct mem_cgroup *root,
 				  struct mem_cgroup *memcg)
 {
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 4fcacd9..5e706a5 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -23,6 +23,8 @@ struct shrink_control {
 
 	/* current memcg being shrunk (for memcg aware shrinkers) */
 	struct mem_cgroup *memcg;
+
+	bool for_drop_caches;
 };
 
 #define SHRINK_STOP (~0UL)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 867eb5a..c8f7bc3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1453,6 +1453,15 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= proc_dointvec,
 		.extra1		= &zero,
 	},
+	{
+		.procname	= "vfs_cache_min_ratio",
+		.data		= &sysctl_vfs_cache_min_ratio,
+		.maxlen		= sizeof(sysctl_vfs_cache_min_ratio),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 	{
 		.procname	= "legacy_va_layout",
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4763520..89c9edf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1580,6 +1580,18 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
 	return inactive * inactive_ratio < active;
 }
 
+bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg)
+{
+	unsigned long anon, file, dcache;
+
+	anon = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS);
+	file = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
+	dcache = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SLAB_RECLAIMABLE);
+
+	return dcache / sysctl_vfs_cache_min_ratio <
+			(anon + file + dcache) / 100;
+}
+
 /**
  * mem_cgroup_low - check if memory consumption is below the normal range
  * @root: the highest ancestor to consider
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a7cc964..3f6ce18 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -351,7 +351,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 				 struct mem_cgroup *memcg,
 				 unsigned long nr_scanned,
-				 unsigned long nr_eligible)
+				 unsigned long nr_eligible,
+				 bool for_drop_caches)
 {
 	struct shrinker *shrinker;
 	unsigned long freed = 0;
@@ -381,6 +382,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 			.gfp_mask = gfp_mask,
 			.nid = nid,
 			.memcg = memcg,
+			.for_drop_caches = for_drop_caches,
 		};
 
 		if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
@@ -408,7 +410,7 @@ void drop_slab_node(int nid)
 		freed = 0;
 		do {
 			freed += shrink_slab(GFP_KERNEL, nid, memcg,
-					     1000, 1000);
+					     1000, 1000, true);
 		} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
 	} while (freed > 10);
 }
@@ -2365,7 +2367,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 			if (memcg && is_classzone)
 				shrink_slab(slab_gfp, zone_to_nid(zone),
 					    memcg, sc->nr_scanned - scanned,
-					    lru_pages);
+					    lru_pages, false);
 
 			/*
 			 * Direct reclaim and kswapd have to scan all memory
@@ -2409,7 +2411,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 			}
 
 			shrink_slab(slab_gfp, zone_to_nid(zone), NULL,
-				    scanned, eligible);
+				    scanned, eligible, false);
 		}
 
 		if (reclaim_state) {
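
For reference, here is a minimal userspace sketch (not part of the patch)
that mirrors the integer arithmetic of dcache_is_low() above; the page
counts in main() are made-up examples:

#include <stdbool.h>
#include <stdio.h>

static int vfs_cache_min_ratio = 2;	/* vm.vfs_cache_min_ratio default */

/*
 * Same check as dcache_is_low() / mem_cgroup_dcache_is_low(): true when
 * reclaimable slab makes up less than vfs_cache_min_ratio percent of the
 * total (anon + file + slab) usage, using integer division throughout.
 */
static bool dcache_is_low(unsigned long anon, unsigned long file,
			  unsigned long dcache)
{
	if (vfs_cache_min_ratio <= 0)
		return false;
	return dcache / vfs_cache_min_ratio < (anon + file + dcache) / 100;
}

int main(void)
{
	/* 15 of 1000 pages are reclaimable slab: 1.5% < 2%, so the
	 * superblock shrinker would report 0 objects to scan. */
	printf("%d\n", dcache_is_low(700, 285, 15));	/* prints 1 */

	/* 30 of 1000 pages: 3% >= 2%, reclaim proceeds as usual. */
	printf("%d\n", dcache_is_low(700, 270, 30));	/* prints 0 */
	return 0;
}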

