[Devel] [PATCH RHEL COMMIT] ve/mm: introduce min threshold for dcache

Konstantin Khorenko khorenko at virtuozzo.com
Thu Sep 30 17:44:02 MSK 2021


The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit 6ddbff07208f2c135871e804b64b9b0664911f9d
Author: Vladimir Davydov <vdavydov.dev at gmail.com>
Date:   Thu Sep 30 17:44:02 2021 +0300

    ve/mm: introduce min threshold for dcache
    
    This patch adds a new sysctl, vm.vfs_cache_min_ratio. If the ratio of
    reclaimable slabs (i.e. dcache and icache) to the total memory usage of a
    cgroup is less than the value of this sysctl (2% by default), slabs
    won't be reclaimed from this cgroup under memory pressure.
    
    https://jira.sw.ru/browse/PSBM-34161
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
    
    +++
    ve/mm/dcache: Honor changing per-memcg s[un]reclaimable counters to bytes in dcache min threshold
    
    RHEL8.4 has the following mainstream (ms) commit backported:
    d42f3245c7e2 ("mm: memcg: convert vmstat slab counters to bytes")
    
    So, update the places where we use the per-memcg counters
    NR_SLAB_[UN]RECLAIMABLE_B accordingly.
    
    https://jira.sw.ru/browse/PSBM-132893
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    (cherry-picked from vz8 commit a3cff910211e ("ve/mm: introduce min
    threshold for dcache"))
    
    Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 fs/dcache.c                |  2 ++
 fs/super.c                 | 23 +++++++++++++++++++++++
 include/linux/dcache.h     |  1 +
 include/linux/memcontrol.h |  7 +++++++
 include/linux/shrinker.h   |  2 ++
 kernel/sysctl.c            |  9 +++++++++
 mm/memcontrol.c            | 16 ++++++++++++++++
 mm/vmscan.c                |  7 ++++---
 8 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index cf871a81f4fd..fa0a8fe12bfd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -74,6 +74,8 @@
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
+int sysctl_vfs_cache_min_ratio __read_mostly = 2;
+
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(rename_lock);
diff --git a/fs/super.c b/fs/super.c
index c72159ea66fa..f40b431420f7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/memcontrol.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/writeback.h>		/* for the emergency remount stuff */
@@ -53,6 +54,25 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
 	"sb_internal",
 };
 
+static bool dcache_is_low(struct mem_cgroup *memcg)
+{
+	unsigned long anon, file, dcache;
+	int vfs_cache_min_ratio = READ_ONCE(sysctl_vfs_cache_min_ratio);
+
+	if (vfs_cache_min_ratio <= 0)
+		return false;
+
+	if (memcg)
+		return mem_cgroup_dcache_is_low(memcg, vfs_cache_min_ratio);
+
+	anon = global_node_page_state(NR_ANON_MAPPED);
+	file = global_node_page_state(NR_FILE_PAGES);
+	dcache = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
+
+	return dcache / vfs_cache_min_ratio <
+			(anon + file + dcache) / 100;
+}
+
 /*
  * One thing we have to be careful of with a per-sb shrinker is that we don't
  * drop the last active reference to the superblock from within the shrinker.
@@ -123,6 +143,9 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	struct super_block *sb;
 	long	total_objects = 0;
 
+	if (!sc->for_drop_caches && dcache_is_low(sc->memcg))
+		return 0;
+
 	sb = container_of(shrink, struct super_block, s_shrink);
 
 	/*
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9e23d33bb6f1..b88f64c97558 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -508,6 +508,7 @@ static inline bool d_is_fallthru(const struct dentry *dentry)
 
 
 extern int sysctl_vfs_cache_pressure;
+extern int sysctl_vfs_cache_min_ratio;
 
 static inline unsigned long vfs_pressure_ratio(unsigned long val)
 {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b716a5bc806f..46b92cc0bdc5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -909,6 +909,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
 /*
  * For memory reclaim.
  */
+bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio);
 bool mem_cgroup_cleancache_disabled(struct page *page);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
@@ -1384,6 +1385,12 @@ static inline bool mem_cgroup_cleancache_disabled(struct page *page)
 	return false;
 }
 
+static inline bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg,
+	int vfs_cache_min_ratio)
+{
+	return false;
+}
+
 static inline unsigned long mm_overdraft(struct mm_struct *mm)
 {
 	return 0;
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 9814fff58a69..3dbb5b0d1052 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -31,6 +31,8 @@ struct shrink_control {
 
 	/* current memcg being shrunk (for memcg aware shrinkers) */
 	struct mem_cgroup *memcg;
+
+	bool for_drop_caches;
 };
 
 #define SHRINK_STOP (~0UL)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7244a1d1f2b8..5abb6df3b1d0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3061,6 +3061,15 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 	},
+	{
+		.procname       = "vfs_cache_min_ratio",
+		.data           = &sysctl_vfs_cache_min_ratio,
+		.maxlen         = sizeof(sysctl_vfs_cache_min_ratio),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_minmax,	/* enforce 0..100 */
+		.extra1         = SYSCTL_ZERO,
+		.extra2         = &one_hundred,
+	},
 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
 	{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c2b527cf73dc..47384b7fce0a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1335,6 +1335,22 @@ unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg)
 	return usage > guarantee ? (usage - guarantee) : 0;
 }
 
+bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio)
+{
+	unsigned long anon, file, dcache;
+
+	anon = memcg_page_state(memcg, NR_ANON_MAPPED);
+	file = memcg_page_state(memcg, NR_FILE_PAGES);
+	/*
+	 * After ms commit d42f3245c7e2 ("mm: memcg: convert vmstat slab
+	 * counters to bytes") NR_SLAB_{,UN}RECLAIMABLE_B are in bytes.
+	 */
+	dcache = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT;
+
+	return dcache / vfs_cache_min_ratio <
+			(anon + file + dcache) / 100;
+}
+
 /**
  * mem_cgroup_margin - calculate chargeable space of a memory cgroup
  * @memcg: the memory cgroup
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f55e24e18874..dfc094cafb9b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -844,7 +844,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
  */
 static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 				 struct mem_cgroup *memcg,
-				 int priority)
+				 int priority, bool for_drop_caches)
 {
 	unsigned long ret, freed = 0;
 	struct shrinker *shrinker;
@@ -870,6 +870,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 			.gfp_mask = gfp_mask,
 			.nid = nid,
 			.memcg = memcg,
+			.for_drop_caches = for_drop_caches,
 		};
 
 		ret = do_shrink_slab(&sc, shrinker, priority);
@@ -906,7 +907,7 @@ void drop_slab_node(int nid)
 		freed = 0;
 		memcg = mem_cgroup_iter(NULL, NULL, NULL);
 		do {
-			freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
+			freed += shrink_slab(GFP_KERNEL, nid, memcg, 0, true);
 		} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
 	} while (freed > 10);
 }
@@ -2880,7 +2881,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 		shrink_lruvec(lruvec, sc);
 
 		shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
-			    sc->priority);
+			    sc->priority, false);
 
 		/* Record the group's reclaim efficiency */
 		vmpressure(sc->gfp_mask, memcg, false,


More information about the Devel mailing list