[Devel] [PATCH vz9 08/16] ve/mm: introduce min threshold for dcache
Nikita Yushchenko
nikita.yushchenko at virtuozzo.com
Wed Sep 29 10:00:09 MSK 2021
From: Vladimir Davydov <vdavydov at virtuozzo.com>
This patch adds new sysctl vm.vfs_cache_min_ratio. If the ratio of
reclaimable slabs (i.e. dcache and icache) to total memory usage of a
cgroup is less than the value of this sysctl (2% by default), slabs
won't be reclaimed from this cgroup on memory pressure.
https://jira.sw.ru/browse/PSBM-34161
Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
+++
ve/mm/dcache: Honor changing per-memcg s[un]reclaimable counters to bytes in dcache min threshold
RHEL8.4 has following ms commit backported:
d42f3245c7e2 ("mm: memcg: convert vmstat slab counters to bytes")
So, update places where we use per-memcg counters NR_SLAB_[UN]RECLAIMABLE_B
accordingly.
https://jira.sw.ru/browse/PSBM-132893
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
(cherry-picked from vz8 commit a3cff910211e ("ve/mm: introduce min
threshold for dcache"))
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
fs/dcache.c | 2 ++
fs/super.c | 23 +++++++++++++++++++++++
include/linux/dcache.h | 1 +
include/linux/memcontrol.h | 7 +++++++
include/linux/shrinker.h | 2 ++
kernel/sysctl.c | 9 +++++++++
mm/memcontrol.c | 16 ++++++++++++++++
mm/vmscan.c | 7 ++++---
8 files changed, 64 insertions(+), 3 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index cf871a81f4fd..fa0a8fe12bfd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -74,6 +74,8 @@
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
+int sysctl_vfs_cache_min_ratio __read_mostly = 2;
+
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
EXPORT_SYMBOL(rename_lock);
diff --git a/fs/super.c b/fs/super.c
index c72159ea66fa..f40b431420f7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
+#include <linux/memcontrol.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h> /* for the emergency remount stuff */
@@ -53,6 +54,25 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
"sb_internal",
};
+static bool dcache_is_low(struct mem_cgroup *memcg)
+{
+ unsigned long anon, file, dcache;
+ int vfs_cache_min_ratio = READ_ONCE(sysctl_vfs_cache_min_ratio);
+
+ if (vfs_cache_min_ratio <= 0)
+ return false;
+
+ if (memcg)
+ return mem_cgroup_dcache_is_low(memcg, vfs_cache_min_ratio);
+
+ anon = global_node_page_state(NR_ANON_MAPPED);
+ file = global_node_page_state(NR_FILE_PAGES);
+ dcache = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
+
+ return dcache / vfs_cache_min_ratio <
+ (anon + file + dcache) / 100;
+}
+
/*
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
@@ -123,6 +143,9 @@ static unsigned long super_cache_count(struct shrinker *shrink,
struct super_block *sb;
long total_objects = 0;
+ if (!sc->for_drop_caches && dcache_is_low(sc->memcg))
+ return 0;
+
sb = container_of(shrink, struct super_block, s_shrink);
/*
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9e23d33bb6f1..b88f64c97558 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -508,6 +508,7 @@ static inline bool d_is_fallthru(const struct dentry *dentry)
extern int sysctl_vfs_cache_pressure;
+extern int sysctl_vfs_cache_min_ratio;
static inline unsigned long vfs_pressure_ratio(unsigned long val)
{
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b716a5bc806f..46b92cc0bdc5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -909,6 +909,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
/*
* For memory reclaim.
*/
+bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio);
bool mem_cgroup_cleancache_disabled(struct page *page);
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
@@ -1384,6 +1385,12 @@ static inline bool mem_cgroup_cleancache_disabled(struct page *page)
return false;
}
+static inline bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg,
+ int vfs_cache_min_ratio)
+{
+ return false;
+}
+
static inline unsigned long mm_overdraft(struct mm_struct *mm)
{
return 0;
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 9814fff58a69..3dbb5b0d1052 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -31,6 +31,8 @@ struct shrink_control {
/* current memcg being shrunk (for memcg aware shrinkers) */
struct mem_cgroup *memcg;
+
+ bool for_drop_caches;
};
#define SHRINK_STOP (~0UL)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 99dabb3a64e7..563e44b26634 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3057,6 +3057,15 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
+ {
+ .procname = "vfs_cache_min_ratio",
+ .data = &sysctl_vfs_cache_min_ratio,
+ .maxlen = sizeof(sysctl_vfs_cache_min_ratio),
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &one_hundred,
+ },
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c2b527cf73dc..47384b7fce0a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1335,6 +1335,22 @@ unsigned long mem_cgroup_overdraft(struct mem_cgroup *memcg)
return usage > guarantee ? (usage - guarantee) : 0;
}
+bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg, int vfs_cache_min_ratio)
+{
+ unsigned long anon, file, dcache;
+
+ anon = memcg_page_state(memcg, NR_ANON_MAPPED);
+ file = memcg_page_state(memcg, NR_FILE_PAGES);
+ /*
+ * After ms commit d42f3245c7e2 ("mm: memcg: convert vmstat slab
+ * counters to bytes") NR_SLAB_{,UN}RECLAIMABLE_B are in bytes.
+ */
+ dcache = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) >> PAGE_SHIFT;
+
+ return dcache / vfs_cache_min_ratio <
+ (anon + file + dcache) / 100;
+}
+
/**
* mem_cgroup_margin - calculate chargeable space of a memory cgroup
* @memcg: the memory cgroup
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f55e24e18874..dfc094cafb9b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -844,7 +844,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
*/
static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg,
- int priority)
+ int priority, bool for_drop_caches)
{
unsigned long ret, freed = 0;
struct shrinker *shrinker;
@@ -870,6 +870,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
.gfp_mask = gfp_mask,
.nid = nid,
.memcg = memcg,
+ .for_drop_caches = for_drop_caches,
};
ret = do_shrink_slab(&sc, shrinker, priority);
@@ -906,7 +907,7 @@ void drop_slab_node(int nid)
freed = 0;
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
- freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
+ freed += shrink_slab(GFP_KERNEL, nid, memcg, 0, true);
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
} while (freed > 10);
}
@@ -2880,7 +2881,7 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
shrink_lruvec(lruvec, sc);
shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
- sc->priority);
+ sc->priority, false);
/* Record the group's reclaim efficiency */
vmpressure(sc->gfp_mask, memcg, false,
--
2.30.2
More information about the Devel
mailing list