[Devel] [PATCH RHEL7 COMMIT] ve/mm: introduce min threshold for dcache
Konstantin Khorenko
khorenko at virtuozzo.com
Thu May 19 04:23:42 PDT 2016
The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.4
------>
commit 5075f1c8310f1494cdaef014a6aa589445e6be7b
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date: Thu May 19 15:23:42 2016 +0400
ve/mm: introduce min threshold for dcache
This patch adds a new sysctl, vm.vfs_cache_min_ratio. If the ratio of
reclaimable slabs (i.e. dcache and icache) to the total memory usage of a
cgroup is less than the value of this sysctl (2% by default), slabs
won't be reclaimed from that cgroup under memory pressure.
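(Illustration only, not part of the patch: a minimal userspace sketch of
the check, with made-up page counts. The kernel applies the same integer
comparison to per-memcg or global counters, as in the diff below.)

#include <stdio.h>

/* Mirrors the default of the new sysctl vm.vfs_cache_min_ratio. */
static int vfs_cache_min_ratio = 2;

/*
 * Same integer arithmetic as dcache_is_low() in the patch: true when
 * reclaimable slab is below ~vfs_cache_min_ratio percent of total
 * usage (anon + file + reclaimable slab).
 */
static int dcache_is_low(unsigned long anon, unsigned long file,
                         unsigned long dcache)
{
        if (vfs_cache_min_ratio <= 0)   /* <= 0 disables the threshold */
                return 0;
        return dcache / vfs_cache_min_ratio <
               (anon + file + dcache) / 100;
}

int main(void)
{
        /*
         * 240000 anon + 50000 file + 4000 slab pages: slab is ~1.4%
         * of the total, below the 2% default, so slab reclaim would
         * be skipped for this cgroup (prints 1).
         */
        printf("%d\n", dcache_is_low(240000, 50000, 4000));
        return 0;
}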
https://jira.sw.ru/browse/PSBM-34161
Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
fs/dcache.c | 2 ++
fs/super.c | 22 ++++++++++++++++++++++
include/linux/dcache.h | 1 +
include/linux/memcontrol.h | 6 ++++++
include/linux/shrinker.h | 2 ++
kernel/sysctl.c | 9 +++++++++
mm/memcontrol.c | 12 ++++++++++++
mm/vmscan.c | 10 ++++++----
8 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 1ea5081..0c2826d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -86,6 +86,8 @@
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
+int sysctl_vfs_cache_min_ratio __read_mostly = 2;
+
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
EXPORT_SYMBOL(rename_lock);
diff --git a/fs/super.c b/fs/super.c
index 4d6ce6d..50ac293 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -34,6 +34,7 @@
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
+#include <linux/memcontrol.h>
#include "internal.h"
const unsigned super_block_wrapper_version = 0;
@@ -47,6 +48,24 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
"sb_internal",
};
+static bool dcache_is_low(struct mem_cgroup *memcg)
+{
+ unsigned long anon, file, dcache;
+
+ if (sysctl_vfs_cache_min_ratio <= 0)
+ return false;
+
+ if (memcg)
+ return mem_cgroup_dcache_is_low(memcg);
+
+ anon = global_page_state(NR_ANON_PAGES);
+ file = global_page_state(NR_FILE_PAGES);
+ dcache = global_page_state(NR_SLAB_RECLAIMABLE);
+
+ return dcache / sysctl_vfs_cache_min_ratio <
+ (anon + file + dcache) / 100;
+}
+
/*
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
@@ -112,6 +131,9 @@ static unsigned long super_cache_count(struct shrinker *shrink,
struct super_block *sb;
long total_objects = 0;
+ if (!sc->for_drop_caches && dcache_is_low(sc->memcg))
+ return 0;
+
sb = container_of(shrink, struct super_block, s_shrink);
/*
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 51dffd2..b55fb2e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -464,6 +464,7 @@ static inline bool d_is_positive(const struct dentry *dentry)
}
extern int sysctl_vfs_cache_pressure;
+extern int sysctl_vfs_cache_min_ratio;
static inline unsigned long vfs_pressure_ratio(unsigned long val)
{
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0dbb653..1c5f916 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -121,6 +121,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
* For memory reclaim.
*/
int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
+bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg);
bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
bool mem_cgroup_cleancache_disabled(struct page *page);
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
@@ -368,6 +369,11 @@ mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
return 1;
}
+static inline bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg)
+{
+ return false;
+}
+
static inline bool mem_cgroup_low(struct mem_cgroup *root,
struct mem_cgroup *memcg)
{
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 4fcacd9..5e706a5 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -23,6 +23,8 @@ struct shrink_control {
/* current memcg being shrunk (for memcg aware shrinkers) */
struct mem_cgroup *memcg;
+
+ bool for_drop_caches;
};
#define SHRINK_STOP (~0UL)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 867eb5a..c8f7bc3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1453,6 +1453,15 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec,
.extra1 = &zero,
},
+ {
+ .procname = "vfs_cache_min_ratio",
+ .data = &sysctl_vfs_cache_min_ratio,
+ .maxlen = sizeof(sysctl_vfs_cache_min_ratio),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
{
.procname = "legacy_va_layout",
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4763520..89c9edf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1580,6 +1580,18 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
return inactive * inactive_ratio < active;
}
+bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg)
+{
+ unsigned long anon, file, dcache;
+
+ anon = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS);
+ file = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
+ dcache = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SLAB_RECLAIMABLE);
+
+ return dcache / sysctl_vfs_cache_min_ratio <
+ (anon + file + dcache) / 100;
+}
+
/**
* mem_cgroup_low - check if memory consumption is below the normal range
* @root: the highest ancestor to consider
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a7cc964..3f6ce18 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -351,7 +351,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg,
unsigned long nr_scanned,
- unsigned long nr_eligible)
+ unsigned long nr_eligible,
+ bool for_drop_caches)
{
struct shrinker *shrinker;
unsigned long freed = 0;
@@ -381,6 +382,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
.gfp_mask = gfp_mask,
.nid = nid,
.memcg = memcg,
+ .for_drop_caches = for_drop_caches,
};
if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
@@ -408,7 +410,7 @@ void drop_slab_node(int nid)
freed = 0;
do {
freed += shrink_slab(GFP_KERNEL, nid, memcg,
- 1000, 1000);
+ 1000, 1000, true);
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
} while (freed > 10);
}
@@ -2365,7 +2367,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
if (memcg && is_classzone)
shrink_slab(slab_gfp, zone_to_nid(zone),
memcg, sc->nr_scanned - scanned,
- lru_pages);
+ lru_pages, false);
/*
* Direct reclaim and kswapd have to scan all memory
@@ -2409,7 +2411,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
}
shrink_slab(slab_gfp, zone_to_nid(zone), NULL,
- scanned, eligible);
+ scanned, eligible, false);
}
if (reclaim_state) {
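The new knob appears under /proc/sys/vm/ like the other vm_table
entries. A hypothetical userspace check of the current value (the path
follows from the "vfs_cache_min_ratio" procname above):

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/vm/vfs_cache_min_ratio", "r");
        int ratio;

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%d", &ratio) != 1) {
                fclose(f);
                return 1;
        }
        fclose(f);
        printf("vm.vfs_cache_min_ratio = %d\n", ratio);
        return 0;
}

Writing 0 to the file (as root) disables the threshold, since
dcache_is_low() returns false for values <= 0. An explicit
"echo 2 > /proc/sys/vm/drop_caches" is unaffected in either case,
because drop_slab_node() passes for_drop_caches = true.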