[Devel] [PATCH RHEL8 COMMIT] mm/vmscan: shrink tcache ahead of everything else
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Apr 2 16:35:31 MSK 2020
The commit is pushed to "branch-rh8-4.18.0-80.1.2.vz8.3.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-80.1.2.vz8.3.4
------>
commit fab68564f8c6252b213acdbc405b0fd4eaa9d3b0
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date: Thu Apr 2 16:35:30 2020 +0300
mm/vmscan: shrink tcache ahead of everything else
We don't want to evict page cache or swap out anonymous pages while
there are a lot of reclaimable pages in tcache. Reclaim tcache first,
and only after that reclaim the rest if still required.
Notes:
1) Keep the generic tcache shrinkers, so that if new tcache pages are
generated heavily, the background kswapd thread does not forget to
shrink tcache.
2) In shrink_tcache() we don't break out of the for_each_node_mask()
loop even if shrinking the first node already gives us enough
nr_reclaimed. We want to apply similar memory pressure to all nodes
rather than thrash only the first one and stop (see the sketch below).
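For illustration, here is a minimal standalone userspace sketch (not kernel
code) of the ordering described above: drain tcache on every node first,
spreading the pressure evenly, and fall back to regular LRU reclaim only if
the target is still not met. The names (shrink_tcache_node_sim, NR_NODES)
and the per-node page counts are made up for the example; the real
implementation is in the hunks below.

#include <stdio.h>

#define NR_NODES	2
#define SCAN_BATCH	128UL	/* mirrors TCACHE_SCAN_BATCH below */

/* Hypothetical number of reclaimable tcache pages on each node. */
static unsigned long tcache_pages[NR_NODES] = { 300, 50 };

/* Reclaim up to nr_to_scan tcache pages from one node, one batch at most. */
static unsigned long shrink_tcache_node_sim(int nid, unsigned long nr_to_scan)
{
	unsigned long nr = nr_to_scan;

	if (nr > SCAN_BATCH)
		nr = SCAN_BATCH;
	if (nr > tcache_pages[nid])
		nr = tcache_pages[nid];
	tcache_pages[nid] -= nr;
	return nr;
}

/* Walk all nodes repeatedly until the target is met or tcache is drained. */
static unsigned long shrink_tcache_sim(unsigned long nr_to_reclaim)
{
	unsigned long nr_reclaimed = 0, shrunk, want, got;
	int nid;

	do {
		shrunk = 0;
		for (nid = 0; nid < NR_NODES; nid++) {
			/*
			 * Even after the target is met, keep scanning the
			 * remaining nodes at batch granularity so pressure
			 * is spread evenly instead of hitting node 0 only.
			 */
			want = nr_reclaimed < nr_to_reclaim ?
				nr_to_reclaim - nr_reclaimed : SCAN_BATCH;
			got = shrink_tcache_node_sim(nid, want);
			shrunk += got;
			nr_reclaimed += got;
		}
	} while (shrunk && nr_reclaimed < nr_to_reclaim);

	return nr_reclaimed;
}

int main(void)
{
	unsigned long target = 200, got = shrink_tcache_sim(target);

	printf("tcache reclaimed %lu of %lu pages\n", got, target);
	if (got < target)
		printf("falling back to regular LRU reclaim\n");
	printf("tcache left: node0=%lu node1=%lu\n",
	       tcache_pages[0], tcache_pages[1]);
	return 0;
}

With these example numbers both nodes end up scanned (150 and 0 tcache
pages left, respectively), which is why the per-node loop is not broken
as soon as the first node satisfies the target.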
https://jira.sw.ru/browse/PSBM-89403
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
mm/internal.h | 29 +++++++++++++++++++++++++++++
mm/tcache.c | 8 ++++----
mm/vmscan.c | 43 +++++++++++++++++++++++++++++++++++++++++--
3 files changed, 74 insertions(+), 6 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 9e3654d70289..eec4fc63cf4b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -507,6 +507,35 @@ struct tlbflush_unmap_batch;
*/
extern struct workqueue_struct *mm_percpu_wq;
+#ifdef CONFIG_TCACHE
+unsigned long tcache_shrink_scan(struct shrinker *shrinker,
+ struct shrink_control *sc);
+unsigned long tcache_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+
+static inline unsigned long shrink_tcache_node(struct shrink_control *sc)
+{
+ unsigned long ret;
+ extern bool tcache_enabled;
+
+ if (!READ_ONCE(tcache_enabled))
+ return 0;
+
+ ret = tcache_shrink_count(NULL, sc);
+ if (!ret)
+ return ret;
+
+ ret = tcache_shrink_scan(NULL, sc);
+ if (ret == SHRINK_STOP)
+ ret = 0;
+ return ret;
+}
+#else
+static inline unsigned long shrink_tcache_node(struct shrink_control *sc)
+{ return 0; }
+#endif
+
+
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
void try_to_unmap_flush(void);
void try_to_unmap_flush_dirty(void);
diff --git a/mm/tcache.c b/mm/tcache.c
index 6660687e3871..c7c5008fdac8 100644
--- a/mm/tcache.c
+++ b/mm/tcache.c
@@ -170,7 +170,7 @@ static struct tcache_nodeinfo *tcache_nodeinfo;
*/
/* Enable/disable tcache backend (set at boot time) */
-static bool tcache_enabled __read_mostly = true;
+bool tcache_enabled __read_mostly = true;
module_param_named(enabled, tcache_enabled, bool, 0444);
/* Enable/disable populating the cache */
@@ -1176,7 +1176,7 @@ static struct page *tcache_alloc_page(struct tcache_pool *pool)
return page;
}
-static unsigned long tcache_shrink_count(struct shrinker *shrink,
+unsigned long tcache_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
atomic_long_t *nr_pages = &tcache_nodeinfo[sc->nid].nr_pages;
@@ -1190,13 +1190,13 @@ static unsigned long tcache_shrink_count(struct shrinker *shrink,
#define TCACHE_SCAN_BATCH 128UL
static DEFINE_PER_CPU(struct page * [TCACHE_SCAN_BATCH], tcache_page_vec);
-static unsigned long tcache_shrink_scan(struct shrinker *shrink,
+unsigned long tcache_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct page **pages = get_cpu_var(tcache_page_vec);
int nr_isolated, nr_reclaimed;
- if (WARN_ON(sc->nr_to_scan > TCACHE_SCAN_BATCH))
+ if (sc->nr_to_scan > TCACHE_SCAN_BATCH)
sc->nr_to_scan = TCACHE_SCAN_BATCH;
nr_isolated = tcache_lru_isolate(sc->nid, pages, sc->nr_to_scan);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 123dfa493fd9..d99fb2be1c36 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2800,6 +2800,35 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat)
target_lruvec->refaults = refaults;
}
+static void shrink_tcache(struct scan_control *scan_ctrl)
+{
+ int nid;
+ unsigned long shrunk;
+ nodemask_t *nodemask = scan_ctrl->nodemask ? : &node_online_map;
+
+ do {
+ shrunk = 0;
+
+ for_each_node_mask(nid, *nodemask) {
+ struct shrink_control sc = {
+ .gfp_mask = scan_ctrl->gfp_mask,
+ .nid = nid,
+ .memcg = NULL,
+ .nr_to_scan = scan_ctrl->nr_to_reclaim -
+ scan_ctrl->nr_reclaimed,
+ };
+ shrunk = shrink_tcache_node(&sc);
+ scan_ctrl->nr_reclaimed += shrunk;
+ /*
+ * We scan all nodes even if we reclaim more than
+ * nr_to_reclaim, we want to make similar memory
+ * pressure on all nodes and not to thrash only the
+ * first one and stop.
+ */
+ }
+ } while (shrunk && scan_ctrl->nr_reclaimed < scan_ctrl->nr_to_reclaim);
+}
+
/*
* This is the main entry point to direct page reclaim.
*
@@ -2826,8 +2855,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
retry:
delayacct_freepages_start();
- if (!cgroup_reclaim(sc))
+ if (!cgroup_reclaim(sc)) {
__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
+ shrink_tcache(sc);
+ if (sc->nr_reclaimed >= sc->nr_to_reclaim)
+ goto out;
+ }
do {
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
@@ -2866,7 +2899,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
}
}
-
+out:
delayacct_freepages_end();
if (sc->nr_reclaimed)
@@ -3393,6 +3426,12 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
sc.gfp_mask, &nr_soft_scanned);
sc.nr_reclaimed += nr_soft_reclaimed;
+ shrink_tcache(&sc);
+ if (sc.nr_reclaimed >= sc.nr_to_reclaim &&
+ pgdat_balanced(pgdat, order, classzone_idx))
+ goto out;
+
+
/*
* There should be no need to raise the scanning priority if
* enough pages are already being scanned that that high