[Devel] [PATCH RHEL7 COMMIT] mm/vmscan: active lru protection fixups.

Konstantin Khorenko khorenko at virtuozzo.com
Wed Mar 6 14:08:26 MSK 2019


The commit is pushed to "branch-rh7-3.10.0-957.5.1.vz7.84.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.5.1.vz7.84.4
------>
commit 7b62fc3715c56542a902ddab1788d30fa3adf930
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date:   Wed Mar 6 14:08:23 2019 +0300

    mm/vmscan: active lru protection fixups.
    
    There are several problems with our active list protection
    algorithm:
     - Relying on sc->may_thrash is wrong and also very slow.
       sc->may_thrash is set only if we reclaimed nothing after the priority
       dropped down to 0. If we reclaimed something (which could be e.g. slab),
       we won't set it, so the active list protection becomes too strong.
       Instead of sc->may_thrash, use sc->may_shrink_active and sc->has_inactive
       to identify whether the memcg tree has cgroups with big inactive lists
       (see the sketch after this list).
    
     - Anon aging. On every reclaim cycle we shrink some of the active anon list
       even if we don't want to reclaim anon at all. With active list protection,
       anon aging makes anon reclaim a lot more aggressive than page cache reclaim
       and leads to anon thrashing when the page cache inactive list is low.
       Move the aging under "if (sc->may_shrink_active)" to fix that.
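
    A minimal, self-contained userspace sketch of the intended two-pass
    flow (illustrative only, not kernel code: apart from the scan_control
    fields added by this patch, every name below is a stand-in).  The
    first pass over the memcg tree keeps active lists protected; only if
    no cgroup reported a big enough inactive list is the protection
    lifted and the pass retried once:

        #include <stdbool.h>
        #include <stdio.h>

        struct scan_control {
                bool may_shrink_active;
                bool has_inactive;
        };

        /* Stand-in for shrinking one memcg's lruvec. */
        static void shrink_one_memcg(struct scan_control *sc, bool inactive_low)
        {
                if (!inactive_low)
                        sc->has_inactive = true;   /* has reclaim candidates */
                /*
                 * Active lists (and anon aging) would be touched here
                 * only when sc->may_shrink_active is set.
                 */
        }

        int main(void)
        {
                /* Three cgroups, all with low inactive lists. */
                bool inactive_low[] = { true, true, true };
                struct scan_control sc = { .may_shrink_active = false };
                bool retry;

                do {
                        retry = false;
                        sc.has_inactive = false;

                        for (int i = 0; i < 3; i++)
                                shrink_one_memcg(&sc, inactive_low[i]);

                        /* Nobody had enough inactive: lift protection, retry once. */
                        if (!sc.has_inactive && !sc.may_shrink_active) {
                                sc.may_shrink_active = true;
                                retry = true;
                        }
                } while (retry);

                printf("may_shrink_active = %d\n", sc.may_shrink_active);
                return 0;
        }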
    
    https://pmc.acronis.com/browse/VSTOR-20859
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 mm/vmscan.c | 55 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 17 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 87384a4fb436..a122e4cfa1a4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -103,6 +103,10 @@ struct scan_control {
 	/* Reclaim only slab */
 	bool slab_only;
 
+	bool may_shrink_active;
+
+	bool has_inactive;
+
 	/*
 	 * The memory cgroup that hit its limit and as a result is the
 	 * primary target of this reclaim invocation.
@@ -2043,16 +2047,11 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 				 struct scan_control *sc)
 {
 	if (is_active_lru(lru)) {
-		if (sc->may_thrash &&
-		    inactive_list_is_low(lruvec, is_file_lru(lru), memcg, true))
+		if (inactive_list_is_low(lruvec, is_file_lru(lru), memcg, true))
 			shrink_active_list(nr_to_scan, lruvec, sc, lru);
 		return 0;
 	}
-	if (sc->may_thrash ||
-	    !inactive_list_is_low(lruvec, is_file_lru(lru),
-				  sc->target_mem_cgroup, false))
-		return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
-	return 0;
+	return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
 }
 
 #ifdef CONFIG_MEMCG
@@ -2132,6 +2131,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	bool force_scan = false;
 	unsigned long ap, fp;
 	enum lru_list lru;
+	bool inactive_file_low = inactive_list_is_low(lruvec, true, memcg, false);
+	bool inactive_anon_low = inactive_list_is_low(lruvec, false, memcg, false);
 
 	/*
 	 * If the zone or memcg is small, nr[l] can be 0.  This
@@ -2208,7 +2209,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	 * There is enough inactive page cache, do not reclaim
 	 * anything from the anonymous working set right now.
 	 */
-	if (!inactive_list_is_low(lruvec, true, memcg, false) &&
+	if (!inactive_file_low &&
 	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
 		scan_balance = SCAN_FILE;
 		goto out;
@@ -2261,6 +2262,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	fraction[1] = fp;
 	denominator = ap + fp + 1;
 out:
+	sc->has_inactive = !inactive_file_low ||
+		((scan_balance != SCAN_FILE) && !inactive_anon_low);
 	*lru_pages = 0;
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
@@ -2270,6 +2273,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 		size = lruvec_lru_size(lruvec, lru);
 		scan = size >> sc->priority;
 
+		if (!sc->may_shrink_active &&
+		    ((file && inactive_file_low) || (!file && inactive_anon_low)))
+			scan = 0;
+
 		if (!scan && force_scan)
 			scan = min(size, SWAP_CLUSTER_MAX);
 
@@ -2300,6 +2307,15 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 		*lru_pages += size;
 		nr[lru] = scan;
 	}
+
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio to maintain
+	 * enough reclaim candidates for the next reclaim cycle.
+	 */
+	if (scan_balance != SCAN_FILE && inactive_anon_low &&
+	    sc->may_shrink_active)
+		nr[LRU_ACTIVE_ANON] += SWAP_CLUSTER_MAX;
 }
 
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
@@ -2429,14 +2445,6 @@ static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg,
 	blk_finish_plug(&plug);
 	sc->nr_reclaimed += nr_reclaimed;
 
-	/*
-	 * Even if we did not try to evict anon pages at all, we want to
-	 * rebalance the anon lru active/inactive ratio.
-	 */
-	if (inactive_list_is_low(lruvec, false, memcg, true))
-		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
-				   sc, LRU_ACTIVE_ANON);
-
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
@@ -2522,6 +2530,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 	unsigned long nr_reclaimed, nr_scanned;
 	gfp_t slab_gfp = sc->gfp_mask;
 	bool slab_only = sc->slab_only;
+	bool retry;
 
 	/* Disable fs-related IO for direct reclaim */
 	if (!sc->target_mem_cgroup &&
@@ -2538,6 +2547,9 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 		struct mem_cgroup *memcg;
 		struct reclaim_stat stat = {};
 
+		retry = false;
+		sc->has_inactive = false;
+
 		sc->stat = &stat;
 
 		nr_reclaimed = sc->nr_reclaimed;
@@ -2585,6 +2597,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 			}
 		} while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
 
+		if (!sc->has_inactive && !sc->may_shrink_active) {
+			sc->may_shrink_active = 1;
+			retry = true;
+			continue;
+		}
+
 		if (global_reclaim(sc)) {
 			/*
 			 * If reclaim is isolating dirty pages under writeback, it implies
@@ -2649,7 +2667,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 			   sc->nr_scanned - nr_scanned,
 			   sc->nr_reclaimed - nr_reclaimed);
 
-	} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
+	} while (retry || should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
 					 sc->nr_scanned - nr_scanned, sc));
 }
 
@@ -3225,6 +3243,9 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
 	if (!total_swap_pages)
 		return;
 
+	if (!sc->may_shrink_active)
+		return;
+
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
 		struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);


