[Devel] [PATCH RHEL7 COMMIT] mm/vmscan: active lru protection fixups.

Konstantin Khorenko khorenko at virtuozzo.com
Wed Mar 6 11:49:02 MSK 2019


The commit is pushed to "branch-rh7-3.10.0-957.1.3.vz7.83.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.1.3.vz7.83.16
------>
commit 9b87d9c1149fc7f619ea464ce0327806885ec218
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date:   Wed Mar 6 10:28:39 2019 +0300

    mm/vmscan: active lru protection fixups.
    
    There are several problems with our active list protection
    algorithm:
     - Relying on sc->may_thrash is wrong and also very slow.
       sc->may_thrash is set only if we reclaimed nothing after the priority
       dropped down to 0. If we reclaimed something (which could be slab, for
       example), it won't be set, so the active list protection becomes too
       strong. Instead of sc->may_thrash, use sc->may_shrink_active and
       sc->has_inactive to identify whether the memcg tree has cgroups with a
       large inactive list (see the sketch below).
    
     - Anon aging. On every reclaim cycle we shrink some of the active anon
       list even if we don't want to reclaim anon at all. With active list
       protection, anon aging makes anon reclaim a lot more aggressive than
       page cache reclaim and leads to anon thrashing when the inactive page
       cache is low. Move the aging under "if (sc->may_shrink_active)" to fix
       that.
    
    https://pmc.acronis.com/browse/VSTOR-20859
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
    
    Note: ported to vz7.83.x branch without patches for working set.
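    
    Illustrative sketch (not part of the patch): the two-pass scheme above can
    be modeled by a small standalone C program. All names below, such as
    cgroup_state, scan_control_sketch, scan_one_cgroup() and reclaim_pass(),
    are invented for illustration and do not exist in the kernel; only the
    may_shrink_active/has_inactive retry logic mirrors what the patch adds to
    get_scan_count() and shrink_zone().
    
    #include <stdbool.h>
    #include <stdio.h>
    
    struct cgroup_state {
            bool inactive_file_low;   /* inactive file list below target ratio */
            bool inactive_anon_low;   /* inactive anon list below target ratio */
    };
    
    struct scan_control_sketch {
            bool may_shrink_active;   /* allowed to eat into active lists */
            bool has_inactive;        /* some cgroup still has enough inactive */
    };
    
    /* Roughly what get_scan_count() does per cgroup in this patch. */
    static void scan_one_cgroup(struct cgroup_state *cg,
                                struct scan_control_sketch *sc)
    {
            /* Remember whether this cgroup still has reclaimable inactive pages. */
            sc->has_inactive |= !cg->inactive_file_low || !cg->inactive_anon_low;
    
            /*
             * Pass 1 (may_shrink_active == false): skip LRUs whose inactive
             * part is low, i.e. protect their active lists.  Pass 2 scans
             * everything, active lists included.
             */
            if (!sc->may_shrink_active &&
                cg->inactive_file_low && cg->inactive_anon_low)
                    printf("  cgroup protected, nothing scanned\n");
            else
                    printf("  cgroup scanned%s\n",
                           sc->may_shrink_active ? " (active lists included)" : "");
    }
    
    /* Roughly the retry loop this patch adds to shrink_zone(). */
    static void reclaim_pass(struct cgroup_state *cgs, int n)
    {
            struct scan_control_sketch sc = { .may_shrink_active = false };
            bool retry;
    
            do {
                    retry = false;
                    sc.has_inactive = false;
    
                    for (int i = 0; i < n; i++)
                            scan_one_cgroup(&cgs[i], &sc);
    
                    /*
                     * No cgroup had enough inactive pages and we were still
                     * protecting active lists: drop the protection and retry.
                     */
                    if (!sc.has_inactive && !sc.may_shrink_active) {
                            sc.may_shrink_active = true;
                            retry = true;
                    }
            } while (retry);
    }
    
    int main(void)
    {
            /* All inactive lists are low: this forces the second pass. */
            struct cgroup_state cgs[] = {
                    { .inactive_file_low = true, .inactive_anon_low = true },
                    { .inactive_file_low = true, .inactive_anon_low = true },
            };
    
            reclaim_pass(cgs, 2);
            return 0;
    }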
---
 mm/vmscan.c | 56 +++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8ede70b78f6f..2a01c8791672 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -106,6 +106,9 @@ struct scan_control {
 	/* anon vs. file LRUs scanning "ratio" */
 	int swappiness;
 
+	bool may_shrink_active;
+	bool has_inactive;
+
 	/*
 	 * The memory cgroup that hit its limit and as a result is the
 	 * primary target of this reclaim invocation.
@@ -2045,17 +2048,12 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 				 struct lruvec *lruvec, struct scan_control *sc)
 {
 	if (is_active_lru(lru)) {
-		if (sc->may_thrash &&
-		    inactive_list_is_low(lruvec, is_file_lru(lru),
+		if (inactive_list_is_low(lruvec, is_file_lru(lru),
 					 sc->target_mem_cgroup, true))
 			shrink_active_list(nr_to_scan, lruvec, sc, lru);
 		return 0;
 	}
-	if (sc->may_thrash ||
-	    !inactive_list_is_low(lruvec, is_file_lru(lru),
-				  sc->target_mem_cgroup, false))
-		return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
-	return 0;
+	return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
 }
 
 #ifdef CONFIG_MEMCG
@@ -2132,6 +2130,10 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	bool force_scan = false;
 	unsigned long ap, fp;
 	enum lru_list lru;
+	bool inactive_file_low = inactive_list_is_low(lruvec, true,
+				 sc->target_mem_cgroup, false);
+	bool inactive_anon_low = inactive_list_is_low(lruvec, false,
+				 sc->target_mem_cgroup, false);
 
 	/*
 	 * If the zone or memcg is small, nr[l] can be 0.  This
@@ -2208,7 +2210,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	 * There is enough inactive page cache, do not reclaim
 	 * anything from the anonymous working set right now.
 	 */
-	if (!inactive_list_is_low(lruvec, true, sc->target_mem_cgroup, false) &&
+	if (!inactive_file_low &&
 	    get_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority > 0) {
 		scan_balance = SCAN_FILE;
 		goto out;
@@ -2261,6 +2263,8 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	fraction[1] = fp;
 	denominator = ap + fp + 1;
 out:
+	sc->has_inactive = !inactive_file_low ||
+		((scan_balance != SCAN_FILE) && !inactive_anon_low);
 	*lru_pages = 0;
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
@@ -2270,6 +2274,10 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		size = get_lru_size(lruvec, lru);
 		scan = size >> sc->priority;
 
+		if (!sc->may_shrink_active &&
+		    ((file && inactive_file_low) || (!file && inactive_anon_low)))
+			scan = 0;
+
 		if (!scan && force_scan)
 			scan = min(size, SWAP_CLUSTER_MAX);
 
@@ -2300,6 +2308,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		*lru_pages += size;
 		nr[lru] = scan;
 	}
+
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio to maintain
+	 * enough reclaim candidates for the next reclaim cycle.
+	 */
+	if (scan_balance != SCAN_FILE && inactive_anon_low &&
+	    sc->may_shrink_active)
+		nr[LRU_ACTIVE_ANON] += SWAP_CLUSTER_MAX;
 }
 
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
@@ -2428,14 +2445,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc,
 	blk_finish_plug(&plug);
 	sc->nr_reclaimed += nr_reclaimed;
 
-	/*
-	 * Even if we did not try to evict anon pages at all, we want to
-	 * rebalance the anon lru active/inactive ratio.
-	 */
-	if (inactive_list_is_low(lruvec, false, sc->target_mem_cgroup, true))
-		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
-				   sc, LRU_ACTIVE_ANON);
-
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
@@ -2521,6 +2530,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 	unsigned long nr_reclaimed, nr_scanned;
 	gfp_t slab_gfp = sc->gfp_mask;
 	bool slab_only = sc->slab_only;
+	bool retry;
 
 	/* Disable fs-related IO for direct reclaim */
 	if (!sc->target_mem_cgroup &&
@@ -2537,6 +2547,9 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 		struct mem_cgroup *memcg;
 		struct reclaim_stat stat = {};
 
+		retry = false;
+		sc->has_inactive = false;
+
 		sc->stat = &stat;
 
 		nr_reclaimed = sc->nr_reclaimed;
@@ -2587,6 +2600,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 			}
 		} while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
 
+		if (!sc->has_inactive && !sc->may_shrink_active) {
+			sc->may_shrink_active = 1;
+			retry = true;
+			continue;
+		}
+
 		if (global_reclaim(sc)) {
 			/*
 			 * If reclaim is isolating dirty pages under writeback, it implies
@@ -2651,7 +2670,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 			   sc->nr_scanned - nr_scanned,
 			   sc->nr_reclaimed - nr_reclaimed);
 
-	} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
+	} while (retry || should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
 					 sc->nr_scanned - nr_scanned, sc));
 }
 
@@ -3211,6 +3230,9 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc)
 	if (!total_swap_pages)
 		return;
 
+	if (!sc->may_shrink_active)
+		return;
+
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
 		struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);


