[Devel] [PATCH vz8 09/15] mm: vmscan: replace shrink_node() loop with a retry jump
Andrey Ryabinin
aryabinin at virtuozzo.com
Thu Mar 26 21:09:35 MSK 2020
From: Johannes Weiner <hannes at cmpxchg.org>
Most of the function body is inside a loop, which imposes an additional
indentation and scoping level that makes the code a bit hard to follow and
modify.
The looping only happens in case of reclaim-compaction, which isn't the
common case. So rather than adding yet another function level to the
reclaim path and having every reclaim invocation go through a level that
only exists for one specific corner case, use a retry goto.
Link: http://lkml.kernel.org/r/20191022144803.302233-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes at cmpxchg.org>
Reviewed-by: Roman Gushchin <guro at fb.com>
Reviewed-by: Shakeel Butt <shakeelb at google.com>
Cc: Michal Hocko <mhocko at suse.com>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
(cherry picked from commit d2af339706be318dadcbe14c8935426ff401d7b1)
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
mm/vmscan.c | 261 ++++++++++++++++++++++++++--------------------------
1 file changed, 131 insertions(+), 130 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dbbef91f5d2b..e12bd53b846f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2493,161 +2493,162 @@ static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
{
struct reclaim_state *reclaim_state = current->reclaim_state;
+ struct mem_cgroup *root = sc->target_mem_cgroup;
unsigned long nr_reclaimed, nr_scanned;
bool reclaimable = false;
+ struct mem_cgroup *memcg;
+ struct mem_cgroup_reclaim_cookie reclaim = {
+ .pgdat = pgdat,
+ .priority = sc->priority,
+ };
- do {
- struct mem_cgroup *root = sc->target_mem_cgroup;
- struct mem_cgroup_reclaim_cookie reclaim = {
- .pgdat = pgdat,
- .priority = sc->priority,
- };
- struct mem_cgroup *memcg;
-
- memset(&sc->nr, 0, sizeof(sc->nr));
-
- nr_reclaimed = sc->nr_reclaimed;
- nr_scanned = sc->nr_scanned;
+again:
+ reclaim.generation = 0;
+ memset(&sc->nr, 0, sizeof(sc->nr));
+ nr_reclaimed = sc->nr_reclaimed;
+ nr_scanned = sc->nr_scanned;
- memcg = mem_cgroup_iter(root, NULL, &reclaim);
- do {
- unsigned long reclaimed;
- unsigned long scanned;
+ memcg = mem_cgroup_iter(root, NULL, &reclaim);
+ do {
+ unsigned long lru_pages;
+ unsigned long reclaimed;
+ unsigned long scanned;
- switch (mem_cgroup_protected(root, memcg)) {
- case MEMCG_PROT_MIN:
- /*
- * Hard protection.
- * If there is no reclaimable memory, OOM.
- */
+ switch (mem_cgroup_protected(root, memcg)) {
+ case MEMCG_PROT_MIN:
+ /*
+ * Hard protection.
+ * If there is no reclaimable memory, OOM.
+ */
+ continue;
+ case MEMCG_PROT_LOW:
+ /*
+ * Soft protection.
+ * Respect the protection only as long as
+ * there is an unprotected supply
+ * of reclaimable memory from other cgroups.
+ */
+ if (!sc->memcg_low_reclaim) {
+ sc->memcg_low_skipped = 1;
continue;
- case MEMCG_PROT_LOW:
- /*
- * Soft protection.
- * Respect the protection only as long as
- * there is an unprotected supply
- * of reclaimable memory from other cgroups.
- */
- if (!sc->memcg_low_reclaim) {
- sc->memcg_low_skipped = 1;
- continue;
- }
- memcg_memory_event(memcg, MEMCG_LOW);
- break;
- case MEMCG_PROT_NONE:
- break;
}
+ memcg_memory_event(memcg, MEMCG_LOW);
+ break;
+ case MEMCG_PROT_NONE:
+ break;
+ }
- reclaimed = sc->nr_reclaimed;
- scanned = sc->nr_scanned;
- shrink_node_memcg(pgdat, memcg, sc);
+ reclaimed = sc->nr_reclaimed;
+ scanned = sc->nr_scanned;
+ shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
- if (memcg)
- shrink_slab(sc->gfp_mask, pgdat->node_id,
- memcg, sc->priority, false);
+ if (memcg)
+ shrink_slab(sc->gfp_mask, pgdat->node_id,
+ memcg, sc->priority, false);
- /* Record the group's reclaim efficiency */
- vmpressure(sc->gfp_mask, memcg, false,
- sc->nr_scanned - scanned,
- sc->nr_reclaimed - reclaimed);
+ /* Record the group's reclaim efficiency */
+ vmpressure(sc->gfp_mask, memcg, false,
+ sc->nr_scanned - scanned,
+ sc->nr_reclaimed - reclaimed);
- /*
- * Direct reclaim and kswapd have to scan all memory
- * cgroups to fulfill the overall scan target for the
- * node.
- *
- * Limit reclaim, on the other hand, only cares about
- * nr_to_reclaim pages to be reclaimed and it will
- * retry with decreasing priority if one round over the
- * whole hierarchy is not sufficient.
- */
- if (cgroup_reclaim(sc) &&
- sc->nr_reclaimed >= sc->nr_to_reclaim) {
- mem_cgroup_iter_break(root, memcg);
- break;
- }
- } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
+ /*
+ * Direct reclaim and kswapd have to scan all memory
+ * cgroups to fulfill the overall scan target for the
+ * node.
+ *
+ * Limit reclaim, on the other hand, only cares about
+ * nr_to_reclaim pages to be reclaimed and it will
+ * retry with decreasing priority if one round over the
+ * whole hierarchy is not sufficient.
+ */
+ if (cgroup_reclaim(sc) &&
+ sc->nr_reclaimed >= sc->nr_to_reclaim) {
+ mem_cgroup_iter_break(root, memcg);
+ break;
+ }
+ } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
- if (!cgroup_reclaim(sc))
- shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
- sc->priority, false);
+ if (!cgroup_reclaim(sc))
+ shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
+ sc->priority, false);
- if (reclaim_state) {
- sc->nr_reclaimed += reclaim_state->reclaimed_slab;
- reclaim_state->reclaimed_slab = 0;
- }
+ if (reclaim_state) {
+ sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+ reclaim_state->reclaimed_slab = 0;
+ }
- /* Record the subtree's reclaim efficiency */
- vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+ /* Record the subtree's reclaim efficiency */
+ vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
- if (sc->nr_reclaimed - nr_reclaimed)
- reclaimable = true;
+ if (sc->nr_reclaimed - nr_reclaimed)
+ reclaimable = true;
- if (current_is_kswapd()) {
- /*
- * If reclaim is isolating dirty pages under writeback,
- * it implies that the long-lived page allocation rate
- * is exceeding the page laundering rate. Either the
- * global limits are not being effective at throttling
- * processes due to the page distribution throughout
- * zones or there is heavy usage of a slow backing
- * device. The only option is to throttle from reclaim
- * context which is not ideal as there is no guarantee
- * the dirtying process is throttled in the same way
- * balance_dirty_pages() manages.
- *
- * Once a node is flagged PGDAT_WRITEBACK, kswapd will
- * count the number of pages under pages flagged for
- * immediate reclaim and stall if any are encountered
- * in the nr_immediate check below.
- */
- if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
- set_bit(PGDAT_WRITEBACK, &pgdat->flags);
-
- /*
- * Tag a node as congested if all the dirty pages
- * scanned were backed by a congested BDI and
- * wait_iff_congested will stall.
- */
- if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
- set_bit(PGDAT_CONGESTED, &pgdat->flags);
-
- /* Allow kswapd to start writing pages during reclaim.*/
- if (sc->nr.unqueued_dirty == sc->nr.file_taken)
- set_bit(PGDAT_DIRTY, &pgdat->flags);
-
- /*
- * If kswapd scans pages marked marked for immediate
- * reclaim and under writeback (nr_immediate), it
- * implies that pages are cycling through the LRU
- * faster than they are written so also forcibly stall.
- */
- if (sc->nr.immediate)
- congestion_wait(BLK_RW_ASYNC, HZ/10);
- }
+ if (current_is_kswapd()) {
+ /*
+ * If reclaim is isolating dirty pages under writeback,
+ * it implies that the long-lived page allocation rate
+ * is exceeding the page laundering rate. Either the
+ * global limits are not being effective at throttling
+ * processes due to the page distribution throughout
+ * zones or there is heavy usage of a slow backing
+ * device. The only option is to throttle from reclaim
+ * context which is not ideal as there is no guarantee
+ * the dirtying process is throttled in the same way
+ * balance_dirty_pages() manages.
+ *
+ * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+ * count the number of pages under pages flagged for
+ * immediate reclaim and stall if any are encountered
+ * in the nr_immediate check below.
+ */
+ if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+ set_bit(PGDAT_WRITEBACK, &pgdat->flags);
/*
- * Legacy memcg will stall in page writeback so avoid forcibly
- * stalling in wait_iff_congested().
+ * Tag a node as congested if all the dirty pages
+ * scanned were backed by a congested BDI and
+ * wait_iff_congested will stall.
*/
- if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
- sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
- set_memcg_congestion(pgdat, root, true);
+ if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+ set_bit(PGDAT_CONGESTED, &pgdat->flags);
+
+ /* Allow kswapd to start writing pages during reclaim.*/
+ if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+ set_bit(PGDAT_DIRTY, &pgdat->flags);
/*
- * Stall direct reclaim for IO completions if underlying BDIs
- * and node is congested. Allow kswapd to continue until it
- * starts encountering unqueued dirty pages or cycling through
- * the LRU too quickly.
+ * If kswapd scans pages marked marked for immediate
+ * reclaim and under writeback (nr_immediate), it
+ * implies that pages are cycling through the LRU
+ * faster than they are written so also forcibly stall.
*/
- if (!sc->hibernation_mode && !current_is_kswapd() &&
- current_may_throttle() && pgdat_memcg_congested(pgdat, root))
- wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+ if (sc->nr.immediate)
+ congestion_wait(BLK_RW_ASYNC, HZ/10);
+ }
+
+ /*
+ * Legacy memcg will stall in page writeback so avoid forcibly
+ * stalling in wait_iff_congested().
+ */
+ if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
+ sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+ set_memcg_congestion(pgdat, root, true);
+
+ /*
+ * Stall direct reclaim for IO completions if underlying BDIs
+ * and node is congested. Allow kswapd to continue until it
+ * starts encountering unqueued dirty pages or cycling through
+ * the LRU too quickly.
+ */
+ if (!sc->hibernation_mode && !current_is_kswapd() &&
+ current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+ wait_iff_congested(BLK_RW_ASYNC, HZ/10);
- } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
- sc->nr_scanned - nr_scanned, sc));
+ if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
+ sc->nr_scanned - nr_scanned, sc))
+ goto again;
/*
* Kswapd gives up on balancing particular nodes after too
--
2.24.1
More information about the Devel
mailing list