[Devel] [PATCH 03/17] mm: vmscan: do not scan lruvec if it seems to be unreclaimable

Vladimir Davydov vdavydov at parallels.com
Fri Aug 14 10:03:27 PDT 2015


Currently, on memcg reclaim we only invoke oom if we fail to reclaim
anything from the memory cgroup several times in a row. As a result,
when a memcg is near its limit, processes might get stuck in
__mem_cgroup_try_charge, invoking the reclaimer over and over again
and consuming a lot of cpu time instead of calling oom.

The global reclaimer has a heuristic to detect such situations: if we
have scanned all reclaimable pages more than 6 times without reclaiming
anything, we stop reclaiming and go oom. This patch implements a
similar technique for memcg reclaim.
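
For reference, the global heuristic is implemented by zone_reclaimable()
in mm/vmscan.c. Roughly (a simplified sketch; the exact form differs
between kernel versions), it is:

	static bool zone_reclaimable(struct zone *zone)
	{
		/*
		 * A zone is considered reclaimable until we have scanned
		 * six times the number of pages that could still be
		 * reclaimed from it without making progress.
		 */
		return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
	}

The new lruvec_reclaimable() below applies the same check per lruvec so
that memcg reclaim can skip lruvecs that look unreclaimable.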

I will try to submit something like this upstream.

https://jira.sw.ru/browse/PSBM-35155

Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
 include/linux/mmzone.h |  1 +
 mm/swap.c              |  1 +
 mm/vmscan.c            | 23 +++++++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index abe7110d8fbe..faf3c96abed2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -211,6 +211,7 @@ struct zone_reclaim_stat {
 struct lruvec {
 	struct list_head lists[NR_LRU_LISTS];
 	struct zone_reclaim_stat reclaim_stat;
+	unsigned long pages_scanned;
 #ifdef CONFIG_MEMCG
 	struct zone *zone;
 #endif
diff --git a/mm/swap.c b/mm/swap.c
index 851fff9e1853..eb3add716907 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -60,6 +60,7 @@ static void __page_cache_release(struct page *page)
 		VM_BUG_ON_PAGE(!PageLRU(page), page);
 		__ClearPageLRU(page);
 		del_page_from_lru_list(page, lruvec, page_off_lru(page));
+		lruvec->pages_scanned = 0;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cdebfcd9f154..c5a31fa7c10e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -178,6 +178,20 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 	return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
 }
 
+static bool lruvec_reclaimable(struct lruvec *lruvec, bool may_swap)
+{
+	unsigned long reclaimable;
+
+	reclaimable = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+		      get_lru_size(lruvec, LRU_INACTIVE_FILE);
+
+	if (may_swap && get_nr_swap_pages() > 0)
+		reclaimable += get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+			       get_lru_size(lruvec, LRU_INACTIVE_ANON);
+
+	return lruvec->pages_scanned < reclaimable * 6;
+}
+
 /*
  * Add a shrinker callback to be called from the vm.
  */
@@ -1533,6 +1547,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
 
+	lruvec->pages_scanned += nr_scanned;
 	if (global_reclaim(sc)) {
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1552,6 +1567,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 	spin_lock_irq(&zone->lru_lock);
 
+	if (nr_reclaimed)
+		lruvec->pages_scanned = 0;
+
 	reclaim_stat->recent_scanned[file] += nr_taken;
 
 	if (global_reclaim(sc)) {
@@ -1725,6 +1743,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
 	nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
 				     &nr_scanned, sc, isolate_mode, lru);
+	lruvec->pages_scanned += nr_scanned;
 	if (global_reclaim(sc))
 		zone->pages_scanned += nr_scanned;
 
@@ -2360,6 +2379,10 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc,
 				continue;
 
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+			if (!global_reclaim(sc) &&
+			    !lruvec_reclaimable(lruvec, sc->may_swap))
+				continue;
+
 			scanned = sc->nr_scanned;
 
 			shrink_lruvec(lruvec, sc, &lru_pages);
-- 
2.1.4



