[Devel] [PATCH rh7 01/11] mm: introduce memalloc_noreclaim_{save,restore}

Andrey Ryabinin aryabinin at virtuozzo.com
Mon Sep 10 18:39:30 MSK 2018


From: Vlastimil Babka <vbabka at suse.cz>

The previous patch ("mm: prevent potential recursive reclaim due to
clearing PF_MEMALLOC") has shown that simply setting and clearing
PF_MEMALLOC in current->flags can result in wrongly clearing a
pre-existing PF_MEMALLOC flag and potentially lead to recursive reclaim.
Let's introduce helpers that support proper nesting by saving the
previous state of the flag, similar to the existing memalloc_noio_* and
memalloc_nofs_* helpers.  Convert existing setting/clearing of
PF_MEMALLOC within mm to the new helpers.

There are no known issues with the converted code, but the change makes
it more robust.
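
For illustration only (not part of the patch), a minimal sketch of the
nesting pattern the new helpers support; the surrounding context is
hypothetical, but the helper calls match the definitions added below:

	/* Outer scope: may or may not already run with PF_MEMALLOC set. */
	unsigned int outer = memalloc_noreclaim_save();

	/* ... enter a nested path that also needs PF_MEMALLOC ... */
	unsigned int inner = memalloc_noreclaim_save();
	/* ... work that must not recurse into reclaim ... */
	memalloc_noreclaim_restore(inner);	/* PF_MEMALLOC stays set here */

	memalloc_noreclaim_restore(outer);	/* original flag state restored */

The point is that restore uses the saved value rather than clearing the
flag unconditionally, so an inner scope cannot drop a PF_MEMALLOC that an
outer scope (as with the old `current->flags &= ~PF_MEMALLOC` pattern)
still relies on.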

Link: http://lkml.kernel.org/r/20170405074700.29871-3-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka at suse.cz>
Suggested-by: Michal Hocko <mhocko at suse.com>
Acked-by: Michal Hocko <mhocko at suse.com>
Acked-by: Hillf Danton <hillf.zj at alibaba-inc.com>
Cc: Mel Gorman <mgorman at techsingularity.net>
Cc: Johannes Weiner <hannes at cmpxchg.org>
Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
Cc: Boris Brezillon <boris.brezillon at free-electrons.com>
Cc: Chris Leech <cleech at redhat.com>
Cc: "David S. Miller" <davem at davemloft.net>
Cc: Eric Dumazet <edumazet at google.com>
Cc: Josef Bacik <jbacik at fb.com>
Cc: Lee Duncan <lduncan at suse.com>
Cc: Michal Hocko <mhocko at suse.com>
Cc: Richard Weinberger <richard at nod.at>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>

https://jira.sw.ru/browse/PSBM-88420
(cherry-picked from commit 499118e966f1d2150bd66647c8932343c4e9a0b8)
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 include/linux/sched/mm.h | 12 ++++++++++++
 mm/page_alloc.c          |  6 ++++--
 mm/vmscan.c              | 15 +++++++++++----
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a7adba1cd0a9..0a9df3e73581 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -3,4 +3,16 @@
 
 #include <linux/sched.h>
 
+static inline unsigned int memalloc_noreclaim_save(void)
+{
+	unsigned int flags = current->flags & PF_MEMALLOC;
+	current->flags |= PF_MEMALLOC;
+	return flags;
+}
+
+static inline void memalloc_noreclaim_restore(unsigned int flags)
+{
+	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+}
+
 #endif /* _LINUX_SCHED_MM_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3d22c4301dc9..b1ae4305a257 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -61,6 +61,7 @@
 #include <linux/migrate.h>
 #include <linux/page_ext.h>
 #include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
 #include <linux/sched/rt.h>
 #include <linux/kthread.h>
 #include <linux/nmi.h>
@@ -2803,12 +2804,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist,
 {
 	struct reclaim_state reclaim_state;
 	int progress;
+	unsigned int noreclaim_flag;
 
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-	current->flags |= PF_MEMALLOC;
+	noreclaim_flag = memalloc_noreclaim_save();
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
 	current->reclaim_state = &reclaim_state;
@@ -2817,7 +2819,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist,
 
 	current->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
-	current->flags &= ~PF_MEMALLOC;
+	memalloc_noreclaim_restore(noreclaim_flag);
 
 	cond_resched();
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7a463b5e508..43577b531ce9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -39,6 +39,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/memcontrol.h>
+#include <linux/sched/mm.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
 #include <linux/oom.h>
@@ -3114,6 +3115,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 	unsigned long nr_reclaimed;
 	struct reclaim_state reclaim_state = { 0 };
 	int nid;
+
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
@@ -3784,8 +3786,9 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
 	struct task_struct *p = current;
 	unsigned long nr_reclaimed;
+	unsigned int noreclaim_flag;
 
-	p->flags |= PF_MEMALLOC;
+	noreclaim_flag = memalloc_noreclaim_save();
 	lockdep_set_current_reclaim_state(sc.gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
@@ -3794,7 +3797,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 
 	p->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
-	p->flags &= ~PF_MEMALLOC;
+	memalloc_noreclaim_restore(noreclaim_flag);
 
 	return nr_reclaimed;
 }
@@ -3958,6 +3961,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	const unsigned long nr_pages = 1 << order;
 	struct task_struct *p = current;
 	struct reclaim_state reclaim_state;
+	unsigned int noreclaim_flag;
+
 	struct scan_control sc = {
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -3974,7 +3979,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * and we also need to be able to write out pages for RECLAIM_WRITE
 	 * and RECLAIM_SWAP.
 	 */
-	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
+	noreclaim_flag = memalloc_noreclaim_save();
+	p->flags |= PF_SWAPWRITE;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
@@ -3990,7 +3996,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	}
 
 	p->reclaim_state = NULL;
-	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
+	current->flags &= ~PF_SWAPWRITE;
+	memalloc_noreclaim_restore(noreclaim_flag);
 	lockdep_clear_current_reclaim_state();
 	return sc.nr_reclaimed >= nr_pages;
 }
-- 
2.16.4


