[Devel] [PATCH rh7 01/11] mm: introduce memalloc_noreclaim_{save, restore}
Andrey Ryabinin
aryabinin at virtuozzo.com
Mon Sep 10 18:39:30 MSK 2018
From: Vlastimil Babka <vbabka at suse.cz>
The previous patch ("mm: prevent potential recursive reclaim due to
clearing PF_MEMALLOC") has shown that simply setting and clearing
PF_MEMALLOC in current->flags can result in wrongly clearing a
pre-existing PF_MEMALLOC flag and potentially lead to recursive reclaim.
Let's introduce helpers that support proper nesting by saving the
previous state of the flag, similar to the existing memalloc_noio_* and
memalloc_nofs_* helpers. Convert existing setting/clearing of
PF_MEMALLOC within mm to the new helpers.
There are no known issues with the converted code, but the change makes
it more robust.
Link: http://lkml.kernel.org/r/20170405074700.29871-3-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka at suse.cz>
Suggested-by: Michal Hocko <mhocko at suse.com>
Acked-by: Michal Hocko <mhocko at suse.com>
Acked-by: Hillf Danton <hillf.zj at alibaba-inc.com>
Cc: Mel Gorman <mgorman at techsingularity.net>
Cc: Johannes Weiner <hannes at cmpxchg.org>
Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
Cc: Boris Brezillon <boris.brezillon at free-electrons.com>
Cc: Chris Leech <cleech at redhat.com>
Cc: "David S. Miller" <davem at davemloft.net>
Cc: Eric Dumazet <edumazet at google.com>
Cc: Josef Bacik <jbacik at fb.com>
Cc: Lee Duncan <lduncan at suse.com>
Cc: Michal Hocko <mhocko at suse.com>
Cc: Richard Weinberger <richard at nod.at>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
https://jira.sw.ru/browse/PSBM-88420
(cherry-picked from commit 499118e966f1d2150bd66647c8932343c4e9a0b8)
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
include/linux/sched/mm.h | 12 ++++++++++++
mm/page_alloc.c | 6 ++++--
mm/vmscan.c | 15 +++++++++++----
3 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a7adba1cd0a9..0a9df3e73581 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -3,4 +3,16 @@
#include <linux/sched.h>
+static inline unsigned int memalloc_noreclaim_save(void)
+{
+ unsigned int flags = current->flags & PF_MEMALLOC;
+ current->flags |= PF_MEMALLOC;
+ return flags;
+}
+
+static inline void memalloc_noreclaim_restore(unsigned int flags)
+{
+ current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+}
+
#endif /* _LINUX_SCHED_MM_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3d22c4301dc9..b1ae4305a257 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -61,6 +61,7 @@
#include <linux/migrate.h>
#include <linux/page_ext.h>
#include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
#include <linux/sched/rt.h>
#include <linux/kthread.h>
#include <linux/nmi.h>
@@ -2803,12 +2804,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist,
{
struct reclaim_state reclaim_state;
int progress;
+ unsigned int noreclaim_flag;
cond_resched();
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state;
@@ -2817,7 +2819,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist,
current->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
- current->flags &= ~PF_MEMALLOC;
+ memalloc_noreclaim_restore(noreclaim_flag);
cond_resched();
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7a463b5e508..43577b531ce9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -39,6 +39,7 @@
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
+#include <linux/sched/mm.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>
#include <linux/oom.h>
@@ -3114,6 +3115,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_reclaimed;
struct reclaim_state reclaim_state = { 0 };
int nid;
+
struct scan_control sc = {
.may_writepage = !laptop_mode,
.may_unmap = 1,
@@ -3784,8 +3786,9 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
struct task_struct *p = current;
unsigned long nr_reclaimed;
+ unsigned int noreclaim_flag;
- p->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3794,7 +3797,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
p->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
- p->flags &= ~PF_MEMALLOC;
+ memalloc_noreclaim_restore(noreclaim_flag);
return nr_reclaimed;
}
@@ -3958,6 +3961,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
const unsigned long nr_pages = 1 << order;
struct task_struct *p = current;
struct reclaim_state reclaim_state;
+ unsigned int noreclaim_flag;
+
struct scan_control sc = {
.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -3974,7 +3979,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* and we also need to be able to write out pages for RECLAIM_WRITE
* and RECLAIM_SWAP.
*/
- p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
+ noreclaim_flag = memalloc_noreclaim_save();
+ p->flags |= PF_SWAPWRITE;
lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3990,7 +3996,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
}
p->reclaim_state = NULL;
- current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
+ current->flags &= ~PF_SWAPWRITE;
+ memalloc_noreclaim_restore(noreclaim_flag);
lockdep_clear_current_reclaim_state();
return sc.nr_reclaimed >= nr_pages;
}
--
2.16.4
More information about the Devel
mailing list