[Devel] [PATCH RHEL8 COMMIT] ms/mm: clean up and clarify lruvec lookup procedure

Konstantin Khorenko khorenko at virtuozzo.com
Thu Apr 2 16:02:53 MSK 2020


The commit is pushed to "branch-rh8-4.18.0-80.1.2.vz8.3.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-80.1.2.vz8.3.4
------>
commit 1793cc28e1cfb62ffafcf83839af10705422b885
Author: Johannes Weiner <hannes at cmpxchg.org>
Date:   Thu Apr 2 16:02:52 2020 +0300

    ms/mm: clean up and clarify lruvec lookup procedure
    
    There is a per-memcg lruvec and a NUMA node lruvec.  Which one is being
    used is somewhat confusing right now, and it's easy to make mistakes -
    especially when it comes to global reclaim.
    
    How it works: when memory cgroups are enabled, we always use the
    root_mem_cgroup's per-node lruvecs.  When memory cgroups are not compiled
    in or disabled at runtime, we use pgdat->lruvec.
    
    Document that in a comment.
    
    Due to the way the reclaim code is generalized, all lookups use the
    mem_cgroup_lruvec() helper function, and nobody should have to find the
    right lruvec manually right now.  But to avoid future mistakes, rename the
    pgdat->lruvec member to pgdat->__lruvec and delete the convenience wrapper
    that suggests it's a commonly accessed member.
    
    While in this area, swap the mem_cgroup_lruvec() argument order.  The name
    suggests a memcg operation, yet it takes a pgdat first and a memcg second.
    I have to do a double take every time I call this.  Fix that.
    
    Link: http://lkml.kernel.org/r/20191022144803.302233-3-hannes@cmpxchg.org
    Signed-off-by: Johannes Weiner <hannes at cmpxchg.org>
    Acked-by: Michal Hocko <mhocko at suse.com>
    Reviewed-by: Shakeel Butt <shakeelb at google.com>
    Cc: Roman Gushchin <guro at fb.com>
    Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
    
    (cherry picked from commit 867e5e1de14b2b2bde324cdfeec3f3f83eb21424)
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 include/linux/memcontrol.h | 24 ++++++++++++------------
 include/linux/mmzone.h     | 15 ++++++++-------
 mm/memcontrol.c            |  6 +++---
 mm/page_alloc.c            |  2 +-
 mm/vmscan.c                |  6 +++---
 mm/workingset.c            |  6 +++---
 6 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 30e737be4cf7..c359e48cf266 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -356,22 +356,22 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
 }
 
 /**
- * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
+ * mem_cgroup_lruvec - get the lru list vector for a memcg & node
  * @node: node of the wanted lruvec
  * @memcg: memcg of the wanted lruvec
  *
- * Returns the lru list vector holding pages for a given @node or a given
- * @memcg and @zone. This can be the node lruvec, if the memory controller
- * is disabled.
+ * Returns the lru list vector holding pages for a given @memcg &
+ * @node combination. This can be the node lruvec, if the memory
+ * controller is disabled.
  */
-static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
-				struct mem_cgroup *memcg)
+static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
+					       struct pglist_data *pgdat)
 {
 	struct mem_cgroup_per_node *mz;
 	struct lruvec *lruvec;
 
 	if (mem_cgroup_disabled()) {
-		lruvec = node_lruvec(pgdat);
+		lruvec = &pgdat->__lruvec;
 		goto out;
 	}
 
@@ -701,7 +701,7 @@ static inline void __mod_lruvec_page_state(struct page *page,
 		return;
 	}
 
-	lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
+	lruvec = mem_cgroup_lruvec(page->mem_cgroup, pgdat);
 	__mod_lruvec_state(lruvec, idx, val);
 }
 
@@ -879,16 +879,16 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
 {
 }
 
-static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
-				struct mem_cgroup *memcg)
+static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
+					       struct pglist_data *pgdat)
 {
-	return node_lruvec(pgdat);
+	return &pgdat->__lruvec;
 }
 
 static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
 						    struct pglist_data *pgdat)
 {
-	return &pgdat->lruvec;
+	return &pgdat->__lruvec;
 }
 
 static inline bool mm_match_cgroup(struct mm_struct *mm,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 01623f17d2a5..2d81ecb3f852 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -719,7 +719,13 @@ typedef struct pglist_data {
 #endif
 
 	/* Fields commonly accessed by the page reclaim scanner */
-	struct lruvec		lruvec;
+
+	/*
+	 * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
+	 *
+	 * Use mem_cgroup_lruvec() to look up lruvecs.
+	 */
+	struct lruvec		__lruvec;
 
 	unsigned long		flags;
 
@@ -749,11 +755,6 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone)
 	return &zone->zone_pgdat->lru_lock;
 }
 
-static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
-{
-	return &pgdat->lruvec;
-}
-
 static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
 {
 	return pgdat->node_start_pfn + pgdat->node_spanned_pages;
@@ -810,7 +811,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
 #ifdef CONFIG_MEMCG
 	return lruvec->pgdat;
 #else
-	return container_of(lruvec, struct pglist_data, lruvec);
+	return container_of(lruvec, struct pglist_data, __lruvec);
 #endif
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 47304c1161bc..6909824a0ba1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -587,7 +587,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 					   int nid, unsigned int lru_mask)
 {
-	struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
+	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
 	unsigned long nr = 0;
 	enum lru_list lru;
 
@@ -945,7 +945,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd
 	struct lruvec *lruvec;
 
 	if (mem_cgroup_disabled()) {
-		lruvec = &pgdat->lruvec;
+		lruvec = &pgdat->__lruvec;
 		goto out;
 	}
 
@@ -3464,7 +3464,7 @@ static int memcg_numa_migrate_pages(struct mem_cgroup *memcg,
 			if (node_isset(nid, *target_nodes))
 				continue;
 
-			lruvec = mem_cgroup_lruvec(NODE_DATA(nid), mi);
+			lruvec = mem_cgroup_lruvec(mi, NODE_DATA(nid));
 			/*
 			 * For the sake of simplicity, do not attempt to migrate
 			 * unevictable pages. It should be fine as long as there
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4eb4ced55a6a..aadcc166bd0f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6283,7 +6283,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 #endif
 	pgdat_page_ext_init(pgdat);
 	spin_lock_init(&pgdat->lru_lock);
-	lruvec_init(node_lruvec(pgdat));
+	lruvec_init(&pgdat->__lruvec);
 
 	pgdat->per_cpu_nodestats = &boot_nodestats;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6204c90703bf..97f2d297d4a1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2296,7 +2296,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
 			      struct scan_control *sc)
 {
-	struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
+	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long targets[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
@@ -2807,7 +2807,7 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
 		unsigned long refaults;
 		struct lruvec *lruvec;
 
-		lruvec = mem_cgroup_lruvec(pgdat, memcg);
+		lruvec = mem_cgroup_lruvec(memcg, pgdat);
 		refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
 		lruvec->refaults = refaults;
 	} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
@@ -3156,7 +3156,7 @@ static void age_active_anon(struct pglist_data *pgdat,
 
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
-		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
+		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
 
 		if (inactive_list_is_low(lruvec, false, sc, true))
 			shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
diff --git a/mm/workingset.c b/mm/workingset.c
index 1f9c53c900de..931dc2343073 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -234,7 +234,7 @@ void *workingset_eviction(struct page *page)
 	VM_BUG_ON_PAGE(page_count(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 
-	lruvec = mem_cgroup_lruvec(pgdat, memcg);
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
 	eviction = atomic_long_inc_return(&lruvec->inactive_age);
 	return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
 }
@@ -281,7 +281,7 @@ void workingset_refault(struct page *page, void *shadow)
 	memcg = mem_cgroup_from_id(memcgid);
 	if (!mem_cgroup_disabled() && !memcg)
 		goto out;
-	lruvec = mem_cgroup_lruvec(pgdat, memcg);
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
 	refault = atomic_long_read(&lruvec->inactive_age);
 	active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
 
@@ -346,7 +346,7 @@ void workingset_activation(struct page *page)
 	memcg = page_memcg_rcu(page);
 	if (!mem_cgroup_disabled() && !memcg)
 		goto out;
-	lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+	lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
 	atomic_long_inc(&lruvec->inactive_age);
 out:
 	rcu_read_unlock();


More information about the Devel mailing list