[Devel] [PATCH RHEL8 COMMIT] ms/mm: clean up and clarify lruvec lookup procedure
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Apr 2 16:02:53 MSK 2020
The commit is pushed to "branch-rh8-4.18.0-80.1.2.vz8.3.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-80.1.2.vz8.3.4
------>
commit 1793cc28e1cfb62ffafcf83839af10705422b885
Author: Johannes Weiner <hannes at cmpxchg.org>
Date: Thu Apr 2 16:02:52 2020 +0300
ms/mm: clean up and clarify lruvec lookup procedure
There is a per-memcg lruvec and a NUMA node lruvec. Which one is being
used is somewhat confusing right now, and it's easy to make mistakes -
especially when it comes to global reclaim.
How it works: when memory cgroups are enabled, we always use the
root_mem_cgroup's per-node lruvecs. When memory cgroups are not compiled
in or disabled at runtime, we use pgdat->lruvec.
Document that in a comment.
Due to the way the reclaim code is generalized, all lookups use the
mem_cgroup_lruvec() helper function, and nobody should have to find the
right lruvec manually right now. But to avoid future mistakes, rename the
pgdat->lruvec member to pgdat->__lruvec and delete the convenience wrapper
that suggests it's a commonly accessed member.
While in this area, swap the mem_cgroup_lruvec() argument order. The name
suggests a memcg operation, yet it takes a pgdat first and a memcg second.
I have to double take every time I call this. Fix that.
Link: http://lkml.kernel.org/r/20191022144803.302233-3-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes at cmpxchg.org>
Acked-by: Michal Hocko <mhocko at suse.com>
Reviewed-by: Shakeel Butt <shakeelb at google.com>
Cc: Roman Gushchin <guro at fb.com>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
(cherry picked from commit 867e5e1de14b2b2bde324cdfeec3f3f83eb21424)
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
include/linux/memcontrol.h | 24 ++++++++++++------------
include/linux/mmzone.h | 15 ++++++++-------
mm/memcontrol.c | 6 +++---
mm/page_alloc.c | 2 +-
mm/vmscan.c | 6 +++---
mm/workingset.c | 6 +++---
6 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 30e737be4cf7..c359e48cf266 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -356,22 +356,22 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
}
/**
- * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
+ * mem_cgroup_lruvec - get the lru list vector for a memcg & node
* @node: node of the wanted lruvec
* @memcg: memcg of the wanted lruvec
*
- * Returns the lru list vector holding pages for a given @node or a given
- * @memcg and @zone. This can be the node lruvec, if the memory controller
- * is disabled.
+ * Returns the lru list vector holding pages for a given @memcg &
+ * @node combination. This can be the node lruvec, if the memory
+ * controller is disabled.
*/
-static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
- struct mem_cgroup *memcg)
+static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
+ struct pglist_data *pgdat)
{
struct mem_cgroup_per_node *mz;
struct lruvec *lruvec;
if (mem_cgroup_disabled()) {
- lruvec = node_lruvec(pgdat);
+ lruvec = &pgdat->__lruvec;
goto out;
}
@@ -701,7 +701,7 @@ static inline void __mod_lruvec_page_state(struct page *page,
return;
}
- lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
+ lruvec = mem_cgroup_lruvec(page->mem_cgroup, pgdat);
__mod_lruvec_state(lruvec, idx, val);
}
@@ -879,16 +879,16 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
{
}
-static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
- struct mem_cgroup *memcg)
+static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
+ struct pglist_data *pgdat)
{
- return node_lruvec(pgdat);
+ return &pgdat->__lruvec;
}
static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
struct pglist_data *pgdat)
{
- return &pgdat->lruvec;
+ return &pgdat->__lruvec;
}
static inline bool mm_match_cgroup(struct mm_struct *mm,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 01623f17d2a5..2d81ecb3f852 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -719,7 +719,13 @@ typedef struct pglist_data {
#endif
/* Fields commonly accessed by the page reclaim scanner */
- struct lruvec lruvec;
+
+ /*
+ * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
+ *
+ * Use mem_cgroup_lruvec() to look up lruvecs.
+ */
+ struct lruvec __lruvec;
unsigned long flags;
@@ -749,11 +755,6 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone)
return &zone->zone_pgdat->lru_lock;
}
-static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
-{
- return &pgdat->lruvec;
-}
-
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
return pgdat->node_start_pfn + pgdat->node_spanned_pages;
@@ -810,7 +811,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
#ifdef CONFIG_MEMCG
return lruvec->pgdat;
#else
- return container_of(lruvec, struct pglist_data, lruvec);
+ return container_of(lruvec, struct pglist_data, __lruvec);
#endif
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 47304c1161bc..6909824a0ba1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -587,7 +587,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid, unsigned int lru_mask)
{
- struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
unsigned long nr = 0;
enum lru_list lru;
@@ -945,7 +945,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd
struct lruvec *lruvec;
if (mem_cgroup_disabled()) {
- lruvec = &pgdat->lruvec;
+ lruvec = &pgdat->__lruvec;
goto out;
}
@@ -3464,7 +3464,7 @@ static int memcg_numa_migrate_pages(struct mem_cgroup *memcg,
if (node_isset(nid, *target_nodes))
continue;
- lruvec = mem_cgroup_lruvec(NODE_DATA(nid), mi);
+ lruvec = mem_cgroup_lruvec(mi, NODE_DATA(nid));
/*
* For the sake of simplicity, do not attempt to migrate
* unevictable pages. It should be fine as long as there
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4eb4ced55a6a..aadcc166bd0f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6283,7 +6283,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
#endif
pgdat_page_ext_init(pgdat);
spin_lock_init(&pgdat->lru_lock);
- lruvec_init(node_lruvec(pgdat));
+ lruvec_init(&pgdat->__lruvec);
pgdat->per_cpu_nodestats = &boot_nodestats;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6204c90703bf..97f2d297d4a1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2296,7 +2296,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
struct scan_control *sc)
{
- struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
unsigned long nr[NR_LRU_LISTS];
unsigned long targets[NR_LRU_LISTS];
unsigned long nr_to_scan;
@@ -2807,7 +2807,7 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
unsigned long refaults;
struct lruvec *lruvec;
- lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
lruvec->refaults = refaults;
} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
@@ -3156,7 +3156,7 @@ static void age_active_anon(struct pglist_data *pgdat,
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
- struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
diff --git a/mm/workingset.c b/mm/workingset.c
index 1f9c53c900de..931dc2343073 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -234,7 +234,7 @@ void *workingset_eviction(struct page *page)
VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
- lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
eviction = atomic_long_inc_return(&lruvec->inactive_age);
return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
}
@@ -281,7 +281,7 @@ void workingset_refault(struct page *page, void *shadow)
memcg = mem_cgroup_from_id(memcgid);
if (!mem_cgroup_disabled() && !memcg)
goto out;
- lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
refault = atomic_long_read(&lruvec->inactive_age);
active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
@@ -346,7 +346,7 @@ void workingset_activation(struct page *page)
memcg = page_memcg_rcu(page);
if (!mem_cgroup_disabled() && !memcg)
goto out;
- lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+ lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
atomic_long_inc(&lruvec->inactive_age);
out:
rcu_read_unlock();
More information about the Devel
mailing list