[Devel] [PATCH rh7 5/8] mm/mem_cgroup_iter: Invalidate iterators only if needed

Konstantin Khorenko khorenko at virtuozzo.com
Sat Feb 20 12:24:11 MSK 2021


Previously we invalidated all iterators up to the memcg root,
but this was overkill: we can check whether the currently dying
memcg is stored in an iterator's 'last_visited' field and
invalidate only those unlucky iterators.

https://jira.sw.ru/browse/PSBM-123655

Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
 mm/memcontrol.c | 48 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6cde4dbe3c7b..d60dc768f762 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -192,8 +192,8 @@ struct mem_cgroup_stat2_cpu {
 
 struct mem_cgroup_reclaim_iter {
 	/*
-	 * last scanned hierarchy member. Valid only if last_dead_count
-	 * matches memcg->dead_count of the hierarchy root group.
+	 * Last scanned hierarchy member.
+	 * If stored memcg is destroyed, the field is wiped.
 	 */
 	struct mem_cgroup *last_visited;
 	unsigned long last_dead_count;
@@ -1576,6 +1576,28 @@ static void mem_cgroup_iter_invalidate(struct mem_cgroup *root,
 	int zone, node, i;
 	unsigned seq;
 
+	/*
+	 * When a group in the hierarchy below root is destroyed,
+	 * the hierarchy iterator can no longer be trusted iff
+	 * iter->last_visited contains the cgroup being destroyed.
+	 * Check if we get this unlikely case and invalidate the iterator
+	 * if so.
+	 *
+	 * Note, only "break-ed" iterators can store iter->last_visited
+	 * == dead_memcg because normally 'last_visited' is assigned
+	 * under 'last_visited_lock' in mem_cgroup_iter_update() and
+	 * 'new_position' is under css_tryget() there (ref inc-ed in
+	 * __mem_cgroup_iter_next()) and thus css will not be offlined.
+	 *
+	 * mem_cgroup_iter_break() in its turn puts memcg's css but does
+	 * not wipe it from iter->last_visited.
+	 *
+	 * Q: Why dying memcg (dead_memcg) cannot get into
+	 *    iter->last_visited a bit later after we wipe it here?
+	 * A: Because by the time this function is executed,
+	 *    css_tryget() is guaranteed to fail on 'dead_memcg'.
+	 */
+
 	for_each_node(node) {
 		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 			mz = mem_cgroup_zoneinfo(root, node, zone);
@@ -1584,10 +1606,17 @@ static void mem_cgroup_iter_invalidate(struct mem_cgroup *root,
 				iter = &mz->reclaim_iter[i];
 				lock = &iter->last_visited_lock;
 
-				write_seqlock(lock);
-				pos = iter->last_visited;
-				iter->last_visited = NULL;
-				write_sequnlock(lock);
+				do {
+					seq = read_seqbegin(lock);
+					pos = READ_ONCE(iter->last_visited);
+				} while (read_seqretry(lock, seq));
+
+				if (pos == dead_memcg) {
+					write_seqlock(lock);
+					pos = iter->last_visited;
+					if (pos == dead_memcg)
+						iter->last_visited = NULL;
+					write_sequnlock(lock);
 				}
 			}
 		}
@@ -1648,10 +1677,9 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
 				   int sequence)
 {
 	/*
-	 * We store the sequence count from the time @last_visited was
-	 * loaded successfully instead of rereading it here so that we
-	 * don't lose destruction events in between.  We could have
-	 * raced with the destruction of @new_position after all.
+	 * The position saved in 'last_visited' is always valid:
+	 * if the corresponding cgroup is destroyed,
+	 * 'last_visited' is reset to NULL.
 	 */
 	write_seqlock(&iter->last_visited_lock);
 	iter->last_visited = new_position;
-- 
2.24.3



More information about the Devel mailing list