[Devel] [PATCH RHEL7 COMMIT] mm/memcontrol: memcg_move_account() get rid of lock_page dependency.

Konstantin Khorenko khorenko at virtuozzo.com
Thu Mar 5 12:16:10 MSK 2020


The commit is pushed to "branch-rh7-3.10.0-1062.12.1.vz7.131.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.12.1.vz7.131.6
------>
commit 1f7e9f93cfa7c2998ac40ca0b794c0a145a13735
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date:   Wed Mar 4 15:56:08 2020 +0300

    mm/memcontrol: memcg_move_account() get rid of lock_page dependency.
    
    Instead of trylock'ing the page, use the lru_lock to prevent the
    racing against mem_cgroup_migrate(). So the memcg_move_account() doesn't
    depend on hung IO.
    
    The trylock seems to be needed to prevent mem_cgroup_migrate() to see
    disbalanced pc->mem_cgroup && pc->flags state. We could achieve the same
    by using lru_lock instead. memcg_move_account() only works with isolated
    from LRU pages, so it can race only with mem_cgroup_migrate() called
    with lrucare=true, otherwise the page is not on LRU so the memcg_move_account()
    can't see it.
    
    Also memcg_move_account() might race against try_get_mem_cgroup_from_page()
    when mem_cgroup_move_account() called for online cgroup. This happens when
    task moved between cgroups and memory.move_charge_at_immigrate set at
    certain value. Keep the trylock_page() to protect us from such race just
    in case.
    
    For offlined cgroup the race agains try_get_mem_cgroup_from_page()
    shouldn't be possible as it will bail out on failed css_tryget().
    
    https://jira.sw.ru/browse/PSBM-101639
    https://jira.sw.ru/browse/PSBM-101757
    https://jira.sw.ru/browse/PSBM-94117
    
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
    
    v3: read is_offline status of source mem cgroup only once to avoid
    possible race.
---
 mm/memcontrol.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7d6f1b2c8b41b..4846e7a9bf63b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3961,6 +3961,7 @@ static int mem_cgroup_move_account(struct page *page,
 {
 	unsigned long flags;
 	int ret;
+	bool is_offline;
 
 	VM_BUG_ON(from == to);
 	VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -3974,13 +3975,19 @@ static int mem_cgroup_move_account(struct page *page,
 	if (nr_pages > 1 && !PageTransHuge(page))
 		goto out;
 
-	/*
-	 * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
-	 * of its source page while we change it: page migration takes
-	 * both pages off the LRU, but page cache replacement doesn't.
-	 */
-	if (!trylock_page(page))
-		goto out;
+	is_offline = READ_ONCE(from->is_offline);
+	if (!is_offline) {
+		/* Protect us from racing against try_get_mem_cgroup_from_page() */
+		if (!trylock_page(page))
+			goto out;
+	} else {
+		/*
+		 * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
+		 * of its source page while we change it: page migration takes
+		 * both pages off the LRU, but page cache replacement doesn't.
+		 */
+		spin_lock_irq(&page_zone(page)->lru_lock);
+	}
 
 	ret = -EINVAL;
 	if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
@@ -4013,7 +4020,10 @@ static int mem_cgroup_move_account(struct page *page,
 	memcg_check_events(from, page);
 	local_irq_restore(flags);
 out_unlock:
-	unlock_page(page);
+	if (!is_offline)
+		unlock_page(page);
+	else
+		spin_unlock_irq(&page_zone(page)->lru_lock);
 out:
 	return ret;
 }
@@ -7795,6 +7805,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
 {
 	unsigned int nr_pages = 1;
 	struct page_cgroup *pc;
+	struct mem_cgroup *memcg;
 	int isolated;
 
 	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
@@ -7828,17 +7839,18 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
 	if (lrucare)
 		lock_page_lru(oldpage, &isolated);
 
+	memcg = READ_ONCE(pc->mem_cgroup);
 	pc->flags = 0;
 
 	if (lrucare)
 		unlock_page_lru(oldpage, isolated);
 
 	local_irq_disable();
-	mem_cgroup_charge_statistics(pc->mem_cgroup, oldpage, -nr_pages);
-	memcg_check_events(pc->mem_cgroup, oldpage);
+	mem_cgroup_charge_statistics(memcg, oldpage, -nr_pages);
+	memcg_check_events(memcg, oldpage);
 	local_irq_enable();
 
-	commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare);
+	commit_charge(newpage, memcg, nr_pages, lrucare);
 }
 
 static int __init cgroup_memory(char *s)


More information about the Devel mailing list