[Devel] [PATCH RHEL9 COMMIT] ms/mm/swap: convert __read_swap_cache_async() to use a folio
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Feb 5 19:14:54 MSK 2024
The commit is pushed to "branch-rh9-5.14.0-362.8.1.vz9.35.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-362.8.1.vz9.35.9
------>
commit 3bb87414bb57239d5183c895f60b8d676a41a3db
Author: Matthew Wilcox (Oracle) <willy at infradead.org>
Date: Thu Feb 1 14:33:16 2024 +0800
ms/mm/swap: convert __read_swap_cache_async() to use a folio
Remove a few hidden (and one visible) calls to compound_head().
Link: https://lkml.kernel.org/r/20220902194653.1739778-12-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy at infradead.org>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
(cherry picked from commit a0d3374b070776e985bbd7b165b178fa688bf37a)
Change: Also update vz specific hunk SetPageActive->folio_set_active.
https://virtuozzo.atlassian.net/browse/PSBM-153264
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Feature: fix ms/mm
======
Patchset description:
mm/swap: fix swapcache page/folio inconsistency
This ports the upstream conversion of add_to_swap_cache() from page to folio.
We saw multiple different hangs on mmap_lock, where the task holding the
lock was livelocked spinning in this stack:
+-> __x64_sys_ioctl
+-> kvm_vcpu_ioctl
+-> kvm_arch_vcpu_ioctl_run
+-> vcpu_run
+-> vcpu_enter_guest
+-> kvm_mmu_page_fault
+-> kvm_tdp_page_fault
+-> kvm_faultin_pfn
+-> __kvm_faultin_pfn
+-> hva_to_pfn
+-> get_user_pages_unlocked
+-> get_user_pages_unlocked
+-> mmap_read_lock # 1
+-> __get_user_pages_locked # 2
+-> for-loop # taken once
+-> __get_user_pages
+-> retry-loop # constantly spinning
+-> faultin_page # return 0 to trigger retry
+-> handle_mm_fault
+-> __handle_mm_fault
+-> handle_pte_fault
+-> do_swap_page
+-> lookup_swap_cache # returns non-NULL
+-> if (swapcache)
+-> if (!folio_test_swapcache || page_private(page) != entry.val)
+-> goto out_page
+-> return 0
That can be due to an inconsistency in how the swapcache flag is set and read:
PageSwapCache reads the flag from the folio, but
SetPageSwapCache/ClearPageSwapCache instead modify the flag on the page.
After applying these patches, SetPageSwapCache/ClearPageSwapCache become
unused, so all paths now access this flag through the folio. With the patches
applied, I no longer see any hangs on mmap_lock (on the same test setup).
Matthew Wilcox (Oracle) (2):
mm/swap: convert __read_swap_cache_async() to use a folio
mm/swap: convert add_to_swap_cache() to take a folio
---
mm/swap_state.c | 39 ++++++++++++++++++++-------------------
1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b16dc348bbd2..3a2323aba6f9 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -423,7 +423,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
bool *new_page_allocated, bool activate)
{
struct swap_info_struct *si;
- struct page *page;
+ struct folio *folio;
void *shadow = NULL;
*new_page_allocated = false;
@@ -438,11 +438,11 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
si = get_swap_device(entry);
if (!si)
return NULL;
- page = find_get_page(swap_address_space(entry),
- swp_offset(entry));
+ folio = filemap_get_folio(swap_address_space(entry),
+ swp_offset(entry));
put_swap_device(si);
- if (page)
- return page;
+ if (folio)
+ return folio_file_page(folio, swp_offset(entry));
/*
* Just skip read ahead for unused swap slot.
@@ -460,8 +460,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
* cause any racers to loop around until we add it to cache.
*/
- page = alloc_page_vma(gfp_mask, vma, addr);
- if (!page)
+ folio = vma_alloc_folio(gfp_mask, 0, vma, addr, false);
+ if (!folio)
return NULL;
/*
@@ -471,7 +471,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
if (!err)
break;
- put_page(page);
+ folio_put(folio);
if (err != -EEXIST)
return NULL;
@@ -489,32 +489,33 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* The swap entry is ours to swap in. Prepare the new page.
*/
- __SetPageLocked(page);
- __SetPageSwapBacked(page);
+ __folio_set_locked(folio);
+ __folio_set_swapbacked(folio);
- if (mem_cgroup_swapin_charge_page(page, NULL, gfp_mask, entry))
+ if (mem_cgroup_swapin_charge_page(&folio->page, NULL, gfp_mask, entry))
goto fail_unlock;
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
+ if (add_to_swap_cache(&folio->page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
goto fail_unlock;
mem_cgroup_swapin_uncharge_swap(entry);
if (shadow)
- workingset_refault(page_folio(page), shadow);
+ workingset_refault(folio, shadow);
/* Caller will initiate read into locked page */
if (activate)
- SetPageActive(page);
- lru_cache_add(page);
+ folio_set_active(folio);
+ /* Caller will initiate read into locked folio */
+ folio_add_lru(folio);
*new_page_allocated = true;
- return page;
+ return &folio->page;
fail_unlock:
- put_swap_page(page, entry);
- unlock_page(page);
- put_page(page);
+ put_swap_page(&folio->page, entry);
+ folio_unlock(folio);
+ folio_put(folio);
return NULL;
}
More information about the Devel
mailing list