[Devel] [PATCH RHEL9 COMMIT] ms/mm/swap: convert add_to_swap_cache() to take a folio
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Feb 5 19:14:57 MSK 2024
The commit is pushed to "branch-rh9-5.14.0-362.8.1.vz9.35.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-362.8.1.vz9.35.9
------>
commit b669125e80b71ad280fb84fb0acbfe3d91025c69
Author: Matthew Wilcox (Oracle) <willy at infradead.org>
Date: Thu Feb 1 14:33:17 2024 +0800
ms/mm/swap: convert add_to_swap_cache() to take a folio
With all callers using folios, we can convert add_to_swap_cache() to take
a folio and use it throughout.
Link: https://lkml.kernel.org/r/20220902194653.1739778-13-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy at infradead.org>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
(cherry picked from commit a4c366f01f10073e0220656561b875627ff7cd90)
https://virtuozzo.atlassian.net/browse/PSBM-153264
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Feature: fix ms/mm
======
Patchset description:
mm/swap: fix swapcache page/folio inconsistency
This series ports the switch from page to folio in add_to_swap_cache().
We saw multiple different hangs on mmap_lock, where the task holding the
lock was livelocked, spinning in the following call stack:
+-> __x64_sys_ioctl
+-> kvm_vcpu_ioctl
+-> kvm_arch_vcpu_ioctl_run
+-> vcpu_run
+-> vcpu_enter_guest
+-> kvm_mmu_page_fault
+-> kvm_tdp_page_fault
+-> kvm_faultin_pfn
+-> __kvm_faultin_pfn
+-> hva_to_pfn
+-> get_user_pages_unlocked
+-> get_user_pages_unlocked
+-> mmap_read_lock # 1
+-> __get_user_pages_locked # 2
+-> for-loop # taken once
+-> __get_user_pages
+-> retry-loop # constantly spinning
+-> faultin_page # return 0 to trigger retry
+-> handle_mm_fault
+-> __handle_mm_fault
+-> handle_pte_fault
+-> do_swap_page
+-> lookup_swap_cache # returns non-NULL
+-> if (swapcache)
+-> if (!folio_test_swapcache || page_private(page) != entry.val)
+-> goto out_page
+-> return 0
This can be caused by an inconsistency in how the swapcache flag is set and
read: PageSwapCache reads the flag from the folio, whereas
SetPageSwapCache/ClearPageSwapCache modify the flag on the page.
After applying these patches, SetPageSwapCache/ClearPageSwapCache become
unused, so all paths now access this flag through the folio. With the patches
applied, I no longer see any hangs on mmap_lock (on the same test setup).
Matthew Wilcox (Oracle) (2):
mm/swap: convert __read_swap_cache_async() to use a folio
mm/swap: convert add_to_swap_cache() to take a folio
---
mm/shmem.c | 2 +-
mm/swap.h | 4 ++--
mm/swap_state.c | 34 +++++++++++++++++-----------------
3 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 50ccad78b315..fbfecb73e740 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1435,7 +1435,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (list_empty(&info->swaplist))
list_add(&info->swaplist, &shmem_swaplist);
- if (add_to_swap_cache(&folio->page, swap,
+ if (add_to_swap_cache(folio, swap,
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
NULL) == 0) {
spin_lock_irq(&info->lock);
diff --git a/mm/swap.h b/mm/swap.h
index f23941d6cc69..4c2b7f16783e 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -34,7 +34,7 @@ extern struct address_space *swapper_spaces[];
void show_swap_cache_info(void);
bool add_to_swap(struct folio *folio);
void *get_shadow_from_swap_cache(swp_entry_t entry);
-int add_to_swap_cache(struct page *page, swp_entry_t entry,
+int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
gfp_t gfp, void **shadowp);
void __delete_from_swap_cache(struct folio *folio,
swp_entry_t entry, void *shadow);
@@ -126,7 +126,7 @@ static inline void *get_shadow_from_swap_cache(swp_entry_t entry)
return NULL;
}
-static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
+static inline int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
gfp_t gfp_mask, void **shadowp)
{
return -1;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3a2323aba6f9..9aed413b61e6 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -85,21 +85,21 @@ void *get_shadow_from_swap_cache(swp_entry_t entry)
* add_to_swap_cache resembles filemap_add_folio on swapper_space,
* but sets SwapCache flag and private instead of mapping and index.
*/
-int add_to_swap_cache(struct page *page, swp_entry_t entry,
+int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
gfp_t gfp, void **shadowp)
{
struct address_space *address_space = swap_address_space(entry);
pgoff_t idx = swp_offset(entry);
- XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
- unsigned long i, nr = thp_nr_pages(page);
+ XA_STATE_ORDER(xas, &address_space->i_pages, idx, folio_order(folio));
+ unsigned long i, nr = folio_nr_pages(folio);
void *old;
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(PageSwapCache(page), page);
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio);
+ VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);
- page_ref_add(page, nr);
- SetPageSwapCache(page);
+ folio_ref_add(folio, nr);
+ folio_set_swapcache(folio);
do {
xas_lock_irq(&xas);
@@ -107,19 +107,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry,
if (xas_error(&xas))
goto unlock;
for (i = 0; i < nr; i++) {
- VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
+ VM_BUG_ON_FOLIO(xas.xa_index != idx + i, folio);
old = xas_load(&xas);
if (xa_is_value(old)) {
if (shadowp)
*shadowp = old;
}
- set_page_private(page + i, entry.val + i);
- xas_store(&xas, page);
+ set_page_private(folio_page(folio, i), entry.val + i);
+ xas_store(&xas, folio);
xas_next(&xas);
}
address_space->nrpages += nr;
- __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
- __mod_lruvec_page_state(page, NR_SWAPCACHE, nr);
+ __node_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+ __lruvec_stat_mod_folio(folio, NR_SWAPCACHE, nr);
unlock:
xas_unlock_irq(&xas);
} while (xas_nomem(&xas, gfp));
@@ -127,8 +127,8 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry,
if (!xas_error(&xas))
return 0;
- ClearPageSwapCache(page);
- page_ref_sub(page, nr);
+ folio_clear_swapcache(folio);
+ folio_ref_sub(folio, nr);
return xas_error(&xas);
}
@@ -194,7 +194,7 @@ bool add_to_swap(struct folio *folio)
/*
* Add it to the swap cache.
*/
- err = add_to_swap_cache(&folio->page, entry,
+ err = add_to_swap_cache(folio, entry,
__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
if (err)
/*
@@ -496,7 +496,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
goto fail_unlock;
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(&folio->page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
+ if (add_to_swap_cache(folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
goto fail_unlock;
mem_cgroup_swapin_uncharge_swap(entry);
More information about the Devel
mailing list