[Devel] [PATCH RHEL9 COMMIT] ms/mm/swap: convert add_to_swap_cache() to take a folio

Konstantin Khorenko khorenko at virtuozzo.com
Mon Feb 5 19:14:57 MSK 2024


The commit is pushed to "branch-rh9-5.14.0-362.8.1.vz9.35.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-362.8.1.vz9.35.9
------>
commit b669125e80b71ad280fb84fb0acbfe3d91025c69
Author: Matthew Wilcox (Oracle) <willy at infradead.org>
Date:   Thu Feb 1 14:33:17 2024 +0800

    ms/mm/swap: convert add_to_swap_cache() to take a folio
    
    With all callers using folios, we can convert add_to_swap_cache() to take
    a folio and use it throughout.
    
    Link: https://lkml.kernel.org/r/20220902194653.1739778-13-willy@infradead.org
    Signed-off-by: Matthew Wilcox (Oracle) <willy at infradead.org>
    Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
    
    (cherry picked from commit a4c366f01f10073e0220656561b875627ff7cd90)
    https://virtuozzo.atlassian.net/browse/PSBM-153264
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    Feature: fix ms/mm
    
    ======
    Patchset description:
    mm/swap: fix swapcache page/folio inconsistency
    
    This ports the switch from page to folio in add_to_swap_cache().
    
    We saw multiple different hangs on mmap_lock, where the task holding
    the lock was livelocked, spinning in this stack:
    
      +-> __x64_sys_ioctl
        +-> kvm_vcpu_ioctl
          +-> kvm_arch_vcpu_ioctl_run
            +-> vcpu_run
              +-> vcpu_enter_guest
                +-> kvm_mmu_page_fault
                  +-> kvm_tdp_page_fault
                    +-> kvm_faultin_pfn
                      +-> __kvm_faultin_pfn
                        +-> hva_to_pfn
                          +-> get_user_pages_unlocked
    
      +-> get_user_pages_unlocked
        +-> mmap_read_lock # 1
        +-> __get_user_pages_locked # 2
          +-> for-loop # taken once
            +-> __get_user_pages
              +-> retry-loop # constantly spinning
                +-> faultin_page # return 0 to trigger retry
                  +-> handle_mm_fault
                    +-> __handle_mm_fault
                      +-> handle_pte_fault
                        +-> do_swap_page
                          +-> lookup_swap_cache # returns non-NULL
                          +-> if (swapcache)
                            +-> if (!folio_test_swapcache || page_private(page) != entry.val)
                              +-> goto out_page
                                +-> return 0
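
    For reference, the check this loop keeps tripping over looks roughly
    like this in upstream do_swap_page() (mm/memory.c, quoted
    approximately from ~v6.0, not from the vz9 tree):

      if (swapcache) {
              /*
               * Make sure folio_free_swap() or swapoff did not release
               * the swapcache from under us.
               */
              if (unlikely(!folio_test_swapcache(folio) ||
                           page_private(page) != entry.val))
                      goto out_page;  /* do_swap_page() returns 0, retry */
      }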
    
    That can be caused by an inconsistency in how the swapcache flag is
    set and read: PageSwapCache reads the flag through the folio, while
    SetPageSwapCache/ClearPageSwapCache modify the flag on the page.
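
    To see why such a split is dangerous, here is a minimal hypothetical
    userspace model (not the vz9 kernel source; struct page, PG_swapcache
    and the helpers below are stand-ins): the reader resolves the flag
    through the head page, the writer touches whatever page it was given,
    and the two views diverge as soon as the writer gets a tail page.

      #include <stdbool.h>
      #include <stdio.h>

      #define PG_swapcache (1UL << 0)

      struct page {
              unsigned long flags;
              struct page *head;      /* stand-in for page_folio() */
      };

      /* reader, like folio_test_swapcache(): always checks the head */
      static bool page_swapcache(const struct page *page)
      {
              return page->head->flags & PG_swapcache;
      }

      /* writer, like a non-folio SetPageSwapCache(): hits the page as-is */
      static void set_page_swapcache(struct page *page)
      {
              page->flags |= PG_swapcache;
      }

      int main(void)
      {
              struct page head = { .flags = 0 }, tail = { .flags = 0 };

              head.head = &head;
              tail.head = &head;

              set_page_swapcache(&tail);       /* writer marks the tail */
              printf("reader sees swapcache: %d\n",
                     page_swapcache(&tail));   /* prints 0: views disagree */
              return 0;
      }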
    
    After applying these patches SetPageSwapCache/ClearPageSwapCache
    become unused, so all paths now access this flag through the folio.
    With them applied I no longer see any hangs on mmap_lock (on the
    same test setup).
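
    In terms of the toy model above, the fix corresponds to the writer
    also resolving the head page first, so reader and writer agree on
    which flags word is used (again a hypothetical sketch, not the
    actual kernel macro):

      /* writer after the conversion, like folio_set_swapcache() */
      static void set_page_swapcache_fixed(struct page *page)
      {
              page->head->flags |= PG_swapcache;  /* word the reader tests */
      }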
    
    Matthew Wilcox (Oracle) (2):
      mm/swap: convert __read_swap_cache_async() to use a folio
      mm/swap: convert add_to_swap_cache() to take a folio
---
 mm/shmem.c      |  2 +-
 mm/swap.h       |  4 ++--
 mm/swap_state.c | 34 +++++++++++++++++-----------------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 50ccad78b315..fbfecb73e740 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1435,7 +1435,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	if (list_empty(&info->swaplist))
 		list_add(&info->swaplist, &shmem_swaplist);
 
-	if (add_to_swap_cache(&folio->page, swap,
+	if (add_to_swap_cache(folio, swap,
 			__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
 			NULL) == 0) {
 		spin_lock_irq(&info->lock);
diff --git a/mm/swap.h b/mm/swap.h
index f23941d6cc69..4c2b7f16783e 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -34,7 +34,7 @@ extern struct address_space *swapper_spaces[];
 void show_swap_cache_info(void);
 bool add_to_swap(struct folio *folio);
 void *get_shadow_from_swap_cache(swp_entry_t entry);
-int add_to_swap_cache(struct page *page, swp_entry_t entry,
+int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
 		      gfp_t gfp, void **shadowp);
 void __delete_from_swap_cache(struct folio *folio,
 			      swp_entry_t entry, void *shadow);
@@ -126,7 +126,7 @@ static inline void *get_shadow_from_swap_cache(swp_entry_t entry)
 	return NULL;
 }
 
-static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
+static inline int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
 					gfp_t gfp_mask, void **shadowp)
 {
 	return -1;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3a2323aba6f9..9aed413b61e6 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -85,21 +85,21 @@ void *get_shadow_from_swap_cache(swp_entry_t entry)
  * add_to_swap_cache resembles filemap_add_folio on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int add_to_swap_cache(struct page *page, swp_entry_t entry,
+int add_to_swap_cache(struct folio *folio, swp_entry_t entry,
 			gfp_t gfp, void **shadowp)
 {
 	struct address_space *address_space = swap_address_space(entry);
 	pgoff_t idx = swp_offset(entry);
-	XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
-	unsigned long i, nr = thp_nr_pages(page);
+	XA_STATE_ORDER(xas, &address_space->i_pages, idx, folio_order(folio));
+	unsigned long i, nr = folio_nr_pages(folio);
 	void *old;
 
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	VM_BUG_ON_PAGE(PageSwapCache(page), page);
-	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio);
+	VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);
 
-	page_ref_add(page, nr);
-	SetPageSwapCache(page);
+	folio_ref_add(folio, nr);
+	folio_set_swapcache(folio);
 
 	do {
 		xas_lock_irq(&xas);
@@ -107,19 +107,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry,
 		if (xas_error(&xas))
 			goto unlock;
 		for (i = 0; i < nr; i++) {
-			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
+			VM_BUG_ON_FOLIO(xas.xa_index != idx + i, folio);
 			old = xas_load(&xas);
 			if (xa_is_value(old)) {
 				if (shadowp)
 					*shadowp = old;
 			}
-			set_page_private(page + i, entry.val + i);
-			xas_store(&xas, page);
+			set_page_private(folio_page(folio, i), entry.val + i);
+			xas_store(&xas, folio);
 			xas_next(&xas);
 		}
 		address_space->nrpages += nr;
-		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
-		__mod_lruvec_page_state(page, NR_SWAPCACHE, nr);
+		__node_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+		__lruvec_stat_mod_folio(folio, NR_SWAPCACHE, nr);
 unlock:
 		xas_unlock_irq(&xas);
 	} while (xas_nomem(&xas, gfp));
@@ -127,8 +127,8 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry,
 	if (!xas_error(&xas))
 		return 0;
 
-	ClearPageSwapCache(page);
-	page_ref_sub(page, nr);
+	folio_clear_swapcache(folio);
+	folio_ref_sub(folio, nr);
 	return xas_error(&xas);
 }
 
@@ -194,7 +194,7 @@ bool add_to_swap(struct folio *folio)
 	/*
 	 * Add it to the swap cache.
 	 */
-	err = add_to_swap_cache(&folio->page, entry,
+	err = add_to_swap_cache(folio, entry,
 			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
 	if (err)
 		/*
@@ -496,7 +496,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		goto fail_unlock;
 
 	/* May fail (-ENOMEM) if XArray node allocation failed. */
-	if (add_to_swap_cache(&folio->page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
+	if (add_to_swap_cache(folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
 		goto fail_unlock;
 
 	mem_cgroup_swapin_uncharge_swap(entry);

