[Devel] [PATCH RHEL9 COMMIT] ms/mm/swap: convert __read_swap_cache_async() to use a folio

Konstantin Khorenko khorenko at virtuozzo.com
Mon Feb 5 19:21:19 MSK 2024


Please disregard this;
those patches will be applied to the vz9.35.x branch only.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 05.02.2024 17:13, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh9-5.14.0-284.25.1.vz9.30.19
> ------>
> commit 6e85be54c23a1232da56e44bf5a43ded75fb1310
> Author: Matthew Wilcox (Oracle) <willy at infradead.org>
> Date:   Thu Feb 1 14:33:16 2024 +0800
> 
>      ms/mm/swap: convert __read_swap_cache_async() to use a folio
>      
>      Remove a few hidden (and one visible) calls to compound_head().
>      
>      Link: https://lkml.kernel.org/r/20220902194653.1739778-12-willy@infradead.org
>      Signed-off-by: Matthew Wilcox (Oracle) <willy at infradead.org>
>      Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
>      
>      (cherry picked from commit a0d3374b070776e985bbd7b165b178fa688bf37a)
>      Change: Also update the vz-specific hunk: SetPageActive -> folio_set_active.
>      
>      https://virtuozzo.atlassian.net/browse/PSBM-153264
>      Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
>      
>      Feature: fix ms/mm
>      
>      ======
>      Patchset description:
>      mm/swap: fix swapcache page/folio inconsistency
>      
>      This ports the switching from page to folio in add_to_swap_cache().
>      
>      We saw multiple different hangs on mmap_lock, where the task holding the
>      lock was livelocked spinning in this stack:
>      
>        +-> __x64_sys_ioctl
>          +-> kvm_vcpu_ioctl
>            +-> kvm_arch_vcpu_ioctl_run
>              +-> vcpu_run
>                +-> vcpu_enter_guest
>                  +-> kvm_mmu_page_fault
>                    +-> kvm_tdp_page_fault
>                      +-> kvm_faultin_pfn
>                        +-> __kvm_faultin_pfn
>                          +-> hva_to_pfn
>                            +-> get_user_pages_unlocked
>      
>        +-> get_user_pages_unlocked
>          +-> mmap_read_lock # 1
>          +-> __get_user_pages_locked # 2
>            +-> for-loop # taken once
>              +-> __get_user_pages
>                +-> retry-loop # constantly spinning
>                  +-> faultin_page # return 0 to trigger retry
>                    +-> handle_mm_fault
>                      +-> __handle_mm_fault
>                        +-> handle_pte_fault
>                          +-> do_swap_page
>                            +-> lookup_swap_cache # returns non-NULL
>                            +-> if (swapcache)
>                              +-> if (!folio_test_swapcache || page_private(page) != entry.val)
>                                +-> goto out_page
>                                  +-> return 0
>      
>      That can be due to an inconsistency in how the swapcache flag is set and
>      read: PageSwapCache reads the flag from the folio, but
>      SetPageSwapCache/ClearPageSwapCache instead modify the flag on the page.
>      
>      After applying those patches, SetPageSwapCache/ClearPageSwapCache become
>      unused, so all paths now access this flag through the folio. With them
>      applied, I no longer see any hangs on mmap_lock (on the same test setup).
>      
>      Matthew Wilcox (Oracle) (2):
>        mm/swap: convert __read_swap_cache_async() to use a folio
>        mm/swap: convert add_to_swap_cache() to take a folio
> ---
>   mm/swap_state.c | 39 ++++++++++++++++++++-------------------
>   1 file changed, 20 insertions(+), 19 deletions(-)
> 
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index a55c42e2cbc4..32395ef698d3 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -427,7 +427,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   			bool *new_page_allocated, bool activate)
>   {
>   	struct swap_info_struct *si;
> -	struct page *page;
> +	struct folio *folio;
>   	void *shadow = NULL;
>   
>   	*new_page_allocated = false;
> @@ -442,11 +442,11 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   		si = get_swap_device(entry);
>   		if (!si)
>   			return NULL;
> -		page = find_get_page(swap_address_space(entry),
> -				     swp_offset(entry));
> +		folio = filemap_get_folio(swap_address_space(entry),
> +						swp_offset(entry));
>   		put_swap_device(si);
> -		if (page)
> -			return page;
> +		if (folio)
> +			return folio_file_page(folio, swp_offset(entry));
>   
>   		/*
>   		 * Just skip read ahead for unused swap slot.
> @@ -464,8 +464,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   		 * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
>   		 * cause any racers to loop around until we add it to cache.
>   		 */
> -		page = alloc_page_vma(gfp_mask, vma, addr);
> -		if (!page)
> +		folio = vma_alloc_folio(gfp_mask, 0, vma, addr, false);
> +		if (!folio)
>   			return NULL;
>   
>   		/*
> @@ -475,7 +475,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   		if (!err)
>   			break;
>   
> -		put_page(page);
> +		folio_put(folio);
>   		if (err != -EEXIST)
>   			return NULL;
>   
> @@ -493,32 +493,33 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   	 * The swap entry is ours to swap in. Prepare the new page.
>   	 */
>   
> -	__SetPageLocked(page);
> -	__SetPageSwapBacked(page);
> +	__folio_set_locked(folio);
> +	__folio_set_swapbacked(folio);
>   
> -	if (mem_cgroup_swapin_charge_page(page, NULL, gfp_mask, entry))
> +	if (mem_cgroup_swapin_charge_page(&folio->page, NULL, gfp_mask, entry))
>   		goto fail_unlock;
>   
>   	/* May fail (-ENOMEM) if XArray node allocation failed. */
> -	if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
> +	if (add_to_swap_cache(&folio->page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
>   		goto fail_unlock;
>   
>   	mem_cgroup_swapin_uncharge_swap(entry);
>   
>   	if (shadow)
> -		workingset_refault(page_folio(page), shadow);
> +		workingset_refault(folio, shadow);
>   
>   	/* Caller will initiate read into locked page */
>   	if (activate)
> -		SetPageActive(page);
> -	lru_cache_add(page);
> +		folio_set_active(folio);
> +	/* Caller will initiate read into locked folio */
> +	folio_add_lru(folio);
>   	*new_page_allocated = true;
> -	return page;
> +	return &folio->page;
>   
>   fail_unlock:
> -	put_swap_page(page, entry);
> -	unlock_page(page);
> -	put_page(page);
> +	put_swap_page(&folio->page, entry);
> +	folio_unlock(folio);
> +	folio_put(folio);
>   	return NULL;
>   }
>   


More information about the Devel mailing list