[Devel] [PATCH RHEL9 COMMIT] oracle/mm: parallelize unmap_page_range() for some large VMAs

Konstantin Khorenko khorenko@virtuozzo.com
Thu Jan 23 23:35:48 MSK 2025


The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear at git@bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.5
------>
commit d5b8d0336c60e7fdb4b39ff981974d383d298d54
Author: Anthony Yznaga <anthony.yznaga@oracle.com>
Date:   Wed Dec 7 14:29:34 2022 -0800

    oracle/mm: parallelize unmap_page_range() for some large VMAs
    
    The time to exec a new qemu for live update is increasing linearly with
    memory size and taking an excessive amount of time for very large VMs.
    This is due to the time it takes to unmap the VM memory from the exiting
    qemu process, and for legacy live update, also the time it takes to copy
    the pagetable entries for preserved VM memory.
    
    This patch uses padata to parallelize the work done by unmap_page_range().
    padata is only used when unmapping a single VMA as part of normal process
    exit where the VMA is backed by anonymous memory or by shmem.
    
    Performance results for the series are in the last patch.
    
    Orabug: 35054621
    Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
    Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
    
    https://virtuozzo.atlassian.net/browse/VSTOR-96305
    
    (cherry picked from Oracle commit 778309b5fd888b909a8adc50f7aa5f053e5d3f9e)
    Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
    
    Feature: oracle/mm: MADV_DOEXEC madvise() flag
---
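Note: for readers unfamiliar with padata multithreaded jobs, a minimal sketch of
the call pattern the patch relies on is shown below. The signatures mirror the
ones used in this tree (where the per-chunk callback returns int); my_chunk_fn,
my_parallel_op and their arguments are illustrative names only, not part of the
patch.

    #include <linux/padata.h>

    /* Illustrative: called once per chunk, possibly from several threads. */
    static int my_chunk_fn(unsigned long start, unsigned long end, void *arg)
    {
            /* operate on [start, end) using the shared state passed via arg */
            return 0;
    }

    /* Illustrative: fan the range [start, end) out across padata workers. */
    static void my_parallel_op(unsigned long start, unsigned long end, void *state)
    {
            struct padata_mt_job job = {
                    .thread_fn   = my_chunk_fn,
                    .fn_arg      = state,
                    .start       = start,
                    .size        = end - start,
                    .align       = PMD_SIZE,  /* keep chunk boundaries PMD-aligned */
                    .min_chunk   = max(1ul << 27, PMD_SIZE), /* no threading below ~128M */
                    .max_threads = 16,
            };

            /*
             * padata splits [start, start + size) into aligned chunks of at
             * least min_chunk bytes, runs them on up to max_threads worker
             * threads, and returns once every chunk has been processed.
             */
            padata_do_multithreaded(&job);
    }
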
 mm/memory.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/mm/memory.c b/mm/memory.c
index ebd08a1f2c9a..872de67ca2d7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -78,6 +78,7 @@
 #include <linux/vmalloc.h>
 #include <linux/sched/sysctl.h>
 #include <linux/ve.h>
+#include <linux/padata.h>
 
 #include <trace/events/kmem.h>
 
@@ -1620,6 +1621,69 @@ void unmap_page_range(struct mmu_gather *tlb,
 	tlb_end_vma(tlb, vma);
 }
 
+#ifdef CONFIG_PADATA
+
+struct unmap_page_range_args {
+	struct mmu_gather *tlb;
+	struct vm_area_struct *vma;
+	struct zap_details *details;
+	unsigned long start;
+	unsigned long end;
+};
+
+static int unmap_page_range_chunk(unsigned long addr, unsigned long end,
+				  void *arg)
+{
+	struct unmap_page_range_args *args = arg;
+	struct mmu_gather *tlb = args->tlb;
+	struct vm_area_struct *vma = args->vma;
+	struct zap_details *details = args->details;
+	struct mm_struct *mm = tlb->mm;
+	struct mmu_gather local_tlb;
+	bool use_local_gather = false;
+
+	/*
+	 * The mmu gather API is not designed to operate on a single
+	 * mmu_gather in parallel. Use a local mmu_gather when multi-
+	 * threaded and avoid the additional overhead when not.
+	 */
+	if (addr != args->start || end != args->end) {
+		tlb = &local_tlb;
+		use_local_gather = true;
+	}
+
+	if (use_local_gather)
+		tlb_gather_mmu_fullmm(tlb, mm);
+
+	unmap_page_range(tlb, vma, addr, end, details);
+
+	if (use_local_gather)
+		tlb_finish_mmu(tlb);
+
+	return 0;
+}
+
+static void unmap_page_range_mt(struct mmu_gather *tlb,
+			 struct vm_area_struct *vma,
+			 unsigned long addr, unsigned long end,
+			 struct zap_details *details)
+{
+	struct unmap_page_range_args args = { tlb, vma, details, addr, end };
+	struct padata_mt_job job = {
+		.thread_fn   = unmap_page_range_chunk,
+		.fn_arg      = &args,
+		.start       = addr,
+		.size        = end - addr,
+		.align       = PMD_SIZE,
+		.min_chunk   = max(1ul << 27, PMD_SIZE),
+		.max_threads = 16,
+	};
+
+	BUG_ON(addr >= end);
+	padata_do_multithreaded(&job);
+}
+
+#endif /* CONFIG_PADATA */
 
 static void unmap_single_vma(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, unsigned long start_addr,
@@ -1660,6 +1724,14 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 				__unmap_hugepage_range_final(tlb, vma, start, end,
 							     NULL, zap_flags);
 			}
+#ifdef CONFIG_PADATA
+		/*
+		 * Only possibly use multiple threads to unmap when the
+		 * entire address space is being unmapped.
+		 */
+		} else if (tlb->fullmm && (vma_is_anonymous(vma) || vma_is_shmem(vma))) {
+			unmap_page_range_mt(tlb, vma, start, end, details);
+#endif
 		} else
 			unmap_page_range(tlb, vma, start, end, details);
 	}

