[Devel] [PATCH RHEL9 COMMIT] fs/fuse kio: Refactor pcs_mr to avoid large contiguous memory allocation

Konstantin Khorenko khorenko at virtuozzo.com
Mon Aug 4 11:39:17 MSK 2025


The commit is pushed to "branch-rh9-5.14.0-427.77.1.vz9.86.x-ovz" and will appear at git at bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.77.1.vz9.86.3
------>
commit 03f5cf5a326c677d2b1dd9b8e572286c580bbdec
Author: Liu Kui <kui.liu at virtuozzo.com>
Date:   Fri Aug 1 16:54:30 2025 +0800

    fs/fuse kio: Refactor pcs_mr to avoid large contiguous memory allocation
    
    Fix page allocation failure in kcalloc by converting the large 1-dimensional
    array into a 2-dimensional array of order-0 (4KB) pages. This eliminates the
need for non-order-0 allocations, improving allocation reliability under memory
    pressure. The 2D array still presents virtually contiguous pages to the caller,
    maintaining functional equivalence.
    
    https://virtuozzo.atlassian.net/browse/VSTOR-112413
    
    Signed-off-by: Liu Kui <kui.liu at virtuozzo.com>
    Acked-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
    
    Feature: fuse: kRPC - single RPC for kernel and userspace
---
 fs/fuse/kio/pcs/pcs_mr.c | 55 ++++++++++++++++++++++++++++++------------------
 fs/fuse/kio/pcs/pcs_mr.h | 21 ++++++++++++------
 2 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_mr.c b/fs/fuse/kio/pcs/pcs_mr.c
index cbd3b440dd1b8..8e1dbc1121cec 100644
--- a/fs/fuse/kio/pcs/pcs_mr.c
+++ b/fs/fuse/kio/pcs/pcs_mr.c
@@ -13,11 +13,18 @@
 void pcs_umem_release(struct pcs_umem *umem)
 {
 	struct mm_struct *mm_s = umem->mm;
+	int i, npages = umem->npages;
 
-	unpin_user_pages(umem->pages, umem->npages);
+	for (i = 0; npages; i++) {
+		int to_free = min_t(int, PCS_PAGES_PER_CHUNK, npages);
+
+		unpin_user_pages(umem->page_chunk[i].pages, to_free);
+		kfree(umem->page_chunk[i].pages);
+		npages -= to_free;
+	}
 	atomic64_sub(umem->npages, &mm_s->pinned_vm);
 	mmdrop(mm_s);
-	kfree(umem->pages);
+	kfree(umem->page_chunk);
 	kfree(umem);
 }
 
@@ -27,14 +34,13 @@ void pcs_umem_release(struct pcs_umem *umem)
 struct pcs_umem *pcs_umem_get(u64 start, u64 len)
 {
 	struct pcs_umem *umem = NULL;
-	struct page **pages;
-	int npages;
-	u64 fp_va;
 	struct mm_struct *mm_s;
-	int got, ret;
+	u64 fp_va;
+	int npages, nchunks, i, ret;
 
 	fp_va = start & PAGE_MASK;
 	npages = PAGE_ALIGN(start + len - fp_va) >> PAGE_SHIFT;
+	nchunks = (npages >> PCS_PAGE_CHUNK_SHIFT) + 1;
 
 	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
 	if (!umem)
@@ -48,25 +54,34 @@ struct pcs_umem *pcs_umem_get(u64 start, u64 len)
 	mmap_read_lock(mm_s);
 
 	umem->fp_addr = fp_va;
-	umem->pages = kcalloc(npages, sizeof(struct page *), GFP_KERNEL);
-	if (!umem->pages) {
+	umem->page_chunk = kcalloc(nchunks, sizeof(struct pcs_page_chunk *), GFP_KERNEL);
+	if (!umem->page_chunk) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
 
-	got = 0;
-	while (npages) {
-		pages = &umem->pages[got];
-		ret = pin_user_pages(fp_va, npages, FOLL_WRITE | FOLL_LONGTERM, pages, NULL);
-		if (ret < 0)
-			goto out_err;
+	for (i = 0; npages; i++) {
+		int n = min_t(int, npages, PCS_PAGES_PER_CHUNK);
+		struct page **pages = kcalloc(n, sizeof(struct page *), GFP_KERNEL);
 
-		WARN_ON(ret == 0);
-		umem->npages += ret;
-		atomic64_add(ret, &mm_s->pinned_vm);
-		fp_va += ret * PAGE_SIZE;
-		npages -= ret;
-		got += ret;
+		if (!pages) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		umem->page_chunk[i].pages = pages;
+
+		while (n) {
+			ret = pin_user_pages(fp_va, n, FOLL_WRITE | FOLL_LONGTERM, pages, NULL);
+			if (ret < 0)
+				goto out_err;
+
+			atomic64_add(ret, &mm_s->pinned_vm);
+			umem->npages += ret;
+			fp_va += ret * PAGE_SIZE;
+			pages += ret;
+			n -= ret;
+			npages -= ret;
+		}
 	}
 	mmap_read_unlock(mm_s);
 
diff --git a/fs/fuse/kio/pcs/pcs_mr.h b/fs/fuse/kio/pcs/pcs_mr.h
index dae9931d9967e..64f237f57deca 100644
--- a/fs/fuse/kio/pcs/pcs_mr.h
+++ b/fs/fuse/kio/pcs/pcs_mr.h
@@ -11,6 +11,8 @@
 struct pcs_umem;
 
 #define PCS_MAX_MR		0x10000
+#define PCS_PAGE_CHUNK_SHIFT 9
+#define PCS_PAGES_PER_CHUNK (1 << PCS_PAGE_CHUNK_SHIFT)
 
 struct pcs_mr_set {
 	struct xarray	mr_xa;	/* array of registered MRs*/
@@ -18,10 +20,15 @@ struct pcs_mr_set {
 	atomic_t	mr_num;		/* number of registered MRs*/
 };
 
+struct pcs_page_chunk
+{
+	struct page **pages; /* array of pinned pages */
+};
+
 struct pcs_umem {
-	u64	fp_addr;	/* First page base address */
-	int	npages;		/* number of pinned pages */
-	struct page	**pages;	/* array of pinned pages */
+	struct pcs_page_chunk *page_chunk;
+	int npages; /* number of pinned pages */
+	u64 fp_addr; /* First page base address */
 	struct mm_struct *mm;	/* mm the memory belongs to */
 };
 
@@ -40,10 +47,12 @@ struct pcs_mr {
  */
 static inline struct page *pcs_umem_page(struct pcs_umem *umem, u64 addr)
 {
-	unsigned int idx = (addr - umem->fp_addr) >> PAGE_SHIFT;
+	unsigned int page_idx = (addr - umem->fp_addr) >> PAGE_SHIFT;
+	unsigned int chunk_idx = page_idx >> PCS_PAGE_CHUNK_SHIFT;
+	unsigned int page_in_chunk = page_idx & (PCS_PAGES_PER_CHUNK - 1);
 
-	if (likely(idx < umem->npages))
-		return umem->pages[idx];
+	if (likely(page_idx < umem->npages))
+		return umem->page_chunk[chunk_idx].pages[page_in_chunk];
 	return NULL;
 }
 


More information about the Devel mailing list