[Devel] [PATCH RHEL9 COMMIT] oracle/padata: return first error code from a multithreaded job

Thu Jan 23 23:35:48 MSK 2025

The commit has been pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear in git at bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.5
------>
commit ea619c63fde66e419a32fc4b40b3d22dd18ec1d8
Author: Daniel Jordan <daniel.m.jordan at oracle.com>
Date:   Tue May 26 15:38:15 2020 -0400

    oracle/padata: return first error code from a multithreaded job
    
    Return the first error code encountered from a multithreaded job.
    
    Threads in a job can fail for different reasons, and those reasons may
    need tracking in the future.  For now, returning the first error will do
    for VFIO page pinning: the kernel unwinds the same way no matter the
    error, and the one current user of the optimization, qemu, only uses the
    code to decide whether to retry in one case and otherwise just logs
    error messages.
    
    Orabug: 31771399
    Signed-off-by: Daniel Jordan <daniel.m.jordan at oracle.com>
    Reviewed-by: Khalid Aziz <khalid.aziz at oracle.com>
    Reviewed-by: George Kennedy <george.kennedy at oracle.com>
    
    https://virtuozzo.atlassian.net/browse/VSTOR-96305
    
    (cherry picked from Oracle commit 16c2ea3e7c0201d5a8b111c89bdaf716970572f4)
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    Feature: oracle/mm: MADV_DOEXEC madvise() flag
---
 include/linux/padata.h |  4 ++--
 kernel/padata.c        | 25 ++++++++++++++++++-------
 mm/page_alloc.c        |  4 +++-
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/include/linux/padata.h b/include/linux/padata.h
index e542b2f01c64..3da4bc95f1f9 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -139,7 +139,7 @@ struct padata_shell {
  *               depending on task size and minimum chunk size.
  */
 struct padata_mt_job {
-	void (*thread_fn)(unsigned long start, unsigned long end, void *arg);
+	int (*thread_fn)(unsigned long start, unsigned long end, void *arg);
 	void			*fn_arg;
 	unsigned long		start;
 	unsigned long		size;
@@ -189,7 +189,7 @@ extern void padata_free_shell(struct padata_shell *ps);
 extern int padata_do_parallel(struct padata_shell *ps,
 			      struct padata_priv *padata, int *cb_cpu);
 extern void padata_do_serial(struct padata_priv *padata);
-extern void padata_do_multithreaded(struct padata_mt_job *job);
+extern int padata_do_multithreaded(struct padata_mt_job *job);
 extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 			      cpumask_var_t cpumask);
 #endif
diff --git a/kernel/padata.c b/kernel/padata.c
index 2b6303604a7a..3d5a35fc0939 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -54,6 +54,7 @@ struct padata_mt_job_state {
 	struct padata_mt_job	*job;
 	int			nworks;
 	int			nworks_fini;
+	int			error; /* first error from thread_fn */
 	unsigned long		chunk_size;
 };
 
@@ -451,8 +452,9 @@ static void padata_mt_helper(struct work_struct *w)
 
 	spin_lock(&ps->lock);
 
-	while (job->size > 0) {
+	while (job->size > 0 && ps->error == 0) {
 		unsigned long start, size, end;
+		int ret;
 
 		start = job->start;
 		/* So end is chunk size aligned if enough work remains. */
@@ -464,8 +466,12 @@ static void padata_mt_helper(struct work_struct *w)
 		job->size -= size;
 
 		spin_unlock(&ps->lock);
-		job->thread_fn(start, end, job->fn_arg);
+		ret = job->thread_fn(start, end, job->fn_arg);
 		spin_lock(&ps->lock);
+
+		/* Save first error code only. */
+		if (ps->error == 0)
+			ps->error = ret;
 	}
 
 	++ps->nworks_fini;
@@ -481,18 +487,20 @@ static void padata_mt_helper(struct work_struct *w)
  * @job: Description of the job.
  *
  * See the definition of struct padata_mt_job for more details.
+ *
+ * Return: 0 for success or a client-specific nonzero error code.
  */
-void padata_do_multithreaded(struct padata_mt_job *job)
+int padata_do_multithreaded(struct padata_mt_job *job)
 {
 	/* In case threads finish at different times. */
 	static const unsigned long load_balance_factor = 4;
 	struct padata_work my_work, *pw;
 	struct padata_mt_job_state ps;
 	LIST_HEAD(works);
-	int nworks;
+	int nworks, ret;
 
 	if (job->size == 0)
-		return;
+		return 0;
 
 	/* Ensure at least one thread when size < min_chunk. */
 	nworks = max(job->size / job->min_chunk, 1ul);
@@ -500,8 +508,9 @@ void padata_do_multithreaded(struct padata_mt_job *job)
 
 	if (nworks == 1) {
 		/* Single thread, no coordination needed, cut to the chase. */
-		job->thread_fn(job->start, job->start + job->size, job->fn_arg);
-		return;
+		ret = job->thread_fn(job->start, job->start + job->size,
+				     job->fn_arg);
+		return ret;
 	}
 
 	spin_lock_init(&ps.lock);
@@ -509,6 +518,7 @@ void padata_do_multithreaded(struct padata_mt_job *job)
 	ps.job	       = job;
 	ps.nworks      = padata_work_alloc_mt(nworks, &ps, &works);
 	ps.nworks_fini = 0;
+	ps.error       = 0;
 
 	/*
 	 * Chunk size is the amount of work a helper does per call to the
@@ -532,6 +542,7 @@ void padata_do_multithreaded(struct padata_mt_job *job)
 
 	destroy_work_on_stack(&my_work.pw_work);
 	padata_works_free(&works);
+	return ps.error;
 }
 
 static void __padata_list_init(struct padata_list *pd_list)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b64a368ab3de..0b8c5556f460 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2135,7 +2135,7 @@ deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
 	return nr_pages;
 }
 
-static void __init
+static int __init
 deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
 			   void *arg)
 {
@@ -2153,6 +2153,8 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
 		deferred_init_maxorder(&i, zone, &spfn, &epfn);
 		cond_resched();
 	}
+
+	return 0;
 }
 
 /* An arch may override for more concurrency. */