[Devel] [PATCH vz9 v1 40/63] dm-ploop: process pios via runners

Alexander Atanasov alexander.atanasov at virtuozzo.com
Fri Jan 24 18:36:14 MSK 2025


Process pios in runner threads while preserving order.

Metadata writeback requires all prior pios to be processed,
since they can generate updates, so we have to wait before
processing writeback. Fsync is still handled sequentially as well.
Both can be improved in a future iteration.

https://virtuozzo.atlassian.net/browse/VSTOR-91821
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
---
 drivers/md/dm-ploop-map.c | 138 ++++++++++++++++----------------------
 1 file changed, 59 insertions(+), 79 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 9f19e0cb9bf7..037d7806b179 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -1537,17 +1537,6 @@ static void ploop_process_one_delta_cow(struct ploop *ploop, struct pio *aux_pio
 	}
 }
 
-static void ploop_process_delta_cow(struct ploop *ploop,
-				    struct llist_node *cow_llist)
-{
-	struct llist_node *pos, *t;
-	struct pio *aux_pio;
-
-	llist_for_each_safe(pos, t, cow_llist) {
-		aux_pio = list_entry((struct list_head *)pos, typeof(*aux_pio), list);
-		ploop_process_one_delta_cow(ploop, aux_pio);
-	}
-}
 /*
  * This allocates a new clu (if clu wb is not pending yet),
  * or tries to attach a bio to a planned page index wb.
@@ -1705,7 +1694,8 @@ void ploop_index_wb_submit(struct ploop *ploop, struct ploop_index_wb *piwb)
 	pio->endio_cb = ploop_bat_write_complete;
 	pio->endio_cb_data = piwb;
 
-	ploop_submit_rw_mapped(ploop, pio);
+	pio->queue_list_id = PLOOP_LIST_WRITEBACK;
+	ploop_runners_add_work(ploop, pio);
 }
 
 static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq)
@@ -1796,20 +1786,6 @@ static void ploop_prepare_embedded_pios(struct ploop *ploop,
 	}
 }
 
-static void ploop_process_deferred_pios(struct ploop *ploop,
-					struct llist_head *pios)
-{
-	struct pio *pio;
-
-	struct llist_node *pos, *t;
-
-	llist_for_each_safe(pos, t, pios->first) {
-		pio = list_entry((struct list_head *)pos, typeof(*pio), list);
-		INIT_LIST_HEAD(&pio->list); /* until type is changed */
-		ploop_process_one_deferred_bio(ploop, pio);
-	}
-}
-
 static void ploop_process_one_discard_pio(struct ploop *ploop, struct pio *pio)
 {
 	bool bat_update_prepared = false;
@@ -1869,39 +1845,12 @@ static void ploop_process_one_discard_pio(struct ploop *ploop, struct pio *pio)
 	ploop_pio_endio(pio);
 }
 
-static void ploop_process_discard_pios(struct ploop *ploop,
-				       struct llist_node *discpios)
-{
-	struct pio *pio;
-	struct llist_node *pos, *t;
-
-	llist_for_each_safe(pos, t, discpios) {
-		pio = list_entry((struct list_head *)pos, typeof(*pio), list);
-		INIT_LIST_HEAD(&pio->list);
-		ploop_process_one_discard_pio(ploop, pio);
-	}
-}
-
-static void ploop_process_resubmit_pios(struct ploop *ploop,
-					struct llist_node *llpios)
-{
-	struct pio *pio;
-	struct llist_node *pos, *t;
-
-	llist_for_each_safe(pos, t, llpios) {
-		pio = list_entry((struct list_head *)pos, typeof(*pio), list);
-		pio->queue_list_id = PLOOP_LIST_INVALID;
-		INIT_LIST_HEAD(&pio->list); /* Until type is changed */
-		ploop_submit_rw_mapped(ploop, pio);
-	}
-}
-
 static inline int ploop_runners_have_pending(struct ploop *ploop)
 {
 	return atomic_read(&ploop->kt_worker->inflight_pios);
 }
 
-static int ploop_submit_metadata_writeback(struct ploop *ploop)
+static inline int ploop_submit_metadata_writeback(struct ploop *ploop, int force)
 {
 	unsigned long flags;
 	LIST_HEAD(ll_skipped);
@@ -1922,7 +1871,7 @@ static int ploop_submit_metadata_writeback(struct ploop *ploop)
 	 */
 	llist_for_each_safe(pos, t, ll_wb_batch) {
 		md = list_entry((struct list_head *)pos, typeof(*md), wb_link);
-		if (!llist_empty(&md->wait_llist) ||
+		if (!llist_empty(&md->wait_llist) || force ||
 		    test_bit(MD_HIGHPRIO, &md->status) ||
 		    time_before(md->dirty_timeout, timeout) ||
 		    ploop->force_md_writeback) {
@@ -1942,11 +1891,11 @@ static int ploop_submit_metadata_writeback(struct ploop *ploop)
 	return ret;
 }
 
-static void process_ploop_fsync_work(struct ploop *ploop, struct llist_node *llflush_pios)
+static int process_ploop_fsync_work(struct ploop *ploop, struct llist_node *llflush_pios)
 {
 	struct file *file;
 	struct pio *pio;
-	int ret;
+	int ret, npios = 0;
 	struct llist_node *pos, *t;
 
 	file = ploop_top_delta(ploop)->file;
@@ -1963,6 +1912,7 @@ static void process_ploop_fsync_work(struct ploop *ploop, struct llist_node *llf
 		}
 		ploop_pio_endio(pio);
 	}
+	return npios;
 }
 
 static inline int ploop_runners_add_work(struct ploop *ploop, struct pio *pio)
@@ -2002,6 +1952,7 @@ void do_ploop_run_work(struct ploop *ploop)
 	struct llist_node *llresubmit;
 	struct llist_node *llflush_pios;
 	unsigned int old_flags = current->flags;
+	int npios;
 
 	current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
 
@@ -2013,33 +1964,55 @@ void do_ploop_run_work(struct ploop *ploop)
 	llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
 
 	/* add old deferred back to the list */
-	if (lldeferred_pios) {
-		struct llist_node *pos, *t;
-		/* Add one by one we need last for batch add */
-		llist_for_each_safe(pos, t, lldeferred_pios) {
-			llist_add(pos, &deferred_pios);
-		}
-	}
+	if (lldeferred_pios)
+		ploop_runners_add_work_list(ploop, lldeferred_pios);
 
-	ploop_prepare_embedded_pios(ploop, llembedded_pios, &deferred_pios);
+	if (llembedded_pios)
+		ploop_prepare_embedded_pios(ploop, llist_reverse_order(llembedded_pios),
+					    &deferred_pios);
 
 	llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
 
 	if (llresubmit)
-		ploop_process_resubmit_pios(ploop, llist_reverse_order(llresubmit));
+		ploop_runners_add_work_list(ploop, llresubmit);
 
-	ploop_process_deferred_pios(ploop, &deferred_pios);
+	ploop_runners_add_work_list(ploop, deferred_pios.first);
 
 	if (lldiscard_pios)
-		ploop_process_discard_pios(ploop, llist_reverse_order(lldiscard_pios));
+		ploop_runners_add_work_list(ploop, lldiscard_pios);
 
 	if (llcow_pios)
-		ploop_process_delta_cow(ploop, llist_reverse_order(llcow_pios));
+		ploop_runners_add_work_list(ploop, llcow_pios);
 
-	ploop_submit_metadata_writeback(ploop);
+	/* wait for all pios to be executed before metadata updates */
+	current->flags = old_flags;
+	wait_event_interruptible(ploop->dispatcher_wq_data, (!ploop_runners_have_pending(ploop)));
+	current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
+
+	/* if we have a flush we must sync md data too */
+	npios = ploop_submit_metadata_writeback(ploop, !!llflush_pios | ploop->force_md_writeback);
 
-	if (llflush_pios)
-		process_ploop_fsync_work(ploop, llist_reverse_order(llflush_pios));
+	if (llflush_pios) {
+		if (npios) {
+			/* wait for metadata writeback to complete */
+			current->flags = old_flags;
+			/* First wait all pios to be processed */
+			wait_event_interruptible(ploop->dispatcher_wq_data,
+						 (!ploop_runners_have_pending(ploop)));
+			current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
+		}
+		/* Now process fsync pios after we have done all other */
+		npios = process_ploop_fsync_work(ploop, llflush_pios);
+		/* Since dispatcher is single thread no other work can be queued */
+#ifdef USE_RUNNERS__NOT_READY
+		if (npios) {
+			current->flags = old_flags;
+			wait_event_interruptible(ploop->dispatcher_wq_fsync,
+						 atomic_read(&wrkr->fsync_pios) != 0);
+			current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
+		}
+#endif
+	}
 
 	current->flags = old_flags;
 }
@@ -2141,8 +2114,9 @@ int ploop_worker(void *data)
 		    llist_empty(&ploop->llresubmit_pios) &&
 		    !ploop->force_md_writeback) {
 			if (kthread_should_stop()) {
-				wait_event_interruptible(ploop->dispatcher_wq_data,
-						(!ploop_runners_have_pending(ploop)));
+				wait_event_interruptible(
+					ploop->dispatcher_wq_data,
+					(!ploop_runners_have_pending(ploop)));
 				__set_current_state(TASK_RUNNING);
 				break;
 			}
@@ -2155,7 +2129,7 @@ int ploop_worker(void *data)
 		cond_resched(); /* give other processes chance to run */
 		if (kthread_should_stop()) {
 			wait_event_interruptible(ploop->dispatcher_wq_data,
-						(!ploop_runners_have_pending(ploop)));
+					(!ploop_runners_have_pending(ploop)));
 			__set_current_state(TASK_RUNNING);
 			break;
 		}
@@ -2202,6 +2176,7 @@ static int ploop_prepare_flush(struct ploop *ploop, struct pio *pio)
 	spin_lock_irqsave(&ploop->bat_lock, flags);
 	llist_for_each_safe(pos, t, ploop->wb_batch_llist.first) {
 		md = list_entry((struct list_head *)pos, typeof(*md), wb_link);
+		WARN_ON(md->piwb->flush_pio);
 		md_inflight++;
 		md->piwb->flush_pio = flush_pio;
 		set_bit(MD_HIGHPRIO, &md->status);
@@ -2218,8 +2193,8 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
 	struct ploop_rq *prq = pio->endio_cb_data;
 	struct request *rq = prq->rq;
 	LLIST_HEAD(deferred_pios);
+	int ret = 0;
 	struct pio *spio;
-	int ret;
 
 	if (blk_rq_bytes(rq)) {
 		pio->queue_list_id = PLOOP_LIST_PREPARE;
@@ -2239,15 +2214,20 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
 		ret = ploop_prepare_flush(ploop, pio);
 		if (ret < 0)
 			return;
+		/*
+		 * Either we have a preflush pio with queue_list_id set to PLOOP_LIST_FLUSH
+		 * or it is attached to md writeback
+		 * only if it is zero, no inflight then fall thru
+		 */
 		if (ret > 0)
 			goto out;
-		/* Will add to prepare list and schedule work */
+		/* if it is zero we will process it here */
 	}
 
 	if (pio->queue_list_id == PLOOP_LIST_FLUSH) {
 		/* Let the FLUSH go last from the queue , do not run here to preserve ordering */
 		llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
-		return;
+		goto out;
 	}
 
 	ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios);
@@ -2262,7 +2242,7 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
 			ret = ploop_process_one_deferred_bio(ploop, spio);
 			current->flags = old_flags;
 			if (ret == 1) {
-				/* not queued add back to deferreed*/
+				/* not queued add back to deferred */
 				llist_add((struct llist_node *)(&spio->list),
 					  &ploop->pios[PLOOP_LIST_DEFERRED]);
 			} else {
-- 
2.43.0



More information about the Devel mailing list