[Devel] [RFC PATCH vz9 v6 61/62] dm-ploop: rework bat completion logic

Alexander Atanasov alexander.atanasov at virtuozzo.com
Fri Dec 6 00:56:34 MSK 2024


a pio may complete after md page update, in that case we
must not complete the update but wait for the last data pio
and only then complete them all.

https://virtuozzo.atlassian.net/browse/VSTOR-91821
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
---
 drivers/md/dm-ploop-map.c | 57 ++++++++++++++++++++++++++-------------
 drivers/md/dm-ploop.h     |  1 +
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 482022d6b60b..9da996935945 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -598,7 +598,6 @@ static void ploop_unlink_completed_pio(struct ploop *ploop, struct pio *pio)
 
 static bool ploop_md_make_dirty(struct ploop *ploop, struct md_page *md)
 {
-	unsigned long flags;
 	bool new = false;
 
 	/*
@@ -896,6 +895,7 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
 	if (wait_llist_pending) {
 		llist_for_each_safe(pos, t, wait_llist_pending) {
 			pio = list_entry((struct list_head *)pos, typeof(*pio), list);
+			INIT_LIST_HEAD(&pio->list);
 			list_add(&pio->list, &list);
 		}
 	}
@@ -962,15 +962,22 @@ static void ploop_bat_write_complete(struct pio *pio, void *piwb_ptr,
 			 */
 			ploop_advance_local_after_bat_wb(ploop, piwb, true);
 		}
+	} else {
+		PL_ERR("piwb not completed\n");
 	}
 
 	spin_lock_irqsave(&piwb->lock, flags);
-	if (completed)
+	if (completed) {
 		piwb->completed = completed;
-	piwb->bi_status = bi_status;
-	ll_ready_pios = llist_reverse_order(llist_del_all(&piwb->llready_data_pios));
+		ll_ready_pios = llist_del_all(&piwb->llready_data_pios);
+	} else {
+		piwb->wait_for_last = 1;
+	}
+	if (bi_status)
+		piwb->bi_status = bi_status;
 	spin_unlock_irqrestore(&piwb->lock, flags);
-
+	if (!completed)
+		return;
 	ll_cow_pios = llist_reverse_order(llist_del_all(&piwb->cow_llist));
 
 	/*
@@ -978,8 +985,8 @@ static void ploop_bat_write_complete(struct pio *pio, void *piwb_ptr,
 	 */
 
 	llist_for_each_safe(pos, t, ll_ready_pios) {
-		pio = list_entry((struct list_head *)pos, typeof(*aux_pio), list);
-		INIT_LIST_HEAD(&pio->list);
+		data_pio = list_entry((struct list_head *)pos, typeof(*data_pio), list);
+		INIT_LIST_HEAD(&data_pio->list);
 		if (bi_status)
 			data_pio->bi_status = bi_status;
 		ploop_pio_endio(data_pio);
@@ -1026,9 +1033,10 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
 	piwb->pio = pio = ploop_alloc_pio(ploop, GFP_NOIO);
 	if (!page || !pio)
 		goto err;
-	piwb->kmpage = kmap(piwb->bat_page);
 	ploop_init_pio(ploop, REQ_OP_WRITE, pio);
-
+	piwb->kmpage = kmap(piwb->bat_page);
+	WARN_ON(!piwb->kmpage);
+	piwb->wait_for_last = 0;
 	bat_entries = md->kmpage;
 
 	spin_lock_irq(&md->md_lock);
@@ -1251,6 +1259,7 @@ static bool ploop_data_pio_end(struct pio *pio)
 	struct ploop_index_wb *piwb = pio->piwb;
 	unsigned long flags;
 	bool completed;
+	int last_pio = 0;
 
 	spin_lock_irqsave(&piwb->lock, flags);
 	completed = piwb->completed;
@@ -1258,12 +1267,26 @@ static bool ploop_data_pio_end(struct pio *pio)
 		llist_add((struct llist_node *)(&pio->list), &piwb->llready_data_pios);
 	else if (!pio->bi_status)
 		pio->bi_status = piwb->bi_status;
-	spin_unlock_irqrestore(&piwb->lock, flags);
 	/* If pio is late then end it here. this can happen with flushes */
-	if (completed)
+	if (atomic_read(&piwb->count) == 2) {
+		if (piwb->wait_for_last)
+			last_pio = 1;
+	}
+	spin_unlock_irqrestore(&piwb->lock, flags);
+	ploop_put_piwb(piwb);
+	if (completed) {
+		struct ploop *ploop = pio->ploop;
+
+		PL_ERR("late pio already completed\n");
 		ploop_pio_endio(pio);
+	} else if (last_pio) {
+		/*
+		 * Status is set from first call to ploop_bat_write_complete
+		 * zero keeps it as is
+		 */
+		ploop_bat_write_complete(piwb->pio, piwb, piwb->bi_status);
+	}
 
-	ploop_put_piwb(piwb);
 
 	return completed;
 }
@@ -1309,8 +1332,6 @@ static void ploop_check_standby_mode(struct ploop *ploop, long res)
 
 static void ploop_data_rw_complete(struct pio *pio)
 {
-	bool completed;
-
 	if (pio->ret != pio->bi_iter.bi_size) {
 		if (pio->ret >= 0 || pio->ret == -ENOTBLK) {
 			/* Partial IO or request to retry in buffered mode */
@@ -1338,12 +1359,10 @@ static void ploop_data_rw_complete(struct pio *pio)
 		pio->bi_status = errno_to_blk_status(pio->ret);
 	}
 	if (pio->is_data_alloc) {
-		completed = ploop_data_pio_end(pio);
-		if (!completed)
-			return;
+		ploop_data_pio_end(pio);
+	} else {
+		ploop_pio_endio(pio);
 	}
-
-	ploop_pio_endio(pio);
 }
 
 /*
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 5ad2a5142521..cc66f9dcd27f 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -111,6 +111,7 @@ struct ploop_index_wb {
 	u32 page_id;
 	struct bio_vec aux_bvec;
 	struct pio *flush_pio;
+	int wait_for_last;
 };
 
 /* Metadata page */
-- 
2.43.0



More information about the Devel mailing list