[Devel] [PATCH RHEL8 COMMIT] ploop: Async md writeback

Konstantin Khorenko khorenko at virtuozzo.com
Fri Jul 2 22:43:30 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.6.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.el8
------>
commit ae99c3cecca83675037f47f6854f652f60b78b4f
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Fri Jul 2 22:43:29 2021 +0300

    ploop: Async md writeback
    
    Do not wait until md writeback has completed: submit the BAT page
    write asynchronously and finish the update from its endio callback.
    
    https://jira.sw.ru/browse/PSBM-124550
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    ==========================
    Parallel async BAT pages submission and improvements
    
    [1-15]:
    https://jira.sw.ru/browse/PSBM-124550
    
    [16-22]:
    follow-up improvements
    
    Kirill Tkhai (22):
          ploop: Remove obsolete comment
          ploop: Add md and piwb cross pointers
          ploop: Add @md argument to locate_new_cluster_and_attach_pio()
          ploop: Refactoring in process_one_discard_pio()
          ploop: Pass type argument to ploop_prepare_bat_update()
          ploop: Move md is busy check to delay_if_md_busy()
          ploop: Introduce batch list for md pages writeback
          ploop: Check for md dirty instead of md piwb
          ploop: Reread piwb after ploop_prepare_bat_update()
          ploop: Change argument in ploop_prepare_bat_update()
          ploop: Return md from ploop_prepare_reloc_index_wb()
          ploop: Change arguments and rename ploop_reset_bat_update()
          ploop: Allow parallel wb of md pages
          ploop: Async md writeback
          ploop: Rename ploop_submit_index_wb_sync()
          ploop: Resubmit pios from main kwork
          ploop: Rename process_delta_wb()
          ploop: Do fsync after bat page write
          ploop: Do not iterate excess clusters in notify_delta_merged()
          ploop: Use kvec in ploop_delta_check_header()
          ploop: Add argument to ploop_read_delta_metadata()
          ploop: Underline clu and page is u32
---
 drivers/md/dm-ploop-cmd.c | 12 +++++++--
 drivers/md/dm-ploop-map.c | 66 ++++++++++++++++++++++++++++++++---------------
 drivers/md/dm-ploop.h     |  3 ++-
 3 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index 8f2e76c0e1a8..7a5012bbe2fb 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -279,6 +279,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
 	unsigned int new_dst, clu, dst_clu;
 	struct pio *pio = cmd->resize.pio;
 	struct ploop_index_wb *piwb;
+	struct completion comp;
 	struct md_page *md;
 	bool is_locked;
 	int ret = 0;
@@ -317,10 +318,13 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
 	}
 
 	ploop_make_md_wb(ploop, md);
+	init_completion(&comp);
+	piwb->comp = &comp;
 	/* Write new index on disk */
 	ploop_submit_index_wb_sync(ploop, piwb);
+	wait_for_completion(&comp);
+
 	ret = blk_status_to_errno(piwb->bi_status);
-	ploop_break_bat_update(ploop, md);
 	if (ret)
 		goto out;
 
@@ -349,6 +353,7 @@ static int ploop_grow_update_header(struct ploop *ploop,
 	struct ploop_pvd_header *hdr;
 	struct ploop_index_wb *piwb;
 	u32 nr_be, offset, clus;
+	struct completion comp;
 	struct md_page *md;
 	u64 sectors;
 	int ret;
@@ -372,7 +377,11 @@ static int ploop_grow_update_header(struct ploop *ploop,
 	kunmap_atomic(hdr);
 
 	ploop_make_md_wb(ploop, md);
+	init_completion(&comp);
+	piwb->comp = &comp;
 	ploop_submit_index_wb_sync(ploop, piwb);
+	wait_for_completion(&comp);
+
 	ret = blk_status_to_errno(piwb->bi_status);
 	if (!ret) {
 		/* Now update our cached page */
@@ -383,7 +392,6 @@ static int ploop_grow_update_header(struct ploop *ploop,
 		kunmap_atomic(hdr);
 	}
 
-	ploop_break_bat_update(ploop, md);
 	return ret;
 }
 
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 2c71ed501236..319acfa831eb 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -42,7 +42,7 @@ static unsigned int pio_nr_segs(struct pio *pio)
 void ploop_index_wb_init(struct ploop_index_wb *piwb, struct ploop *ploop)
 {
 	piwb->ploop = ploop;
-	init_completion(&piwb->comp);
+	piwb->comp = NULL;
 	spin_lock_init(&piwb->lock);
 	piwb->md = NULL;
 	piwb->bat_page = NULL;
@@ -716,6 +716,7 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
 
 	WARN_ON_ONCE(!(md->status & MD_WRITEBACK));
 	md->status &= ~MD_WRITEBACK;
+	md->piwb = NULL;
 	list_splice_tail_init(&md->wait_list, &list);
 	write_unlock_irqrestore(&ploop->bat_rwlock, flags);
 	kunmap_atomic(dst_clu);
@@ -725,6 +726,13 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
 		dispatch_pios(ploop, NULL, &list);
 }
 
+static void free_piwb(struct ploop_index_wb *piwb)
+{
+	kfree(piwb->pio);
+	put_page(piwb->bat_page);
+	kfree(piwb);
+}
+
 static void put_piwb(struct ploop_index_wb *piwb)
 {
 	if (atomic_dec_and_test(&piwb->count)) {
@@ -736,7 +744,9 @@ static void put_piwb(struct ploop_index_wb *piwb)
 		if (piwb->bi_status)
 			ploop_advance_local_after_bat_wb(ploop, piwb, false);
 
-		complete(&piwb->comp);
+		if (piwb->comp)
+			complete(piwb->comp);
+		free_piwb(piwb);
 	}
 }
 
@@ -797,6 +807,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
 	bool is_last_page = true;
 	u32 page_id = md->id;
 	struct page *page;
+	struct pio *pio;
 	map_index_t *to;
 
 	piwb = kmalloc(sizeof(*piwb), GFP_NOIO);
@@ -805,8 +816,10 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
 	ploop_index_wb_init(piwb, ploop);
 
 	piwb->bat_page = page = alloc_page(GFP_NOIO);
-	if (!page)
+	piwb->pio = pio = kmalloc(sizeof(*pio), GFP_NOIO);
+	if (!page || !pio)
 		goto err;
+	init_pio(ploop, REQ_OP_WRITE, pio);
 
 	bat_entries = kmap_atomic(md->page);
 
@@ -851,7 +864,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
 	piwb->type = type;
 	return 0;
 err:
-	kfree(piwb);
+	free_piwb(piwb);
 	return -ENOMEM;
 }
 
@@ -862,12 +875,10 @@ void ploop_break_bat_update(struct ploop *ploop, struct md_page *md)
 
 	write_lock_irqsave(&ploop->bat_rwlock, flags);
 	piwb = md->piwb;
-	md->piwb->md = NULL;
 	md->piwb = NULL;
 	write_unlock_irqrestore(&ploop->bat_rwlock, flags);
 
-	put_page(piwb->bat_page);
-	kfree(piwb);
+	free_piwb(piwb);
 }
 
 static void ploop_bat_page_zero_cluster(struct ploop *ploop,
@@ -1466,25 +1477,39 @@ static int process_one_deferred_bio(struct ploop *ploop, struct pio *pio)
 	return 0;
 }
 
-void ploop_submit_index_wb_sync(struct ploop *ploop,
-				struct ploop_index_wb *piwb)
+static void md_write_endio(struct pio *pio, void *piwb_ptr, blk_status_t bi_status)
 {
-	blk_status_t status = BLK_STS_OK;
+	struct ploop_index_wb *piwb = piwb_ptr;
+	struct ploop *ploop = piwb->ploop;
 	u32 dst_clu;
-	int ret;
 
-	/* track_bio() will be called in ploop_bat_write_complete() */
+	dst_clu = POS_TO_CLU(ploop, (u64)piwb->page_id << PAGE_SHIFT);
+	track_dst_cluster(ploop, dst_clu);
 
-	ret = ploop_rw_page_sync(WRITE, top_delta(ploop)->file,
-				 piwb->page_id, piwb->bat_page);
-	if (ret)
-		status = errno_to_blk_status(ret);
+	ploop_bat_write_complete(piwb, bi_status);
+}
 
-	dst_clu = ((u64)piwb->page_id << PAGE_SHIFT) / CLU_SIZE(ploop);
-	track_dst_cluster(ploop, dst_clu);
+void ploop_submit_index_wb_sync(struct ploop *ploop,
+				struct ploop_index_wb *piwb)
+{
+	loff_t pos = (loff_t)piwb->page_id << PAGE_SHIFT;
+	struct pio *pio = piwb->pio;
+	struct bio_vec bvec = {
+		.bv_page = piwb->bat_page,
+		.bv_len = PAGE_SIZE,
+		.bv_offset = 0,
+	};
 
-	ploop_bat_write_complete(piwb, status);
-	wait_for_completion(&piwb->comp);
+	pio->bi_iter.bi_sector = to_sector(pos);
+	pio->bi_iter.bi_size = PAGE_SIZE;
+	pio->bi_iter.bi_idx = 0;
+	pio->bi_iter.bi_bvec_done = 0;
+	pio->bi_io_vec = &bvec;
+	pio->level = top_level(ploop);
+	pio->endio_cb = md_write_endio;
+	pio->endio_cb_data = piwb;
+
+	submit_rw_mapped(ploop, pio);
 }
 
 static void process_deferred_pios(struct ploop *ploop, struct list_head *pios)
@@ -1584,7 +1609,6 @@ static void submit_metadata_writeback(struct ploop *ploop)
 		write_unlock_irq(&ploop->bat_rwlock);
 
 		ploop_submit_index_wb_sync(ploop, md->piwb);
-		ploop_break_bat_update(ploop, md);
 	}
 }
 
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 1b76e74b60b7..1634ba8fc5da 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -93,10 +93,11 @@ enum piwb_type {
 
 struct ploop_index_wb {
 	struct ploop *ploop;
-	struct completion comp;
+	struct completion *comp;
 	enum piwb_type type;
 	spinlock_t lock;
 	struct md_page *md;
+	struct pio *pio;
 	struct page *bat_page;
 	struct list_head ready_data_pios;
 	struct list_head cow_list;


More information about the Devel mailing list