[Devel] [PATCH RHEL8 COMMIT] ploop: Introduce batch list for md pages writeback

Konstantin Khorenko khorenko at virtuozzo.com
Fri Jul 2 22:43:26 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.6.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.el8
------>
commit 63d14109f789da1907d2383d43a4f6f185ab155d
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Fri Jul 2 22:43:26 2021 +0300

    ploop: Introduce batch list for md pages writeback
    
    Every iteration of do_ploop_work() collects changes
    to md pages. At the end of the work we submit writeback
    of all the md pages in parallel.
    
    https://jira.sw.ru/browse/PSBM-124550
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    ==========================
    Parallel async BAT pages submission and improvements
    
    [1-15]:
    https://jira.sw.ru/browse/PSBM-124550
    
    [16-22]:
    following improvements
    
    Kirill Tkhai (22):
          ploop: Remove absolet comment
          ploop: Add md and piwb cross pointers
          ploop: Add @md argument to locate_new_cluster_and_attach_pio()
          ploop: Refactoring in process_one_discard_pio()
          ploop: Pass type argument to ploop_prepare_bat_update()
          ploop: Move md is busy check to delay_if_md_busy()
          ploop: Introduce batch list for md pages writeback
          ploop: Check for md dirty instead of md piwb
          ploop: Reread piwb after ploop_prepare_bat_update()
          ploop: Change argument in ploop_prepare_bat_update()
          ploop: Return md from ploop_prepare_reloc_index_wb()
          ploop: Change arguments and rename ploop_reset_bat_update()
          ploop: Allow parallel wb of md pages
          ploop: Async md writeback
          ploop: Rename ploop_submit_index_wb_sync()
          ploop: Resubmit pios from main kwork
          ploop: Rename process_delta_wb()
          ploop: Do fsync after bat page write
          ploop: Do not iterate excess clusters in notify_delta_merged()
          ploop: Use kvec in ploop_delta_check_header()
          ploop: Add argument to ploop_read_delta_metadata()
          ploop: Underline clu and page is u32
---
 drivers/md/dm-ploop-bat.c    |  2 ++
 drivers/md/dm-ploop-cmd.c    |  9 +++++++
 drivers/md/dm-ploop-map.c    | 57 ++++++++++++++++++++++++++++++++++++++++----
 drivers/md/dm-ploop-target.c |  1 +
 drivers/md/dm-ploop.h        |  7 ++++++
 5 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-ploop-bat.c b/drivers/md/dm-ploop-bat.c
index b445663c8be6..ad9c3a171dc9 100644
--- a/drivers/md/dm-ploop-bat.c
+++ b/drivers/md/dm-ploop-bat.c
@@ -79,7 +79,9 @@ static struct md_page * alloc_md_page(unsigned int id)
 	if (!page)
 		goto err_page;
 	INIT_LIST_HEAD(&md->wait_list);
+	INIT_LIST_HEAD(&md->wb_link);
 
+	md->status = 0;
 	md->bat_levels = levels;
 	md->piwb = NULL;
 	md->page = page;
diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index bf9e2c04138a..183524f61243 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -266,6 +266,13 @@ static int ploop_write_zero_cluster_sync(struct ploop *ploop,
 	return ploop_write_cluster_sync(ploop, pio, clu);
 }
 
+static void ploop_make_md_wb(struct ploop *ploop, struct md_page *md)
+{
+	write_lock_irq(&ploop->bat_rwlock);	/* protects md->status */
+	md->status |= MD_WRITEBACK;	/* set before sync index wb */
+	write_unlock_irq(&ploop->bat_rwlock);
+}
+
 static int ploop_grow_relocate_cluster(struct ploop *ploop,
 				       struct ploop_index_wb *piwb,
 				       struct ploop_cmd *cmd)
@@ -308,6 +315,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
 		goto out;
 	}
 
+	ploop_make_md_wb(ploop, piwb->md);
 	/* Write new index on disk */
 	ploop_submit_index_wb_sync(ploop, piwb);
 	ret = blk_status_to_errno(piwb->bi_status);
@@ -360,6 +368,7 @@ static int ploop_grow_update_header(struct ploop *ploop,
 	offset = hdr->m_FirstBlockOffset = cpu_to_le32(first_block_off);
 	kunmap_atomic(hdr);
 
+	ploop_make_md_wb(ploop, piwb->md);
 	ploop_submit_index_wb_sync(ploop, piwb);
 	ret = blk_status_to_errno(piwb->bi_status);
 	if (!ret) {
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 877e29226a45..d91baef66973 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -463,6 +463,23 @@ static void unlink_completed_pio(struct ploop *ploop, struct pio *pio)
 		dispatch_pios(ploop, NULL, &pio_list);
 }
 
+static bool ploop_md_make_dirty(struct ploop *ploop, struct md_page *md)
+{
+	unsigned long flags;
+	bool new = false;	/* true only if md was queued by this call */
+
+	write_lock_irqsave(&ploop->bat_rwlock, flags);
+	WARN_ON_ONCE((md->status & MD_WRITEBACK));
+	if (!(md->status & MD_DIRTY)) {
+		md->status |= MD_DIRTY;
+		list_add_tail(&md->wb_link, &ploop->wb_batch_list);
+		new = true;
+	}
+	write_unlock_irqrestore(&ploop->bat_rwlock, flags);
+
+	return new;
+}
+
 static bool pio_endio_if_all_zeros(struct pio *pio)
 {
 	struct bvec_iter bi = {
@@ -695,6 +712,8 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
 		}
 	}
 
+	WARN_ON_ONCE(!(md->status & MD_WRITEBACK));
+	md->status &= ~MD_WRITEBACK;
 	list_splice_tail_init(&md->wait_list, &list);
 	write_unlock_irqrestore(&ploop->bat_rwlock, flags);
 	kunmap_atomic(dst_clu);
@@ -1259,6 +1278,7 @@ static void submit_cow_index_wb(struct ploop_cow *cow,
 		/* No index wb in process. Prepare a new one */
 		if (ploop_prepare_bat_update(ploop, page_id, piwb, PIWB_TYPE_ALLOC) < 0)
 			goto err_resource;
+		ploop_md_make_dirty(ploop, md);
 	}
 
 	clu -= page_id * PAGE_SIZE / sizeof(map_index_t) - PLOOP_MAP_OFFSET;
@@ -1354,6 +1374,9 @@ static bool locate_new_cluster_and_attach_pio(struct ploop *ploop,
 		goto error;
 	}
 
+	if (bat_update_prepared)
+		ploop_md_make_dirty(ploop, md);
+
 	ploop_attach_end_action(pio, piwb);
 	attached = true;
 out:
@@ -1497,6 +1520,9 @@ static void process_one_discard_pio(struct ploop *ploop, struct pio *pio,
 		list_add_tail(&pio->list, &piwb->ready_data_pios);
 	}
 	kunmap_atomic(to);
+
+	if (bat_update_prepared)
+		ploop_md_make_dirty(ploop, md);
 out:
 	return;
 err:
@@ -1524,6 +1550,31 @@ static void process_resubmit_pios(struct ploop *ploop, struct list_head *pios)
 	}
 }
 
+static void submit_metadata_writeback(struct ploop *ploop)
+{
+	struct md_page *md;
+
+	while (1) {	/* drain wb_batch_list, one md page per pass */
+		write_lock_irq(&ploop->bat_rwlock);
+		md = list_first_entry_or_null(&ploop->wb_batch_list,
+				struct md_page, wb_link);
+		if (!md) {	/* list is empty: nothing left to submit */
+			write_unlock_irq(&ploop->bat_rwlock);
+			break;
+		}
+		list_del_init(&md->wb_link);
+		/* L1L2 mustn't be redirtied while wb is in flight! */
+		WARN_ON_ONCE(!(md->status & MD_DIRTY) ||
+			     (md->status & MD_WRITEBACK));
+		md->status |= MD_WRITEBACK;
+		md->status &= ~MD_DIRTY;
+		write_unlock_irq(&ploop->bat_rwlock);
+
+		ploop_submit_index_wb_sync(ploop, md->piwb);
+		ploop_reset_bat_update(md->piwb);
+	}
+}
+
 void do_ploop_work(struct work_struct *ws)
 {
 	struct ploop *ploop = container_of(ws, struct ploop, worker);
@@ -1555,11 +1606,7 @@ void do_ploop_work(struct work_struct *ws)
 	process_discard_pios(ploop, &discard_pios, &piwb);
 	process_delta_wb(ploop, &cow_pios, &piwb);
 
-	if (piwb.page_id != PAGE_NR_NONE) {
-		/* Index wb was prepared -- submit and wait it */
-		ploop_submit_index_wb_sync(ploop, &piwb);
-		ploop_reset_bat_update(&piwb);
-	}
+	submit_metadata_writeback(ploop);
 
 	current->flags = (current->flags & ~PF_IO_THREAD) | pf_io_thread;
 }
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index d4413ff259a5..705f2252e822 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -334,6 +334,7 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	INIT_LIST_HEAD(&ploop->resubmit_pios);
 	INIT_LIST_HEAD(&ploop->enospc_pios);
 	INIT_LIST_HEAD(&ploop->cluster_lk_list);
+	INIT_LIST_HEAD(&ploop->wb_batch_list);
 	ploop->bat_entries = RB_ROOT;
 	timer_setup(&ploop->enospc_timer, ploop_enospc_timer, 0);
 
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 879d7c5b25e7..babdfdb7f672 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -110,9 +110,14 @@ struct ploop_index_wb {
 struct md_page {
 	struct rb_node node;
 	unsigned int id; /* Number of this page starting from hdr */
+#define MD_DIRTY	(1U << 1) /* Page contains changes and wants writeback */
+#define MD_WRITEBACK	(1U << 2) /* Writeback was submitted */
+	unsigned int status; /* MD_* flags; updated under bat_rwlock */
 	struct page *page;
 	u8 *bat_levels;
 	struct list_head wait_list;
+
+	struct list_head wb_link; /* Link in ploop->wb_batch_list */
 	struct ploop_index_wb *piwb;
 };
 
@@ -146,6 +151,8 @@ struct ploop {
 	unsigned int hb_nr; /* holes_bitmap size in bits */
 	rwlock_t bat_rwlock;
 
+	struct list_head wb_batch_list;
+
 	void *tracking_bitmap;
 	unsigned int tb_nr; /* tracking_bitmap size in bits */
 	unsigned int tb_cursor;


More information about the Devel mailing list