[Devel] [PATCH RHEL8 COMMIT] ploop: Async md writeback
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Jul 2 22:43:30 MSK 2021
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.6.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.el8
------>
commit ae99c3cecca83675037f47f6854f652f60b78b4f
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Fri Jul 2 22:43:29 2021 +0300
ploop: Async md writeback
Do not wait until md writeback has completed.
https://jira.sw.ru/browse/PSBM-124550
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
==========================
Parallel async BAT pages submission and improvements
[1-15]:
https://jira.sw.ru/browse/PSBM-124550
[16-22]:
following improvements
Kirill Tkhai (22):
ploop: Remove obsolete comment
ploop: Add md and piwb cross pointers
ploop: Add @md argument to locate_new_cluster_and_attach_pio()
ploop: Refactoring in process_one_discard_pio()
ploop: Pass type argument to ploop_prepare_bat_update()
ploop: Move md is busy check to delay_if_md_busy()
ploop: Introduce batch list for md pages writeback
ploop: Check for md dirty instead of md piwb
ploop: Reread piwb after ploop_prepare_bat_update()
ploop: Change argument in ploop_prepare_bat_update()
ploop: Return md from ploop_prepare_reloc_index_wb()
ploop: Change arguments and rename ploop_reset_bat_update()
ploop: Allow parallel wb of md pages
ploop: Async md writeback
ploop: Rename ploop_submit_index_wb_sync()
ploop: Resubmit pios from main kwork
ploop: Rename process_delta_wb()
ploop: Do fsync after bat page write
ploop: Do not iterate excess clusters in notify_delta_merged()
ploop: Use kvec in ploop_delta_check_header()
ploop: Add argument to ploop_read_delta_metadata()
ploop: Underline clu and page is u32
---
drivers/md/dm-ploop-cmd.c | 12 +++++++--
drivers/md/dm-ploop-map.c | 66 ++++++++++++++++++++++++++++++++---------------
drivers/md/dm-ploop.h | 3 ++-
3 files changed, 57 insertions(+), 24 deletions(-)
diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index 8f2e76c0e1a8..7a5012bbe2fb 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -279,6 +279,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
unsigned int new_dst, clu, dst_clu;
struct pio *pio = cmd->resize.pio;
struct ploop_index_wb *piwb;
+ struct completion comp;
struct md_page *md;
bool is_locked;
int ret = 0;
@@ -317,10 +318,13 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
}
ploop_make_md_wb(ploop, md);
+ init_completion(&comp);
+ piwb->comp = &comp;
/* Write new index on disk */
ploop_submit_index_wb_sync(ploop, piwb);
+ wait_for_completion(&comp);
+
ret = blk_status_to_errno(piwb->bi_status);
- ploop_break_bat_update(ploop, md);
if (ret)
goto out;
@@ -349,6 +353,7 @@ static int ploop_grow_update_header(struct ploop *ploop,
struct ploop_pvd_header *hdr;
struct ploop_index_wb *piwb;
u32 nr_be, offset, clus;
+ struct completion comp;
struct md_page *md;
u64 sectors;
int ret;
@@ -372,7 +377,11 @@ static int ploop_grow_update_header(struct ploop *ploop,
kunmap_atomic(hdr);
ploop_make_md_wb(ploop, md);
+ init_completion(&comp);
+ piwb->comp = &comp;
ploop_submit_index_wb_sync(ploop, piwb);
+ wait_for_completion(&comp);
+
ret = blk_status_to_errno(piwb->bi_status);
if (!ret) {
/* Now update our cached page */
@@ -383,7 +392,6 @@ static int ploop_grow_update_header(struct ploop *ploop,
kunmap_atomic(hdr);
}
- ploop_break_bat_update(ploop, md);
return ret;
}
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 2c71ed501236..319acfa831eb 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -42,7 +42,7 @@ static unsigned int pio_nr_segs(struct pio *pio)
void ploop_index_wb_init(struct ploop_index_wb *piwb, struct ploop *ploop)
{
piwb->ploop = ploop;
- init_completion(&piwb->comp);
+ piwb->comp = NULL;
spin_lock_init(&piwb->lock);
piwb->md = NULL;
piwb->bat_page = NULL;
@@ -716,6 +716,7 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
WARN_ON_ONCE(!(md->status & MD_WRITEBACK));
md->status &= ~MD_WRITEBACK;
+ md->piwb = NULL;
list_splice_tail_init(&md->wait_list, &list);
write_unlock_irqrestore(&ploop->bat_rwlock, flags);
kunmap_atomic(dst_clu);
@@ -725,6 +726,13 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
dispatch_pios(ploop, NULL, &list);
}
+static void free_piwb(struct ploop_index_wb *piwb)
+{
+ kfree(piwb->pio);
+ put_page(piwb->bat_page);
+ kfree(piwb);
+}
+
static void put_piwb(struct ploop_index_wb *piwb)
{
if (atomic_dec_and_test(&piwb->count)) {
@@ -736,7 +744,9 @@ static void put_piwb(struct ploop_index_wb *piwb)
if (piwb->bi_status)
ploop_advance_local_after_bat_wb(ploop, piwb, false);
- complete(&piwb->comp);
+ if (piwb->comp)
+ complete(piwb->comp);
+ free_piwb(piwb);
}
}
@@ -797,6 +807,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
bool is_last_page = true;
u32 page_id = md->id;
struct page *page;
+ struct pio *pio;
map_index_t *to;
piwb = kmalloc(sizeof(*piwb), GFP_NOIO);
@@ -805,8 +816,10 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
ploop_index_wb_init(piwb, ploop);
piwb->bat_page = page = alloc_page(GFP_NOIO);
- if (!page)
+ piwb->pio = pio = kmalloc(sizeof(*pio), GFP_NOIO);
+ if (!page || !pio)
goto err;
+ init_pio(ploop, REQ_OP_WRITE, pio);
bat_entries = kmap_atomic(md->page);
@@ -851,7 +864,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
piwb->type = type;
return 0;
err:
- kfree(piwb);
+ free_piwb(piwb);
return -ENOMEM;
}
@@ -862,12 +875,10 @@ void ploop_break_bat_update(struct ploop *ploop, struct md_page *md)
write_lock_irqsave(&ploop->bat_rwlock, flags);
piwb = md->piwb;
- md->piwb->md = NULL;
md->piwb = NULL;
write_unlock_irqrestore(&ploop->bat_rwlock, flags);
- put_page(piwb->bat_page);
- kfree(piwb);
+ free_piwb(piwb);
}
static void ploop_bat_page_zero_cluster(struct ploop *ploop,
@@ -1466,25 +1477,39 @@ static int process_one_deferred_bio(struct ploop *ploop, struct pio *pio)
return 0;
}
-void ploop_submit_index_wb_sync(struct ploop *ploop,
- struct ploop_index_wb *piwb)
+static void md_write_endio(struct pio *pio, void *piwb_ptr, blk_status_t bi_status)
{
- blk_status_t status = BLK_STS_OK;
+ struct ploop_index_wb *piwb = piwb_ptr;
+ struct ploop *ploop = piwb->ploop;
u32 dst_clu;
- int ret;
- /* track_bio() will be called in ploop_bat_write_complete() */
+ dst_clu = POS_TO_CLU(ploop, (u64)piwb->page_id << PAGE_SHIFT);
+ track_dst_cluster(ploop, dst_clu);
- ret = ploop_rw_page_sync(WRITE, top_delta(ploop)->file,
- piwb->page_id, piwb->bat_page);
- if (ret)
- status = errno_to_blk_status(ret);
+ ploop_bat_write_complete(piwb, bi_status);
+}
- dst_clu = ((u64)piwb->page_id << PAGE_SHIFT) / CLU_SIZE(ploop);
- track_dst_cluster(ploop, dst_clu);
+void ploop_submit_index_wb_sync(struct ploop *ploop,
+ struct ploop_index_wb *piwb)
+{
+ loff_t pos = (loff_t)piwb->page_id << PAGE_SHIFT;
+ struct pio *pio = piwb->pio;
+ struct bio_vec bvec = {
+ .bv_page = piwb->bat_page,
+ .bv_len = PAGE_SIZE,
+ .bv_offset = 0,
+ };
- ploop_bat_write_complete(piwb, status);
- wait_for_completion(&piwb->comp);
+ pio->bi_iter.bi_sector = to_sector(pos);
+ pio->bi_iter.bi_size = PAGE_SIZE;
+ pio->bi_iter.bi_idx = 0;
+ pio->bi_iter.bi_bvec_done = 0;
+ pio->bi_io_vec = &bvec;
+ pio->level = top_level(ploop);
+ pio->endio_cb = md_write_endio;
+ pio->endio_cb_data = piwb;
+
+ submit_rw_mapped(ploop, pio);
}
static void process_deferred_pios(struct ploop *ploop, struct list_head *pios)
@@ -1584,7 +1609,6 @@ static void submit_metadata_writeback(struct ploop *ploop)
write_unlock_irq(&ploop->bat_rwlock);
ploop_submit_index_wb_sync(ploop, md->piwb);
- ploop_break_bat_update(ploop, md);
}
}
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 1b76e74b60b7..1634ba8fc5da 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -93,10 +93,11 @@ enum piwb_type {
struct ploop_index_wb {
struct ploop *ploop;
- struct completion comp;
+ struct completion *comp;
enum piwb_type type;
spinlock_t lock;
struct md_page *md;
+ struct pio *pio;
struct page *bat_page;
struct list_head ready_data_pios;
struct list_head cow_list;
More information about the Devel
mailing list