[Devel] [PATCH RHEL8 COMMIT] ploop: Introduce batch list for md pages writeback
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Jul 2 22:43:26 MSK 2021
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.6.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.el8
------>
commit 63d14109f789da1907d2383d43a4f6f185ab155d
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Fri Jul 2 22:43:26 2021 +0300
ploop: Introduce batch list for md pages writeback
Every iteration of do_ploop_work() collects changes
to md pages. At the end of the work, we submit
writeback for all of these md pages in parallel.
https://jira.sw.ru/browse/PSBM-124550
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
==========================
Parallel async BAT pages submission and improvements
[1-15]:
https://jira.sw.ru/browse/PSBM-124550
[16-22]:
following improvements
Kirill Tkhai (22):
ploop: Remove absolet comment
ploop: Add md and piwb cross pointers
ploop: Add @md argument to locate_new_cluster_and_attach_pio()
ploop: Refactoring in process_one_discard_pio()
ploop: Pass type argument to ploop_prepare_bat_update()
ploop: Move md is busy check to delay_if_md_busy()
ploop: Introduce batch list for md pages writeback
ploop: Check for md dirty instead of md piwb
ploop: Reread piwb after ploop_prepare_bat_update()
ploop: Change argument in ploop_prepare_bat_update()
ploop: Return md from ploop_prepare_reloc_index_wb()
ploop: Change arguments and rename ploop_reset_bat_update()
ploop: Allow parallel wb of md pages
ploop: Async md writeback
ploop: Rename ploop_submit_index_wb_sync()
ploop: Resubmit pios from main kwork
ploop: Rename process_delta_wb()
ploop: Do fsync after bat page write
ploop: Do not iterate excess clusters in notify_delta_merged()
ploop: Use kvec in ploop_delta_check_header()
ploop: Add argument to ploop_read_delta_metadata()
ploop: Underline clu and page is u32
---
drivers/md/dm-ploop-bat.c | 2 ++
drivers/md/dm-ploop-cmd.c | 9 +++++++
drivers/md/dm-ploop-map.c | 57 ++++++++++++++++++++++++++++++++++++++++----
drivers/md/dm-ploop-target.c | 1 +
drivers/md/dm-ploop.h | 7 ++++++
5 files changed, 71 insertions(+), 5 deletions(-)
diff --git a/drivers/md/dm-ploop-bat.c b/drivers/md/dm-ploop-bat.c
index b445663c8be6..ad9c3a171dc9 100644
--- a/drivers/md/dm-ploop-bat.c
+++ b/drivers/md/dm-ploop-bat.c
@@ -79,7 +79,9 @@ static struct md_page * alloc_md_page(unsigned int id)
if (!page)
goto err_page;
INIT_LIST_HEAD(&md->wait_list);
+ INIT_LIST_HEAD(&md->wb_link);
+ md->status = 0;
md->bat_levels = levels;
md->piwb = NULL;
md->page = page;
diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index bf9e2c04138a..183524f61243 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -266,6 +266,13 @@ static int ploop_write_zero_cluster_sync(struct ploop *ploop,
return ploop_write_cluster_sync(ploop, pio, clu);
}
+static void ploop_make_md_wb(struct ploop *ploop, struct md_page *md)
+{
+ write_lock_irq(&ploop->bat_rwlock);
+ md->status |= MD_WRITEBACK;
+ write_unlock_irq(&ploop->bat_rwlock);
+}
+
static int ploop_grow_relocate_cluster(struct ploop *ploop,
struct ploop_index_wb *piwb,
struct ploop_cmd *cmd)
@@ -308,6 +315,7 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
goto out;
}
+ ploop_make_md_wb(ploop, piwb->md);
/* Write new index on disk */
ploop_submit_index_wb_sync(ploop, piwb);
ret = blk_status_to_errno(piwb->bi_status);
@@ -360,6 +368,7 @@ static int ploop_grow_update_header(struct ploop *ploop,
offset = hdr->m_FirstBlockOffset = cpu_to_le32(first_block_off);
kunmap_atomic(hdr);
+ ploop_make_md_wb(ploop, piwb->md);
ploop_submit_index_wb_sync(ploop, piwb);
ret = blk_status_to_errno(piwb->bi_status);
if (!ret) {
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 877e29226a45..d91baef66973 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -463,6 +463,23 @@ static void unlink_completed_pio(struct ploop *ploop, struct pio *pio)
dispatch_pios(ploop, NULL, &pio_list);
}
+static bool ploop_md_make_dirty(struct ploop *ploop, struct md_page *md)
+{
+ unsigned long flags;
+ bool new = false;
+
+ write_lock_irqsave(&ploop->bat_rwlock, flags);
+ WARN_ON_ONCE((md->status & MD_WRITEBACK));
+ if (!(md->status & MD_DIRTY)) {
+ md->status |= MD_DIRTY;
+ list_add_tail(&md->wb_link, &ploop->wb_batch_list);
+ new = true;
+ }
+ write_unlock_irqrestore(&ploop->bat_rwlock, flags);
+
+ return new;
+}
+
static bool pio_endio_if_all_zeros(struct pio *pio)
{
struct bvec_iter bi = {
@@ -695,6 +712,8 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
}
}
+ WARN_ON_ONCE(!(md->status & MD_WRITEBACK));
+ md->status &= ~MD_WRITEBACK;
list_splice_tail_init(&md->wait_list, &list);
write_unlock_irqrestore(&ploop->bat_rwlock, flags);
kunmap_atomic(dst_clu);
@@ -1259,6 +1278,7 @@ static void submit_cow_index_wb(struct ploop_cow *cow,
/* No index wb in process. Prepare a new one */
if (ploop_prepare_bat_update(ploop, page_id, piwb, PIWB_TYPE_ALLOC) < 0)
goto err_resource;
+ ploop_md_make_dirty(ploop, md);
}
clu -= page_id * PAGE_SIZE / sizeof(map_index_t) - PLOOP_MAP_OFFSET;
@@ -1354,6 +1374,9 @@ static bool locate_new_cluster_and_attach_pio(struct ploop *ploop,
goto error;
}
+ if (bat_update_prepared)
+ ploop_md_make_dirty(ploop, md);
+
ploop_attach_end_action(pio, piwb);
attached = true;
out:
@@ -1497,6 +1520,9 @@ static void process_one_discard_pio(struct ploop *ploop, struct pio *pio,
list_add_tail(&pio->list, &piwb->ready_data_pios);
}
kunmap_atomic(to);
+
+ if (bat_update_prepared)
+ ploop_md_make_dirty(ploop, md);
out:
return;
err:
@@ -1524,6 +1550,31 @@ static void process_resubmit_pios(struct ploop *ploop, struct list_head *pios)
}
}
+static void submit_metadata_writeback(struct ploop *ploop)
+{
+ struct md_page *md;
+
+ while (1) {
+ write_lock_irq(&ploop->bat_rwlock);
+ md = list_first_entry_or_null(&ploop->wb_batch_list,
+ struct md_page, wb_link);
+ if (!md) {
+ write_unlock_irq(&ploop->bat_rwlock);
+ break;
+ }
+ list_del_init(&md->wb_link);
+ /* L1L2 mustn't be redirtyed, when wb in-flight! */
+ WARN_ON_ONCE(!(md->status & MD_DIRTY) ||
+ (md->status & MD_WRITEBACK));
+ md->status |= MD_WRITEBACK;
+ md->status &= ~MD_DIRTY;
+ write_unlock_irq(&ploop->bat_rwlock);
+
+ ploop_submit_index_wb_sync(ploop, md->piwb);
+ ploop_reset_bat_update(md->piwb);
+ }
+}
+
void do_ploop_work(struct work_struct *ws)
{
struct ploop *ploop = container_of(ws, struct ploop, worker);
@@ -1555,11 +1606,7 @@ void do_ploop_work(struct work_struct *ws)
process_discard_pios(ploop, &discard_pios, &piwb);
process_delta_wb(ploop, &cow_pios, &piwb);
- if (piwb.page_id != PAGE_NR_NONE) {
- /* Index wb was prepared -- submit and wait it */
- ploop_submit_index_wb_sync(ploop, &piwb);
- ploop_reset_bat_update(&piwb);
- }
+ submit_metadata_writeback(ploop);
current->flags = (current->flags & ~PF_IO_THREAD) | pf_io_thread;
}
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index d4413ff259a5..705f2252e822 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -334,6 +334,7 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_LIST_HEAD(&ploop->resubmit_pios);
INIT_LIST_HEAD(&ploop->enospc_pios);
INIT_LIST_HEAD(&ploop->cluster_lk_list);
+ INIT_LIST_HEAD(&ploop->wb_batch_list);
ploop->bat_entries = RB_ROOT;
timer_setup(&ploop->enospc_timer, ploop_enospc_timer, 0);
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 879d7c5b25e7..babdfdb7f672 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -110,9 +110,14 @@ struct ploop_index_wb {
struct md_page {
struct rb_node node;
unsigned int id; /* Number of this page starting from hdr */
+#define MD_DIRTY (1U << 1) /* Page contains changes and wants writeback */
+#define MD_WRITEBACK (1U << 2) /* Writeback was submitted */
+ unsigned int status;
struct page *page;
u8 *bat_levels;
struct list_head wait_list;
+
+ struct list_head wb_link;
struct ploop_index_wb *piwb;
};
@@ -146,6 +151,8 @@ struct ploop {
unsigned int hb_nr; /* holes_bitmap size in bits */
rwlock_t bat_rwlock;
+ struct list_head wb_batch_list;
+
void *tracking_bitmap;
unsigned int tb_nr; /* tracking_bitmap size in bits */
unsigned int tb_cursor;
More information about the Devel mailing list