[Devel] [PATCH vz9 v1 35/63] dm-ploop: prepare bat updates under bat_lock
Alexander Atanasov
alexander.atanasov at virtuozzo.com
Fri Jan 24 18:36:09 MSK 2025
Prepare for threads. When preparing bat updates there are two important
things to protect - md->status MD_DIRTY bit and holes bitmap.
Use bat_lock to protect them.
https://virtuozzo.atlassian.net/browse/VSTOR-91821
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
---
drivers/md/dm-ploop-cmd.c | 2 +
drivers/md/dm-ploop-map.c | 100 ++++++++++++++++++++++++++------------
2 files changed, 72 insertions(+), 30 deletions(-)
diff --git a/drivers/md/dm-ploop-cmd.c b/drivers/md/dm-ploop-cmd.c
index dc0be65261f1..84e965855698 100644
--- a/drivers/md/dm-ploop-cmd.c
+++ b/drivers/md/dm-ploop-cmd.c
@@ -315,7 +315,9 @@ static int ploop_grow_relocate_cluster(struct ploop *ploop,
goto out;
}
+ spin_lock_irq(&ploop->bat_lock);
ret = ploop_prepare_reloc_index_wb(ploop, &md, clu, &new_dst);
+ spin_unlock_irq(&ploop->bat_lock);
if (ret < 0) {
PL_ERR("reloc: can't prepare it: %d", ret);
goto out;
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 8b32f559fdb9..129447510033 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -554,16 +554,20 @@ static void ploop_unlink_completed_pio(struct ploop *ploop, struct pio *pio)
static bool ploop_md_make_dirty(struct ploop *ploop, struct md_page *md)
{
- unsigned long flags;
bool new = false;
- spin_lock_irqsave(&ploop->bat_lock, flags);
+ /*
+ * Usual pattern is check for dirty, prepare bat update
+ * and then call make_dirty - to avoid races callers must lock
+ */
+
+ lockdep_assert_held(&ploop->bat_lock);
+
WARN_ON_ONCE(test_bit(MD_WRITEBACK, &md->status));
if (!test_and_set_bit(MD_DIRTY, &md->status)) {
llist_add((struct llist_node *)&md->wb_link, &ploop->wb_batch_llist);
new = true;
}
- spin_unlock_irqrestore(&ploop->bat_lock, flags);
md->dirty_timeout = jiffies + ploop->md_submit_delay_ms*HZ/1000;
return new;
@@ -809,6 +813,9 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
dst_clu = kmap_local_page(piwb->bat_page);
+ /* holes bit map requires bat_lock */
+ spin_lock_irqsave(&ploop->bat_lock, flags);
+ spin_lock(&md->md_lock);
if (piwb->type == PIWB_TYPE_ALLOC)
goto skip_apply;
@@ -832,10 +839,10 @@ static void ploop_advance_local_after_bat_wb(struct ploop *ploop,
WARN_ON_ONCE(!test_bit(MD_WRITEBACK, &md->status));
/* protect piwb */
- spin_lock_irqsave(&md->md_lock, flags); /* write */
clear_bit(MD_WRITEBACK, &md->status);
md->piwb = NULL;
- spin_unlock_irqrestore(&md->md_lock, flags);
+ spin_unlock(&md->md_lock);
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
kunmap_local(dst_clu);
wait_llist_pending = llist_del_all(&md->wait_llist);
@@ -946,6 +953,8 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
map_index_t *to;
u8 level;
+ lockdep_assert_held(&ploop->bat_lock);
+
piwb = kmalloc(sizeof(*piwb), GFP_NOIO);
if (!piwb)
return -ENOMEM;
@@ -959,7 +968,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
bat_entries = md->kmpage;
- spin_lock_irq(&md->md_lock); /* write */
+ spin_lock(&md->md_lock); /* write */
WARN_ON_ONCE(md->piwb);
md->piwb = piwb;
piwb->md = md;
@@ -991,7 +1000,7 @@ static int ploop_prepare_bat_update(struct ploop *ploop, struct md_page *md,
if (clu == BAT_ENTRY_NONE || level < ploop_top_level(ploop))
to[i] = 0;
}
- spin_unlock_irq(&md->md_lock);
+ spin_unlock(&md->md_lock);
if (is_last_page) {
/* Fill tail of page with 0 */
@@ -1081,10 +1090,22 @@ static int ploop_allocate_cluster(struct ploop *ploop, u32 *dst_clu)
u32 clu_size = CLU_SIZE(ploop);
loff_t off, pos, end, old_size;
struct file *file = top->file;
+ unsigned long flags;
int ret;
- if (ploop_find_dst_clu_bit(ploop, dst_clu) < 0)
+ spin_lock_irqsave(&ploop->bat_lock, flags);
+ if (ploop_find_dst_clu_bit(ploop, dst_clu) < 0) {
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
return -EIO;
+ }
+ /*
+ * Mark clu as used. Find & clear bit must be locked,
+ * since this may be called from different threads.
+ * Note, that set_bit may be made from many places.
+ * We only care to clear what find got. parallel set is ok.
+ */
+ ploop_hole_clear_bit(*dst_clu, ploop);
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
pos = CLU_TO_POS(ploop, *dst_clu);
end = pos + clu_size;
@@ -1098,6 +1119,8 @@ static int ploop_allocate_cluster(struct ploop *ploop, u32 *dst_clu)
else
ret = ploop_zero_range(file, pos, off - pos);
if (ret) {
+ ploop_hole_set_bit(*dst_clu, ploop);
+
if (ret != -EOPNOTSUPP)
PL_ERR("punch/zero area: %d", ret);
else if (ploop->falloc_new_clu)
@@ -1112,18 +1135,15 @@ static int ploop_allocate_cluster(struct ploop *ploop, u32 *dst_clu)
if (end > old_size) {
ret = ploop_truncate_prealloc_safe(ploop, top, end, __func__);
- if (ret)
+ if (ret) {
+ ploop_hole_set_bit(*dst_clu, ploop);
return ret;
+ }
}
if (end > top->file_preallocated_area_start)
top->file_preallocated_area_start = end;
- /*
- * Mark clu as used. Find & clear bit is unlocked,
- * since currently this may be called only from deferred
- * kwork. Note, that set_bit may be made from many places.
- */
- ploop_hole_clear_bit(*dst_clu, ploop);
+
return 0;
}
ALLOW_ERROR_INJECTION(ploop_allocate_cluster, ERRNO);
@@ -1433,15 +1453,21 @@ static void ploop_submit_cow_index_wb(struct ploop_cow *cow)
page_id = ploop_bat_clu_to_page_nr(clu);
md = ploop_md_page_find(ploop, page_id);
- if (ploop_delay_if_md_busy(ploop, md, PIWB_TYPE_ALLOC, cow->aux_pio))
+ spin_lock_irqsave(&ploop->bat_lock, flags);
+ /* MD_DIRTY is set and cleared concurrently, do it under bat_lock */
+ if (ploop_delay_if_md_busy(ploop, md, PIWB_TYPE_ALLOC, cow->aux_pio)) {
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
goto out;
+ }
if (!test_bit(MD_DIRTY, &md->status)) {
- /* MD_DIRTY is set and cleared from this work */
- if (ploop_prepare_bat_update(ploop, md, PIWB_TYPE_ALLOC) < 0)
+ if (ploop_prepare_bat_update(ploop, md, PIWB_TYPE_ALLOC) < 0) {
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
goto err_resource;
+ }
ploop_md_make_dirty(ploop, md);
}
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
piwb = md->piwb;
@@ -1532,32 +1558,38 @@ static bool ploop_locate_new_cluster_and_attach_pio(struct ploop *ploop,
bool attached = false;
u32 page_id;
int err;
+ unsigned long flags;
WARN_ON_ONCE(pio->queue_list_id != PLOOP_LIST_DEFERRED);
- if (ploop_delay_if_md_busy(ploop, md, PIWB_TYPE_ALLOC, pio))
+ spin_lock_irqsave(&ploop->bat_lock, flags);
+ if (ploop_delay_if_md_busy(ploop, md, PIWB_TYPE_ALLOC, pio)) {
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
goto out;
+ }
+
if (!test_bit(MD_DIRTY, &md->status)) {
- /* Unlocked since MD_DIRTY is set and cleared from this work */
+ /* Locked since MD_DIRTY is set and cleared concurrently */
page_id = ploop_bat_clu_to_page_nr(clu);
if (ploop_prepare_bat_update(ploop, md, PIWB_TYPE_ALLOC) < 0) {
pio->bi_status = BLK_STS_RESOURCE;
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
goto error;
}
bat_update_prepared = true;
+ ploop_md_make_dirty(ploop, md);
}
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
piwb = md->piwb;
err = ploop_alloc_cluster(ploop, piwb, clu, dst_clu);
if (err) {
pio->bi_status = errno_to_blk_status(err);
+ clear_bit(MD_DIRTY, &md->status);
goto error;
}
- if (bat_update_prepared)
- ploop_md_make_dirty(ploop, md);
-
if (pio->bi_op & REQ_FUA) {
piwb->pio->bi_op |= REQ_FUA;
ploop_attach_end_action(pio, piwb);
@@ -1771,23 +1803,28 @@ static void ploop_process_one_discard_pio(struct ploop *ploop, struct pio *pio)
struct ploop_index_wb *piwb;
struct md_page *md;
map_index_t *to;
+ unsigned long flags;
WARN_ON(ploop->nr_deltas != 1 ||
pio->queue_list_id != PLOOP_LIST_DISCARD);
page_id = ploop_bat_clu_to_page_nr(clu);
md = ploop_md_page_find(ploop, page_id);
- if (ploop_delay_if_md_busy(ploop, md, PIWB_TYPE_DISCARD, pio))
- goto out;
+ spin_lock_irqsave(&ploop->bat_lock, flags);
+ if (ploop_delay_if_md_busy(ploop, md, PIWB_TYPE_DISCARD, pio)) {
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
+ return;
+ }
if (!test_bit(MD_DIRTY, &md->status)) {
- /* Unlocked since MD_DIRTY is set and cleared from this work */
if (ploop_prepare_bat_update(ploop, md, PIWB_TYPE_DISCARD) < 0) {
pio->bi_status = BLK_STS_RESOURCE;
- goto err;
+ goto err_unlck;
}
bat_update_prepared = true;
+ ploop_md_make_dirty(ploop, md);
}
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
piwb = md->piwb;
@@ -1797,18 +1834,21 @@ static void ploop_process_one_discard_pio(struct ploop *ploop, struct pio *pio)
to = kmap_local_page(piwb->bat_page);
if (WARN_ON_ONCE(!to[clu])) {
pio->bi_status = BLK_STS_IOERR;
+ clear_bit(MD_DIRTY, &md->status);
goto err;
} else {
WRITE_ONCE(to[clu], 0);
+ spin_lock_irqsave(&piwb->lock, flags);
list_add_tail(&pio->list, &piwb->ready_data_pios);
+ spin_unlock_irqrestore(&piwb->lock, flags);
}
kunmap_local(to);
- if (bat_update_prepared)
- ploop_md_make_dirty(ploop, md);
ploop_md_up_prio(ploop, md);
-out:
+
return;
+err_unlck:
+ spin_unlock_irqrestore(&ploop->bat_lock, flags);
err:
if (bat_update_prepared)
ploop_break_bat_update(ploop, md);
--
2.43.0
More information about the Devel
mailing list