[Devel] [RFC PATCH vz9 v3 03/11] ploop: fsync after all pios are sent
Alexander Atanasov
alexander.atanasov at virtuozzo.com
Mon Oct 21 13:13:50 MSK 2024
Currently there are two workers: one to handle pios and one to handle
flushes (via vfs_fsync). These workers are created unbound, which
means they run whenever there is a free CPU. When ploop sends pios
(via ploop_dispatch_pios) it checks whether there are data pios and
whether there are flush pios. If both are present, both workers are
scheduled to run, resulting in many writes and syncs executing in
parallel - which is both slow and incorrect. Slow, because the
underlying fs keeps writing and syncing at the same time instead of
caching the writes and syncing them afterwards. Incorrect, because a
REQ_FLUSH request that ploop handles must complete only after
everything before it has been sent to disk, which is not guaranteed
when the two workers run in parallel.

To address this, queue the fsync worker only when there are no data
pios, and make the data worker process pending flushes itself after
all pending pios have been processed and submitted.
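In short, the data worker now notes queued flush pios up front and
runs the fsync path itself only after everything else has been
submitted. Condensed from the diff below (error handling and the
other pio lists omitted for brevity):

	void do_ploop_work(struct work_struct *ws)
	{
		struct ploop *ploop = container_of(ws, struct ploop, worker);
		bool do_fsync = false;

		spin_lock_irq(&ploop->deferred_lock);
		/* ... splice the deferred/discard/cow/resubmit lists ... */
		if (!list_empty(&ploop->pios[PLOOP_LIST_FLUSH]))
			do_fsync = true;
		spin_unlock_irq(&ploop->deferred_lock);

		/* ... process and submit all pending pios ... */

		/* fsync strictly after everything has been submitted */
		if (do_fsync)
			process_ploop_fsync_work(ploop);
	}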
https://virtuozzo.atlassian.net/browse/VSTOR-93454
Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
---
drivers/md/dm-ploop-map.c | 51 ++++++++++++++++++++++++---------------
1 file changed, 31 insertions(+), 20 deletions(-)
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index edfa44b7214a..0036df692689 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -369,7 +369,7 @@ void ploop_dispatch_pios(struct ploop *ploop, struct pio *pio,
 
 	if (is_data)
 		queue_work(ploop->wq, &ploop->worker);
-	if (is_flush)
+	else if (is_flush)
 		queue_work(ploop->wq, &ploop->fsync_worker);
 }
 
@@ -1780,6 +1780,29 @@ static void ploop_submit_metadata_writeback(struct ploop *ploop)
 	}
 }
 
+static void process_ploop_fsync_work(struct ploop *ploop)
+{
+	LIST_HEAD(flush_pios);
+	struct file *file;
+	struct pio *pio;
+	int ret;
+	spin_lock_irq(&ploop->deferred_lock);
+	list_splice_init(&ploop->pios[PLOOP_LIST_FLUSH], &flush_pios);
+	spin_unlock_irq(&ploop->deferred_lock);
+
+	file = ploop_top_delta(ploop)->file;
+	ret = vfs_fsync(file, 0);
+
+	while ((pio = ploop_pio_list_pop(&flush_pios)) != NULL) {
+		if (unlikely(ret)) {
+			pio->bi_status = errno_to_blk_status(ret);
+			if (static_branch_unlikely(&ploop_standby_check))
+				ploop_check_standby_mode(ploop, ret);
+		}
+		ploop_pio_endio(pio);
+	}
+}
+
 void do_ploop_work(struct work_struct *ws)
 {
 	struct ploop *ploop = container_of(ws, struct ploop, worker);
@@ -1788,6 +1811,7 @@ void do_ploop_work(struct work_struct *ws)
 	LIST_HEAD(discard_pios);
 	LIST_HEAD(cow_pios);
 	LIST_HEAD(resubmit_pios);
+	bool do_fsync = false;
 	unsigned int old_flags = current->flags;
 
 	current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
@@ -1798,6 +1822,8 @@
 	list_splice_init(&ploop->pios[PLOOP_LIST_DISCARD], &discard_pios);
 	list_splice_init(&ploop->pios[PLOOP_LIST_COW], &cow_pios);
 	list_splice_init(&ploop->resubmit_pios, &resubmit_pios);
+	if (!list_empty(&ploop->pios[PLOOP_LIST_FLUSH]))
+		do_fsync = true;
 	spin_unlock_irq(&ploop->deferred_lock);
 
 	ploop_prepare_embedded_pios(ploop, &embedded_pios, &deferred_pios);
@@ -1810,31 +1836,16 @@
 	ploop_submit_metadata_writeback(ploop);
 
 	current->flags = old_flags;
+
+	if (do_fsync)
+		process_ploop_fsync_work(ploop);
 }
 
 void do_ploop_fsync_work(struct work_struct *ws)
 {
 	struct ploop *ploop = container_of(ws, struct ploop, fsync_worker);
 
-	LIST_HEAD(flush_pios);
-	struct file *file;
-	struct pio *pio;
-	int ret;
-	spin_lock_irq(&ploop->deferred_lock);
-	list_splice_init(&ploop->pios[PLOOP_LIST_FLUSH], &flush_pios);
-	spin_unlock_irq(&ploop->deferred_lock);
-
-	file = ploop_top_delta(ploop)->file;
-	ret = vfs_fsync(file, 0);
-
-	while ((pio = ploop_pio_list_pop(&flush_pios)) != NULL) {
-		if (unlikely(ret)) {
-			pio->bi_status = errno_to_blk_status(ret);
-			if (static_branch_unlikely(&ploop_standby_check))
-				ploop_check_standby_mode(ploop, ret);
-		}
-		ploop_pio_endio(pio);
-	}
+	process_ploop_fsync_work(ploop);
 }
 
 static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
--
2.43.0