[Devel] [PATCH vz9] ploop: fsync after all pios are sent

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Tue Oct 8 14:35:52 MSK 2024



On 10/4/24 09:58, Alexander Atanasov wrote:
> Currently there are two workers one to handle pios,
> one to handle flush (via vfs_fsync). This workers are
> created unbound which means they are run whenever there is a free
> CPU. When ploop sends pios (via ploop_dispatch_pios) it checks
> if there are data and if there are flush pios. If both are
> present both workers are scheduled to run. Which results in
> a lot of writes and sync in parallel - which is slow and incorrect.
> Slow due to the underlaying fs trying to write and sync, instead of
> cache writes and then sync them. And incorrect due to the fact
> that REQ_FLUSH ploop handles must complete after all is send to disk
> which when run in parallel is not guaranteed to happen.

I think we are not expected to meet such strict REQ_FLUSH requirements. 
First of all, flushes can be reordered with other in-flight requests on 
block layer level, so the proper way for the user is to issue flush 
after completion of requests he needs to flush.
And I think we can even get already reordered flushes/writes from layers 
above.

> 
> To address this process flushes after all pending pios are processed
> and submitted and then process any pending flushes.

Could you enlighten me if KIOCB API really does follow submission order? 
Is it possible that some submitted writes can be done after later 
submitted flush?
Also our metadata writeback can trigger cow write, which will only be 
submitted on the next thread iteration.

> 
> https://virtuozzo.atlassian.net/browse/VSTOR-93454
> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
> ---
>   drivers/md/dm-ploop-map.c | 51 ++++++++++++++++++++++++---------------
>   1 file changed, 31 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index ca2659ba30f5..481ca6556d69 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -369,7 +369,7 @@ void ploop_dispatch_pios(struct ploop *ploop, struct pio *pio,
>   
>   	if (is_data)
>   		queue_work(ploop->wq, &ploop->worker);
> -	if (is_flush)
> +	else if (is_flush)

What does this change?
Do we expect is_flush and is_data at the same time?

>   		queue_work(ploop->wq, &ploop->fsync_worker);
>   }
>   
> @@ -1780,6 +1780,29 @@ static void ploop_submit_metadata_writeback(struct ploop *ploop)
>   	}
>   }
>   
> +static void process_ploop_fsync_work(struct ploop *ploop)
> +{
> +	LIST_HEAD(flush_pios);
> +	struct file *file;
> +	struct pio *pio;
> +	int ret;
> +	spin_lock_irq(&ploop->deferred_lock);
> +	list_splice_init(&ploop->pios[PLOOP_LIST_FLUSH], &flush_pios);
> +	spin_unlock_irq(&ploop->deferred_lock);
> +
> +	file = ploop_top_delta(ploop)->file;
> +	ret = vfs_fsync(file, 0);
> +
> +	while ((pio = ploop_pio_list_pop(&flush_pios)) != NULL) {
> +		if (unlikely(ret)) {
> +			pio->bi_status = errno_to_blk_status(ret);
> +			if (static_branch_unlikely(&ploop_standby_check))
> +				ploop_check_standby_mode(ploop, ret);
> +		}
> +		ploop_pio_endio(pio);
> +	}
> +}
> +
>   void do_ploop_work(struct work_struct *ws)
>   {
>   	struct ploop *ploop = container_of(ws, struct ploop, worker);
> @@ -1788,6 +1811,7 @@ void do_ploop_work(struct work_struct *ws)
>   	LIST_HEAD(discard_pios);
>   	LIST_HEAD(cow_pios);
>   	LIST_HEAD(resubmit_pios);
> +	bool do_fsync = false;
>   	unsigned int old_flags = current->flags;
>   
>   	current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
> @@ -1798,6 +1822,8 @@ void do_ploop_work(struct work_struct *ws)
>   	list_splice_init(&ploop->pios[PLOOP_LIST_DISCARD], &discard_pios);
>   	list_splice_init(&ploop->pios[PLOOP_LIST_COW], &cow_pios);
>   	list_splice_init(&ploop->resubmit_pios, &resubmit_pios);
> +	if (!list_empty(&ploop->pios[PLOOP_LIST_FLUSH]))
> +		do_fsync = true;
>   	spin_unlock_irq(&ploop->deferred_lock);
>   
>   	ploop_prepare_embedded_pios(ploop, &embedded_pios, &deferred_pios);
> @@ -1810,31 +1836,16 @@ void do_ploop_work(struct work_struct *ws)
>   	ploop_submit_metadata_writeback(ploop);
>   
>   	current->flags = old_flags;
> +
> +	if (do_fsync)
> +		process_ploop_fsync_work(ploop);

With this, do we even need ploop->fsync_worker?
Also, if we queue something on fsync_worker, won't it just straight run 
do_ploop_fsync_work() on some event thread on another processor? And 
here the workqueue will likely be already empty when called

>   }
>   
>   void do_ploop_fsync_work(struct work_struct *ws)
>   {
>   	struct ploop *ploop = container_of(ws, struct ploop, fsync_worker);
> -	LIST_HEAD(flush_pios);
> -	struct file *file;
> -	struct pio *pio;
> -	int ret;
>   
> -	spin_lock_irq(&ploop->deferred_lock);
> -	list_splice_init(&ploop->pios[PLOOP_LIST_FLUSH], &flush_pios);
> -	spin_unlock_irq(&ploop->deferred_lock);
> -
> -	file = ploop_top_delta(ploop)->file;
> -	ret = vfs_fsync(file, 0);
> -
> -	while ((pio = ploop_pio_list_pop(&flush_pios)) != NULL) {
> -		if (unlikely(ret)) {
> -			pio->bi_status = errno_to_blk_status(ret);
> -			if (static_branch_unlikely(&ploop_standby_check))
> -				ploop_check_standby_mode(ploop, ret);
> -		}
> -		ploop_pio_endio(pio);
> -	}
> +	process_ploop_fsync_work(ploop);
>   }
>   
>   static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)


More information about the Devel mailing list