[Devel] [RFC PATCH vz9 v6 14/62] dm-ploop: move preparations of pios into the caller from worker

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Fri Dec 13 11:52:08 MSK 2024



On 12/5/24 22:55, Alexander Atanasov wrote:
> Prepare pios earlier, so we can try to execute them earlier.
> Convert more places to use lockless lists.
> 
> https://virtuozzo.atlassian.net/browse/VSTOR-91820
> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
> ---
>   drivers/md/dm-ploop-map.c | 95 ++++++++++++++++++++++++---------------
>   1 file changed, 60 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index 8d165bd4fa9d..475494b951a3 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -292,11 +292,15 @@ static struct pio *ploop_split_and_chain_pio(struct ploop *ploop,
>   ALLOW_ERROR_INJECTION(ploop_split_and_chain_pio, NULL);
>   
>   static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
> -			     struct list_head *ret_list)
> +			     struct llist_head *ret_list)
>   {
>   	u32 clu_size = CLU_SIZE(ploop);
> +	struct llist_node *pos, *t;
>   	struct pio *split;
> -	LIST_HEAD(list);
> +	LLIST_HEAD(llist);
> +	struct llist_node *lltmp;
> +
> +	WARN_ON(!pio->bi_iter.bi_size);
>   
>   	while (1) {
>   		loff_t start = to_bytes(pio->bi_iter.bi_sector);
> @@ -314,17 +318,24 @@ static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
>   		if (!split)
>   			goto err;
>   
> -		list_add_tail(&split->list, &list);
> +		llist_add((struct llist_node *)(&split->list), &llist);
>   	}
>   
> -	list_splice_tail(&list, ret_list);
> -	list_add_tail(&pio->list, ret_list);
> +	INIT_LIST_HEAD(&pio->list);
> +	llist_add((struct llist_node *)(&pio->list), &llist);
> +	lltmp = llist_reverse_order(llist_del_all(&llist));
> +	pio->list.next = NULL;
> +	llist_add_batch(lltmp, (struct llist_node *)(&pio->list), ret_list);
> +
>   	return 0;
>   err:
> -	while ((pio = ploop_pio_list_pop(&list)) != NULL) {
> +	llist_for_each_safe(pos, t, llist.first) {
> +		pio = list_entry((struct list_head *)pos, typeof(*pio), list);
>   		pio->bi_status = BLK_STS_RESOURCE;
> +		INIT_LIST_HEAD(&pio->list);
>   		ploop_pio_endio(pio);
>   	}
> +
>   	return -ENOMEM;
>   }
>   ALLOW_ERROR_INJECTION(ploop_split_pio_to_list, ERRNO);
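
If I read this hunk right: llist_add() builds the local list in LIFO order,
so the llist_reverse_order() before llist_add_batch() restores submission
order, and the original pio ends up as the known tail (hence the explicit
pio->list.next = NULL). The (struct llist_node *) casts on &pio->list rely
on ->next being the first member of struct list_head. For my own
understanding, the generic shape of the pattern is roughly this (untested
sketch, hypothetical helper, walking for the tail instead of tracking it):

	/* splice a privately built llist into 'shared' in FIFO order
	 * with a single cmpxchg on the shared head
	 */
	static void splice_fifo(struct llist_head *local,
				struct llist_head *shared)
	{
		struct llist_node *first, *last;

		first = llist_reverse_order(llist_del_all(local)); /* undo LIFO */
		if (!first)
			return;
		for (last = first; last->next; last = last->next)
			; /* find the tail */
		llist_add_batch(first, last, shared);
	}

So keeping the original pio as the known tail just saves that walk, correct?
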
> @@ -341,7 +352,7 @@ static void ploop_schedule_work(struct ploop *ploop)
>   
>   static void ploop_dispatch_pio(struct ploop *ploop, struct pio *pio)
>   {
> -	struct llist_head *list = (struct llist_head *)&ploop->pios[pio->queue_list_id];
> +	struct llist_head *list = &ploop->pios[pio->queue_list_id];
>   
>   	lockdep_assert_not_held(&ploop->deferred_lock);
>   	WARN_ON_ONCE(pio->queue_list_id >= PLOOP_LIST_COUNT);
> @@ -1622,12 +1633,11 @@ ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL);
>   
>   static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
>   					   struct pio *pio,
> -					   struct list_head *deferred_pios)
> +					   struct llist_head *lldeferred_pios)
>   {
>   	struct ploop_rq *prq = pio->endio_cb_data;
>   	struct request *rq = prq->rq;
>   	struct bio_vec *bvec = NULL;
> -	LIST_HEAD(list);
>   	int ret;
>   
>   	if (rq->bio != rq->biotail) {
> @@ -1646,16 +1656,18 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
>   		pio->bi_iter.bi_idx = 0;
>   		pio->bi_iter.bi_bvec_done = 0;
>   	} else {
> -		/* Single bio already provides bvec array */
> +		/* Single bio already provides bvec array
> +		 * bvec is updated to the correct one on submit;
> +		 * it is different after a partial IO
> +		 */
>   		bvec = rq->bio->bi_io_vec;
> -
>   		pio->bi_iter = rq->bio->bi_iter;
>   	}
>   	pio->bi_iter.bi_sector = ploop_rq_pos(ploop, rq);
>   	pio->bi_io_vec = bvec;
>   
>   	pio->queue_list_id = PLOOP_LIST_DEFERRED;
> -	ret = ploop_split_pio_to_list(ploop, pio, deferred_pios);
> +	ret = ploop_split_pio_to_list(ploop, pio, lldeferred_pios);
>   	if (ret)
>   		goto err_nomem;
>   
> @@ -1667,7 +1679,7 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
>   
>   static void ploop_prepare_embedded_pios(struct ploop *ploop,
>   					struct llist_node *pios,
> -					struct list_head *deferred_pios)
> +					struct llist_head *deferred_pios)
>   {
>   	struct pio *pio;
>   	struct llist_node *pos, *t;
> @@ -1684,12 +1696,17 @@ static void ploop_prepare_embedded_pios(struct ploop *ploop,
>   }
>   
>   static void ploop_process_deferred_pios(struct ploop *ploop,
> -					struct list_head *pios)
> +					struct llist_head *pios)
>   {
>   	struct pio *pio;
>   
> -	while ((pio = ploop_pio_list_pop(pios)) != NULL)
> +	struct llist_node *pos, *t;
> +
> +	llist_for_each_safe(pos, t, pios->first) {
> +		pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> +		INIT_LIST_HEAD(&pio->list); /* until type is changed */
>   		ploop_process_one_deferred_bio(ploop, pio);
> +	}
>   }
>   
>   static void ploop_process_one_discard_pio(struct ploop *ploop, struct pio *pio)
> @@ -1787,19 +1804,13 @@ static void ploop_submit_metadata_writeback(struct ploop *ploop)
>   	}
>   }
>   
> -static void process_ploop_fsync_work(struct ploop *ploop)
> +static void process_ploop_fsync_work(struct ploop *ploop, struct llist_node *llflush_pios)
>   {
>   	struct file *file;
>   	struct pio *pio;
>   	int ret;
> -	struct llist_node *llflush_pios;
>   	struct llist_node *pos, *t;
>   
> -	llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
> -
> -	if (!llflush_pios)
> -		return;
> -
>   	file = ploop_top_delta(ploop)->file;
>   	/* All flushes are done as one */
>   	ret = vfs_fsync(file, 0);
> @@ -1818,38 +1829,39 @@ static void process_ploop_fsync_work(struct ploop *ploop)
>   
>   void do_ploop_run_work(struct ploop *ploop)
>   {
> -	LIST_HEAD(deferred_pios);
> +	LLIST_HEAD(deferred_pios);
>   	struct llist_node *llembedded_pios;
>   	struct llist_node *lldeferred_pios;
>   	struct llist_node *lldiscard_pios;
>   	struct llist_node *llcow_pios;
>   	struct llist_node *llresubmit;
> +	struct llist_node *llflush_pios;
>   	unsigned int old_flags = current->flags;
>   
>   	current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
>   
>   	llembedded_pios = llist_del_all(&ploop->pios[PLOOP_LIST_PREPARE]);
> -	smp_wmb(); /* */
> -
>   	lldeferred_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DEFERRED]);
> +	smp_wmb(); /* */
> +	llresubmit = llist_del_all(&ploop->llresubmit_pios);
>   	lldiscard_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DISCARD]);
>   	llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
> -	llresubmit = llist_del_all(&ploop->llresubmit_pios);
>   
>   	/* add old deferred back to the list */
>   	if (lldeferred_pios) {
>   		struct llist_node *pos, *t;
>   		struct pio *pio;
> -
> +		/* Add one by one; we need the last for the batch add */
>   		llist_for_each_safe(pos, t, lldeferred_pios) {
> -			pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> -			INIT_LIST_HEAD(&pio->list);
> -			list_add(&pio->list, &deferred_pios);
> +			llist_add(pos, &deferred_pios);
>   		}
>   	}
>   
>   	ploop_prepare_embedded_pios(ploop, llembedded_pios, &deferred_pios);
>   
> +	llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
> +	smp_wmb(); /* */

Why do we need this memory barrier here?
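
As far as I can tell, llist_del_all() already implies full ordering, since
it is just an xchg() of the list head (include/linux/llist.h, as I read it):

	static inline struct llist_node *llist_del_all(struct llist_head *head)
	{
		return xchg(&head->first, NULL);
	}

and a value-returning atomic RMW is fully ordered, so the explicit smp_wmb()
looks redundant here unless it is meant to pair with a read somewhere else.
The same question applies to the smp_wmb() after the PLOOP_LIST_DEFERRED
del_all above.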

> +
>   	if (llresubmit)
>   		ploop_process_resubmit_pios(ploop, llist_reverse_order(llresubmit));
>   
> @@ -1863,8 +1875,9 @@ void do_ploop_run_work(struct ploop *ploop)
>   
>   	ploop_submit_metadata_writeback(ploop);
>   
> -	if (!llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]))
> -		process_ploop_fsync_work(ploop);
> +	if (llflush_pios)
> +		process_ploop_fsync_work(ploop, llist_reverse_order(llflush_pios));
> +
>   	current->flags = old_flags;
>   }
>   
> @@ -1913,6 +1926,10 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
>   	struct ploop_rq *prq = pio->endio_cb_data;
>   	struct request *rq = prq->rq;
>   	bool queue = true;
> +	LLIST_HEAD(deferred_pios);
> +	int ret = 0;
> +	struct pio *spio, *stmp;
> +	struct llist_node *pos, *t;
>   
>   	if (blk_rq_bytes(rq)) {
>   		pio->queue_list_id = PLOOP_LIST_PREPARE;
> @@ -1928,19 +1945,27 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
>   	}
>   
>   	ploop_inc_nr_inflight(ploop, pio);
> -	llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
> +	ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios);
>   
> +	llist_for_each_safe(pos, t, deferred_pios.first) {
> +		spio = list_entry((struct list_head *)pos, typeof(*pio), list);
> +		llist_add((struct llist_node *)(&spio->list), &ploop->pios[PLOOP_LIST_DEFERRED]);
> +	}
>   out:
>   	if (queue)
>   		ploop_schedule_work(ploop);
>   }
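
Could the loop above be a single llist_add_batch() once first/last are known,
to avoid one cmpxchg on the shared list per split pio? Something like this
untested sketch (note it keeps the local order, while per-pio llist_add()
ends up reversing it, so only if the worker does not depend on that):

	struct llist_node *first = llist_del_all(&deferred_pios);

	if (first) {
		struct llist_node *last = first;

		while (last->next)
			last = last->next;
		llist_add_batch(first, last,
				&ploop->pios[PLOOP_LIST_DEFERRED]);
	}
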
>   
> -void ploop_submit_embedded_pios(struct ploop *ploop, struct list_head *list)
> +void ploop_submit_embedded_pios(struct ploop *ploop, struct llist_node *list)
>   {
>   	struct pio *pio;
> +	struct llist_node *pos, *t;
>   
> -	while ((pio = ploop_pio_list_pop(list)) != NULL)
> +	llist_for_each_safe(pos, t, list) {
> +		pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> +		INIT_LIST_HEAD(&pio->list);
>   		ploop_submit_embedded_pio(ploop, pio);
> +	}
>   }
>   
>   int ploop_clone_and_map(struct dm_target *ti, struct request *rq,

