[Devel] [RFC PATCH vz9 v6 15/62] dm-ploop: fast path execution for reads

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Fri Dec 13 15:48:04 MSK 2024


Honestly, I do not see any benefit in the changes within 
ploop_process_one_deferred_bio().
For now the fast path is supposed to work only with reads, and a read 
hits either a zero (unallocated) cluster, the top delta, or some bottom 
delta. I think implementing a small function to handle these three cases 
is better than adding many return values that no one uses.
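
For illustration only, a rough sketch of the kind of read-only helper I 
have in mind, reusing the helpers visible in the quoted patch. This is an 
assumption-laden sketch, not a proposed implementation: whether 
ploop_link_submitting_pio() is required for reads, and the caller handling 
the cluster-locked and non-read cases beforehand, are assumptions here.

static void ploop_fastpath_read(struct ploop *ploop, struct pio *pio, u32 clu)
{
	struct md_page *md;
	u32 dst_clu;
	u8 level;

	/* Caller is assumed to have checked ploop_postpone_if_cluster_locked() */
	dst_clu = ploop_bat_entries(ploop, clu, &level, &md);

	if (ploop_cluster_is_in_top_delta(ploop, clu)) {
		/* Already mapped in the top delta */
		ploop_link_submitting_pio(ploop, pio, clu);
		ploop_map_and_submit_rw(ploop, dst_clu, pio, ploop_top_level(ploop));
		return;
	}

	if (dst_clu == BAT_ENTRY_NONE) {
		/* No delta contains the cluster: complete the read as zeroes */
		ploop_zero_fill_pio(pio);
		ploop_pio_endio(pio);
		return;
	}

	/* Some bottom delta holds the data: submit at its level */
	ploop_map_and_submit_rw(ploop, dst_clu, pio, level);
}
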

On 12/5/24 22:55, Alexander Atanasov wrote:
> After a pio is split and prepared, try to execute it immediately
> without going to the worker thread.
> 
> https://virtuozzo.atlassian.net/browse/VSTOR-91820
> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
> ---
>   drivers/md/dm-ploop-map.c | 95 ++++++++++++++++++++++++++++-----------
>   1 file changed, 70 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index 475494b951a3..91941658c339 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -606,7 +606,7 @@ static int ploop_zero_range(struct file *file, loff_t pos, loff_t len)
>   			     pos, len);
>   }
>   
> -static void ploop_handle_discard_pio(struct ploop *ploop, struct pio *pio,
> +static int ploop_handle_discard_pio(struct ploop *ploop, struct pio *pio,
>   			       u32 clu, u32 dst_clu)
>   {
>   	struct pio *inflight_h;
> @@ -623,12 +623,12 @@ static void ploop_handle_discard_pio(struct ploop *ploop, struct pio *pio,
>   		 * this implementing REQ_OP_WRITE_ZEROES etc.
>   		 */
>   		ploop_pio_endio(pio);
> -		return;
> +		return 1;
>   	}
>   
>   	if (!ploop_cluster_is_in_top_delta(ploop, clu)) {
>   		ploop_pio_endio(pio);
> -		return;
> +		return 1;
>   	}
>   
>   	/* We can't end with EOPNOTSUPP, since blk-mq prints error */
> @@ -645,7 +645,7 @@ static void ploop_handle_discard_pio(struct ploop *ploop, struct pio *pio,
>   		/* @pio will be requeued on inflight_h's pio end */
>   		pr_err_once(PL_FMT("delayed discard: device is used as raw?"),
>   			ploop_device_name(ploop));
> -		return;
> +		return 0;
>   	}
>   
>   	ploop_add_cluster_lk(ploop, pio, clu);
> @@ -660,10 +660,11 @@ static void ploop_handle_discard_pio(struct ploop *ploop, struct pio *pio,
>   		if (ret)
>   			pio->bi_status = errno_to_blk_status(ret);
>   		ploop_pio_endio(pio);
> -		return;
> +		return 1;
>   	}
>   
>   	ploop_queue_discard_index_wb(ploop, pio);
> +	return 0;
>   }
>   
>   static void ploop_discard_index_pio_end(struct ploop *ploop, struct pio *pio)
> @@ -1255,17 +1256,18 @@ void ploop_map_and_submit_rw(struct ploop *ploop, u32 dst_clu,
>   	ploop_submit_rw_mapped(ploop, pio);
>   }
>   
> -static void ploop_initiate_delta_read(struct ploop *ploop, unsigned int level,
> +static int ploop_initiate_delta_read(struct ploop *ploop, unsigned int level,
>   				      u32 dst_clu, struct pio *pio)
>   {
>   	if (dst_clu == BAT_ENTRY_NONE) {
>   		/* No one delta contains dst_clu. */
>   		ploop_zero_fill_pio(pio);
>   		ploop_pio_endio(pio);
> -		return;
> +		return 1;
>   	}
>   
>   	ploop_map_and_submit_rw(ploop, dst_clu, pio, level);
> +	return 0;
>   }
>   
>   static void ploop_cow_endio(struct pio *aux_pio, void *data,
> @@ -1326,14 +1328,15 @@ static int ploop_submit_cluster_cow(struct ploop *ploop, unsigned int level,
>   	return -ENOMEM;
>   }
>   
> -static void ploop_initiate_cluster_cow(struct ploop *ploop, unsigned int level,
> +static int ploop_initiate_cluster_cow(struct ploop *ploop, unsigned int level,
>   				       u32 clu, u32 dst_clu, struct pio *pio)
>   {
>   	if (!ploop_submit_cluster_cow(ploop, level, clu, dst_clu, pio))
> -		return;
> +		return 0;
>   
>   	pio->bi_status = BLK_STS_RESOURCE;
>   	ploop_pio_endio(pio);
> +	return 1;
>   }
>   ALLOW_ERROR_INJECTION(ploop_submit_cluster_cow, ERRNO);
>   
> @@ -1507,23 +1510,26 @@ static int ploop_process_one_deferred_bio(struct ploop *ploop, struct pio *pio)
>   	struct md_page *md;
>   	u32 clu, dst_clu;
>   	u8 level;
> -	bool ret;
> +	int ret;
>   
>   	clu = SEC_TO_CLU(ploop, sector);
> +	/* Pio is put into endio_list */
>   	if (ploop_postpone_if_cluster_locked(ploop, pio, clu))
> -		goto out;
> +		goto handled;
>   
>   	dst_clu = ploop_bat_entries(ploop, clu, &level, &md);
>   	if (op_is_discard(pio->bi_op)) {
>   		/* FIXME: check there is no parallel alloc */
> -		ploop_handle_discard_pio(ploop, pio, clu, dst_clu);
> -		goto out;
> +		ret = ploop_handle_discard_pio(ploop, pio, clu, dst_clu);
> +		if (ret)
> +			goto handled;
> +		goto executed;
>   	}
>   
>   	if (ploop_cluster_is_in_top_delta(ploop, clu)) {
>   		/* Already mapped */
>   		if (ploop_pio_endio_if_merge_fake_pio(pio))
> -			goto out;
> +			goto handled;
>   		goto queue;
>   	} else if (!op_is_write(pio->bi_op)) {
>   		/*
> @@ -1531,7 +1537,7 @@ static int ploop_process_one_deferred_bio(struct ploop *ploop, struct pio *pio)
>   		 * (Also handles the case dst_clu == BAT_ENTRY_NONE).
>   		 */
>   		ploop_initiate_delta_read(ploop, level, dst_clu, pio);
> -		goto out;
> +		goto executed;
>   	} else if (dst_clu != BAT_ENTRY_NONE) {
>   		/*
>   		 * Read secondary delta and write to top delta. May fail.
> @@ -1539,23 +1545,27 @@ static int ploop_process_one_deferred_bio(struct ploop *ploop, struct pio *pio)
>   		 * a lot of other corner cases, but we don't do that as
>   		 * snapshots are used and COW occurs very rare.
>   		 */
> -		ploop_initiate_cluster_cow(ploop, level, clu, dst_clu, pio);
> -		goto out;
> +		if (!ploop_initiate_cluster_cow(ploop, level, clu, dst_clu, pio))
> +			goto executed;
> +		goto handled;
>   	}
>   
> +	/* pio is completed on success */
>   	if (unlikely(ploop_pio_endio_if_all_zeros(pio)))
> -		goto out;
> +		goto handled;
>   
>   	/* Cluster exists nowhere. Allocate it and setup pio as outrunning */
>   	ret = ploop_locate_new_cluster_and_attach_pio(ploop, md, clu,
>   						      &dst_clu, pio);
>   	if (!ret)
> -		goto out;
> -queue:
> +		goto executed;
> +queue:		/* pio needs to go to the queue */
>   	ploop_link_submitting_pio(ploop, pio, clu);
> -
>   	ploop_map_and_submit_rw(ploop, dst_clu, pio, ploop_top_level(ploop));
> -out:
> +	// return 1;

I guess these are leftovers?
Actually, this function can never return 1.

> +executed:	/* pio op started  */
> +	return 2;
> +handled:	/* do not touch pio is completed */
>   	return 0;
>   }
>   
> @@ -1945,12 +1955,47 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
>   	}
>   
>   	ploop_inc_nr_inflight(ploop, pio);
> +
> +	if (pio->queue_list_id == PLOOP_LIST_FLUSH) {
> +		/* Let the FLUSH go last from the queue , do not run here to preserve ordering */
> +		llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
> +		goto out;
> +	}
> +
>   	ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios);
> +	/* check if result is a single pio , so we can try fast path */
> +	if (deferred_pios.first && deferred_pios.first->next == NULL) {
> +		spio = list_entry((struct list_head *)deferred_pios.first, typeof(*spio), list);
> +		INIT_LIST_HEAD(&spio->list);
> +		if (req_op(rq) == REQ_OP_READ) {
> +			unsigned int old_flags = current->flags;
> +
> +			current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
> +			ret = ploop_process_one_deferred_bio(ploop, spio);
> +			current->flags = old_flags;
> +			if (ret == 1) {

I do not think ploop_process_one_deferred_bio() can return 1, so this 
branch is never taken.

> +				/* not queued add back to deferreed*/
> +				llist_add((struct llist_node *)(&spio->list),
> +					  &ploop->pios[PLOOP_LIST_DEFERRED]);
> +			} else {
> +				queue = false;
> +			}
> +
> +		} else { /* OP which can not go thru fast path */
> +			INIT_LIST_HEAD(&spio->list);
> +			llist_add((struct llist_node *)(&spio->list),
> +				  &ploop->pios[PLOOP_LIST_DEFERRED]);
> +		}
> +	} else {
> +		struct llist_node *pos, *t;
>   
> -	llist_for_each_safe(pos, t, deferred_pios.first) {
> -		spio = list_entry((struct list_head *)pos, typeof(*pio), list);
> -		llist_add((struct llist_node *)(&spio->list), &ploop->pios[PLOOP_LIST_DEFERRED]);
> +		llist_for_each_safe(pos, t, deferred_pios.first) {
> +			spio = list_entry((struct list_head *)pos, typeof(*pio), list);
> +			llist_add((struct llist_node *)(&spio->list),
> +				  &ploop->pios[PLOOP_LIST_DEFERRED]);
> +		}
>   	}
> +
>   out:
>   	if (queue)
>   		ploop_schedule_work(ploop);

