[Devel] [RFC PATCH vz9 v6 14/62] dm-ploop: move preparations of pios into the caller from worker
Andrey Zhadchenko
andrey.zhadchenko at virtuozzo.com
Fri Dec 13 11:52:08 MSK 2024
On 12/5/24 22:55, Alexander Atanasov wrote:
> Prepare pios earlier, in the caller, so that we can try to execute
> them earlier. Convert more places to use lockless lists.
>
> https://virtuozzo.atlassian.net/browse/VSTOR-91820
> Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
> ---
> drivers/md/dm-ploop-map.c | 95 ++++++++++++++++++++++++---------------
> 1 file changed, 60 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index 8d165bd4fa9d..475494b951a3 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -292,11 +292,15 @@ static struct pio *ploop_split_and_chain_pio(struct ploop *ploop,
> ALLOW_ERROR_INJECTION(ploop_split_and_chain_pio, NULL);
>
> static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
> - struct list_head *ret_list)
> + struct llist_head *ret_list)
> {
> u32 clu_size = CLU_SIZE(ploop);
> + struct llist_node *pos, *t;
> struct pio *split;
> - LIST_HEAD(list);
> + LLIST_HEAD(llist);
> + struct llist_node *lltmp;
> +
> + WARN_ON(!pio->bi_iter.bi_size);
>
> while (1) {
> loff_t start = to_bytes(pio->bi_iter.bi_sector);
> @@ -314,17 +318,24 @@ static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
> if (!split)
> goto err;
>
> - list_add_tail(&split->list, &list);
> + llist_add((struct llist_node *)(&split->list), &llist);
> }
>
> - list_splice_tail(&list, ret_list);
> - list_add_tail(&pio->list, ret_list);
> + INIT_LIST_HEAD(&pio->list);
> + llist_add((struct llist_node *)(&pio->list), &llist);
> + lltmp = llist_reverse_order(llist_del_all(&llist));
> + pio->list.next = NULL;
> + llist_add_batch(lltmp, (struct llist_node *)(&pio->list), ret_list);
> +
> return 0;
> err:
> - while ((pio = ploop_pio_list_pop(&list)) != NULL) {
> + llist_for_each_safe(pos, t, llist.first) {
> + pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> pio->bi_status = BLK_STS_RESOURCE;
> + INIT_LIST_HEAD(&pio->list);
> ploop_pio_endio(pio);
> }
> +
> return -ENOMEM;
> }
> ALLOW_ERROR_INJECTION(ploop_split_pio_to_list, ERRNO);
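A side note for readers of the casts above: (struct llist_node *)(&pio->list)
works because ->next is the first member of both struct list_head and
struct llist_node, so the embedded list_head can be aliased as an llist
link as long as only llist helpers touch it while the node sits on an
llist. A minimal sketch of the pattern (demo type, not from this patch):

	#include <linux/list.h>
	#include <linux/llist.h>

	struct demo_pio {			/* hypothetical stand-in for struct pio */
		struct list_head list;		/* ->list.next doubles as the llist link */
	};

	static void demo_push(struct llist_head *head, struct demo_pio *p)
	{
		/* the same cast as above: both link types start with ->next */
		llist_add((struct llist_node *)&p->list, head);
	}

	static struct demo_pio *demo_entry(struct llist_node *node)
	{
		/* map the llist link back to the containing structure */
		return list_entry((struct list_head *)node, struct demo_pio, list);
	}
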
> @@ -341,7 +352,7 @@ static void ploop_schedule_work(struct ploop *ploop)
>
> static void ploop_dispatch_pio(struct ploop *ploop, struct pio *pio)
> {
> - struct llist_head *list = (struct llist_head *)&ploop->pios[pio->queue_list_id];
> + struct llist_head *list = &ploop->pios[pio->queue_list_id];
>
> lockdep_assert_not_held(&ploop->deferred_lock);
> WARN_ON_ONCE(pio->queue_list_id >= PLOOP_LIST_COUNT);
> @@ -1622,12 +1633,11 @@ ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL);
>
> static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> struct pio *pio,
> - struct list_head *deferred_pios)
> + struct llist_head *lldeferred_pios)
> {
> struct ploop_rq *prq = pio->endio_cb_data;
> struct request *rq = prq->rq;
> struct bio_vec *bvec = NULL;
> - LIST_HEAD(list);
> int ret;
>
> if (rq->bio != rq->biotail) {
> @@ -1646,16 +1656,18 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> pio->bi_iter.bi_idx = 0;
> pio->bi_iter.bi_bvec_done = 0;
> } else {
> - /* Single bio already provides bvec array */
> +		/* Single bio already provides bvec array.
> +		 * bvec is updated to the correct one on submit;
> +		 * it is different after partial IO.
> +		 */
> bvec = rq->bio->bi_io_vec;
> -
> pio->bi_iter = rq->bio->bi_iter;
> }
> pio->bi_iter.bi_sector = ploop_rq_pos(ploop, rq);
> pio->bi_io_vec = bvec;
>
> pio->queue_list_id = PLOOP_LIST_DEFERRED;
> - ret = ploop_split_pio_to_list(ploop, pio, deferred_pios);
> + ret = ploop_split_pio_to_list(ploop, pio, lldeferred_pios);
> if (ret)
> goto err_nomem;
>
> @@ -1667,7 +1679,7 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
>
> static void ploop_prepare_embedded_pios(struct ploop *ploop,
> struct llist_node *pios,
> - struct list_head *deferred_pios)
> + struct llist_head *deferred_pios)
> {
> struct pio *pio;
> struct llist_node *pos, *t;
> @@ -1684,12 +1696,17 @@ static void ploop_prepare_embedded_pios(struct ploop *ploop,
> }
>
> static void ploop_process_deferred_pios(struct ploop *ploop,
> - struct list_head *pios)
> + struct llist_head *pios)
> {
> struct pio *pio;
>
> - while ((pio = ploop_pio_list_pop(pios)) != NULL)
> + struct llist_node *pos, *t;
> +
> + llist_for_each_safe(pos, t, pios->first) {
> + pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> + INIT_LIST_HEAD(&pio->list); /* until type is changed */
> ploop_process_one_deferred_bio(ploop, pio);
> + }
> }
>
> static void ploop_process_one_discard_pio(struct ploop *ploop, struct pio *pio)
> @@ -1787,19 +1804,13 @@ static void ploop_submit_metadata_writeback(struct ploop *ploop)
> }
> }
>
> -static void process_ploop_fsync_work(struct ploop *ploop)
> +static void process_ploop_fsync_work(struct ploop *ploop, struct llist_node *llflush_pios)
> {
> struct file *file;
> struct pio *pio;
> int ret;
> - struct llist_node *llflush_pios;
> struct llist_node *pos, *t;
>
> - llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
> -
> - if (!llflush_pios)
> - return;
> -
> file = ploop_top_delta(ploop)->file;
> /* All flushes are done as one */
> ret = vfs_fsync(file, 0);
> @@ -1818,38 +1829,39 @@ static void process_ploop_fsync_work(struct ploop *ploop)
>
> void do_ploop_run_work(struct ploop *ploop)
> {
> - LIST_HEAD(deferred_pios);
> + LLIST_HEAD(deferred_pios);
> struct llist_node *llembedded_pios;
> struct llist_node *lldeferred_pios;
> struct llist_node *lldiscard_pios;
> struct llist_node *llcow_pios;
> struct llist_node *llresubmit;
> + struct llist_node *llflush_pios;
> unsigned int old_flags = current->flags;
>
> current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
>
> llembedded_pios = llist_del_all(&ploop->pios[PLOOP_LIST_PREPARE]);
> - smp_wmb(); /* */
> -
> lldeferred_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DEFERRED]);
> + smp_wmb(); /* */
> + llresubmit = llist_del_all(&ploop->llresubmit_pios);
> lldiscard_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DISCARD]);
> llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
> - llresubmit = llist_del_all(&ploop->llresubmit_pios);
>
> /* add old deferred back to the list */
> if (lldeferred_pios) {
> struct llist_node *pos, *t;
> struct pio *pio;
> -
> +		/* Add one by one: a batch add would need the last node */
> llist_for_each_safe(pos, t, lldeferred_pios) {
> - pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> - INIT_LIST_HEAD(&pio->list);
> - list_add(&pio->list, &deferred_pios);
> + llist_add(pos, &deferred_pios);
> }
> }
>
> ploop_prepare_embedded_pios(ploop, llembedded_pios, &deferred_pios);
>
> + llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
> + smp_wmb(); /* */
Why do we need this memory barrier here?
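smp_wmb() orders only the writer's stores and is meaningful only when
paired with a read barrier (or an acquiring operation) on the consumer
side. A generic illustration of the pairing it would belong to
(hypothetical names, not driver code):

	#include <linux/compiler.h>		/* READ_ONCE()/WRITE_ONCE() */
	#include <asm/barrier.h>		/* smp_wmb()/smp_rmb() */

	static int payload;
	static int ready;

	static void writer(void)
	{
		payload = 42;			/* store the data */
		smp_wmb();			/* order payload before the flag */
		WRITE_ONCE(ready, 1);		/* publish */
	}

	static void reader(void)
	{
		if (READ_ONCE(ready)) {
			smp_rmb();		/* pairs with the writer's smp_wmb() */
			/* payload is guaranteed to read as 42 here */
		}
	}

Besides, if I am not mistaken, llist_del_all() is implemented with
xchg(), which already implies full ordering, so it is unclear to me
what this barrier is supposed to pair with.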
> +
> if (llresubmit)
> ploop_process_resubmit_pios(ploop, llist_reverse_order(llresubmit));
>
> @@ -1863,8 +1875,9 @@ void do_ploop_run_work(struct ploop *ploop)
>
> ploop_submit_metadata_writeback(ploop);
>
> - if (!llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]))
> - process_ploop_fsync_work(ploop);
> + if (llflush_pios)
> + process_ploop_fsync_work(ploop, llist_reverse_order(llflush_pios));
> +
> current->flags = old_flags;
> }
>
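One more note on ordering: llist_add() pushes at the head, so a detached
batch comes out newest-first, and the llist_reverse_order() calls here
are what restore submission (FIFO) order before processing. A minimal
sketch of the consumer side (handle_one() is a hypothetical per-node
handler, not driver code):

	#include <linux/llist.h>

	static void handle_one(struct llist_node *node) { /* hypothetical */ }

	static void demo_fifo_drain(struct llist_head *head)
	{
		struct llist_node *batch, *pos, *t;

		batch = llist_del_all(head);		/* atomic detach, newest node first */
		batch = llist_reverse_order(batch);	/* restore submission (FIFO) order */
		llist_for_each_safe(pos, t, batch)
			handle_one(pos);
	}
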
> @@ -1913,6 +1926,10 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
> struct ploop_rq *prq = pio->endio_cb_data;
> struct request *rq = prq->rq;
> bool queue = true;
> + LLIST_HEAD(deferred_pios);
> + int ret = 0;
> + struct pio *spio, *stmp;
> + struct llist_node *pos, *t;
>
> if (blk_rq_bytes(rq)) {
> pio->queue_list_id = PLOOP_LIST_PREPARE;
> @@ -1928,19 +1945,27 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
> }
>
> ploop_inc_nr_inflight(ploop, pio);
> - llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
> + ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios);
>
> + llist_for_each_safe(pos, t, deferred_pios.first) {
> + spio = list_entry((struct list_head *)pos, typeof(*pio), list);
> + llist_add((struct llist_node *)(&spio->list), &ploop->pios[PLOOP_LIST_DEFERRED]);
> + }
> out:
> if (queue)
> ploop_schedule_work(ploop);
> }
>
> -void ploop_submit_embedded_pios(struct ploop *ploop, struct list_head *list)
> +void ploop_submit_embedded_pios(struct ploop *ploop, struct llist_node *list)
> {
> struct pio *pio;
> + struct llist_node *pos, *t;
>
> - while ((pio = ploop_pio_list_pop(list)) != NULL)
> + llist_for_each_safe(pos, t, list) {
> + pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> + INIT_LIST_HEAD(&pio->list);
> ploop_submit_embedded_pio(ploop, pio);
> + }
> }
>
> int ploop_clone_and_map(struct dm_target *ti, struct request *rq,