[Devel] [RFC PATCH vz9 v6 14/62] dm-ploop: move preparations of pios into the caller from worker

Alexey Kuznetsov alexey.n.kuznetsov at gmail.com
Fri Dec 13 12:21:17 MSK 2024


Hello!

Frankly, I would seriosly consider dropping off llist. It is just an opinion.

This braindead construct is a lie, it is _not_ lockless at all, it
uses the slowest possible
way of synchronization (cmpxchg) and still it has damn complicated set
of usage rules.
Personally I would never use it. Either crystal clear and not less
efficient dumb construct:

struct lockedlist  {
        spinlock_t      lock;
        struct hlist_head       list;
}

or when I am ready to go berserk, something really lockless, without cmpxchg and
with full *mb() nightmare.


On Fri, Dec 13, 2024 at 4:56 PM Andrey Zhadchenko
<andrey.zhadchenko at virtuozzo.com> wrote:
>
>
>
> On 12/5/24 22:55, Alexander Atanasov wrote:
> > Prepare pios earlier in preparation to try to execute them earlier.
> > Convert more places to use lock less lists.
> >
> > https://virtuozzo.atlassian.net/browse/VSTOR-91820
> > Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
> > ---
> >   drivers/md/dm-ploop-map.c | 95 ++++++++++++++++++++++++---------------
> >   1 file changed, 60 insertions(+), 35 deletions(-)
> >
> > diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> > index 8d165bd4fa9d..475494b951a3 100644
> > --- a/drivers/md/dm-ploop-map.c
> > +++ b/drivers/md/dm-ploop-map.c
> > @@ -292,11 +292,15 @@ static struct pio *ploop_split_and_chain_pio(struct ploop *ploop,
> >   ALLOW_ERROR_INJECTION(ploop_split_and_chain_pio, NULL);
> >
> >   static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
> > -                          struct list_head *ret_list)
> > +                          struct llist_head *ret_list)
> >   {
> >       u32 clu_size = CLU_SIZE(ploop);
> > +     struct llist_node *pos, *t;
> >       struct pio *split;
> > -     LIST_HEAD(list);
> > +     LLIST_HEAD(llist);
> > +     struct llist_node *lltmp;
> > +
> > +     WARN_ON(!pio->bi_iter.bi_size);
> >
> >       while (1) {
> >               loff_t start = to_bytes(pio->bi_iter.bi_sector);
> > @@ -314,17 +318,24 @@ static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
> >               if (!split)
> >                       goto err;
> >
> > -             list_add_tail(&split->list, &list);
> > +             llist_add((struct llist_node *)(&split->list), &llist);
> >       }
> >
> > -     list_splice_tail(&list, ret_list);
> > -     list_add_tail(&pio->list, ret_list);
> > +     INIT_LIST_HEAD(&pio->list);
> > +     llist_add((struct llist_node *)(&pio->list), &llist);
> > +     lltmp = llist_reverse_order(llist_del_all(&llist));
> > +     pio->list.next = NULL;
> > +     llist_add_batch(lltmp, (struct llist_node *)(&pio->list), ret_list);
> > +
> >       return 0;
> >   err:
> > -     while ((pio = ploop_pio_list_pop(&list)) != NULL) {
> > +     llist_for_each_safe(pos, t, llist.first) {
> > +             pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> >               pio->bi_status = BLK_STS_RESOURCE;
> > +             INIT_LIST_HEAD(&pio->list);
> >               ploop_pio_endio(pio);
> >       }
> > +
> >       return -ENOMEM;
> >   }
> >   ALLOW_ERROR_INJECTION(ploop_split_pio_to_list, ERRNO);
> > @@ -341,7 +352,7 @@ static void ploop_schedule_work(struct ploop *ploop)
> >
> >   static void ploop_dispatch_pio(struct ploop *ploop, struct pio *pio)
> >   {
> > -     struct llist_head *list = (struct llist_head *)&ploop->pios[pio->queue_list_id];
> > +     struct llist_head *list = &ploop->pios[pio->queue_list_id];
> >
> >       lockdep_assert_not_held(&ploop->deferred_lock);
> >       WARN_ON_ONCE(pio->queue_list_id >= PLOOP_LIST_COUNT);
> > @@ -1622,12 +1633,11 @@ ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL);
> >
> >   static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> >                                          struct pio *pio,
> > -                                        struct list_head *deferred_pios)
> > +                                        struct llist_head *lldeferred_pios)
> >   {
> >       struct ploop_rq *prq = pio->endio_cb_data;
> >       struct request *rq = prq->rq;
> >       struct bio_vec *bvec = NULL;
> > -     LIST_HEAD(list);
> >       int ret;
> >
> >       if (rq->bio != rq->biotail) {
> > @@ -1646,16 +1656,18 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> >               pio->bi_iter.bi_idx = 0;
> >               pio->bi_iter.bi_bvec_done = 0;
> >       } else {
> > -             /* Single bio already provides bvec array */
> > +             /* Single bio already provides bvec array
> > +              * bvec is updated to the correct on submit
> > +              * it is different after partial IO
> > +              */
> >               bvec = rq->bio->bi_io_vec;
> > -
> >               pio->bi_iter = rq->bio->bi_iter;
> >       }
> >       pio->bi_iter.bi_sector = ploop_rq_pos(ploop, rq);
> >       pio->bi_io_vec = bvec;
> >
> >       pio->queue_list_id = PLOOP_LIST_DEFERRED;
> > -     ret = ploop_split_pio_to_list(ploop, pio, deferred_pios);
> > +     ret = ploop_split_pio_to_list(ploop, pio, lldeferred_pios);
> >       if (ret)
> >               goto err_nomem;
> >
> > @@ -1667,7 +1679,7 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> >
> >   static void ploop_prepare_embedded_pios(struct ploop *ploop,
> >                                       struct llist_node *pios,
> > -                                     struct list_head *deferred_pios)
> > +                                     struct llist_head *deferred_pios)
> >   {
> >       struct pio *pio;
> >       struct llist_node *pos, *t;
> > @@ -1684,12 +1696,17 @@ static void ploop_prepare_embedded_pios(struct ploop *ploop,
> >   }
> >
> >   static void ploop_process_deferred_pios(struct ploop *ploop,
> > -                                     struct list_head *pios)
> > +                                     struct llist_head *pios)
> >   {
> >       struct pio *pio;
> >
> > -     while ((pio = ploop_pio_list_pop(pios)) != NULL)
> > +     struct llist_node *pos, *t;
> > +
> > +     llist_for_each_safe(pos, t, pios->first) {
> > +             pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > +             INIT_LIST_HEAD(&pio->list); /* until type is changed */
> >               ploop_process_one_deferred_bio(ploop, pio);
> > +     }
> >   }
> >
> >   static void ploop_process_one_discard_pio(struct ploop *ploop, struct pio *pio)
> > @@ -1787,19 +1804,13 @@ static void ploop_submit_metadata_writeback(struct ploop *ploop)
> >       }
> >   }
> >
> > -static void process_ploop_fsync_work(struct ploop *ploop)
> > +static void process_ploop_fsync_work(struct ploop *ploop, struct llist_node *llflush_pios)
> >   {
> >       struct file *file;
> >       struct pio *pio;
> >       int ret;
> > -     struct llist_node *llflush_pios;
> >       struct llist_node *pos, *t;
> >
> > -     llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
> > -
> > -     if (!llflush_pios)
> > -             return;
> > -
> >       file = ploop_top_delta(ploop)->file;
> >       /* All flushes are done as one */
> >       ret = vfs_fsync(file, 0);
> > @@ -1818,38 +1829,39 @@ static void process_ploop_fsync_work(struct ploop *ploop)
> >
> >   void do_ploop_run_work(struct ploop *ploop)
> >   {
> > -     LIST_HEAD(deferred_pios);
> > +     LLIST_HEAD(deferred_pios);
> >       struct llist_node *llembedded_pios;
> >       struct llist_node *lldeferred_pios;
> >       struct llist_node *lldiscard_pios;
> >       struct llist_node *llcow_pios;
> >       struct llist_node *llresubmit;
> > +     struct llist_node *llflush_pios;
> >       unsigned int old_flags = current->flags;
> >
> >       current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
> >
> >       llembedded_pios = llist_del_all(&ploop->pios[PLOOP_LIST_PREPARE]);
> > -     smp_wmb(); /* */
> > -
> >       lldeferred_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DEFERRED]);
> > +     smp_wmb(); /* */
> > +     llresubmit = llist_del_all(&ploop->llresubmit_pios);
> >       lldiscard_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DISCARD]);
> >       llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
> > -     llresubmit = llist_del_all(&ploop->llresubmit_pios);
> >
> >       /* add old deferred back to the list */
> >       if (lldeferred_pios) {
> >               struct llist_node *pos, *t;
> >               struct pio *pio;
> > -
> > +             /* Add one by one we need last for batch add */
> >               llist_for_each_safe(pos, t, lldeferred_pios) {
> > -                     pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > -                     INIT_LIST_HEAD(&pio->list);
> > -                     list_add(&pio->list, &deferred_pios);
> > +                     llist_add(pos, &deferred_pios);
> >               }
> >       }
> >
> >       ploop_prepare_embedded_pios(ploop, llembedded_pios, &deferred_pios);
> >
> > +     llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
> > +     smp_wmb(); /* */
>
> For what do we need this memory barrier?
>
> > +
> >       if (llresubmit)
> >               ploop_process_resubmit_pios(ploop, llist_reverse_order(llresubmit));
> >
> > @@ -1863,8 +1875,9 @@ void do_ploop_run_work(struct ploop *ploop)
> >
> >       ploop_submit_metadata_writeback(ploop);
> >
> > -     if (!llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]))
> > -             process_ploop_fsync_work(ploop);
> > +     if (llflush_pios)
> > +             process_ploop_fsync_work(ploop, llist_reverse_order(llflush_pios));
> > +
> >       current->flags = old_flags;
> >   }
> >
> > @@ -1913,6 +1926,10 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
> >       struct ploop_rq *prq = pio->endio_cb_data;
> >       struct request *rq = prq->rq;
> >       bool queue = true;
> > +     LLIST_HEAD(deferred_pios);
> > +     int ret = 0;
> > +     struct pio *spio, *stmp;
> > +     struct llist_node *pos, *t;
> >
> >       if (blk_rq_bytes(rq)) {
> >               pio->queue_list_id = PLOOP_LIST_PREPARE;
> > @@ -1928,19 +1945,27 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
> >       }
> >
> >       ploop_inc_nr_inflight(ploop, pio);
> > -     llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
> > +     ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios);
> >
> > +     llist_for_each_safe(pos, t, deferred_pios.first) {
> > +             spio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > +             llist_add((struct llist_node *)(&spio->list), &ploop->pios[PLOOP_LIST_DEFERRED]);
> > +     }
> >   out:
> >       if (queue)
> >               ploop_schedule_work(ploop);
> >   }
> >
> > -void ploop_submit_embedded_pios(struct ploop *ploop, struct list_head *list)
> > +void ploop_submit_embedded_pios(struct ploop *ploop, struct llist_node *list)
> >   {
> >       struct pio *pio;
> > +     struct llist_node *pos, *t;
> >
> > -     while ((pio = ploop_pio_list_pop(list)) != NULL)
> > +     llist_for_each_safe(pos, t, list) {
> > +             pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > +             INIT_LIST_HEAD(&pio->list);
> >               ploop_submit_embedded_pio(ploop, pio);
> > +     }
> >   }
> >
> >   int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
> _______________________________________________
> Devel mailing list
> Devel at openvz.org
> https://lists.openvz.org/mailman/listinfo/devel



More information about the Devel mailing list