[Devel] [RFC PATCH vz9 v6 14/62] dm-ploop: move preparations of pios into the caller from worker
Alexey Kuznetsov
alexey.n.kuznetsov at gmail.com
Fri Dec 13 12:21:17 MSK 2024
Hello!
Frankly, I would seriously consider dropping llist altogether. It is just an opinion.
This braindead construct is a lie: it is _not_ lockless at all, it
uses the slowest possible
way of synchronization (cmpxchg), and it still has a damn complicated set
of usage rules.
Personally I would never use it. I would pick either a crystal-clear and
no-less-efficient dumb construct:
struct lockedlist {
spinlock_t lock;
struct hlist_head list;
}
or, when I am ready to go berserk, something really lockless, without cmpxchg and
with the full *mb() nightmare.
On Fri, Dec 13, 2024 at 4:56 PM Andrey Zhadchenko
<andrey.zhadchenko at virtuozzo.com> wrote:
>
>
>
> On 12/5/24 22:55, Alexander Atanasov wrote:
> > Prepare pios earlier in preparation to try to execute them earlier.
> > Convert more places to use lock less lists.
> >
> > https://virtuozzo.atlassian.net/browse/VSTOR-91820
> > Signed-off-by: Alexander Atanasov <alexander.atanasov at virtuozzo.com>
> > ---
> > drivers/md/dm-ploop-map.c | 95 ++++++++++++++++++++++++---------------
> > 1 file changed, 60 insertions(+), 35 deletions(-)
> >
> > diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> > index 8d165bd4fa9d..475494b951a3 100644
> > --- a/drivers/md/dm-ploop-map.c
> > +++ b/drivers/md/dm-ploop-map.c
> > @@ -292,11 +292,15 @@ static struct pio *ploop_split_and_chain_pio(struct ploop *ploop,
> > ALLOW_ERROR_INJECTION(ploop_split_and_chain_pio, NULL);
> >
> > static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
> > - struct list_head *ret_list)
> > + struct llist_head *ret_list)
> > {
> > u32 clu_size = CLU_SIZE(ploop);
> > + struct llist_node *pos, *t;
> > struct pio *split;
> > - LIST_HEAD(list);
> > + LLIST_HEAD(llist);
> > + struct llist_node *lltmp;
> > +
> > + WARN_ON(!pio->bi_iter.bi_size);
> >
> > while (1) {
> > loff_t start = to_bytes(pio->bi_iter.bi_sector);
> > @@ -314,17 +318,24 @@ static int ploop_split_pio_to_list(struct ploop *ploop, struct pio *pio,
> > if (!split)
> > goto err;
> >
> > - list_add_tail(&split->list, &list);
> > + llist_add((struct llist_node *)(&split->list), &llist);
> > }
> >
> > - list_splice_tail(&list, ret_list);
> > - list_add_tail(&pio->list, ret_list);
> > + INIT_LIST_HEAD(&pio->list);
> > + llist_add((struct llist_node *)(&pio->list), &llist);
> > + lltmp = llist_reverse_order(llist_del_all(&llist));
> > + pio->list.next = NULL;
> > + llist_add_batch(lltmp, (struct llist_node *)(&pio->list), ret_list);
> > +
> > return 0;
> > err:
> > - while ((pio = ploop_pio_list_pop(&list)) != NULL) {
> > + llist_for_each_safe(pos, t, llist.first) {
> > + pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > pio->bi_status = BLK_STS_RESOURCE;
> > + INIT_LIST_HEAD(&pio->list);
> > ploop_pio_endio(pio);
> > }
> > +
> > return -ENOMEM;
> > }
> > ALLOW_ERROR_INJECTION(ploop_split_pio_to_list, ERRNO);
> > @@ -341,7 +352,7 @@ static void ploop_schedule_work(struct ploop *ploop)
> >
> > static void ploop_dispatch_pio(struct ploop *ploop, struct pio *pio)
> > {
> > - struct llist_head *list = (struct llist_head *)&ploop->pios[pio->queue_list_id];
> > + struct llist_head *list = &ploop->pios[pio->queue_list_id];
> >
> > lockdep_assert_not_held(&ploop->deferred_lock);
> > WARN_ON_ONCE(pio->queue_list_id >= PLOOP_LIST_COUNT);
> > @@ -1622,12 +1633,11 @@ ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL);
> >
> > static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> > struct pio *pio,
> > - struct list_head *deferred_pios)
> > + struct llist_head *lldeferred_pios)
> > {
> > struct ploop_rq *prq = pio->endio_cb_data;
> > struct request *rq = prq->rq;
> > struct bio_vec *bvec = NULL;
> > - LIST_HEAD(list);
> > int ret;
> >
> > if (rq->bio != rq->biotail) {
> > @@ -1646,16 +1656,18 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> > pio->bi_iter.bi_idx = 0;
> > pio->bi_iter.bi_bvec_done = 0;
> > } else {
> > - /* Single bio already provides bvec array */
> > + /* Single bio already provides bvec array
> > + * bvec is updated to the correct on submit
> > + * it is different after partial IO
> > + */
> > bvec = rq->bio->bi_io_vec;
> > -
> > pio->bi_iter = rq->bio->bi_iter;
> > }
> > pio->bi_iter.bi_sector = ploop_rq_pos(ploop, rq);
> > pio->bi_io_vec = bvec;
> >
> > pio->queue_list_id = PLOOP_LIST_DEFERRED;
> > - ret = ploop_split_pio_to_list(ploop, pio, deferred_pios);
> > + ret = ploop_split_pio_to_list(ploop, pio, lldeferred_pios);
> > if (ret)
> > goto err_nomem;
> >
> > @@ -1667,7 +1679,7 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> >
> > static void ploop_prepare_embedded_pios(struct ploop *ploop,
> > struct llist_node *pios,
> > - struct list_head *deferred_pios)
> > + struct llist_head *deferred_pios)
> > {
> > struct pio *pio;
> > struct llist_node *pos, *t;
> > @@ -1684,12 +1696,17 @@ static void ploop_prepare_embedded_pios(struct ploop *ploop,
> > }
> >
> > static void ploop_process_deferred_pios(struct ploop *ploop,
> > - struct list_head *pios)
> > + struct llist_head *pios)
> > {
> > struct pio *pio;
> >
> > - while ((pio = ploop_pio_list_pop(pios)) != NULL)
> > + struct llist_node *pos, *t;
> > +
> > + llist_for_each_safe(pos, t, pios->first) {
> > + pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > + INIT_LIST_HEAD(&pio->list); /* until type is changed */
> > ploop_process_one_deferred_bio(ploop, pio);
> > + }
> > }
> >
> > static void ploop_process_one_discard_pio(struct ploop *ploop, struct pio *pio)
> > @@ -1787,19 +1804,13 @@ static void ploop_submit_metadata_writeback(struct ploop *ploop)
> > }
> > }
> >
> > -static void process_ploop_fsync_work(struct ploop *ploop)
> > +static void process_ploop_fsync_work(struct ploop *ploop, struct llist_node *llflush_pios)
> > {
> > struct file *file;
> > struct pio *pio;
> > int ret;
> > - struct llist_node *llflush_pios;
> > struct llist_node *pos, *t;
> >
> > - llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
> > -
> > - if (!llflush_pios)
> > - return;
> > -
> > file = ploop_top_delta(ploop)->file;
> > /* All flushes are done as one */
> > ret = vfs_fsync(file, 0);
> > @@ -1818,38 +1829,39 @@ static void process_ploop_fsync_work(struct ploop *ploop)
> >
> > void do_ploop_run_work(struct ploop *ploop)
> > {
> > - LIST_HEAD(deferred_pios);
> > + LLIST_HEAD(deferred_pios);
> > struct llist_node *llembedded_pios;
> > struct llist_node *lldeferred_pios;
> > struct llist_node *lldiscard_pios;
> > struct llist_node *llcow_pios;
> > struct llist_node *llresubmit;
> > + struct llist_node *llflush_pios;
> > unsigned int old_flags = current->flags;
> >
> > current->flags |= PF_IO_THREAD|PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
> >
> > llembedded_pios = llist_del_all(&ploop->pios[PLOOP_LIST_PREPARE]);
> > - smp_wmb(); /* */
> > -
> > lldeferred_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DEFERRED]);
> > + smp_wmb(); /* */
> > + llresubmit = llist_del_all(&ploop->llresubmit_pios);
> > lldiscard_pios = llist_del_all(&ploop->pios[PLOOP_LIST_DISCARD]);
> > llcow_pios = llist_del_all(&ploop->pios[PLOOP_LIST_COW]);
> > - llresubmit = llist_del_all(&ploop->llresubmit_pios);
> >
> > /* add old deferred back to the list */
> > if (lldeferred_pios) {
> > struct llist_node *pos, *t;
> > struct pio *pio;
> > -
> > + /* Add one by one we need last for batch add */
> > llist_for_each_safe(pos, t, lldeferred_pios) {
> > - pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > - INIT_LIST_HEAD(&pio->list);
> > - list_add(&pio->list, &deferred_pios);
> > + llist_add(pos, &deferred_pios);
> > }
> > }
> >
> > ploop_prepare_embedded_pios(ploop, llembedded_pios, &deferred_pios);
> >
> > + llflush_pios = llist_del_all(&ploop->pios[PLOOP_LIST_FLUSH]);
> > + smp_wmb(); /* */
>
> For what do we need this memory barrier?
>
> > +
> > if (llresubmit)
> > ploop_process_resubmit_pios(ploop, llist_reverse_order(llresubmit));
> >
> > @@ -1863,8 +1875,9 @@ void do_ploop_run_work(struct ploop *ploop)
> >
> > ploop_submit_metadata_writeback(ploop);
> >
> > - if (!llist_empty(&ploop->pios[PLOOP_LIST_FLUSH]))
> > - process_ploop_fsync_work(ploop);
> > + if (llflush_pios)
> > + process_ploop_fsync_work(ploop, llist_reverse_order(llflush_pios));
> > +
> > current->flags = old_flags;
> > }
> >
> > @@ -1913,6 +1926,10 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
> > struct ploop_rq *prq = pio->endio_cb_data;
> > struct request *rq = prq->rq;
> > bool queue = true;
> > + LLIST_HEAD(deferred_pios);
> > + int ret = 0;
> > + struct pio *spio, *stmp;
> > + struct llist_node *pos, *t;
> >
> > if (blk_rq_bytes(rq)) {
> > pio->queue_list_id = PLOOP_LIST_PREPARE;
> > @@ -1928,19 +1945,27 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
> > }
> >
> > ploop_inc_nr_inflight(ploop, pio);
> > - llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
> > + ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios);
> >
> > + llist_for_each_safe(pos, t, deferred_pios.first) {
> > + spio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > + llist_add((struct llist_node *)(&spio->list), &ploop->pios[PLOOP_LIST_DEFERRED]);
> > + }
> > out:
> > if (queue)
> > ploop_schedule_work(ploop);
> > }
> >
> > -void ploop_submit_embedded_pios(struct ploop *ploop, struct list_head *list)
> > +void ploop_submit_embedded_pios(struct ploop *ploop, struct llist_node *list)
> > {
> > struct pio *pio;
> > + struct llist_node *pos, *t;
> >
> > - while ((pio = ploop_pio_list_pop(list)) != NULL)
> > + llist_for_each_safe(pos, t, list) {
> > + pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> > + INIT_LIST_HEAD(&pio->list);
> > ploop_submit_embedded_pio(ploop, pio);
> > + }
> > }
> >
> > int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
> _______________________________________________
> Devel mailing list
> Devel at openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
More information about the Devel
mailing list