[Devel] [PATCH vz9/vz10] dm-ploop: fallback to kvmalloc for large bvec allocations

Alexey Kuznetsov kuznet at virtuozzo.com
Wed Oct 22 15:38:54 MSK 2025


Hello!

Beware, it used GFP_ATOMIC. Doesn't this mean this code can be
executed in interrupt context?
If so, then kvmalloc is a strict no.

On Wed, Oct 22, 2025 at 8:11 PM Vasileios Almpanis
<vasileios.almpanis at virtuozzo.com> wrote:
>
> When handling multiple concurrent dm-ploop requests, large bio_vec arrays
> can be allocated during request processing. These allocations are currently
> done with kmalloc_array(GFP_ATOMIC), which can fail under memory pressure
> for higher orders (order >= 6, ~256KB). Such failures result in partial or
> corrupted I/O, leading to EXT4 directory checksum errors and read-only
> remounts under heavy parallel workloads.
>
> This patch adds a fallback mechanism that uses kvmalloc_array for
> large or failed allocations. If the estimated allocation order is >= 6, or
> if the kmalloc_array allocation fails, the pio is put back on the prepare
> list and the allocation is deferred to the workqueue, where it is retried
> with kvmalloc_array(GFP_NOIO). This avoids high-order GFP_ATOMIC
> allocations from interrupt context and ensures more reliable memory
> allocation behavior.
>
> https://virtuozzo.atlassian.net/browse/VSTOR-109595
> Signed-off-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>
> Feature: dm-ploop: ploop target driver
> ---
>  drivers/md/dm-ploop-map.c | 46 ++++++++++++++++++++++++++++++---------
>  drivers/md/dm-ploop.h     |  1 +
>  2 files changed, 37 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index 3fb841f8bcea..899b9bf088b3 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -16,6 +16,7 @@
>  #include <linux/error-injection.h>
>  #include <linux/uio.h>
>  #include <linux/blk-mq.h>
> +#include <linux/mm.h>
>  #include <uapi/linux/falloc.h>
>  #include "dm-ploop.h"
>  #include "dm-rq.h"
> @@ -89,6 +90,7 @@ void ploop_init_pio(struct ploop *ploop, unsigned int bi_op, struct pio *pio)
>         pio->ref_index = PLOOP_REF_INDEX_INVALID;
>         pio->queue_list_id = PLOOP_LIST_DEFERRED;
>         pio->bi_status = BLK_STS_OK;
> +       pio->use_kvmalloc = false;
>         atomic_set(&pio->remaining, 1);
>         pio->piwb = NULL;
>         INIT_LIST_HEAD(&pio->list);
> @@ -193,8 +195,12 @@ static void ploop_prq_endio(struct pio *pio, void *prq_ptr,
>         struct ploop_rq *prq = prq_ptr;
>         struct request *rq = prq->rq;
>
> -       if (prq->bvec)
> -               kfree(prq->bvec);
> +       if (prq->bvec) {
> +               if (pio->use_kvmalloc)
> +                       kvfree(prq->bvec);
> +               else
> +                       kfree(prq->bvec);
> +       }
>         if (prq->css)
>                 css_put(prq->css);
>         /*
> @@ -1963,26 +1969,40 @@ void ploop_index_wb_submit(struct ploop *ploop, struct ploop_index_wb *piwb)
>         ploop_runners_add_work(ploop, pio);
>  }
>
> -static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq)
> +static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq, bool use_kvmalloc)
>  {
>         struct bio_vec bv, *bvec, *tmp;
>         struct req_iterator rq_iter;
>         unsigned int nr_bvec = 0;
> +       unsigned int order = 0;
>
>         rq_for_each_bvec(bv, rq, rq_iter)
>                 nr_bvec++;
>
> -       bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
> -                            GFP_ATOMIC);
> -       if (!bvec)
> -               goto out;
> +       if (use_kvmalloc) {
> +               bvec = kvmalloc_array(nr_bvec, sizeof(struct bio_vec),
> +                                     GFP_NOIO);
> +               if (!bvec)
> +                       return ERR_PTR(-ENOMEM);
> +       } else {
> +               order = get_order(nr_bvec * sizeof(struct bio_vec));
> +               /*
> +                * order 6 is 262144 bytes. Lets defer such big
> +                * allocations to workqueue.
> +                */
> +               if (order >= 6)
> +                       return ERR_PTR(-EAGAIN);
> +               bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
> +                                    GFP_ATOMIC | __GFP_NOWARN);
> +               if (!bvec)
> +                       return ERR_PTR(-EAGAIN);
> +       }
>
>         tmp = bvec;
>         rq_for_each_bvec(bv, rq, rq_iter) {
>                 *tmp = bv;
>                 tmp++;
>         }
> -out:
>         return bvec;
>  }
>  ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL);
> @@ -2003,9 +2023,15 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
>                  * Transform a set of bvec arrays related to bios
>                  * into a single bvec array (which we can iterate).
>                  */
> -               bvec = ploop_create_bvec_from_rq(rq);
> -               if (!bvec)
> +               bvec = ploop_create_bvec_from_rq(rq, pio->use_kvmalloc);
> +               if (IS_ERR(bvec)) {
> +                       if (PTR_ERR(bvec) == -EAGAIN) {
> +                               pio->use_kvmalloc = true;
> +                               llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
> +                               return;
> +                       }
>                         goto err_nomem;
> +               }
>                 prq->bvec = bvec;
>  skip_bvec:
>                 pio->bi_iter.bi_size = blk_rq_bytes(rq);
> diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
> index fc12efeb0cd9..53e8d12064bd 100644
> --- a/drivers/md/dm-ploop.h
> +++ b/drivers/md/dm-ploop.h
> @@ -316,6 +316,7 @@ struct pio {
>         unsigned int ref_index:2;
>
>         u8 queue_list_id; /* id in ploop->pios */
> +       bool use_kvmalloc;
>
>         struct ploop_index_wb *piwb;
>
> --
> 2.43.0
>
> _______________________________________________
> Devel mailing list
> Devel at openvz.org
> https://lists.openvz.org/mailman/listinfo/devel



More information about the Devel mailing list