[Devel] [PATCH vz9/vz10] dm-ploop: fallback to kvmalloc for large bvec allocations
    Vasileios Almpanis 
    vasileios.ampanis at virtuozzo.com
       
    Wed Oct 22 16:35:44 MSK 2025
    
    
  
Yes, it's exactly as you put it.
On 10/22/25 3:34 PM, Pavel Tikhomirov wrote:
>
>
> On 10/22/25 20:38, Alexey Kuznetsov wrote:
>> Hello!
>>
>> Beware, it used GFP_ATOMIC. Doesn't this mean this code can be
>> executed in interrupt context?
>> If so, then kvmalloc is a strict no.
>
> The idea was if we require high order allocation (when we create bvec 
> from rq) in interrupt (I guess it is ploop_clone_and_map() path) 
> instead of just failing, as it was before this patch, we put pio to a 
> "to be handled later" list (see ploop_prepare_one_embedded_pio). And 
> we handle allocation for this pio in ploop kernel threads, which 
> already run in non-atomic context and can use kvmalloc safely.
>
>>
>> On Wed, Oct 22, 2025 at 8:11 PM Vasileios Almpanis
>> <vasileios.almpanis at virtuozzo.com> wrote:
>>>
>>> When handling multiple concurrent dm-ploop requests, large bio_vec 
>>> arrays
>>> can be allocated during request processing. These allocations are 
>>> currently
>>> done with kmalloc_array(GFP_ATOMIC), which can fail under memory 
>>> pressure
>>> for higher orders (order >= 6, ~256KB). Such failures result in 
>>> partial or
>>> corrupted I/O, leading to EXT4 directory checksum errors and read-only
>>> remounts under heavy parallel workloads.
>>>
>>> This patch adds a fallback mechanism to use kvmalloc_array for
>>> large or failed allocations. If the estimated allocation order is >= 
>>> 6, or
>>> if the kmalloc_array allocation fails, the pio is marked to use
>>> kvmalloc, queued on the prepare list, and the allocation is retried
>>> later with kvmalloc_array. This avoids high-order 
>>> GFP_ATOMIC
>>> allocations from interrupt context and ensures more reliable memory 
>>> allocation
>>> behavior.
>>>
>>> https://virtuozzo.atlassian.net/browse/VSTOR-109595
>>> Signed-off-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>
>>> Feature: dm-ploop: ploop target driver
>>> ---
>>>   drivers/md/dm-ploop-map.c | 46 
>>> ++++++++++++++++++++++++++++++---------
>>>   drivers/md/dm-ploop.h     |  1 +
>>>   2 files changed, 37 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
>>> index 3fb841f8bcea..899b9bf088b3 100644
>>> --- a/drivers/md/dm-ploop-map.c
>>> +++ b/drivers/md/dm-ploop-map.c
>>> @@ -16,6 +16,7 @@
>>>   #include <linux/error-injection.h>
>>>   #include <linux/uio.h>
>>>   #include <linux/blk-mq.h>
>>> +#include <linux/mm.h>
>>>   #include <uapi/linux/falloc.h>
>>>   #include "dm-ploop.h"
>>>   #include "dm-rq.h"
>>> @@ -89,6 +90,7 @@ void ploop_init_pio(struct ploop *ploop, unsigned 
>>> int bi_op, struct pio *pio)
>>>          pio->ref_index = PLOOP_REF_INDEX_INVALID;
>>>          pio->queue_list_id = PLOOP_LIST_DEFERRED;
>>>          pio->bi_status = BLK_STS_OK;
>>> +       pio->use_kvmalloc = false;
>>>          atomic_set(&pio->remaining, 1);
>>>          pio->piwb = NULL;
>>>          INIT_LIST_HEAD(&pio->list);
>>> @@ -193,8 +195,12 @@ static void ploop_prq_endio(struct pio *pio, 
>>> void *prq_ptr,
>>>          struct ploop_rq *prq = prq_ptr;
>>>          struct request *rq = prq->rq;
>>>
>>> -       if (prq->bvec)
>>> -               kfree(prq->bvec);
>>> +       if (prq->bvec) {
>>> +               if (pio->use_kvmalloc)
>>> +                       kvfree(prq->bvec);
>>> +               else
>>> +                       kfree(prq->bvec);
>>> +       }
>>>          if (prq->css)
>>>                  css_put(prq->css);
>>>          /*
>>> @@ -1963,26 +1969,40 @@ void ploop_index_wb_submit(struct ploop 
>>> *ploop, struct ploop_index_wb *piwb)
>>>          ploop_runners_add_work(ploop, pio);
>>>   }
>>>
>>> -static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq)
>>> +static struct bio_vec *ploop_create_bvec_from_rq(struct request 
>>> *rq, bool use_kvmalloc)
>>>   {
>>>          struct bio_vec bv, *bvec, *tmp;
>>>          struct req_iterator rq_iter;
>>>          unsigned int nr_bvec = 0;
>>> +       unsigned int order = 0;
>>>
>>>          rq_for_each_bvec(bv, rq, rq_iter)
>>>                  nr_bvec++;
>>>
>>> -       bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
>>> -                            GFP_ATOMIC);
>>> -       if (!bvec)
>>> -               goto out;
>>> +       if (use_kvmalloc) {
>>> +               bvec = kvmalloc_array(nr_bvec, sizeof(struct bio_vec),
>>> +                                     GFP_NOIO);
>>> +               if (!bvec)
>>> +                       return ERR_PTR(-ENOMEM);
>>> +       } else {
>>> +               order = get_order(nr_bvec * sizeof(struct bio_vec));
>>> +               /*
>>> +                * order 6 is 262144 bytes. Lets defer such big
>>> +                * allocations to workqueue.
>>> +                */
>>> +               if (order >= 6)
>>> +                       return ERR_PTR(-EAGAIN);
>>> +               bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
>>> +                                    GFP_ATOMIC | __GFP_NOWARN);
>>> +               if (!bvec)
>>> +                       return ERR_PTR(-EAGAIN);
>>> +       }
>>>
>>>          tmp = bvec;
>>>          rq_for_each_bvec(bv, rq, rq_iter) {
>>>                  *tmp = bv;
>>>                  tmp++;
>>>          }
>>> -out:
>>>          return bvec;
>>>   }
>>>   ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL);
>>> @@ -2003,9 +2023,15 @@ static void 
>>> ploop_prepare_one_embedded_pio(struct ploop *ploop,
>>>                   * Transform a set of bvec arrays related to bios
>>>                   * into a single bvec array (which we can iterate).
>>>                   */
>>> -               bvec = ploop_create_bvec_from_rq(rq);
>>> -               if (!bvec)
>>> +               bvec = ploop_create_bvec_from_rq(rq, 
>>> pio->use_kvmalloc);
>>> +               if (IS_ERR(bvec)) {
>>> +                       if (PTR_ERR(bvec) == -EAGAIN) {
>>> +                               pio->use_kvmalloc = true;
>>> +                               llist_add((struct llist_node 
>>> *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
>>> +                               return;
>>> +                       }
>>>                          goto err_nomem;
>>> +               }
>>>                  prq->bvec = bvec;
>>>   skip_bvec:
>>>                  pio->bi_iter.bi_size = blk_rq_bytes(rq);
>>> diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
>>> index fc12efeb0cd9..53e8d12064bd 100644
>>> --- a/drivers/md/dm-ploop.h
>>> +++ b/drivers/md/dm-ploop.h
>>> @@ -316,6 +316,7 @@ struct pio {
>>>          unsigned int ref_index:2;
>>>
>>>          u8 queue_list_id; /* id in ploop->pios */
>>> +       bool use_kvmalloc;
>>>
>>>          struct ploop_index_wb *piwb;
>>>
>>> -- 
>>> 2.43.0
>>>
>>> _______________________________________________
>>> Devel mailing list
>>> Devel at openvz.org
>>> https://lists.openvz.org/mailman/listinfo/devel
>
    
    
More information about the Devel
mailing list