[Devel] [PATCH vz9/vz10] dm-ploop: fallback to kvmalloc for large bvec allocations
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Wed Oct 22 16:34:35 MSK 2025
On 10/22/25 20:38, Alexey Kuznetsov wrote:
> Hello!
>
> Beware, it used GFP_ATOMIC. Doesn't this mean this code can be
> executed in interrupt context?
> If so, then kvmalloc is a strict no.
The idea is this: if we need a high-order allocation (when we create a
bvec from the rq) in interrupt context (I guess that is the
ploop_clone_and_map() path), then instead of just failing, as was the
case before this patch, we put the pio on a "to be handled later" list
(see ploop_prepare_one_embedded_pio). The allocation for this pio is then
handled in the ploop kernel threads, which already run in non-atomic
context and can use kvmalloc safely.
>
> On Wed, Oct 22, 2025 at 8:11 PM Vasileios Almpanis
> <vasileios.almpanis at virtuozzo.com> wrote:
>>
>> When handling multiple concurrent dm-ploop requests, large bio_vec arrays
>> can be allocated during request processing. These allocations are currently
>> done with kmalloc_array(GFP_ATOMIC), which can fail under memory pressure
>> for higher orders (order >= 6, ~256KB). Such failures result in partial or
>> corrupted I/O, leading to EXT4 directory checksum errors and read-only
>> remounts under heavy parallel workloads.
>>
>> This patch adds a fallback mechanism that uses kvmalloc_array for
>> large or failed allocations: if the estimated allocation order is >= 6, or
>> if the kmalloc_array allocation fails, the pio is queued for deferred
>> handling and the allocation is retried there with kvmalloc_array. This
>> avoids high-order GFP_ATOMIC allocations from interrupt context and ensures
>> more reliable memory allocation behavior.
>>
>> https://virtuozzo.atlassian.net/browse/VSTOR-109595
>> Signed-off-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>
>> Feature: dm-ploop: ploop target driver
>> ---
>> drivers/md/dm-ploop-map.c | 46 ++++++++++++++++++++++++++++++---------
>> drivers/md/dm-ploop.h | 1 +
>> 2 files changed, 37 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
>> index 3fb841f8bcea..899b9bf088b3 100644
>> --- a/drivers/md/dm-ploop-map.c
>> +++ b/drivers/md/dm-ploop-map.c
>> @@ -16,6 +16,7 @@
>> #include <linux/error-injection.h>
>> #include <linux/uio.h>
>> #include <linux/blk-mq.h>
>> +#include <linux/mm.h>
>> #include <uapi/linux/falloc.h>
>> #include "dm-ploop.h"
>> #include "dm-rq.h"
>> @@ -89,6 +90,7 @@ void ploop_init_pio(struct ploop *ploop, unsigned int bi_op, struct pio *pio)
>> pio->ref_index = PLOOP_REF_INDEX_INVALID;
>> pio->queue_list_id = PLOOP_LIST_DEFERRED;
>> pio->bi_status = BLK_STS_OK;
>> + pio->use_kvmalloc = false;
>> atomic_set(&pio->remaining, 1);
>> pio->piwb = NULL;
>> INIT_LIST_HEAD(&pio->list);
>> @@ -193,8 +195,12 @@ static void ploop_prq_endio(struct pio *pio, void *prq_ptr,
>> struct ploop_rq *prq = prq_ptr;
>> struct request *rq = prq->rq;
>>
>> - if (prq->bvec)
>> - kfree(prq->bvec);
>> + if (prq->bvec) {
>> + if (pio->use_kvmalloc)
>> + kvfree(prq->bvec);
>> + else
>> + kfree(prq->bvec);
>> + }
>> if (prq->css)
>> css_put(prq->css);
>> /*
>> @@ -1963,26 +1969,40 @@ void ploop_index_wb_submit(struct ploop *ploop, struct ploop_index_wb *piwb)
>> ploop_runners_add_work(ploop, pio);
>> }
>>
>> -static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq)
>> +static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq, bool use_kvmalloc)
>> {
>> struct bio_vec bv, *bvec, *tmp;
>> struct req_iterator rq_iter;
>> unsigned int nr_bvec = 0;
>> + unsigned int order = 0;
>>
>> rq_for_each_bvec(bv, rq, rq_iter)
>> nr_bvec++;
>>
>> - bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
>> - GFP_ATOMIC);
>> - if (!bvec)
>> - goto out;
>> + if (use_kvmalloc) {
>> + bvec = kvmalloc_array(nr_bvec, sizeof(struct bio_vec),
>> + GFP_NOIO);
>> + if (!bvec)
>> + return ERR_PTR(-ENOMEM);
>> + } else {
>> + order = get_order(nr_bvec * sizeof(struct bio_vec));
>> + /*
>> + * order 6 is 262144 bytes. Lets defer such big
>> + * allocations to workqueue.
>> + */
>> + if (order >= 6)
>> + return ERR_PTR(-EAGAIN);
>> + bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
>> + GFP_ATOMIC | __GFP_NOWARN);
>> + if (!bvec)
>> + return ERR_PTR(-EAGAIN);
>> + }
>>
>> tmp = bvec;
>> rq_for_each_bvec(bv, rq, rq_iter) {
>> *tmp = bv;
>> tmp++;
>> }
>> -out:
>> return bvec;
>> }
>> ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL);
>> @@ -2003,9 +2023,15 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
>> * Transform a set of bvec arrays related to bios
>> * into a single bvec array (which we can iterate).
>> */
>> - bvec = ploop_create_bvec_from_rq(rq);
>> - if (!bvec)
>> + bvec = ploop_create_bvec_from_rq(rq, pio->use_kvmalloc);
>> + if (IS_ERR(bvec)) {
>> + if (PTR_ERR(bvec) == -EAGAIN) {
>> + pio->use_kvmalloc = true;
>> + llist_add((struct llist_node *)(&pio->list), &ploop->pios[PLOOP_LIST_PREPARE]);
>> + return;
>> + }
>> goto err_nomem;
>> + }
>> prq->bvec = bvec;
>> skip_bvec:
>> pio->bi_iter.bi_size = blk_rq_bytes(rq);
>> diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
>> index fc12efeb0cd9..53e8d12064bd 100644
>> --- a/drivers/md/dm-ploop.h
>> +++ b/drivers/md/dm-ploop.h
>> @@ -316,6 +316,7 @@ struct pio {
>> unsigned int ref_index:2;
>>
>> u8 queue_list_id; /* id in ploop->pios */
>> + bool use_kvmalloc;
>>
>> struct ploop_index_wb *piwb;
>>
>> --
>> 2.43.0
>>
>> _______________________________________________
>> Devel mailing list
>> Devel at openvz.org
>> https://lists.openvz.org/mailman/listinfo/devel
--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.
More information about the Devel
mailing list