[Devel] [PATCH v5 vz9/vz10] dm-ploop: fallback to kvmalloc for large bvec allocations
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Tue Oct 28 07:45:23 MSK 2025
Thank you, looks good.
On 10/27/25 18:02, Vasileios Almpanis wrote:
> When handling multiple concurrent dm-ploop requests, large bio_vec arrays
> can be allocated during request processing. These allocations are currently
> done with kmalloc_array(GFP_ATOMIC), which can fail under memory pressure
> for higher orders (order >= 6, ~256KB). Such failures result in partial or
> corrupted I/O, leading to EXT4 directory checksum errors and read-only
> remounts under heavy parallel workloads.
>
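(For context: with a 16-byte struct bio_vec on a typical 64-bit
config, the array outgrows the 128KB kmalloc bucket beyond ~8192
bvecs, at which point the slab has to satisfy an order-6 (256KB)
request -- something GFP_ATOMIC often cannot do on a fragmented
system.)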
> This patch adds a fallback to kvmalloc_array: blocking contexts use
> kvmalloc_array directly, and if the kmalloc_array allocation fails in
> atomic context, the pio is deferred to the workqueue, where the
> allocation is retried in blocking context. This way the driver no
> longer depends on high-order GFP_ATOMIC allocations succeeding in
> interrupt context, and allocation behavior becomes more reliable.
>
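For anyone skimming the archive, the allocation side of the change
boils down to this pattern (a minimal sketch with a made-up helper
name, not the exact driver code):

    /* Sketch: pick the allocator based on whether we may block. */
    #include <linux/bvec.h>  /* struct bio_vec */
    #include <linux/gfp.h>   /* gfpflags_allow_blocking() */
    #include <linux/mm.h>    /* kvmalloc_array(), kvfree() */
    #include <linux/slab.h>  /* kmalloc_array() */

    static struct bio_vec *alloc_bvec_array(unsigned int nr, gfp_t flags)
    {
            if (gfpflags_allow_blocking(flags))
                    /* May sleep: kvmalloc_array() falls back to vmalloc
                     * for sizes the slab cannot serve, instead of failing.
                     */
                    return kvmalloc_array(nr, sizeof(struct bio_vec), flags);
            /* Atomic context: vmalloc is not allowed, slab only. */
            return kmalloc_array(nr, sizeof(struct bio_vec), flags);
    }

Either result is freed with kvfree(), which handles both slab and
vmalloc memory -- hence the kfree() -> kvfree() switch in
ploop_prq_endio() below.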
> Additionally, the GFP flags used for atomic allocations now include
> __GFP_NOWARN. This is intentional: failed atomic allocations are now
> deferred to the workqueue and retried there, so the allocation-failure
> warnings would only be noise.
>
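The retry path then looks roughly like this (again a sketch;
requeue_for_blocking_retry() is a hypothetical name -- the actual patch
re-adds the pio to the PLOOP_LIST_PREPARE llist, and the worker re-runs
preparation with GFP_NOIO):

    /* Sketch: try atomically first; failure is expected and handled,
     * so __GFP_NOWARN suppresses the allocation-failure splat. */
    bvec = alloc_bvec_array(nr, GFP_ATOMIC | __GFP_NOWARN);
    if (!bvec) {
            /* Defer to the worker, which retries in blocking
             * context where the vmalloc fallback is available. */
            requeue_for_blocking_retry(pio); /* hypothetical helper */
            return;
    }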
> https://virtuozzo.atlassian.net/browse/VSTOR-109595
Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> Signed-off-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>
> Acked-by: Denis V. Lunev <den at openvz.org>
>
> Feature: dm-ploop: ploop target driver
> ---
> drivers/md/dm-ploop-map.c | 37 +++++++++++++++++++++++++------------
> 1 file changed, 25 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index 3fb841f8bcea..e26e12381e98 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -194,7 +194,7 @@ static void ploop_prq_endio(struct pio *pio, void *prq_ptr,
> struct request *rq = prq->rq;
>
> if (prq->bvec)
> - kfree(prq->bvec);
> + kvfree(prq->bvec);
> if (prq->css)
> css_put(prq->css);
> /*
> @@ -1963,7 +1963,7 @@ void ploop_index_wb_submit(struct ploop *ploop, struct ploop_index_wb *piwb)
> ploop_runners_add_work(ploop, pio);
> }
>
> -static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq)
> +static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq, gfp_t flags)
> {
> struct bio_vec bv, *bvec, *tmp;
> struct req_iterator rq_iter;
> @@ -1972,8 +1972,10 @@ static struct bio_vec *ploop_create_bvec_from_rq(struct request *rq)
> rq_for_each_bvec(bv, rq, rq_iter)
> nr_bvec++;
>
> - bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
> - GFP_ATOMIC);
> + if (gfpflags_allow_blocking(flags))
> + bvec = kvmalloc_array(nr_bvec, sizeof(struct bio_vec), flags);
> + else
> + bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), flags);
> if (!bvec)
> goto out;
>
> @@ -1989,7 +1991,8 @@ ALLOW_ERROR_INJECTION(ploop_create_bvec_from_rq, NULL);
>
> static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> struct pio *pio,
> - struct llist_head *lldeferred_pios)
> + struct llist_head *lldeferred_pios,
> + gfp_t flags)
> {
> struct ploop_rq *prq = pio->endio_cb_data;
> struct request *rq = prq->rq;
> @@ -2003,9 +2006,19 @@ static void ploop_prepare_one_embedded_pio(struct ploop *ploop,
> * Transform a set of bvec arrays related to bios
> * into a single bvec array (which we can iterate).
> */
> - bvec = ploop_create_bvec_from_rq(rq);
> - if (!bvec)
> + bvec = ploop_create_bvec_from_rq(rq, flags);
> + if (!bvec) {
> + /*
> + * If allocation in atomic context fails, defer
> + * it to blocking context.
> + */
> + if (!gfpflags_allow_blocking(flags)) {
> + llist_add((struct llist_node *)(&pio->list),
> + &ploop->pios[PLOOP_LIST_PREPARE]);
> + return;
> + }
> goto err_nomem;
> + }
> prq->bvec = bvec;
> skip_bvec:
> pio->bi_iter.bi_size = blk_rq_bytes(rq);
> @@ -2044,7 +2057,7 @@ static void ploop_prepare_embedded_pios(struct ploop *ploop,
> pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> INIT_LIST_HEAD(&pio->list); /* until type is changed */
> if (pio->queue_list_id != PLOOP_LIST_FLUSH)
> - ploop_prepare_one_embedded_pio(ploop, pio, deferred_pios);
> + ploop_prepare_one_embedded_pio(ploop, pio, deferred_pios, GFP_NOIO);
> else
> llist_add((struct llist_node *)(&pio->list),
> &ploop->pios[PLOOP_LIST_FLUSH]);
> @@ -2573,7 +2586,7 @@ static int ploop_prepare_flush(struct ploop *ploop, struct pio *pio)
> return md_inflight;
> }
>
> -static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
> +static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio, gfp_t flags)
> {
> struct ploop_rq *prq = pio->endio_cb_data;
> struct request *rq = prq->rq;
> @@ -2615,7 +2628,7 @@ static void ploop_submit_embedded_pio(struct ploop *ploop, struct pio *pio)
> goto out;
> }
>
> - ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios);
> + ploop_prepare_one_embedded_pio(ploop, pio, &deferred_pios, flags);
> /*
> * Disable fast path due to rcu lockups fs -> ploop -> fs - fses are not reentrant
> * we can however try another fast path skip dispatcher thread and pass directly to
> @@ -2666,7 +2679,7 @@ void ploop_submit_embedded_pios(struct ploop *ploop, struct llist_node *list)
> llist_for_each_safe(pos, t, list) {
> pio = list_entry((struct list_head *)pos, typeof(*pio), list);
> INIT_LIST_HEAD(&pio->list);
> - ploop_submit_embedded_pio(ploop, pio);
> + ploop_submit_embedded_pio(ploop, pio, GFP_NOIO);
> }
> }
>
> @@ -2693,7 +2706,7 @@ int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
>
> ploop_init_prq_and_embedded_pio(ploop, rq, prq, pio);
>
> - ploop_submit_embedded_pio(ploop, pio);
> + ploop_submit_embedded_pio(ploop, pio, GFP_ATOMIC | __GFP_NOWARN);
> return DM_MAPIO_SUBMITTED;
> }
>
--
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.