[Devel] [PATCH v2 vz9/vz10] dm-ploop: fix concurrent COW operations on same cluster

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Thu Sep 18 16:57:12 MSK 2025


On 9/18/25 21:20, Vasileios Almpanis wrote:
> When multiple threads attempt operations on the same cluster simultaneously,
> the current code logs warnings but continues with potentially conflicting
> operations, leading to WARN_ON triggers.
> 
> Fix by implementing proper deferral when cluster locks fail:
> 
> 1. Change ploop_link_pio() to add identical pios to the lock holder's
>     endio list instead of issuing warnings and continuing
> 2. Rename ploop_add_cluster_lk() to ploop_try_add_cluster_lk() to reflect
>     the new behavior
> 3. Update COW and discard paths to abort operations when cluster locking
>     fails, allowing proper deferral.
> 
> This ensures that conflicting operations are serialized rather than
> corrupting shared data structures.
> 
> Observed during snapshot creation where filesystem metadata updates
> trigger concurrent operations on the same clusters.
> 
> https://virtuozzo.atlassian.net/browse/VSTOR-115446
> Signed-off-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com>

Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

> 
> v2:
> - Removed excess brackets in ploop_handle_discard_pio
> - Returned cluster locking below inflight-delay code
> - Improved code readability in ploop_submit_cluster_cow
> 
> Feature: dm-ploop: ploop target driver
> ---
>   drivers/md/dm-ploop-map.c | 32 ++++++++++++++++++--------------
>   1 file changed, 18 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
> index 56481d0f8323..ddd0132e7270 100644
> --- a/drivers/md/dm-ploop-map.c
> +++ b/drivers/md/dm-ploop-map.c
> @@ -535,10 +535,10 @@ static int ploop_link_pio(struct hlist_head head[], struct pio *pio,
>   		if (pe == pio)
>   			return 1;
>   
> -		WARN_ON_ONCE(pe != NULL);
> -
> -		if (pe)
> -			PL_ERR("clu:%u already exclusively locked\n", clu);
> +		if (pe) {
> +			ploop_add_endio_pio(pe, pio);
> +			return 0;
> +		}
>   	}
>   
>   	if (!hlist_unhashed_lockless(&pio->hlist_node)) {
> @@ -569,7 +569,7 @@ static int ploop_unlink_pio(struct ploop *ploop, struct pio *pio,
>   	return 1;
>   }
>   
> -static int ploop_add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 clu)
> +static int ploop_try_add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 clu)
>   {
>   	unsigned long flags;
>   	int ret;
> @@ -753,9 +753,9 @@ static int ploop_handle_discard_pio(struct ploop *ploop, struct pio *pio,
>   			ploop_device_name(ploop));
>   		return 0;
>   	}
> +	if (!ploop_try_add_cluster_lk(ploop, pio, clu))
> +		return 1;
>   
> -	if (!ploop_add_cluster_lk(ploop, pio, clu))
> -		PL_ERR("dis clu %d already locked\n", clu);
>   	pio->wants_discard_index_cleanup = true;
>   
>   punch_hole:
> @@ -1585,12 +1585,19 @@ static int ploop_submit_cluster_cow(struct ploop *ploop, unsigned int level,
>   {
>   	struct ploop_cow *cow = NULL;
>   	struct pio *aux_pio = NULL;
> +	int ret = 0;
>   
>   	/* Prepare new delta read */
>   	aux_pio = ploop_alloc_pio_with_pages(ploop);
>   	cow = kmem_cache_alloc(cow_cache, GFP_ATOMIC);
> -	if (!aux_pio || !cow)
> -		goto err;
> +	if (!aux_pio || !cow) {
> +		ret = -ENOMEM;
> +		goto cleanup;
> +	}
> +
> +	if (!ploop_try_add_cluster_lk(ploop, cow_pio, clu))
> +		goto cleanup;
> +
>   	ploop_init_pio(ploop, REQ_OP_READ, aux_pio);
>   	ploop_pio_prepare_offsets(ploop, aux_pio, clu);
>   	aux_pio->css = cow_pio->css;
> @@ -1602,17 +1609,14 @@ static int ploop_submit_cluster_cow(struct ploop *ploop, unsigned int level,
>   	cow->aux_pio = aux_pio;
>   	cow->cow_pio = cow_pio;
>   
> -	if (!ploop_add_cluster_lk(ploop, cow_pio, clu))
> -		PL_ERR("cowclu %d already locked\n", clu);
> -
>   	/* Stage #0: read secondary delta full clu */
>   	ploop_map_and_submit_rw(ploop, dst_clu, aux_pio, level);
>   	return 0;
> -err:
> +cleanup:
>   	if (aux_pio)
>   		ploop_free_pio_with_pages(ploop, aux_pio);
>   	kfree(cow);
> -	return -ENOMEM;
> +	return ret;
>   }
>   
>   static int ploop_initiate_cluster_cow(struct ploop *ploop, unsigned int level,

-- 
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.



More information about the Devel mailing list