[Devel] [PATCH VZ9] dm-qcow2: allow specifying depth to merge intermediate images

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Wed Sep 17 11:29:07 MSK 2025



On 9/9/25 01:27, Andrey Zhadchenko wrote:
> dm-qcow2 is almost ready to merge intermediate images, thanks
> to separated metadata tables and qio being per-qcow2.
> So this patch mostly propagates which qcow2 we want to use
> instead of tgt->top.
> 
> qcow2_merge_backward_complete is a bit harder due to in-middle
> replacement and needing to recalculate img_id for a part of
> image chain.
> 
> https://virtuozzo.atlassian.net/browse/VSTOR-101375
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
> ---
>   drivers/md/dm-qcow2-cmd.c | 82 ++++++++++++++++++++++++++++-----------
>   drivers/md/dm-qcow2.h     |  1 +
>   2 files changed, 61 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c
> index e77052ad61e6b..d59fa82dbffbc 100644
> --- a/drivers/md/dm-qcow2-cmd.c
> +++ b/drivers/md/dm-qcow2-cmd.c
> @@ -116,9 +116,9 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
>   }
>   ALLOW_ERROR_INJECTION(qcow2_service_iter, ERRNO);
>   
> -static int qcow2_merge_common(struct qcow2_target *tgt)
> +static int qcow2_merge_common(struct qcow2_target *tgt, struct qcow2 *qcow2)
>   {
> -	struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
> +	struct qcow2 *lower = qcow2->lower;
>   	u32 clu_size = qcow2->clu_size;
>   	loff_t end = lower->hdr.size;
>   
> @@ -139,9 +139,8 @@ static int qcow2_merge_forward(struct qcow2_target *tgt)
>   }
>   ALLOW_ERROR_INJECTION(qcow2_merge_forward, ERRNO);
>   
> -static int qcow2_break_l1cow(struct qcow2_target *tgt)
> +static int qcow2_break_l1cow(struct qcow2_target *tgt, struct qcow2 *qcow2)
>   {
> -	struct qcow2 *qcow2 = tgt->top;
>   	loff_t end = qcow2->hdr.size;
>   	loff_t step = (u64)qcow2->l2_entries * qcow2->clu_size;
>   
> @@ -152,25 +151,35 @@ static int qcow2_break_l1cow(struct qcow2_target *tgt)
>   static void set_backward_merge_in_process(struct qcow2_target *tgt,
>   				     struct qcow2 *qcow2, bool set)
>   {
> +	struct qcow2 *top = tgt->top;
>   	LIST_HEAD(list);
>   
> +	/*
> +	 * There are no writes if it is not the top qcow2 image.
> +	 * so we do not need to stop and flush requests when setting
> +	 */
> +	if (qcow2 != top && set) {
> +		qcow2->backward_merge_in_process = set;
> +		return;
> +	}
> +
>   	/*
>   	 * To avoid race between allocations and COWS
>   	 * we completely stop queueing qios and wait
>   	 * for pending qios. Lock is for visability.
>   	 */
> -	spin_lock_irq(&qcow2->deferred_lock);
> -	qcow2->pause_submitting_qios = true;
> -	spin_unlock_irq(&qcow2->deferred_lock);
> +	spin_lock_irq(&top->deferred_lock);
> +	top->pause_submitting_qios = true;
> +	spin_unlock_irq(&top->deferred_lock);
>   	qcow2_inflight_ref_switch(tgt);
>   
>   	/* queue is stopped */
> -	spin_lock_irq(&qcow2->deferred_lock);
> +	spin_lock_irq(&top->deferred_lock);
>   	WARN_ON_ONCE(qcow2->backward_merge_in_process == set);
>   	qcow2->backward_merge_in_process = set;
> -	qcow2->pause_submitting_qios = false;
> -	list_splice_init(&qcow2->paused_qios, &list);
> -	spin_unlock_irq(&qcow2->deferred_lock);
> +	top->pause_submitting_qios = false;
> +	list_splice_init(&top->paused_qios, &list);
> +	spin_unlock_irq(&top->deferred_lock);
>   
>   	qcow2_submit_embedded_qios(tgt, &list);
>   }
> @@ -240,11 +249,15 @@ static int qcow2_merge_backward_progress(struct qcow2_target *tgt,
>   
>   static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd);
>   
> -static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
> +static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd, int depth)
>   {
> -	struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
> +	struct qcow2 *qcow2 = tgt->top, *lower;
>   	int ret;
>   
> +	while (depth-- > 0 && qcow2->lower)
> +		qcow2 = qcow2->lower;

note: If depth is unsigned, we can simplify "depth-- > 0" check to just 
"depth--".

> +	lower = qcow2->lower;
> +
>   	lockdep_assert_held(&tgt->ctl_mutex);
>   
>   	if (!lower)
> @@ -263,6 +276,7 @@ static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
>   	if (ret)
>   		return ret;
>   
> +	tgt->backward_merge.qcow2 = qcow2;
>   	tgt->backward_merge.state = BACKWARD_MERGE_START;
>   	__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_START);
>   	tgt->backward_merge.error = 0;
> @@ -291,7 +305,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
>   	__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_BREAK_L1COW);
>   	mutex_unlock(&tgt->ctl_mutex);
>   
> -	qcow2 = tgt->top;
> +	qcow2 = tgt->backward_merge.qcow2;
>   	lower = qcow2->lower;
>   
>   	/*
> @@ -300,7 +314,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
>   	 * we'd have to freeze IO going to all data clusters
>   	 * under every L1 entry related to several snapshots.
>   	 */
> -	ret = qcow2_break_l1cow(tgt);
> +	ret = qcow2_break_l1cow(tgt, qcow2);
>   	if (ret) {
>   		QC_ERR(tgt->ti, "Can't break L1 COW");
>   		goto out_err;
> @@ -316,7 +330,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
>   
>   	/* Start merge */
>   	backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_RUNNING);
> -	ret = qcow2_merge_common(tgt);
> +	ret = qcow2_merge_common(tgt, qcow2);
>   	if (ret) {
>   		set_backward_merge_in_process(tgt, qcow2, false);
>   		ret2 = qcow2_set_image_file_features(lower, false);
> @@ -357,9 +371,29 @@ static int qcow2_merge_backward_complete(struct qcow2_target *tgt)
>   
>   	if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION)
>   		return -EBUSY;
> +
> +	if (tgt->top == tgt->backward_merge.qcow2) {
> +		tgt->top = lower;
> +	} else {
> +		struct qcow2 *top = tgt->top;
> +
> +		while (top && top->lower != tgt->backward_merge.qcow2)
> +			top = top->lower;
> +		if (!top)
> +			return -EFAULT;
> +
> +		qcow2 = tgt->backward_merge.qcow2;
> +		lower = qcow2->lower;
> +		top->lower = lower;
> +
> +		top = tgt->top;
> +		while (top != lower) {
> +			top->img_id--;
> +			top = top->lower;
> +		}
> +	}
> +
>   	__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_COMPLETING);
> -
> -	tgt->top = lower;
>   	smp_wmb(); /* Pairs with qcow2_ref_inc() */
>   	qcow2_inflight_ref_switch(tgt); /* Pending qios */
>   	qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */
> @@ -396,7 +430,7 @@ void qcow2_merge_backward_cancel(struct qcow2_target *tgt)
>   	} else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
>   		flush = true;
>   	} else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) {
> -		set_backward_merge_in_process(tgt, tgt->top, false);
> +		set_backward_merge_in_process(tgt, tgt->backward_merge.qcow2, false);
>   		tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
>   		__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_NONE);
>   	}
> @@ -572,7 +606,7 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
>   	struct qcow2_target *tgt = to_qcow2_target(ti);
>   	int ret = -EPERM;
>   	u32 val, val2;
> -	int efd;
> +	int efd, depth = 0;
>   
>   	if (!capable(CAP_SYS_ADMIN))
>   		goto out;
> @@ -652,11 +686,15 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
>   	} else if (!strcmp(argv[0], "merge_backward")) {
>   		/* argc >= 2 */
>   		if (!strcmp(argv[1], "start")) {
> -			if (argc != 3 || kstrtoint(argv[2], 10, &efd) || efd < 0) {
> +			if (argc < 3 || argc > 4 || kstrtoint(argv[2], 10, &efd) || efd < 0) {
>   				ret = -EINVAL;
>   				goto out_unlock;
>   			}
> -			ret = qcow2_merge_backward_start(tgt, efd);
> +			if (argc == 4 && kstrtou32(argv[3], 10, &depth)) {

UINT32_MAX is 2^32-1 and INT_MAX is 2^31-1, this leads to possibly 
negative depth passed to qcow2_merge_backward_start. I would advise 
either using u32 everywhere or int everywhere, not both u32 and int for 
the same thing.

> +				ret = -EINVAL;
> +				goto out_unlock;
> +			}
> +			ret = qcow2_merge_backward_start(tgt, efd, depth);
>   		} else if (!strcmp(argv[1], "complete")) {
>   			if (argc != 2) {
>   				ret = -EINVAL;
> diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
> index 5aa00c6a5ebd5..c2b2193561461 100644
> --- a/drivers/md/dm-qcow2.h
> +++ b/drivers/md/dm-qcow2.h
> @@ -172,6 +172,7 @@ enum qcow2_backward_merge_stage {
>   
>   struct qcow2_backward_merge {
>   	struct work_struct work;
> +	struct qcow2 *qcow2;
>   	enum qcow2_backward_merge_state state;
>   	int error;
>   	struct eventfd_ctx *eventfd_ctx;

-- 
Best regards, Pavel Tikhomirov
Senior Software Developer, Virtuozzo.



More information about the Devel mailing list