[Devel] [PATCH v4 VZ9 3/5] dm-qcow2: make merge_backward command asyncronous

Alexander Atanasov alexander.atanasov at virtuozzo.com
Tue Mar 4 14:55:47 MSK 2025


On 3.03.25 11:37, Pavel Tikhomirov wrote:
> This adds merge_backward "start", "complete" and "cancel" commands. By
> that we are able to split single merge_backward into two stages: start
> asyncronous merging and completion. That can be usefull for restarting
> qemu process while allowing backward merging to run asyncronously in
> kernel.
> 
> The "start" command runs merging preparations in workqueue work. After
> it finishes, the "complete" command can be called to finish the process
> and actually replace the top qcow2 with it's lower. The "cancel" command
> forces the work to stop and flushes it. In case we are in completion
> waiting state already and there is no work running, the "cancel" command
> also reverts merging preparations.
> 
> Locking:
> 
> Data in tgt->backward_merge is protected by tgt->ctl_mutex. The "start"
> and "complete" commands are fully under this lock, and the "cancel"
> operation takes the lock explicitly and releases it for work flushing.
> The work also takes the lock but only when updating tgt->backward_merge
> data. For checks, if the work was caneled in the middle, we read the
> state without locking as we don't modify the state there, also we would
> re-check the state again before exiting the work function under lock.
> 
> Now on target suspend we "cancel" currently running backward merge,
> previously we were just hanging untill backward merge have been
> finished for possibly a long time, cancelling seems cleaner. Though we
> don't really expect hypervisor suspending the target in the middle of
> backward merge that it by itself started.
> 
> https://virtuozzo.atlassian.net/browse/VSTOR-100466
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> 
> --
> v2: Cancel from BACKWARD_MERGE_START state should not try to stop the
> work via BACKWARD_MERGE_STOP state, else we will deadlock in this state.
> ---
>   drivers/md/dm-qcow2-cmd.c    | 142 +++++++++++++++++++++++++++++++----
>   drivers/md/dm-qcow2-target.c |   6 ++
>   drivers/md/dm-qcow2.h        |  19 +++++
>   3 files changed, 153 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c
> index 7b4b0ee68ad9f..04a992f3ebba6 100644
> --- a/drivers/md/dm-qcow2-cmd.c
> +++ b/drivers/md/dm-qcow2-cmd.c
> @@ -52,6 +52,8 @@ static void service_qio_endio(struct qcow2_target *tgt, struct qio *qio,
>   	wake_up(&tgt->service_wq);
>   }
>   
> +static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt);
> +
>   static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
>   		  loff_t end, loff_t step, unsigned int bi_op, u8 qio_flags)
>   {
> @@ -63,7 +65,7 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
>   	WRITE_ONCE(service_status, BLK_STS_OK);
>   
>   	for (pos = 0; pos < end; pos += step) {
> -		if (fatal_signal_pending(current)) {
> +		if (qcow2_backward_merge_should_stop(tgt)) {
>   			ret = -EINTR;
>   			break;
>   		}

Is it okay to remove termination on signal - here and the killable 
mutex? Without signal handling it can prevent clean shutdown or leave it
stuck if something goes wrong in the code.

> @@ -161,10 +163,11 @@ static void set_backward_merge_in_process(struct qcow2_target *tgt,
>   	qcow2_submit_embedded_qios(tgt, &list);
>   }
>   
> -static int qcow2_merge_backward(struct qcow2_target *tgt)
> +static int qcow2_merge_backward_start(struct qcow2_target *tgt)
>   {
>   	struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
> -	int ret, ret2;
> +
> +	lockdep_assert_held(&tgt->ctl_mutex);
>   
>   	if (!lower)
>   		return -ENOENT;
> @@ -174,6 +177,35 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
>   		return -EOPNOTSUPP;
>   	if (lower->hdr.size < qcow2->hdr.size)
>   		return -EBADSLT;
> +
> +	if (tgt->backward_merge.state != BACKWARD_MERGE_STOPPED)
> +		return -EBUSY;
> +	tgt->backward_merge.state = BACKWARD_MERGE_START;
> +	tgt->backward_merge.error = 0;
> +
> +	schedule_work(&tgt->backward_merge.work);
> +	return 0;
> +}
> +ALLOW_ERROR_INJECTION(qcow2_merge_backward_start, ERRNO);
> +
> +void qcow2_merge_backward_work(struct work_struct *work)
> +{
> +	struct qcow2_target *tgt = container_of(work, struct qcow2_target,
> +						backward_merge.work);
> +	struct qcow2 *qcow2, *lower;
> +	int ret, ret2;
> +
> +	mutex_lock(&tgt->ctl_mutex);
> +	if (tgt->backward_merge.state != BACKWARD_MERGE_START) {
> +		mutex_unlock(&tgt->ctl_mutex);
> +		return;
> +	}
> +	tgt->backward_merge.state = BACKWARD_MERGE_RUN;
> +	mutex_unlock(&tgt->ctl_mutex);
> +
> +	qcow2 = tgt->top;
> +	lower = qcow2->lower;
> +
>   	/*
>   	 * Break all COW clus at L1 level. Otherwise, later
>   	 * there would be problems with unusing them:
> @@ -183,13 +215,13 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
>   	ret = qcow2_break_l1cow(tgt);
>   	if (ret) {
>   		QC_ERR(tgt->ti, "Can't break L1 COW");
> -		return ret;
> +		goto out_err;
>   	}
>   
>   	ret = qcow2_set_image_file_features(lower, true);
>   	if (ret) {
>   		QC_ERR(tgt->ti, "Can't set dirty bit");
> -		return ret;
> +		goto out_err;
>   	}
>   	set_backward_merge_in_process(tgt, qcow2, true);
>   
> @@ -200,22 +232,85 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
>   		ret2 = qcow2_set_image_file_features(lower, false);
>   		if (ret2 < 0)
>   			QC_ERR(tgt->ti, "Can't unuse lower (%d)", ret2);
> -		return ret;
>   	}
> +
> +out_err:
> +	mutex_lock(&tgt->ctl_mutex);
> +	if (ret) {
> +		/* Error */
> +		tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> +		tgt->backward_merge.error = ret;
> +	} else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
> +		/* Merge is canceled */
> +		set_backward_merge_in_process(tgt, qcow2, false);
> +		tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> +		tgt->backward_merge.error = -EINTR;
> +	} else {
> +		/* Finish merge */
> +		tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION;
> +	}
> +	mutex_unlock(&tgt->ctl_mutex);
> +}
> +
> +static int qcow2_merge_backward_complete(struct qcow2_target *tgt)
> +{
> +	struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
> +	int ret;
> +
> +	lockdep_assert_held(&tgt->ctl_mutex);
> +
> +	if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION)
> +		return -EBUSY;
> +
>   	tgt->top = lower;
>   	smp_wmb(); /* Pairs with qcow2_ref_inc() */
>   	qcow2_inflight_ref_switch(tgt); /* Pending qios */
>   	qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */
>   	qcow2->lower = NULL;
>   
> -	ret2 = qcow2_set_image_file_features(qcow2, false);
> -	if (ret2 < 0)
> -		QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret2);
> +	ret = qcow2_set_image_file_features(qcow2, false);
> +	if (ret < 0)
> +		QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret);
>   	qcow2_destroy(qcow2);
>   
> +	tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> +
>   	return 0;
>   }
> -ALLOW_ERROR_INJECTION(qcow2_merge_backward, ERRNO);
> +ALLOW_ERROR_INJECTION(qcow2_merge_backward_complete, ERRNO);
> +
> +void qcow2_merge_backward_cancel(struct qcow2_target *tgt)
> +{
> +	bool flush = false;
> +
> +	mutex_lock(&tgt->ctl_mutex);
> +	if (tgt->backward_merge.state == BACKWARD_MERGE_STOPPED) {
> +		mutex_unlock(&tgt->ctl_mutex);
> +		return;
> +	}
> +
> +	if (tgt->backward_merge.state == BACKWARD_MERGE_START) {
> +		tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> +		flush = true;
> +	} else if (tgt->backward_merge.state == BACKWARD_MERGE_RUN) {
> +		tgt->backward_merge.state = BACKWARD_MERGE_STOP;
> +		flush = true;
> +	} else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
> +		flush = true;
> +	} else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) {
> +		set_backward_merge_in_process(tgt, tgt->top, false);
> +		tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> +	}
> +	mutex_unlock(&tgt->ctl_mutex);
> +
> +	if (flush)
> +		flush_work(&tgt->backward_merge.work);
> +}
> +
> +static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt)
> +{
> +	return READ_ONCE(tgt->backward_merge.state) == BACKWARD_MERGE_STOP;
> +}
>   
>   static struct qcow2 *qcow2_get_img(struct qcow2_target *tgt, u32 img_id, u8 *ref_index)
>   {
> @@ -374,11 +469,19 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
>   		}
>   		ret = qcow2_get_event(tgt, result, maxlen);
>   		goto out;
> +	} else if (!strcmp(argv[0], "merge_backward")) {
> +		if (argc != 2) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		if (!strcmp(argv[1], "cancel")) {
> +			qcow2_merge_backward_cancel(tgt);
> +			ret = 0;
> +			goto out;
> +		}
>   	}
>   
> -	ret = mutex_lock_killable(&tgt->ctl_mutex);
> -	if (ret)
> -		goto out;
> +	mutex_lock(&tgt->ctl_mutex);
>   
>   	if (!strcmp(argv[0], "get_errors")) {
>   		ret = qcow2_get_errors(tgt, result, maxlen);
> @@ -388,7 +491,18 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
>   	} else if (!strcmp(argv[0], "merge_forward")) {
>   		ret = qcow2_merge_forward(tgt);
>   	} else if (!strcmp(argv[0], "merge_backward")) {
> -		ret = qcow2_merge_backward(tgt);
> +		if (argc != 2) {
> +			ret = -EINVAL;
> +			mutex_unlock(&tgt->ctl_mutex);
> +			goto out;
> +		}
> +		if (!strcmp(argv[1], "start")) {
> +			ret = qcow2_merge_backward_start(tgt);
> +		} else if (!strcmp(argv[1], "complete")) {
> +			ret = qcow2_merge_backward_complete(tgt);
> +		} else {
> +			ret = -ENOTTY;
> +		}
>   	} else {
>   		ret = -ENOTTY;
>   	}
> diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c
> index 540c03cb3c44f..6e2e583ba0b8b 100644
> --- a/drivers/md/dm-qcow2-target.c
> +++ b/drivers/md/dm-qcow2-target.c
> @@ -25,6 +25,8 @@ static void qcow2_set_service_operations(struct dm_target *ti, bool allowed)
>   	mutex_lock(&tgt->ctl_mutex);
>   	tgt->service_operations_allowed = allowed;
>   	mutex_unlock(&tgt->ctl_mutex);
> +	if (!allowed)
> +		qcow2_merge_backward_cancel(tgt);
>   }
>   static void qcow2_set_wants_suspend(struct dm_target *ti, bool wants)
>   {
> @@ -251,6 +253,7 @@ static void qcow2_tgt_destroy(struct qcow2_target *tgt)
>   		/* Now kill the queue */
>   		destroy_workqueue(tgt->wq);
>   	}
> +	qcow2_merge_backward_cancel(tgt);
>   
>   	mempool_destroy(tgt->qio_pool);
>   	mempool_destroy(tgt->qrq_pool);
> @@ -494,6 +497,9 @@ static struct qcow2_target *alloc_qcow2_target(struct dm_target *ti)
>   	timer_setup(&tgt->enospc_timer, qcow2_enospc_timer, 0);
>   	ti->private = tgt;
>   	tgt->ti = ti;
> +
> +	INIT_WORK(&tgt->backward_merge.work, qcow2_merge_backward_work);
> +
>   	qcow2_set_service_operations(ti, false);
>   
>   	return tgt;
> diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
> index a89fe3db2196d..bebfdc50ed6d4 100644
> --- a/drivers/md/dm-qcow2.h
> +++ b/drivers/md/dm-qcow2.h
> @@ -149,6 +149,20 @@ struct md_page {
>   	struct list_head wpc_readers_wait_list;
>   };
>   
> +enum qcow2_backward_merge_state {
> +	BACKWARD_MERGE_STOPPED = 0,

nit: this init is excess

> +	BACKWARD_MERGE_START,
> +	BACKWARD_MERGE_RUN,
> +	BACKWARD_MERGE_WAIT_COMPLETION,
> +	BACKWARD_MERGE_STOP,
> +};
> +
> +struct qcow2_backward_merge {
> +	struct work_struct work;
> +	enum qcow2_backward_merge_state state;
> +	int error;
> +};

May be add merge error to values returned in qcow2_get_errors,
for the users that use dm events interface.

> +
>   struct qcow2_target {
>   	struct dm_target *ti;
>   #define QCOW2_QRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() */
> @@ -180,6 +194,8 @@ struct qcow2_target {
>   	struct work_struct event_work;
>   	spinlock_t event_lock;
>   	struct mutex ctl_mutex;
> +
> +	struct qcow2_backward_merge backward_merge;
>   };
>   
>   enum {
> @@ -375,6 +391,9 @@ int qcow2_inflight_ref_switch(struct qcow2_target *tgt);
>   void qcow2_flush_deferred_activity(struct qcow2_target *tgt, struct qcow2 *qcow2);
>   int qcow2_truncate_safe(struct file *file, loff_t new_len);
>   
> +void qcow2_merge_backward_work(struct work_struct *work);
> +void qcow2_merge_backward_cancel(struct qcow2_target *tgt);
> +
>   static inline struct qcow2_target *to_qcow2_target(struct dm_target *ti)
>   {
>   	return ti->private;



-- 
Regards,
Alexander Atanasov


More information about the Devel mailing list