[Devel] [PATCH v4 VZ9 3/5] dm-qcow2: make merge_backward command asyncronous
Alexander Atanasov
alexander.atanasov at virtuozzo.com
Tue Mar 4 14:55:47 MSK 2025
On 3.03.25 11:37, Pavel Tikhomirov wrote:
> This adds merge_backward "start", "complete" and "cancel" commands. By
> that we are able to split single merge_backward into two stages: start
> asyncronous merging and completion. That can be usefull for restarting
> qemu process while allowing backward merging to run asyncronously in
> kernel.
>
> The "start" command runs merging preparations in workqueue work. After
> it finishes, the "complete" command can be called to finish the process
> and actually replace the top qcow2 with it's lower. The "cancel" command
> forces the work to stop and flushes it. In case we are in completion
> waiting state already and there is no work running, the "cancel" command
> also reverts merging preparations.
>
> Locking:
>
> Data in tgt->backward_merge is protected by tgt->ctl_mutex. The "start"
> and "complete" commands are fully under this lock, and the "cancel"
> operation takes the lock explicitly and releases it for work flushing.
> The work also takes the lock but only when updating tgt->backward_merge
> data. For checks, if the work was caneled in the middle, we read the
> state without locking as we don't modify the state there, also we would
> re-check the state again before exiting the work function under lock.
>
> Now on target suspend we "cancel" currently running backward merge,
> previously we were just hanging untill backward merge have been
> finished for possibly a long time, cancelling seems cleaner. Though we
> don't really expect hypervisor suspending the target in the middle of
> backward merge that it by itself started.
>
> https://virtuozzo.atlassian.net/browse/VSTOR-100466
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
>
> --
> v2: Cancel from BACKWARD_MERGE_START state should not try to stop the
> work via BACKWARD_MERGE_STOP state, else we will deadlock in this state.
> ---
> drivers/md/dm-qcow2-cmd.c | 142 +++++++++++++++++++++++++++++++----
> drivers/md/dm-qcow2-target.c | 6 ++
> drivers/md/dm-qcow2.h | 19 +++++
> 3 files changed, 153 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c
> index 7b4b0ee68ad9f..04a992f3ebba6 100644
> --- a/drivers/md/dm-qcow2-cmd.c
> +++ b/drivers/md/dm-qcow2-cmd.c
> @@ -52,6 +52,8 @@ static void service_qio_endio(struct qcow2_target *tgt, struct qio *qio,
> wake_up(&tgt->service_wq);
> }
>
> +static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt);
> +
> static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
> loff_t end, loff_t step, unsigned int bi_op, u8 qio_flags)
> {
> @@ -63,7 +65,7 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
> WRITE_ONCE(service_status, BLK_STS_OK);
>
> for (pos = 0; pos < end; pos += step) {
> - if (fatal_signal_pending(current)) {
> + if (qcow2_backward_merge_should_stop(tgt)) {
> ret = -EINTR;
> break;
> }
Is it okay to remove termination on signal - here and the killable
mutex? Without signal handling it can prevent clean shutdown or leave it
stuck if something goes wrong in the code.
> @@ -161,10 +163,11 @@ static void set_backward_merge_in_process(struct qcow2_target *tgt,
> qcow2_submit_embedded_qios(tgt, &list);
> }
>
> -static int qcow2_merge_backward(struct qcow2_target *tgt)
> +static int qcow2_merge_backward_start(struct qcow2_target *tgt)
> {
> struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
> - int ret, ret2;
> +
> + lockdep_assert_held(&tgt->ctl_mutex);
>
> if (!lower)
> return -ENOENT;
> @@ -174,6 +177,35 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
> return -EOPNOTSUPP;
> if (lower->hdr.size < qcow2->hdr.size)
> return -EBADSLT;
> +
> + if (tgt->backward_merge.state != BACKWARD_MERGE_STOPPED)
> + return -EBUSY;
> + tgt->backward_merge.state = BACKWARD_MERGE_START;
> + tgt->backward_merge.error = 0;
> +
> + schedule_work(&tgt->backward_merge.work);
> + return 0;
> +}
> +ALLOW_ERROR_INJECTION(qcow2_merge_backward_start, ERRNO);
> +
> +void qcow2_merge_backward_work(struct work_struct *work)
> +{
> + struct qcow2_target *tgt = container_of(work, struct qcow2_target,
> + backward_merge.work);
> + struct qcow2 *qcow2, *lower;
> + int ret, ret2;
> +
> + mutex_lock(&tgt->ctl_mutex);
> + if (tgt->backward_merge.state != BACKWARD_MERGE_START) {
> + mutex_unlock(&tgt->ctl_mutex);
> + return;
> + }
> + tgt->backward_merge.state = BACKWARD_MERGE_RUN;
> + mutex_unlock(&tgt->ctl_mutex);
> +
> + qcow2 = tgt->top;
> + lower = qcow2->lower;
> +
> /*
> * Break all COW clus at L1 level. Otherwise, later
> * there would be problems with unusing them:
> @@ -183,13 +215,13 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
> ret = qcow2_break_l1cow(tgt);
> if (ret) {
> QC_ERR(tgt->ti, "Can't break L1 COW");
> - return ret;
> + goto out_err;
> }
>
> ret = qcow2_set_image_file_features(lower, true);
> if (ret) {
> QC_ERR(tgt->ti, "Can't set dirty bit");
> - return ret;
> + goto out_err;
> }
> set_backward_merge_in_process(tgt, qcow2, true);
>
> @@ -200,22 +232,85 @@ static int qcow2_merge_backward(struct qcow2_target *tgt)
> ret2 = qcow2_set_image_file_features(lower, false);
> if (ret2 < 0)
> QC_ERR(tgt->ti, "Can't unuse lower (%d)", ret2);
> - return ret;
> }
> +
> +out_err:
> + mutex_lock(&tgt->ctl_mutex);
> + if (ret) {
> + /* Error */
> + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> + tgt->backward_merge.error = ret;
> + } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
> + /* Merge is canceled */
> + set_backward_merge_in_process(tgt, qcow2, false);
> + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> + tgt->backward_merge.error = -EINTR;
> + } else {
> + /* Finish merge */
> + tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION;
> + }
> + mutex_unlock(&tgt->ctl_mutex);
> +}
> +
> +static int qcow2_merge_backward_complete(struct qcow2_target *tgt)
> +{
> + struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
> + int ret;
> +
> + lockdep_assert_held(&tgt->ctl_mutex);
> +
> + if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION)
> + return -EBUSY;
> +
> tgt->top = lower;
> smp_wmb(); /* Pairs with qcow2_ref_inc() */
> qcow2_inflight_ref_switch(tgt); /* Pending qios */
> qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */
> qcow2->lower = NULL;
>
> - ret2 = qcow2_set_image_file_features(qcow2, false);
> - if (ret2 < 0)
> - QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret2);
> + ret = qcow2_set_image_file_features(qcow2, false);
> + if (ret < 0)
> + QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret);
> qcow2_destroy(qcow2);
>
> + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> +
> return 0;
> }
> -ALLOW_ERROR_INJECTION(qcow2_merge_backward, ERRNO);
> +ALLOW_ERROR_INJECTION(qcow2_merge_backward_complete, ERRNO);
> +
> +void qcow2_merge_backward_cancel(struct qcow2_target *tgt)
> +{
> + bool flush = false;
> +
> + mutex_lock(&tgt->ctl_mutex);
> + if (tgt->backward_merge.state == BACKWARD_MERGE_STOPPED) {
> + mutex_unlock(&tgt->ctl_mutex);
> + return;
> + }
> +
> + if (tgt->backward_merge.state == BACKWARD_MERGE_START) {
> + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> + flush = true;
> + } else if (tgt->backward_merge.state == BACKWARD_MERGE_RUN) {
> + tgt->backward_merge.state = BACKWARD_MERGE_STOP;
> + flush = true;
> + } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
> + flush = true;
> + } else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) {
> + set_backward_merge_in_process(tgt, tgt->top, false);
> + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
> + }
> + mutex_unlock(&tgt->ctl_mutex);
> +
> + if (flush)
> + flush_work(&tgt->backward_merge.work);
> +}
> +
> +static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt)
> +{
> + return READ_ONCE(tgt->backward_merge.state) == BACKWARD_MERGE_STOP;
> +}
>
> static struct qcow2 *qcow2_get_img(struct qcow2_target *tgt, u32 img_id, u8 *ref_index)
> {
> @@ -374,11 +469,19 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
> }
> ret = qcow2_get_event(tgt, result, maxlen);
> goto out;
> + } else if (!strcmp(argv[0], "merge_backward")) {
> + if (argc != 2) {
> + ret = -EINVAL;
> + goto out;
> + }
> + if (!strcmp(argv[1], "cancel")) {
> + qcow2_merge_backward_cancel(tgt);
> + ret = 0;
> + goto out;
> + }
> }
>
> - ret = mutex_lock_killable(&tgt->ctl_mutex);
> - if (ret)
> - goto out;
> + mutex_lock(&tgt->ctl_mutex);
>
> if (!strcmp(argv[0], "get_errors")) {
> ret = qcow2_get_errors(tgt, result, maxlen);
> @@ -388,7 +491,18 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
> } else if (!strcmp(argv[0], "merge_forward")) {
> ret = qcow2_merge_forward(tgt);
> } else if (!strcmp(argv[0], "merge_backward")) {
> - ret = qcow2_merge_backward(tgt);
> + if (argc != 2) {
> + ret = -EINVAL;
> + mutex_unlock(&tgt->ctl_mutex);
> + goto out;
> + }
> + if (!strcmp(argv[1], "start")) {
> + ret = qcow2_merge_backward_start(tgt);
> + } else if (!strcmp(argv[1], "complete")) {
> + ret = qcow2_merge_backward_complete(tgt);
> + } else {
> + ret = -ENOTTY;
> + }
> } else {
> ret = -ENOTTY;
> }
> diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c
> index 540c03cb3c44f..6e2e583ba0b8b 100644
> --- a/drivers/md/dm-qcow2-target.c
> +++ b/drivers/md/dm-qcow2-target.c
> @@ -25,6 +25,8 @@ static void qcow2_set_service_operations(struct dm_target *ti, bool allowed)
> mutex_lock(&tgt->ctl_mutex);
> tgt->service_operations_allowed = allowed;
> mutex_unlock(&tgt->ctl_mutex);
> + if (!allowed)
> + qcow2_merge_backward_cancel(tgt);
> }
> static void qcow2_set_wants_suspend(struct dm_target *ti, bool wants)
> {
> @@ -251,6 +253,7 @@ static void qcow2_tgt_destroy(struct qcow2_target *tgt)
> /* Now kill the queue */
> destroy_workqueue(tgt->wq);
> }
> + qcow2_merge_backward_cancel(tgt);
>
> mempool_destroy(tgt->qio_pool);
> mempool_destroy(tgt->qrq_pool);
> @@ -494,6 +497,9 @@ static struct qcow2_target *alloc_qcow2_target(struct dm_target *ti)
> timer_setup(&tgt->enospc_timer, qcow2_enospc_timer, 0);
> ti->private = tgt;
> tgt->ti = ti;
> +
> + INIT_WORK(&tgt->backward_merge.work, qcow2_merge_backward_work);
> +
> qcow2_set_service_operations(ti, false);
>
> return tgt;
> diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
> index a89fe3db2196d..bebfdc50ed6d4 100644
> --- a/drivers/md/dm-qcow2.h
> +++ b/drivers/md/dm-qcow2.h
> @@ -149,6 +149,20 @@ struct md_page {
> struct list_head wpc_readers_wait_list;
> };
>
> +enum qcow2_backward_merge_state {
> + BACKWARD_MERGE_STOPPED = 0,
nit: this init is excess
> + BACKWARD_MERGE_START,
> + BACKWARD_MERGE_RUN,
> + BACKWARD_MERGE_WAIT_COMPLETION,
> + BACKWARD_MERGE_STOP,
> +};
> +
> +struct qcow2_backward_merge {
> + struct work_struct work;
> + enum qcow2_backward_merge_state state;
> + int error;
> +};
May be add merge error to values returned in qcow2_get_errors,
for the users that use dm events interface.
> +
> struct qcow2_target {
> struct dm_target *ti;
> #define QCOW2_QRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() */
> @@ -180,6 +194,8 @@ struct qcow2_target {
> struct work_struct event_work;
> spinlock_t event_lock;
> struct mutex ctl_mutex;
> +
> + struct qcow2_backward_merge backward_merge;
> };
>
> enum {
> @@ -375,6 +391,9 @@ int qcow2_inflight_ref_switch(struct qcow2_target *tgt);
> void qcow2_flush_deferred_activity(struct qcow2_target *tgt, struct qcow2 *qcow2);
> int qcow2_truncate_safe(struct file *file, loff_t new_len);
>
> +void qcow2_merge_backward_work(struct work_struct *work);
> +void qcow2_merge_backward_cancel(struct qcow2_target *tgt);
> +
> static inline struct qcow2_target *to_qcow2_target(struct dm_target *ti)
> {
> return ti->private;
--
Regards,
Alexander Atanasov
More information about the Devel
mailing list