From ptikhomirov at virtuozzo.com Mon Mar 3 10:24:31 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 15:24:31 +0800 Subject: [Devel] [PATCH v3 VZ9 0/5] dm-qcow2: make backward merge asyncronous Message-ID: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> That can be useful for restarting qemu process while allowing backward merging to run asynchronously in kernel. v2: rebase on top of vz9.80.19, make completion event consistent, fix deadlock when cancel after start and before work run v3: weaken locking in progress printing a bit to decrease possible lock contention https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov Pavel Tikhomirov (5): dm-qcow2: fix warning about wrong printk format for size_t dm-qcow2: cleanup error handling in qcow2_merge_backward dm-qcow2: make merge_backward command asyncronous dm-qcow2: add merge_backward set_eventfd command dm-qcow2: add merge_backward progress command drivers/md/dm-qcow2-cmd.c | 278 ++++++++++++++++++++++++++++++++--- drivers/md/dm-qcow2-map.c | 4 +- drivers/md/dm-qcow2-target.c | 6 + drivers/md/dm-qcow2.h | 35 +++++ 4 files changed, 297 insertions(+), 26 deletions(-) -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 10:24:32 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 15:24:32 +0800 Subject: [Devel] [PATCH v3 VZ9 1/5] dm-qcow2: fix warning about wrong printk format for size_t In-Reply-To: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> References: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303072506.1206960-2-ptikhomirov@virtuozzo.com> In file included from ./include/linux/kernel.h:20, from ./include/linux/list.h:9, from ./include/linux/preempt.h:12, from ./include/linux/spinlock.h:56, from drivers/md/dm-qcow2-map.c:5: drivers/md/dm-qcow2-map.c: In function 'process_compressed_read': ./include/linux/kern_levels.h:5:25: warning: format '%d'
expects argument of type 'int', but argument 3 has type 'size_t' {aka 'long unsigned int'} [-Wformat=] 5 | #define KERN_SOH "\001" /* ASCII Start Of Header */ | ^~~~~~ ./include/linux/printk.h:497:25: note: in definition of macro 'printk_index_wrap' 497 | _p_func(_fmt, ##__VA_ARGS__); \ | ^~~~ ./include/linux/printk.h:568:9: note: in expansion of macro 'printk' 568 | printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~ ./include/linux/kern_levels.h:11:25: note: in expansion of macro 'KERN_SOH' 11 | #define KERN_ERR KERN_SOH "3" /* error conditions */ | ^~~~~~~~ ./include/linux/printk.h:568:16: note: in expansion of macro 'KERN_ERR' 568 | printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~~~ drivers/md/dm-qcow2.h:215:33: note: in expansion of macro 'pr_err' 215 | #define QC_ERR(dmti, fmt, ...) pr_err (QCOW2_FMT(fmt), \ | ^~~~~~ drivers/md/dm-qcow2-map.c:3691:41: note: in expansion of macro 'QC_ERR' 3691 | QC_ERR(qcow2->tgt->ti, | ^~~~~~ While at it, fix line wrap alignment. https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov -- v2: Rebase on top of vz9.80.19, "%lu" is also incorrect, see Documentation/core-api/printk-formats.rst.
--- drivers/md/dm-qcow2-map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-qcow2-map.c b/drivers/md/dm-qcow2-map.c index 7a1312a74e9fb..f7cb036bb416e 100644 --- a/drivers/md/dm-qcow2-map.c +++ b/drivers/md/dm-qcow2-map.c @@ -3689,8 +3689,8 @@ static void process_compressed_read(struct list_head *read_list, buf = kvmalloc(qcow2->clu_size + dctxlen, GFP_NOIO); if (!buf) { QC_ERR(qcow2->tgt->ti, - "can not allocate decompression buffer:%lu", - qcow2->clu_size + dctxlen); + "can not allocate decompression buffer:%zu", + qcow2->clu_size + dctxlen); end_qios(read_list, BLK_STS_RESOURCE); return; } -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 10:24:33 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 15:24:33 +0800 Subject: [Devel] [PATCH v3 VZ9 2/5] dm-qcow2: cleanup error handling in qcow2_merge_backward In-Reply-To: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> References: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303072506.1206960-3-ptikhomirov@virtuozzo.com> The label "out" is excess, lets remove it in accordance with: "If there is no cleanup needed then just return directly." 
https://www.kernel.org/doc/html/v4.10/process/coding-style.html#centralized-exiting-of-functions https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov --- drivers/md/dm-qcow2-cmd.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c index 6dc7e07220557..7b4b0ee68ad9f 100644 --- a/drivers/md/dm-qcow2-cmd.c +++ b/drivers/md/dm-qcow2-cmd.c @@ -166,18 +166,14 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower; int ret, ret2; - ret = -ENOENT; if (!lower) - goto out; - ret = -EACCES; + return -ENOENT; if (!(lower->file->f_mode & FMODE_WRITE)) - goto out; - ret = -EOPNOTSUPP; + return -EACCES; if (qcow2->clu_size != lower->clu_size) - goto out; - ret = -EBADSLT; + return -EOPNOTSUPP; if (lower->hdr.size < qcow2->hdr.size) - goto out; + return -EBADSLT; /* * Break all COW clus at L1 level. Otherwise, later * there would be problems with unusing them: @@ -187,13 +183,13 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) ret = qcow2_break_l1cow(tgt); if (ret) { QC_ERR(tgt->ti, "Can't break L1 COW"); - goto out; + return ret; } ret = qcow2_set_image_file_features(lower, true); if (ret) { QC_ERR(tgt->ti, "Can't set dirty bit"); - goto out; + return ret; } set_backward_merge_in_process(tgt, qcow2, true); @@ -204,7 +200,7 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) ret2 = qcow2_set_image_file_features(lower, false); if (ret2 < 0) QC_ERR(tgt->ti, "Can't unuse lower (%d)", ret2); - goto out; + return ret; } tgt->top = lower; smp_wmb(); /* Pairs with qcow2_ref_inc() */ @@ -216,8 +212,8 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) if (ret2 < 0) QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret2); qcow2_destroy(qcow2); -out: - return ret; + + return 0; } ALLOW_ERROR_INJECTION(qcow2_merge_backward, ERRNO); -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 
10:24:34 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 15:24:34 +0800 Subject: [Devel] [PATCH v3 VZ9 3/5] dm-qcow2: make merge_backward command asyncronous In-Reply-To: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> References: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303072506.1206960-4-ptikhomirov@virtuozzo.com> This adds merge_backward "start", "complete" and "cancel" commands. By that we are able to split single merge_backward into two stages: start asynchronous merging and completion. That can be useful for restarting qemu process while allowing backward merging to run asynchronously in kernel. The "start" command runs merging preparations in workqueue work. After it finishes, the "complete" command can be called to finish the process and actually replace the top qcow2 with its lower. The "cancel" command forces the work to stop and flushes it. In case we are in completion waiting state already and there is no work running, the "cancel" command also reverts merging preparations. Locking: Data in tgt->backward_merge is protected by tgt->ctl_mutex. The "start" and "complete" commands are fully under this lock, and the "cancel" operation takes the lock explicitly and releases it for work flushing. The work also takes the lock but only when updating tgt->backward_merge data. For checks, if the work was canceled in the middle, we read the state without locking as we don't modify the state there, also we would re-check the state again before exiting the work function under lock. Now on target suspend we "cancel" currently running backward merge, previously we were just hanging until backward merge has finished for possibly a long time, cancelling seems cleaner. Though we don't really expect hypervisor suspending the target in the middle of backward merge that it by itself started.
https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov -- v2: Cancel from BACKWARD_MERGE_START state should not try to stop the work via BACKWARD_MERGE_STOP state, else we will deadlock in this state. --- drivers/md/dm-qcow2-cmd.c | 142 +++++++++++++++++++++++++++++++---- drivers/md/dm-qcow2-target.c | 6 ++ drivers/md/dm-qcow2.h | 19 +++++ 3 files changed, 153 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c index 7b4b0ee68ad9f..04a992f3ebba6 100644 --- a/drivers/md/dm-qcow2-cmd.c +++ b/drivers/md/dm-qcow2-cmd.c @@ -52,6 +52,8 @@ static void service_qio_endio(struct qcow2_target *tgt, struct qio *qio, wake_up(&tgt->service_wq); } +static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt); + static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2, loff_t end, loff_t step, unsigned int bi_op, u8 qio_flags) { @@ -63,7 +65,7 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2, WRITE_ONCE(service_status, BLK_STS_OK); for (pos = 0; pos < end; pos += step) { - if (fatal_signal_pending(current)) { + if (qcow2_backward_merge_should_stop(tgt)) { ret = -EINTR; break; } @@ -161,10 +163,11 @@ static void set_backward_merge_in_process(struct qcow2_target *tgt, qcow2_submit_embedded_qios(tgt, &list); } -static int qcow2_merge_backward(struct qcow2_target *tgt) +static int qcow2_merge_backward_start(struct qcow2_target *tgt) { struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower; - int ret, ret2; + + lockdep_assert_held(&tgt->ctl_mutex); if (!lower) return -ENOENT; @@ -174,6 +177,35 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) return -EOPNOTSUPP; if (lower->hdr.size < qcow2->hdr.size) return -EBADSLT; + + if (tgt->backward_merge.state != BACKWARD_MERGE_STOPPED) + return -EBUSY; + tgt->backward_merge.state = BACKWARD_MERGE_START; + tgt->backward_merge.error = 0; + + schedule_work(&tgt->backward_merge.work); + return 0; +} 
+ALLOW_ERROR_INJECTION(qcow2_merge_backward_start, ERRNO); + +void qcow2_merge_backward_work(struct work_struct *work) +{ + struct qcow2_target *tgt = container_of(work, struct qcow2_target, + backward_merge.work); + struct qcow2 *qcow2, *lower; + int ret, ret2; + + mutex_lock(&tgt->ctl_mutex); + if (tgt->backward_merge.state != BACKWARD_MERGE_START) { + mutex_unlock(&tgt->ctl_mutex); + return; + } + tgt->backward_merge.state = BACKWARD_MERGE_RUN; + mutex_unlock(&tgt->ctl_mutex); + + qcow2 = tgt->top; + lower = qcow2->lower; + /* * Break all COW clus at L1 level. Otherwise, later * there would be problems with unusing them: @@ -183,13 +215,13 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) ret = qcow2_break_l1cow(tgt); if (ret) { QC_ERR(tgt->ti, "Can't break L1 COW"); - return ret; + goto out_err; } ret = qcow2_set_image_file_features(lower, true); if (ret) { QC_ERR(tgt->ti, "Can't set dirty bit"); - return ret; + goto out_err; } set_backward_merge_in_process(tgt, qcow2, true); @@ -200,22 +232,85 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) ret2 = qcow2_set_image_file_features(lower, false); if (ret2 < 0) QC_ERR(tgt->ti, "Can't unuse lower (%d)", ret2); - return ret; } + +out_err: + mutex_lock(&tgt->ctl_mutex); + if (ret) { + /* Error */ + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + tgt->backward_merge.error = ret; + } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) { + /* Merge is canceled */ + set_backward_merge_in_process(tgt, qcow2, false); + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + tgt->backward_merge.error = -EINTR; + } else { + /* Finish merge */ + tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION; + } + mutex_unlock(&tgt->ctl_mutex); +} + +static int qcow2_merge_backward_complete(struct qcow2_target *tgt) +{ + struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower; + int ret; + + lockdep_assert_held(&tgt->ctl_mutex); + + if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION) + 
return -EBUSY; + tgt->top = lower; smp_wmb(); /* Pairs with qcow2_ref_inc() */ qcow2_inflight_ref_switch(tgt); /* Pending qios */ qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */ qcow2->lower = NULL; - ret2 = qcow2_set_image_file_features(qcow2, false); - if (ret2 < 0) - QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret2); + ret = qcow2_set_image_file_features(qcow2, false); + if (ret < 0) + QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret); qcow2_destroy(qcow2); + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + return 0; } -ALLOW_ERROR_INJECTION(qcow2_merge_backward, ERRNO); +ALLOW_ERROR_INJECTION(qcow2_merge_backward_complete, ERRNO); + +void qcow2_merge_backward_cancel(struct qcow2_target *tgt) +{ + bool flush = false; + + mutex_lock(&tgt->ctl_mutex); + if (tgt->backward_merge.state == BACKWARD_MERGE_STOPPED) { + mutex_unlock(&tgt->ctl_mutex); + return; + } + + if (tgt->backward_merge.state == BACKWARD_MERGE_START) { + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + flush = true; + } else if (tgt->backward_merge.state == BACKWARD_MERGE_RUN) { + tgt->backward_merge.state = BACKWARD_MERGE_STOP; + flush = true; + } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) { + flush = true; + } else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) { + set_backward_merge_in_process(tgt, tgt->top, false); + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + } + mutex_unlock(&tgt->ctl_mutex); + + if (flush) + flush_work(&tgt->backward_merge.work); +} + +static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt) +{ + return READ_ONCE(tgt->backward_merge.state) == BACKWARD_MERGE_STOP; +} static struct qcow2 *qcow2_get_img(struct qcow2_target *tgt, u32 img_id, u8 *ref_index) { @@ -374,11 +469,19 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv, } ret = qcow2_get_event(tgt, result, maxlen); goto out; + } else if (!strcmp(argv[0], "merge_backward")) { + if (argc != 2) { + ret = -EINVAL; 
+ goto out; + } + if (!strcmp(argv[1], "cancel")) { + qcow2_merge_backward_cancel(tgt); + ret = 0; + goto out; + } } - ret = mutex_lock_killable(&tgt->ctl_mutex); - if (ret) - goto out; + mutex_lock(&tgt->ctl_mutex); if (!strcmp(argv[0], "get_errors")) { ret = qcow2_get_errors(tgt, result, maxlen); @@ -388,7 +491,18 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv, } else if (!strcmp(argv[0], "merge_forward")) { ret = qcow2_merge_forward(tgt); } else if (!strcmp(argv[0], "merge_backward")) { - ret = qcow2_merge_backward(tgt); + if (argc != 2) { + ret = -EINVAL; + mutex_unlock(&tgt->ctl_mutex); + goto out; + } + if (!strcmp(argv[1], "start")) { + ret = qcow2_merge_backward_start(tgt); + } else if (!strcmp(argv[1], "complete")) { + ret = qcow2_merge_backward_complete(tgt); + } else { + ret = -ENOTTY; + } } else { ret = -ENOTTY; } diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c index 540c03cb3c44f..6e2e583ba0b8b 100644 --- a/drivers/md/dm-qcow2-target.c +++ b/drivers/md/dm-qcow2-target.c @@ -25,6 +25,8 @@ static void qcow2_set_service_operations(struct dm_target *ti, bool allowed) mutex_lock(&tgt->ctl_mutex); tgt->service_operations_allowed = allowed; mutex_unlock(&tgt->ctl_mutex); + if (!allowed) + qcow2_merge_backward_cancel(tgt); } static void qcow2_set_wants_suspend(struct dm_target *ti, bool wants) { @@ -251,6 +253,7 @@ static void qcow2_tgt_destroy(struct qcow2_target *tgt) /* Now kill the queue */ destroy_workqueue(tgt->wq); } + qcow2_merge_backward_cancel(tgt); mempool_destroy(tgt->qio_pool); mempool_destroy(tgt->qrq_pool); @@ -494,6 +497,9 @@ static struct qcow2_target *alloc_qcow2_target(struct dm_target *ti) timer_setup(&tgt->enospc_timer, qcow2_enospc_timer, 0); ti->private = tgt; tgt->ti = ti; + + INIT_WORK(&tgt->backward_merge.work, qcow2_merge_backward_work); + qcow2_set_service_operations(ti, false); return tgt; diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h index 
a89fe3db2196d..bebfdc50ed6d4 100644 --- a/drivers/md/dm-qcow2.h +++ b/drivers/md/dm-qcow2.h @@ -149,6 +149,20 @@ struct md_page { struct list_head wpc_readers_wait_list; }; +enum qcow2_backward_merge_state { + BACKWARD_MERGE_STOPPED = 0, + BACKWARD_MERGE_START, + BACKWARD_MERGE_RUN, + BACKWARD_MERGE_WAIT_COMPLETION, + BACKWARD_MERGE_STOP, +}; + +struct qcow2_backward_merge { + struct work_struct work; + enum qcow2_backward_merge_state state; + int error; +}; + struct qcow2_target { struct dm_target *ti; #define QCOW2_QRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() */ @@ -180,6 +194,8 @@ struct qcow2_target { struct work_struct event_work; spinlock_t event_lock; struct mutex ctl_mutex; + + struct qcow2_backward_merge backward_merge; }; enum { @@ -375,6 +391,9 @@ int qcow2_inflight_ref_switch(struct qcow2_target *tgt); void qcow2_flush_deferred_activity(struct qcow2_target *tgt, struct qcow2 *qcow2); int qcow2_truncate_safe(struct file *file, loff_t new_len); +void qcow2_merge_backward_work(struct work_struct *work); +void qcow2_merge_backward_cancel(struct qcow2_target *tgt); + static inline struct qcow2_target *to_qcow2_target(struct dm_target *ti) { return ti->private; -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 10:24:35 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 15:24:35 +0800 Subject: [Devel] [PATCH v3 VZ9 4/5] dm-qcow2: add merge_backward set_eventfd command In-Reply-To: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> References: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303072506.1206960-5-ptikhomirov@virtuozzo.com> This eventfd can be used to get an event when merge_backward start work have finished and is waiting for completion. Note: The eventfd can be changed even while work is running. Locking: The backward_merge.eventfd_ctx is protected from being released by tgt->ctl_mutex. 
https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov -- v2: Always report that work finished, e.g. also on error or then it was canceled, this should be more consistent from the userspace perspective. --- drivers/md/dm-qcow2-cmd.c | 39 ++++++++++++++++++++++++++++++++++++++- drivers/md/dm-qcow2.h | 2 ++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c index 04a992f3ebba6..7f9c582778d5f 100644 --- a/drivers/md/dm-qcow2-cmd.c +++ b/drivers/md/dm-qcow2-cmd.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include "dm-qcow2.h" @@ -197,6 +199,8 @@ void qcow2_merge_backward_work(struct work_struct *work) mutex_lock(&tgt->ctl_mutex); if (tgt->backward_merge.state != BACKWARD_MERGE_START) { + if (tgt->backward_merge.eventfd_ctx) + eventfd_signal(tgt->backward_merge.eventfd_ctx, 1); mutex_unlock(&tgt->ctl_mutex); return; } @@ -249,6 +253,8 @@ void qcow2_merge_backward_work(struct work_struct *work) /* Finish merge */ tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION; } + if (tgt->backward_merge.eventfd_ctx) + eventfd_signal(tgt->backward_merge.eventfd_ctx, 1); mutex_unlock(&tgt->ctl_mutex); } @@ -312,6 +318,24 @@ static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt) return READ_ONCE(tgt->backward_merge.state) == BACKWARD_MERGE_STOP; } +#define QCOW2_FILE_UNBIND -1 + +static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd) +{ + struct eventfd_ctx *ctx = NULL; + + ctx = efd == QCOW2_FILE_UNBIND ? 
NULL : eventfd_ctx_fdget(efd); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + mutex_lock(&tgt->ctl_mutex); + swap(ctx, tgt->backward_merge.eventfd_ctx); + if (ctx) + eventfd_ctx_put(ctx); + mutex_unlock(&tgt->ctl_mutex); + return 0; +} + static struct qcow2 *qcow2_get_img(struct qcow2_target *tgt, u32 img_id, u8 *ref_index) { struct qcow2 *qcow2; @@ -470,14 +494,27 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv, ret = qcow2_get_event(tgt, result, maxlen); goto out; } else if (!strcmp(argv[0], "merge_backward")) { - if (argc != 2) { + if (argc < 2) { ret = -EINVAL; goto out; } if (!strcmp(argv[1], "cancel")) { + if (argc != 2) { + ret = -EINVAL; + goto out; + } qcow2_merge_backward_cancel(tgt); ret = 0; goto out; + } else if (!strcmp(argv[1], "set_eventfd")) { + int efd; + + if (argc != 3 || kstrtoint(argv[2], 10, &efd)) { + ret = -EINVAL; + goto out; + } + ret = qcow2_merge_backward_set_eventfd(tgt, efd); + goto out; } } diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h index bebfdc50ed6d4..c4956e3fd0eb7 100644 --- a/drivers/md/dm-qcow2.h +++ b/drivers/md/dm-qcow2.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "dm-core.h" #define DM_MSG_PREFIX "qcow2" @@ -161,6 +162,7 @@ struct qcow2_backward_merge { struct work_struct work; enum qcow2_backward_merge_state state; int error; + struct eventfd_ctx *eventfd_ctx; }; struct qcow2_target { -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 10:24:36 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 15:24:36 +0800 Subject: [Devel] [PATCH v3 VZ9 5/5] dm-qcow2: add merge_backward progress command In-Reply-To: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> References: <20250303072506.1206960-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303072506.1206960-6-ptikhomirov@virtuozzo.com> This allows to see progress of backward merge. 
It shows the stage we are at and for iterative stages it provides progress in the form of how many iterations are done and how many iterations there are in total. Locking: The progress data consistency is protected by tgt->ctl_mutex, we always update stage and error consistently under lock. Inside iterative stages for progress updating we have xchg instead of lock so that changes to progress are atomic and imply memory barrier (this way we would not see progress greater than max_progress in progress reporting), but at the same time there is less contention on tgt->ctl_mutex. https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov -- v3: Address Kostya's review comments: move progress printing out of lock, remove excess updates of max_progress, make progress updates without lock. --- drivers/md/dm-qcow2-cmd.c | 83 +++++++++++++++++++++++++++++++++++++++ drivers/md/dm-qcow2.h | 14 +++++++ 2 files changed, 97 insertions(+) diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c index 7f9c582778d5f..b9d37e78b7577 100644 --- a/drivers/md/dm-qcow2-cmd.c +++ b/drivers/md/dm-qcow2-cmd.c @@ -54,6 +54,10 @@ static void service_qio_endio(struct qcow2_target *tgt, struct qio *qio, wake_up(&tgt->service_wq); } +static void backward_merge_update_progress(struct qcow2_target *tgt, + long long progress); +static void backward_merge_update_max_progress(struct qcow2_target *tgt, + long long max_progress); static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt); static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2, @@ -66,7 +70,10 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2, WRITE_ONCE(service_status, BLK_STS_OK); + backward_merge_update_max_progress(tgt, end); for (pos = 0; pos < end; pos += step) { + backward_merge_update_progress(tgt, pos); + if (qcow2_backward_merge_should_stop(tgt)) { ret = -EINTR; break; @@ -165,6 +172,66 @@ static void set_backward_merge_in_process(struct
qcow2_target *tgt, qcow2_submit_embedded_qios(tgt, &list); } +static void __backward_merge_update_stage(struct qcow2_target *tgt, + enum qcow2_backward_merge_stage stage) +{ + tgt->backward_merge.stage = stage; + tgt->backward_merge.progress = 0; + tgt->backward_merge.max_progress = 0; +} + +static void backward_merge_update_stage(struct qcow2_target *tgt, + enum qcow2_backward_merge_stage stage) +{ + mutex_lock(&tgt->ctl_mutex); + __backward_merge_update_stage(tgt, stage); + mutex_unlock(&tgt->ctl_mutex); +} + +static void backward_merge_update_max_progress(struct qcow2_target *tgt, + long long max_progress) +{ + xchg(&tgt->backward_merge.max_progress, max_progress); +} + +static void backward_merge_update_progress(struct qcow2_target *tgt, + long long progress) +{ + xchg(&tgt->backward_merge.progress, progress); +} + +char *backward_merge_stage_names[] = { + "none", + "break_l1cow", + "set_dirty", + "running", + "waiting_completion", + "completing", + "fail", +}; + +static int qcow2_merge_backward_progress(struct qcow2_target *tgt, + char *result, unsigned int maxlen) +{ + struct qcow2_backward_merge backward_merge; + unsigned int sz = 0; + int ret; + + BUILD_BUG_ON(ARRAY_SIZE(backward_merge_stage_names) != BACKWARD_MERGE_STAGE_MAX); + + mutex_lock(&tgt->ctl_mutex); + backward_merge = tgt->backward_merge; + mutex_unlock(&tgt->ctl_mutex); + + ret = DMEMIT("stage=%s\nprogress=%lld\nmax_progress=%lld\nerror=%d\n", + backward_merge_stage_names[backward_merge.stage], + backward_merge.progress, + backward_merge.max_progress, + backward_merge.error); + + return ret ? 
1 : 0; +} + static int qcow2_merge_backward_start(struct qcow2_target *tgt) { struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower; @@ -205,6 +272,7 @@ void qcow2_merge_backward_work(struct work_struct *work) return; } tgt->backward_merge.state = BACKWARD_MERGE_RUN; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_BREAK_L1COW); mutex_unlock(&tgt->ctl_mutex); qcow2 = tgt->top; @@ -222,6 +290,7 @@ void qcow2_merge_backward_work(struct work_struct *work) goto out_err; } + backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_SET_DIRTY); ret = qcow2_set_image_file_features(lower, true); if (ret) { QC_ERR(tgt->ti, "Can't set dirty bit"); @@ -230,6 +299,7 @@ void qcow2_merge_backward_work(struct work_struct *work) set_backward_merge_in_process(tgt, qcow2, true); /* Start merge */ + backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_RUNNING); ret = qcow2_merge_common(tgt); if (ret) { set_backward_merge_in_process(tgt, qcow2, false); @@ -244,14 +314,17 @@ void qcow2_merge_backward_work(struct work_struct *work) /* Error */ tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; tgt->backward_merge.error = ret; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_FAIL); } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) { /* Merge is canceled */ set_backward_merge_in_process(tgt, qcow2, false); tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; tgt->backward_merge.error = -EINTR; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_FAIL); } else { /* Finish merge */ tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_WAITING_COMPLETION); } if (tgt->backward_merge.eventfd_ctx) eventfd_signal(tgt->backward_merge.eventfd_ctx, 1); @@ -267,6 +340,7 @@ static int qcow2_merge_backward_complete(struct qcow2_target *tgt) if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION) return -EBUSY; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_COMPLETING); tgt->top = lower; 
smp_wmb(); /* Pairs with qcow2_ref_inc() */ @@ -280,6 +354,7 @@ static int qcow2_merge_backward_complete(struct qcow2_target *tgt) qcow2_destroy(qcow2); tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_NONE); return 0; } @@ -306,6 +381,7 @@ void qcow2_merge_backward_cancel(struct qcow2_target *tgt) } else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) { set_backward_merge_in_process(tgt, tgt->top, false); tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_NONE); } mutex_unlock(&tgt->ctl_mutex); @@ -515,6 +591,13 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv, } ret = qcow2_merge_backward_set_eventfd(tgt, efd); goto out; + } else if (!strcmp(argv[1], "progress")) { + if (argc != 2) { + ret = -EINVAL; + goto out; + } + ret = qcow2_merge_backward_progress(tgt, result, maxlen); + goto out; } } diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h index c4956e3fd0eb7..ed7cf79348052 100644 --- a/drivers/md/dm-qcow2.h +++ b/drivers/md/dm-qcow2.h @@ -158,11 +158,25 @@ enum qcow2_backward_merge_state { BACKWARD_MERGE_STOP, }; +enum qcow2_backward_merge_stage { + BACKWARD_MERGE_STAGE_NONE = 0, + BACKWARD_MERGE_STAGE_BREAK_L1COW, + BACKWARD_MERGE_STAGE_SET_DIRTY, + BACKWARD_MERGE_STAGE_RUNNING, + BACKWARD_MERGE_STAGE_WAITING_COMPLETION, + BACKWARD_MERGE_STAGE_COMPLETING, + BACKWARD_MERGE_STAGE_FAIL, + BACKWARD_MERGE_STAGE_MAX, +}; + struct qcow2_backward_merge { struct work_struct work; enum qcow2_backward_merge_state state; int error; struct eventfd_ctx *eventfd_ctx; + enum qcow2_backward_merge_stage stage; + long long progress; + long long max_progress; }; struct qcow2_target { -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 10:37:04 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 15:37:04 +0800 Subject: [Devel] [PATCH VZ9] vhost/vsock: remove unused variable i in 
VHOST_RESET_OWNER ioctl Message-ID: <20250303073842.1209656-1-ptikhomirov@virtuozzo.com> Fixes compilation warning: drivers/vhost/vsock.c: In function 'vhost_vsock_reset_owner': drivers/vhost/vsock.c:846:16: warning: unused variable 'i' [-Wunused-variable] 846 | size_t i; | ^ Fixes: ad35221ad1341 ("vhost/vsock: add VHOST_RESET_OWNER ioctl") Signed-off-by: Pavel Tikhomirov Feature: vhost-vsock: VHOST_RESET_OWNER ioctl --- drivers/vhost/vsock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 3654fa0fd5849..36750e163052f 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -843,7 +843,6 @@ static int vhost_vsock_reset_owner(struct vhost_vsock *vsock) { struct vhost_iotlb *umem; long err; - size_t i; mutex_lock(&vsock->dev.mutex); err = vhost_dev_check_owner(&vsock->dev); -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 12:37:20 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 17:37:20 +0800 Subject: [Devel] [PATCH v4 VZ9 0/5] dm-qcow2: make backward merge asyncronous Message-ID: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> That can be useful for restarting qemu process while allowing backward merging to run asynchronously in kernel.
v2: rebase on top of vz9.80.19, make completion event consistent, fix deadlock when cancel after start and before work run v3: weaken locking in progress printing a bit to decrease possible lock contention v4: signal that we are at completion waiting on change of eventfd https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov Pavel Tikhomirov (5): dm-qcow2: fix warning about wrong printk format for size_t dm-qcow2: cleanup error handling in qcow2_merge_backward dm-qcow2: make merge_backward command asyncronous dm-qcow2: add merge_backward set_eventfd command dm-qcow2: add merge_backward progress command drivers/md/dm-qcow2-cmd.c | 281 ++++++++++++++++++++++++++++++++--- drivers/md/dm-qcow2-map.c | 4 +- drivers/md/dm-qcow2-target.c | 6 + drivers/md/dm-qcow2.h | 35 +++++ 4 files changed, 300 insertions(+), 26 deletions(-) -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 12:37:21 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 17:37:21 +0800 Subject: [Devel] [PATCH v4 VZ9 1/5] dm-qcow2: fix warning about wrong printk format for size_t In-Reply-To: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> References: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303093802.1233834-2-ptikhomirov@virtuozzo.com> In file included from ./include/linux/kernel.h:20, from ./include/linux/list.h:9, from ./include/linux/preempt.h:12, from ./include/linux/spinlock.h:56, from drivers/md/dm-qcow2-map.c:5: drivers/md/dm-qcow2-map.c: In function ?process_compressed_read?: ./include/linux/kern_levels.h:5:25: warning: format ?%d? expects argument of type ?int?, but argument 3 has type ?size_t? {aka ?long unsigned int?} [-Wformat=] 5 | #define KERN_SOH "\001" /* ASCII Start Of Header */ | ^~~~~~ ./include/linux/printk.h:497:25: note: in definition of macro ?printk_index_wrap? 497 | _p_func(_fmt, ##__VA_ARGS__); \ | ^~~~ ./include/linux/printk.h:568:9: note: in expansion of macro ?printk? 
568 | printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~ ./include/linux/kern_levels.h:11:25: note: in expansion of macro 'KERN_SOH' 11 | #define KERN_ERR KERN_SOH "3" /* error conditions */ | ^~~~~~~~ ./include/linux/printk.h:568:16: note: in expansion of macro 'KERN_ERR' 568 | printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~~~ drivers/md/dm-qcow2.h:215:33: note: in expansion of macro 'pr_err' 215 | #define QC_ERR(dmti, fmt, ...) pr_err (QCOW2_FMT(fmt), \ | ^~~~~~ drivers/md/dm-qcow2-map.c:3691:41: note: in expansion of macro 'QC_ERR' 3691 | QC_ERR(qcow2->tgt->ti, | ^~~~~~ While at it, fix line wrap alignment. https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov -- v2: Rebase on top of vz9.80.19, "%lu" is also incorrect, see Documentation/core-api/printk-formats.rst. --- drivers/md/dm-qcow2-map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-qcow2-map.c b/drivers/md/dm-qcow2-map.c index 7a1312a74e9fb..f7cb036bb416e 100644 --- a/drivers/md/dm-qcow2-map.c +++ b/drivers/md/dm-qcow2-map.c @@ -3689,8 +3689,8 @@ static void process_compressed_read(struct list_head *read_list, buf = kvmalloc(qcow2->clu_size + dctxlen, GFP_NOIO); if (!buf) { QC_ERR(qcow2->tgt->ti, - "can not allocate decompression buffer:%lu", - qcow2->clu_size + dctxlen); + "can not allocate decompression buffer:%zu", + qcow2->clu_size + dctxlen); end_qios(read_list, BLK_STS_RESOURCE); return; } -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 12:37:22 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 17:37:22 +0800 Subject: [Devel] [PATCH v4 VZ9 2/5] dm-qcow2: cleanup error handling in qcow2_merge_backward In-Reply-To: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> References: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303093802.1233834-3-ptikhomirov@virtuozzo.com> The label "out" is superfluous, let's remove it in accordance with: "If there is no cleanup needed then 
just return directly." https://www.kernel.org/doc/html/v4.10/process/coding-style.html#centralized-exiting-of-functions https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov --- drivers/md/dm-qcow2-cmd.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c index 6dc7e07220557..7b4b0ee68ad9f 100644 --- a/drivers/md/dm-qcow2-cmd.c +++ b/drivers/md/dm-qcow2-cmd.c @@ -166,18 +166,14 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower; int ret, ret2; - ret = -ENOENT; if (!lower) - goto out; - ret = -EACCES; + return -ENOENT; if (!(lower->file->f_mode & FMODE_WRITE)) - goto out; - ret = -EOPNOTSUPP; + return -EACCES; if (qcow2->clu_size != lower->clu_size) - goto out; - ret = -EBADSLT; + return -EOPNOTSUPP; if (lower->hdr.size < qcow2->hdr.size) - goto out; + return -EBADSLT; /* * Break all COW clus at L1 level. 
Otherwise, later * there would be problems with unusing them: @@ -187,13 +183,13 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) ret = qcow2_break_l1cow(tgt); if (ret) { QC_ERR(tgt->ti, "Can't break L1 COW"); - goto out; + return ret; } ret = qcow2_set_image_file_features(lower, true); if (ret) { QC_ERR(tgt->ti, "Can't set dirty bit"); - goto out; + return ret; } set_backward_merge_in_process(tgt, qcow2, true); @@ -204,7 +200,7 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) ret2 = qcow2_set_image_file_features(lower, false); if (ret2 < 0) QC_ERR(tgt->ti, "Can't unuse lower (%d)", ret2); - goto out; + return ret; } tgt->top = lower; smp_wmb(); /* Pairs with qcow2_ref_inc() */ @@ -216,8 +212,8 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) if (ret2 < 0) QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret2); qcow2_destroy(qcow2); -out: - return ret; + + return 0; } ALLOW_ERROR_INJECTION(qcow2_merge_backward, ERRNO); -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 12:37:23 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 17:37:23 +0800 Subject: [Devel] [PATCH v4 VZ9 3/5] dm-qcow2: make merge_backward command asyncronous In-Reply-To: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> References: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303093802.1233834-4-ptikhomirov@virtuozzo.com> This adds merge_backward "start", "complete" and "cancel" commands. By that we are able to split single merge_backward into two stages: start asynchronous merging and completion. That can be useful for restarting qemu process while allowing backward merging to run asynchronously in kernel. The "start" command runs merging preparations in workqueue work. After it finishes, the "complete" command can be called to finish the process and actually replace the top qcow2 with its lower. The "cancel" command forces the work to stop and flushes it. 
In case we are in completion waiting state already and there is no work running, the "cancel" command also reverts merging preparations. Locking: Data in tgt->backward_merge is protected by tgt->ctl_mutex. The "start" and "complete" commands are fully under this lock, and the "cancel" operation takes the lock explicitly and releases it for work flushing. The work also takes the lock but only when updating tgt->backward_merge data. For checks, if the work was canceled in the middle, we read the state without locking as we don't modify the state there, also we would re-check the state again before exiting the work function under lock. Now on target suspend we "cancel" the currently running backward merge; previously we were just hanging until the backward merge had finished, possibly for a long time, so cancelling seems cleaner. Though we don't really expect the hypervisor to suspend the target in the middle of a backward merge that it started itself. https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov -- v2: Cancel from BACKWARD_MERGE_START state should not try to stop the work via BACKWARD_MERGE_STOP state, else we will deadlock in this state. 
--- drivers/md/dm-qcow2-cmd.c | 142 +++++++++++++++++++++++++++++++---- drivers/md/dm-qcow2-target.c | 6 ++ drivers/md/dm-qcow2.h | 19 +++++ 3 files changed, 153 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c index 7b4b0ee68ad9f..04a992f3ebba6 100644 --- a/drivers/md/dm-qcow2-cmd.c +++ b/drivers/md/dm-qcow2-cmd.c @@ -52,6 +52,8 @@ static void service_qio_endio(struct qcow2_target *tgt, struct qio *qio, wake_up(&tgt->service_wq); } +static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt); + static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2, loff_t end, loff_t step, unsigned int bi_op, u8 qio_flags) { @@ -63,7 +65,7 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2, WRITE_ONCE(service_status, BLK_STS_OK); for (pos = 0; pos < end; pos += step) { - if (fatal_signal_pending(current)) { + if (qcow2_backward_merge_should_stop(tgt)) { ret = -EINTR; break; } @@ -161,10 +163,11 @@ static void set_backward_merge_in_process(struct qcow2_target *tgt, qcow2_submit_embedded_qios(tgt, &list); } -static int qcow2_merge_backward(struct qcow2_target *tgt) +static int qcow2_merge_backward_start(struct qcow2_target *tgt) { struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower; - int ret, ret2; + + lockdep_assert_held(&tgt->ctl_mutex); if (!lower) return -ENOENT; @@ -174,6 +177,35 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) return -EOPNOTSUPP; if (lower->hdr.size < qcow2->hdr.size) return -EBADSLT; + + if (tgt->backward_merge.state != BACKWARD_MERGE_STOPPED) + return -EBUSY; + tgt->backward_merge.state = BACKWARD_MERGE_START; + tgt->backward_merge.error = 0; + + schedule_work(&tgt->backward_merge.work); + return 0; +} +ALLOW_ERROR_INJECTION(qcow2_merge_backward_start, ERRNO); + +void qcow2_merge_backward_work(struct work_struct *work) +{ + struct qcow2_target *tgt = container_of(work, struct qcow2_target, + backward_merge.work); + struct qcow2 *qcow2, 
*lower; + int ret, ret2; + + mutex_lock(&tgt->ctl_mutex); + if (tgt->backward_merge.state != BACKWARD_MERGE_START) { + mutex_unlock(&tgt->ctl_mutex); + return; + } + tgt->backward_merge.state = BACKWARD_MERGE_RUN; + mutex_unlock(&tgt->ctl_mutex); + + qcow2 = tgt->top; + lower = qcow2->lower; + /* * Break all COW clus at L1 level. Otherwise, later * there would be problems with unusing them: @@ -183,13 +215,13 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) ret = qcow2_break_l1cow(tgt); if (ret) { QC_ERR(tgt->ti, "Can't break L1 COW"); - return ret; + goto out_err; } ret = qcow2_set_image_file_features(lower, true); if (ret) { QC_ERR(tgt->ti, "Can't set dirty bit"); - return ret; + goto out_err; } set_backward_merge_in_process(tgt, qcow2, true); @@ -200,22 +232,85 @@ static int qcow2_merge_backward(struct qcow2_target *tgt) ret2 = qcow2_set_image_file_features(lower, false); if (ret2 < 0) QC_ERR(tgt->ti, "Can't unuse lower (%d)", ret2); - return ret; } + +out_err: + mutex_lock(&tgt->ctl_mutex); + if (ret) { + /* Error */ + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + tgt->backward_merge.error = ret; + } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) { + /* Merge is canceled */ + set_backward_merge_in_process(tgt, qcow2, false); + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + tgt->backward_merge.error = -EINTR; + } else { + /* Finish merge */ + tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION; + } + mutex_unlock(&tgt->ctl_mutex); +} + +static int qcow2_merge_backward_complete(struct qcow2_target *tgt) +{ + struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower; + int ret; + + lockdep_assert_held(&tgt->ctl_mutex); + + if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION) + return -EBUSY; + tgt->top = lower; smp_wmb(); /* Pairs with qcow2_ref_inc() */ qcow2_inflight_ref_switch(tgt); /* Pending qios */ qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */ qcow2->lower = NULL; - ret2 = 
qcow2_set_image_file_features(qcow2, false); - if (ret2 < 0) - QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret2); + ret = qcow2_set_image_file_features(qcow2, false); + if (ret < 0) + QC_ERR(tgt->ti, "Can't unuse merged img (%d)", ret); qcow2_destroy(qcow2); + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + return 0; } -ALLOW_ERROR_INJECTION(qcow2_merge_backward, ERRNO); +ALLOW_ERROR_INJECTION(qcow2_merge_backward_complete, ERRNO); + +void qcow2_merge_backward_cancel(struct qcow2_target *tgt) +{ + bool flush = false; + + mutex_lock(&tgt->ctl_mutex); + if (tgt->backward_merge.state == BACKWARD_MERGE_STOPPED) { + mutex_unlock(&tgt->ctl_mutex); + return; + } + + if (tgt->backward_merge.state == BACKWARD_MERGE_START) { + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + flush = true; + } else if (tgt->backward_merge.state == BACKWARD_MERGE_RUN) { + tgt->backward_merge.state = BACKWARD_MERGE_STOP; + flush = true; + } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) { + flush = true; + } else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) { + set_backward_merge_in_process(tgt, tgt->top, false); + tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + } + mutex_unlock(&tgt->ctl_mutex); + + if (flush) + flush_work(&tgt->backward_merge.work); +} + +static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt) +{ + return READ_ONCE(tgt->backward_merge.state) == BACKWARD_MERGE_STOP; +} static struct qcow2 *qcow2_get_img(struct qcow2_target *tgt, u32 img_id, u8 *ref_index) { @@ -374,11 +469,19 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv, } ret = qcow2_get_event(tgt, result, maxlen); goto out; + } else if (!strcmp(argv[0], "merge_backward")) { + if (argc != 2) { + ret = -EINVAL; + goto out; + } + if (!strcmp(argv[1], "cancel")) { + qcow2_merge_backward_cancel(tgt); + ret = 0; + goto out; + } } - ret = mutex_lock_killable(&tgt->ctl_mutex); - if (ret) - goto out; + mutex_lock(&tgt->ctl_mutex); if 
(!strcmp(argv[0], "get_errors")) { ret = qcow2_get_errors(tgt, result, maxlen); @@ -388,7 +491,18 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv, } else if (!strcmp(argv[0], "merge_forward")) { ret = qcow2_merge_forward(tgt); } else if (!strcmp(argv[0], "merge_backward")) { - ret = qcow2_merge_backward(tgt); + if (argc != 2) { + ret = -EINVAL; + mutex_unlock(&tgt->ctl_mutex); + goto out; + } + if (!strcmp(argv[1], "start")) { + ret = qcow2_merge_backward_start(tgt); + } else if (!strcmp(argv[1], "complete")) { + ret = qcow2_merge_backward_complete(tgt); + } else { + ret = -ENOTTY; + } } else { ret = -ENOTTY; } diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c index 540c03cb3c44f..6e2e583ba0b8b 100644 --- a/drivers/md/dm-qcow2-target.c +++ b/drivers/md/dm-qcow2-target.c @@ -25,6 +25,8 @@ static void qcow2_set_service_operations(struct dm_target *ti, bool allowed) mutex_lock(&tgt->ctl_mutex); tgt->service_operations_allowed = allowed; mutex_unlock(&tgt->ctl_mutex); + if (!allowed) + qcow2_merge_backward_cancel(tgt); } static void qcow2_set_wants_suspend(struct dm_target *ti, bool wants) { @@ -251,6 +253,7 @@ static void qcow2_tgt_destroy(struct qcow2_target *tgt) /* Now kill the queue */ destroy_workqueue(tgt->wq); } + qcow2_merge_backward_cancel(tgt); mempool_destroy(tgt->qio_pool); mempool_destroy(tgt->qrq_pool); @@ -494,6 +497,9 @@ static struct qcow2_target *alloc_qcow2_target(struct dm_target *ti) timer_setup(&tgt->enospc_timer, qcow2_enospc_timer, 0); ti->private = tgt; tgt->ti = ti; + + INIT_WORK(&tgt->backward_merge.work, qcow2_merge_backward_work); + qcow2_set_service_operations(ti, false); return tgt; diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h index a89fe3db2196d..bebfdc50ed6d4 100644 --- a/drivers/md/dm-qcow2.h +++ b/drivers/md/dm-qcow2.h @@ -149,6 +149,20 @@ struct md_page { struct list_head wpc_readers_wait_list; }; +enum qcow2_backward_merge_state { + BACKWARD_MERGE_STOPPED = 0, + 
BACKWARD_MERGE_START, + BACKWARD_MERGE_RUN, + BACKWARD_MERGE_WAIT_COMPLETION, + BACKWARD_MERGE_STOP, +}; + +struct qcow2_backward_merge { + struct work_struct work; + enum qcow2_backward_merge_state state; + int error; +}; + struct qcow2_target { struct dm_target *ti; #define QCOW2_QRQ_POOL_SIZE 512 /* Twice nr_requests from blk_mq_init_sched() */ @@ -180,6 +194,8 @@ struct qcow2_target { struct work_struct event_work; spinlock_t event_lock; struct mutex ctl_mutex; + + struct qcow2_backward_merge backward_merge; }; enum { @@ -375,6 +391,9 @@ int qcow2_inflight_ref_switch(struct qcow2_target *tgt); void qcow2_flush_deferred_activity(struct qcow2_target *tgt, struct qcow2 *qcow2); int qcow2_truncate_safe(struct file *file, loff_t new_len); +void qcow2_merge_backward_work(struct work_struct *work); +void qcow2_merge_backward_cancel(struct qcow2_target *tgt); + static inline struct qcow2_target *to_qcow2_target(struct dm_target *ti) { return ti->private; -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 12:37:24 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 17:37:24 +0800 Subject: [Devel] [PATCH v4 VZ9 4/5] dm-qcow2: add merge_backward set_eventfd command In-Reply-To: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> References: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> Message-ID: <20250303093802.1233834-5-ptikhomirov@virtuozzo.com> This eventfd can be used to get an event when merge_backward start work has finished and is waiting for completion. Note: The eventfd can be changed even while work is running. Locking: The backward_merge.eventfd_ctx is protected from being released by tgt->ctl_mutex. https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov -- v2: Always report that work finished, e.g. also on error or when it was canceled, this should be more consistent from the userspace perspective. v4: Address Andrey's review: signal that we are at completion waiting on change of eventfd. 
--- drivers/md/dm-qcow2-cmd.c | 42 ++++++++++++++++++++++++++++++++++++++- drivers/md/dm-qcow2.h | 2 ++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c index 04a992f3ebba6..f16b4f731ca5a 100644 --- a/drivers/md/dm-qcow2-cmd.c +++ b/drivers/md/dm-qcow2-cmd.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include "dm-qcow2.h" @@ -197,6 +199,8 @@ void qcow2_merge_backward_work(struct work_struct *work) mutex_lock(&tgt->ctl_mutex); if (tgt->backward_merge.state != BACKWARD_MERGE_START) { + if (tgt->backward_merge.eventfd_ctx) + eventfd_signal(tgt->backward_merge.eventfd_ctx, 1); mutex_unlock(&tgt->ctl_mutex); return; } @@ -249,6 +253,8 @@ void qcow2_merge_backward_work(struct work_struct *work) /* Finish merge */ tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION; } + if (tgt->backward_merge.eventfd_ctx) + eventfd_signal(tgt->backward_merge.eventfd_ctx, 1); mutex_unlock(&tgt->ctl_mutex); } @@ -312,6 +318,27 @@ static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt) return READ_ONCE(tgt->backward_merge.state) == BACKWARD_MERGE_STOP; } +#define QCOW2_FILE_UNBIND -1 + +static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd) +{ + struct eventfd_ctx *ctx = NULL; + + ctx = efd == QCOW2_FILE_UNBIND ? 
NULL : eventfd_ctx_fdget(efd); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + mutex_lock(&tgt->ctl_mutex); + swap(ctx, tgt->backward_merge.eventfd_ctx); + if (ctx) + eventfd_ctx_put(ctx); + if (tgt->backward_merge.eventfd_ctx && + tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) + eventfd_signal(tgt->backward_merge.eventfd_ctx, 1); + mutex_unlock(&tgt->ctl_mutex); + return 0; +} + static struct qcow2 *qcow2_get_img(struct qcow2_target *tgt, u32 img_id, u8 *ref_index) { struct qcow2 *qcow2; @@ -470,14 +497,27 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv, ret = qcow2_get_event(tgt, result, maxlen); goto out; } else if (!strcmp(argv[0], "merge_backward")) { - if (argc != 2) { + if (argc < 2) { ret = -EINVAL; goto out; } if (!strcmp(argv[1], "cancel")) { + if (argc != 2) { + ret = -EINVAL; + goto out; + } qcow2_merge_backward_cancel(tgt); ret = 0; goto out; + } else if (!strcmp(argv[1], "set_eventfd")) { + int efd; + + if (argc != 3 || kstrtoint(argv[2], 10, &efd)) { + ret = -EINVAL; + goto out; + } + ret = qcow2_merge_backward_set_eventfd(tgt, efd); + goto out; } } diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h index bebfdc50ed6d4..c4956e3fd0eb7 100644 --- a/drivers/md/dm-qcow2.h +++ b/drivers/md/dm-qcow2.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "dm-core.h" #define DM_MSG_PREFIX "qcow2" @@ -161,6 +162,7 @@ struct qcow2_backward_merge { struct work_struct work; enum qcow2_backward_merge_state state; int error; + struct eventfd_ctx *eventfd_ctx; }; struct qcow2_target { -- 2.48.1 From ptikhomirov at virtuozzo.com Mon Mar 3 12:37:25 2025 From: ptikhomirov at virtuozzo.com (Pavel Tikhomirov) Date: Mon, 3 Mar 2025 17:37:25 +0800 Subject: [Devel] [PATCH v4 VZ9 5/5] dm-qcow2: add merge_backward progress command In-Reply-To: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> References: <20250303093802.1233834-1-ptikhomirov@virtuozzo.com> Message-ID: 
<20250303093802.1233834-6-ptikhomirov@virtuozzo.com> This allows one to see the progress of backward merge. It shows the stage we are at and for iterative stages it provides progress in form of how many iterations are done and how many iterations there are in total. Locking: The progress data consistency is protected by tgt->ctl_mutex, we always update stage and error consistently under lock. Inside iterative stages for progress updating we have xchg instead of lock so that changes to progress are atomic and imply memory barrier (this way we would not see progress greater than max_progress in progress reporting), but at the same time there is less contention on tgt->ctl_mutex. https://virtuozzo.atlassian.net/browse/VSTOR-100466 Signed-off-by: Pavel Tikhomirov -- v3: Address Kostya's review comments: move progress printing out of lock, remove excess updates of max_progress, make progress updates without lock. --- drivers/md/dm-qcow2-cmd.c | 83 +++++++++++++++++++++++++++++++++++++++ drivers/md/dm-qcow2.h | 14 +++++++ 2 files changed, 97 insertions(+) diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c index f16b4f731ca5a..4d50b2f9284e4 100644 --- a/drivers/md/dm-qcow2-cmd.c +++ b/drivers/md/dm-qcow2-cmd.c @@ -54,6 +54,10 @@ static void service_qio_endio(struct qcow2_target *tgt, struct qio *qio, wake_up(&tgt->service_wq); } +static void backward_merge_update_progress(struct qcow2_target *tgt, + long long progress); +static void backward_merge_update_max_progress(struct qcow2_target *tgt, + long long max_progress); static bool qcow2_backward_merge_should_stop(struct qcow2_target *tgt); static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2, @@ -66,7 +70,10 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2, WRITE_ONCE(service_status, BLK_STS_OK); + backward_merge_update_max_progress(tgt, end); for (pos = 0; pos < end; pos += step) { + backward_merge_update_progress(tgt, pos); + if 
(qcow2_backward_merge_should_stop(tgt)) { ret = -EINTR; break; @@ -165,6 +172,66 @@ static void set_backward_merge_in_process(struct qcow2_target *tgt, qcow2_submit_embedded_qios(tgt, &list); } +static void __backward_merge_update_stage(struct qcow2_target *tgt, + enum qcow2_backward_merge_stage stage) +{ + tgt->backward_merge.stage = stage; + tgt->backward_merge.progress = 0; + tgt->backward_merge.max_progress = 0; +} + +static void backward_merge_update_stage(struct qcow2_target *tgt, + enum qcow2_backward_merge_stage stage) +{ + mutex_lock(&tgt->ctl_mutex); + __backward_merge_update_stage(tgt, stage); + mutex_unlock(&tgt->ctl_mutex); +} + +static void backward_merge_update_max_progress(struct qcow2_target *tgt, + long long max_progress) +{ + xchg(&tgt->backward_merge.max_progress, max_progress); +} + +static void backward_merge_update_progress(struct qcow2_target *tgt, + long long progress) +{ + xchg(&tgt->backward_merge.progress, progress); +} + +char *backward_merge_stage_names[] = { + "none", + "break_l1cow", + "set_dirty", + "running", + "waiting_completion", + "completing", + "fail", +}; + +static int qcow2_merge_backward_progress(struct qcow2_target *tgt, + char *result, unsigned int maxlen) +{ + struct qcow2_backward_merge backward_merge; + unsigned int sz = 0; + int ret; + + BUILD_BUG_ON(ARRAY_SIZE(backward_merge_stage_names) != BACKWARD_MERGE_STAGE_MAX); + + mutex_lock(&tgt->ctl_mutex); + backward_merge = tgt->backward_merge; + mutex_unlock(&tgt->ctl_mutex); + + ret = DMEMIT("stage=%s\nprogress=%lld\nmax_progress=%lld\nerror=%d\n", + backward_merge_stage_names[backward_merge.stage], + backward_merge.progress, + backward_merge.max_progress, + backward_merge.error); + + return ret ? 
1 : 0; +} + static int qcow2_merge_backward_start(struct qcow2_target *tgt) { struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower; @@ -205,6 +272,7 @@ void qcow2_merge_backward_work(struct work_struct *work) return; } tgt->backward_merge.state = BACKWARD_MERGE_RUN; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_BREAK_L1COW); mutex_unlock(&tgt->ctl_mutex); qcow2 = tgt->top; @@ -222,6 +290,7 @@ void qcow2_merge_backward_work(struct work_struct *work) goto out_err; } + backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_SET_DIRTY); ret = qcow2_set_image_file_features(lower, true); if (ret) { QC_ERR(tgt->ti, "Can't set dirty bit"); @@ -230,6 +299,7 @@ void qcow2_merge_backward_work(struct work_struct *work) set_backward_merge_in_process(tgt, qcow2, true); /* Start merge */ + backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_RUNNING); ret = qcow2_merge_common(tgt); if (ret) { set_backward_merge_in_process(tgt, qcow2, false); @@ -244,14 +314,17 @@ void qcow2_merge_backward_work(struct work_struct *work) /* Error */ tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; tgt->backward_merge.error = ret; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_FAIL); } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) { /* Merge is canceled */ set_backward_merge_in_process(tgt, qcow2, false); tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; tgt->backward_merge.error = -EINTR; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_FAIL); } else { /* Finish merge */ tgt->backward_merge.state = BACKWARD_MERGE_WAIT_COMPLETION; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_WAITING_COMPLETION); } if (tgt->backward_merge.eventfd_ctx) eventfd_signal(tgt->backward_merge.eventfd_ctx, 1); @@ -267,6 +340,7 @@ static int qcow2_merge_backward_complete(struct qcow2_target *tgt) if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION) return -EBUSY; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_COMPLETING); tgt->top = lower; 
smp_wmb(); /* Pairs with qcow2_ref_inc() */ @@ -280,6 +354,7 @@ static int qcow2_merge_backward_complete(struct qcow2_target *tgt) qcow2_destroy(qcow2); tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_NONE); return 0; } @@ -306,6 +381,7 @@ void qcow2_merge_backward_cancel(struct qcow2_target *tgt) } else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) { set_backward_merge_in_process(tgt, tgt->top, false); tgt->backward_merge.state = BACKWARD_MERGE_STOPPED; + __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_NONE); } mutex_unlock(&tgt->ctl_mutex); @@ -518,6 +594,13 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv, } ret = qcow2_merge_backward_set_eventfd(tgt, efd); goto out; + } else if (!strcmp(argv[1], "progress")) { + if (argc != 2) { + ret = -EINVAL; + goto out; + } + ret = qcow2_merge_backward_progress(tgt, result, maxlen); + goto out; } } diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h index c4956e3fd0eb7..ed7cf79348052 100644 --- a/drivers/md/dm-qcow2.h +++ b/drivers/md/dm-qcow2.h @@ -158,11 +158,25 @@ enum qcow2_backward_merge_state { BACKWARD_MERGE_STOP, }; +enum qcow2_backward_merge_stage { + BACKWARD_MERGE_STAGE_NONE = 0, + BACKWARD_MERGE_STAGE_BREAK_L1COW, + BACKWARD_MERGE_STAGE_SET_DIRTY, + BACKWARD_MERGE_STAGE_RUNNING, + BACKWARD_MERGE_STAGE_WAITING_COMPLETION, + BACKWARD_MERGE_STAGE_COMPLETING, + BACKWARD_MERGE_STAGE_FAIL, + BACKWARD_MERGE_STAGE_MAX, +}; + struct qcow2_backward_merge { struct work_struct work; enum qcow2_backward_merge_state state; int error; struct eventfd_ctx *eventfd_ctx; + enum qcow2_backward_merge_stage stage; + long long progress; + long long max_progress; }; struct qcow2_target { -- 2.48.1