[Devel] [PATCH VZ9] dm-qcow2: allow specifying depth to merge intermediate images

Vasileios Almpanis vasileios.almpanis at virtuozzo.com
Fri Sep 12 16:48:13 MSK 2025


Looks good to me.
Reviewed-by: Vasileios Almpanis <vasileios.almpanis at virtuozzo.com><mailto:andrey.zhadchenko at virtuozzo.com>
________________________________
From: devel-bounces at openvz.org <devel-bounces at openvz.org> on behalf of Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
Sent: Monday, September 8, 2025 7:27 PM
To: Konstantin Khorenko <khorenko at virtuozzo.com>; Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Cc: devel at openvz.org <devel at openvz.org>
Subject: [Devel] [PATCH VZ9] dm-qcow2: allow specifying depth to merge intermediate images

dm-qcow2 is almost ready to merge intermediate images, thanks
to separated metadata tables and qio being per-qcow2.
So this patch mostly propagates which qcow2 we want to use
instead of tgt->top.

qcow2_merge_backward_complete is a bit harder due to in-middle
replacement and needing to recalculate img_id for a part of
image chain.

https://virtuozzo.atlassian.net/browse/VSTOR-101375
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 drivers/md/dm-qcow2-cmd.c | 82 ++++++++++++++++++++++++++++-----------
 drivers/md/dm-qcow2.h     |  1 +
 2 files changed, 61 insertions(+), 22 deletions(-)

diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c
index e77052ad61e6b..d59fa82dbffbc 100644
--- a/drivers/md/dm-qcow2-cmd.c
+++ b/drivers/md/dm-qcow2-cmd.c
@@ -116,9 +116,9 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
 }
 ALLOW_ERROR_INJECTION(qcow2_service_iter, ERRNO);

-static int qcow2_merge_common(struct qcow2_target *tgt)
+static int qcow2_merge_common(struct qcow2_target *tgt, struct qcow2 *qcow2)
 {
-       struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+       struct qcow2 *lower = qcow2->lower;
         u32 clu_size = qcow2->clu_size;
         loff_t end = lower->hdr.size;

@@ -139,9 +139,8 @@ static int qcow2_merge_forward(struct qcow2_target *tgt)
 }
 ALLOW_ERROR_INJECTION(qcow2_merge_forward, ERRNO);

-static int qcow2_break_l1cow(struct qcow2_target *tgt)
+static int qcow2_break_l1cow(struct qcow2_target *tgt, struct qcow2 *qcow2)
 {
-       struct qcow2 *qcow2 = tgt->top;
         loff_t end = qcow2->hdr.size;
         loff_t step = (u64)qcow2->l2_entries * qcow2->clu_size;

@@ -152,25 +151,35 @@ static int qcow2_break_l1cow(struct qcow2_target *tgt)
 static void set_backward_merge_in_process(struct qcow2_target *tgt,
                                      struct qcow2 *qcow2, bool set)
 {
+       struct qcow2 *top = tgt->top;
         LIST_HEAD(list);

+       /*
+        * There are no writes if it is not the top qcow2 image.
+        * so we do not need to stop and flush requests when setting
+        */
+       if (qcow2 != top && set) {
+               qcow2->backward_merge_in_process = set;
+               return;
+       }
+
         /*
          * To avoid race between allocations and COWS
          * we completely stop queueing qios and wait
          * for pending qios. Lock is for visability.
          */
-       spin_lock_irq(&qcow2->deferred_lock);
-       qcow2->pause_submitting_qios = true;
-       spin_unlock_irq(&qcow2->deferred_lock);
+       spin_lock_irq(&top->deferred_lock);
+       top->pause_submitting_qios = true;
+       spin_unlock_irq(&top->deferred_lock);
         qcow2_inflight_ref_switch(tgt);

         /* queue is stopped */
-       spin_lock_irq(&qcow2->deferred_lock);
+       spin_lock_irq(&top->deferred_lock);
         WARN_ON_ONCE(qcow2->backward_merge_in_process == set);
         qcow2->backward_merge_in_process = set;
-       qcow2->pause_submitting_qios = false;
-       list_splice_init(&qcow2->paused_qios, &list);
-       spin_unlock_irq(&qcow2->deferred_lock);
+       top->pause_submitting_qios = false;
+       list_splice_init(&top->paused_qios, &list);
+       spin_unlock_irq(&top->deferred_lock);

         qcow2_submit_embedded_qios(tgt, &list);
 }
@@ -240,11 +249,15 @@ static int qcow2_merge_backward_progress(struct qcow2_target *tgt,

 static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd);

-static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
+static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd, int depth)
 {
-       struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+       struct qcow2 *qcow2 = tgt->top, *lower;
         int ret;

+       while (depth-- > 0 && qcow2->lower)
+               qcow2 = qcow2->lower;
+       lower = qcow2->lower;
+
         lockdep_assert_held(&tgt->ctl_mutex);

         if (!lower)
@@ -263,6 +276,7 @@ static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
         if (ret)
                 return ret;

+       tgt->backward_merge.qcow2 = qcow2;
         tgt->backward_merge.state = BACKWARD_MERGE_START;
         __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_START);
         tgt->backward_merge.error = 0;
@@ -291,7 +305,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
         __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_BREAK_L1COW);
         mutex_unlock(&tgt->ctl_mutex);

-       qcow2 = tgt->top;
+       qcow2 = tgt->backward_merge.qcow2;
         lower = qcow2->lower;

         /*
@@ -300,7 +314,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
          * we'd have to freeze IO going to all data clusters
          * under every L1 entry related to several snapshots.
          */
-       ret = qcow2_break_l1cow(tgt);
+       ret = qcow2_break_l1cow(tgt, qcow2);
         if (ret) {
                 QC_ERR(tgt->ti, "Can't break L1 COW");
                 goto out_err;
@@ -316,7 +330,7 @@ void qcow2_merge_backward_work(struct work_struct *work)

         /* Start merge */
         backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_RUNNING);
-       ret = qcow2_merge_common(tgt);
+       ret = qcow2_merge_common(tgt, qcow2);
         if (ret) {
                 set_backward_merge_in_process(tgt, qcow2, false);
                 ret2 = qcow2_set_image_file_features(lower, false);
@@ -357,9 +371,29 @@ static int qcow2_merge_backward_complete(struct qcow2_target *tgt)

         if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION)
                 return -EBUSY;
+
+       if (tgt->top == tgt->backward_merge.qcow2) {
+               tgt->top = lower;
+       } else {
+               struct qcow2 *top = tgt->top;
+
+               while (top && top->lower != tgt->backward_merge.qcow2)
+                       top = top->lower;
+               if (!top)
+                       return -EFAULT;
+
+               qcow2 = tgt->backward_merge.qcow2;
+               lower = qcow2->lower;
+               top->lower = lower;
+
+               top = tgt->top;
+               while (top != lower) {
+                       top->img_id--;
+                       top = top->lower;
+               }
+       }
+
         __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_COMPLETING);
-
-       tgt->top = lower;
         smp_wmb(); /* Pairs with qcow2_ref_inc() */
         qcow2_inflight_ref_switch(tgt); /* Pending qios */
         qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */
@@ -396,7 +430,7 @@ void qcow2_merge_backward_cancel(struct qcow2_target *tgt)
         } else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
                 flush = true;
         } else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) {
-               set_backward_merge_in_process(tgt, tgt->top, false);
+               set_backward_merge_in_process(tgt, tgt->backward_merge.qcow2, false);
                 tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
                 __backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_NONE);
         }
@@ -572,7 +606,7 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
         struct qcow2_target *tgt = to_qcow2_target(ti);
         int ret = -EPERM;
         u32 val, val2;
-       int efd;
+       int efd, depth = 0;

         if (!capable(CAP_SYS_ADMIN))
                 goto out;
@@ -652,11 +686,15 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
         } else if (!strcmp(argv[0], "merge_backward")) {
                 /* argc >= 2 */
                 if (!strcmp(argv[1], "start")) {
-                       if (argc != 3 || kstrtoint(argv[2], 10, &efd) || efd < 0) {
+                       if (argc < 3 || argc > 4 || kstrtoint(argv[2], 10, &efd) || efd < 0) {
                                 ret = -EINVAL;
                                 goto out_unlock;
                         }
-                       ret = qcow2_merge_backward_start(tgt, efd);
+                       if (argc == 4 && kstrtou32(argv[3], 10, &depth)) {
+                               ret = -EINVAL;
+                               goto out_unlock;
+                       }
+                       ret = qcow2_merge_backward_start(tgt, efd, depth);
                 } else if (!strcmp(argv[1], "complete")) {
                         if (argc != 2) {
                                 ret = -EINVAL;
diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
index 5aa00c6a5ebd5..c2b2193561461 100644
--- a/drivers/md/dm-qcow2.h
+++ b/drivers/md/dm-qcow2.h
@@ -172,6 +172,7 @@ enum qcow2_backward_merge_stage {

 struct qcow2_backward_merge {
         struct work_struct work;
+       struct qcow2 *qcow2;
         enum qcow2_backward_merge_state state;
         int error;
         struct eventfd_ctx *eventfd_ctx;
--
2.43.5

_______________________________________________
Devel mailing list
Devel at openvz.org
https://lists.openvz.org/mailman/listinfo/devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openvz.org/pipermail/devel/attachments/20250912/f5511c0a/attachment-0001.html>


More information about the Devel mailing list