[Devel] [PATCH VZ9 v3] dm-qcow2: allow specifying depth to merge intermediate images

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Thu Sep 18 13:45:21 MSK 2025


dm-qcow2 is almost ready to merge intermediate images, thanks
to separated metadata tables and qio being per-qcow2.
So this patch mostly propagates which qcow2 we want to use
instead of tgt->top.

qcow2_merge_backward_complete is a bit harder due to in-middle
replacement and needing to recalculate img_id for a part of
image chain.

https://virtuozzo.atlassian.net/browse/VSTOR-101375
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---

v2:
 - changed depth to u32
 - reverted changes in set_backward_merge_in_process(): do queue stop
for not top qcow2 images at it's own level
 - changed saving qcow2 in backward_merge to saving pointer to the
qcow2 which we later intend to change to lower delta

v3: simplified depth lookup in qcow2_merge_backward_start()

 drivers/md/dm-qcow2-cmd.c | 59 ++++++++++++++++++++++++++++-----------
 drivers/md/dm-qcow2.h     |  1 +
 2 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/drivers/md/dm-qcow2-cmd.c b/drivers/md/dm-qcow2-cmd.c
index e77052ad61e6b..2e4384360ca3d 100644
--- a/drivers/md/dm-qcow2-cmd.c
+++ b/drivers/md/dm-qcow2-cmd.c
@@ -116,9 +116,9 @@ static int qcow2_service_iter(struct qcow2_target *tgt, struct qcow2 *qcow2,
 }
 ALLOW_ERROR_INJECTION(qcow2_service_iter, ERRNO);
 
-static int qcow2_merge_common(struct qcow2_target *tgt)
+static int qcow2_merge_common(struct qcow2_target *tgt, struct qcow2 *qcow2)
 {
-	struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+	struct qcow2 *lower = qcow2->lower;
 	u32 clu_size = qcow2->clu_size;
 	loff_t end = lower->hdr.size;
 
@@ -139,9 +139,8 @@ static int qcow2_merge_forward(struct qcow2_target *tgt)
 }
 ALLOW_ERROR_INJECTION(qcow2_merge_forward, ERRNO);
 
-static int qcow2_break_l1cow(struct qcow2_target *tgt)
+static int qcow2_break_l1cow(struct qcow2_target *tgt, struct qcow2 *qcow2)
 {
-	struct qcow2 *qcow2 = tgt->top;
 	loff_t end = qcow2->hdr.size;
 	loff_t step = (u64)qcow2->l2_entries * qcow2->clu_size;
 
@@ -154,6 +153,15 @@ static void set_backward_merge_in_process(struct qcow2_target *tgt,
 {
 	LIST_HEAD(list);
 
+	/*
+	 * There are no writes if it is not the top qcow2 image.
+	 * so we do not need to stop and flush requests when setting
+	 */
+	if (qcow2 != tgt->top && set) {
+		qcow2->backward_merge_in_process = set;
+		return;
+	}
+
 	/*
 	 * To avoid race between allocations and COWS
 	 * we completely stop queueing qios and wait
@@ -240,13 +248,20 @@ static int qcow2_merge_backward_progress(struct qcow2_target *tgt,
 
 static int qcow2_merge_backward_set_eventfd(struct qcow2_target *tgt, int efd);
 
-static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
+static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd, u32 depth)
 {
-	struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+	struct qcow2 *qcow2, *lower, **pqcow2 = &tgt->top;
 	int ret;
 
 	lockdep_assert_held(&tgt->ctl_mutex);
 
+	while (depth-- && *pqcow2)
+		pqcow2 = &(*pqcow2)->lower;
+
+	qcow2 = *pqcow2;
+	if (!qcow2)
+		return -ENOENT;
+	lower = qcow2->lower;
 	if (!lower)
 		return -ENOENT;
 	if (!(lower->file->f_mode & FMODE_WRITE))
@@ -263,6 +278,7 @@ static int qcow2_merge_backward_start(struct qcow2_target *tgt, int efd)
 	if (ret)
 		return ret;
 
+	tgt->backward_merge.pqcow2 = pqcow2;
 	tgt->backward_merge.state = BACKWARD_MERGE_START;
 	__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_START);
 	tgt->backward_merge.error = 0;
@@ -291,7 +307,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
 	__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_BREAK_L1COW);
 	mutex_unlock(&tgt->ctl_mutex);
 
-	qcow2 = tgt->top;
+	qcow2 = *tgt->backward_merge.pqcow2;
 	lower = qcow2->lower;
 
 	/*
@@ -300,7 +316,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
 	 * we'd have to freeze IO going to all data clusters
 	 * under every L1 entry related to several snapshots.
 	 */
-	ret = qcow2_break_l1cow(tgt);
+	ret = qcow2_break_l1cow(tgt, qcow2);
 	if (ret) {
 		QC_ERR(tgt->ti, "Can't break L1 COW");
 		goto out_err;
@@ -316,7 +332,7 @@ void qcow2_merge_backward_work(struct work_struct *work)
 
 	/* Start merge */
 	backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_RUNNING);
-	ret = qcow2_merge_common(tgt);
+	ret = qcow2_merge_common(tgt, qcow2);
 	if (ret) {
 		set_backward_merge_in_process(tgt, qcow2, false);
 		ret2 = qcow2_set_image_file_features(lower, false);
@@ -350,16 +366,23 @@ void qcow2_merge_backward_work(struct work_struct *work)
 
 static int qcow2_merge_backward_complete(struct qcow2_target *tgt)
 {
-	struct qcow2 *qcow2 = tgt->top, *lower = qcow2->lower;
+	struct qcow2 *qcow2 = *tgt->backward_merge.pqcow2, *i;
 	int ret;
 
 	lockdep_assert_held(&tgt->ctl_mutex);
 
 	if (tgt->backward_merge.state != BACKWARD_MERGE_WAIT_COMPLETION)
 		return -EBUSY;
+
+	*tgt->backward_merge.pqcow2 = qcow2->lower;
+
+	i = tgt->top;
+	while (i != qcow2->lower) {
+		i->img_id--;
+		i = i->lower;
+	}
+
 	__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_COMPLETING);
-
-	tgt->top = lower;
 	smp_wmb(); /* Pairs with qcow2_ref_inc() */
 	qcow2_inflight_ref_switch(tgt); /* Pending qios */
 	qcow2_flush_deferred_activity(tgt, qcow2); /* Delayed md pages */
@@ -396,7 +419,7 @@ void qcow2_merge_backward_cancel(struct qcow2_target *tgt)
 	} else if (tgt->backward_merge.state == BACKWARD_MERGE_STOP) {
 		flush = true;
 	} else if (tgt->backward_merge.state == BACKWARD_MERGE_WAIT_COMPLETION) {
-		set_backward_merge_in_process(tgt, tgt->top, false);
+		set_backward_merge_in_process(tgt, *tgt->backward_merge.pqcow2, false);
 		tgt->backward_merge.state = BACKWARD_MERGE_STOPPED;
 		__backward_merge_update_stage(tgt, BACKWARD_MERGE_STAGE_NONE);
 	}
@@ -571,7 +594,7 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
 {
 	struct qcow2_target *tgt = to_qcow2_target(ti);
 	int ret = -EPERM;
-	u32 val, val2;
+	u32 val, val2, depth = 0;
 	int efd;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -652,11 +675,15 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
 	} else if (!strcmp(argv[0], "merge_backward")) {
 		/* argc >= 2 */
 		if (!strcmp(argv[1], "start")) {
-			if (argc != 3 || kstrtoint(argv[2], 10, &efd) || efd < 0) {
+			if (argc < 3 || argc > 4 || kstrtoint(argv[2], 10, &efd) || efd < 0) {
 				ret = -EINVAL;
 				goto out_unlock;
 			}
-			ret = qcow2_merge_backward_start(tgt, efd);
+			if (argc == 4 && kstrtou32(argv[3], 10, &depth)) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			ret = qcow2_merge_backward_start(tgt, efd, depth);
 		} else if (!strcmp(argv[1], "complete")) {
 			if (argc != 2) {
 				ret = -EINVAL;
diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
index 5aa00c6a5ebd5..c50b578fb8b50 100644
--- a/drivers/md/dm-qcow2.h
+++ b/drivers/md/dm-qcow2.h
@@ -172,6 +172,7 @@ enum qcow2_backward_merge_stage {
 
 struct qcow2_backward_merge {
 	struct work_struct work;
+	struct qcow2 **pqcow2;
 	enum qcow2_backward_merge_state state;
 	int error;
 	struct eventfd_ctx *eventfd_ctx;
-- 
2.43.5



More information about the Devel mailing list