[Devel] [PATCH RH9 4/7] dm/dm-qcow2: add find_hole

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Mon Jul 24 10:09:40 MSK 2023


Implement find_hole() for dm-qcow2 target.
Iterate over ranges with cluster granularity until hole or data is found.
To reduce code duplication, we should use the already existing parse_metadata().
We can pretend that a seek request is a read request for metadata purposes
and then interpret the parsing result in our favor.
Since parse_metadata() supports request postponing (for example when the
requested L2 cluster is absent in RAM), we should create a separate qio
list for our queries.

https://jira.vzint.dev/browse/PSBM-145746
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 drivers/md/dm-qcow2-map.c    | 140 +++++++++++++++++++++++++++++++++++
 drivers/md/dm-qcow2-target.c |   1 +
 drivers/md/dm-qcow2.h        |   2 +
 3 files changed, 143 insertions(+)

diff --git a/drivers/md/dm-qcow2-map.c b/drivers/md/dm-qcow2-map.c
index a779889c6970..f728a52ab5e4 100644
--- a/drivers/md/dm-qcow2-map.c
+++ b/drivers/md/dm-qcow2-map.c
@@ -3980,6 +3980,14 @@ static void process_resubmit_qios(struct qcow2 *qcow2, struct list_head *qios)
 	}
 }
 
+/*
+ * Complete postponed seek qios taken from QLIST_SEEK: qio->data points to
+ * the on-stack completion that qcow2_find_hole() is blocked on, so waking
+ * it makes the seeker retry its parse_metadata() call.
+ * NOTE(review): @qcow is unused, presumably kept for symmetry with the
+ * other process_*() worker helpers — confirm.
+ */
+static void process_seek_qios(struct qcow2 *qcow, struct list_head *qios)
+{
+	struct qio *qio;
+
+	while ((qio = qio_list_pop(qios)) != NULL)
+		complete(qio->data);
+}
+
 void do_qcow2_work(struct work_struct *ws)
 {
 	struct qcow2 *qcow2 = container_of(ws, struct qcow2, worker);
@@ -3991,6 +3999,7 @@ void do_qcow2_work(struct work_struct *ws)
 	LIST_HEAD(cow_indexes_qios);
 	LIST_HEAD(cow_end_qios);
 	LIST_HEAD(resubmit_qios);
+	LIST_HEAD(seek_qios);
 	unsigned int pflags = current->flags;
 
 	current->flags |= PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
@@ -4003,6 +4012,7 @@ void do_qcow2_work(struct work_struct *ws)
 	list_splice_init(&qcow2->qios[QLIST_COW_INDEXES], &cow_indexes_qios);
 	list_splice_init(&qcow2->qios[QLIST_COW_END], &cow_end_qios);
 	list_splice_init(&qcow2->resubmit_qios, &resubmit_qios);
+	list_splice_init(&qcow2->qios[QLIST_SEEK], &seek_qios);
 	spin_unlock_irq(&qcow2->deferred_lock);
 
 	process_embedded_qios(qcow2, &embedded_qios, &deferred_qios);
@@ -4013,6 +4023,7 @@ void do_qcow2_work(struct work_struct *ws)
 	process_cow_indexes_write(qcow2, &cow_indexes_qios);
 	process_cow_end(qcow2, &cow_end_qios);
 	process_resubmit_qios(qcow2, &resubmit_qios);
+	process_seek_qios(qcow2, &seek_qios);
 
 	/* This actually submits batch of md writeback, initiated above */
 	submit_metadata_writeback(qcow2);
@@ -4235,3 +4246,132 @@ static void handle_cleanup_mask(struct qio *qio)
 		ext->cleanup_mask &= ~FREE_ALLOCATED_CLU;
 	}
 }
+
+/*
+ * Return the first sector covered by the next L2 table: skip over the
+ * clusters remaining in the L2 table that covers the qio's current sector.
+ */
+static sector_t get_next_l2(struct qio *qio)
+{
+	struct qcow2 *qcow2 = qio->qcow2;
+	loff_t start, add;
+
+	start = to_bytes(qio->bi_iter.bi_sector);
+	/* Clusters left in the current L2 table, including the current one */
+	add = qcow2->l2_entries - (start / qcow2->clu_size) % qcow2->l2_entries;
+
+	/* clu_size / to_bytes(1) converts the cluster size to sectors */
+	return qio->bi_iter.bi_sector + (qcow2->clu_size / to_bytes(1)) * add;
+}
+
+/* Return the first sector of the cluster following the qio's position. */
+static sector_t get_next_clu(struct qio *qio)
+{
+	struct qcow2 *qcow2 = qio->qcow2;
+	loff_t offset;
+
+	/* Round the byte offset up to the start of the next cluster */
+	offset = to_bytes(qio->bi_iter.bi_sector);
+	offset = offset / qcow2->clu_size;
+	offset = (offset + 1) * qcow2->clu_size;
+
+	return to_sector(offset);
+}
+
+/*
+ * llseek(SEEK_HOLE/SEEK_DATA) backend for the qcow2 target.
+ *
+ * Scan forward from @offset with cluster granularity, reusing the read
+ * path's parse_metadata(): each probed range is classified as all-zeroes,
+ * unmapped or mapped data, and the scan stops at the first hole/data
+ * boundary requested by @whence.  Returns the found byte offset, -ENXIO
+ * when SEEK_DATA reaches EOF without finding data, or a negative errno.
+ */
+loff_t qcow2_find_hole(struct dm_target *ti, loff_t offset, int whence)
+{
+	struct qcow2 *qcow2 = to_qcow2_target(ti)->top;
+	DECLARE_COMPLETION_ONSTACK(compl);
+	bool unmapped, zeroes, try_lower;
+	struct qio qio = {0}, *qptr;
+	loff_t result = -EINVAL;
+	struct qcow2_map map;
+	u32 size;
+	int ret;
+
+	/* First probe covers @offset up to the end of its cluster */
+	qio.bi_iter.bi_sector = to_sector(offset);
+	qio.bi_iter.bi_size = qcow2->clu_size - offset % qcow2->clu_size;
+
+	/* Pretend to be a read so parse_metadata() handles us as one */
+	qcow2_init_qio(&qio, REQ_OP_READ, qcow2);
+	qio.queue_list_id = QLIST_SEEK;
+	qio.data = &compl;	/* completed by process_seek_qios() if postponed */
+
+	while (qio.bi_iter.bi_sector < to_sector(qcow2->hdr.size)) {
+		qio.qcow2 = qcow2;	/* restart each range from the top delta */
+retry:
+		memset(&map, 0, sizeof(map));
+		map.qcow2 = qio.qcow2;
+		qptr = &qio;
+		ret = parse_metadata(qio.qcow2, &qptr, &map);
+		/* ENXIO has a special meaning for llseek so remap it to EINVAL */
+		if (ret < 0)
+			return (ret == -ENXIO) ? -EINVAL : ret;
+		/*
+		 * qio was postponed (e.g. the L2 cluster is absent in RAM):
+		 * wait for the worker to complete it, then parse again.
+		 */
+		if (qptr == NULL) {
+			wait_for_completion(&compl);
+			reinit_completion(&compl);
+			goto retry;
+		}
+
+calc_subclu:
+		/* Classify the head of the range; size is its byte length */
+		zeroes = unmapped = try_lower = false;
+		zeroes = (size = qio_all_zeroes_size(qio.qcow2, &qio, &map));
+		if (!size)
+			unmapped = (size = qio_unmapped_size(qio.qcow2, &qio, &map));
+		if (!size)
+			size = qio_mapped_not_zeroes_size(qio.qcow2, &qio, &map);
+		if (unmapped)
+			try_lower = maybe_mapped_in_lower_delta(qio.qcow2, &qio);
+
+		/* Unmapped here may still be mapped in a lower delta: descend */
+		if (unmapped && try_lower) {
+			loff_t end = to_bytes(qio.bi_iter.bi_sector) + qio.bi_iter.bi_size;
+
+			if (end < qio.qcow2->hdr.size) {
+				qio.qcow2 = qio.qcow2->lower;
+				goto retry;
+			}
+		}
+
+		if (whence & SEEK_HOLE) {
+			if (zeroes || unmapped) {
+				result = to_bytes(qio.bi_iter.bi_sector);
+				break;
+			} else if (size != qio.bi_iter.bi_size) {
+				/*
+				 * range starts with data subclusters and after that
+				 * some subclusters are zero or unmapped
+				 */
+				result = to_bytes(qio.bi_iter.bi_sector) + size;
+				break;
+			}
+		}
+
+		if (whence & SEEK_DATA) {
+			if (!zeroes && !unmapped) {
+				result = to_bytes(qio.bi_iter.bi_sector);
+				break;
+			} else if (size != qio.bi_iter.bi_size) {
+				/*
+				 * range starts with zero or unmapped subclusters
+				 * but after that it still can be unmapped or zero.
+				 * We do not need to parse metadata again but we
+				 * should skip these subclusters and look at the
+				 * next ones
+				 */
+				qio.bi_iter.bi_sector += to_sector(size);
+				qio.bi_iter.bi_size -= size;
+				goto calc_subclu;
+			}
+		}
+
+		/* whole L2 table is unmapped - skip to next l2 table */
+		if (!(map.level & L2_LEVEL))
+			qio.bi_iter.bi_sector = get_next_l2(&qio);
+		else
+			qio.bi_iter.bi_sector = get_next_clu(&qio);
+
+		/* Subsequent probes always cover one whole cluster */
+		qio.bi_iter.bi_size = qcow2->clu_size;
+	}
+
+	/* Never report a position before the requested offset */
+	if (result >= 0 && result < offset)
+		result = offset;
+
+	/* Scanned past EOF: EOF counts as a hole; no data was found */
+	if (qio.bi_iter.bi_sector >= to_sector(qcow2->hdr.size)) {
+		if (whence & SEEK_HOLE)
+			result = qcow2->hdr.size;
+		if (whence & SEEK_DATA)
+			result = -ENXIO;
+	}
+
+	return result;
+}
diff --git a/drivers/md/dm-qcow2-target.c b/drivers/md/dm-qcow2-target.c
index ffcab9b574ab..88a7af661829 100644
--- a/drivers/md/dm-qcow2-target.c
+++ b/drivers/md/dm-qcow2-target.c
@@ -1020,6 +1020,7 @@ static struct target_type qcow2_target = {
 	.resume = qcow2_resume,
 	.clone_and_map_rq = qcow2_clone_and_map,
 	.message = qcow2_message,
+	.find_hole = qcow2_find_hole,
 };
 
 static int __init dm_qcow2_init(void)
diff --git a/drivers/md/dm-qcow2.h b/drivers/md/dm-qcow2.h
index aec64c23dbae..704e369c126f 100644
--- a/drivers/md/dm-qcow2.h
+++ b/drivers/md/dm-qcow2.h
@@ -151,6 +151,7 @@ enum {
 	QLIST_COW_DATA,
 	QLIST_COW_INDEXES,
 	QLIST_COW_END,
+	QLIST_SEEK,
 
 	QLIST_COUNT,
 	QLIST_INVALID = QLIST_COUNT,
@@ -299,6 +300,7 @@ int qcow2_message(struct dm_target *ti, unsigned int argc, char **argv,
 		  char *result, unsigned int maxlen);
 int qcow2_clone_and_map(struct dm_target *ti, struct request *rq,
 		   union map_info *info, struct request **clone);
+loff_t qcow2_find_hole(struct dm_target *ti, loff_t offset, int whence);
 
 void do_qcow2_work(struct work_struct *ws);
 void do_qcow2_fsync_work(struct work_struct *ws);
-- 
2.39.3



More information about the Devel mailing list