[Devel] [PATCH RH9 4/7] dm/dm-qcow2: add find_hole

Alexander Atanasov alexander.atanasov at virtuozzo.com
Mon Jul 24 15:20:33 MSK 2023


On 24.07.23 11:03, Konstantin Khorenko wrote:
> Implement find_hole() for the dm-qcow2 target.
> Iterate over ranges with cluster granularity until a hole or data is found.
> To reduce code duplication, we should use the already existing parse_metadata().
> We can pretend that the seek request is a read request for metadata purposes
> and then interpret the parsing result in our favor.
> Since parse_metadata() supports request postponing (for example, when the
> requested L2 cluster is absent from RAM), we should create a separate qio
> list for our queries.
> 
> https://jira.vzint.dev/browse/PSBM-145746
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
> ---
>   drivers/md/dm-qcow2-map.c    | 140 +++++++++++++++++++++++++++++++++++
>   drivers/md/dm-qcow2-target.c |   1 +
>   drivers/md/dm-qcow2.h        |   2 +
>   3 files changed, 143 insertions(+)
> 
> diff --git a/drivers/md/dm-qcow2-map.c b/drivers/md/dm-qcow2-map.c
> index a779889c6970..f728a52ab5e4 100644
> --- a/drivers/md/dm-qcow2-map.c
> +++ b/drivers/md/dm-qcow2-map.c
> @@ -3980,6 +3980,14 @@ static void process_resubmit_qios(struct qcow2 *qcow2, struct list_head *qios)
>       }
>   }
> 
> +static void process_seek_qios(struct qcow2 *qcow, struct list_head *qios)
> +{
> +    struct qio *qio;
> +
> +    while ((qio = qio_list_pop(qios)) != NULL)
> +        complete(qio->data);
> +}
> +
>   void do_qcow2_work(struct work_struct *ws)
>   {
>       struct qcow2 *qcow2 = container_of(ws, struct qcow2, worker);
> @@ -3991,6 +3999,7 @@ void do_qcow2_work(struct work_struct *ws)
>       LIST_HEAD(cow_indexes_qios);
>       LIST_HEAD(cow_end_qios);
>       LIST_HEAD(resubmit_qios);
> +    LIST_HEAD(seek_qios);
>       unsigned int pflags = current->flags;
> 
>       current->flags |= PF_LOCAL_THROTTLE|PF_MEMALLOC_NOIO;
> @@ -4003,6 +4012,7 @@ void do_qcow2_work(struct work_struct *ws)
>       list_splice_init(&qcow2->qios[QLIST_COW_INDEXES], &cow_indexes_qios);
>       list_splice_init(&qcow2->qios[QLIST_COW_END], &cow_end_qios);
>       list_splice_init(&qcow2->resubmit_qios, &resubmit_qios);
> +    list_splice_init(&qcow2->qios[QLIST_SEEK], &seek_qios);
>       spin_unlock_irq(&qcow2->deferred_lock);
> 
>       process_embedded_qios(qcow2, &embedded_qios, &deferred_qios);
> @@ -4013,6 +4023,7 @@ void do_qcow2_work(struct work_struct *ws)
>       process_cow_indexes_write(qcow2, &cow_indexes_qios);
>       process_cow_end(qcow2, &cow_end_qios);
>       process_resubmit_qios(qcow2, &resubmit_qios);
> +    process_seek_qios(qcow2, &seek_qios);
> 
>       /* This actually submits batch of md writeback, initiated above */
>       submit_metadata_writeback(qcow2);
> @@ -4235,3 +4246,132 @@ static void handle_cleanup_mask(struct qio *qio)
>           ext->cleanup_mask &= ~FREE_ALLOCATED_CLU;
>       }
>   }
> +
> +static sector_t get_next_l2(struct qio *qio)
> +{
> +    struct qcow2 *qcow2 = qio->qcow2;
> +    loff_t start, add;
> +
> +    start = to_bytes(qio->bi_iter.bi_sector);
> +    add = qcow2->l2_entries - (start / qcow2->clu_size) % qcow2->l2_entries;
> +
> +    return qio->bi_iter.bi_sector + (qcow2->clu_size / to_bytes(1)) * add;
> +}
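
(Worked example, not from the patch: each L2 table maps l2_entries
consecutive clusters. With a 64 KiB cluster and plain 8-byte L2 entries,
l2_entries = 65536 / 8 = 8192, so one table covers 8192 * 64 KiB = 512 MiB.
"add" is the number of clusters left before the end of the current table's
window, so for a cluster-aligned sector the function returns the first
sector mapped by the next L2 table.)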
> +
> +static sector_t get_next_clu(struct qio *qio)
> +{
> +    struct qcow2 *qcow2 = qio->qcow2;
> +    loff_t offset;
> +
> +    offset = to_bytes(qio->bi_iter.bi_sector);
> +    offset = offset / qcow2->clu_size;
> +    offset = (offset + 1) * qcow2->clu_size;
> +

Isn't

offset = (offset + qcow2->clu_size) / qcow2->clu_size;
offset *= qcow2->clu_size;

more efficient? If clu_size is known to be a power of two, it may be
possible to rewrite this in another way.
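
For example (a sketch only, assuming clu_size is a power of two; ALIGN()
is the kernel's existing round-up macro for power-of-two boundaries):

	/* Round offset + 1 up to the next clu_size boundary, so an
	 * offset already on a boundary still advances one cluster. */
	offset = ALIGN(offset + 1, qcow2->clu_size);
	return to_sector(offset);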

> +    return to_sector(offset);
> +}
> +
> +loff_t qcow2_find_hole(struct dm_target *ti, loff_t offset, int whence)
> +{
> +    struct qcow2 *qcow2 = to_qcow2_target(ti)->top;
> +    DECLARE_COMPLETION_ONSTACK(compl);
> +    bool unmapped, zeroes, try_lower;
> +    struct qio qio = {0}, *qptr;
> +    loff_t result = -EINVAL;
> +    struct qcow2_map map;
> +    u32 size;
> +    int ret;
> +
> +    qio.bi_iter.bi_sector = to_sector(offset);
> +    qio.bi_iter.bi_size = qcow2->clu_size - offset % qcow2->clu_size;
> +
> +    qcow2_init_qio(&qio, REQ_OP_READ, qcow2);
> +    qio.queue_list_id = QLIST_SEEK;
> +    qio.data = &compl;
> +
> +    while (qio.bi_iter.bi_sector < to_sector(qcow2->hdr.size)) {
> +        qio.qcow2 = qcow2;
> +retry:
> +        memset(&map, 0, sizeof(map));
> +        map.qcow2 = qio.qcow2;
> +        qptr = &qio;
> +        ret = parse_metadata(qio.qcow2, &qptr, &map);
> +        /* ENXIO has a special meaning for llseek, so remap it to EINVAL */
> +        if (ret < 0)
> +            return (ret == -ENXIO) ? -EINVAL : ret;
> +        if (qptr == NULL) {
> +            wait_for_completion(&compl);
> +            reinit_completion(&compl);

What's the point of this? compl is local and is assigned to qio.data;
who will complete it here, after qptr is already NULL and parse_metadata()
is done? It looks like the places that use a completion manage their own
completion function and data: submit_rw_md_page(), perform_rw_mapped(),
submit_read_whole_cow_clu().
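
For reference, the wake-up path the patch seems to rely on, as far as I
can reconstruct it from the hunks above (a sketch of my reading, not a
statement of fact):

	parse_metadata()         /* postpones: *qptr = NULL, qio sits on
	                            qcow2->qios[QLIST_SEEK] */
	-> do_qcow2_work()       /* splices QLIST_SEEK into seek_qios */
	-> process_seek_qios()   /* complete(qio->data) wakes the
	                            wait_for_completion() above */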

> +            goto retry;
> +        }
> +
> +calc_subclu:
> +        zeroes = unmapped = try_lower = false;
> +        zeroes = (size = qio_all_zeroes_size(qio.qcow2, &qio, &map));
> +        if (!size)
> +            unmapped = (size = qio_unmapped_size(qio.qcow2, &qio, &map));
> +        if (!size)
> +            size = qio_mapped_not_zeroes_size(qio.qcow2, &qio, &map);
> +        if (unmapped)
> +            try_lower = maybe_mapped_in_lower_delta(qio.qcow2, &qio);
> +
> +        if (unmapped && try_lower) {
> +            loff_t end = to_bytes(qio.bi_iter.bi_sector) + qio.bi_iter.bi_size;
> +
> +            if (end < qio.qcow2->hdr.size) {
> +                qio.qcow2 = qio.qcow2->lower;
> +                goto retry;
> +            }
> +        }
> +
> +        if (whence & SEEK_HOLE) {
> +            if (zeroes || unmapped) {
> +                result = to_bytes(qio.bi_iter.bi_sector);
> +                break;
> +            } else if (size != qio.bi_iter.bi_size) {
> +                /*
> +                 * The range starts with data subclusters; after them,
> +                 * some subclusters are zero or unmapped.
> +                 */
> +                result = to_bytes(qio.bi_iter.bi_sector) + size;
> +                break;
> +            }
> +        }
> +
> +        if (whence & SEEK_DATA) {
> +            if (!zeroes && !unmapped) {
> +                result = to_bytes(qio.bi_iter.bi_sector);
> +                break;
> +            } else if (size != qio.bi_iter.bi_size) {
> +                /*
> +                 * The range starts with zero or unmapped subclusters,
> +                 * but what follows can still be unmapped or zero.
> +                 * We do not need to parse metadata again, but we should
> +                 * skip these subclusters and look at the next ones.
> +                 */
> +                qio.bi_iter.bi_sector += to_sector(size);
> +                qio.bi_iter.bi_size -= size;
> +                goto calc_subclu;
> +            }
> +        }
> +
> +        /* whole L2 table is unmapped - skip to next l2 table */
> +        if (!(map.level & L2_LEVEL))
> +            qio.bi_iter.bi_sector = get_next_l2(&qio);
> +        else
> +            qio.bi_iter.bi_sector = get_next_clu(&qio);
> +
> +        qio.bi_iter.bi_size = qcow2->clu_size;
> +    }
> +


This whole retry logic is very long and hard to follow; could it be
split into helper functions?
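
For instance (a rough sketch only -- all helper names here are made up):

	/* Classify the subcluster range at qio->bi_iter as all-zeroes,
	 * unmapped or data, returning its size in bytes. */
	static u32 seek_classify(struct qio *qio, struct qcow2_map *map,
				 bool *zeroes, bool *unmapped);

	/* Check whether the classified range satisfies SEEK_HOLE or
	 * SEEK_DATA; return the resulting offset, or a negative value
	 * meaning "keep scanning from the next cluster/L2 table". */
	static loff_t seek_match(struct qio *qio, u32 size, bool zeroes,
				 bool unmapped, int whence);

Then the main while loop would reduce to parse_metadata() plus calls to
these two helpers.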

> +    if (result >= 0 && result < offset)
> +        result = offset;
> +
> +    if (qio.bi_iter.bi_sector >= to_sector(qcow2->hdr.size)) {
> +        if (whence & SEEK_HOLE)
> +            result = qcow2->hdr.size;
> +        if (whence & SEEK_DATA)
> +            result = -ENXIO;
> +    }
> +
> +    return result;
> +}
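
For context (not part of the patch): the userspace contract being
implemented is the standard lseek(2) one, which is also why -ENXIO from
parse_metadata() must not leak through as a "no more data" answer:

	off_t data = lseek(fd, offset, SEEK_DATA);
	if (data == (off_t)-1 && errno == ENXIO) {
		/* No data at or after offset: offset is in a trailing
		 * hole, or at/past EOF. */
	}

	/* SEEK_HOLE succeeds for any offset within the file: there is
	 * always at least the implicit hole at EOF. */
	off_t hole = lseek(fd, offset, SEEK_HOLE);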
-- 
Regards,
Alexander Atanasov


