[Devel] [PATCH VZ9 2/3] block/blk-cbt: allow multiple cbts in a single queue

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Thu Jan 23 12:14:08 MSK 2025



On 1/23/25 09:05, Pavel Tikhomirov wrote:
> General comment:
> 
> Do we need a restriction on the maximum number of cbts? E.g. the next 
> patch allows reading the cbt list from userspace; if there are too many 
> cbts it may not be so trivial to allocate a proper buffer to get them all at once.

Currently the userspace code looks like this:
int cbt_dump_names(int fd, ...)
{
	struct blk_user_cbt_list cl = {}, *req = NULL;
	int ret = -1, i;
	char *mem;

	ret = ioctl(fd, BLKCBTLIST, &cl);
	if (ret < 0) {
		ploop_err(errno, "failed to count CBT names");
		return SYSEXIT_DEVIOC;
	}

	req = malloc(cl.total_count * CBT_NAME_LENGTH + sizeof(cl));
	if (!req) {
		ploop_err(ENOMEM, "failed to allocate memory");
		return SYSEXIT_MALLOC;
	}

	req->count = cl.total_count;
	ret = ioctl(fd, BLKCBTLIST, req);
	if (ret < 0) {
		ploop_err(errno, "failed to list CBT names");
		ret = SYSEXIT_DEVIOC;
		goto out;
	}

	...
}

I think it is pretty easy: the first ioctl() with an empty header reports
the total count, and the second call fetches all the names at once.
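
If cbts can be added between the two ioctl()s, a small retry loop keeps
the allocation trivial as well. A minimal sketch (illustrative only:
BLKCBTLIST and struct blk_user_cbt_list come from the next patch; the
assumption here is that the kernel fills ->total_count with the number
of registered cbts and copies at most ->count names after the header):

/* needs <stdlib.h>, <string.h>, <sys/ioctl.h> and the UAPI header
 * introduced by the next patch */
static struct blk_user_cbt_list *cbt_list_names(int fd)
{
	struct blk_user_cbt_list cl = {}, *req = NULL;

	/* sizing call: count == 0, the kernel only fills total_count */
	if (ioctl(fd, BLKCBTLIST, &cl) < 0)
		return NULL;

	for (;;) {
		struct blk_user_cbt_list *tmp;

		tmp = realloc(req, sizeof(*req) +
			      cl.total_count * CBT_NAME_LENGTH);
		if (!tmp) {
			free(req);
			return NULL;
		}
		req = tmp;

		memset(req, 0, sizeof(*req));
		req->count = cl.total_count;
		if (ioctl(fd, BLKCBTLIST, req) < 0) {
			free(req);
			return NULL;
		}

		/* cbts appeared between the two calls: grow and retry */
		if (req->total_count <= req->count)
			return req;
		cl.total_count = req->total_count;
	}
}

Either way the buffer is bounded by total_count * CBT_NAME_LENGTH, so a
hard cap does not look necessary for the userspace side.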

> 
> On 1/20/25 16:34, Andrey Zhadchenko wrote:
>> Store cbts in a list instead of a single pointer.
>> Update all APIs to work with a user-specified bitmap.
>>
>> https://virtuozzo.atlassian.net/browse/VSTOR-96269
>> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
>> ---
>>   block/blk-cbt.c         | 174 +++++++++++++++++++++-------------------
>>   block/blk-core.c        |   2 +
>>   include/linux/blkdev.h  |   2 +-
>>   include/uapi/linux/fs.h |   2 +-
>>   4 files changed, 96 insertions(+), 84 deletions(-)
>>
>> diff --git a/block/blk-cbt.c b/block/blk-cbt.c
>> index 8ae993143098..8b0d07ee757c 100644
>> --- a/block/blk-cbt.c
>> +++ b/block/blk-cbt.c
>> @@ -53,6 +53,7 @@ struct cbt_info {
>>       blkcnt_t snp_block_max;
>>       spinlock_t lock;
>> +    struct list_head list;
>>   };
>> @@ -112,7 +113,11 @@ static int cbt_page_alloc(struct cbt_info **cbt_pp, unsigned long idx,
>>       if (likely(!test_bit(CBT_DEAD, &cbt->flags))) {
>>           cbt->count++;
>>       } else {
>> -        struct cbt_info *new = rcu_dereference(cbt->queue->cbt);
>> +        struct cbt_info *i, *new = NULL;
>> +
>> +        list_for_each_entry_rcu(i, &cbt->queue->cbt_list, list)
>> +            if (!memcmp(i->name, cbt->name, sizeof(cbt->name)))
>> +                new = i;
>>           spin_unlock_irq(&cbt->lock);
>>           /* was cbt updated ? */
>> @@ -216,33 +221,26 @@ static int __blk_cbt_set(struct cbt_info  *cbt, blkcnt_t block,
>>       return (pages_missed && *pages_missed) ? -EAGAIN : 0;
>>   }
>> -static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
>> +static void blk_cbt_add(struct cbt_info *cbt, blkcnt_t start, blkcnt_t len)
>>   {
>> -    struct cbt_info *cbt;
>>       struct cbt_extent *ex;
>>       struct cbt_extent old;
>>       blkcnt_t end;
>> -    /* Check per-cpu cache */
>> -
>> -    rcu_read_lock();
>> -    cbt = rcu_dereference(q->cbt);
>> -    if (unlikely(!cbt))
>> -        goto out_rcu;
>>       if (unlikely(test_bit(CBT_ERROR, &cbt->flags)))
>> -        goto out_rcu;
>> +        return;
>>       end = DIV_ROUND_UP(start + len, 1 << cbt->block_bits);
>>       start >>= cbt->block_bits;
>>       len = end - start;
>>       if (unlikely(test_bit(CBT_NOCACHE, &cbt->flags))) {
>>           __blk_cbt_set(cbt, start, len, 1, 1, NULL, NULL);
>> -        goto out_rcu;
>> +        return;
>>       }
>>       ex = get_cpu_ptr(cbt->cache);
>>       if (ex->start + ex->len == start) {
>>           ex->len += len;
>>           put_cpu_ptr(cbt->cache);
>> -        goto out_rcu;
>> +        return;
>>       }
>>       old = *ex;
>>       ex->start = start;
>> @@ -251,18 +249,23 @@ static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
>>       if (likely(old.len))
>>           __blk_cbt_set(cbt, old.start, old.len, 1, 1, NULL, NULL);
>> +}
>> +
>> +void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
>> +{
>> +    struct cbt_info *cbt;
>> +
>> +    rcu_read_lock();
>> +    if (list_empty(&q->cbt_list) || bio_data_dir(bio) == READ || !bio->bi_iter.bi_size)
>> +        goto out_rcu;
>> +
>> +    list_for_each_entry_rcu(cbt, &q->cbt_list, list)
>> +        blk_cbt_add(cbt, bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
>> +
>>   out_rcu:
>>       rcu_read_unlock();
>>   }
>> -inline void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
>> -{
>> -    if (!q->cbt || bio_data_dir(bio) == READ || !bio->bi_iter.bi_size)
>> -        return;
>> -
>> -    blk_cbt_add(q, bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
>> -}
>> -
>>   static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *name,
>>                        loff_t size, loff_t blocksize)
>>   {
>> @@ -272,6 +275,7 @@ static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *name,
>>       if (!cbt)
>>           return ERR_PTR(-ENOMEM);
>> +    INIT_LIST_HEAD(&cbt->list);
>>       cbt->block_bits = ilog2(blocksize);
>>       cbt->block_max  = DIV_ROUND_UP(size, blocksize);
>>       spin_lock_init(&cbt->lock);
>> @@ -365,6 +369,17 @@ static int copy_cbt_to_user(struct page **map, unsigned long size,
>>           return 0;
>>   }
>> +static struct cbt_info *blk_cbt_find(struct request_queue *q, __u8 *name)
>> +{
>> +    struct cbt_info *cbt;
>> +
>> +    list_for_each_entry(cbt, &q->cbt_list, list)
>> +        if (!memcmp(name, cbt->name, CBT_NAME_LENGTH))
>> +            return cbt;
>> +
>> +    return NULL;
>> +}
>> +
>>   static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>>                      struct blk_user_cbt_snap_create __user *arg)
>>   {
>> @@ -382,8 +397,7 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>>           return -EFAULT;
>>       mutex_lock(&cbt_mutex);
>> -    cbt = q->cbt;
>> -
>> +    cbt = blk_cbt_find(q, name);
>>       if (!cbt) {
>>           mutex_unlock(&cbt_mutex);
>>           return -ENOENT;
>> @@ -392,11 +406,6 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>>       BUG_ON(!cbt->map);
>>       BUG_ON(!cbt->block_max);
>> -    if (!name || memcmp(name, cbt->name, sizeof(cbt->name))) {
>> -        mutex_unlock(&cbt_mutex);
>> -        return -EINVAL;
>> -    }
>> -
>>       if (cbt->snp_map) {
>>           mutex_unlock(&cbt_mutex);
>>           return -EBUSY;
>> @@ -461,7 +470,7 @@ static int blk_cbt_snap_drop(struct request_queue *q, __u8 *name)
>>       int ret;
>>       mutex_lock(&cbt_mutex);
>> -    cbt = q->cbt;
>> +    cbt = blk_cbt_find(q, name);
>>       ret = -ENOENT;
>>       if (!cbt)
>> @@ -470,10 +479,6 @@ static int blk_cbt_snap_drop(struct request_queue *q, __u8 *name)
>>       BUG_ON(!cbt->map);
>>       BUG_ON(!cbt->block_max);
>> -    ret = -EINVAL;
>> -    if (!name || memcmp(name, cbt->name, sizeof(cbt->name)))
>> -        goto out;
>> -
>>       ret = -ENODEV;
>>       map = cbt->snp_map;
>>       if (!map)
>> @@ -511,7 +516,7 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>>       int ret;
>>       mutex_lock(&cbt_mutex);
>> -    cbt = q->cbt;
>> +    cbt = blk_cbt_find(q, name);
>>       ret = -ENOENT;
>>       if (!cbt)
>> @@ -520,10 +525,6 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>>       BUG_ON(!cbt->map);
>>       BUG_ON(!cbt->block_max);
>> -    ret = -EINVAL;
>> -    if (!name || memcmp(name, cbt->name, sizeof(cbt->name)))
>> -        goto out;
>> -
>>       map = cbt->snp_map;
>>       block_max = cbt->snp_block_max;
>>       ret = -ENODEV;
>> @@ -568,33 +569,21 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>>       return ret;
>>   }
>> -void blk_cbt_update_size(struct block_device *bdev)
>> +static void blk_cbt_update_cbt_size(struct cbt_info *cbt, loff_t new_sz)
>>   {
>> -    struct request_queue *q;
>> -    struct cbt_info *new, *cbt;
>> +    struct cbt_info *new;
>>       unsigned long to_cpy, idx;
>>       unsigned bsz;
>> -    loff_t new_sz = i_size_read(bdev->bd_inode);
>>       int in_use = 0;
>> -    if (!bdev->bd_disk || !bdev_get_queue(bdev))
>> -        return;
>> -
>> -    q = bdev_get_queue(bdev);
>> -    mutex_lock(&cbt_mutex);
>> -    cbt = q->cbt;
>> -    if (!cbt) {
>> -        mutex_unlock(&cbt_mutex);
>> -        return;
>> -    }
>>       bsz = 1 << cbt->block_bits;
>>       if (DIV_ROUND_UP(new_sz, bsz) <= cbt->block_max)
>> -        goto err_mtx;
>> +        return;
>> -    new = do_cbt_alloc(q, cbt->name, new_sz, bsz);
>> +    new = do_cbt_alloc(cbt->queue, cbt->name, new_sz, bsz);
>>       if (IS_ERR(new)) {
>>           set_bit(CBT_ERROR, &cbt->flags);
>> -        goto err_mtx;
>> +        return;
>>       }
>>       to_cpy = NR_PAGES(cbt->block_max);
>>       set_bit(CBT_NOCACHE, &cbt->flags);
>> @@ -606,15 +595,27 @@ void blk_cbt_update_size(struct block_device *bdev)
>>           if (CBT_PAGE(new, idx))
>>               get_page(CBT_PAGE(new, idx));
>>       }
>> -    rcu_assign_pointer(q->cbt, new);
>> +    list_replace_rcu(&cbt->list, &new->list);
>>       in_use = cbt->count;
>>       spin_unlock_irq(&cbt->lock);
>>       if (!in_use)
>>           call_rcu(&cbt->rcu, &cbt_release_callback);
>> -err_mtx:
>> +}
>> +
>> +void blk_cbt_update_size(struct block_device *bdev)
>> +{
>> +    struct request_queue *q;
>> +    struct cbt_info *cbt;
>> +
>> +    if (!bdev->bd_disk || !bdev_get_queue(bdev))
>> +        return;
>> +
>> +    q = bdev_get_queue(bdev);
>> +    mutex_lock(&cbt_mutex);
>> +    list_for_each_entry(cbt, &q->cbt_list, list)
>> +        blk_cbt_update_cbt_size(cbt, i_size_read(bdev->bd_inode));
>> +
>>       mutex_unlock(&cbt_mutex);
>> -
>> -
>>   }
>>   static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
>> @@ -632,16 +633,13 @@ static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __us
>>       q = bdev_get_queue(bdev);
>>       mutex_lock(&cbt_mutex);
>> -    if (q->cbt) {
>> -        ret = -EBUSY;
>> -        goto err_mtx;
>> -    }
>> +
>> +    cbt = do_cbt_alloc(q, ci.ci_name, i_size_read(bdev->bd_inode), ci.ci_blksize);
>>       if (IS_ERR(cbt))
>>           ret = PTR_ERR(cbt);
>>       else
>> -        rcu_assign_pointer(q->cbt, cbt);
>> -err_mtx:
>> +        list_add_tail_rcu(&q->cbt_list, &cbt->list);
>> +
>>       mutex_unlock(&cbt_mutex);
>>       return ret;
>>   }
>> @@ -667,35 +665,46 @@ static void cbt_release_callback(struct rcu_head *head)
>>       kfree(cbt);
>>   }
>> -void blk_cbt_release(struct request_queue *q)
>> +static void blk_cbt_del(struct cbt_info *cbt)
>>   {
>> -    struct cbt_info *cbt;
>> -    int in_use = 0;
>>       unsigned long flags;
>> +    int in_use;
>> -    cbt = q->cbt;
>> -    if (!cbt)
>> -        return;
>>       spin_lock_irqsave(&cbt->lock, flags);
>>       set_bit(CBT_DEAD, &cbt->flags);
>> -    rcu_assign_pointer(q->cbt, NULL);
>> +    list_del_rcu(&cbt->list);
>>       in_use = cbt->count;
>>       spin_unlock_irqrestore(&cbt->lock, flags);
>>       if (!in_use)
>>           call_rcu(&cbt->rcu, &cbt_release_callback);
>>   }
>> -static int cbt_ioc_stop(struct block_device *bdev)
>> +void blk_cbt_release(struct request_queue *q)
>>   {
>> +    struct cbt_info *cbt, *tmp;
>> +
>> +    list_for_each_entry_safe(cbt, tmp, &q->cbt_list, list)
>> +        blk_cbt_del(cbt);
>> +}
>> +
>> +static int cbt_ioc_stop(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
>> +{
>> +    struct blk_user_cbt_info ci;
>>       struct request_queue *q;
>> +    struct cbt_info *cbt;
>> +
>> +    if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
>> +        return -EFAULT;
>>       mutex_lock(&cbt_mutex);
>>       q = bdev_get_queue(bdev);
>> -    if(!q->cbt) {
>> -        mutex_unlock(&cbt_mutex);
>> -        return -EINVAL;
>> -    }
>> -    blk_cbt_release(q);
>> +
>> +    cbt = blk_cbt_find(q, ci.ci_name);
>> +    if (!cbt)
>> +        return -ENOMEM;
>> +
>> +    blk_cbt_del(cbt);
>> +
>>       mutex_unlock(&cbt_mutex);
>>       return 0;
>>   }
>> @@ -839,7 +848,8 @@ static int cbt_ioc_get(struct block_device *bdev, struct blk_user_cbt_info __use
>>       ret = -EINVAL;
>>       q = bdev_get_queue(bdev);
>>       mutex_lock(&cbt_mutex);
>> -    cbt = q->cbt;
>> +
>> +    cbt = blk_cbt_find(q, ci.ci_name);
>>       if (!cbt ||
>>           (ci.ci_start >> cbt->block_bits) > cbt->block_max)
>>           goto ioc_get_failed;
>> @@ -925,7 +935,7 @@ static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __use
>>       ret = -EINVAL;
>>       mutex_lock(&cbt_mutex);
>> -    cbt = q->cbt;
>> +    cbt = blk_cbt_find(q, ci.ci_name);
>>       if (!cbt)
>>           goto ioc_set_failed;
>> @@ -948,8 +958,8 @@ static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __use
>>           ex.start  = cur_ex->ce_physical >> cbt->block_bits;
>>         ex.len  = DIV_ROUND_UP(cur_ex->ce_length, 1 << cbt->block_bits);
>> -        if (ex.start > q->cbt->block_max ||
>> -            ex.start + ex.len > q->cbt->block_max ||
>> +        if (ex.start > cbt->block_max ||
>> +            ex.start + ex.len > cbt->block_max ||
>>               ex.len == 0) {
>>               ret = -EINVAL;
>>               break;
>> @@ -1007,7 +1017,7 @@ int blk_cbt_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
>>       case BLKCBTSTART:
>>           return cbt_ioc_init(bdev, ucbt_ioc);
>>       case BLKCBTSTOP:
>> -        return cbt_ioc_stop(bdev);
>> +        return cbt_ioc_stop(bdev, ucbt_ioc);
>>       case BLKCBTSET:
>>           return cbt_ioc_set(bdev, ucbt_ioc, 1);
>>       case BLKCBTCLR:
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 6abad29dd501..e3aa923f399e 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -428,6 +428,8 @@ struct request_queue *blk_alloc_queue(int node_id)
>>       init_waitqueue_head(&q->mq_freeze_wq);
>>       mutex_init(&q->mq_freeze_lock);
>> +    INIT_LIST_HEAD(&q->cbt_list);
>> +
>>       blkg_init_queue(q);
>>       /*
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index fcbc83909b91..3009964707e6 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -520,7 +520,7 @@ struct request_queue {
>>       struct throtl_data *td;
>>   #endif
>>   #ifdef CONFIG_BLK_DEV_CBT
>> -    struct cbt_info *cbt;
>> +    struct list_head cbt_list;
>>   #endif
>>       struct rcu_head        rcu_head;
>>       wait_queue_head_t    mq_freeze_wq;
>> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
>> index 47efa7ef05a0..667157fe864a 100644
>> --- a/include/uapi/linux/fs.h
>> +++ b/include/uapi/linux/fs.h
>> @@ -170,7 +170,7 @@ struct blk_user_cbt_snap_create {
>>   };
>>   #define BLKCBTSTART _IOR(0x12,200, struct blk_user_cbt_info)
>> -#define BLKCBTSTOP _IO(0x12,201)
>> +#define BLKCBTSTOP _IOR(0x12, 201, struct blk_user_cbt_info)
>>   #define BLKCBTGET _IOWR(0x12,202,struct blk_user_cbt_info)
>>   #define BLKCBTSET _IOR(0x12,203,struct blk_user_cbt_info)
>>   #define BLKCBTCLR _IOR(0x12,204,struct blk_user_cbt_info)
> 

