[Devel] [PATCH VZ9 2/3] block/blk-cbt: allow multiple cbts in a single queue
Andrey Zhadchenko
andrey.zhadchenko at virtuozzo.com
Thu Jan 23 12:14:08 MSK 2025
On 1/23/25 09:05, Pavel Tikhomirov wrote:
> General comment:
>
> Do we need a restriction on the maximum number of cbts? E.g. in the next
> patch we allow reading the cbt list from userspace; if we have too many
> cbts it may not be so trivial to allocate a proper buffer to get them
> all at once.
Currently the userspace code looks like this:

int cbt_dump_names(int fd, ...)
{
	struct blk_user_cbt_list cl = {}, *req = NULL;
	int ret = -1, i;
	char *mem;

	ret = ioctl(fd, BLKCBTLIST, &cl);
	if (ret < 0) {
		ploop_err(errno, "failed to count CBT names");
		return SYSEXIT_DEVIOC;
	}

	req = malloc(cl.total_count * CBT_NAME_LENGTH + sizeof(cl));
	if (!req) {
		ploop_err(ENOMEM, "failed to allocate memory");
		return SYSEXIT_MALLOC;
	}

	req->count = cl.total_count;
	ret = ioctl(fd, BLKCBTLIST, req);
	if (ret < 0) {
		ploop_err(errno, "failed to list CBT names");
		ret = SYSEXIT_DEVIOC;
		goto out;
	}
	...
}
I think it is pretty easy: the first BLKCBTLIST call reports total_count,
so userspace can size the buffer exactly before the second call fetches
the names.
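If we are worried about new cbts being registered between the two calls,
a small retry loop covers that as well. A rough sketch (a hypothetical
helper, not part of ploop; it assumes the kernel fills in total_count on
every BLKCBTLIST call, which the snippet above already relies on):

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
/* BLKCBTLIST and struct blk_user_cbt_list come from the vz kernel UAPI */

static struct blk_user_cbt_list *cbt_list_names(int fd)
{
	struct blk_user_cbt_list cl = {}, *req = NULL;

	/* First call with count == 0 only reports cl.total_count */
	if (ioctl(fd, BLKCBTLIST, &cl) < 0)
		return NULL;

	for (;;) {
		struct blk_user_cbt_list *tmp;

		/* Names are packed right after the header, as above */
		tmp = realloc(req, cl.total_count * CBT_NAME_LENGTH + sizeof(cl));
		if (!tmp) {
			free(req);
			return NULL;
		}
		req = tmp;
		memset(req, 0, sizeof(*req));
		req->count = cl.total_count;

		if (ioctl(fd, BLKCBTLIST, req) < 0) {
			free(req);
			return NULL;
		}

		/* All names fit into the buffer -- done */
		if (req->total_count <= req->count)
			return req;

		/* More cbts appeared in the meantime; grow and retry */
		cl.total_count = req->total_count;
	}
}

Either way userspace stays in control of the allocation, so I do not see
a strong need for a kernel-side cap.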
>
> On 1/20/25 16:34, Andrey Zhadchenko wrote:
>> Store cbts in a list instead of a single pointer.
>> Update all APIs to work with user-specified bitmap.
>>
>> https://virtuozzo.atlassian.net/browse/VSTOR-96269
>> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
>> ---
>> block/blk-cbt.c | 174 +++++++++++++++++++++-------------------
>> block/blk-core.c | 2 +
>> include/linux/blkdev.h | 2 +-
>> include/uapi/linux/fs.h | 2 +-
>> 4 files changed, 96 insertions(+), 84 deletions(-)
>>
>> diff --git a/block/blk-cbt.c b/block/blk-cbt.c
>> index 8ae993143098..8b0d07ee757c 100644
>> --- a/block/blk-cbt.c
>> +++ b/block/blk-cbt.c
>> @@ -53,6 +53,7 @@ struct cbt_info {
>> blkcnt_t snp_block_max;
>> spinlock_t lock;
>> + struct list_head list;
>> };
>> @@ -112,7 +113,11 @@ static int cbt_page_alloc(struct cbt_info **cbt_pp, unsigned long idx,
>> if (likely(!test_bit(CBT_DEAD, &cbt->flags))) {
>> cbt->count++;
>> } else {
>> - struct cbt_info *new = rcu_dereference(cbt->queue->cbt);
>> + struct cbt_info *i, *new = NULL;
>> +
>> + list_for_each_entry_rcu(i, &cbt->queue->cbt_list, list)
>> + if (!memcmp(i->name, cbt->name, sizeof(cbt->name)))
>> + new = i;
>> spin_unlock_irq(&cbt->lock);
>> /* was cbt updated ? */
>> @@ -216,33 +221,26 @@ static int __blk_cbt_set(struct cbt_info *cbt, blkcnt_t block,
>> return (pages_missed && *pages_missed) ? -EAGAIN : 0;
>> }
>> -static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
>> +static void blk_cbt_add(struct cbt_info *cbt, blkcnt_t start, blkcnt_t len)
>> {
>> - struct cbt_info *cbt;
>> struct cbt_extent *ex;
>> struct cbt_extent old;
>> blkcnt_t end;
>> - /* Check per-cpu cache */
>> -
>> - rcu_read_lock();
>> - cbt = rcu_dereference(q->cbt);
>> - if (unlikely(!cbt))
>> - goto out_rcu;
>> if (unlikely(test_bit(CBT_ERROR, &cbt->flags)))
>> - goto out_rcu;
>> + return;
>> end = DIV_ROUND_UP(start + len, 1 << cbt->block_bits);
>> start >>= cbt->block_bits;
>> len = end - start;
>> if (unlikely(test_bit(CBT_NOCACHE, &cbt->flags))) {
>> __blk_cbt_set(cbt, start, len, 1, 1, NULL, NULL);
>> - goto out_rcu;
>> + return;
>> }
>> ex = get_cpu_ptr(cbt->cache);
>> if (ex->start + ex->len == start) {
>> ex->len += len;
>> put_cpu_ptr(cbt->cache);
>> - goto out_rcu;
>> + return;
>> }
>> old = *ex;
>> ex->start = start;
>> @@ -251,18 +249,23 @@ static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
>> if (likely(old.len))
>> __blk_cbt_set(cbt, old.start, old.len, 1, 1, NULL, NULL);
>> +}
>> +
>> +void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
>> +{
>> + struct cbt_info *cbt;
>> +
>> + rcu_read_lock();
>> + if (list_empty(&q->cbt_list) || bio_data_dir(bio) == READ || !bio->bi_iter.bi_size)
>> + goto out_rcu;
>> +
>> + list_for_each_entry_rcu(cbt, &q->cbt_list, list)
>> + blk_cbt_add(cbt, bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
>> +
>> out_rcu:
>> rcu_read_unlock();
>> }
>> -inline void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
>> -{
>> - if (!q->cbt || bio_data_dir(bio) == READ || !bio->bi_iter.bi_size)
>> - return;
>> -
>> - blk_cbt_add(q, bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
>> -}
>> -
>> static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *name,
>> loff_t size, loff_t blocksize)
>> {
>> @@ -272,6 +275,7 @@ static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *name,
>> if (!cbt)
>> return ERR_PTR(-ENOMEM);
>> + INIT_LIST_HEAD(&cbt->list);
>> cbt->block_bits = ilog2(blocksize);
>> cbt->block_max = DIV_ROUND_UP(size, blocksize);
>> spin_lock_init(&cbt->lock);
>> @@ -365,6 +369,17 @@ static int copy_cbt_to_user(struct page **map, unsigned long size,
>> return 0;
>> }
>> +static struct cbt_info *blk_cbt_find(struct request_queue *q, __u8 *name)
>> +{
>> + struct cbt_info *cbt;
>> +
>> + list_for_each_entry(cbt, &q->cbt_list, list)
>> + if (!memcmp(name, cbt->name, CBT_NAME_LENGTH))
>> + return cbt;
>> +
>> + return NULL;
>> +}
>> +
>> static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>> struct blk_user_cbt_snap_create __user *arg)
>> {
>> @@ -382,8 +397,7 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>> return -EFAULT;
>> mutex_lock(&cbt_mutex);
>> - cbt = q->cbt;
>> -
>> + cbt = blk_cbt_find(q, name);
>> if (!cbt) {
>> mutex_unlock(&cbt_mutex);
>> return -ENOENT;
>> @@ -392,11 +406,6 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>> BUG_ON(!cbt->map);
>> BUG_ON(!cbt->block_max);
>> - if (!name || memcmp(name, cbt->name, sizeof(cbt->name))) {
>> - mutex_unlock(&cbt_mutex);
>> - return -EINVAL;
>> - }
>> -
>> if (cbt->snp_map) {
>> mutex_unlock(&cbt_mutex);
>> return -EBUSY;
>> @@ -461,7 +470,7 @@ static int blk_cbt_snap_drop(struct request_queue *q, __u8 *name)
>> int ret;
>> mutex_lock(&cbt_mutex);
>> - cbt = q->cbt;
>> + cbt = blk_cbt_find(q, name);
>> ret = -ENOENT;
>> if (!cbt)
>> @@ -470,10 +479,6 @@ static int blk_cbt_snap_drop(struct request_queue *q, __u8 *name)
>> BUG_ON(!cbt->map);
>> BUG_ON(!cbt->block_max);
>> - ret = -EINVAL;
>> - if (!name || memcmp(name, cbt->name, sizeof(cbt->name)))
>> - goto out;
>> -
>> ret = -ENODEV;
>> map = cbt->snp_map;
>> if (!map)
>> @@ -511,7 +516,7 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>> int ret;
>> mutex_lock(&cbt_mutex);
>> - cbt = q->cbt;
>> + cbt = blk_cbt_find(q, name);
>> ret = -ENOENT;
>> if (!cbt)
>> @@ -520,10 +525,6 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>> BUG_ON(!cbt->map);
>> BUG_ON(!cbt->block_max);
>> - ret = -EINVAL;
>> - if (!name || memcmp(name, cbt->name, sizeof(cbt->name)))
>> - goto out;
>> -
>> map = cbt->snp_map;
>> block_max = cbt->snp_block_max;
>> ret = -ENODEV;
>> @@ -568,33 +569,21 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>> return ret;
>> }
>> -void blk_cbt_update_size(struct block_device *bdev)
>> +static void blk_cbt_update_cbt_size(struct cbt_info *cbt, loff_t new_sz)
>> {
>> - struct request_queue *q;
>> - struct cbt_info *new, *cbt;
>> + struct cbt_info *new;
>> unsigned long to_cpy, idx;
>> unsigned bsz;
>> - loff_t new_sz = i_size_read(bdev->bd_inode);
>> int in_use = 0;
>> - if (!bdev->bd_disk || !bdev_get_queue(bdev))
>> - return;
>> -
>> - q = bdev_get_queue(bdev);
>> - mutex_lock(&cbt_mutex);
>> - cbt = q->cbt;
>> - if (!cbt) {
>> - mutex_unlock(&cbt_mutex);
>> - return;
>> - }
>> bsz = 1 << cbt->block_bits;
>> if (DIV_ROUND_UP(new_sz, bsz) <= cbt->block_max)
>> - goto err_mtx;
>> + return;
>> - new = do_cbt_alloc(q, cbt->name, new_sz, bsz);
>> + new = do_cbt_alloc(cbt->queue, cbt->name, new_sz, bsz);
>> if (IS_ERR(new)) {
>> set_bit(CBT_ERROR, &cbt->flags);
>> - goto err_mtx;
>> + return;
>> }
>> to_cpy = NR_PAGES(cbt->block_max);
>> set_bit(CBT_NOCACHE, &cbt->flags);
>> @@ -606,15 +595,27 @@ void blk_cbt_update_size(struct block_device *bdev)
>> if (CBT_PAGE(new, idx))
>> get_page(CBT_PAGE(new, idx));
>> }
>> - rcu_assign_pointer(q->cbt, new);
>> + list_replace_rcu(&cbt->list, &new->list);
>> in_use = cbt->count;
>> spin_unlock_irq(&cbt->lock);
>> if (!in_use)
>> call_rcu(&cbt->rcu, &cbt_release_callback);
>> -err_mtx:
>> +}
>> +
>> +void blk_cbt_update_size(struct block_device *bdev)
>> +{
>> + struct request_queue *q;
>> + struct cbt_info *cbt;
>> +
>> + if (!bdev->bd_disk || !bdev_get_queue(bdev))
>> + return;
>> +
>> + q = bdev_get_queue(bdev);
>> + mutex_lock(&cbt_mutex);
>> + list_for_each_entry(cbt, &q->cbt_list, list)
>> + blk_cbt_update_cbt_size(cbt, i_size_read(bdev->bd_inode));
>> +
>> mutex_unlock(&cbt_mutex);
>> -
>> -
>> }
>> static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
>> @@ -632,16 +633,13 @@ static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __us
>> q = bdev_get_queue(bdev);
>> mutex_lock(&cbt_mutex);
>> - if (q->cbt) {
>> - ret = -EBUSY;
>> - goto err_mtx;
>> - }
>> +
>> cbt = do_cbt_alloc(q, ci.ci_name, i_size_read(bdev->bd_inode),
>> ci.ci_blksize);
>> if (IS_ERR(cbt))
>> ret = PTR_ERR(cbt);
>> else
>> - rcu_assign_pointer(q->cbt, cbt);
>> -err_mtx:
>> + list_add_tail_rcu(&cbt->list, &q->cbt_list);
>> +
>> mutex_unlock(&cbt_mutex);
>> return ret;
>> }
>> @@ -667,35 +665,46 @@ static void cbt_release_callback(struct rcu_head *head)
>> kfree(cbt);
>> }
>> -void blk_cbt_release(struct request_queue *q)
>> +static void blk_cbt_del(struct cbt_info *cbt)
>> {
>> - struct cbt_info *cbt;
>> - int in_use = 0;
>> unsigned long flags;
>> + int in_use;
>> - cbt = q->cbt;
>> - if (!cbt)
>> - return;
>> spin_lock_irqsave(&cbt->lock, flags);
>> set_bit(CBT_DEAD, &cbt->flags);
>> - rcu_assign_pointer(q->cbt, NULL);
>> + list_del_rcu(&cbt->list);
>> in_use = cbt->count;
>> spin_unlock_irqrestore(&cbt->lock, flags);
>> if (!in_use)
>> call_rcu(&cbt->rcu, &cbt_release_callback);
>> }
>> -static int cbt_ioc_stop(struct block_device *bdev)
>> +void blk_cbt_release(struct request_queue *q)
>> {
>> + struct cbt_info *cbt, *tmp;
>> +
>> + list_for_each_entry_safe(cbt, tmp, &q->cbt_list, list)
>> + blk_cbt_del(cbt);
>> +}
>> +
>> +static int cbt_ioc_stop(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
>> +{
>> + struct blk_user_cbt_info ci;
>> struct request_queue *q;
>> + struct cbt_info *cbt;
>> +
>> + if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
>> + return -EFAULT;
>> mutex_lock(&cbt_mutex);
>> q = bdev_get_queue(bdev);
>> - if(!q->cbt) {
>> - mutex_unlock(&cbt_mutex);
>> - return -EINVAL;
>> - }
>> - blk_cbt_release(q);
>> +
>> + cbt = blk_cbt_find(q, ci.ci_name);
>> + if (!cbt) {
>> + mutex_unlock(&cbt_mutex);
>> + return -ENOENT;
>> + }
>> +
>> + blk_cbt_del(cbt);
>> +
>> mutex_unlock(&cbt_mutex);
>> return 0;
>> }
>> @@ -839,7 +848,8 @@ static int cbt_ioc_get(struct block_device *bdev, struct blk_user_cbt_info __use
>> ret = -EINVAL;
>> q = bdev_get_queue(bdev);
>> mutex_lock(&cbt_mutex);
>> - cbt = q->cbt;
>> +
>> + cbt = blk_cbt_find(q, ci.ci_name);
>> if (!cbt ||
>> (ci.ci_start >> cbt->block_bits) > cbt->block_max)
>> goto ioc_get_failed;
>> @@ -925,7 +935,7 @@ static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __use
>> ret = -EINVAL;
>> mutex_lock(&cbt_mutex);
>> - cbt = q->cbt;
>> + cbt = blk_cbt_find(q, ci.ci_name);
>> if (!cbt)
>> goto ioc_set_failed;
>> @@ -948,8 +958,8 @@ static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __use
>> ex.start = cur_ex->ce_physical >> cbt->block_bits;
>> ex.len = DIV_ROUND_UP(cur_ex->ce_length, 1 << cbt->block_bits);
>> - if (ex.start > q->cbt->block_max ||
>> - ex.start + ex.len > q->cbt->block_max ||
>> + if (ex.start > cbt->block_max ||
>> + ex.start + ex.len > cbt->block_max ||
>> ex.len == 0) {
>> ret = -EINVAL;
>> break;
>> @@ -1007,7 +1017,7 @@ int blk_cbt_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
>> case BLKCBTSTART:
>> return cbt_ioc_init(bdev, ucbt_ioc);
>> case BLKCBTSTOP:
>> - return cbt_ioc_stop(bdev);
>> + return cbt_ioc_stop(bdev, ucbt_ioc);
>> case BLKCBTSET:
>> return cbt_ioc_set(bdev, ucbt_ioc, 1);
>> case BLKCBTCLR:
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 6abad29dd501..e3aa923f399e 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -428,6 +428,8 @@ struct request_queue *blk_alloc_queue(int node_id)
>> init_waitqueue_head(&q->mq_freeze_wq);
>> mutex_init(&q->mq_freeze_lock);
>> + INIT_LIST_HEAD(&q->cbt_list);
>> +
>> blkg_init_queue(q);
>> /*
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index fcbc83909b91..3009964707e6 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -520,7 +520,7 @@ struct request_queue {
>> struct throtl_data *td;
>> #endif
>> #ifdef CONFIG_BLK_DEV_CBT
>> - struct cbt_info *cbt;
>> + struct list_head cbt_list;
>> #endif
>> struct rcu_head rcu_head;
>> wait_queue_head_t mq_freeze_wq;
>> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
>> index 47efa7ef05a0..667157fe864a 100644
>> --- a/include/uapi/linux/fs.h
>> +++ b/include/uapi/linux/fs.h
>> @@ -170,7 +170,7 @@ struct blk_user_cbt_snap_create {
>> };
>> #define BLKCBTSTART _IOR(0x12,200, struct blk_user_cbt_info)
>> -#define BLKCBTSTOP _IO(0x12,201)
>> +#define BLKCBTSTOP _IOR(0x12, 201, struct blk_user_cbt_info)
>> #define BLKCBTGET _IOWR(0x12,202,struct blk_user_cbt_info)
>> #define BLKCBTSET _IOR(0x12,203,struct blk_user_cbt_info)
>> #define BLKCBTCLR _IOR(0x12,204,struct blk_user_cbt_info)
>