[Devel] [PATCH VZ9 2/3] block/blk-cbt: allow multiple cbts in a single queue

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Thu Jan 23 11:05:25 MSK 2025


General comment:

Do we need a restriction on the maximum number of cbts? E.g. the next 
patch lets userspace read the list of cbts; if there are too many of 
them, allocating a buffer big enough to fetch them all in one go may 
not be trivial.
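
A minimal sketch of such a cap (CBT_MAX_COUNT and -ENOSPC are 
illustrative, the actual limit and errno would need discussion), to be 
done in cbt_ioc_init() under cbt_mutex before do_cbt_alloc():

	#define CBT_MAX_COUNT	64	/* hypothetical limit */

	struct cbt_info *tmp;
	unsigned int nr = 0;

	/* count the cbts already attached to this queue */
	list_for_each_entry(tmp, &q->cbt_list, list)
		nr++;
	if (nr >= CBT_MAX_COUNT) {
		mutex_unlock(&cbt_mutex);
		return -ENOSPC;
	}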

On 1/20/25 16:34, Andrey Zhadchenko wrote:
> Store cbts in a list instead of a single pointer.
> Update all APIs to work with user-specified bitmap.
> 
> https://virtuozzo.atlassian.net/browse/VSTOR-96269
> Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
> ---
>   block/blk-cbt.c         | 174 +++++++++++++++++++++-------------------
>   block/blk-core.c        |   2 +
>   include/linux/blkdev.h  |   2 +-
>   include/uapi/linux/fs.h |   2 +-
>   4 files changed, 96 insertions(+), 84 deletions(-)
> 
> diff --git a/block/blk-cbt.c b/block/blk-cbt.c
> index 8ae993143098..8b0d07ee757c 100644
> --- a/block/blk-cbt.c
> +++ b/block/blk-cbt.c
> @@ -53,6 +53,7 @@ struct cbt_info {
>   	blkcnt_t snp_block_max;
>   
>   	spinlock_t lock;
> +	struct list_head list;
>   };
>   
>   
> @@ -112,7 +113,11 @@ static int cbt_page_alloc(struct cbt_info  **cbt_pp, unsigned long idx,
>   	if (likely(!test_bit(CBT_DEAD, &cbt->flags))) {
>   		cbt->count++;
>   	} else {
> -		struct cbt_info *new = rcu_dereference(cbt->queue->cbt);
> +		struct cbt_info *i, *new = NULL;
> +
> +		list_for_each_entry_rcu(i, &cbt->queue->cbt_list, list)
> +			if (!memcmp(i->name, cbt->name, sizeof(cbt->name)))
> +				new = i;
>   
>   		spin_unlock_irq(&cbt->lock);
>   		/* was cbt updated ? */
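
Nit: the lookup can stop at the first match:

	list_for_each_entry_rcu(i, &cbt->queue->cbt_list, list)
		if (!memcmp(i->name, cbt->name, sizeof(cbt->name))) {
			new = i;
			break;
		}
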
> @@ -216,33 +221,26 @@ static int __blk_cbt_set(struct cbt_info  *cbt, blkcnt_t block,
>   	return (pages_missed && *pages_missed) ? -EAGAIN : 0;
>   }
>   
> -static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
> +static void blk_cbt_add(struct cbt_info *cbt, blkcnt_t start, blkcnt_t len)
>   {
> -	struct cbt_info *cbt;
>   	struct cbt_extent *ex;
>   	struct cbt_extent old;
>   	blkcnt_t end;
> -	/* Check per-cpu cache */
> -
> -	rcu_read_lock();
> -	cbt = rcu_dereference(q->cbt);
> -	if (unlikely(!cbt))
> -		goto out_rcu;
>   
>   	if (unlikely(test_bit(CBT_ERROR, &cbt->flags)))
> -		goto out_rcu;
> +		return;
>   	end = DIV_ROUND_UP(start + len, 1 << cbt->block_bits);
>   	start >>= cbt->block_bits;
>   	len = end - start;
>   	if (unlikely(test_bit(CBT_NOCACHE, &cbt->flags))) {
>   		__blk_cbt_set(cbt, start, len, 1, 1, NULL, NULL);
> -		goto out_rcu;
> +		return;
>   	}
>   	ex = get_cpu_ptr(cbt->cache);
>   	if (ex->start + ex->len == start) {
>   		ex->len += len;
>   		put_cpu_ptr(cbt->cache);
> -		goto out_rcu;
> +		return;
>   	}
>   	old = *ex;
>   	ex->start = start;
> @@ -251,18 +249,23 @@ static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
>   
>   	if (likely(old.len))
>   		__blk_cbt_set(cbt, old.start, old.len, 1, 1, NULL, NULL);
> +}
> +
> +void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
> +{
> +	struct cbt_info *cbt;
> +
> +	rcu_read_lock();
> +	if (list_empty(&q->cbt_list) || bio_data_dir(bio) == READ || !bio->bi_iter.bi_size)
> +		goto out_rcu;
> +
> +	list_for_each_entry_rcu(cbt, &q->cbt_list, list)
> +		blk_cbt_add(cbt, bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
> +
>   out_rcu:
>   	rcu_read_unlock();
>   }
>   
> -inline void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
> -{
> -	if (!q->cbt || bio_data_dir(bio) == READ || !bio->bi_iter.bi_size)
> -		return;
> -
> -	blk_cbt_add(q, bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
> -}
> -
>   static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *name,
>   				     loff_t size, loff_t blocksize)
>   {
> @@ -272,6 +275,7 @@ static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *name,
>   	if (!cbt)
>   		return ERR_PTR(-ENOMEM);
>   
> +	INIT_LIST_HEAD(&cbt->list);
>   	cbt->block_bits = ilog2(blocksize);
>   	cbt->block_max  = DIV_ROUND_UP(size, blocksize);
>   	spin_lock_init(&cbt->lock);
> @@ -365,6 +369,17 @@ static int copy_cbt_to_user(struct page **map, unsigned long size,
>           return 0;
>   }
>   
> +static struct cbt_info *blk_cbt_find(struct request_queue *q, __u8 *name)
> +{
> +	struct cbt_info *cbt;
> +
> +	list_for_each_entry(cbt, &q->cbt_list, list)
> +		if (!memcmp(name, cbt->name, CBT_NAME_LENGTH))
> +			return cbt;
> +
> +	return NULL;
> +}
> +
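
All callers of blk_cbt_find() hold cbt_mutex, which the plain 
list_for_each_entry() here relies on; perhaps document and enforce 
that with a one-liner at the top of the function:

	lockdep_assert_held(&cbt_mutex);
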
>   static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>   			       struct blk_user_cbt_snap_create __user *arg)
>   {
> @@ -382,8 +397,7 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>   		return -EFAULT;
>   
>   	mutex_lock(&cbt_mutex);
> -	cbt = q->cbt;
> -
> +	cbt = blk_cbt_find(q, name);
>   	if (!cbt) {
>   		mutex_unlock(&cbt_mutex);
>   		return -ENOENT;
> @@ -392,11 +406,6 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
>   	BUG_ON(!cbt->map);
>   	BUG_ON(!cbt->block_max);
>   
> -	if (!name || memcmp(name, cbt->name, sizeof(cbt->name))) {
> -		mutex_unlock(&cbt_mutex);
> -		return -EINVAL;
> -	}
> -
>   	if (cbt->snp_map) {
>   		mutex_unlock(&cbt_mutex);
>   		return -EBUSY;
> @@ -461,7 +470,7 @@ static int blk_cbt_snap_drop(struct request_queue *q, __u8 *name)
>   	int ret;
>   
>   	mutex_lock(&cbt_mutex);
> -	cbt = q->cbt;
> +	cbt = blk_cbt_find(q, name);
>   
>   	ret = -ENOENT;
>   	if (!cbt)
> @@ -470,10 +479,6 @@ static int blk_cbt_snap_drop(struct request_queue *q, __u8 *name)
>   	BUG_ON(!cbt->map);
>   	BUG_ON(!cbt->block_max);
>   
> -	ret = -EINVAL;
> -	if (!name || memcmp(name, cbt->name, sizeof(cbt->name)))
> -		goto out;
> -
>   	ret = -ENODEV;
>   	map = cbt->snp_map;
>   	if (!map)
> @@ -511,7 +516,7 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>   	int ret;
>   
>   	mutex_lock(&cbt_mutex);
> -	cbt = q->cbt;
> +	cbt = blk_cbt_find(q, name);
>   
>   	ret = -ENOENT;
>   	if (!cbt)
> @@ -520,10 +525,6 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>   	BUG_ON(!cbt->map);
>   	BUG_ON(!cbt->block_max);
>   
> -	ret = -EINVAL;
> -	if (!name || memcmp(name, cbt->name, sizeof(cbt->name)))
> -		goto out;
> -
>   	map = cbt->snp_map;
>   	block_max = cbt->snp_block_max;
>   	ret = -ENODEV;
> @@ -568,33 +569,21 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
>   	return ret;
>   }
>   
> -void blk_cbt_update_size(struct block_device *bdev)
> +static void blk_cbt_update_cbt_size(struct cbt_info *cbt, loff_t new_sz)
>   {
> -	struct request_queue *q;
> -	struct cbt_info *new, *cbt;
> +	struct cbt_info *new;
>   	unsigned long to_cpy, idx;
>   	unsigned bsz;
> -	loff_t new_sz = i_size_read(bdev->bd_inode);
>   	int in_use = 0;
>   
> -	if (!bdev->bd_disk || !bdev_get_queue(bdev))
> -		return;
> -
> -	q = bdev_get_queue(bdev);
> -	mutex_lock(&cbt_mutex);
> -	cbt = q->cbt;
> -	if (!cbt) {
> -		mutex_unlock(&cbt_mutex);
> -		return;
> -	}
>   	bsz = 1 << cbt->block_bits;
>   	if (DIV_ROUND_UP(new_sz, bsz) <= cbt->block_max)
> -		goto err_mtx;
> +		return;
>   
> -	new = do_cbt_alloc(q, cbt->name, new_sz, bsz);
> +	new = do_cbt_alloc(cbt->queue, cbt->name, new_sz, bsz);
>   	if (IS_ERR(new)) {
>   		set_bit(CBT_ERROR, &cbt->flags);
> -		goto err_mtx;
> +		return;
>   	}
>   	to_cpy = NR_PAGES(cbt->block_max);
>   	set_bit(CBT_NOCACHE, &cbt->flags);
> @@ -606,15 +595,27 @@ void blk_cbt_update_size(struct block_device *bdev)
>   		if (CBT_PAGE(new, idx))
>   			get_page(CBT_PAGE(new, idx));
>   	}
> -	rcu_assign_pointer(q->cbt, new);
> +	list_replace_rcu(&cbt->list, &new->list);
>   	in_use = cbt->count;
>   	spin_unlock_irq(&cbt->lock);
>   	if (!in_use)
>   		call_rcu(&cbt->rcu, &cbt_release_callback);
> -err_mtx:
> +}
> +
> +void blk_cbt_update_size(struct block_device *bdev)
> +{
> +	struct request_queue *q;
> +	struct cbt_info *cbt;
> +
> +	if (!bdev->bd_disk || !bdev_get_queue(bdev))
> +		return;
> +
> +	q = bdev_get_queue(bdev);
> +	mutex_lock(&cbt_mutex);
> +	list_for_each_entry(cbt, &q->cbt_list, list)
> +		blk_cbt_update_cbt_size(cbt, i_size_read(bdev->bd_inode));
> +
>   	mutex_unlock(&cbt_mutex);
> -
> -
>   }
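
blk_cbt_update_cbt_size() can replace the entry we are standing on via 
list_replace_rcu(), so the loop above then advances from a node that 
is no longer on the list. That happens to work because 
list_replace_rcu() leaves old->next intact, but it is subtle; the 
_safe variant would not depend on it:

	struct cbt_info *cbt, *tmp;

	list_for_each_entry_safe(cbt, tmp, &q->cbt_list, list)
		blk_cbt_update_cbt_size(cbt, i_size_read(bdev->bd_inode));
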
>   
>   static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
> @@ -632,16 +633,13 @@ static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __us
>   
>   	q = bdev_get_queue(bdev);
>   	mutex_lock(&cbt_mutex);
> -	if (q->cbt) {
> -		ret = -EBUSY;
> -		goto err_mtx;
> -	}
> +
>   	cbt = do_cbt_alloc(q, ci.ci_name, i_size_read(bdev->bd_inode), ci.ci_blksize);
>   	if (IS_ERR(cbt))
>   		ret = PTR_ERR(cbt);
>   	else
> -		rcu_assign_pointer(q->cbt, cbt);
> -err_mtx:
> +		list_add_tail_rcu(&q->cbt_list, &cbt->list);
> +
>   	mutex_unlock(&cbt_mutex);
>   	return ret;
>   }
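
Two problems in this hunk. First, the arguments to list_add_tail_rcu() 
are swapped: the new entry comes first, the list head second, so as 
written this splices the queue's list head onto the cbt instead of the 
other way around. Second, with the -EBUSY check gone, nothing prevents 
two cbts with the same name, which blk_cbt_find() then cannot tell 
apart. Something like this should cover both (sketch):

	mutex_lock(&cbt_mutex);

	if (blk_cbt_find(q, ci.ci_name)) {
		mutex_unlock(&cbt_mutex);
		return -EBUSY;
	}

	cbt = do_cbt_alloc(q, ci.ci_name, i_size_read(bdev->bd_inode), ci.ci_blksize);
	if (IS_ERR(cbt))
		ret = PTR_ERR(cbt);
	else
		list_add_tail_rcu(&cbt->list, &q->cbt_list);

	mutex_unlock(&cbt_mutex);
	return ret;
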
> @@ -667,35 +665,46 @@ static void cbt_release_callback(struct rcu_head *head)
>   	kfree(cbt);
>   }
>   
> -void blk_cbt_release(struct request_queue *q)
> +static void blk_cbt_del(struct cbt_info *cbt)
>   {
> -	struct cbt_info *cbt;
> -	int in_use = 0;
>   	unsigned long flags;
> +	int in_use;
>   
> -	cbt = q->cbt;
> -	if (!cbt)
> -		return;
>   	spin_lock_irqsave(&cbt->lock, flags);
>   	set_bit(CBT_DEAD, &cbt->flags);
> -	rcu_assign_pointer(q->cbt, NULL);
> +	list_del_rcu(&cbt->list);
>   	in_use = cbt->count;
>   	spin_unlock_irqrestore(&cbt->lock, flags);
>   	if (!in_use)
>   		call_rcu(&cbt->rcu, &cbt_release_callback);
>   }
>   
> -static int cbt_ioc_stop(struct block_device *bdev)
> +void blk_cbt_release(struct request_queue *q)
>   {
> +	struct cbt_info *cbt, *tmp;
> +
> +	list_for_each_entry_safe(cbt, tmp, &q->cbt_list, list)
> +		blk_cbt_del(cbt);
> +}
> +
> +static int cbt_ioc_stop(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
> +{
> +	struct blk_user_cbt_info ci;
>   	struct request_queue *q;
> +	struct cbt_info *cbt;
> +
> +	if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
> +		return -EFAULT;
>   
>   	mutex_lock(&cbt_mutex);
>   	q = bdev_get_queue(bdev);
> -	if(!q->cbt) {
> -		mutex_unlock(&cbt_mutex);
> -		return -EINVAL;
> -	}
> -	blk_cbt_release(q);
> +
> +	cbt = blk_cbt_find(q, ci.ci_name);
> +	if (!cbt)
> +		return -ENOMEM;
> +
> +	blk_cbt_del(cbt);
> +
>   	mutex_unlock(&cbt_mutex);
>   	return 0;
>   }
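
The -ENOENT return above leaves cbt_mutex held, and -ENOMEM looks like 
a typo for -ENOENT, which is what the snapshot helpers use for "no 
such cbt":

	cbt = blk_cbt_find(q, ci.ci_name);
	if (!cbt) {
		mutex_unlock(&cbt_mutex);
		return -ENOENT;
	}
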
> @@ -839,7 +848,8 @@ static int cbt_ioc_get(struct block_device *bdev, struct blk_user_cbt_info __use
>   	ret = -EINVAL;
>   	q = bdev_get_queue(bdev);
>   	mutex_lock(&cbt_mutex);
> -	cbt = q->cbt;
> +
> +	cbt = blk_cbt_find(q, ci.ci_name);
>   	if (!cbt ||
>   	    (ci.ci_start >> cbt->block_bits) > cbt->block_max)
>   		goto ioc_get_failed;
> @@ -925,7 +935,7 @@ static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __use
>   
>   	ret = -EINVAL;
>   	mutex_lock(&cbt_mutex);
> -	cbt = q->cbt;
> +	cbt = blk_cbt_find(q, ci.ci_name);
>   	if (!cbt)
>   		goto ioc_set_failed;
>   
> @@ -948,8 +958,8 @@ static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __use
>   
>   		ex.start  = cur_ex->ce_physical >> cbt->block_bits;
>   		ex.len  = DIV_ROUND_UP(cur_ex->ce_length, 1 << cbt->block_bits);
> -		if (ex.start > q->cbt->block_max ||
> -		    ex.start + ex.len > q->cbt->block_max ||
> +		if (ex.start > cbt->block_max ||
> +		    ex.start + ex.len > cbt->block_max ||
>   		    ex.len == 0) {
>   			ret = -EINVAL;
>   			break;
> @@ -1007,7 +1017,7 @@ int blk_cbt_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
>   	case BLKCBTSTART:
>   		return cbt_ioc_init(bdev, ucbt_ioc);
>   	case BLKCBTSTOP:
> -		return cbt_ioc_stop(bdev);
> +		return cbt_ioc_stop(bdev, ucbt_ioc);
>   	case BLKCBTSET:
>   		return cbt_ioc_set(bdev, ucbt_ioc, 1);
>   	case BLKCBTCLR:
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 6abad29dd501..e3aa923f399e 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -428,6 +428,8 @@ struct request_queue *blk_alloc_queue(int node_id)
>   	init_waitqueue_head(&q->mq_freeze_wq);
>   	mutex_init(&q->mq_freeze_lock);
>   
> +	INIT_LIST_HEAD(&q->cbt_list);
> +
>   	blkg_init_queue(q);
>   
>   	/*
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index fcbc83909b91..3009964707e6 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -520,7 +520,7 @@ struct request_queue {
>   	struct throtl_data *td;
>   #endif
>   #ifdef CONFIG_BLK_DEV_CBT
> -	struct cbt_info *cbt;
> +	struct list_head cbt_list;
>   #endif
>   	struct rcu_head		rcu_head;
>   	wait_queue_head_t	mq_freeze_wq;
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 47efa7ef05a0..667157fe864a 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -170,7 +170,7 @@ struct blk_user_cbt_snap_create {
>   };
>   
>   #define BLKCBTSTART _IOR(0x12,200, struct blk_user_cbt_info)
> -#define BLKCBTSTOP _IO(0x12,201)
> +#define BLKCBTSTOP _IOR(0x12, 201, struct blk_user_cbt_info)
>   #define BLKCBTGET _IOWR(0x12,202,struct blk_user_cbt_info)
>   #define BLKCBTSET _IOR(0x12,203,struct blk_user_cbt_info)
>   #define BLKCBTCLR _IOR(0x12,204,struct blk_user_cbt_info)
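
Note that changing BLKCBTSTOP from _IO() to _IOR() changes the encoded 
ioctl number (direction and size are part of it), so binaries built 
against the old definition stop matching. And since the name is copied 
*from* userspace, _IOW() would describe the data flow better, e.g.:

	#define BLKCBTSTOP _IOW(0x12, 201, struct blk_user_cbt_info)

though admittedly BLKCBTSET and BLKCBTCLR already use _IOR() the same 
way.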

-- 
Best regards, Tikhomirov Pavel
Senior Software Developer, Virtuozzo.


