[Devel] [PATCH VZ9 2/3] block/blk-cbt: allow multiple cbts in a single queue

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Mon Jan 20 11:34:05 MSK 2025


Store cbts in a list instead of a single pointer, so that several
change-tracking bitmaps can coexist on one request queue.
Update all APIs to look a bitmap up by the user-specified name.
Note: BLKCBTSTOP now takes a struct blk_user_cbt_info argument to
select which bitmap to stop, which changes its ioctl number (old
binaries issuing the argument-less BLKCBTSTOP will get -ENOTTY).

https://virtuozzo.atlassian.net/browse/VSTOR-96269
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 block/blk-cbt.c         | 174 +++++++++++++++++++++-------------------
 block/blk-core.c        |   2 +
 include/linux/blkdev.h  |   2 +-
 include/uapi/linux/fs.h |   2 +-
 4 files changed, 96 insertions(+), 84 deletions(-)

diff --git a/block/blk-cbt.c b/block/blk-cbt.c
index 8ae993143098..8b0d07ee757c 100644
--- a/block/blk-cbt.c
+++ b/block/blk-cbt.c
@@ -53,6 +53,7 @@ struct cbt_info {
 	blkcnt_t snp_block_max;
 
 	spinlock_t lock;
+	struct list_head list;
 };
 
 
@@ -112,7 +113,11 @@ static int cbt_page_alloc(struct cbt_info  **cbt_pp, unsigned long idx,
 	if (likely(!test_bit(CBT_DEAD, &cbt->flags))) {
 		cbt->count++;
 	} else {
-		struct cbt_info *new = rcu_dereference(cbt->queue->cbt);
+		struct cbt_info *i, *new = NULL;
+
+		list_for_each_entry_rcu(i, &cbt->queue->cbt_list, list)
+			if (!memcmp(i->name, cbt->name, sizeof(cbt->name)))
+				new = i;
 
 		spin_unlock_irq(&cbt->lock);
 		/* was cbt updated ? */
@@ -216,33 +221,26 @@ static int __blk_cbt_set(struct cbt_info  *cbt, blkcnt_t block,
 	return (pages_missed && *pages_missed) ? -EAGAIN : 0;
 }
 
-static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
+static void blk_cbt_add(struct cbt_info *cbt, blkcnt_t start, blkcnt_t len)
 {
-	struct cbt_info *cbt;
 	struct cbt_extent *ex;
 	struct cbt_extent old;
 	blkcnt_t end;
-	/* Check per-cpu cache */
-
-	rcu_read_lock();
-	cbt = rcu_dereference(q->cbt);
-	if (unlikely(!cbt))
-		goto out_rcu;
 
 	if (unlikely(test_bit(CBT_ERROR, &cbt->flags)))
-		goto out_rcu;
+		return;
 	end = DIV_ROUND_UP(start + len, 1 << cbt->block_bits);
 	start >>= cbt->block_bits;
 	len = end - start;
 	if (unlikely(test_bit(CBT_NOCACHE, &cbt->flags))) {
 		__blk_cbt_set(cbt, start, len, 1, 1, NULL, NULL);
-		goto out_rcu;
+		return;
 	}
 	ex = get_cpu_ptr(cbt->cache);
 	if (ex->start + ex->len == start) {
 		ex->len += len;
 		put_cpu_ptr(cbt->cache);
-		goto out_rcu;
+		return;
 	}
 	old = *ex;
 	ex->start = start;
@@ -251,18 +249,23 @@ static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
 
 	if (likely(old.len))
 		__blk_cbt_set(cbt, old.start, old.len, 1, 1, NULL, NULL);
+}
+
+void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
+{
+	struct cbt_info *cbt;
+
+	rcu_read_lock();
+	if (list_empty(&q->cbt_list) || bio_data_dir(bio) == READ || !bio->bi_iter.bi_size)
+		goto out_rcu;
+
+	list_for_each_entry_rcu(cbt, &q->cbt_list, list)
+		blk_cbt_add(cbt, bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
+
 out_rcu:
 	rcu_read_unlock();
 }
 
-inline void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
-{
-	if (!q->cbt || bio_data_dir(bio) == READ || !bio->bi_iter.bi_size)
-		return;
-
-	blk_cbt_add(q, bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
-}
-
 static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *name,
 				     loff_t size, loff_t blocksize)
 {
@@ -272,6 +275,7 @@ static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *name,
 	if (!cbt)
 		return ERR_PTR(-ENOMEM);
 
+	INIT_LIST_HEAD(&cbt->list);
 	cbt->block_bits = ilog2(blocksize);
 	cbt->block_max  = DIV_ROUND_UP(size, blocksize);
 	spin_lock_init(&cbt->lock);
@@ -365,6 +369,17 @@ static int copy_cbt_to_user(struct page **map, unsigned long size,
         return 0;
 }
 
+static struct cbt_info *blk_cbt_find(struct request_queue *q, __u8 *name)
+{
+	struct cbt_info *cbt;
+
+	list_for_each_entry(cbt, &q->cbt_list, list)
+		if (!memcmp(name, cbt->name, CBT_NAME_LENGTH))
+			return cbt;
+
+	return NULL;
+}
+
 static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
 			       struct blk_user_cbt_snap_create __user *arg)
 {
@@ -382,8 +397,7 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
 		return -EFAULT;
 
 	mutex_lock(&cbt_mutex);
-	cbt = q->cbt;
-
+	cbt = blk_cbt_find(q, name);
 	if (!cbt) {
 		mutex_unlock(&cbt_mutex);
 		return -ENOENT;
@@ -392,11 +406,6 @@ static int blk_cbt_snap_create(struct request_queue *q, __u8 *name,
 	BUG_ON(!cbt->map);
 	BUG_ON(!cbt->block_max);
 
-	if (!name || memcmp(name, cbt->name, sizeof(cbt->name))) {
-		mutex_unlock(&cbt_mutex);
-		return -EINVAL;
-	}
-
 	if (cbt->snp_map) {
 		mutex_unlock(&cbt_mutex);
 		return -EBUSY;
@@ -461,7 +470,7 @@ static int blk_cbt_snap_drop(struct request_queue *q, __u8 *name)
 	int ret;
 
 	mutex_lock(&cbt_mutex);
-	cbt = q->cbt;
+	cbt = blk_cbt_find(q, name);
 
 	ret = -ENOENT;
 	if (!cbt)
@@ -470,10 +479,6 @@ static int blk_cbt_snap_drop(struct request_queue *q, __u8 *name)
 	BUG_ON(!cbt->map);
 	BUG_ON(!cbt->block_max);
 
-	ret = -EINVAL;
-	if (!name || memcmp(name, cbt->name, sizeof(cbt->name)))
-		goto out;
-
 	ret = -ENODEV;
 	map = cbt->snp_map;
 	if (!map)
@@ -511,7 +516,7 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
 	int ret;
 
 	mutex_lock(&cbt_mutex);
-	cbt = q->cbt;
+	cbt = blk_cbt_find(q, name);
 
 	ret = -ENOENT;
 	if (!cbt)
@@ -520,10 +525,6 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
 	BUG_ON(!cbt->map);
 	BUG_ON(!cbt->block_max);
 
-	ret = -EINVAL;
-	if (!name || memcmp(name, cbt->name, sizeof(cbt->name)))
-		goto out;
-
 	map = cbt->snp_map;
 	block_max = cbt->snp_block_max;
 	ret = -ENODEV;
@@ -568,33 +569,21 @@ static int blk_cbt_snap_merge_back(struct request_queue *q, __u8 *name)
 	return ret;
 }
 
-void blk_cbt_update_size(struct block_device *bdev)
+static void blk_cbt_update_cbt_size(struct cbt_info *cbt, loff_t new_sz)
 {
-	struct request_queue *q;
-	struct cbt_info *new, *cbt;
+	struct cbt_info *new;
 	unsigned long to_cpy, idx;
 	unsigned bsz;
-	loff_t new_sz = i_size_read(bdev->bd_inode);
 	int in_use = 0;
 
-	if (!bdev->bd_disk || !bdev_get_queue(bdev))
-		return;
-
-	q = bdev_get_queue(bdev);
-	mutex_lock(&cbt_mutex);
-	cbt = q->cbt;
-	if (!cbt) {
-		mutex_unlock(&cbt_mutex);
-		return;
-	}
 	bsz = 1 << cbt->block_bits;
 	if (DIV_ROUND_UP(new_sz, bsz) <= cbt->block_max)
-		goto err_mtx;
+		return;
 
-	new = do_cbt_alloc(q, cbt->name, new_sz, bsz);
+	new = do_cbt_alloc(cbt->queue, cbt->name, new_sz, bsz);
 	if (IS_ERR(new)) {
 		set_bit(CBT_ERROR, &cbt->flags);
-		goto err_mtx;
+		return;
 	}
 	to_cpy = NR_PAGES(cbt->block_max);
 	set_bit(CBT_NOCACHE, &cbt->flags);
@@ -606,15 +595,27 @@ void blk_cbt_update_size(struct block_device *bdev)
 		if (CBT_PAGE(new, idx))
 			get_page(CBT_PAGE(new, idx));
 	}
-	rcu_assign_pointer(q->cbt, new);
+	list_replace_rcu(&cbt->list, &new->list);
 	in_use = cbt->count;
 	spin_unlock_irq(&cbt->lock);
 	if (!in_use)
 		call_rcu(&cbt->rcu, &cbt_release_callback);
-err_mtx:
+}
+
+void blk_cbt_update_size(struct block_device *bdev)
+{
+	struct request_queue *q;
+	struct cbt_info *cbt;
+
+	if (!bdev->bd_disk || !bdev_get_queue(bdev))
+		return;
+
+	q = bdev_get_queue(bdev);
+	mutex_lock(&cbt_mutex);
+	list_for_each_entry(cbt, &q->cbt_list, list)
+		blk_cbt_update_cbt_size(cbt, i_size_read(bdev->bd_inode));
+
 	mutex_unlock(&cbt_mutex);
-
-
 }
 
 static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
@@ -632,16 +633,16 @@ static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __us
 
 	q = bdev_get_queue(bdev);
 	mutex_lock(&cbt_mutex);
-	if (q->cbt) {
-		ret = -EBUSY;
-		goto err_mtx;
-	}
+	if (blk_cbt_find(q, ci.ci_name)) {
+		ret = -EBUSY;
+		goto err_mtx;
+	}
 	cbt = do_cbt_alloc(q, ci.ci_name, i_size_read(bdev->bd_inode), ci.ci_blksize);
 	if (IS_ERR(cbt))
 		ret = PTR_ERR(cbt);
 	else
-		rcu_assign_pointer(q->cbt, cbt);
-err_mtx:
+		list_add_tail_rcu(&cbt->list, &q->cbt_list);
+err_mtx:
 	mutex_unlock(&cbt_mutex);
 	return ret;
 }
@@ -667,35 +665,46 @@ static void cbt_release_callback(struct rcu_head *head)
 	kfree(cbt);
 }
 
-void blk_cbt_release(struct request_queue *q)
+static void blk_cbt_del(struct cbt_info *cbt)
 {
-	struct cbt_info *cbt;
-	int in_use = 0;
 	unsigned long flags;
+	int in_use;
 
-	cbt = q->cbt;
-	if (!cbt)
-		return;
 	spin_lock_irqsave(&cbt->lock, flags);
 	set_bit(CBT_DEAD, &cbt->flags);
-	rcu_assign_pointer(q->cbt, NULL);
+	list_del_rcu(&cbt->list);
 	in_use = cbt->count;
 	spin_unlock_irqrestore(&cbt->lock, flags);
 	if (!in_use)
 		call_rcu(&cbt->rcu, &cbt_release_callback);
 }
 
-static int cbt_ioc_stop(struct block_device *bdev)
+void blk_cbt_release(struct request_queue *q)
 {
+	struct cbt_info *cbt, *tmp;
+
+	list_for_each_entry_safe(cbt, tmp, &q->cbt_list, list)
+		blk_cbt_del(cbt);
+}
+
+static int cbt_ioc_stop(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
+{
+	struct blk_user_cbt_info ci;
 	struct request_queue *q;
+	struct cbt_info *cbt;
+
+	if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
+		return -EFAULT;
 
 	mutex_lock(&cbt_mutex);
 	q = bdev_get_queue(bdev);
-	if(!q->cbt) {
-		mutex_unlock(&cbt_mutex);
-		return -EINVAL;
-	}
-	blk_cbt_release(q);
+
+	cbt = blk_cbt_find(q, ci.ci_name);
+	if (!cbt) {
+		mutex_unlock(&cbt_mutex);
+		return -ENOENT;
+	}
+	blk_cbt_del(cbt);
 	mutex_unlock(&cbt_mutex);
 	return 0;
 }
@@ -839,7 +848,8 @@ static int cbt_ioc_get(struct block_device *bdev, struct blk_user_cbt_info __use
 	ret = -EINVAL;
 	q = bdev_get_queue(bdev);
 	mutex_lock(&cbt_mutex);
-	cbt = q->cbt;
+
+	cbt = blk_cbt_find(q, ci.ci_name);
 	if (!cbt ||
 	    (ci.ci_start >> cbt->block_bits) > cbt->block_max)
 		goto ioc_get_failed;
@@ -925,7 +935,7 @@ static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __use
 
 	ret = -EINVAL;
 	mutex_lock(&cbt_mutex);
-	cbt = q->cbt;
+	cbt = blk_cbt_find(q, ci.ci_name);
 	if (!cbt)
 		goto ioc_set_failed;
 
@@ -948,8 +958,8 @@ static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __use
 
 		ex.start  = cur_ex->ce_physical >> cbt->block_bits;
 		ex.len  = DIV_ROUND_UP(cur_ex->ce_length, 1 << cbt->block_bits);
-		if (ex.start > q->cbt->block_max ||
-		    ex.start + ex.len > q->cbt->block_max ||
+		if (ex.start > cbt->block_max ||
+		    ex.start + ex.len > cbt->block_max ||
 		    ex.len == 0) {
 			ret = -EINVAL;
 			break;
@@ -1007,7 +1017,7 @@ int blk_cbt_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 	case BLKCBTSTART:
 		return cbt_ioc_init(bdev, ucbt_ioc);
 	case BLKCBTSTOP:
-		return cbt_ioc_stop(bdev);
+		return cbt_ioc_stop(bdev, ucbt_ioc);
 	case BLKCBTSET:
 		return cbt_ioc_set(bdev, ucbt_ioc, 1);
 	case BLKCBTCLR:
diff --git a/block/blk-core.c b/block/blk-core.c
index 6abad29dd501..e3aa923f399e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -428,6 +428,8 @@ struct request_queue *blk_alloc_queue(int node_id)
 	init_waitqueue_head(&q->mq_freeze_wq);
 	mutex_init(&q->mq_freeze_lock);
 
+	INIT_LIST_HEAD(&q->cbt_list);
+
 	blkg_init_queue(q);
 
 	/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fcbc83909b91..3009964707e6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -520,7 +520,7 @@ struct request_queue {
 	struct throtl_data *td;
 #endif
 #ifdef CONFIG_BLK_DEV_CBT
-	struct cbt_info *cbt;
+	struct list_head cbt_list;
 #endif
 	struct rcu_head		rcu_head;
 	wait_queue_head_t	mq_freeze_wq;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 47efa7ef05a0..667157fe864a 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -170,7 +170,7 @@ struct blk_user_cbt_snap_create {
 };
 
 #define BLKCBTSTART _IOR(0x12,200, struct blk_user_cbt_info)
-#define BLKCBTSTOP _IO(0x12,201)
+#define BLKCBTSTOP _IOR(0x12, 201, struct blk_user_cbt_info)
 #define BLKCBTGET _IOWR(0x12,202,struct blk_user_cbt_info)
 #define BLKCBTSET _IOR(0x12,203,struct blk_user_cbt_info)
 #define BLKCBTCLR _IOR(0x12,204,struct blk_user_cbt_info)
-- 
2.39.3



More information about the Devel mailing list