[Devel] [PATCH RHEL7 COMMIT] cbt: introduce changed block tracking
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Oct 2 00:54:27 PDT 2015
The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.8.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.8.2
------>
commit 85f9d6ce47d7975b0b4902835470391aed597bca
Author: Dmitry Monakhov <dmonakhov at openvz.org>
Date: Fri Oct 2 11:54:26 2015 +0400
cbt: introduce changed block tracking
Combined patch list:
diff-cbt-add-changed-block-trace-infrastructure
diff-cbt-fixup-use-after-free-inside-__blk_cbt_set
diff-cbt-use-propper-mem-allocation-context
diff-cbt-support-blockdevice-size-update-v2
diff-cbt-blk_cbt_update_size-add-block_dev-sanity-check
diff-cbt-ignore-device-shrink
diff-block-cbt-fix-mistype-statement
diff-cbt-add-get_once-feature
diff-cbt-fix-bytes-to-block-conversion-bug
diff-cbt-add-missed-mutex_unlock
Only minor context fixes from original patches.
https://jira.sw.ru/browse/PSBM-34156
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
block/Kconfig | 8 +
block/Makefile | 1 +
block/blk-cbt.c | 605 ++++++++++++++++++++++++++++++++++++++++++++++++
block/blk-core.c | 1 +
block/blk-sysfs.c | 1 +
block/ioctl.c | 9 +-
drivers/md/dm.c | 2 +-
fs/block_dev.c | 9 +-
include/linux/blkdev.h | 14 ++
include/linux/fs.h | 1 +
include/uapi/linux/fs.h | 34 +++
11 files changed, 682 insertions(+), 3 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig
index a7e40a7..3d11f0c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -105,6 +105,14 @@ source "block/partitions/Kconfig"
endmenu
+config BLK_DEV_CBT
+ bool "Block layer changed block tracking support"
+ ---help---
+ Block layer changed block tracking support. It can be used to optimize
+ device backup and copy.
+
+ If unsure, say N.
+
endif # BLOCK
config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 21f4618..44f9426 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
+obj-$(CONFIG_BLK_DEV_CBT) += blk-cbt.o
diff --git a/block/blk-cbt.c b/block/blk-cbt.c
new file mode 100644
index 0000000..99d4a76
--- /dev/null
+++ b/block/blk-cbt.c
@@ -0,0 +1,605 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#define CBT_MAX_EXTENTS (UINT_MAX / sizeof(struct blk_user_cbt_extent))
+#define NR_PAGES(bits) (((bits) + PAGE_SIZE*8 - 1) / (PAGE_SIZE*8))
+#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3))
+
+static __cacheline_aligned_in_smp DEFINE_MUTEX(cbt_mutex);
+
+struct cbt_extent{
+ blkcnt_t start;
+ blkcnt_t len;
+};
+
+struct cbt_info {
+ __u8 uuid[16];
+ struct request_queue *queue;
+ blkcnt_t block_max;
+ blkcnt_t block_bits;
+ unsigned long flags;
+
+ struct rcu_head rcu;
+ unsigned int count;
+ struct cbt_extent __percpu *cache;
+ struct page **map;
+ spinlock_t lock;
+};
+
+
+enum CBT_FLAGS
+{
+ CBT_ERROR = 0,
+ CBT_DEAD = 1,
+ CBT_NOCACHE = 2,
+};
+static void cbt_release_callback(struct rcu_head *head);
+static void cbt_flush_cache(struct cbt_info *cbt);
+
+static inline void spin_lock_page(struct page *page)
+{
+ while(!trylock_page(page))
+ cpu_relax();
+}
+
+static void set_bits(void *bm, int cur, int len, bool is_set)
+{
+ __u32 *addr;
+ __u32 pattern = is_set? 0xffffffff : 0;
+
+ len = cur + len;
+ while (cur < len) {
+ if ((cur & 31) == 0 && (len - cur) >= 32) {
+ /* fast path: set whole word at once */
+ addr = bm + (cur >> 3);
+
+ *addr = pattern;
+ cur += 32;
+ continue;
+ }
+ if (is_set)
+ set_bit(cur, bm);
+ else
+ clear_bit(cur, bm);
+ cur++;
+ }
+}
+
+static int __blk_cbt_set(struct cbt_info *cbt, blkcnt_t block,
+ blkcnt_t count, bool in_rcu, bool set)
+{
+ struct page *page;
+
+ if (unlikely(block + count > cbt->block_max)) {
+ printk("WARN: %s eof access block:%lld, len: %lld, max:%lld\n",
+ __FUNCTION__, (unsigned long long) block,
+ (unsigned long long)count,
+ (unsigned long long)cbt->block_max);
+ set_bit(CBT_ERROR, &cbt->flags);
+ return -EINVAL;
+ }
+
+ while(count) {
+ unsigned long idx = block >> (PAGE_SHIFT + 3);
+ unsigned long off = block & (BITS_PER_PAGE -1);
+ unsigned long len = count & (BITS_PER_PAGE -1);
+
+ if (off + len > BITS_PER_PAGE)
+ len = BITS_PER_PAGE - off;
+ page = rcu_dereference(cbt->map[idx]);
+ if (page) {
+ spin_lock_page(page);
+ set_bits(page_address(page), off, len, set);
+ unlock_page(page);
+ count -= len;
+ block += len;
+ continue;
+ } else {
+ if (!set) {
+ len = count & (BITS_PER_PAGE -1);
+ count -= len;
+ block += len;
+ continue;
+ }
+ }
+ /* Page not allocated yet. Synchronization required */
+ spin_lock_irq(&cbt->lock);
+ if (likely(!test_bit(CBT_DEAD, &cbt->flags))) {
+ cbt->count++;
+ } else {
+ struct cbt_info *new = rcu_dereference(cbt->queue->cbt);
+
+ spin_unlock_irq(&cbt->lock);
+ /* was cbt updated ? */
+ if (new != cbt) {
+ cbt = new;
+ continue;
+ } else {
+ break;
+ }
+ }
+ spin_unlock_irq(&cbt->lock);
+ if (in_rcu)
+ rcu_read_unlock();
+ page = alloc_page(GFP_NOIO|__GFP_ZERO);
+ if (in_rcu)
+ rcu_read_lock();
+ spin_lock_irq(&cbt->lock);
+ if (unlikely(!cbt->count-- && test_bit(CBT_DEAD, &cbt->flags))) {
+ spin_unlock_irq(&cbt->lock);
+ call_rcu(&cbt->rcu, &cbt_release_callback);
+ if (page)
+ __free_page(page);
+ break;
+ }
+ if (unlikely(!page)) {
+ set_bit(CBT_ERROR, &cbt->flags);
+ spin_unlock_irq(&cbt->lock);
+ return -ENOMEM;
+ }
+ cbt->map[idx] = page;
+ page = NULL;
+ spin_unlock_irq(&cbt->lock);
+ }
+ return 0;
+}
+
+static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
+{
+ struct cbt_info *cbt;
+ struct cbt_extent *ex;
+ struct cbt_extent old;
+ blkcnt_t end;
+ /* Check per-cpu cache */
+
+ rcu_read_lock();
+ cbt = rcu_dereference(q->cbt);
+ if (unlikely(!cbt))
+ goto out_rcu;
+
+ if (unlikely(test_bit(CBT_ERROR, &cbt->flags)))
+ goto out_rcu;
+ end = (start + len + (1 << cbt->block_bits) -1) >> cbt->block_bits;
+ start >>= cbt->block_bits;
+ len = end - start;
+ if (unlikely(test_bit(CBT_NOCACHE, &cbt->flags))) {
+ __blk_cbt_set(cbt, start, len, 1, 1);
+ goto out_rcu;
+ }
+ ex = this_cpu_ptr(cbt->cache);
+ if (ex->start + ex->len == start) {
+ ex->len += len;
+ goto out_rcu;
+ }
+ old = *ex;
+ ex->start = start;
+ ex->len = len;
+
+ if (likely(old.len))
+ __blk_cbt_set(cbt, old.start, old.len, 1, 1);
+out_rcu:
+ rcu_read_unlock();
+}
+
+inline void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
+{
+ if (!q->cbt || bio_data_dir(bio) == READ || !bio->bi_size)
+ return;
+
+ blk_cbt_add(q, bio->bi_sector << 9, bio->bi_size);
+}
+
+static struct cbt_info* do_cbt_alloc(struct request_queue *q, __u8 *uuid,
+ loff_t size, loff_t blocksize)
+{
+ struct cbt_info *cbt;
+ struct cbt_extent *ex;
+ int i;
+
+
+ cbt = kzalloc(sizeof(*cbt), GFP_KERNEL);
+ if (!cbt)
+ return ERR_PTR(-ENOMEM);
+
+ cbt->block_bits = ilog2(blocksize);
+ cbt->block_max = (size + blocksize) >> cbt->block_bits;
+ spin_lock_init(&cbt->lock);
+ memcpy(cbt->uuid, uuid, sizeof(cbt->uuid));
+ cbt->cache = alloc_percpu(struct cbt_extent);
+ if (!cbt->cache)
+ goto err_cbt;
+
+ for_each_possible_cpu(i) {
+ ex = per_cpu_ptr(cbt->cache, i);
+ memset(ex, 0, sizeof (*ex));
+ }
+
+ cbt->map = vmalloc(NR_PAGES(cbt->block_max) * sizeof(void*));
+ if (!cbt->map)
+ goto err_pcpu;
+
+ memset(cbt->map, 0, NR_PAGES(cbt->block_max) * sizeof(void*));
+ cbt->queue = q;
+ return cbt;
+err_pcpu:
+ free_percpu(cbt->cache);
+err_cbt:
+ kfree(cbt);
+ return ERR_PTR(-ENOMEM);
+}
+
+
+void blk_cbt_update_size(struct block_device *bdev)
+{
+ struct request_queue *q;
+ struct cbt_info *new, *cbt;
+ unsigned long to_cpy, idx;
+ unsigned bsz;
+ loff_t new_sz = i_size_read(bdev->bd_inode);
+ int in_use = 0;
+
+ if (!bdev->bd_disk || !bdev_get_queue(bdev))
+ return;
+
+ q = bdev_get_queue(bdev);
+ mutex_lock(&cbt_mutex);
+ cbt = q->cbt;
+ if (!cbt) {
+ mutex_unlock(&cbt_mutex);
+ return;
+ }
+ bsz = 1 << cbt->block_bits;
+ if ((new_sz + bsz) >> cbt->block_bits <= cbt->block_max)
+ goto err_mtx;
+
+ new = do_cbt_alloc(q, cbt->uuid, new_sz, bsz);
+ if (IS_ERR(new)) {
+ set_bit(CBT_ERROR, &cbt->flags);
+ goto err_mtx;
+ }
+ to_cpy = NR_PAGES(new->block_max);
+ set_bit(CBT_NOCACHE, &cbt->flags);
+ cbt_flush_cache(cbt);
+ spin_lock_irq(&cbt->lock);
+ set_bit(CBT_DEAD, &cbt->flags);
+ for (idx = 0; idx < to_cpy; idx++){
+ new->map[idx] = cbt->map[idx];
+ if (new->map[idx])
+ get_page(new->map[idx]);
+ }
+ rcu_assign_pointer(q->cbt, new);
+ in_use = cbt->count;
+ spin_unlock(&cbt->lock);
+ if (!in_use)
+ call_rcu(&cbt->rcu, &cbt_release_callback);
+err_mtx:
+ mutex_unlock(&cbt_mutex);
+
+
+}
+
+static int cbt_ioc_init(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
+{
+ struct request_queue *q;
+ struct blk_user_cbt_info ci;
+ struct cbt_info *cbt;
+ int ret = 0;
+
+ if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
+ return -EFAULT;
+
+ if (((ci.ci_blksize -1) & ci.ci_blksize))
+ return -EINVAL;
+
+ q = bdev_get_queue(bdev);
+ mutex_lock(&cbt_mutex);
+ if (q->cbt) {
+ ret = -EBUSY;
+ goto err_mtx;
+ }
+ cbt = do_cbt_alloc(q, ci.ci_uuid, i_size_read(bdev->bd_inode), ci.ci_blksize);
+ if (IS_ERR(cbt))
+ ret = PTR_ERR(cbt);
+ else
+ rcu_assign_pointer(q->cbt, cbt);
+err_mtx:
+ mutex_unlock(&cbt_mutex);
+ return ret;
+}
+
+static void cbt_release_callback(struct rcu_head *head)
+{
+ struct cbt_info *cbt;
+ int nr_pages, i;
+
+ cbt = container_of(head, struct cbt_info, rcu);
+ nr_pages = NR_PAGES(cbt->block_max);
+ for (i = 0; i < nr_pages; i++)
+ if (cbt->map[i])
+ __free_page(cbt->map[i]);
+
+ vfree(cbt->map);
+ free_percpu(cbt->cache);
+ kfree(cbt);
+}
+
+void blk_cbt_release(struct request_queue *q)
+{
+ struct cbt_info *cbt;
+ int in_use = 0;
+
+ cbt = q->cbt;
+ if (!cbt)
+ return;
+ spin_lock(&cbt->lock);
+ set_bit(CBT_DEAD, &cbt->flags);
+ rcu_assign_pointer(q->cbt, NULL);
+ in_use = cbt->count;
+ spin_unlock(&cbt->lock);
+ if (in_use)
+ call_rcu(&cbt->rcu, &cbt_release_callback);
+}
+
+static int cbt_ioc_stop(struct block_device *bdev)
+{
+ struct request_queue *q;
+
+ mutex_lock(&cbt_mutex);
+ q = bdev_get_queue(bdev);
+ if(!q->cbt) {
+ mutex_unlock(&cbt_mutex);
+ return -EINVAL;
+ }
+ blk_cbt_release(q);
+ mutex_unlock(&cbt_mutex);
+ return 0;
+}
+
+static inline void __cbt_flush_cpu_cache(void *ptr)
+{
+ struct cbt_info *cbt = (struct cbt_info *) ptr;
+ struct cbt_extent *ex = this_cpu_ptr(cbt->cache);
+
+ if (ex->len) {
+ __blk_cbt_set(cbt, ex->start, ex->len, 0, 1);
+ ex->start += ex->len;
+ ex->len = 0;
+ }
+}
+
+static void cbt_flush_cache(struct cbt_info *cbt)
+{
+ on_each_cpu(__cbt_flush_cpu_cache, cbt, 1);
+}
+
+static void cbt_find_next_extent(struct cbt_info *cbt, blkcnt_t block, struct cbt_extent *ex)
+{
+ unsigned long off, off2, idx;
+ struct page *page;
+ bool found = 0;
+
+ ex->start = cbt->block_max;
+ ex->len = 0;
+
+ idx = block >> (PAGE_SHIFT + 3);
+ while (block < cbt->block_max) {
+ off = block & (BITS_PER_PAGE -1);
+ page = rcu_dereference(cbt->map[idx]);
+ if (!page) {
+ if (found)
+ break;
+ goto next;
+ }
+ spin_lock_page(page);
+ /* Find extent start */
+ if (!found) {
+ ex->start = find_next_bit(page_address(page), BITS_PER_PAGE, off);
+ if (ex->start != BITS_PER_PAGE) {
+ off = ex->start;
+ ex->start += idx << (PAGE_SHIFT + 3);
+ found = 1;
+ } else {
+ unlock_page(page);
+ goto next;
+ }
+ }
+ if (found) {
+ off2 = find_next_zero_bit(page_address(page), BITS_PER_PAGE, off);
+ ex->len += off2 - off;
+ if (off2 != BITS_PER_PAGE) {
+ unlock_page(page);
+ break;
+ }
+ }
+ unlock_page(page);
+ next:
+ idx++;
+ block = idx << (PAGE_SHIFT + 3);
+ continue;
+ }
+}
+
+static int cbt_ioc_get(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc)
+{
+ struct request_queue *q;
+ struct blk_user_cbt_info ci;
+ struct blk_user_cbt_extent __user *cur_u_ex;
+ struct blk_user_cbt_extent u_ex;
+ struct cbt_info *cbt;
+ struct cbt_extent ex;
+ blkcnt_t block , end;
+ int ret = 0;
+
+ if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
+ return -EFAULT;
+ if (ci.ci_flags & ~CI_FLAG_ONCE)
+ return -EINVAL;
+ if (ci.ci_extent_count > CBT_MAX_EXTENTS)
+ return -EINVAL;
+
+ cur_u_ex = (struct blk_user_cbt_extent __user*)
+ ((char *)ucbt_ioc + sizeof(struct blk_user_cbt_info));
+
+ if (ci.ci_extent_count != 0 &&
+ !access_ok(VERIFY_WRITE, cur_u_ex,
+ ci.ci_extent_count * sizeof(struct blk_user_cbt_extent))){
+ return -EFAULT;
+ }
+ q = bdev_get_queue(bdev);
+ mutex_lock(&cbt_mutex);
+ cbt = q->cbt;
+ if (!cbt) {
+ mutex_unlock(&cbt_mutex);
+ return -EINVAL;
+ }
+ if ((ci.ci_start >> cbt->block_bits) > cbt->block_max) {
+ mutex_unlock(&cbt_mutex);
+ return -EINVAL;
+ }
+ if (test_bit(CBT_ERROR, &cbt->flags)) {
+ mutex_unlock(&cbt_mutex);
+ return -EIO;
+ }
+ cbt_flush_cache(cbt);
+
+ memcpy(&ci.ci_uuid, cbt->uuid, sizeof(cbt->uuid));
+ ci.ci_blksize = 1UL << cbt->block_bits;
+ block = ci.ci_start >> cbt->block_bits;
+ end = (ci.ci_start + ci.ci_length) >> cbt->block_bits;
+ if (end > cbt->block_max)
+ end = cbt->block_max;
+
+ while (ci.ci_mapped_extents < ci.ci_extent_count) {
+ cbt_find_next_extent(cbt, block, &ex);
+ if (!ex.len || ex.start > end) {
+ ret = 0;
+ break;
+ }
+ u_ex.ce_physical = ex.start << cbt->block_bits;
+ u_ex.ce_length = ex.len << cbt->block_bits;
+ if (copy_to_user(cur_u_ex, &u_ex, sizeof(u_ex))) {
+ ret = -EFAULT;
+ break;
+ }
+ if (ci.ci_flags & CI_FLAG_ONCE)
+ __blk_cbt_set(cbt, ex.start, ex.len, 0, 0);
+ cur_u_ex++;
+ ci.ci_mapped_extents++;
+ block = ex.start + ex.len;
+ }
+ mutex_unlock(&cbt_mutex);
+ if (!ret && copy_to_user(ucbt_ioc, &ci, sizeof(ci)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static int cbt_ioc_set(struct block_device *bdev, struct blk_user_cbt_info __user *ucbt_ioc, bool set)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct cbt_info *cbt;
+ struct blk_user_cbt_info ci;
+ struct blk_user_cbt_extent __user u_ex, *cur_u_ex, *end;
+ int ret = 0;
+
+ if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
+ return -EFAULT;
+ if (ci.ci_extent_count > CBT_MAX_EXTENTS)
+ return -EINVAL;
+ if (ci.ci_extent_count < ci.ci_mapped_extents)
+ return -EINVAL;
+
+ cur_u_ex = (struct blk_user_cbt_extent __user*)
+ ((char *)ucbt_ioc + sizeof(struct blk_user_cbt_info));
+ end = cur_u_ex + ci.ci_mapped_extents;
+ if (!access_ok(VERIFY_READ, cur_u_ex,
+ ci.ci_mapped_extents * sizeof(struct blk_user_cbt_extent)))
+ return -EFAULT;
+
+ mutex_lock(&cbt_mutex);
+ cbt = q->cbt;
+ if (!cbt) {
+ mutex_unlock(&cbt_mutex);
+ return -EINVAL;
+ }
+ if (ci.ci_flags & CI_FLAG_NEW_UUID)
+ memcpy(cbt->uuid, &ci.ci_uuid, sizeof(ci.ci_uuid));
+ else if (memcmp(cbt->uuid, &ci.ci_uuid, sizeof(ci.ci_uuid))) {
+ mutex_unlock(&cbt_mutex);
+ return -EINVAL;
+ }
+ if (test_bit(CBT_ERROR, &cbt->flags)) {
+ mutex_unlock(&cbt_mutex);
+ return -EIO;
+ }
+
+ /* Do not care about pcpu caches on set, only in case of clear */
+ if (!set)
+ cbt_flush_cache(cbt);
+
+ while (cur_u_ex < end) {
+ struct cbt_extent ex;
+
+ if (copy_from_user(&u_ex, cur_u_ex, sizeof(u_ex))) {
+ ret = -EFAULT;
+ break;
+ }
+ ex.start = u_ex.ce_physical >> cbt->block_bits;
+ ex.len = (u_ex.ce_length + (1 << cbt->block_bits) -1) >> cbt->block_bits;
+ if (ex.start > q->cbt->block_max ||
+ ex.start + ex.len > q->cbt->block_max ||
+ ex.len == 0) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = __blk_cbt_set(cbt, ex.start, ex.len, 0, set);
+ if (ret)
+ break;
+ cur_u_ex++;
+ }
+ mutex_unlock(&cbt_mutex);
+ return ret;
+}
+
+int blk_cbt_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+{
+ struct blk_user_cbt_info __user *ucbt_ioc = (struct blk_user_cbt_info __user *) arg;
+
+ switch(cmd) {
+ case BLKCBTSTART:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ return cbt_ioc_init(bdev, ucbt_ioc);
+ case BLKCBTSTOP:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ return cbt_ioc_stop(bdev);
+ case BLKCBTGET:
+ return cbt_ioc_get(bdev, ucbt_ioc);
+ case BLKCBTSET:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ return cbt_ioc_set(bdev, ucbt_ioc, 1);
+ case BLKCBTCLR:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ return cbt_ioc_set(bdev, ucbt_ioc, 0);
+ default:
+ BUG();
+ }
+ return -ENOTTY;
+}
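For reference, a standalone sketch (not part of the patch) of the byte-to-block rounding performed by blk_cbt_add() and cbt_ioc_set() above: the start offset is rounded down and the end rounded up to whole CBT blocks, so a write that only partially covers a block still marks it. The helper name and the example values are illustrative.

/*
 * Illustration only: convert a byte range into CBT blocks the way
 * blk_cbt_add()/cbt_ioc_set() do.  block_bits is log2 of the CBT block size.
 */
#include <stdio.h>

static void byte_range_to_blocks(unsigned long long start, unsigned long long len,
				 unsigned int block_bits,
				 unsigned long long *blk_start,
				 unsigned long long *blk_count)
{
	/* round the end up, the start down */
	unsigned long long end = (start + len + (1ULL << block_bits) - 1) >> block_bits;

	*blk_start = start >> block_bits;
	*blk_count = end - *blk_start;
}

int main(void)
{
	unsigned long long s, n;

	/* A 4KiB write at byte offset 5000 with 64KiB CBT blocks dirties block 0 only. */
	byte_range_to_blocks(5000, 4096, 16, &s, &n);
	printf("blocks %llu..%llu\n", s, s + n - 1);
	return 0;
}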
diff --git a/block/blk-core.c b/block/blk-core.c
index 66f7be3..08ac4d3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1845,6 +1845,7 @@ generic_make_request_checks(struct bio *bio)
return false; /* throttled, will be resubmitted later */
trace_block_bio_queue(q, bio);
+ blk_cbt_bio_queue(q, bio);
return true;
end_io:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 10d2058..21bc0c7 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -551,6 +551,7 @@ static void blk_release_queue(struct kobject *kobj)
kfree(q->flush_rq);
blk_trace_shutdown(q);
+ blk_cbt_release(q);
bdi_destroy(&q->backing_dev_info);
diff --git a/block/ioctl.c b/block/ioctl.c
index 93a9fdc..c56168b 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -139,7 +139,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
}
disk_part_iter_exit(&piter);
part_nr_sects_write(part, (sector_t)length);
- i_size_write(bdevp->bd_inode, p.length);
+ bd_write_size(bdevp, p.length);
mutex_unlock(&bdevp->bd_mutex);
mutex_unlock(&bdev->bd_mutex);
bdput(bdevp);
@@ -437,6 +437,13 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKTRACETEARDOWN:
ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg);
break;
+ case BLKCBTSTART:
+ case BLKCBTSTOP:
+ case BLKCBTGET:
+ case BLKCBTSET:
+ case BLKCBTCLR:
+ ret = blk_cbt_ioctl(bdev, cmd, (char __user *)arg);
+ break;
default:
ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6bf6815..aca38b5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2338,7 +2338,7 @@ static void __set_size(struct mapped_device *md, sector_t size)
{
set_capacity(md->disk, size);
- i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
+ bd_write_size(md->bdev, (loff_t)size << SECTOR_SHIFT);
}
/*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e6a2837..a22f439 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1037,12 +1037,19 @@ int check_disk_change(struct block_device *bdev)
EXPORT_SYMBOL(check_disk_change);
+void bd_write_size(struct block_device *bdev, loff_t size)
+{
+ i_size_write(bdev->bd_inode, size);
+ blk_cbt_update_size(bdev);
+}
+EXPORT_SYMBOL(bd_write_size);
+
void bd_set_size(struct block_device *bdev, loff_t size)
{
unsigned bsize = bdev_logical_block_size(bdev);
mutex_lock(&bdev->bd_inode->i_mutex);
- i_size_write(bdev->bd_inode, size);
+ bd_write_size(bdev, size);
mutex_unlock(&bdev->bd_inode->i_mutex);
while (bsize < PAGE_CACHE_SIZE) {
if (size & bsize)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bea378b..2a05818 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -501,6 +501,9 @@ struct request_queue {
/* Throttle data */
struct throtl_data *td;
#endif
+#ifdef CONFIG_BLK_DEV_CBT
+ struct cbt_info *cbt;
+#endif
struct rcu_head rcu_head;
wait_queue_head_t mq_freeze_wq;
struct percpu_counter mq_usage_counter;
@@ -1637,6 +1640,17 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#if defined (CONFIG_BLK_DEV_CBT)
+extern void blk_cbt_update_size(struct block_device *bdev);
+extern void blk_cbt_release(struct request_queue *q);
+extern void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio);
+extern int blk_cbt_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg);
+#else /* CONFIG_BLK_DEV_CBT */
+#define blk_cbt_update_size(b) (0)
+#define blk_cbt_release(q) (0)
+#define blk_cbt_bio_queue(q,bio) (0)
+#define blk_cbt_ioctl(b,c,a) (-ENOTTY)
+#endif /* CONFIG_BLK_DEV_CBT */
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 553bca3..7e7bd3f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2370,6 +2370,7 @@ extern int register_blkdev(unsigned int, const char *);
extern void unregister_blkdev(unsigned int, const char *);
extern struct block_device *bdget(dev_t);
extern struct block_device *bdgrab(struct block_device *bdev);
+extern void bd_write_size(struct block_device *, loff_t size);
extern void bd_set_size(struct block_device *, loff_t size);
extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *);
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 9b964a5..359bf02 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -153,6 +153,40 @@ struct inodes_stat_t {
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
+/* Hole from 127..199 */
+struct blk_user_cbt_extent {
+ __u64 ce_physical; /* physical offset in bytes for the start
+ * of the extent from the beginning of the disk */
+ __u64 ce_length; /* length in bytes for this extent */
+ __u64 ce_reserved64[1];
+};
+
+struct blk_user_cbt_info {
+ __u8 ci_uuid[16]; /* Bitmap UUID */
+ __u64 ci_start; /* start physical range of mapping which
+ userspace wants (in) */
+ __u64 ci_length; /* physical length of mapping which
+ * userspace wants (in) */
+ __u32 ci_blksize; /* cbt logical block size */
+ __u32 ci_flags; /* CI_FLAG_* flags for request (in/out) */
+ __u32 ci_mapped_extents;/* number of extents that were mapped (out) */
+ __u32 ci_extent_count; /* size of ci_extents array (in) */
+ __u32 ci_reserved;
+ struct blk_user_cbt_extent ci_extents[0]; /* array of mapped extents (out) */
+};
+
+enum CI_FLAGS
+{
+ CI_FLAG_ONCE = 1, /* BLKCBTGET will clear bits */
+ CI_FLAG_NEW_UUID = 2 /* BLKCBTSET update uuid */
+};
+
+#define BLKCBTSTART _IOR(0x12,200, struct blk_user_cbt_info)
+#define BLKCBTSTOP _IO(0x12,201)
+#define BLKCBTGET _IOWR(0x12,202,struct blk_user_cbt_info)
+#define BLKCBTSET _IOR(0x12,203,struct blk_user_cbt_info)
+#define BLKCBTCLR _IOR(0x12,204,struct blk_user_cbt_info)
+
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
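Below is a minimal userspace sketch (not part of the patch) of how the new ioctls might be driven, following the uapi definitions added above. The device path, UUID bytes, CBT block size and extent-batch size are illustrative assumptions, and the BLKCBT* macros and structures are assumed to come from headers of a kernel carrying this patch; BLKCBTSTART/BLKCBTSET/BLKCBTCLR additionally require CAP_SYS_ADMIN.

/*
 * Hypothetical example: start changed block tracking on /dev/sdb with a
 * 64KiB CBT block size, then fetch the first batch of dirty extents.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* BLKCBT* ioctls, struct blk_user_cbt_info */

#define NR_EXTENTS 128

int main(void)
{
	struct blk_user_cbt_info *ci;
	unsigned int i;
	int fd;

	fd = open("/dev/sdb", O_RDONLY);
	if (fd < 0)
		return 1;

	/* Room for the header plus NR_EXTENTS mapped extents. */
	ci = calloc(1, sizeof(*ci) +
		       NR_EXTENTS * sizeof(struct blk_user_cbt_extent));
	if (!ci)
		return 1;

	memset(ci->ci_uuid, 0xab, sizeof(ci->ci_uuid));	/* illustrative UUID */
	ci->ci_blksize = 64 * 1024;			/* must be a power of two */
	if (ioctl(fd, BLKCBTSTART, ci))
		perror("BLKCBTSTART");

	/* ... writes to the device happen here ... */

	memset(ci, 0, sizeof(*ci));
	ci->ci_start = 0;
	ci->ci_length = ~0ULL;			/* whole device */
	ci->ci_extent_count = NR_EXTENTS;
	ci->ci_flags = CI_FLAG_ONCE;		/* clear reported bits (get_once) */
	if (ioctl(fd, BLKCBTGET, ci) == 0)
		for (i = 0; i < ci->ci_mapped_extents; i++)
			printf("dirty: %llu +%llu\n",
			       (unsigned long long)ci->ci_extents[i].ce_physical,
			       (unsigned long long)ci->ci_extents[i].ce_length);

	free(ci);
	close(fd);
	return 0;
}

Passing CI_FLAG_ONCE on BLKCBTGET uses the get-once behaviour from diff-cbt-add-get_once-feature: extents reported to the caller are cleared from the bitmap, so a backup tool can consume dirty ranges incrementally between snapshots.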