[Devel] [PATCH RH8] dm: Add dm-tracking target
Kirill Tkhai
ktkhai at virtuozzo.com
Fri Sep 17 13:50:08 MSK 2021
https://jira.sw.ru/browse/PSBM-129845
On 17.09.2021 13:46, Kirill Tkhai wrote:
> This is a driver to track changed clusters during migration.
>
> Create:
> $dmsetup create dmt --table "0 <dev_size_sectors> tracking <clu_size_sectors> <origin_dev>"
>
> Start:
> $dmsetup message dmt 0 tracking_start
> (device dmt must be suspended)
>
> Get next cluster:
> $dmsetup message dmt 0 tracking_get_next
>
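A rough end-to-end sequence for anyone trying the patch (device name, disk size and
cluster size below are made up for the example, and it assumes your dmsetup prints
the message response):

  # 20G origin, 1M clusters: 41943040 and 2048 sectors respectively
  $dmsetup create dmt --table "0 41943040 tracking 2048 /dev/mapper/origin"

  # arm tracking while suspended, then resume to start recording writes
  $dmsetup suspend dmt
  $dmsetup message dmt 0 tracking_start
  $dmsetup resume dmt

  # each get_next prints one changed cluster index and clears its bit;
  # it fails with EAGAIN once no changed clusters remain
  $dmsetup message dmt 0 tracking_get_next

  # stop tracking and free the bitmap (handled in dmt_cmd() below,
  # though not mentioned in the changelog above)
  $dmsetup message dmt 0 tracking_stop
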
> Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
> drivers/md/Kconfig | 6 +
> drivers/md/Makefile | 2
> drivers/md/dm-tracking.c | 342 ++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 350 insertions(+)
> create mode 100644 drivers/md/dm-tracking.c
>
> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
> index 04db1fdb60f4..d5c6ff8cc848 100644
> --- a/drivers/md/Kconfig
> +++ b/drivers/md/Kconfig
> @@ -566,6 +566,12 @@ config DM_PUSH_BACKUP
> ---help---
> Virtuozzo push backup driver.
>
> +config DM_TRACKING
> + tristate "Tracking target support"
> + depends on BLK_DEV_DM
> + ---help---
> + Virtuozzo migration tracking driver.
> +
> config DM_ZONED
> tristate "Drive-managed zoned block device target support"
> depends on BLK_DEV_DM
> diff --git a/drivers/md/Makefile b/drivers/md/Makefile
> index 00f9fa8fc10d..aa13baaa27f9 100644
> --- a/drivers/md/Makefile
> +++ b/drivers/md/Makefile
> @@ -21,6 +21,7 @@ dm-era-y += dm-era-target.o
> ploop-y += dm-ploop-target.o dm-ploop-map.o dm-ploop-cmd.o \
> dm-ploop-bat.o
> push-backup-y += dm-push-backup.o
> +dm-tracking-y += dm-tracking.o
> dm-qcow2-y += dm-qcow2-target.o dm-qcow2-map.o dm-qcow2-cmd.o
> dm-verity-y += dm-verity-target.o
> md-mod-y += md.o md-bitmap.o
> @@ -71,6 +72,7 @@ obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
> obj-$(CONFIG_DM_ERA) += dm-era.o
> obj-$(CONFIG_DM_PLOOP) += ploop.o
> obj-$(CONFIG_DM_PUSH_BACKUP) += push-backup.o
> +obj-$(CONFIG_DM_TRACKING) += dm-tracking.o
> obj-$(CONFIG_DM_QCOW2) += dm-qcow2.o
> obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
> obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o
> diff --git a/drivers/md/dm-tracking.c b/drivers/md/dm-tracking.c
> new file mode 100644
> index 000000000000..c49e79cf1514
> --- /dev/null
> +++ b/drivers/md/dm-tracking.c
> @@ -0,0 +1,342 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * drivers/md/dm-tracking.c
> + *
> + * Copyright (c) 2020-2021 Virtuozzo International GmbH. All rights reserved.
> + *
> + */
> +
> +#include "dm.h"
> +#include "dm-rq.h"
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/vmalloc.h>
> +#include <linux/ctype.h>
> +#include <linux/dm-io.h>
> +#include <linux/blk-mq.h>
> +
> +
> +#define DM_MSG_PREFIX "dm-tracking"
> +
> +struct dm_tracking {
> + struct dm_target *ti;
> + struct dm_dev *origin_dev;
> +
> + u32 clu_size;
> + u64 nr_clus;
> +
> + u64 cursor;
> + void *bitmap;
> +
> + spinlock_t lock;
> + struct mutex ctl_mutex;
> + bool suspended;
> +};
> +
> +static sector_t get_dev_size(struct dm_dev *dev)
> +{
> + return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
> +}
> +
> +static void track_rq_clus(struct dm_tracking *dmt, struct request *rq)
> +{
> + loff_t off = to_bytes(blk_rq_pos(rq));
> + u64 start_clu, end_clu, clu;
> +
> + start_clu = off / dmt->clu_size;
> + end_clu = (off + blk_rq_bytes(rq) - 1) / dmt->clu_size;
> +
> + for (clu = start_clu; clu <= end_clu; clu++) {
> + set_bit(clu, dmt->bitmap);
> + if (clu == U64_MAX)
> + break;
> + }
> +}
> +
> +static int dmt_clone_and_map(struct dm_target *ti, struct request *rq,
> + union map_info *map_context,
> + struct request **__clone)
> +
> +{
> + struct dm_tracking *dmt = ti->private;
> + struct block_device *bdev = dmt->origin_dev->bdev;
> + struct request_queue *q;
> + struct request *clone;
> +
> + if (blk_rq_bytes(rq) && op_is_write(req_op(rq))) {
> + spin_lock_irq(&dmt->lock);
> + if (dmt->bitmap)
> + track_rq_clus(dmt, rq);
> + spin_unlock_irq(&dmt->lock);
> + }
> +
> + q = bdev_get_queue(bdev);
> + clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE,
> + BLK_MQ_REQ_NOWAIT);
> + if (IS_ERR(clone)) {
> + /* EBUSY, ENODEV or EWOULDBLOCK: requeue */
> + if (blk_queue_dying(q))
> + return DM_MAPIO_DELAY_REQUEUE;
> + return DM_MAPIO_REQUEUE;
> + }
> +
> + clone->bio = clone->biotail = NULL;
> + clone->rq_disk = bdev->bd_disk;
> + clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
> + *__clone = clone;
> + return DM_MAPIO_REMAPPED;
> +}
> +
> +static void dmt_release_clone(struct request *clone,
> + union map_info *map_context)
> +{
> + blk_put_request(clone);
> +}
> +
> +static void dmt_destroy(struct dm_tracking *dmt)
> +{
> + if (dmt->origin_dev)
> + dm_put_device(dmt->ti, dmt->origin_dev);
> +
> + kvfree(dmt->bitmap);
> + kfree(dmt);
> +}
> +
> +/*
> + * <cluster size> <data dev>
> + */
> +static int dmt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> +{
> + struct dm_tracking *dmt;
> + u64 origin_secs;
> + u32 sectors;
> + int ret;
> +
> + if (argc != 2 || ti->begin != 0)
> + return -EINVAL;
> +
> + ret = -ENOMEM;
> + dmt = kzalloc(sizeof(*dmt), GFP_KERNEL);
> + if (!dmt)
> + goto err;
> +
> + mutex_init(&dmt->ctl_mutex);
> + dmt->suspended = true;
> +
> + ti->private = dmt;
> + dmt->ti = ti;
> +
> + if (kstrtou32(argv[0], 10, &sectors) < 0) {
> + ret = -EINVAL;
> + ti->error = "could not parse cluster size";
> + goto err;
> + }
> + dmt->clu_size = to_bytes(sectors);
> + dmt->nr_clus = DIV_ROUND_UP(ti->len, sectors);
> +
> + /*
> + * We do not add FMODE_EXCL, because further open_table_device()
> + * unconditionally adds it. See call stack.
> + */
> + ret = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table),
> + &dmt->origin_dev);
> + if (ret) {
> + ti->error = "Error opening origin device";
> + goto err;
> + }
> +
> + origin_secs = get_dev_size(dmt->origin_dev);
> + if (origin_secs < ti->len) {
> + ret = -EBADSLT;
> + ti->error = "Origin device is too small";
> + goto err;
> + }
> +
> + ti->num_flush_bios = 1;
> + ti->flush_supported = true;
> + ti->num_discard_bios = 1;
> + ti->discards_supported = true;
> + return 0;
> +err:
> + if (dmt)
> + dmt_destroy(dmt);
> + return ret;
> +}
> +
> +static void dmt_dtr(struct dm_target *ti)
> +{
> + dmt_destroy(ti->private);
> +}
> +
> +static int tracking_get_next(struct dm_tracking *dmt, char *result,
> + unsigned int maxlen)
> +{
> + unsigned int i, sz = 0, nr_clus = dmt->nr_clus, prev = dmt->cursor;
> + void *bitmap = dmt->bitmap;
> + int ret = -EAGAIN;
> +
> + if (WARN_ON_ONCE(prev > nr_clus - 1))
> + prev = 0;
> +
> + spin_lock_irq(&dmt->lock);
> + i = find_next_bit(bitmap, nr_clus, prev + 1);
> + if (i < nr_clus)
> + goto found;
> + i = find_first_bit(bitmap, prev + 1);
> + if (i >= prev + 1)
> + goto unlock;
> +found:
> + ret = (DMEMIT("%u\n", i)) ? 1 : 0;
> + if (ret)
> + clear_bit(i, bitmap);
> +unlock:
> + spin_unlock_irq(&dmt->lock);
> + if (ret > 0)
> + dmt->cursor = i;
> + return ret;
> +}
> +
> +static int dmt_cmd(struct dm_tracking *dmt, const char *suffix,
> + char *result, unsigned int maxlen)
> +{
> + unsigned int nr_clus, size;
> + void *bitmap = NULL;
> +
> + if (!strcmp(suffix, "get_next")) {
> + if (!dmt->bitmap)
> + return -ENOENT;
> + return tracking_get_next(dmt, result, maxlen);
> + }
> +
> + if (!strcmp(suffix, "start")) {
> + if (!dmt->suspended)
> + return -EBUSY;
> + if (dmt->bitmap)
> + return -EEXIST;
> + nr_clus = dmt->nr_clus;
> +
> + size = DIV_ROUND_UP(nr_clus, 8 * sizeof(unsigned long));
> + size *= sizeof(unsigned long);
> + bitmap = kvzalloc(size, GFP_KERNEL);
> + if (!bitmap)
> + return -ENOMEM;
> + dmt->cursor = nr_clus - 1;
> +
> + spin_lock_irq(&dmt->lock);
> + dmt->bitmap = bitmap;
> + spin_unlock_irq(&dmt->lock);
> + return 0;
> + } else if (!strcmp(suffix, "stop")) {
> + if (!dmt->bitmap)
> + return -ENOENT;
> +
> + spin_lock_irq(&dmt->lock);
> + swap(dmt->bitmap, bitmap);
> + spin_unlock_irq(&dmt->lock);
> + kvfree(bitmap);
> + return 0;
> + }
> +
> + return -ENOTSUPP;
> +}
> +
> +static int dmt_message(struct dm_target *ti, unsigned int argc, char **argv,
> + char *result, unsigned int maxlen)
> +{
> + struct dm_tracking *dmt = ti->private;
> + int ret;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + mutex_lock(&dmt->ctl_mutex);
> + ret = -ENOTSUPP;
> + if (strncmp(argv[0], "tracking_", 9))
> + goto unlock;
> + ret = -EINVAL;
> + if (argc != 1)
> + goto unlock;
> + ret = dmt_cmd(dmt, argv[0] + 9, result, maxlen);
> +unlock:
> + mutex_unlock(&dmt->ctl_mutex);
> +
> + return ret;
> +}
> +
> +static int dmt_iterate_devices(struct dm_target *ti,
> + iterate_devices_callout_fn fn, void *data)
> +{
> + struct dm_tracking *dmt = ti->private;
> + sector_t size;
> +
> + size = get_dev_size(dmt->origin_dev);
> +
> + return fn(ti, dmt->origin_dev, 0, size, data);
> +}
> +
> +static void dmt_set_suspended(struct dm_target *ti, bool suspended)
> +{
> + struct dm_tracking *dmt = ti->private;
> +
> + mutex_lock(&dmt->ctl_mutex);
> + dmt->suspended = suspended;
> + mutex_unlock(&dmt->ctl_mutex);
> +}
> +static void dmt_postsuspend(struct dm_target *ti)
> +{
> + dmt_set_suspended(ti, true);
> +}
> +static void dmt_resume(struct dm_target *ti)
> +{
> + dmt_set_suspended(ti, false);
> +}
> +
> +static void dmt_status(struct dm_target *ti, status_type_t type,
> + unsigned int status_flags, char *result,
> + unsigned int maxlen)
> +{
> + struct dm_tracking *dmt = ti->private;
> + const char *status = "inactive";
> + ssize_t sz = 0;
> +
> + spin_lock_irq(&dmt->lock);
> + if (dmt->bitmap)
> + status = "active";
> + DMEMIT("%s %llu %s", dmt->origin_dev->name,
> + to_sector(dmt->clu_size), status);
> + spin_unlock_irq(&dmt->lock);
> +}
> +
> +static struct target_type dmt_target = {
> + .name = "tracking",
> + .version = {1, 0, 0},
> + .features = DM_TARGET_SINGLETON,
> + .module = THIS_MODULE,
> + .ctr = dmt_ctr,
> + .dtr = dmt_dtr,
> + .clone_and_map_rq = dmt_clone_and_map,
> + .release_clone_rq = dmt_release_clone,
> + .message = dmt_message,
> + .iterate_devices = dmt_iterate_devices,
> + .postsuspend = dmt_postsuspend,
> + .resume = dmt_resume,
> + .status = dmt_status,
> +};
> +
> +static int __init dmt_init(void)
> +{
> + return dm_register_target(&dmt_target);
> +}
> +
> +static void __exit dmt_exit(void)
> +{
> + dm_unregister_target(&dmt_target);
> +}
> +
> +module_init(dmt_init);
> +module_exit(dmt_exit);
> +
> +MODULE_AUTHOR("Kirill Tkhai <ktkhai at virtuozzo.com>");
> +MODULE_LICENSE("GPL v2");
>
>
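For the migration use case, the dirty-cluster drain on the source side could look
roughly like the sketch below. Everything here is illustrative (destination path,
a 1 MiB / 2048-sector cluster size matching the example table line, and again the
assumption that dmsetup prints the message response); it is not part of the patch.

  CLU_BYTES=$((2048 * 512))        # cluster size from the table line, in bytes
  while clu=$(dmsetup message dmt 0 tracking_get_next 2>/dev/null); do
          # copy only the cluster reported as changed
          dd if=/dev/mapper/dmt of=/dst/disk.img bs=$CLU_BYTES \
             skip=$clu seek=$clu count=1 conv=notrunc
  done
  # the loop ends when tracking_get_next fails (EAGAIN), i.e. the bitmap is empty

Writes that land while the loop runs dirty clusters again, so a real migration would
repeat such passes until the remaining set is small and then suspend the device for
the final sync.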