[Devel] [PATCH RHEL8 COMMIT] ploop: Switch to blk-mq

Konstantin Khorenko khorenko at virtuozzo.com
Mon May 17 19:05:02 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.30
------>
commit a754359e6e84db055af0ef1bf69e42a75ea82f64
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Mon May 17 19:05:02 2021 +0300

    ploop: Switch to blk-mq
    
    It will merge bios for us.
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    =====================
    Patchset description:
    
    dm-ploop: Kill loop
    
    Intermediate patches can't be used as a base for bisect.
    
    In scope of https://jira.sw.ru/browse/PSBM-123654
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 drivers/md/dm-ploop-map.c    | 127 +++++++++++++++++++++++++++++++++----------
 drivers/md/dm-ploop-target.c |   5 +-
 drivers/md/dm-ploop.h        |  21 ++++++-
 3 files changed, 121 insertions(+), 32 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 2e9cc5f5813f..72df8e64140a 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -5,6 +5,7 @@
 #include <linux/init.h>
 #include <linux/vmalloc.h>
 #include <linux/uio.h>
+#include <linux/blk-mq.h>
 #include <uapi/linux/falloc.h>
 #include "dm-ploop.h"
 
@@ -85,9 +86,10 @@ static void ploop_index_wb_init(struct ploop_index_wb *piwb, struct ploop *ploop
 	piwb->type = PIWB_TYPE_ALLOC;
 }
 
-static void init_pio(struct ploop *ploop, struct pio *pio)
+static void init_pio(struct ploop *ploop, unsigned int bi_op, struct pio *pio)
 {
 	pio->ploop = ploop;
+	pio->bi_opf = bi_op;
 	pio->action = PLOOP_END_IO_NONE;
 	pio->ref_index = PLOOP_REF_INDEX_INVALID;
 	pio->bi_status = BLK_STS_OK;
@@ -127,17 +129,22 @@ static int ploop_pio_cluster(struct ploop *ploop, struct pio *pio,
 	return 0;
 }
 
-static void call_bio_endio(struct pio *pio, void *data, blk_status_t bi_status)
+static void prq_endio(struct pio *pio, void *prq_ptr, blk_status_t bi_status)
 {
-	struct bio *bio = data;
+        struct ploop_rq *prq = prq_ptr;
+        struct request *rq = prq->rq;
 	int ret;
 
 	ret = ploop_endio(pio->ploop, pio);
 
 	if (bi_status)
-		bio->bi_status = bi_status;
-	if (ret == DM_ENDIO_DONE)
-		bio_endio(bio);
+		dm_request_set_error(rq, bi_status);
+
+	if (ret == DM_ENDIO_DONE) {
+	        if (prq->bvec)
+			kfree(prq->bvec);
+	        blk_mq_complete_request(rq);
+	}
 }
 
 void pio_endio(struct pio *pio)
@@ -1109,7 +1116,7 @@ int submit_cluster_cow(struct ploop *ploop, unsigned int level,
 	pio = alloc_pio_with_pages(ploop);
 	if (!pio)
 		goto err;
-	init_pio(ploop, pio);
+	init_pio(ploop, REQ_OP_READ, pio);
 
 	cow = kmem_cache_alloc(cow_cache, GFP_NOIO);
 	if (!cow)
@@ -1126,7 +1133,7 @@ int submit_cluster_cow(struct ploop *ploop, unsigned int level,
 	pio->endio_cb = ploop_cow_endio;
 	pio->endio_cb_data = cow;
 
-	init_pio(ploop, &cow->hook);
+	init_pio(ploop, REQ_OP_WRITE, &cow->hook);
 	add_cluster_lk(ploop, &cow->hook, cluster);
 
 	/* Stage #0: read secondary delta full cluster */
@@ -1646,45 +1653,107 @@ void do_ploop_fsync_work(struct work_struct *ws)
 	}
 }
 
-/*
- * ploop_map() tries to map bio to origins or delays it.
- * It never modifies ploop->bat_entries and other cached
- * metadata: this should be made in do_ploop_work() only.
- */
-int ploop_map(struct dm_target *ti, struct bio *bio)
+static void init_prq(struct ploop_rq *prq, struct request *rq)
 {
-	struct pio *pio = bio_to_endio_hook(bio);
-	struct ploop *ploop = ti->private;
-	unsigned int cluster;
-	unsigned long flags;
+	prq->rq = rq;
+	prq->bvec = NULL;
+}
 
-	init_pio(ploop, pio);
+static noinline struct bio_vec *create_bvec_from_rq(struct request *rq)
+{
+	struct bio_vec bv, *bvec, *tmp;
+	struct req_iterator rq_iter;
+	unsigned int nr_bvec = 0;
 
-	pio->bi_iter = bio->bi_iter;
-	pio->bi_io_vec = bio->bi_io_vec;
-	pio->bi_opf = bio->bi_opf;
-	pio->endio_cb = call_bio_endio;
-	pio->endio_cb_data = bio;
+	rq_for_each_bvec(bv, rq, rq_iter)
+		nr_bvec++;
+
+	bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+			     GFP_NOIO);
+	if (!bvec)
+		goto out;
+
+	tmp = bvec;
+	rq_for_each_bvec(bv, rq, rq_iter) {
+		*tmp = bv;
+		tmp++;
+	}
+out:
+	return bvec;
+}
+
+static noinline void submit_pio(struct ploop *ploop, struct pio *pio)
+{
+	unsigned int cluster;
+	unsigned long flags;
 
 	if (pio->bi_iter.bi_size) {
 		if (ploop_pio_cluster(ploop, pio, &cluster) < 0)
-			return DM_MAPIO_KILL;
+			goto kill;
 		if (op_is_discard(pio->bi_opf) &&
 		    endio_if_unsupported_discard(ploop, pio))
-			return DM_MAPIO_SUBMITTED;
+			goto out;
 
 		defer_pios(ploop, pio, NULL);
-		return DM_MAPIO_SUBMITTED;
+		goto out;
 	}
 
-	if (WARN_ON_ONCE(!op_is_flush(pio->bi_opf)))
-		return DM_MAPIO_KILL;
+	if (WARN_ON_ONCE(pio->bi_opf != REQ_OP_FLUSH))
+		goto kill;
 
 	spin_lock_irqsave(&ploop->deferred_lock, flags);
 	list_add_tail(&pio->list, &ploop->flush_pios);
 	spin_unlock_irqrestore(&ploop->deferred_lock, flags);
 	queue_work(ploop->wq, &ploop->fsync_worker);
+out:
+	return;
+kill:
+	pio->bi_status = BLK_STS_RESOURCE;
+	pio_endio(pio);
+}
 
+int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
+		    union map_info *info, struct request **clone)
+{
+	struct ploop *ploop = ti->private;
+	struct bio_vec *bvec = NULL;
+	struct ploop_rq *prq;
+	struct pio *pio;
+
+	prq = map_info_to_prq(info);
+	init_prq(prq, rq);
+
+	pio = map_info_to_pio(info); /* Embedded pio */
+	init_pio(ploop, req_op(rq), pio);
+
+	if (rq->bio != rq->biotail) {
+		if (req_op(rq) == REQ_OP_DISCARD)
+			goto skip_bvec;
+		/*
+		 * Transform a set of bvec arrays related to bios
+		 * into a single bvec array (which we can iterate).
+		 */
+		bvec = create_bvec_from_rq(rq);
+		if (!bvec)
+			return DM_MAPIO_KILL;
+		prq->bvec = bvec;
+skip_bvec:
+		pio->bi_iter.bi_sector = blk_rq_pos(rq);
+		pio->bi_iter.bi_size = blk_rq_bytes(rq);
+		pio->bi_iter.bi_idx = 0;
+		pio->bi_iter.bi_bvec_done = 0;
+        } else if (rq->bio) {
+                /* Single bio already provides bvec array */
+		bvec = rq->bio->bi_io_vec;
+
+		pio->bi_iter = rq->bio->bi_iter;
+        } /* else FLUSH */
+
+        pio->bi_io_vec = bvec;
+        pio->endio_cb = prq_endio;
+        pio->endio_cb_data = prq;
+
+	submit_pio(ploop, pio);
 	return DM_MAPIO_SUBMITTED;
 }
 
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index 29d2636f12c9..2a2d36b51de9 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -349,7 +349,7 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (ret)
 		goto err;
 
-	ti->per_io_data_size = sizeof(struct pio);
+	ti->per_io_data_size = ploop_per_io_data_size();
 	ti->num_flush_bios = 1;
 	ti->flush_supported = true;
 	ti->num_discard_bios = 1;
@@ -422,13 +422,14 @@ static int ploop_preresume(struct dm_target *ti)
 static struct target_type ploop_target = {
 	.name = "ploop",
 	.version = {1, 0, 0},
+	.features = DM_TARGET_SINGLETON|DM_TARGET_IMMUTABLE,
 	.module = THIS_MODULE,
 	.ctr = ploop_ctr,
 	.dtr = ploop_dtr,
-	.map = ploop_map,
 	.message = ploop_message,
 	.io_hints = ploop_io_hints,
 	.preresume = ploop_preresume,
+	.clone_and_map_rq = ploop_clone_and_map,
 	.status = ploop_status,
 };
 
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 1b9a7ed8682c..3e562710194b 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -229,6 +229,11 @@ struct ploop {
 	spinlock_t pb_lock;
 };
 
+struct ploop_rq {
+	struct request *rq;
+	struct bio_vec *bvec;
+};
+
 struct pio;
 typedef void (*ploop_endio_t)(struct pio *, void *, blk_status_t);
 
@@ -326,6 +331,19 @@ static inline bool whole_cluster(struct ploop *ploop, struct pio *pio)
 	return !(end_sector & ((1 << ploop->cluster_log) - 1));
 }
 
+static inline ssize_t ploop_per_io_data_size(void)
+{
+	return sizeof(struct ploop_rq) + sizeof(struct pio);
+}
+static inline struct ploop_rq *map_info_to_prq(union map_info *info)
+{
+	return (void *)info->ptr;
+}
+static inline struct pio *map_info_to_pio(union map_info *info)
+{
+	return (void *)info->ptr + sizeof(struct ploop_rq);
+}
+
 #define BAT_LEVEL_MAX		(U8_MAX - 1)
 static inline u8 top_level(struct ploop *ploop)
 {
@@ -517,7 +535,8 @@ extern void do_ploop_work(struct work_struct *ws);
 extern void do_ploop_fsync_work(struct work_struct *ws);
 extern void process_deferred_cmd(struct ploop *ploop,
 			struct ploop_index_wb *piwb);
-extern int ploop_map(struct dm_target *ti, struct bio *bio);
+extern int ploop_clone_and_map(struct dm_target *ti, struct request *rq,
+		    union map_info *map_context, struct request **clone);
 extern int ploop_inflight_bios_ref_switch(struct ploop *ploop, bool killable);
 extern struct pio *find_lk_of_cluster(struct ploop *ploop, u32 cluster);
 extern void unlink_postponed_backup_endio(struct ploop *ploop,


More information about the Devel mailing list