[Devel] [PATCH RHEL8 COMMIT] ploop: Split pio to cluster-size list

Konstantin Khorenko khorenko at virtuozzo.com
Mon May 17 19:05:07 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.30
------>
commit 2c188f18407f9bd99abc55da8d67e1dd0b2179be
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Mon May 17 19:05:07 2021 +0300

    ploop: Split pio to cluster-size list
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    =====================
    Patchset description:
    
    dm-ploop: Kill loop
    
    Intermediate patches can't be used as a base for bisect.
    
    In scope of https://jira.sw.ru/browse/PSBM-123654
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 drivers/md/dm-ploop-map.c | 143 ++++++++++++++++++++++++++++++++++++++++------
 drivers/md/dm-ploop.h     |   2 +
 2 files changed, 128 insertions(+), 17 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index ef3369b36141..07980d8f73b3 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -92,8 +92,10 @@ void init_pio(struct ploop *ploop, unsigned int bi_op, struct pio *pio)
 	pio->bi_op = bi_op;
 	pio->wants_discard_index_cleanup = false;
 	pio->is_data_alloc = false;
+	pio->free_on_endio = false;
 	pio->ref_index = PLOOP_REF_INDEX_INVALID;
 	pio->bi_status = BLK_STS_OK;
+	atomic_set(&pio->remaining, 1);
 	pio->piwb = NULL;
 	INIT_LIST_HEAD(&pio->list);
 	INIT_LIST_HEAD(&pio->endio_list);
@@ -103,30 +105,26 @@ void init_pio(struct ploop *ploop, unsigned int bi_op, struct pio *pio)
 }
 
 /* Get cluster related to pio sectors */
-static int ploop_pio_cluster(struct ploop *ploop, struct pio *pio,
-			     unsigned int *ret_cluster)
+static int ploop_pio_valid(struct ploop *ploop, struct pio *pio)
 {
 	sector_t sector = pio->bi_iter.bi_sector;
-	unsigned int cluster, end_cluster;
+	unsigned int end_cluster;
 	loff_t end_byte;
 
-	cluster = sector >> ploop->cluster_log;
-	end_byte = ((sector << 9) + pio->bi_iter.bi_size - 1);
-	end_cluster = end_byte >> (ploop->cluster_log + 9);
+	end_byte = to_bytes(sector) + pio->bi_iter.bi_size - 1;
+	end_cluster = to_sector(end_byte) >> ploop->cluster_log;
 
-	if (unlikely(cluster >= ploop->nr_bat_entries) ||
-		     cluster != end_cluster) {
+	if (unlikely(end_cluster >= ploop->nr_bat_entries)) {
 		/*
 		 * This mustn't happen, since we set max_io_len
 		 * via dm_set_target_max_io_len().
 		 */
-		WARN_ONCE(1, "sec=%llu, size=%u, clu=%u, end=%u, nr=%u\n",
-			  sector, pio->bi_iter.bi_size, cluster,
+		WARN_ONCE(1, "sec=%llu, size=%u, end_clu=%u, nr=%u\n",
+			  sector, pio->bi_iter.bi_size,
 			  end_cluster, ploop->nr_bat_entries);
 		return -EINVAL;
 	}
 
-	*ret_cluster = cluster;
 	return 0;
 }
 
@@ -143,10 +141,23 @@ static void prq_endio(struct pio *pio, void *prq_ptr, blk_status_t bi_status)
 	blk_mq_complete_request(rq);
 }
 
-void pio_endio(struct pio *pio)
+static void do_pio_endio(struct pio *pio)
 {
 	ploop_endio_t endio_cb = pio->endio_cb;
 	void *endio_cb_data = pio->endio_cb_data;
+	bool free_on_endio = pio->free_on_endio;
+
+	if (!atomic_dec_and_test(&pio->remaining))
+		return;
+
+	endio_cb(pio, endio_cb_data, pio->bi_status);
+
+	if (free_on_endio)
+		kfree(pio);
+}
+
+void pio_endio(struct pio *pio)
+{
 	struct ploop *ploop = pio->ploop;
 
 	if (pio->ref_index != PLOOP_REF_INDEX_INVALID)
@@ -154,7 +165,96 @@ void pio_endio(struct pio *pio)
 
 	handle_cleanup(ploop, pio);
 
-	endio_cb(pio, endio_cb_data, pio->bi_status);
+	do_pio_endio(pio);
+}
+
+static void pio_chain_endio(struct pio *pio, void *parent_ptr,
+			    blk_status_t bi_status)
+{
+	struct pio *parent = parent_ptr;
+
+	if (unlikely(bi_status))
+		parent->bi_status = bi_status;
+
+	do_pio_endio(parent);
+}
+
+static void pio_chain(struct pio *pio, struct pio *parent)
+{
+	BUG_ON(pio->endio_cb_data || pio->endio_cb);
+
+	pio->endio_cb_data = parent;
+	pio->endio_cb = pio_chain_endio;
+	atomic_inc(&parent->remaining);
+}
+
+/* Clone of bio_advance_iter() */
+static void pio_advance(struct pio *pio, unsigned int bytes)
+{
+	struct bvec_iter *iter = &pio->bi_iter;
+
+	iter->bi_sector += bytes >> 9;
+
+	if (op_is_discard(pio->bi_op))
+		iter->bi_size -= bytes;
+	else
+		bvec_iter_advance(pio->bi_io_vec, iter, bytes);
+}
+
+static struct pio *split_and_chain_pio(struct ploop *ploop,
+		struct pio *pio, u32 len)
+{
+	struct pio *split;
+
+	split = kmalloc(sizeof(*split), GFP_NOIO);
+	if (!split)
+		return NULL;
+
+	init_pio(ploop, pio->bi_op, split);
+	split->free_on_endio = true;
+	split->bi_io_vec = pio->bi_io_vec;
+	split->bi_iter = pio->bi_iter;
+	split->bi_iter.bi_size = len;
+	split->endio_cb = NULL;
+	split->endio_cb_data = NULL;
+	pio_chain(split, pio);
+	if (len)
+		pio_advance(pio, len);
+	return split;
+}
+
+static int split_pio_to_list(struct ploop *ploop, struct pio *pio,
+			     struct list_head *list)
+{
+	u32 clu_size = to_bytes(1 << ploop->cluster_log);
+	struct pio *split;
+
+	while (1) {
+		loff_t start = to_bytes(pio->bi_iter.bi_sector);
+		loff_t end = start + pio->bi_iter.bi_size;
+		unsigned int len;
+
+		WARN_ON_ONCE(start == end);
+
+		if (start / clu_size == (end - 1) / clu_size)
+			break;
+		end = round_up(start + 1, clu_size);
+		len = end - start;
+
+		split = split_and_chain_pio(ploop, pio, len);
+		if (!split)
+			goto err;
+
+		list_add_tail(&split->list, list);
+	}
+
+	return 0;
+err:
+	while ((pio = pio_list_pop(list)) != NULL) {
+		pio->bi_status = BLK_STS_RESOURCE;
+		pio_endio(pio);
+	}
+	return -ENOMEM;
 }
 
 void defer_pios(struct ploop *ploop, struct pio *pio, struct list_head *pio_list)
@@ -1639,14 +1739,22 @@ static noinline struct bio_vec *create_bvec_from_rq(struct request *rq)
 
 static noinline void submit_pio(struct ploop *ploop, struct pio *pio)
 {
-	unsigned int cluster;
 	unsigned long flags;
+	LIST_HEAD(list);
+	int ret;
 
 	if (pio->bi_iter.bi_size) {
-		if (ploop_pio_cluster(ploop, pio, &cluster) < 0)
+		if (ploop_pio_valid(ploop, pio) < 0)
 			goto kill;
 
-		defer_pios(ploop, pio, NULL);
+		ret = split_pio_to_list(ploop, pio, &list);
+		if (ret) {
+			pio->bi_status = BLK_STS_RESOURCE;
+			goto endio;
+		}
+		list_add(&pio->list, &list);
+
+		defer_pios(ploop, NULL, &list);
 		goto out;
 	}
 
@@ -1660,7 +1768,8 @@ static noinline void submit_pio(struct ploop *ploop, struct pio *pio)
 out:
 	return;
 kill:
-	pio->bi_status = BLK_STS_RESOURCE;
+	pio->bi_status = BLK_STS_IOERR;
+endio:
 	pio_endio(pio);
 }
 
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 6343a93aa9c0..06dec66a7d62 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -250,6 +250,7 @@ struct pio {
 	unsigned int		bi_op;
 	unsigned int		bi_vcnt;
 	blk_status_t bi_status;
+	atomic_t remaining;
 
 	ploop_endio_t endio_cb;
 	void *endio_cb_data;
@@ -258,6 +259,7 @@ struct pio {
 
 	bool is_data_alloc:1;
 	bool wants_discard_index_cleanup:1;
+	bool free_on_endio:1;
 	/*
 	 * 0 and 1 are related to inflight_bios_ref[],
 	 * 2 means index is not assigned.


More information about the Devel mailing list