[Devel] [PATCH RHEL8 COMMIT] ploop: Use hlist instead of rbtree

Konstantin Khorenko khorenko at virtuozzo.com
Thu Jun 17 19:01:01 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.44
------>
commit 0aaa7c6961037d49d95f92e3e1b993bcd2d4e12d
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Thu Jun 17 19:01:00 2021 +0300

    ploop: Use hlist instead of rbtree
    
    Number of inflight/locking pios is small to use rbtree,
    while next patches will make inflight pios to be linked
    always. Since hlist has better add/del overhead, we use it.
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    =====================
    Patchset description:
    
    ploop: Allow to resubmit partially completed request
    
    This allows to continue submitting partially completed requests.
    
    https://jira.sw.ru/browse/PSBM-127225
    
    Kirill Tkhai (18):
          ploop: Simplify ploop_write_cluster_sync()
          ploop: Rename hook->pio, h->pio, ploop_cow::hook->aux_pio
          ploop: Rename force_link_inflight_bios
          ploop: Introduce separate lock for inflight pios
          ploop: Use hlist instead of rbtree
          ploop: Always link submitted pios
          ploop: Unexport ploop_inflight_bios_ref_switch()
          ploop: Refactor submit_pio()
          ploop: Introduce ploop_suspend_submitting_pios
          ploop: Refactor ploop_ctr()
          ploop: Use ploop_call_rw_iter() in submit_delta_read()
          ploop: Generalize submit_rw_mapped()
          ploop: Kill submit_delta_read()
          ploop: Rename submit_rw_mapped()
          ploop: Extract submit_rw_mapped() to separate function
          ploop: Save level before submitting pio
          ploop: Make fsync work be able to run in parallel with main work
          ploop: Introduce resubmitting partially completed pios
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 drivers/md/dm-ploop-map.c    | 78 +++++++++++++++-----------------------------
 drivers/md/dm-ploop-target.c | 19 ++++++++---
 drivers/md/dm-ploop.h        | 24 +++++++-------
 3 files changed, 53 insertions(+), 68 deletions(-)

diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 388973b56a85..efa0da8afe3f 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -100,10 +100,10 @@ void init_pio(struct ploop *ploop, unsigned int bi_op, struct pio *pio)
 	atomic_set(&pio->remaining, 1);
 	pio->piwb = NULL;
 	INIT_LIST_HEAD(&pio->list);
+	INIT_HLIST_NODE(&pio->hlist_node);
 	INIT_LIST_HEAD(&pio->endio_list);
 	/* FIXME: assign real cluster? */
 	pio->cluster = UINT_MAX;
-	RB_CLEAR_NODE(&pio->node);
 }
 
 /* Get cluster related to pio sectors */
@@ -333,19 +333,15 @@ static void zero_fill_pio(struct pio *pio)
 	}
 }
 
-struct pio *find_pio_range(struct ploop *ploop, struct rb_root *root,
-			   unsigned int left, unsigned int right)
+struct pio *find_pio(struct hlist_head head[], u32 clu)
 {
-	struct rb_node *node = root->rb_node;
+	struct hlist_head *slot = ploop_htable_slot(head, clu);
 	struct pio *pio;
 
-	while (node) {
-		pio = rb_entry(node, struct pio, node);
-		if (right < pio->cluster)
-			node = node->rb_left;
-		else if (left > pio->cluster)
-			node = node->rb_right;
-		else
+	BUG_ON(!slot);
+
+	hlist_for_each_entry(pio, slot, hlist_node) {
+		if (pio->cluster == clu)
 			return pio;
 	}
 
@@ -355,13 +351,13 @@ struct pio *find_pio_range(struct ploop *ploop, struct rb_root *root,
 static struct pio *find_inflight_bio(struct ploop *ploop, unsigned int cluster)
 {
 	lockdep_assert_held(&ploop->inflight_lock);
-	return find_pio(ploop, &ploop->inflight_pios_rbtree, cluster);
+	return find_pio(ploop->inflight_pios, cluster);
 }
 
 struct pio *find_lk_of_cluster(struct ploop *ploop, unsigned int cluster)
 {
 	lockdep_assert_held(&ploop->deferred_lock);
-	return find_pio(ploop, &ploop->exclusive_bios_rbtree, cluster);
+	return find_pio(ploop->exclusive_pios, cluster);
 }
 
 static void add_endio_pio(struct pio *head, struct pio *pio)
@@ -397,37 +393,17 @@ static void dec_nr_inflight(struct ploop *ploop, struct pio *pio)
 	}
 }
 
-static void link_pio(struct ploop *ploop, struct pio *new, struct rb_root *root,
-		     unsigned int cluster, bool exclusive)
+static void link_pio(struct hlist_head head[], struct pio *pio,
+		     u32 clu, bool exclusive)
 {
-	struct rb_node *parent, **node = &root->rb_node;
-	struct pio *pio;
+	struct hlist_head *slot = ploop_htable_slot(head, clu);
 
-	BUG_ON(!RB_EMPTY_NODE(&new->node));
-	parent = NULL;
-
-	while (*node) {
-		pio = rb_entry(*node, struct pio, node);
-		parent = *node;
-		if (cluster < pio->cluster)
-			node = &parent->rb_left;
-		else if (cluster > pio->cluster)
-			node = &parent->rb_right;
-		else {
-			if (exclusive)
-				BUG();
-			if (new < pio)
-				node = &parent->rb_left;
-			else if (new > pio)
-				node = &parent->rb_right;
-			else
-				BUG();
-		}
-	}
+	if (exclusive)
+		WARN_ON_ONCE(find_pio(head, clu) != NULL);
 
-	new->cluster = cluster;
-	rb_link_node(&new->node, parent, node);
-	rb_insert_color(&new->node, root);
+	BUG_ON(!hlist_unhashed(&pio->hlist_node));
+	hlist_add_head(&pio->hlist_node, slot);
+	pio->cluster = clu;
 }
 
 /*
@@ -435,14 +411,12 @@ static void link_pio(struct ploop *ploop, struct pio *new, struct rb_root *root,
  * or from exclusive_bios_rbtree. BIOs from endio_list are requeued
  * to deferred_list.
  */
-static void unlink_pio(struct ploop *ploop, struct rb_root *root,
-		       struct pio *pio, struct list_head *pio_list)
+static void unlink_pio(struct ploop *ploop, struct pio *pio,
+		       struct list_head *pio_list)
 {
-	BUG_ON(RB_EMPTY_NODE(&pio->node));
-
-	rb_erase(&pio->node, root);
-	RB_CLEAR_NODE(&pio->node);
+	BUG_ON(hlist_unhashed(&pio->hlist_node));
 
+	hlist_del_init(&pio->hlist_node);
 	list_splice_tail_init(&pio->endio_list, pio_list);
 }
 
@@ -451,7 +425,7 @@ static void add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 cluster)
 	unsigned long flags;
 
 	spin_lock_irqsave(&ploop->deferred_lock, flags);
-	link_pio(ploop, pio, &ploop->exclusive_bios_rbtree, cluster, true);
+	link_pio(ploop->exclusive_pios, pio, cluster, true);
 	spin_unlock_irqrestore(&ploop->deferred_lock, flags);
 }
 static void del_cluster_lk(struct ploop *ploop, struct pio *pio)
@@ -461,7 +435,7 @@ static void del_cluster_lk(struct ploop *ploop, struct pio *pio)
 	bool queue = false;
 
 	spin_lock_irqsave(&ploop->deferred_lock, flags);
-	unlink_pio(ploop, &ploop->exclusive_bios_rbtree, pio, &pio_list);
+	unlink_pio(ploop, pio, &pio_list);
 	if (!list_empty(&pio_list)) {
 		list_splice_tail(&pio_list, &ploop->deferred_pios);
 		queue = true;
@@ -482,7 +456,7 @@ static void maybe_link_submitting_pio(struct ploop *ploop, struct pio *pio,
 		return;
 
 	spin_lock_irqsave(&ploop->inflight_lock, flags);
-	link_pio(ploop, pio, &ploop->inflight_pios_rbtree, cluster, false);
+	link_pio(ploop->inflight_pios, pio, cluster, false);
 	spin_unlock_irqrestore(&ploop->inflight_lock, flags);
 }
 static void maybe_unlink_completed_pio(struct ploop *ploop, struct pio *pio)
@@ -490,11 +464,11 @@ static void maybe_unlink_completed_pio(struct ploop *ploop, struct pio *pio)
 	LIST_HEAD(pio_list);
 	unsigned long flags;
 
-	if (likely(RB_EMPTY_NODE(&pio->node)))
+	if (likely(hlist_unhashed(&pio->hlist_node)))
 		return;
 
 	spin_lock_irqsave(&ploop->inflight_lock, flags);
-	unlink_pio(ploop, &ploop->inflight_pios_rbtree, pio, &pio_list);
+	unlink_pio(ploop, pio, &pio_list);
 	spin_unlock_irqrestore(&ploop->inflight_lock, flags);
 
 	if (!list_empty(&pio_list)) {
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index f346e6ca68c5..bbe3686e9f34 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -152,8 +152,10 @@ static void ploop_destroy(struct ploop *ploop)
 		if (ploop->deltas[ploop->nr_deltas].file)
 			fput(ploop->deltas[ploop->nr_deltas].file);
 	}
-	WARN_ON(!RB_EMPTY_ROOT(&ploop->exclusive_bios_rbtree));
-	WARN_ON(!RB_EMPTY_ROOT(&ploop->inflight_pios_rbtree));
+//	WARN_ON(!RB_EMPTY_ROOT(&ploop->exclusive_pios));
+//	WARN_ON(!RB_EMPTY_ROOT(&ploop->inflight_pios));
+	kfree(ploop->inflight_pios);
+	kfree(ploop->exclusive_pios);
 	kfree(ploop->deltas);
 	kvfree(ploop->holes_bitmap);
 	kvfree(ploop->tracking_bitmap);
@@ -289,6 +291,17 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -ENOMEM;
 	}
 
+	ploop->exclusive_pios = kcalloc(PLOOP_HASH_TABLE_SIZE,
+					sizeof(struct hlist_head),
+					GFP_KERNEL);
+	ploop->inflight_pios = kcalloc(PLOOP_HASH_TABLE_SIZE,
+					sizeof(struct hlist_head),
+					GFP_KERNEL);
+	if (!ploop->exclusive_pios || !ploop->inflight_pios) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
 	rwlock_init(&ploop->bat_rwlock);
 	init_rwsem(&ploop->ctl_rwsem);
 	spin_lock_init(&ploop->inflight_lock);
@@ -301,8 +314,6 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	INIT_LIST_HEAD(&ploop->delta_cow_action_list);
 	atomic_set(&ploop->nr_discard_bios, 0);
 	ploop->bat_entries = RB_ROOT;
-	ploop->exclusive_bios_rbtree = RB_ROOT;
-	ploop->inflight_pios_rbtree = RB_ROOT;
 
 	INIT_WORK(&ploop->worker, do_ploop_work);
 	INIT_WORK(&ploop->fsync_worker, do_ploop_fsync_work);
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 03410b5ac093..635c53d6993b 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -160,17 +160,17 @@ struct ploop {
 
 	bool force_rbtree_for_inflight;
 	/*
-	 * RBtree to link non-exclusive submitted bios.
+	 * Hash table to link non-exclusive submitted bios.
 	 * This is needed for discard to check, nobody uses
 	 * the discarding cluster. Only made when the above
 	 * force_rbtree_for_inflight is enabled.
 	 */
-	struct rb_root inflight_pios_rbtree;
+	struct hlist_head *inflight_pios;
 	/*
 	 * Hash table to link exclusive submitted bios.
 	 * This allows to delay bios going in some cluster.
 	 */
-	struct rb_root exclusive_bios_rbtree;
+	struct hlist_head *exclusive_pios;
 
 	atomic_t nr_discard_bios;
 	unsigned long pending_discard_cleanup;
@@ -221,7 +221,7 @@ struct pio {
 	struct ploop *ploop;
 
 	struct list_head list;
-	struct rb_node node;
+	struct hlist_node hlist_node;
 	/* List of pios, which will be queued from this pio end */
 	struct list_head endio_list;
 
@@ -474,14 +474,7 @@ static inline void track_pio(struct ploop *ploop, struct pio *pio)
 		__track_pio(ploop, pio);
 }
 
-extern struct pio *find_pio_range(struct ploop *ploop, struct rb_root *root,
-				  unsigned int l, unsigned int r);
-
-static inline struct pio *find_pio(struct ploop *ploop, struct rb_root *root,
-					  unsigned int cluster)
-{
-	return find_pio_range(ploop, root, cluster, cluster);
-}
+extern struct pio *find_pio(struct hlist_head head[], u32 clu);
 
 extern int prealloc_md_pages(struct rb_root *root, unsigned int nr_bat_entries,
 			     unsigned int new_nr_bat_entries);
@@ -501,6 +494,13 @@ static inline struct pio *pio_list_pop(struct list_head *pio_list)
 	return pio;
 }
 
+#define PLOOP_HASH_TABLE_BITS 5
+#define PLOOP_HASH_TABLE_SIZE (1 << PLOOP_HASH_TABLE_BITS)
+static inline struct hlist_head *ploop_htable_slot(struct hlist_head head[], u32 clu)
+{
+	return &head[hash_32(clu, PLOOP_HASH_TABLE_BITS)];
+}
+
 extern void md_page_insert(struct ploop *ploop, struct md_page *md);
 extern void ploop_free_md_page(struct md_page *md);
 extern void free_md_pages_tree(struct rb_root *root);


More information about the Devel mailing list