[Devel] [PATCH RHEL8 COMMIT] ploop: Use hlist instead of rbtree
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jun 17 19:01:01 MSK 2021
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.44
------>
commit 0aaa7c6961037d49d95f92e3e1b993bcd2d4e12d
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Thu Jun 17 19:01:00 2021 +0300
ploop: Use hlist instead of rbtree
The number of inflight/locking pios is too small to justify
an rbtree, and the next patches will make inflight pios be
linked always. Since an hlist has lower add/del overhead,
use it instead.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
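Below is a minimal standalone sketch (not part of the commit) of the hashed-hlist
pattern the patch switches to: a small fixed array of hlist_head buckets indexed
by hash_32() of the cluster number. The ex_* struct and helper names are purely
illustrative; only hash_32() and the hlist_* primitives are the real in-kernel
helpers the patch relies on.

#include <linux/hash.h>
#include <linux/list.h>
#include <linux/types.h>

#define EX_HASH_BITS	5			/* 32 buckets, as in the patch */

struct ex_pio {
	u32 clu;
	struct hlist_node hlist_node;
};

static struct hlist_head *ex_slot(struct hlist_head head[], u32 clu)
{
	return &head[hash_32(clu, EX_HASH_BITS)];
}

/* O(1) insert at the bucket head: no rebalancing, unlike rb_insert_color() */
static void ex_link(struct hlist_head head[], struct ex_pio *pio, u32 clu)
{
	pio->clu = clu;
	hlist_add_head(&pio->hlist_node, ex_slot(head, clu));
}

/* Linear scan of one short bucket; cheap while the population stays small */
static struct ex_pio *ex_find(struct hlist_head head[], u32 clu)
{
	struct ex_pio *pio;

	hlist_for_each_entry(pio, ex_slot(head, clu), hlist_node) {
		if (pio->clu == clu)
			return pio;
	}
	return NULL;
}

/*
 * hlist_del_init() leaves the node unhashed, so "am I linked?" checks
 * reduce to hlist_unhashed(), replacing RB_EMPTY_NODE()/RB_CLEAR_NODE().
 */
static void ex_unlink(struct ex_pio *pio)
{
	if (!hlist_unhashed(&pio->hlist_node))
		hlist_del_init(&pio->hlist_node);
}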
=====================
Patchset description:
ploop: Allow to resubmit partially completed request
This makes it possible to continue submitting partially completed requests.
https://jira.sw.ru/browse/PSBM-127225
Kirill Tkhai (18):
ploop: Simplify ploop_write_cluster_sync()
ploop: Rename hook->pio, h->pio, ploop_cow::hook->aux_pio
ploop: Rename force_link_inflight_bios
ploop: Introduce separate lock for inflight pios
ploop: Use hlist instead of rbtree
ploop: Always link submitted pios
ploop: Unexport ploop_inflight_bios_ref_switch()
ploop: Refactor submit_pio()
ploop: Introduce ploop_suspend_submitting_pios
ploop: Refactor ploop_ctr()
ploop: Use ploop_call_rw_iter() in submit_delta_read()
ploop: Generalize submit_rw_mapped()
ploop: Kill submit_delta_read()
ploop: Rename submit_rw_mapped()
ploop: Extract submit_rw_mapped() to separate function
ploop: Save level before submitting pio
ploop: Make fsync work be able to run in parallel with main work
ploop: Introduce resubmitting partially completed pios
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
drivers/md/dm-ploop-map.c | 78 +++++++++++++++-----------------------------
drivers/md/dm-ploop-target.c | 19 ++++++++---
drivers/md/dm-ploop.h | 24 +++++++-------
3 files changed, 53 insertions(+), 68 deletions(-)
diff --git a/drivers/md/dm-ploop-map.c b/drivers/md/dm-ploop-map.c
index 388973b56a85..efa0da8afe3f 100644
--- a/drivers/md/dm-ploop-map.c
+++ b/drivers/md/dm-ploop-map.c
@@ -100,10 +100,10 @@ void init_pio(struct ploop *ploop, unsigned int bi_op, struct pio *pio)
atomic_set(&pio->remaining, 1);
pio->piwb = NULL;
INIT_LIST_HEAD(&pio->list);
+ INIT_HLIST_NODE(&pio->hlist_node);
INIT_LIST_HEAD(&pio->endio_list);
/* FIXME: assign real cluster? */
pio->cluster = UINT_MAX;
- RB_CLEAR_NODE(&pio->node);
}
/* Get cluster related to pio sectors */
@@ -333,19 +333,15 @@ static void zero_fill_pio(struct pio *pio)
}
}
-struct pio *find_pio_range(struct ploop *ploop, struct rb_root *root,
- unsigned int left, unsigned int right)
+struct pio *find_pio(struct hlist_head head[], u32 clu)
{
- struct rb_node *node = root->rb_node;
+ struct hlist_head *slot = ploop_htable_slot(head, clu);
struct pio *pio;
- while (node) {
- pio = rb_entry(node, struct pio, node);
- if (right < pio->cluster)
- node = node->rb_left;
- else if (left > pio->cluster)
- node = node->rb_right;
- else
+ BUG_ON(!slot);
+
+ hlist_for_each_entry(pio, slot, hlist_node) {
+ if (pio->cluster == clu)
return pio;
}
@@ -355,13 +351,13 @@ struct pio *find_pio_range(struct ploop *ploop, struct rb_root *root,
static struct pio *find_inflight_bio(struct ploop *ploop, unsigned int cluster)
{
lockdep_assert_held(&ploop->inflight_lock);
- return find_pio(ploop, &ploop->inflight_pios_rbtree, cluster);
+ return find_pio(ploop->inflight_pios, cluster);
}
struct pio *find_lk_of_cluster(struct ploop *ploop, unsigned int cluster)
{
lockdep_assert_held(&ploop->deferred_lock);
- return find_pio(ploop, &ploop->exclusive_bios_rbtree, cluster);
+ return find_pio(ploop->exclusive_pios, cluster);
}
static void add_endio_pio(struct pio *head, struct pio *pio)
@@ -397,37 +393,17 @@ static void dec_nr_inflight(struct ploop *ploop, struct pio *pio)
}
}
-static void link_pio(struct ploop *ploop, struct pio *new, struct rb_root *root,
- unsigned int cluster, bool exclusive)
+static void link_pio(struct hlist_head head[], struct pio *pio,
+ u32 clu, bool exclusive)
{
- struct rb_node *parent, **node = &root->rb_node;
- struct pio *pio;
+ struct hlist_head *slot = ploop_htable_slot(head, clu);
- BUG_ON(!RB_EMPTY_NODE(&new->node));
- parent = NULL;
-
- while (*node) {
- pio = rb_entry(*node, struct pio, node);
- parent = *node;
- if (cluster < pio->cluster)
- node = &parent->rb_left;
- else if (cluster > pio->cluster)
- node = &parent->rb_right;
- else {
- if (exclusive)
- BUG();
- if (new < pio)
- node = &parent->rb_left;
- else if (new > pio)
- node = &parent->rb_right;
- else
- BUG();
- }
- }
+ if (exclusive)
+ WARN_ON_ONCE(find_pio(head, clu) != NULL);
- new->cluster = cluster;
- rb_link_node(&new->node, parent, node);
- rb_insert_color(&new->node, root);
+ BUG_ON(!hlist_unhashed(&pio->hlist_node));
+ hlist_add_head(&pio->hlist_node, slot);
+ pio->cluster = clu;
}
/*
@@ -435,14 +411,12 @@ static void link_pio(struct ploop *ploop, struct pio *new, struct rb_root *root,
* or from exclusive_bios_rbtree. BIOs from endio_list are requeued
* to deferred_list.
*/
-static void unlink_pio(struct ploop *ploop, struct rb_root *root,
- struct pio *pio, struct list_head *pio_list)
+static void unlink_pio(struct ploop *ploop, struct pio *pio,
+ struct list_head *pio_list)
{
- BUG_ON(RB_EMPTY_NODE(&pio->node));
-
- rb_erase(&pio->node, root);
- RB_CLEAR_NODE(&pio->node);
+ BUG_ON(hlist_unhashed(&pio->hlist_node));
+ hlist_del_init(&pio->hlist_node);
list_splice_tail_init(&pio->endio_list, pio_list);
}
@@ -451,7 +425,7 @@ static void add_cluster_lk(struct ploop *ploop, struct pio *pio, u32 cluster)
unsigned long flags;
spin_lock_irqsave(&ploop->deferred_lock, flags);
- link_pio(ploop, pio, &ploop->exclusive_bios_rbtree, cluster, true);
+ link_pio(ploop->exclusive_pios, pio, cluster, true);
spin_unlock_irqrestore(&ploop->deferred_lock, flags);
}
static void del_cluster_lk(struct ploop *ploop, struct pio *pio)
@@ -461,7 +435,7 @@ static void del_cluster_lk(struct ploop *ploop, struct pio *pio)
bool queue = false;
spin_lock_irqsave(&ploop->deferred_lock, flags);
- unlink_pio(ploop, &ploop->exclusive_bios_rbtree, pio, &pio_list);
+ unlink_pio(ploop, pio, &pio_list);
if (!list_empty(&pio_list)) {
list_splice_tail(&pio_list, &ploop->deferred_pios);
queue = true;
@@ -482,7 +456,7 @@ static void maybe_link_submitting_pio(struct ploop *ploop, struct pio *pio,
return;
spin_lock_irqsave(&ploop->inflight_lock, flags);
- link_pio(ploop, pio, &ploop->inflight_pios_rbtree, cluster, false);
+ link_pio(ploop->inflight_pios, pio, cluster, false);
spin_unlock_irqrestore(&ploop->inflight_lock, flags);
}
static void maybe_unlink_completed_pio(struct ploop *ploop, struct pio *pio)
@@ -490,11 +464,11 @@ static void maybe_unlink_completed_pio(struct ploop *ploop, struct pio *pio)
LIST_HEAD(pio_list);
unsigned long flags;
- if (likely(RB_EMPTY_NODE(&pio->node)))
+ if (likely(hlist_unhashed(&pio->hlist_node)))
return;
spin_lock_irqsave(&ploop->inflight_lock, flags);
- unlink_pio(ploop, &ploop->inflight_pios_rbtree, pio, &pio_list);
+ unlink_pio(ploop, pio, &pio_list);
spin_unlock_irqrestore(&ploop->inflight_lock, flags);
if (!list_empty(&pio_list)) {
diff --git a/drivers/md/dm-ploop-target.c b/drivers/md/dm-ploop-target.c
index f346e6ca68c5..bbe3686e9f34 100644
--- a/drivers/md/dm-ploop-target.c
+++ b/drivers/md/dm-ploop-target.c
@@ -152,8 +152,10 @@ static void ploop_destroy(struct ploop *ploop)
if (ploop->deltas[ploop->nr_deltas].file)
fput(ploop->deltas[ploop->nr_deltas].file);
}
- WARN_ON(!RB_EMPTY_ROOT(&ploop->exclusive_bios_rbtree));
- WARN_ON(!RB_EMPTY_ROOT(&ploop->inflight_pios_rbtree));
+// WARN_ON(!RB_EMPTY_ROOT(&ploop->exclusive_pios));
+// WARN_ON(!RB_EMPTY_ROOT(&ploop->inflight_pios));
+ kfree(ploop->inflight_pios);
+ kfree(ploop->exclusive_pios);
kfree(ploop->deltas);
kvfree(ploop->holes_bitmap);
kvfree(ploop->tracking_bitmap);
@@ -289,6 +291,17 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -ENOMEM;
}
+ ploop->exclusive_pios = kcalloc(PLOOP_HASH_TABLE_SIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ ploop->inflight_pios = kcalloc(PLOOP_HASH_TABLE_SIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!ploop->exclusive_pios || !ploop->inflight_pios) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
rwlock_init(&ploop->bat_rwlock);
init_rwsem(&ploop->ctl_rwsem);
spin_lock_init(&ploop->inflight_lock);
@@ -301,8 +314,6 @@ static int ploop_ctr(struct dm_target *ti, unsigned int argc, char **argv)
INIT_LIST_HEAD(&ploop->delta_cow_action_list);
atomic_set(&ploop->nr_discard_bios, 0);
ploop->bat_entries = RB_ROOT;
- ploop->exclusive_bios_rbtree = RB_ROOT;
- ploop->inflight_pios_rbtree = RB_ROOT;
INIT_WORK(&ploop->worker, do_ploop_work);
INIT_WORK(&ploop->fsync_worker, do_ploop_fsync_work);
diff --git a/drivers/md/dm-ploop.h b/drivers/md/dm-ploop.h
index 03410b5ac093..635c53d6993b 100644
--- a/drivers/md/dm-ploop.h
+++ b/drivers/md/dm-ploop.h
@@ -160,17 +160,17 @@ struct ploop {
bool force_rbtree_for_inflight;
/*
- * RBtree to link non-exclusive submitted bios.
+ * Hash table to link non-exclusive submitted bios.
* This is needed for discard to check, nobody uses
* the discarding cluster. Only made when the above
* force_rbtree_for_inflight is enabled.
*/
- struct rb_root inflight_pios_rbtree;
+ struct hlist_head *inflight_pios;
/*
* Hash table to link exclusive submitted bios.
* This allows to delay bios going in some cluster.
*/
- struct rb_root exclusive_bios_rbtree;
+ struct hlist_head *exclusive_pios;
atomic_t nr_discard_bios;
unsigned long pending_discard_cleanup;
@@ -221,7 +221,7 @@ struct pio {
struct ploop *ploop;
struct list_head list;
- struct rb_node node;
+ struct hlist_node hlist_node;
/* List of pios, which will be queued from this pio end */
struct list_head endio_list;
@@ -474,14 +474,7 @@ static inline void track_pio(struct ploop *ploop, struct pio *pio)
__track_pio(ploop, pio);
}
-extern struct pio *find_pio_range(struct ploop *ploop, struct rb_root *root,
- unsigned int l, unsigned int r);
-
-static inline struct pio *find_pio(struct ploop *ploop, struct rb_root *root,
- unsigned int cluster)
-{
- return find_pio_range(ploop, root, cluster, cluster);
-}
+extern struct pio *find_pio(struct hlist_head head[], u32 clu);
extern int prealloc_md_pages(struct rb_root *root, unsigned int nr_bat_entries,
unsigned int new_nr_bat_entries);
@@ -501,6 +494,13 @@ static inline struct pio *pio_list_pop(struct list_head *pio_list)
return pio;
}
+#define PLOOP_HASH_TABLE_BITS 5
+#define PLOOP_HASH_TABLE_SIZE (1 << PLOOP_HASH_TABLE_BITS)
+static inline struct hlist_head *ploop_htable_slot(struct hlist_head head[], u32 clu)
+{
+ return &head[hash_32(clu, PLOOP_HASH_TABLE_BITS)];
+}
+
extern void md_page_insert(struct ploop *ploop, struct md_page *md);
extern void ploop_free_md_page(struct md_page *md);
extern void free_md_pages_tree(struct rb_root *root);
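One allocation detail worth noting: kcalloc() in ploop_ctr() above returns zeroed
memory, and an hlist_head whose first pointer is NULL is already a valid empty
bucket, so no explicit INIT_HLIST_HEAD() loop is needed. The sketch below shows
the equivalent explicit initialization; alloc_pio_table() is a hypothetical
helper, not something the patch adds.

#include <linux/list.h>
#include <linux/slab.h>

#define PLOOP_HASH_TABLE_BITS	5
#define PLOOP_HASH_TABLE_SIZE	(1 << PLOOP_HASH_TABLE_BITS)

/* Hypothetical helper: same result as the kcalloc() call in ploop_ctr() */
static struct hlist_head *alloc_pio_table(void)
{
	struct hlist_head *head;
	int i;

	head = kmalloc_array(PLOOP_HASH_TABLE_SIZE, sizeof(*head), GFP_KERNEL);
	if (!head)
		return NULL;
	for (i = 0; i < PLOOP_HASH_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&head[i]);	/* equivalent to the zeroed .first */
	return head;
}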