[Devel] [PATCH rh7 2/2] ploop: push_backup: rework lockout machinery
Maxim Patlasov
mpatlasov at virtuozzo.com
Thu Jun 2 16:52:47 PDT 2016
It was not a very nice idea to reuse plo->lockout_tree for push_backup: by
design only one preq (for any given req_cluster) can sit in the lockout
tree, but while we're reusing the tree for a WRITE request, a READ from the
backup tool may come. Such a READ may want to use the tree too: see how
map_index_fault calls add_lockout in snapshot configurations.

The patch introduces a separate ad-hoc push_backup lockout tree. This fixes
the issue (PSBM-47680) and makes the code much easier to understand.
https://jira.sw.ru/browse/PSBM-47680
Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
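A note on the rb-tree mechanics behind the fix: a struct rb_node can be
linked into at most one tree at a time, and each lockout tree holds at most
one entry per req_cluster key, so a second tree requires a second rb_node in
struct ploop_request (lockout_pb_link below). The following is a minimal
kernel-style sketch of the keyed insert pattern both __ploop_add_lockout
variants follow; it is illustrative only, not part of the patch, and the
stub_* names are made up:

#include <linux/errno.h>
#include <linux/rbtree.h>
#include <linux/types.h>

struct preq_stub {
        u32 req_cluster;
        struct rb_node link;    /* one rb_node can sit in only one tree */
};

/*
 * Insert @p into @tree keyed by req_cluster. Returns -EEXIST if the
 * cluster is already locked out: at most one entry per key, by design.
 */
static int stub_add_lockout(struct rb_root *tree, struct preq_stub *p)
{
        struct rb_node **n = &tree->rb_node, *parent = NULL;

        while (*n) {
                struct preq_stub *q = rb_entry(*n, struct preq_stub, link);

                parent = *n;
                if (p->req_cluster < q->req_cluster)
                        n = &(*n)->rb_left;
                else if (p->req_cluster > q->req_cluster)
                        n = &(*n)->rb_right;
                else
                        return -EEXIST;
        }
        rb_link_node(&p->link, parent, n);
        rb_insert_color(&p->link, tree);
        return 0;
}

Since lockout_link of a WRITE preq is already consumed by the main tree, a
concurrent READ (via map_index_fault -> add_lockout) could collide on the
same key when the tree was shared; giving push_backup its own tree with its
own link field removes the conflict.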
drivers/block/ploop/dev.c | 111 ++++++++++++++++++++++++++++++++++--------
drivers/block/ploop/events.h | 1 +
include/linux/ploop/ploop.h | 3 +
3 files changed, 95 insertions(+), 20 deletions(-)
diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index d3f0ec0..27827a8 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1117,20 +1117,25 @@ static int ploop_congested(void *data, int bits)
return ret;
}
-static int check_lockout(struct ploop_request *preq)
+static int __check_lockout(struct ploop_request *preq, bool pb)
{
struct ploop_device * plo = preq->plo;
- struct rb_node * n = plo->lockout_tree.rb_node;
+ struct rb_node * n = pb ? plo->lockout_pb_tree.rb_node :
+ plo->lockout_tree.rb_node;
struct ploop_request * p;
+ int lockout_bit = pb ? PLOOP_REQ_PB_LOCKOUT : PLOOP_REQ_LOCKOUT;
if (n == NULL)
return 0;
- if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+ if (test_bit(lockout_bit, &preq->state))
return 0;
while (n) {
- p = rb_entry(n, struct ploop_request, lockout_link);
+ if (pb)
+ p = rb_entry(n, struct ploop_request, lockout_pb_link);
+ else
+ p = rb_entry(n, struct ploop_request, lockout_link);
if (preq->req_cluster < p->req_cluster)
n = n->rb_left;
@@ -1146,19 +1151,51 @@ static int check_lockout(struct ploop_request *preq)
return 0;
}
-int ploop_add_lockout(struct ploop_request *preq, int try)
+static int check_lockout(struct ploop_request *preq)
+{
+ if (__check_lockout(preq, false))
+ return 1;
+
+ /* push_backup passes READs intact */
+ if (!(preq->req_rw & REQ_WRITE))
+ return 0;
+
+ if (__check_lockout(preq, true))
+ return 1;
+
+ return 0;
+}
+
+static int __ploop_add_lockout(struct ploop_request *preq, int try, bool pb)
{
struct ploop_device * plo = preq->plo;
- struct rb_node ** p = &plo->lockout_tree.rb_node;
+ struct rb_node ** p;
struct rb_node *parent = NULL;
struct ploop_request * pr;
+ struct rb_node *link;
+ struct rb_root *tree;
+ int lockout_bit;
+
+ if (pb) {
+ link = &preq->lockout_pb_link;
+ tree = &plo->lockout_pb_tree;
+ lockout_bit = PLOOP_REQ_PB_LOCKOUT;
+ } else {
+ link = &preq->lockout_link;
+ tree = &plo->lockout_tree;
+ lockout_bit = PLOOP_REQ_LOCKOUT;
+ }
- if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+ if (test_bit(lockout_bit, &preq->state))
return 0;
+ p = &tree->rb_node;
while (*p) {
parent = *p;
- pr = rb_entry(parent, struct ploop_request, lockout_link);
+ if (pb)
+ pr = rb_entry(parent, struct ploop_request, lockout_pb_link);
+ else
+ pr = rb_entry(parent, struct ploop_request, lockout_link);
if (preq->req_cluster == pr->req_cluster) {
if (try)
@@ -1174,23 +1211,56 @@ int ploop_add_lockout(struct ploop_request *preq, int try)
trace_add_lockout(preq);
- rb_link_node(&preq->lockout_link, parent, p);
- rb_insert_color(&preq->lockout_link, &plo->lockout_tree);
- __set_bit(PLOOP_REQ_LOCKOUT, &preq->state);
+ rb_link_node(link, parent, p);
+ rb_insert_color(link, tree);
+ __set_bit(lockout_bit, &preq->state);
return 0;
}
+
+int ploop_add_lockout(struct ploop_request *preq, int try)
+{
+ return __ploop_add_lockout(preq, try, false);
+}
EXPORT_SYMBOL(ploop_add_lockout);
-void del_lockout(struct ploop_request *preq)
+static void ploop_add_pb_lockout(struct ploop_request *preq)
+{
+ __ploop_add_lockout(preq, 0, true);
+}
+
+static void __del_lockout(struct ploop_request *preq, bool pb)
{
struct ploop_device * plo = preq->plo;
+ struct rb_node *link;
+ struct rb_root *tree;
+ int lockout_bit;
+
+ if (pb) {
+ link = &preq->lockout_pb_link;
+ tree = &plo->lockout_pb_tree;
+ lockout_bit = PLOOP_REQ_PB_LOCKOUT;
+ } else {
+ link = &preq->lockout_link;
+ tree = &plo->lockout_tree;
+ lockout_bit = PLOOP_REQ_LOCKOUT;
+ }
- if (!test_and_clear_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+ if (!test_and_clear_bit(lockout_bit, &preq->state))
return;
trace_del_lockout(preq);
- rb_erase(&preq->lockout_link, &plo->lockout_tree);
+ rb_erase(link, tree);
+}
+
+void del_lockout(struct ploop_request *preq)
+{
+ __del_lockout(preq, false);
+}
+
+static void del_pb_lockout(struct ploop_request *preq)
+{
+ __del_lockout(preq, true);
}
static void ploop_discard_wakeup(struct ploop_request *preq, int err)
@@ -1284,6 +1354,7 @@ static void ploop_complete_request(struct ploop_request * preq)
spin_lock_irq(&plo->lock);
del_lockout(preq);
+ del_pb_lockout(preq); /* preq may die via ploop_fail_immediate() */
if (!list_empty(&preq->delay_list))
list_splice_init(&preq->delay_list, plo->ready_queue.prev);
@@ -2040,23 +2111,22 @@ restart:
}
/* push_backup special processing */
- if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+ if (!test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
(preq->req_rw & REQ_WRITE) && preq->req_size &&
ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
/* already reported by userspace push_backup */
ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
} else {
- spin_lock_irq(&plo->lock);
- ploop_add_lockout(preq, 0);
- spin_unlock_irq(&plo->lock);
+ /* needn't lock because only ploop_thread accesses */
+ ploop_add_pb_lockout(preq);
/*
* preq IN: preq is in ppb_pending tree waiting for
* out-of-band push_backup processing by userspace ...
*/
return;
}
- } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+ } else if (test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
/*
* preq OUT: out-of-band push_backup processing by
@@ -2064,8 +2134,8 @@ restart:
*/
ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
+ del_pb_lockout(preq);
spin_lock_irq(&plo->lock);
- del_lockout(preq);
if (!list_empty(&preq->delay_list))
list_splice_init(&preq->delay_list, plo->ready_queue.prev);
spin_unlock_irq(&plo->lock);
@@ -4894,6 +4964,7 @@ static struct ploop_device *__ploop_dev_alloc(int index)
INIT_LIST_HEAD(&plo->entry_queue);
plo->entry_tree[0] = plo->entry_tree[1] = RB_ROOT;
plo->lockout_tree = RB_ROOT;
+ plo->lockout_pb_tree = RB_ROOT;
INIT_LIST_HEAD(&plo->ready_queue);
INIT_LIST_HEAD(&plo->free_list);
init_waitqueue_head(&plo->waitq);
diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h
index bc73b72..c22dbde 100644
--- a/drivers/block/ploop/events.h
+++ b/drivers/block/ploop/events.h
@@ -26,6 +26,7 @@
#define PRINT_PREQ_STATE(state) \
__print_flags(state, "|", \
{ 1 << PLOOP_REQ_LOCKOUT, "L"}, \
+ { 1 << PLOOP_REQ_PB_LOCKOUT, "BL"}, \
{ 1 << PLOOP_REQ_SYNC, "S"}, \
{ 1 << PLOOP_REQ_BARRIER, "B"}, \
{ 1 << PLOOP_REQ_UNSTABLE, "U"}, \
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 77fd833..2b63493 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -368,6 +368,7 @@ struct ploop_device
struct list_head ready_queue;
struct rb_root lockout_tree;
+ struct rb_root lockout_pb_tree;
int cluster_log;
int fmt_version;
@@ -453,6 +454,7 @@ struct ploop_device
enum
{
PLOOP_REQ_LOCKOUT, /* This preq is locking overapping requests */
+ PLOOP_REQ_PB_LOCKOUT, /* This preq is locking overlapping WRITEs */
PLOOP_REQ_SYNC,
PLOOP_REQ_BARRIER,
PLOOP_REQ_UNSTABLE,
@@ -574,6 +576,7 @@ struct ploop_request
* until we allocate and initialize block in delta.
*/
struct rb_node lockout_link;
+ struct rb_node lockout_pb_link;
u32 track_cluster;
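For reference, a condensed trace of a WRITE preq hitting a not-yet-backed-up
cluster under the new scheme -- a simplified sketch assembled from the hunks
above, with error paths and queueing details omitted:

/*
 * ploop_thread, first pass over the preq:
 *   check_lockout(preq)          - WRITEs now consult both trees;
 *                                  READs only the main lockout_tree
 *   ploop_pb_preq_add_pending()  - park the preq for userspace
 *   ploop_add_pb_lockout(preq)   - preq IN: later WRITEs to this cluster
 *                                  are delayed, READs pass intact
 *
 * userspace backup tool:
 *   reads and saves the cluster  - unaffected by the pb lockout tree
 *   reports the cluster done
 *
 * ploop_thread, second pass (preq OUT):
 *   ploop_pb_clear_bit()         - out-of-band processing finished
 *   del_pb_lockout(preq)         - cluster unlocked; delayed WRITEs are
 *                                  then spliced back to the ready queue
 *   ploop_complete_request()     - calls del_pb_lockout() again; a safe
 *                                  no-op since the bit is already clear
 */

Note that del_pb_lockout() needs no spin_lock_irq(&plo->lock): as the
comment in the hunk above says, the pb tree is only ever accessed by the
ploop thread.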