[Devel] [PATCH RHEL7 COMMIT] ploop: push_backup: rework lockout machinery

Konstantin Khorenko khorenko at virtuozzo.com
Fri Jun 3 05:11:05 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.11
------>
commit ee80c8760dfcb40a3159c7d1ac19b2f73036203d
Author: Maxim Patlasov <mpatlasov at virtuozzo.com>
Date:   Fri Jun 3 16:11:05 2016 +0400

    ploop: push_backup: rework lockout machinery
    
    It was not a very good idea to reuse plo->lockout_tree for push_backup. By
    design, only one preq (for any given req_cluster) can sit in the lockout
    tree, but while we're reusing the tree for a WRITE request, a READ from the
    backup tool may come. Such a READ may want to use the tree: see how
    map_index_fault calls add_lockout for snapshot configuration.
    
    The patch introduces an ad-hoc separate push_backup lockout tree. This fixes
    the issue (PSBM-47680) and makes the code much easier to understand.
    
    https://jira.sw.ru/browse/PSBM-47680
    
    Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
 drivers/block/ploop/dev.c    | 111 +++++++++++++++++++++++++++++++++++--------
 drivers/block/ploop/events.h |   1 +
 include/linux/ploop/ploop.h  |   3 ++
 3 files changed, 95 insertions(+), 20 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index d3f0ec0..27827a8 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1117,20 +1117,25 @@ static int ploop_congested(void *data, int bits)
 	return ret;
 }
 
-static int check_lockout(struct ploop_request *preq)
+static int __check_lockout(struct ploop_request *preq, bool pb)
 {
 	struct ploop_device * plo = preq->plo;
-	struct rb_node * n = plo->lockout_tree.rb_node;
+	struct rb_node * n = pb ? plo->lockout_pb_tree.rb_node :
+				  plo->lockout_tree.rb_node;
 	struct ploop_request * p;
+	int lockout_bit = pb ? PLOOP_REQ_PB_LOCKOUT : PLOOP_REQ_LOCKOUT;
 
 	if (n == NULL)
 		return 0;
 
-	if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+	if (test_bit(lockout_bit, &preq->state))
 		return 0;
 
 	while (n) {
-		p = rb_entry(n, struct ploop_request, lockout_link);
+		if (pb)
+			p = rb_entry(n, struct ploop_request, lockout_pb_link);
+		else
+			p = rb_entry(n, struct ploop_request, lockout_link);
 
 		if (preq->req_cluster < p->req_cluster)
 			n = n->rb_left;
@@ -1146,19 +1151,51 @@ static int check_lockout(struct ploop_request *preq)
 	return 0;
 }
 
-int ploop_add_lockout(struct ploop_request *preq, int try)
+static int check_lockout(struct ploop_request *preq)
+{
+	if (__check_lockout(preq, false))
+		return 1;
+
+	/* push_backup passes READs intact */
+	if (!(preq->req_rw & REQ_WRITE))
+		return 0;
+
+	if (__check_lockout(preq, true))
+		return 1;
+
+	return 0;
+}
+
+static int __ploop_add_lockout(struct ploop_request *preq, int try, bool pb)
 {
 	struct ploop_device * plo = preq->plo;
-	struct rb_node ** p = &plo->lockout_tree.rb_node;
+	struct rb_node ** p;
 	struct rb_node *parent = NULL;
 	struct ploop_request * pr;
+	struct rb_node *link;
+	struct rb_root *tree;
+	int lockout_bit;
+
+	if (pb) {
+		link = &preq->lockout_pb_link;
+		tree = &plo->lockout_pb_tree;
+		lockout_bit = PLOOP_REQ_PB_LOCKOUT;
+	} else {
+		link = &preq->lockout_link;
+		tree = &plo->lockout_tree;
+		lockout_bit = PLOOP_REQ_LOCKOUT;
+	}
 
-	if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+	if (test_bit(lockout_bit, &preq->state))
 		return 0;
 
+	p = &tree->rb_node;
 	while (*p) {
 		parent = *p;
-		pr = rb_entry(parent, struct ploop_request, lockout_link);
+		if (pb)
+			pr = rb_entry(parent, struct ploop_request, lockout_pb_link);
+		else
+			pr = rb_entry(parent, struct ploop_request, lockout_link);
 
 		if (preq->req_cluster == pr->req_cluster) {
 			if (try)
@@ -1174,23 +1211,56 @@ int ploop_add_lockout(struct ploop_request *preq, int try)
 
 	trace_add_lockout(preq);
 
-	rb_link_node(&preq->lockout_link, parent, p);
-	rb_insert_color(&preq->lockout_link, &plo->lockout_tree);
-	__set_bit(PLOOP_REQ_LOCKOUT, &preq->state);
+	rb_link_node(link, parent, p);
+	rb_insert_color(link, tree);
+	__set_bit(lockout_bit, &preq->state);
 	return 0;
 }
+
+int ploop_add_lockout(struct ploop_request *preq, int try)
+{
+	return __ploop_add_lockout(preq, try, false);
+}
 EXPORT_SYMBOL(ploop_add_lockout);
 
-void del_lockout(struct ploop_request *preq)
+static void ploop_add_pb_lockout(struct ploop_request *preq)
+{
+	__ploop_add_lockout(preq, 0, true);
+}
+
+static void __del_lockout(struct ploop_request *preq, bool pb)
 {
 	struct ploop_device * plo = preq->plo;
+	struct rb_node *link;
+	struct rb_root *tree;
+	int lockout_bit;
+
+	if (pb) {
+		link = &preq->lockout_pb_link;
+		tree = &plo->lockout_pb_tree;
+		lockout_bit = PLOOP_REQ_PB_LOCKOUT;
+	} else {
+		link = &preq->lockout_link;
+		tree = &plo->lockout_tree;
+		lockout_bit = PLOOP_REQ_LOCKOUT;
+	}
 
-	if (!test_and_clear_bit(PLOOP_REQ_LOCKOUT, &preq->state))
+	if (!test_and_clear_bit(lockout_bit, &preq->state))
 		return;
 
 	trace_del_lockout(preq);
 
-	rb_erase(&preq->lockout_link, &plo->lockout_tree);
+	rb_erase(link, tree);
+}
+
+void del_lockout(struct ploop_request *preq)
+{
+	__del_lockout(preq, false);
+}
+
+static void del_pb_lockout(struct ploop_request *preq)
+{
+	__del_lockout(preq, true);
 }
 
 static void ploop_discard_wakeup(struct ploop_request *preq, int err)
@@ -1284,6 +1354,7 @@ static void ploop_complete_request(struct ploop_request * preq)
 	spin_lock_irq(&plo->lock);
 
 	del_lockout(preq);
+	del_pb_lockout(preq); /* preq may die via ploop_fail_immediate() */
 
 	if (!list_empty(&preq->delay_list))
 		list_splice_init(&preq->delay_list, plo->ready_queue.prev);
@@ -2040,23 +2111,22 @@ restart:
 	}
 
 	/* push_backup special processing */
-	if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+	if (!test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
 	    (preq->req_rw & REQ_WRITE) && preq->req_size &&
 	    ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
 		if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
 			/* already reported by userspace push_backup */
 			ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
 		} else {
-			spin_lock_irq(&plo->lock);
-			ploop_add_lockout(preq, 0);
-			spin_unlock_irq(&plo->lock);
+			/* needn't lock because only ploop_thread accesses */
+			ploop_add_pb_lockout(preq);
 			/*
 			 * preq IN: preq is in ppb_pending tree waiting for
 			 * out-of-band push_backup processing by userspace ...
 			 */
 			return;
 		}
-	} else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
+	} else if (test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
 		   test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
 		/*
 		 * preq OUT: out-of-band push_backup processing by
@@ -2064,8 +2134,8 @@ restart:
 		 */
 		ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
 
+		del_pb_lockout(preq);
 		spin_lock_irq(&plo->lock);
-		del_lockout(preq);
 		if (!list_empty(&preq->delay_list))
 			list_splice_init(&preq->delay_list, plo->ready_queue.prev);
 		spin_unlock_irq(&plo->lock);
@@ -4894,6 +4964,7 @@ static struct ploop_device *__ploop_dev_alloc(int index)
 	INIT_LIST_HEAD(&plo->entry_queue);
 	plo->entry_tree[0] = plo->entry_tree[1] = RB_ROOT;
 	plo->lockout_tree = RB_ROOT;
+	plo->lockout_pb_tree = RB_ROOT;
 	INIT_LIST_HEAD(&plo->ready_queue);
 	INIT_LIST_HEAD(&plo->free_list);
 	init_waitqueue_head(&plo->waitq);
diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h
index bc73b72..c22dbde 100644
--- a/drivers/block/ploop/events.h
+++ b/drivers/block/ploop/events.h
@@ -26,6 +26,7 @@
 #define PRINT_PREQ_STATE(state)					\
 			__print_flags(state, "|",		\
 			{ 1 << PLOOP_REQ_LOCKOUT,	"L"},	\
+			{ 1 << PLOOP_REQ_PB_LOCKOUT,	"BL"},	\
 			{ 1 << PLOOP_REQ_SYNC,		"S"},	\
 			{ 1 << PLOOP_REQ_BARRIER,	"B"},	\
 			{ 1 << PLOOP_REQ_UNSTABLE,	"U"},	\
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 77fd833..2b63493 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -368,6 +368,7 @@ struct ploop_device
 	struct list_head	ready_queue;
 
 	struct rb_root		lockout_tree;
+	struct rb_root		lockout_pb_tree;
 
 	int			cluster_log;
 	int			fmt_version;
@@ -453,6 +454,7 @@ struct ploop_device
 enum
 {
 	PLOOP_REQ_LOCKOUT,	/* This preq is locking overapping requests */
+	PLOOP_REQ_PB_LOCKOUT,	/* This preq is locking overlapping WRITEs */
 	PLOOP_REQ_SYNC,
 	PLOOP_REQ_BARRIER,
 	PLOOP_REQ_UNSTABLE,
@@ -574,6 +576,7 @@ struct ploop_request
 	 * until we allocate and initialize block in delta.
 	 */
 	struct rb_node		lockout_link;
+	struct rb_node		lockout_pb_link;
 
 	u32			track_cluster;
 


More information about the Devel mailing list