[Devel] [vzlin-dev] [PATCH rh7 2/2] ploop: push_backup: rework lockout machinery

Dmitry Monakhov dmonakhov at openvz.org
Wed Jun 8 06:53:08 PDT 2016


Maxim Patlasov <mpatlasov at virtuozzo.com> writes:

> It was not very nice idea to reuse plo->lockout_tree for push_backup. Because
> by design only one preq (for any given req_cluster) can sit in the lockout
> tree, but while we're reusing the tree for a WRITE request, a READ from
> backup tool may come. Such a READ may want to to use the tree: see how
> map_index_fault calls add_lockout for snapshot configuration.
>
> The patch introduces ad-hoc separate push_backup lockout tree. This fix the
> issue (PSBM-47680) and makes the code much easier to understand.
>
> https://jira.sw.ru/browse/PSBM-47680
ACK
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c    |  111 ++++++++++++++++++++++++++++++++++--------
>  drivers/block/ploop/events.h |    1 
>  include/linux/ploop/ploop.h  |    3 +
>  3 files changed, 95 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index d3f0ec0..27827a8 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -1117,20 +1117,25 @@ static int ploop_congested(void *data, int bits)
>  	return ret;
>  }
>  
> -static int check_lockout(struct ploop_request *preq)
> +static int __check_lockout(struct ploop_request *preq, bool pb)
>  {
>  	struct ploop_device * plo = preq->plo;
> -	struct rb_node * n = plo->lockout_tree.rb_node;
> +	struct rb_node * n = pb ? plo->lockout_pb_tree.rb_node :
> +				  plo->lockout_tree.rb_node;
>  	struct ploop_request * p;
> +	int lockout_bit = pb ? PLOOP_REQ_PB_LOCKOUT : PLOOP_REQ_LOCKOUT;
>  
>  	if (n == NULL)
>  		return 0;
>  
> -	if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> +	if (test_bit(lockout_bit, &preq->state))
>  		return 0;
>  
>  	while (n) {
> -		p = rb_entry(n, struct ploop_request, lockout_link);
> +		if (pb)
> +			p = rb_entry(n, struct ploop_request, lockout_pb_link);
> +		else
> +			p = rb_entry(n, struct ploop_request, lockout_link);
>  
>  		if (preq->req_cluster < p->req_cluster)
>  			n = n->rb_left;
> @@ -1146,19 +1151,51 @@ static int check_lockout(struct ploop_request *preq)
>  	return 0;
>  }
>  
> -int ploop_add_lockout(struct ploop_request *preq, int try)
> +static int check_lockout(struct ploop_request *preq)
> +{
> +	if (__check_lockout(preq, false))
> +		return 1;
> +
> +	/* push_backup passes READs intact */
> +	if (!(preq->req_rw & REQ_WRITE))
> +		return 0;
> +
> +	if (__check_lockout(preq, true))
> +		return 1;
> +
> +	return 0;
> +}
> +
> +static int __ploop_add_lockout(struct ploop_request *preq, int try, bool pb)
>  {
>  	struct ploop_device * plo = preq->plo;
> -	struct rb_node ** p = &plo->lockout_tree.rb_node;
> +	struct rb_node ** p;
>  	struct rb_node *parent = NULL;
>  	struct ploop_request * pr;
> +	struct rb_node *link;
> +	struct rb_root *tree;
> +	int lockout_bit;
> +
> +	if (pb) {
> +		link = &preq->lockout_pb_link;
> +		tree = &plo->lockout_pb_tree;
> +		lockout_bit = PLOOP_REQ_PB_LOCKOUT;
> +	} else {
> +		link = &preq->lockout_link;
> +		tree = &plo->lockout_tree;
> +		lockout_bit = PLOOP_REQ_LOCKOUT;
> +	}
>  
> -	if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> +	if (test_bit(lockout_bit, &preq->state))
>  		return 0;
>  
> +	p = &tree->rb_node;
>  	while (*p) {
>  		parent = *p;
> -		pr = rb_entry(parent, struct ploop_request, lockout_link);
> +		if (pb)
> +			pr = rb_entry(parent, struct ploop_request, lockout_pb_link);
> +		else
> +			pr = rb_entry(parent, struct ploop_request, lockout_link);
>  
>  		if (preq->req_cluster == pr->req_cluster) {
>  			if (try)
> @@ -1174,23 +1211,56 @@ int ploop_add_lockout(struct ploop_request *preq, int try)
>  
>  	trace_add_lockout(preq);
>  
> -	rb_link_node(&preq->lockout_link, parent, p);
> -	rb_insert_color(&preq->lockout_link, &plo->lockout_tree);
> -	__set_bit(PLOOP_REQ_LOCKOUT, &preq->state);
> +	rb_link_node(link, parent, p);
> +	rb_insert_color(link, tree);
> +	__set_bit(lockout_bit, &preq->state);
>  	return 0;
>  }
> +
> +int ploop_add_lockout(struct ploop_request *preq, int try)
> +{
> +	return __ploop_add_lockout(preq, try, false);
> +}
>  EXPORT_SYMBOL(ploop_add_lockout);
>  
> -void del_lockout(struct ploop_request *preq)
> +static void ploop_add_pb_lockout(struct ploop_request *preq)
> +{
> +	__ploop_add_lockout(preq, 0, true);
> +}
> +
> +static void __del_lockout(struct ploop_request *preq, bool pb)
>  {
>  	struct ploop_device * plo = preq->plo;
> +	struct rb_node *link;
> +	struct rb_root *tree;
> +	int lockout_bit;
> +
> +	if (pb) {
> +		link = &preq->lockout_pb_link;
> +		tree = &plo->lockout_pb_tree;
> +		lockout_bit = PLOOP_REQ_PB_LOCKOUT;
> +	} else {
> +		link = &preq->lockout_link;
> +		tree = &plo->lockout_tree;
> +		lockout_bit = PLOOP_REQ_LOCKOUT;
> +	}
>  
> -	if (!test_and_clear_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> +	if (!test_and_clear_bit(lockout_bit, &preq->state))
>  		return;
>  
>  	trace_del_lockout(preq);
>  
> -	rb_erase(&preq->lockout_link, &plo->lockout_tree);
> +	rb_erase(link, tree);
> +}
> +
> +void del_lockout(struct ploop_request *preq)
> +{
> +	__del_lockout(preq, false);
> +}
> +
> +static void del_pb_lockout(struct ploop_request *preq)
> +{
> +	__del_lockout(preq, true);
>  }
>  
>  static void ploop_discard_wakeup(struct ploop_request *preq, int err)
> @@ -1284,6 +1354,7 @@ static void ploop_complete_request(struct ploop_request * preq)
>  	spin_lock_irq(&plo->lock);
>  
>  	del_lockout(preq);
> +	del_pb_lockout(preq); /* preq may die via ploop_fail_immediate() */
>  
>  	if (!list_empty(&preq->delay_list))
>  		list_splice_init(&preq->delay_list, plo->ready_queue.prev);
> @@ -2040,23 +2111,22 @@ restart:
>  	}
>  
>  	/* push_backup special processing */
> -	if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +	if (!test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
>  	    (preq->req_rw & REQ_WRITE) && preq->req_size &&
>  	    ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
>  		if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
>  			/* already reported by userspace push_backup */
>  			ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>  		} else {
> -			spin_lock_irq(&plo->lock);
> -			ploop_add_lockout(preq, 0);
> -			spin_unlock_irq(&plo->lock);
> +			/* needn't lock because only ploop_thread accesses */
> +			ploop_add_pb_lockout(preq);
>  			/*
>  			 * preq IN: preq is in ppb_pending tree waiting for
>  			 * out-of-band push_backup processing by userspace ...
>  			 */
>  			return;
>  		}
> -	} else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +	} else if (test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
>  		   test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
>  		/*
>  		 * preq OUT: out-of-band push_backup processing by
> @@ -2064,8 +2134,8 @@ restart:
>  		 */
>  		ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>  
> +		del_pb_lockout(preq);
>  		spin_lock_irq(&plo->lock);
> -		del_lockout(preq);
>  		if (!list_empty(&preq->delay_list))
>  			list_splice_init(&preq->delay_list, plo->ready_queue.prev);
>  		spin_unlock_irq(&plo->lock);
> @@ -4894,6 +4964,7 @@ static struct ploop_device *__ploop_dev_alloc(int index)
>  	INIT_LIST_HEAD(&plo->entry_queue);
>  	plo->entry_tree[0] = plo->entry_tree[1] = RB_ROOT;
>  	plo->lockout_tree = RB_ROOT;
> +	plo->lockout_pb_tree = RB_ROOT;
>  	INIT_LIST_HEAD(&plo->ready_queue);
>  	INIT_LIST_HEAD(&plo->free_list);
>  	init_waitqueue_head(&plo->waitq);
> diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h
> index bc73b72..c22dbde 100644
> --- a/drivers/block/ploop/events.h
> +++ b/drivers/block/ploop/events.h
> @@ -26,6 +26,7 @@
>  #define PRINT_PREQ_STATE(state)					\
>  			__print_flags(state, "|",		\
>  			{ 1 << PLOOP_REQ_LOCKOUT,	"L"},	\
> +			{ 1 << PLOOP_REQ_PB_LOCKOUT,	"BL"},	\
>  			{ 1 << PLOOP_REQ_SYNC,		"S"},	\
>  			{ 1 << PLOOP_REQ_BARRIER,	"B"},	\
>  			{ 1 << PLOOP_REQ_UNSTABLE,	"U"},	\
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 77fd833..2b63493 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -368,6 +368,7 @@ struct ploop_device
>  	struct list_head	ready_queue;
>  
>  	struct rb_root		lockout_tree;
> +	struct rb_root		lockout_pb_tree;
>  
>  	int			cluster_log;
>  	int			fmt_version;
> @@ -453,6 +454,7 @@ struct ploop_device
>  enum
>  {
>  	PLOOP_REQ_LOCKOUT,	/* This preq is locking overapping requests */
> +	PLOOP_REQ_PB_LOCKOUT,	/* This preq is locking overlapping WRITEs */
>  	PLOOP_REQ_SYNC,
>  	PLOOP_REQ_BARRIER,
>  	PLOOP_REQ_UNSTABLE,
> @@ -574,6 +576,7 @@ struct ploop_request
>  	 * until we allocate and initialize block in delta.
>  	 */
>  	struct rb_node		lockout_link;
> +	struct rb_node		lockout_pb_link;
>  
>  	u32			track_cluster;
>  
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160608/849d2d5f/attachment.sig>


More information about the Devel mailing list