[Devel] [vzlin-dev] [PATCH rh7 2/2] ploop: push_backup: rework lockout machinery
Dmitry Monakhov
dmonakhov at openvz.org
Wed Jun 8 06:53:08 PDT 2016
Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
> It was not very nice idea to reuse plo->lockout_tree for push_backup. Because
> by design only one preq (for any given req_cluster) can sit in the lockout
> tree, but while we're reusing the tree for a WRITE request, a READ from
> backup tool may come. Such a READ may want to to use the tree: see how
> map_index_fault calls add_lockout for snapshot configuration.
>
> The patch introduces ad-hoc separate push_backup lockout tree. This fix the
> issue (PSBM-47680) and makes the code much easier to understand.
>
> https://jira.sw.ru/browse/PSBM-47680
ACK
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
> drivers/block/ploop/dev.c | 111 ++++++++++++++++++++++++++++++++++--------
> drivers/block/ploop/events.h | 1
> include/linux/ploop/ploop.h | 3 +
> 3 files changed, 95 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index d3f0ec0..27827a8 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -1117,20 +1117,25 @@ static int ploop_congested(void *data, int bits)
> return ret;
> }
>
> -static int check_lockout(struct ploop_request *preq)
> +static int __check_lockout(struct ploop_request *preq, bool pb)
> {
> struct ploop_device * plo = preq->plo;
> - struct rb_node * n = plo->lockout_tree.rb_node;
> + struct rb_node * n = pb ? plo->lockout_pb_tree.rb_node :
> + plo->lockout_tree.rb_node;
> struct ploop_request * p;
> + int lockout_bit = pb ? PLOOP_REQ_PB_LOCKOUT : PLOOP_REQ_LOCKOUT;
>
> if (n == NULL)
> return 0;
>
> - if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> + if (test_bit(lockout_bit, &preq->state))
> return 0;
>
> while (n) {
> - p = rb_entry(n, struct ploop_request, lockout_link);
> + if (pb)
> + p = rb_entry(n, struct ploop_request, lockout_pb_link);
> + else
> + p = rb_entry(n, struct ploop_request, lockout_link);
>
> if (preq->req_cluster < p->req_cluster)
> n = n->rb_left;
> @@ -1146,19 +1151,51 @@ static int check_lockout(struct ploop_request *preq)
> return 0;
> }
>
> -int ploop_add_lockout(struct ploop_request *preq, int try)
> +static int check_lockout(struct ploop_request *preq)
> +{
> + if (__check_lockout(preq, false))
> + return 1;
> +
> + /* push_backup passes READs intact */
> + if (!(preq->req_rw & REQ_WRITE))
> + return 0;
> +
> + if (__check_lockout(preq, true))
> + return 1;
> +
> + return 0;
> +}
> +
> +static int __ploop_add_lockout(struct ploop_request *preq, int try, bool pb)
> {
> struct ploop_device * plo = preq->plo;
> - struct rb_node ** p = &plo->lockout_tree.rb_node;
> + struct rb_node ** p;
> struct rb_node *parent = NULL;
> struct ploop_request * pr;
> + struct rb_node *link;
> + struct rb_root *tree;
> + int lockout_bit;
> +
> + if (pb) {
> + link = &preq->lockout_pb_link;
> + tree = &plo->lockout_pb_tree;
> + lockout_bit = PLOOP_REQ_PB_LOCKOUT;
> + } else {
> + link = &preq->lockout_link;
> + tree = &plo->lockout_tree;
> + lockout_bit = PLOOP_REQ_LOCKOUT;
> + }
>
> - if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> + if (test_bit(lockout_bit, &preq->state))
> return 0;
>
> + p = &tree->rb_node;
> while (*p) {
> parent = *p;
> - pr = rb_entry(parent, struct ploop_request, lockout_link);
> + if (pb)
> + pr = rb_entry(parent, struct ploop_request, lockout_pb_link);
> + else
> + pr = rb_entry(parent, struct ploop_request, lockout_link);
>
> if (preq->req_cluster == pr->req_cluster) {
> if (try)
> @@ -1174,23 +1211,56 @@ int ploop_add_lockout(struct ploop_request *preq, int try)
>
> trace_add_lockout(preq);
>
> - rb_link_node(&preq->lockout_link, parent, p);
> - rb_insert_color(&preq->lockout_link, &plo->lockout_tree);
> - __set_bit(PLOOP_REQ_LOCKOUT, &preq->state);
> + rb_link_node(link, parent, p);
> + rb_insert_color(link, tree);
> + __set_bit(lockout_bit, &preq->state);
> return 0;
> }
> +
> +int ploop_add_lockout(struct ploop_request *preq, int try)
> +{
> + return __ploop_add_lockout(preq, try, false);
> +}
> EXPORT_SYMBOL(ploop_add_lockout);
>
> -void del_lockout(struct ploop_request *preq)
> +static void ploop_add_pb_lockout(struct ploop_request *preq)
> +{
> + __ploop_add_lockout(preq, 0, true);
> +}
> +
> +static void __del_lockout(struct ploop_request *preq, bool pb)
> {
> struct ploop_device * plo = preq->plo;
> + struct rb_node *link;
> + struct rb_root *tree;
> + int lockout_bit;
> +
> + if (pb) {
> + link = &preq->lockout_pb_link;
> + tree = &plo->lockout_pb_tree;
> + lockout_bit = PLOOP_REQ_PB_LOCKOUT;
> + } else {
> + link = &preq->lockout_link;
> + tree = &plo->lockout_tree;
> + lockout_bit = PLOOP_REQ_LOCKOUT;
> + }
>
> - if (!test_and_clear_bit(PLOOP_REQ_LOCKOUT, &preq->state))
> + if (!test_and_clear_bit(lockout_bit, &preq->state))
> return;
>
> trace_del_lockout(preq);
>
> - rb_erase(&preq->lockout_link, &plo->lockout_tree);
> + rb_erase(link, tree);
> +}
> +
> +void del_lockout(struct ploop_request *preq)
> +{
> + __del_lockout(preq, false);
> +}
> +
> +static void del_pb_lockout(struct ploop_request *preq)
> +{
> + __del_lockout(preq, true);
> }
>
> static void ploop_discard_wakeup(struct ploop_request *preq, int err)
> @@ -1284,6 +1354,7 @@ static void ploop_complete_request(struct ploop_request * preq)
> spin_lock_irq(&plo->lock);
>
> del_lockout(preq);
> + del_pb_lockout(preq); /* preq may die via ploop_fail_immediate() */
>
> if (!list_empty(&preq->delay_list))
> list_splice_init(&preq->delay_list, plo->ready_queue.prev);
> @@ -2040,23 +2111,22 @@ restart:
> }
>
> /* push_backup special processing */
> - if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> + if (!test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
> (preq->req_rw & REQ_WRITE) && preq->req_size &&
> ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
> if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
> /* already reported by userspace push_backup */
> ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> } else {
> - spin_lock_irq(&plo->lock);
> - ploop_add_lockout(preq, 0);
> - spin_unlock_irq(&plo->lock);
> + /* needn't lock because only ploop_thread accesses */
> + ploop_add_pb_lockout(preq);
> /*
> * preq IN: preq is in ppb_pending tree waiting for
> * out-of-band push_backup processing by userspace ...
> */
> return;
> }
> - } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> + } else if (test_bit(PLOOP_REQ_PB_LOCKOUT, &preq->state) &&
> test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
> /*
> * preq OUT: out-of-band push_backup processing by
> @@ -2064,8 +2134,8 @@ restart:
> */
> ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>
> + del_pb_lockout(preq);
> spin_lock_irq(&plo->lock);
> - del_lockout(preq);
> if (!list_empty(&preq->delay_list))
> list_splice_init(&preq->delay_list, plo->ready_queue.prev);
> spin_unlock_irq(&plo->lock);
> @@ -4894,6 +4964,7 @@ static struct ploop_device *__ploop_dev_alloc(int index)
> INIT_LIST_HEAD(&plo->entry_queue);
> plo->entry_tree[0] = plo->entry_tree[1] = RB_ROOT;
> plo->lockout_tree = RB_ROOT;
> + plo->lockout_pb_tree = RB_ROOT;
> INIT_LIST_HEAD(&plo->ready_queue);
> INIT_LIST_HEAD(&plo->free_list);
> init_waitqueue_head(&plo->waitq);
> diff --git a/drivers/block/ploop/events.h b/drivers/block/ploop/events.h
> index bc73b72..c22dbde 100644
> --- a/drivers/block/ploop/events.h
> +++ b/drivers/block/ploop/events.h
> @@ -26,6 +26,7 @@
> #define PRINT_PREQ_STATE(state) \
> __print_flags(state, "|", \
> { 1 << PLOOP_REQ_LOCKOUT, "L"}, \
> + { 1 << PLOOP_REQ_PB_LOCKOUT, "BL"}, \
> { 1 << PLOOP_REQ_SYNC, "S"}, \
> { 1 << PLOOP_REQ_BARRIER, "B"}, \
> { 1 << PLOOP_REQ_UNSTABLE, "U"}, \
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 77fd833..2b63493 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -368,6 +368,7 @@ struct ploop_device
> struct list_head ready_queue;
>
> struct rb_root lockout_tree;
> + struct rb_root lockout_pb_tree;
>
> int cluster_log;
> int fmt_version;
> @@ -453,6 +454,7 @@ struct ploop_device
> enum
> {
> PLOOP_REQ_LOCKOUT, /* This preq is locking overapping requests */
> + PLOOP_REQ_PB_LOCKOUT, /* This preq is locking overlapping WRITEs */
> PLOOP_REQ_SYNC,
> PLOOP_REQ_BARRIER,
> PLOOP_REQ_UNSTABLE,
> @@ -574,6 +576,7 @@ struct ploop_request
> * until we allocate and initialize block in delta.
> */
> struct rb_node lockout_link;
> + struct rb_node lockout_pb_link;
>
> u32 track_cluster;
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160608/849d2d5f/attachment.sig>
More information about the Devel
mailing list