[Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine

Dmitry Monakhov dmonakhov at openvz.org
Sat Apr 30 10:00:16 PDT 2016


Maxim Patlasov <mpatlasov at virtuozzo.com> writes:

I cannot shake the feeling that this request juggling completely breaks
the FS barrier assumptions.

For example: fs does
submit_bio(data_b1)
submit_bio(data_b2) 
submit_bio(commit_b3, FLUSH|FUA) journal commit record
wait_for_bio(commit_b3)
But there is no guarantee that data_b1 and data_b2 have already completed.
They can still be in the pending list. In case of power loss we have a good
commit record which references b1 and b2, but b1 and b2 were not flushed,
which results in exposure of uninitialized data.
In fact ext4/jbd2 will wait for b1 and b2 first and only after that will it
submit b3, so ext4 will work fine.

Otherwise looks good.

> When ploop state-machine looks at preq first time, it suspends the preq if
> its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially.
> To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree.
>
> Later, when userspace reports that out-of-band processing is done, we
> set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and
> wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine
> know that given preq is OK and we shouldn't suspend further preq-s for
> given cluster-block anymore.
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c         |   32 +++++++++++++++++++
>  drivers/block/ploop/push_backup.c |   62 +++++++++++++++++++++++++++++++++++++
>  drivers/block/ploop/push_backup.h |    6 ++++
>  include/linux/ploop/ploop.h       |    1 +
>  4 files changed, 101 insertions(+)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 2a77d2e..c7cc385 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2021,6 +2021,38 @@ restart:
>  		return;
>  	}
>  
> +	/* push_backup special processing */
> +	if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +	    (preq->req_rw & REQ_WRITE) && preq->req_size &&
> +	    ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
> +		if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
> +			/* already reported by userspace push_backup */
> +			ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> +		} else {
> +			spin_lock_irq(&plo->lock);
> +			ploop_add_lockout(preq, 0);
> +			spin_unlock_irq(&plo->lock);
> +			/*
> +			 * preq IN: preq is in ppb_pending tree waiting for
> +			 * out-of-band push_backup processing by userspace ...
> +			 */
> +			return;
> +		}
> +	} else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> +		   test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
> +		/*
> +		 * preq OUT: out-of-band push_backup processing by
> +		 * userspace done; preq was re-scheduled
> +		 */
> +		ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> +
> +		spin_lock_irq(&plo->lock);
> +		del_lockout(preq);
> +		if (!list_empty(&preq->delay_list))
> +			list_splice_init(&preq->delay_list, plo->ready_queue.prev);
> +		spin_unlock_irq(&plo->lock);
> +	}
> +
>  	if (plo->trans_map) {
>  		err = ploop_find_trans_map(plo->trans_map, preq);
>  		if (err) {
> diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c
> index 477caf7..488b8fb 100644
> --- a/drivers/block/ploop/push_backup.c
> +++ b/drivers/block/ploop/push_backup.c
> @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 map_max, u64 blk)
>  	do_bit_in_map(map, map_max, blk, SET_BIT);
>  }
>  
> +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk)
> +{
> +	do_bit_in_map(map, map_max, blk, CLEAR_BIT);
> +}
> +
> +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk)
> +{
> +	return do_bit_in_map(map, map_max, blk, CHECK_BIT);
> +}
> +
> +/* intentionally lockless */
> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
> +{
> +	BUG_ON(!pbd);
> +	clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
> +}
> +
> +/* intentionally lockless */
> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
> +{
> +	if (!pbd)
> +		return false;
> +
> +	return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
> +}
> +
>  static int convert_map_to_map(struct ploop_pushbackup_desc *pbd)
>  {
>  	struct page **from_map = pbd->cbt_map;
> @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct ploop_request *preq,
>  	rb_insert_color(&preq->reloc_link, tree);
>  }
>  
> +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd,
> +					struct ploop_request *preq)
> +{
> +	ploop_pb_add_req_to_tree(preq, &pbd->pending_tree);
> +}
> +
>  static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd,
>  					 struct ploop_request *preq)
>  {
> @@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct ploop_pushbackup_desc *pbd,
>  	return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu);
>  }
>  
> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
> +			       struct ploop_request *preq)
> +{
> +	BUG_ON(!pbd);
> +
> +	spin_lock(&pbd->ppb_lock);
> +
> +	if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) {
> +		spin_unlock(&pbd->ppb_lock);
> +		return -EINTR;
> +	}
> +
> +	/* if (preq matches pbd->reported_map) return -EALREADY; */
> +	if (preq->req_cluster < pbd->ppb_offset) {
> +		spin_unlock(&pbd->ppb_lock);
> +		return -EALREADY;
> +	}
> +
> +	ploop_pb_add_req_to_pending(pbd, preq);
> +
> +	if (pbd->ppb_waiting)
> +		complete(&pbd->ppb_comp);
> +
> +	spin_unlock(&pbd->ppb_lock);
> +	return 0;
> +}
> +
>  unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd)
>  {
>  	if (pbd == NULL)
> @@ -428,6 +487,9 @@ void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
>  	else
>  		n_found++;
>  
> +	if (preq)
> +		__set_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state);
> +
>  	/*
>  	 * If preq not found above, it's unsolicited report. Then it's
>  	 * enough to have corresponding bit set in reported_map because if
> diff --git a/drivers/block/ploop/push_backup.h b/drivers/block/ploop/push_backup.h
> index 482e070..476ac53 100644
> --- a/drivers/block/ploop/push_backup.h
> +++ b/drivers/block/ploop/push_backup.h
> @@ -11,3 +11,9 @@ int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd,
>  			 cluster_t *clu_p, cluster_t *len_p, unsigned n_done);
>  void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
>  			   cluster_t clu, cluster_t len);
> +
> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
> +
> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
> +			       struct ploop_request *preq);
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 09f419d3..762d2fd 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -464,6 +464,7 @@ enum
>  	PLOOP_REQ_FORCE_FLUSH,	/*force flush by engine */
>  	PLOOP_REQ_KAIO_FSYNC,	/*force image fsync by KAIO module */
>  	PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
> +	PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
>  };
>  
>  enum
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160430/af9ea076/attachment-0001.sig>


More information about the Devel mailing list