[Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine

Maxim Patlasov mpatlasov at virtuozzo.com
Mon May 9 16:53:20 PDT 2016


On 04/30/2016 10:00 AM, Dmitry Monakhov wrote:
> Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
>
> I can not avoid obsession that this request joggling fully destroys FS
> barriers assumptions.
>
> For example: fs does
> submit_bio(data_b1)
> submit_bio(data_b2)
> submit_bio(commit_b3, FLUSH|FUA) journal commit record
> wait_for_bio(commit_b3)
> But there is no guaranee that data_b1 and data_b2 was completed already.
> They can be in pedned list. In case of power-loss we have good commit
> record which reference b1 and b2, but  b1 and b2 was not flushed,
> which result expose of unitialized data.
> In fact ext4/jbd2 will wait b1 and b2 first and only after that it will b3 so
> ext4 will works fine.

Any code assuming that completion of FLUSH|FUA guarantees something 
about already-submitted-but-not-yet-completed bio-s is broken.

>
> Otherwise looks good.
>
>> When ploop state-machine looks at preq first time, it suspends the preq if
>> its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially.
>> To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree.
>>
>> Later, when userspace reports that out-of-band processing is done, we
>> set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and
>> wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine
>> know that given preq is OK and we shouldn't suspend further preq-s for
>> given cluster-block anymore.
>>
>> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
>> ---
>>   drivers/block/ploop/dev.c         |   32 +++++++++++++++++++
>>   drivers/block/ploop/push_backup.c |   62 +++++++++++++++++++++++++++++++++++++
>>   drivers/block/ploop/push_backup.h |    6 ++++
>>   include/linux/ploop/ploop.h       |    1 +
>>   4 files changed, 101 insertions(+)
>>
>> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
>> index 2a77d2e..c7cc385 100644
>> --- a/drivers/block/ploop/dev.c
>> +++ b/drivers/block/ploop/dev.c
>> @@ -2021,6 +2021,38 @@ restart:
>>   		return;
>>   	}
>>   
>> +	/* push_backup special processing */
>> +	if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
>> +	    (preq->req_rw & REQ_WRITE) && preq->req_size &&
>> +	    ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
>> +		if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
>> +			/* already reported by userspace push_backup */
>> +			ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>> +		} else {
>> +			spin_lock_irq(&plo->lock);
>> +			ploop_add_lockout(preq, 0);
>> +			spin_unlock_irq(&plo->lock);
>> +			/*
>> +			 * preq IN: preq is in ppb_pending tree waiting for
>> +			 * out-of-band push_backup processing by userspace ...
>> +			 */
>> +			return;
>> +		}
>> +	} else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
>> +		   test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
>> +		/*
>> +		 * preq OUT: out-of-band push_backup processing by
>> +		 * userspace done; preq was re-scheduled
>> +		 */
>> +		ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>> +
>> +		spin_lock_irq(&plo->lock);
>> +		del_lockout(preq);
>> +		if (!list_empty(&preq->delay_list))
>> +			list_splice_init(&preq->delay_list, plo->ready_queue.prev);
>> +		spin_unlock_irq(&plo->lock);
>> +	}
>> +
>>   	if (plo->trans_map) {
>>   		err = ploop_find_trans_map(plo->trans_map, preq);
>>   		if (err) {
>> diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c
>> index 477caf7..488b8fb 100644
>> --- a/drivers/block/ploop/push_backup.c
>> +++ b/drivers/block/ploop/push_backup.c
>> @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 map_max, u64 blk)
>>   	do_bit_in_map(map, map_max, blk, SET_BIT);
>>   }
>>   
>> +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk)
>> +{
>> +	do_bit_in_map(map, map_max, blk, CLEAR_BIT);
>> +}
>> +
>> +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk)
>> +{
>> +	return do_bit_in_map(map, map_max, blk, CHECK_BIT);
>> +}
>> +
>> +/* intentionally lockless */
>> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
>> +{
>> +	BUG_ON(!pbd);
>> +	clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
>> +}
>> +
>> +/* intentionally lockless */
>> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
>> +{
>> +	if (!pbd)
>> +		return false;
>> +
>> +	return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
>> +}
>> +
>>   static int convert_map_to_map(struct ploop_pushbackup_desc *pbd)
>>   {
>>   	struct page **from_map = pbd->cbt_map;
>> @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct ploop_request *preq,
>>   	rb_insert_color(&preq->reloc_link, tree);
>>   }
>>   
>> +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd,
>> +					struct ploop_request *preq)
>> +{
>> +	ploop_pb_add_req_to_tree(preq, &pbd->pending_tree);
>> +}
>> +
>>   static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd,
>>   					 struct ploop_request *preq)
>>   {
>> @@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct ploop_pushbackup_desc *pbd,
>>   	return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu);
>>   }
>>   
>> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
>> +			       struct ploop_request *preq)
>> +{
>> +	BUG_ON(!pbd);
>> +
>> +	spin_lock(&pbd->ppb_lock);
>> +
>> +	if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) {
>> +		spin_unlock(&pbd->ppb_lock);
>> +		return -EINTR;
>> +	}
>> +
>> +	/* if (preq matches pbd->reported_map) return -EALREADY; */
>> +	if (preq->req_cluster < pbd->ppb_offset) {
>> +		spin_unlock(&pbd->ppb_lock);
>> +		return -EALREADY;
>> +	}
>> +
>> +	ploop_pb_add_req_to_pending(pbd, preq);
>> +
>> +	if (pbd->ppb_waiting)
>> +		complete(&pbd->ppb_comp);
>> +
>> +	spin_unlock(&pbd->ppb_lock);
>> +	return 0;
>> +}
>> +
>>   unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd)
>>   {
>>   	if (pbd == NULL)
>> @@ -428,6 +487,9 @@ void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
>>   	else
>>   		n_found++;
>>   
>> +	if (preq)
>> +		__set_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state);
>> +
>>   	/*
>>   	 * If preq not found above, it's unsolicited report. Then it's
>>   	 * enough to have corresponding bit set in reported_map because if
>> diff --git a/drivers/block/ploop/push_backup.h b/drivers/block/ploop/push_backup.h
>> index 482e070..476ac53 100644
>> --- a/drivers/block/ploop/push_backup.h
>> +++ b/drivers/block/ploop/push_backup.h
>> @@ -11,3 +11,9 @@ int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd,
>>   			 cluster_t *clu_p, cluster_t *len_p, unsigned n_done);
>>   void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
>>   			   cluster_t clu, cluster_t len);
>> +
>> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
>> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
>> +
>> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
>> +			       struct ploop_request *preq);
>> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
>> index 09f419d3..762d2fd 100644
>> --- a/include/linux/ploop/ploop.h
>> +++ b/include/linux/ploop/ploop.h
>> @@ -464,6 +464,7 @@ enum
>>   	PLOOP_REQ_FORCE_FLUSH,	/*force flush by engine */
>>   	PLOOP_REQ_KAIO_FSYNC,	/*force image fsync by KAIO module */
>>   	PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
>> +	PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
>>   };
>>   
>>   enum



More information about the Devel mailing list