[Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine
Maxim Patlasov
mpatlasov at virtuozzo.com
Mon May 9 16:53:20 PDT 2016
On 04/30/2016 10:00 AM, Dmitry Monakhov wrote:
> Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
>
> I can not avoid obsession that this request joggling fully destroys FS
> barriers assumptions.
>
> For example: fs does
> submit_bio(data_b1)
> submit_bio(data_b2)
> submit_bio(commit_b3, FLUSH|FUA) journal commit record
> wait_for_bio(commit_b3)
> But there is no guaranee that data_b1 and data_b2 was completed already.
> They can be in pedned list. In case of power-loss we have good commit
> record which reference b1 and b2, but b1 and b2 was not flushed,
> which result expose of unitialized data.
> In fact ext4/jbd2 will wait b1 and b2 first and only after that it will b3 so
> ext4 will works fine.
Any code assuming that completion of FLUSH|FUA guarantees something
about already-submitted-but-not-yet-completed bio-s is broken.
>
> Otherwise looks good.
>
>> When ploop state-machine looks at preq first time, it suspends the preq if
>> its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially.
>> To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree.
>>
>> Later, when userspace reports that out-of-band processing is done, we
>> set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and
>> wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine
>> know that given preq is OK and we shouldn't suspend further preq-s for
>> given cluster-block anymore.
>>
>> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
>> ---
>> drivers/block/ploop/dev.c | 32 +++++++++++++++++++
>> drivers/block/ploop/push_backup.c | 62 +++++++++++++++++++++++++++++++++++++
>> drivers/block/ploop/push_backup.h | 6 ++++
>> include/linux/ploop/ploop.h | 1 +
>> 4 files changed, 101 insertions(+)
>>
>> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
>> index 2a77d2e..c7cc385 100644
>> --- a/drivers/block/ploop/dev.c
>> +++ b/drivers/block/ploop/dev.c
>> @@ -2021,6 +2021,38 @@ restart:
>> return;
>> }
>>
>> + /* push_backup special processing */
>> + if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
>> + (preq->req_rw & REQ_WRITE) && preq->req_size &&
>> + ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
>> + if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
>> + /* already reported by userspace push_backup */
>> + ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>> + } else {
>> + spin_lock_irq(&plo->lock);
>> + ploop_add_lockout(preq, 0);
>> + spin_unlock_irq(&plo->lock);
>> + /*
>> + * preq IN: preq is in ppb_pending tree waiting for
>> + * out-of-band push_backup processing by userspace ...
>> + */
>> + return;
>> + }
>> + } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
>> + test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
>> + /*
>> + * preq OUT: out-of-band push_backup processing by
>> + * userspace done; preq was re-scheduled
>> + */
>> + ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
>> +
>> + spin_lock_irq(&plo->lock);
>> + del_lockout(preq);
>> + if (!list_empty(&preq->delay_list))
>> + list_splice_init(&preq->delay_list, plo->ready_queue.prev);
>> + spin_unlock_irq(&plo->lock);
>> + }
>> +
>> if (plo->trans_map) {
>> err = ploop_find_trans_map(plo->trans_map, preq);
>> if (err) {
>> diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c
>> index 477caf7..488b8fb 100644
>> --- a/drivers/block/ploop/push_backup.c
>> +++ b/drivers/block/ploop/push_backup.c
>> @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 map_max, u64 blk)
>> do_bit_in_map(map, map_max, blk, SET_BIT);
>> }
>>
>> +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk)
>> +{
>> + do_bit_in_map(map, map_max, blk, CLEAR_BIT);
>> +}
>> +
>> +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk)
>> +{
>> + return do_bit_in_map(map, map_max, blk, CHECK_BIT);
>> +}
>> +
>> +/* intentionally lockless */
>> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
>> +{
>> + BUG_ON(!pbd);
>> + clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
>> +}
>> +
>> +/* intentionally lockless */
>> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
>> +{
>> + if (!pbd)
>> + return false;
>> +
>> + return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
>> +}
>> +
>> static int convert_map_to_map(struct ploop_pushbackup_desc *pbd)
>> {
>> struct page **from_map = pbd->cbt_map;
>> @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct ploop_request *preq,
>> rb_insert_color(&preq->reloc_link, tree);
>> }
>>
>> +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd,
>> + struct ploop_request *preq)
>> +{
>> + ploop_pb_add_req_to_tree(preq, &pbd->pending_tree);
>> +}
>> +
>> static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd,
>> struct ploop_request *preq)
>> {
>> @@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct ploop_pushbackup_desc *pbd,
>> return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu);
>> }
>>
>> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
>> + struct ploop_request *preq)
>> +{
>> + BUG_ON(!pbd);
>> +
>> + spin_lock(&pbd->ppb_lock);
>> +
>> + if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) {
>> + spin_unlock(&pbd->ppb_lock);
>> + return -EINTR;
>> + }
>> +
>> + /* if (preq matches pbd->reported_map) return -EALREADY; */
>> + if (preq->req_cluster < pbd->ppb_offset) {
>> + spin_unlock(&pbd->ppb_lock);
>> + return -EALREADY;
>> + }
>> +
>> + ploop_pb_add_req_to_pending(pbd, preq);
>> +
>> + if (pbd->ppb_waiting)
>> + complete(&pbd->ppb_comp);
>> +
>> + spin_unlock(&pbd->ppb_lock);
>> + return 0;
>> +}
>> +
>> unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd)
>> {
>> if (pbd == NULL)
>> @@ -428,6 +487,9 @@ void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
>> else
>> n_found++;
>>
>> + if (preq)
>> + __set_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state);
>> +
>> /*
>> * If preq not found above, it's unsolicited report. Then it's
>> * enough to have corresponding bit set in reported_map because if
>> diff --git a/drivers/block/ploop/push_backup.h b/drivers/block/ploop/push_backup.h
>> index 482e070..476ac53 100644
>> --- a/drivers/block/ploop/push_backup.h
>> +++ b/drivers/block/ploop/push_backup.h
>> @@ -11,3 +11,9 @@ int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd,
>> cluster_t *clu_p, cluster_t *len_p, unsigned n_done);
>> void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
>> cluster_t clu, cluster_t len);
>> +
>> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
>> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
>> +
>> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
>> + struct ploop_request *preq);
>> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
>> index 09f419d3..762d2fd 100644
>> --- a/include/linux/ploop/ploop.h
>> +++ b/include/linux/ploop/ploop.h
>> @@ -464,6 +464,7 @@ enum
>> PLOOP_REQ_FORCE_FLUSH, /*force flush by engine */
>> PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */
>> PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
>> + PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
>> };
>>
>> enum
More information about the Devel
mailing list