[Devel] [PATCH rh7 3/4] ploop: wire push_backup into state-machine
Dmitry Monakhov
dmonakhov at openvz.org
Sat Apr 30 10:00:16 PDT 2016
Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
I cannot shake the feeling that this request juggling completely breaks
the FS's barrier assumptions.
For example: fs does
submit_bio(data_b1)
submit_bio(data_b2)
submit_bio(commit_b3, FLUSH|FUA) journal commit record
wait_for_bio(commit_b3)
But there is no guarantee that data_b1 and data_b2 have already completed.
They can still be in the pending list. In case of power loss we have a good
commit record which references b1 and b2, but b1 and b2 were not flushed,
which results in exposure of uninitialized data.
In fact ext4/jbd2 will wait for b1 and b2 first, and only after that will it
submit b3, so ext4 will work fine.
Otherwise looks good.
> When ploop state-machine looks at preq first time, it suspends the preq if
> its cluster-block matches pbd->ppb_map -- the copy of CBT mask initially.
> To suspend preq we simply put it to pbd->pending_tree and plo->lockout_tree.
>
> Later, when userspace reports that out-of-band processing is done, we
> set PLOOP_REQ_PUSH_BACKUP bit in preq->state, re-schedule the preq and
> wakeup ploop state-machine. This PLOOP_REQ_PUSH_BACKUP bit lets state-machine
> know that given preq is OK and we shouldn't suspend further preq-s for
> given cluster-block anymore.
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
> drivers/block/ploop/dev.c | 32 +++++++++++++++++++
> drivers/block/ploop/push_backup.c | 62 +++++++++++++++++++++++++++++++++++++
> drivers/block/ploop/push_backup.h | 6 ++++
> include/linux/ploop/ploop.h | 1 +
> 4 files changed, 101 insertions(+)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 2a77d2e..c7cc385 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2021,6 +2021,38 @@ restart:
> return;
> }
>
> + /* push_backup special processing */
> + if (!test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> + (preq->req_rw & REQ_WRITE) && preq->req_size &&
> + ploop_pb_check_bit(plo->pbd, preq->req_cluster)) {
> + if (ploop_pb_preq_add_pending(plo->pbd, preq)) {
> + /* already reported by userspace push_backup */
> + ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> + } else {
> + spin_lock_irq(&plo->lock);
> + ploop_add_lockout(preq, 0);
> + spin_unlock_irq(&plo->lock);
> + /*
> + * preq IN: preq is in ppb_pending tree waiting for
> + * out-of-band push_backup processing by userspace ...
> + */
> + return;
> + }
> + } else if (test_bit(PLOOP_REQ_LOCKOUT, &preq->state) &&
> + test_and_clear_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state)) {
> + /*
> + * preq OUT: out-of-band push_backup processing by
> + * userspace done; preq was re-scheduled
> + */
> + ploop_pb_clear_bit(plo->pbd, preq->req_cluster);
> +
> + spin_lock_irq(&plo->lock);
> + del_lockout(preq);
> + if (!list_empty(&preq->delay_list))
> + list_splice_init(&preq->delay_list, plo->ready_queue.prev);
> + spin_unlock_irq(&plo->lock);
> + }
> +
> if (plo->trans_map) {
> err = ploop_find_trans_map(plo->trans_map, preq);
> if (err) {
> diff --git a/drivers/block/ploop/push_backup.c b/drivers/block/ploop/push_backup.c
> index 477caf7..488b8fb 100644
> --- a/drivers/block/ploop/push_backup.c
> +++ b/drivers/block/ploop/push_backup.c
> @@ -146,6 +146,32 @@ static void set_bit_in_map(struct page **map, u64 map_max, u64 blk)
> do_bit_in_map(map, map_max, blk, SET_BIT);
> }
>
> +static void clear_bit_in_map(struct page **map, u64 map_max, u64 blk)
> +{
> + do_bit_in_map(map, map_max, blk, CLEAR_BIT);
> +}
> +
> +static bool check_bit_in_map(struct page **map, u64 map_max, u64 blk)
> +{
> + return do_bit_in_map(map, map_max, blk, CHECK_BIT);
> +}
> +
> +/* intentionally lockless */
> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
> +{
> + BUG_ON(!pbd);
> + clear_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
> +}
> +
> +/* intentionally lockless */
> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu)
> +{
> + if (!pbd)
> + return false;
> +
> + return check_bit_in_map(pbd->ppb_map, pbd->ppb_block_max, clu);
> +}
> +
> static int convert_map_to_map(struct ploop_pushbackup_desc *pbd)
> {
> struct page **from_map = pbd->cbt_map;
> @@ -278,6 +304,12 @@ static void ploop_pb_add_req_to_tree(struct ploop_request *preq,
> rb_insert_color(&preq->reloc_link, tree);
> }
>
> +static void ploop_pb_add_req_to_pending(struct ploop_pushbackup_desc *pbd,
> + struct ploop_request *preq)
> +{
> + ploop_pb_add_req_to_tree(preq, &pbd->pending_tree);
> +}
> +
> static void ploop_pb_add_req_to_reported(struct ploop_pushbackup_desc *pbd,
> struct ploop_request *preq)
> {
> @@ -339,6 +371,33 @@ ploop_pb_get_req_from_reported(struct ploop_pushbackup_desc *pbd,
> return ploop_pb_get_req_from_tree(&pbd->reported_tree, clu);
> }
>
> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
> + struct ploop_request *preq)
> +{
> + BUG_ON(!pbd);
> +
> + spin_lock(&pbd->ppb_lock);
> +
> + if (!test_bit(PLOOP_S_PUSH_BACKUP, &pbd->plo->state)) {
> + spin_unlock(&pbd->ppb_lock);
> + return -EINTR;
> + }
> +
> + /* if (preq matches pbd->reported_map) return -EALREADY; */
> + if (preq->req_cluster < pbd->ppb_offset) {
> + spin_unlock(&pbd->ppb_lock);
> + return -EALREADY;
> + }
> +
> + ploop_pb_add_req_to_pending(pbd, preq);
> +
> + if (pbd->ppb_waiting)
> + complete(&pbd->ppb_comp);
> +
> + spin_unlock(&pbd->ppb_lock);
> + return 0;
> +}
> +
> unsigned long ploop_pb_stop(struct ploop_pushbackup_desc *pbd)
> {
> if (pbd == NULL)
> @@ -428,6 +487,9 @@ void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
> else
> n_found++;
>
> + if (preq)
> + __set_bit(PLOOP_REQ_PUSH_BACKUP, &preq->state);
> +
> /*
> * If preq not found above, it's unsolicited report. Then it's
> * enough to have corresponding bit set in reported_map because if
> diff --git a/drivers/block/ploop/push_backup.h b/drivers/block/ploop/push_backup.h
> index 482e070..476ac53 100644
> --- a/drivers/block/ploop/push_backup.h
> +++ b/drivers/block/ploop/push_backup.h
> @@ -11,3 +11,9 @@ int ploop_pb_get_pending(struct ploop_pushbackup_desc *pbd,
> cluster_t *clu_p, cluster_t *len_p, unsigned n_done);
> void ploop_pb_put_reported(struct ploop_pushbackup_desc *pbd,
> cluster_t clu, cluster_t len);
> +
> +void ploop_pb_clear_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
> +bool ploop_pb_check_bit(struct ploop_pushbackup_desc *pbd, cluster_t clu);
> +
> +int ploop_pb_preq_add_pending(struct ploop_pushbackup_desc *pbd,
> + struct ploop_request *preq);
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 09f419d3..762d2fd 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -464,6 +464,7 @@ enum
> PLOOP_REQ_FORCE_FLUSH, /*force flush by engine */
> PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */
> PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
> + PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
> };
>
> enum
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160430/af9ea076/attachment-0001.sig>
More information about the Devel
mailing list