[Devel] [PATCH rh7 9/9] ploop: fixup barrier handling during relocation
Dmitry Monakhov
dmonakhov at openvz.org
Fri Jun 24 07:48:09 PDT 2016
Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
> Rebase Dima's patch on top of rh7-3.10.0-327.18.2.vz7.14.19,
> but without help of delayed_flush engine:
>
> To ensure consistency on crash/power outage/hard reboot
> events, ploop must implement the following barrier logic
> for RELOC_A|S requests:
>
> 1) After we store data to new place, but before updating
> BAT on disk, we have FLUSH everything (in fact, flushing
> those data would be enough, but it is simplier to flush
> everything).
>
> 2) We should not proceed handling RELOC_A|S until we
> 100% sure new BAT value went to disk platters. So far as
> new BAT is only one page, it's OK to mark corresponding
> bio with FUA flag for io_direct case. For io_kaio, not
> having FUA api, we have to post_fsync BAT update.
>
> PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA introduced
> long time ago probably were intended to ensure the
> logic above, but they actually didn't.
>
> The patch removes PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA,
> and implements barriers in a straightforward and simple way:
> check for RELOC_A|S explicitly and make FLUSH/FUA where
> needed.
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
> drivers/block/ploop/dev.c | 4 ++--
> drivers/block/ploop/io_direct.c | 7 -------
> drivers/block/ploop/io_kaio.c | 8 +++++---
> drivers/block/ploop/map.c | 22 ++++++++++------------
> include/linux/ploop/ploop.h | 1 -
> 5 files changed, 17 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 2b60dfa..40768b6 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2610,8 +2610,8 @@ restart:
> top_delta = ploop_top_delta(plo);
> sbl.head = sbl.tail = preq->aux_bio;
>
> - /* Relocated data write required sync before BAT updatee */
> - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> + /* Relocated data write required sync before BAT update
> + * this will happen inside index_update */
>
> if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
> preq->eng_state = PLOOP_E_DATA_WBI;
> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
> index c4d0f63..266f041 100644
> --- a/drivers/block/ploop/io_direct.c
> +++ b/drivers/block/ploop/io_direct.c
> @@ -89,15 +89,11 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
> sector_t sec, nsec;
> int err;
> struct bio_list_walk bw;
> - int postfua = 0;
> int write = !!(rw & REQ_WRITE);
> int delayed_fua = 0;
>
> trace_submit(preq);
>
> - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
> - postfua = 1;
> -
> if ((rw & REQ_FUA) && ploop_req_delay_fua_possible(preq)) {
> /* Mark req that delayed flush required */
> preq->req_rw |= (REQ_FLUSH | REQ_FUA);
> @@ -233,9 +229,6 @@ flush_bio:
> b->bi_private = preq;
> b->bi_end_io = dio_endio_async;
>
> - if (unlikely(postfua && !bl.head))
> - rw2 |= REQ_FUA;
> -
> ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw);
> submit_bio(rw2, b);
> }
> diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
> index ed550f4..85863df 100644
> --- a/drivers/block/ploop/io_kaio.c
> +++ b/drivers/block/ploop/io_kaio.c
> @@ -69,6 +69,8 @@ static void kaio_complete_io_state(struct ploop_request * preq)
> unsigned long flags;
> int post_fsync = 0;
> int need_fua = !!(preq->req_rw & REQ_FUA);
> + unsigned long state = READ_ONCE(preq->state);
> + int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL));
>
> if (preq->error || !(preq->req_rw & REQ_FUA) ||
> preq->eng_state == PLOOP_E_INDEX_READ ||
> @@ -80,9 +82,9 @@ static void kaio_complete_io_state(struct ploop_request * preq)
> }
>
> /* Convert requested fua to fsync */
> - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) ||
> - test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
> - (need_fua && !ploop_req_delay_fua_possible(preq))) {
This is the change I dislike the most. io_XXX should not care it is
reloc or not. Caller should rule whenether PREFLUSH/POSTFLUSH should
happen before preq completes. So IMHO this is a crunch, but correct one.
> + if (test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
> + (need_fua && !ploop_req_delay_fua_possible(preq)) ||
> + (reloc && ploop_req_delay_fua_possible(preq))) {
> post_fsync = 1;
> preq->req_rw &= ~REQ_FUA;
> }
> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
> index 915a216..1883674 100644
> --- a/drivers/block/ploop/map.c
> +++ b/drivers/block/ploop/map.c
> @@ -909,6 +909,7 @@ void ploop_index_update(struct ploop_request * preq)
> struct page * page;
> sector_t sec;
> unsigned long rw;
> + unsigned long state = READ_ONCE(preq->state);
>
> /* No way back, we are going to initiate index write. */
>
> @@ -961,10 +962,6 @@ void ploop_index_update(struct ploop_request * preq)
> __TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m);
> plo->st.map_single_writes++;
> top_delta->ops->map_index(top_delta, m->mn_start, &sec);
> - /* Relocate requires consistent writes, mark such reqs appropriately */
> - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> - test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>
> rw = (preq->req_rw & (REQ_FUA | REQ_FLUSH));
>
> @@ -972,6 +969,10 @@ void ploop_index_update(struct ploop_request * preq)
> will do the FLUSH */
> preq->req_rw &= ~REQ_FLUSH;
>
> + /* Relocate requires consistent index update */
> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> + rw |= (REQ_FLUSH | REQ_FUA);
> +
> top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
>
> put_page(page);
> @@ -1094,7 +1095,6 @@ static void map_wb_complete(struct map_node * m, int err)
> int delayed = 0;
> unsigned int idx;
> sector_t sec;
> - int force_fua;
> unsigned long rw;
>
> /* First, complete processing of written back indices,
> @@ -1166,10 +1166,10 @@ static void map_wb_complete(struct map_node * m, int err)
>
> main_preq = NULL;
> rw = 0;
> - force_fua = 0;
>
> list_for_each_safe(cursor, tmp, &m->io_queue) {
> struct ploop_request * preq;
> + unsigned long state;
>
> preq = list_entry(cursor, struct ploop_request, list);
>
> @@ -1191,9 +1191,10 @@ static void map_wb_complete(struct map_node * m, int err)
> will do the FLUSH */
> preq->req_rw &= ~REQ_FLUSH;
>
> - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> - test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> - force_fua = 1;
> + state = READ_ONCE(preq->state);
> + /* Relocate requires consistent index update */
> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> + rw |= (REQ_FLUSH | REQ_FUA);
>
> preq->eng_state = PLOOP_E_INDEX_WB;
> get_page(page);
> @@ -1220,9 +1221,6 @@ static void map_wb_complete(struct map_node * m, int err)
> plo->st.map_multi_writes++;
> top_delta->ops->map_index(top_delta, m->mn_start, &sec);
>
> - if (force_fua)
> - set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
> -
> top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
> rw);
> put_page(page);
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 920daf7..6f41570 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -474,7 +474,6 @@ enum
> PLOOP_REQ_ZERO,
> PLOOP_REQ_DISCARD,
> PLOOP_REQ_RSYNC,
> - PLOOP_REQ_FORCE_FUA, /*force fua of req write I/O by engine */
> PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */
> PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
> PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160624/57f2c9ec/attachment.sig>
More information about the Devel
mailing list