[Devel] [RH7 PATCH 5/6] ploop: fixup barrier handling during relocation
Maxim Patlasov
mpatlasov at virtuozzo.com
Thu Jun 23 16:06:16 PDT 2016
There is no reason to keep this along with the optimization patches. Please
see the port I'll send later today.
On 06/23/2016 10:25 AM, Dmitry Monakhov wrote:
> barrier code is broken in many ways:
> Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
> But a request can also go through ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes).
> So in the case of grow_dev we have the following sequence:
>
> E_RELOC_DATA_READ:
> ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> ->delta->allocate
> ->io->submit_alloc: dio_submit_alloc
> ->dio_submit_pad
> E_DATA_WBI : data written, time to update index
> ->delta->allocate_complete:ploop_index_update
> ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> ->write_page
> ->ploop_map_wb_complete
> ->ploop_wb_complete_post_process
> ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> E_RELOC_NULLIFY:
>
> ->submit()
>
> Once we have the delayed_flush engine, it is easy to implement a correct scheme for
> both engines.
>
> E_RELOC_DATA_READ ->submit_alloc => wait->post_submit->issue_flush
> E_DATA_WBI ->ploop_index_update with FUA
> E_RELOC_NULLIFY ->submit: => wait->post_submit->issue_flush
>
> This makes reloc sequence optimal:
> RELOC_S: R1, W2,WAIT,FLUSH, WBI:FUA
> RELOC_A: R1, W2,WAIT,FLUSH, WBI:FUA, W1:NULLIFY,WAIT, FLUSH
>
> https://jira.sw.ru/browse/PSBM-47107
> Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
> ---
> drivers/block/ploop/dev.c | 2 +-
> drivers/block/ploop/io_kaio.c | 3 +--
> drivers/block/ploop/map.c | 28 ++++++++++++++--------------
> 3 files changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 95e3067..090cd2d 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2533,7 +2533,7 @@ restart:
> sbl.head = sbl.tail = preq->aux_bio;
>
> /* Relocated data write required sync before BAT updatee */
> - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> + preq->req_rw |= REQ_FUA;
>
> if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
> preq->eng_state = PLOOP_E_DATA_WBI;
> diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
> index 5341fd5..5217ab4 100644
> --- a/drivers/block/ploop/io_kaio.c
> +++ b/drivers/block/ploop/io_kaio.c
> @@ -72,8 +72,7 @@ static void kaio_complete_io_state(struct ploop_request * preq)
> }
>
> /* Convert requested fua to fsync */
> - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) ||
> - test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) ||
> + if (test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) ||
> test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state))
> post_fsync = 1;
>
> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
> index 3a6365d..ef351fb 100644
> --- a/drivers/block/ploop/map.c
> +++ b/drivers/block/ploop/map.c
> @@ -901,6 +901,8 @@ void ploop_index_update(struct ploop_request * preq)
> int old_level;
> struct page * page;
> sector_t sec;
> + int fua = !!(preq->req_rw & REQ_FUA);
> + unsigned long state = READ_ONCE(preq->state);
>
> /* No way back, we are going to initiate index write. */
>
> @@ -954,12 +956,11 @@ void ploop_index_update(struct ploop_request * preq)
> plo->st.map_single_writes++;
> top_delta->ops->map_index(top_delta, m->mn_start, &sec);
> /* Relocate requires consistent writes, mark such reqs appropriately */
> - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> - test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> -
> - top_delta->io.ops->write_page(&top_delta->io, preq, page, sec,
> - !!(preq->req_rw & REQ_FUA));
> + if (state & (PLOOP_REQ_RELOC_A_FL | PLOOP_REQ_RELOC_S_FL)) {
> + WARN_ON(state & PLOOP_REQ_DEL_FLUSH_FL);
> + fua = 1;
> + }
> + top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, fua);
> put_page(page);
> return;
>
> @@ -1063,7 +1064,7 @@ static void map_wb_complete_post_process(struct ploop_map *map,
> * (see dio_submit()). So fsync of EXT4 image doesnt help us.
> * We need to force sync of nullified blocks.
> */
> - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> + preq->req_rw |= REQ_FUA;
> top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
> &sbl, preq->iblock, 1<<plo->cluster_log);
> }
> @@ -1153,8 +1154,10 @@ static void map_wb_complete(struct map_node * m, int err)
>
> list_for_each_safe(cursor, tmp, &m->io_queue) {
> struct ploop_request * preq;
> + unsigned long state;
>
> preq = list_entry(cursor, struct ploop_request, list);
> + state = READ_ONCE(preq->state);
>
> switch (preq->eng_state) {
> case PLOOP_E_INDEX_DELAY:
> @@ -1171,9 +1174,10 @@ static void map_wb_complete(struct map_node * m, int err)
> if (preq->req_rw & REQ_FUA)
> fua = 1;
>
> - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> - test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> - force_fua = 1;
> + if (state & (PLOOP_REQ_RELOC_A_FL | PLOOP_REQ_RELOC_S_FL)) {
> + WARN_ON(state & PLOOP_REQ_DEL_FLUSH_FL);
> + fua = 1;
> + }
>
> preq->eng_state = PLOOP_E_INDEX_WB;
> get_page(page);
> @@ -1199,10 +1203,6 @@ static void map_wb_complete(struct map_node * m, int err)
> __TRACE("wbi2 %p %u %p\n", main_preq, main_preq->req_cluster, m);
> plo->st.map_multi_writes++;
> top_delta->ops->map_index(top_delta, m->mn_start, &sec);
> -
> - if (force_fua)
> - set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
> -
> top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, fua);
> put_page(page);
> }
More information about the Devel
mailing list