[Devel] [RH7 PATCH 5/6] ploop: fixup barrier handling during relocation

Maxim Patlasov mpatlasov at virtuozzo.com
Thu Jun 23 16:06:16 PDT 2016


No reasons to keep it along with optimization patches. See please the 
port I'll send soon today.

On 06/23/2016 10:25 AM, Dmitry Monakhov wrote:
> barrier code is broken in many ways:
> Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
> But request also can goes though ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes)
> So in case of grow_dev we have following sequance:
>
> E_RELOC_DATA_READ:
>               ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>                ->delta->allocate
>                   ->io->submit_allloc: dio_submit_alloc
>                     ->dio_submit_pad
> E_DATA_WBI : data written, time to update index
>                ->delta->allocate_complete:ploop_index_update
>                  ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>                  ->write_page
>                  ->ploop_map_wb_complete
>                    ->ploop_wb_complete_post_process
>                      ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> E_RELOC_NULLIFY:
>
>                 ->submit()
>
> Once we have delayed_flush engine it is easy to implement correct scheme for
> both engines.
>
> E_RELOC_DATA_READ ->submit_allloc => wait->post_submit->issue_flush
> E_DATA_WBI ->ploop_index_update with FUA
> E_RELOC_NULLIFY ->submit: => wait->post_submit->issue_flush
>
> This makes reloc sequence optimal:
> RELOC_S: R1, W2,WAIT,FLUSH, WBI:FUA
> RELOC_A: R1, W2,WAIT,FLUSH, WBI:FUA, W1:NULLIFY,WAIT, FLUSH
>
> https://jira.sw.ru/browse/PSBM-47107
> Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
> ---
>   drivers/block/ploop/dev.c     |  2 +-
>   drivers/block/ploop/io_kaio.c |  3 +--
>   drivers/block/ploop/map.c     | 28 ++++++++++++++--------------
>   3 files changed, 16 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 95e3067..090cd2d 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2533,7 +2533,7 @@ restart:
>   		sbl.head = sbl.tail = preq->aux_bio;
>   
>   		/* Relocated data write required sync before BAT updatee */
> -		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> +		preq->req_rw |= REQ_FUA;
>   
>   		if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
>   			preq->eng_state = PLOOP_E_DATA_WBI;
> diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
> index 5341fd5..5217ab4 100644
> --- a/drivers/block/ploop/io_kaio.c
> +++ b/drivers/block/ploop/io_kaio.c
> @@ -72,8 +72,7 @@ static void kaio_complete_io_state(struct ploop_request * preq)
>   	}
>   
>   	/* Convert requested fua to fsync */
> -	if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) ||
> -	    test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) ||
> +	if (test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) ||
>   	    test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state))
>   		post_fsync = 1;
>   
> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
> index 3a6365d..ef351fb 100644
> --- a/drivers/block/ploop/map.c
> +++ b/drivers/block/ploop/map.c
> @@ -901,6 +901,8 @@ void ploop_index_update(struct ploop_request * preq)
>   	int old_level;
>   	struct page * page;
>   	sector_t sec;
> +	int fua = !!(preq->req_rw & REQ_FUA);
> +	unsigned long state = READ_ONCE(preq->state);
>   
>   	/* No way back, we are going to initiate index write. */
>   
> @@ -954,12 +956,11 @@ void ploop_index_update(struct ploop_request * preq)
>   	plo->st.map_single_writes++;
>   	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
>   	/* Relocate requires consistent writes, mark such reqs appropriately */
> -	if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> -	    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> -		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> -
> -	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec,
> -				      !!(preq->req_rw & REQ_FUA));
> +	if (state & (PLOOP_REQ_RELOC_A_FL | PLOOP_REQ_RELOC_S_FL)) {
> +		WARN_ON(state & PLOOP_REQ_DEL_FLUSH_FL);
> +		fua = 1;
> +	}
> +	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, fua);
>   	put_page(page);
>   	return;
>   
> @@ -1063,7 +1064,7 @@ static void map_wb_complete_post_process(struct ploop_map *map,
>   	 * (see dio_submit()). So fsync of EXT4 image doesnt help us.
>   	 * We need to force sync of nullified blocks.
>   	 */
> -	set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> +	preq->req_rw |= REQ_FUA;
>   	top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
>   				  &sbl, preq->iblock, 1<<plo->cluster_log);
>   }
> @@ -1153,8 +1154,10 @@ static void map_wb_complete(struct map_node * m, int err)
>   
>   	list_for_each_safe(cursor, tmp, &m->io_queue) {
>   		struct ploop_request * preq;
> +		unsigned long state;
>   
>   		preq = list_entry(cursor, struct ploop_request, list);
> +		state = READ_ONCE(preq->state);
>   
>   		switch (preq->eng_state) {
>   		case PLOOP_E_INDEX_DELAY:
> @@ -1171,9 +1174,10 @@ static void map_wb_complete(struct map_node * m, int err)
>   			if (preq->req_rw & REQ_FUA)
>   				fua = 1;
>   
> -			if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> -			    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> -				force_fua = 1;
> +			if (state & (PLOOP_REQ_RELOC_A_FL | PLOOP_REQ_RELOC_S_FL)) {
> +				WARN_ON(state & PLOOP_REQ_DEL_FLUSH_FL);
> +				fua = 1;
> +			}
>   
>   			preq->eng_state = PLOOP_E_INDEX_WB;
>   			get_page(page);
> @@ -1199,10 +1203,6 @@ static void map_wb_complete(struct map_node * m, int err)
>   	__TRACE("wbi2 %p %u %p\n", main_preq, main_preq->req_cluster, m);
>   	plo->st.map_multi_writes++;
>   	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
> -
> -	if (force_fua)
> -		set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
> -
>   	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, fua);
>   	put_page(page);
>   }



More information about the Devel mailing list