[Devel] [PATCH rh7 9/9] ploop: fixup barrier handling during relocation

Dmitry Monakhov dmonakhov at openvz.org
Fri Jun 24 07:48:09 PDT 2016


Maxim Patlasov <mpatlasov at virtuozzo.com> writes:

> Rebase Dima's patch on top of rh7-3.10.0-327.18.2.vz7.14.19,
> but without help of delayed_flush engine:
>
> To ensure consistency on crash/power outage/hard reboot
> events, ploop must implement the following barrier logic
> for RELOC_A|S requests:
>
> 1) After we store data to new place, but before updating
> BAT on disk, we have FLUSH everything (in fact, flushing
> those data would be enough, but it is simplier to flush
> everything).
>
> 2) We should not proceed handling RELOC_A|S until we
> 100% sure new BAT value went to disk platters. So far as
> new BAT is only one page, it's OK to mark corresponding
> bio with FUA flag for io_direct case. For io_kaio, not
> having FUA api, we have to post_fsync BAT update.
>
> PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA introduced
> long time ago probably were intended to ensure the
> logic above, but they actually didn't.
>
> The patch removes PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA,
> and implements barriers in a straightforward and simple way:
> check for RELOC_A|S explicitly and make FLUSH/FUA where
> needed.
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c       |    4 ++--
>  drivers/block/ploop/io_direct.c |    7 -------
>  drivers/block/ploop/io_kaio.c   |    8 +++++---
>  drivers/block/ploop/map.c       |   22 ++++++++++------------
>  include/linux/ploop/ploop.h     |    1 -
>  5 files changed, 17 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 2b60dfa..40768b6 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2610,8 +2610,8 @@ restart:
>  		top_delta = ploop_top_delta(plo);
>  		sbl.head = sbl.tail = preq->aux_bio;
>  
> -		/* Relocated data write required sync before BAT updatee */
> -		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
> +		/* Relocated data write required sync before BAT update
> +		 * this will happen inside index_update */
>  
>  		if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
>  			preq->eng_state = PLOOP_E_DATA_WBI;
> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
> index c4d0f63..266f041 100644
> --- a/drivers/block/ploop/io_direct.c
> +++ b/drivers/block/ploop/io_direct.c
> @@ -89,15 +89,11 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
>  	sector_t sec, nsec;
>  	int err;
>  	struct bio_list_walk bw;
> -	int postfua = 0;
>  	int write = !!(rw & REQ_WRITE);
>  	int delayed_fua = 0;
>  
>  	trace_submit(preq);
>  
> -	if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
> -		postfua = 1;
> -
>  	if ((rw & REQ_FUA) && ploop_req_delay_fua_possible(preq)) {
>  		/* Mark req that delayed flush required */
>  		preq->req_rw |= (REQ_FLUSH | REQ_FUA);
> @@ -233,9 +229,6 @@ flush_bio:
>  		b->bi_private = preq;
>  		b->bi_end_io = dio_endio_async;
>  
> -		if (unlikely(postfua && !bl.head))
> -			rw2 |= REQ_FUA;
> -
>  		ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw);
>  		submit_bio(rw2, b);
>  	}
> diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
> index ed550f4..85863df 100644
> --- a/drivers/block/ploop/io_kaio.c
> +++ b/drivers/block/ploop/io_kaio.c
> @@ -69,6 +69,8 @@ static void kaio_complete_io_state(struct ploop_request * preq)
>  	unsigned long flags;
>  	int post_fsync = 0;
>  	int need_fua = !!(preq->req_rw & REQ_FUA);
> +	unsigned long state = READ_ONCE(preq->state);
> +	int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL));
>  
>  	if (preq->error || !(preq->req_rw & REQ_FUA) ||
>  	    preq->eng_state == PLOOP_E_INDEX_READ ||
> @@ -80,9 +82,9 @@ static void kaio_complete_io_state(struct ploop_request * preq)
>  	}
>  
>  	/* Convert requested fua to fsync */
> -	if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) ||
> -	    test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
> -	    (need_fua && !ploop_req_delay_fua_possible(preq))) {
This is the change I dislike the most. io_XXX should not care it is
reloc or not. Caller should rule whenether PREFLUSH/POSTFLUSH should
happen before preq completes. So IMHO this is a crunch, but correct one.

> +	if (test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
> +	    (need_fua && !ploop_req_delay_fua_possible(preq)) ||
> +	    (reloc && ploop_req_delay_fua_possible(preq))) {
>  		post_fsync = 1;
>  		preq->req_rw &= ~REQ_FUA;
>  	}
> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
> index 915a216..1883674 100644
> --- a/drivers/block/ploop/map.c
> +++ b/drivers/block/ploop/map.c
> @@ -909,6 +909,7 @@ void ploop_index_update(struct ploop_request * preq)
>  	struct page * page;
>  	sector_t sec;
>  	unsigned long rw;
> +	unsigned long state = READ_ONCE(preq->state);
>  
>  	/* No way back, we are going to initiate index write. */
>  
> @@ -961,10 +962,6 @@ void ploop_index_update(struct ploop_request * preq)
>  	__TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m);
>  	plo->st.map_single_writes++;
>  	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
> -	/* Relocate requires consistent writes, mark such reqs appropriately */
> -	if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> -	    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> -		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>  
>  	rw = (preq->req_rw & (REQ_FUA | REQ_FLUSH));
>  
> @@ -972,6 +969,10 @@ void ploop_index_update(struct ploop_request * preq)
>  	   will do the FLUSH */
>  	preq->req_rw &= ~REQ_FLUSH;
>  
> +	/* Relocate requires consistent index update */
> +	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> +		rw |= (REQ_FLUSH | REQ_FUA);
> +
>  	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
>  
>  	put_page(page);
> @@ -1094,7 +1095,6 @@ static void map_wb_complete(struct map_node * m, int err)
>  	int delayed = 0;
>  	unsigned int idx;
>  	sector_t sec;
> -	int force_fua;
>  	unsigned long rw;
>  
>  	/* First, complete processing of written back indices,
> @@ -1166,10 +1166,10 @@ static void map_wb_complete(struct map_node * m, int err)
>  
>  	main_preq = NULL;
>  	rw = 0;
> -	force_fua = 0;
>  
>  	list_for_each_safe(cursor, tmp, &m->io_queue) {
>  		struct ploop_request * preq;
> +		unsigned long state;
>  
>  		preq = list_entry(cursor, struct ploop_request, list);
>  
> @@ -1191,9 +1191,10 @@ static void map_wb_complete(struct map_node * m, int err)
>  			   will do the FLUSH */
>  			preq->req_rw &= ~REQ_FLUSH;
>  
> -			if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> -			    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> -				force_fua = 1;
> +			state = READ_ONCE(preq->state);
> +			/* Relocate requires consistent index update */
> +			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> +				rw |= (REQ_FLUSH | REQ_FUA);
>  
>  			preq->eng_state = PLOOP_E_INDEX_WB;
>  			get_page(page);
> @@ -1220,9 +1221,6 @@ static void map_wb_complete(struct map_node * m, int err)
>  	plo->st.map_multi_writes++;
>  	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
>  
> -	if (force_fua)
> -		set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
> -
>  	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
>  				      rw);
>  	put_page(page);
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 920daf7..6f41570 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -474,7 +474,6 @@ enum
>  	PLOOP_REQ_ZERO,
>  	PLOOP_REQ_DISCARD,
>  	PLOOP_REQ_RSYNC,
> -	PLOOP_REQ_FORCE_FUA,	/*force fua of req write I/O by engine */
>  	PLOOP_REQ_KAIO_FSYNC,	/*force image fsync by KAIO module */
>  	PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
>  	PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160624/57f2c9ec/attachment.sig>


More information about the Devel mailing list