[Devel] [PATCH rh7 9/9] ploop: fixup barrier handling during relocation

Maxim Patlasov mpatlasov at virtuozzo.com
Thu Jun 23 22:42:00 PDT 2016


Rebase Dima's patch on top of rh7-3.10.0-327.18.2.vz7.14.19,
but without help of delayed_flush engine:

To ensure consistency on crash/power outage/hard reboot
events, ploop must implement the following barrier logic
for RELOC_A|S requests:

1) After we store data to new place, but before updating
BAT on disk, we have FLUSH everything (in fact, flushing
those data would be enough, but it is simplier to flush
everything).

2) We should not proceed handling RELOC_A|S until we
100% sure new BAT value went to disk platters. So far as
new BAT is only one page, it's OK to mark corresponding
bio with FUA flag for io_direct case. For io_kaio, not
having FUA api, we have to post_fsync BAT update.

PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA introduced
long time ago probably were intended to ensure the
logic above, but they actually didn't.

The patch removes PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA,
and implements barriers in a straightforward and simple way:
check for RELOC_A|S explicitly and make FLUSH/FUA where
needed.

Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
 drivers/block/ploop/dev.c       |    4 ++--
 drivers/block/ploop/io_direct.c |    7 -------
 drivers/block/ploop/io_kaio.c   |    8 +++++---
 drivers/block/ploop/map.c       |   22 ++++++++++------------
 include/linux/ploop/ploop.h     |    1 -
 5 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 2b60dfa..40768b6 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2610,8 +2610,8 @@ restart:
 		top_delta = ploop_top_delta(plo);
 		sbl.head = sbl.tail = preq->aux_bio;
 
-		/* Relocated data write required sync before BAT updatee */
-		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
+		/* Relocated data write required sync before BAT update
+		 * this will happen inside index_update */
 
 		if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
 			preq->eng_state = PLOOP_E_DATA_WBI;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index c4d0f63..266f041 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -89,15 +89,11 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
 	sector_t sec, nsec;
 	int err;
 	struct bio_list_walk bw;
-	int postfua = 0;
 	int write = !!(rw & REQ_WRITE);
 	int delayed_fua = 0;
 
 	trace_submit(preq);
 
-	if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
-		postfua = 1;
-
 	if ((rw & REQ_FUA) && ploop_req_delay_fua_possible(preq)) {
 		/* Mark req that delayed flush required */
 		preq->req_rw |= (REQ_FLUSH | REQ_FUA);
@@ -233,9 +229,6 @@ flush_bio:
 		b->bi_private = preq;
 		b->bi_end_io = dio_endio_async;
 
-		if (unlikely(postfua && !bl.head))
-			rw2 |= REQ_FUA;
-
 		ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw);
 		submit_bio(rw2, b);
 	}
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index ed550f4..85863df 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -69,6 +69,8 @@ static void kaio_complete_io_state(struct ploop_request * preq)
 	unsigned long flags;
 	int post_fsync = 0;
 	int need_fua = !!(preq->req_rw & REQ_FUA);
+	unsigned long state = READ_ONCE(preq->state);
+	int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL));
 
 	if (preq->error || !(preq->req_rw & REQ_FUA) ||
 	    preq->eng_state == PLOOP_E_INDEX_READ ||
@@ -80,9 +82,9 @@ static void kaio_complete_io_state(struct ploop_request * preq)
 	}
 
 	/* Convert requested fua to fsync */
-	if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) ||
-	    test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
-	    (need_fua && !ploop_req_delay_fua_possible(preq))) {
+	if (test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
+	    (need_fua && !ploop_req_delay_fua_possible(preq)) ||
+	    (reloc && ploop_req_delay_fua_possible(preq))) {
 		post_fsync = 1;
 		preq->req_rw &= ~REQ_FUA;
 	}
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 915a216..1883674 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -909,6 +909,7 @@ void ploop_index_update(struct ploop_request * preq)
 	struct page * page;
 	sector_t sec;
 	unsigned long rw;
+	unsigned long state = READ_ONCE(preq->state);
 
 	/* No way back, we are going to initiate index write. */
 
@@ -961,10 +962,6 @@ void ploop_index_update(struct ploop_request * preq)
 	__TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m);
 	plo->st.map_single_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
-	/* Relocate requires consistent writes, mark such reqs appropriately */
-	if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
-	    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
-		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
 
 	rw = (preq->req_rw & (REQ_FUA | REQ_FLUSH));
 
@@ -972,6 +969,10 @@ void ploop_index_update(struct ploop_request * preq)
 	   will do the FLUSH */
 	preq->req_rw &= ~REQ_FLUSH;
 
+	/* Relocate requires consistent index update */
+	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+		rw |= (REQ_FLUSH | REQ_FUA);
+
 	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
 
 	put_page(page);
@@ -1094,7 +1095,6 @@ static void map_wb_complete(struct map_node * m, int err)
 	int delayed = 0;
 	unsigned int idx;
 	sector_t sec;
-	int force_fua;
 	unsigned long rw;
 
 	/* First, complete processing of written back indices,
@@ -1166,10 +1166,10 @@ static void map_wb_complete(struct map_node * m, int err)
 
 	main_preq = NULL;
 	rw = 0;
-	force_fua = 0;
 
 	list_for_each_safe(cursor, tmp, &m->io_queue) {
 		struct ploop_request * preq;
+		unsigned long state;
 
 		preq = list_entry(cursor, struct ploop_request, list);
 
@@ -1191,9 +1191,10 @@ static void map_wb_complete(struct map_node * m, int err)
 			   will do the FLUSH */
 			preq->req_rw &= ~REQ_FLUSH;
 
-			if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
-			    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
-				force_fua = 1;
+			state = READ_ONCE(preq->state);
+			/* Relocate requires consistent index update */
+			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+				rw |= (REQ_FLUSH | REQ_FUA);
 
 			preq->eng_state = PLOOP_E_INDEX_WB;
 			get_page(page);
@@ -1220,9 +1221,6 @@ static void map_wb_complete(struct map_node * m, int err)
 	plo->st.map_multi_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
 
-	if (force_fua)
-		set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
-
 	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
 				      rw);
 	put_page(page);
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 920daf7..6f41570 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -474,7 +474,6 @@ enum
 	PLOOP_REQ_ZERO,
 	PLOOP_REQ_DISCARD,
 	PLOOP_REQ_RSYNC,
-	PLOOP_REQ_FORCE_FUA,	/*force fua of req write I/O by engine */
 	PLOOP_REQ_KAIO_FSYNC,	/*force image fsync by KAIO module */
 	PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
 	PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */



More information about the Devel mailing list