[Devel] [PATCH RHEL7 COMMIT] ploop: fixup barrier handling during relocation

Konstantin Khorenko khorenko at virtuozzo.com
Mon Jun 27 00:10:26 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.20
------>
commit 7fba82f610d91ae75ff26ad70dc93f38d5b3eb11
Author: Maxim Patlasov <mpatlasov at virtuozzo.com>
Date:   Mon Jun 27 11:10:26 2016 +0400

    ploop: fixup barrier handling during relocation
    
    Rebase Dima's patch on top of rh7-3.10.0-327.18.2.vz7.14.19,
    but without the help of the delayed_flush engine:
    
    To ensure consistency on crash/power outage/hard reboot
    events, ploop must implement the following barrier logic
    for RELOC_A|S requests:
    
    1) After we store data to the new place, but before updating
    BAT on disk, we have to FLUSH everything (in fact, flushing
    that data would be enough, but it is simpler to flush
    everything).
    
    2) We should not proceed handling RELOC_A|S until we are
    100% sure the new BAT value went to disk platters. Since the
    new BAT is only one page, it's OK to mark the corresponding
    bio with the FUA flag for the io_direct case. For io_kaio, which
    has no FUA API, we have to post_fsync the BAT update.
    
    PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA, introduced
    a long time ago, were probably intended to ensure the
    logic above, but they actually didn't.
    
    The patch removes PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA,
    and implements barriers in a straightforward and simple way:
    check for RELOC_A|S explicitly and make FLUSH/FUA where
    needed.
    
    Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
    Acked-by: Dmitry Monakhov <dmonakhov at virtuozzo.com>
---
 drivers/block/ploop/dev.c       |  4 ++--
 drivers/block/ploop/io_direct.c |  7 -------
 drivers/block/ploop/io_kaio.c   |  8 +++++---
 drivers/block/ploop/map.c       | 22 ++++++++++------------
 include/linux/ploop/ploop.h     |  1 -
 5 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 2b60dfa..40768b6 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2610,8 +2610,8 @@ restart:
 		top_delta = ploop_top_delta(plo);
 		sbl.head = sbl.tail = preq->aux_bio;
 
-		/* Relocated data write required sync before BAT updatee */
-		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
+		/* Relocated data write required sync before BAT update
+		 * this will happen inside index_update */
 
 		if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
 			preq->eng_state = PLOOP_E_DATA_WBI;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index 189aa1a..1086850 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -89,15 +89,11 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
 	sector_t sec, nsec;
 	int err;
 	struct bio_list_walk bw;
-	int postfua = 0;
 	int write = !!(rw & REQ_WRITE);
 	int delayed_fua = 0;
 
 	trace_submit(preq);
 
-	if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
-		postfua = 1;
-
 	if ((rw & REQ_FUA) && ploop_req_delay_fua_possible(preq)) {
 		/* Mark req that delayed flush required */
 		preq->req_rw |= (REQ_FLUSH | REQ_FUA);
@@ -234,9 +230,6 @@ flush_bio:
 		b->bi_private = preq;
 		b->bi_end_io = dio_endio_async;
 
-		if (unlikely(postfua && !bl.head))
-			rw2 |= REQ_FUA;
-
 		ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw);
 		submit_bio(rw2, b);
 	}
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index ed550f4..85863df 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -69,6 +69,8 @@ static void kaio_complete_io_state(struct ploop_request * preq)
 	unsigned long flags;
 	int post_fsync = 0;
 	int need_fua = !!(preq->req_rw & REQ_FUA);
+	unsigned long state = READ_ONCE(preq->state);
+	int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL));
 
 	if (preq->error || !(preq->req_rw & REQ_FUA) ||
 	    preq->eng_state == PLOOP_E_INDEX_READ ||
@@ -80,9 +82,9 @@ static void kaio_complete_io_state(struct ploop_request * preq)
 	}
 
 	/* Convert requested fua to fsync */
-	if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) ||
-	    test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
-	    (need_fua && !ploop_req_delay_fua_possible(preq))) {
+	if (test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
+	    (need_fua && !ploop_req_delay_fua_possible(preq)) ||
+	    (reloc && ploop_req_delay_fua_possible(preq))) {
 		post_fsync = 1;
 		preq->req_rw &= ~REQ_FUA;
 	}
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 915a216..1883674 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -909,6 +909,7 @@ void ploop_index_update(struct ploop_request * preq)
 	struct page * page;
 	sector_t sec;
 	unsigned long rw;
+	unsigned long state = READ_ONCE(preq->state);
 
 	/* No way back, we are going to initiate index write. */
 
@@ -961,10 +962,6 @@ void ploop_index_update(struct ploop_request * preq)
 	__TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m);
 	plo->st.map_single_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
-	/* Relocate requires consistent writes, mark such reqs appropriately */
-	if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
-	    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
-		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
 
 	rw = (preq->req_rw & (REQ_FUA | REQ_FLUSH));
 
@@ -972,6 +969,10 @@ void ploop_index_update(struct ploop_request * preq)
 	   will do the FLUSH */
 	preq->req_rw &= ~REQ_FLUSH;
 
+	/* Relocate requires consistent index update */
+	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+		rw |= (REQ_FLUSH | REQ_FUA);
+
 	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
 
 	put_page(page);
@@ -1094,7 +1095,6 @@ static void map_wb_complete(struct map_node * m, int err)
 	int delayed = 0;
 	unsigned int idx;
 	sector_t sec;
-	int force_fua;
 	unsigned long rw;
 
 	/* First, complete processing of written back indices,
@@ -1166,10 +1166,10 @@ static void map_wb_complete(struct map_node * m, int err)
 
 	main_preq = NULL;
 	rw = 0;
-	force_fua = 0;
 
 	list_for_each_safe(cursor, tmp, &m->io_queue) {
 		struct ploop_request * preq;
+		unsigned long state;
 
 		preq = list_entry(cursor, struct ploop_request, list);
 
@@ -1191,9 +1191,10 @@ static void map_wb_complete(struct map_node * m, int err)
 			   will do the FLUSH */
 			preq->req_rw &= ~REQ_FLUSH;
 
-			if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
-			    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
-				force_fua = 1;
+			state = READ_ONCE(preq->state);
+			/* Relocate requires consistent index update */
+			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+				rw |= (REQ_FLUSH | REQ_FUA);
 
 			preq->eng_state = PLOOP_E_INDEX_WB;
 			get_page(page);
@@ -1220,9 +1221,6 @@ static void map_wb_complete(struct map_node * m, int err)
 	plo->st.map_multi_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
 
-	if (force_fua)
-		set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
-
 	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
 				      rw);
 	put_page(page);
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 920daf7..6f41570 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -474,7 +474,6 @@ enum
 	PLOOP_REQ_ZERO,
 	PLOOP_REQ_DISCARD,
 	PLOOP_REQ_RSYNC,
-	PLOOP_REQ_FORCE_FUA,	/*force fua of req write I/O by engine */
 	PLOOP_REQ_KAIO_FSYNC,	/*force image fsync by KAIO module */
 	PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
 	PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */


More information about the Devel mailing list