[Devel] [RH7 PATCH 5/6] ploop: fixup barrier handling during relocation

Dmitry Monakhov dmonakhov at openvz.org
Thu Jun 23 10:25:57 PDT 2016


The barrier code is broken in many ways:
Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
But a request can also go through ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes).
So in the case of grow_dev we have the following sequence:

E_RELOC_DATA_READ:
             ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
              ->delta->allocate
                 ->io->submit_alloc: dio_submit_alloc
                   ->dio_submit_pad
E_DATA_WBI : data written, time to update index
              ->delta->allocate_complete:ploop_index_update
                ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
                ->write_page
                ->ploop_map_wb_complete
                  ->ploop_wb_complete_post_process
                    ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
E_RELOC_NULLIFY:

               ->submit()

Once we have the delayed_flush engine, it is easy to implement a correct scheme
for both engines.

E_RELOC_DATA_READ ->submit_alloc => wait->post_submit->issue_flush
E_DATA_WBI ->ploop_index_update with FUA
E_RELOC_NULLIFY ->submit: => wait->post_submit->issue_flush

This makes the reloc sequence optimal:
RELOC_S: R1, W2,WAIT,FLUSH, WBI:FUA
RELOC_A: R1, W2,WAIT,FLUSH, WBI:FUA, W1:NULLIFY,WAIT, FLUSH

https://jira.sw.ru/browse/PSBM-47107
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
 drivers/block/ploop/dev.c     |  2 +-
 drivers/block/ploop/io_kaio.c |  3 +--
 drivers/block/ploop/map.c     | 28 ++++++++++++++--------------
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 95e3067..090cd2d 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2533,7 +2533,7 @@ restart:
 		sbl.head = sbl.tail = preq->aux_bio;
 
 		/* Relocated data write required sync before BAT updatee */
-		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
+		preq->req_rw |= REQ_FUA;
 
 		if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
 			preq->eng_state = PLOOP_E_DATA_WBI;
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 5341fd5..5217ab4 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -72,8 +72,7 @@ static void kaio_complete_io_state(struct ploop_request * preq)
 	}
 
 	/* Convert requested fua to fsync */
-	if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) ||
-	    test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) ||
+	if (test_and_clear_bit(PLOOP_REQ_DEL_FLUSH, &preq->state) ||
 	    test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state))
 		post_fsync = 1;
 
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 3a6365d..ef351fb 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -901,6 +901,8 @@ void ploop_index_update(struct ploop_request * preq)
 	int old_level;
 	struct page * page;
 	sector_t sec;
+	int fua = !!(preq->req_rw & REQ_FUA);
+	unsigned long state = READ_ONCE(preq->state);
 
 	/* No way back, we are going to initiate index write. */
 
@@ -954,12 +956,11 @@ void ploop_index_update(struct ploop_request * preq)
 	plo->st.map_single_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
 	/* Relocate requires consistent writes, mark such reqs appropriately */
-	if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
-	    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
-		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
-	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec,
-				      !!(preq->req_rw & REQ_FUA));
+	if (state & (PLOOP_REQ_RELOC_A_FL | PLOOP_REQ_RELOC_S_FL)) {
+		WARN_ON(state & PLOOP_REQ_DEL_FLUSH_FL);
+		fua = 1;
+	}
+	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, fua);
 	put_page(page);
 	return;
 
@@ -1063,7 +1064,7 @@ static void map_wb_complete_post_process(struct ploop_map *map,
 	 * (see dio_submit()). So fsync of EXT4 image doesnt help us.
 	 * We need to force sync of nullified blocks.
 	 */
-	set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
+	preq->req_rw |= REQ_FUA;
 	top_delta->io.ops->submit(&top_delta->io, preq, preq->req_rw,
 				  &sbl, preq->iblock, 1<<plo->cluster_log);
 }
@@ -1153,8 +1154,10 @@ static void map_wb_complete(struct map_node * m, int err)
 
 	list_for_each_safe(cursor, tmp, &m->io_queue) {
 		struct ploop_request * preq;
+		unsigned long state;
 
 		preq = list_entry(cursor, struct ploop_request, list);
+		state = READ_ONCE(preq->state);
 
 		switch (preq->eng_state) {
 		case PLOOP_E_INDEX_DELAY:
@@ -1171,9 +1174,10 @@ static void map_wb_complete(struct map_node * m, int err)
 			if (preq->req_rw & REQ_FUA)
 				fua = 1;
 
-			if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
-			    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
-				force_fua = 1;
+			if (state & (PLOOP_REQ_RELOC_A_FL | PLOOP_REQ_RELOC_S_FL)) {
+				WARN_ON(state & PLOOP_REQ_DEL_FLUSH_FL);
+				fua = 1;
+			}
 
 			preq->eng_state = PLOOP_E_INDEX_WB;
 			get_page(page);
@@ -1199,10 +1203,6 @@ static void map_wb_complete(struct map_node * m, int err)
 	__TRACE("wbi2 %p %u %p\n", main_preq, main_preq->req_cluster, m);
 	plo->st.map_multi_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
-
-	if (force_fua)
-		set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
-
 	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, fua);
 	put_page(page);
 }
-- 
1.8.3.1



More information about the Devel mailing list