[Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling

Dmitry Monakhov dmonakhov at openvz.org
Wed Jun 15 07:49:44 PDT 2016


The barrier code is broken in many ways:
Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
But a request can also go through ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes).
So in the case of grow_dev we have the following sequence:

E_RELOC_DATA_READ:
             ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
              ->delta->allocate
                 ->io->submit_alloc: dio_submit_alloc
                   ->dio_submit_pad
E_DATA_WBI : data written, time to update index
              ->delta->allocate_complete:ploop_index_update
                ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
                ->write_page
                ->ploop_map_wb_complete
                  ->ploop_wb_complete_post_process
                    ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
E_RELOC_NULLIFY:

               ->submit()

This patch unifies barrier handling as follows:
- Add an assertion to ploop_complete_request for the FORCE_{FLUSH,FUA} state
- Perform an explicit FUA inside index_update for RELOC requests.

This makes the reloc sequence optimal:
RELOC_S: R1, W2, WBI:FUA
RELOC_A: R1, W2, WBI:FUA, W1:NULLIFY:FUA

https://jira.sw.ru/browse/PSBM-47107
Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
 drivers/block/ploop/dev.c | 10 +++++++---
 drivers/block/ploop/map.c | 29 ++++++++++++-----------------
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 96f7850..998fe71 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1224,6 +1224,11 @@ static void ploop_complete_request(struct ploop_request * preq)
 
 	__TRACE("Z %p %u\n", preq, preq->req_cluster);
 
+	if (!preq->error) {
+		unsigned long state =  READ_ONCE(preq->state);
+		WARN_ON(state & (1 << PLOOP_REQ_FORCE_FUA));
+		WARN_ON(state & (1 <<PLOOP_REQ_FORCE_FLUSH));
+	}
 	while (preq->bl.head) {
 		struct bio * bio = preq->bl.head;
 		preq->bl.head = bio->bi_next;
@@ -2530,9 +2535,8 @@ restart:
 		top_delta = ploop_top_delta(plo);
 		sbl.head = sbl.tail = preq->aux_bio;
 
-		/* Relocated data write required sync before BAT updatee */
-		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
+		/* Relocated data write required sync before BAT updatee
+		 * this will happen inside index_update */
 		if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
 			preq->eng_state = PLOOP_E_DATA_WBI;
 			plo->st.bio_out++;
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 3a6365d..c17e598 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -896,6 +896,7 @@ void ploop_index_update(struct ploop_request * preq)
 	struct ploop_device * plo = preq->plo;
 	struct map_node * m = preq->map;
 	struct ploop_delta * top_delta = map_top_delta(m->parent);
+	int fua = !!(preq->req_rw & REQ_FUA);
 	u32 idx;
 	map_index_t blk;
 	int old_level;
@@ -953,13 +954,13 @@ void ploop_index_update(struct ploop_request * preq)
 	__TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m);
 	plo->st.map_single_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
-	/* Relocate requires consistent writes, mark such reqs appropriately */
+	/* Relocate requires consistent index update */
 	if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
 	    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
-		set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
-	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec,
-				      !!(preq->req_rw & REQ_FUA));
+		fua = 1;
+	if (fua)
+		clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state);
+	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, fua);
 	put_page(page);
 	return;
 
@@ -1078,7 +1079,7 @@ static void map_wb_complete(struct map_node * m, int err)
 	int delayed = 0;
 	unsigned int idx;
 	sector_t sec;
-	int fua, force_fua;
+	int fua;
 
 	/* First, complete processing of written back indices,
 	 * finally instantiate indices in mapping cache.
@@ -1149,7 +1150,6 @@ static void map_wb_complete(struct map_node * m, int err)
 
 	main_preq = NULL;
 	fua = 0;
-	force_fua = 0;
 
 	list_for_each_safe(cursor, tmp, &m->io_queue) {
 		struct ploop_request * preq;
@@ -1168,13 +1168,12 @@ static void map_wb_complete(struct map_node * m, int err)
 				break;
 			}
 
-			if (preq->req_rw & REQ_FUA)
+			if (preq->req_rw & REQ_FUA ||
+			    test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
+			    test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
+				clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state);
 				fua = 1;
-
-			if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
-			    test_bit(PLOOP_REQ_RELOC_S, &preq->state))
-				force_fua = 1;
-
+			}
 			preq->eng_state = PLOOP_E_INDEX_WB;
 			get_page(page);
 			preq->sinfo.wi.tpage = page;
@@ -1199,10 +1198,6 @@ static void map_wb_complete(struct map_node * m, int err)
 	__TRACE("wbi2 %p %u %p\n", main_preq, main_preq->req_cluster, m);
 	plo->st.map_multi_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
-
-	if (force_fua)
-		set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
-
 	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, fua);
 	put_page(page);
 }
-- 
1.8.3.1



More information about the Devel mailing list