[Devel] [PATCH rh7] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests

Maxim Patlasov mpatlasov at virtuozzo.com
Tue Jul 5 16:13:52 PDT 2016


Commit 9f860e606 introduced an engine to delay fsync: after doing
fallocate(FALLOC_FL_CONVERT_UNWRITTEN), dio_post_submit marks the
io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
later, when an incoming FLUSH|FUA arrives.

That was deemed important because (PSBM-47026):

> This optimization becomes more important due to the fact that customers tend to use pcompact heavily => ploop images grow each day.

Now, we can easily re-use the engine to delay fsync for reloc
requests as well. As explained in the description of commit
5aa3fe09:

>     1->read_data_from_old_post
>     2->write_to_new_pos
>       ->sumbit_alloc
>          ->submit_pad
>      ->post_submit->convert_unwritten
>     3->update_index ->write_page with FLUSH|FUA
>     4->nullify_old_pos
>    5->issue_flush

by the time of step 3 the extent conversion is not yet stable because
it belongs to an uncommitted transaction. But instead of doing fsync
inside ->post_submit, we can fsync later, as the very first step
of write_page for index_update.

https://jira.sw.ru/browse/PSBM-47026

Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
 drivers/block/ploop/dev.c       |    4 ++--
 drivers/block/ploop/io_direct.c |   25 ++++++++++++++++++++++++-
 drivers/block/ploop/io_kaio.c   |    3 ++-
 drivers/block/ploop/map.c       |   17 ++++++++++++-----
 include/linux/ploop/ploop.h     |    3 ++-
 5 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index e5f010b..40768b6 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -4097,7 +4097,7 @@ static void ploop_relocate(struct ploop_device * plo)
 	preq->bl.tail = preq->bl.head = NULL;
 	preq->req_cluster = 0;
 	preq->req_size = 0;
-	preq->req_rw = WRITE_SYNC|REQ_FUA;
+	preq->req_rw = WRITE_SYNC;
 	preq->eng_state = PLOOP_E_ENTRY;
 	preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
 	preq->error = 0;
@@ -4401,7 +4401,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
 		preq->bl.tail = preq->bl.head = NULL;
 		preq->req_cluster = ~0U; /* uninitialized */
 		preq->req_size = 0;
-		preq->req_rw = WRITE_SYNC|REQ_FUA;
+		preq->req_rw = WRITE_SYNC;
 		preq->eng_state = PLOOP_E_ENTRY;
 		preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
 		preq->error = 0;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index 1086850..0a5fb15 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -1494,13 +1494,36 @@ dio_read_page(struct ploop_io * io, struct ploop_request * preq,
 
 static void
 dio_write_page(struct ploop_io * io, struct ploop_request * preq,
-	       struct page * page, sector_t sec, unsigned long rw)
+	       struct page * page, sector_t sec, unsigned long rw,
+	       int do_fsync_if_delayed)
 {
 	if (!(io->files.file->f_mode & FMODE_WRITE)) {
 		PLOOP_FAIL_REQUEST(preq, -EBADF);
 		return;
 	}
 
+	if (do_fsync_if_delayed &&
+	    test_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state)) {
+		struct ploop_device * plo = io->plo;
+		u64 io_count;
+		int err;
+
+		spin_lock_irq(&plo->lock);
+		io_count = io->io_count;
+		spin_unlock_irq(&plo->lock);
+
+		err = io->ops->sync(io);
+		if (err) {
+			PLOOP_FAIL_REQUEST(preq, -EBADF);
+			return;
+		}
+
+		spin_lock_irq(&plo->lock);
+		if (io_count == io->io_count && !(io_count & 1))
+			clear_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
+		spin_unlock_irq(&plo->lock);
+	}
+
 	dio_io_page(io, rw | WRITE | REQ_SYNC, preq, page, sec);
 }
 
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 85863df..0d731ef 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -614,7 +614,8 @@ kaio_read_page(struct ploop_io * io, struct ploop_request * preq,
 
 static void
 kaio_write_page(struct ploop_io * io, struct ploop_request * preq,
-		 struct page * page, sector_t sec, unsigned long rw)
+		struct page * page, sector_t sec, unsigned long rw,
+		int do_fsync_if_delayed)
 {
 	ploop_prepare_tracker(preq, sec);
 
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 1883674..96e428b 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -910,6 +910,7 @@ void ploop_index_update(struct ploop_request * preq)
 	sector_t sec;
 	unsigned long rw;
 	unsigned long state = READ_ONCE(preq->state);
+	int do_fsync_if_delayed = 0;
 
 	/* No way back, we are going to initiate index write. */
 
@@ -970,10 +971,13 @@ void ploop_index_update(struct ploop_request * preq)
 	preq->req_rw &= ~REQ_FLUSH;
 
 	/* Relocate requires consistent index update */
-	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
 		rw |= (REQ_FLUSH | REQ_FUA);
+		do_fsync_if_delayed = 1;
+	}
 
-	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
+	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw,
+				      do_fsync_if_delayed);
 
 	put_page(page);
 	return;
@@ -1096,6 +1100,7 @@ static void map_wb_complete(struct map_node * m, int err)
 	unsigned int idx;
 	sector_t sec;
 	unsigned long rw;
+	int do_fsync_if_delayed = 0;
 
 	/* First, complete processing of written back indices,
 	 * finally instantiate indices in mapping cache.
@@ -1193,8 +1198,10 @@ static void map_wb_complete(struct map_node * m, int err)
 
 			state = READ_ONCE(preq->state);
 			/* Relocate requires consistent index update */
-			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
 				rw |= (REQ_FLUSH | REQ_FUA);
+				do_fsync_if_delayed = 1;
+			}
 
 			preq->eng_state = PLOOP_E_INDEX_WB;
 			get_page(page);
@@ -1221,8 +1228,8 @@ static void map_wb_complete(struct map_node * m, int err)
 	plo->st.map_multi_writes++;
 	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
 
-	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
-				      rw);
+	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, rw,
+				      do_fsync_if_delayed);
 	put_page(page);
 }
 
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index deee8a7..b03565b 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -164,7 +164,8 @@ struct ploop_io_ops
 	void	(*read_page)(struct ploop_io * io, struct ploop_request * preq,
 			     struct page * page, sector_t sec);
 	void	(*write_page)(struct ploop_io * io, struct ploop_request * preq,
-			      struct page * page, sector_t sec, unsigned long rw);
+			      struct page * page, sector_t sec, unsigned long rw,
+			      int do_fsync_if_delayed);
 
 
 	int	(*sync_read)(struct ploop_io * io, struct page * page,



More information about the Devel mailing list