[Devel] [PATCH rh7] ploop: io_direct: delay f_op->fsync() until FLUSH|FUA (v2)

Maxim Patlasov mpatlasov at virtuozzo.com
Thu May 26 14:32:45 PDT 2016


Once an extent is converted to initialized, it can be part of an uncompleted
journal transaction, so we have to force a transaction commit at some point.

Instead of forcing the transaction commit immediately, the patch delays it
until an incoming bio with FLUSH|FUA arrives. Then, as the very first
step of processing such a bio, we send the corresponding preq to fsync_thread
to perform f_op->fsync().

In a very unlikely case, processing a FLUSH|FUA bio may itself result in
converting extents. Then the patch calls f_op->fsync() immediately after
the conversion to preserve FUA semantics.
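
For readers skimming the diff below, here is a condensed userspace-style sketch
of the control flow the patch introduces. Flag and function names mirror the
patch; the structs are simplified stand-ins, not the real ploop types, and the
clearing of the delayed flag is simplified (the real patch guards it with the
io_count check in dio_fsync_thread):

/* Illustrative userspace model of the delayed-fsync flow -- not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct io_model {
	bool fsync_delayed;   /* mirrors PLOOP_IO_FSYNC_DELAYED in io->io_state */
};

struct preq_model {
	bool is_flush;        /* bio carries REQ_FLUSH */
	bool is_fua;          /* bio carries REQ_FUA   */
	bool fsync_done;      /* mirrors PLOOP_REQ_FSYNC_DONE in preq->state */
};

/* Extent conversion path (dio_post_submit in the patch). */
static void convert_extent(struct io_model *io, struct preq_model *preq)
{
	/* fallocate(FALLOC_FL_CONVERT_UNWRITTEN) would happen here */
	io->fsync_delayed = true;        /* a journal commit is now pending */

	if (preq->is_fua) {
		/* unlikely corner case: FUA hit a not-yet-provisioned block --
		 * fsync right away so FUA semantics are preserved */
		puts("fsync (immediate, FUA after conversion)");
	}
}

/* Entry path (ploop_entry_request in the patch). */
static void entry(struct io_model *io, struct preq_model *preq)
{
	if (preq->is_flush && io->fsync_delayed && !preq->fsync_done) {
		/* hand the preq to the fsync thread before doing anything else */
		puts("queue to fsync thread");
		puts("fsync (delayed)");
		preq->fsync_done = true;  /* prevents a second round trip */
		io->fsync_delayed = false;
		/* the fsync thread then puts preq back on the ready queue */
	}
	puts("handle FLUSH/WRITE as before");
}

int main(void)
{
	struct io_model io = { false };
	struct preq_model wr = { false, false, false };
	struct preq_model fl = { true,  false, false };

	convert_extent(&io, &wr);   /* an ordinary write converts an extent */
	entry(&io, &fl);            /* a later FLUSH forces the fsync first */
	return 0;
}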

Changed in v2 (thanks to dmonakhov@):
	- simplified bitwise arithmetic in the preq_is_special() helper;
	- reject fastmap for REQ_FUA bios if PLOOP_IO_FSYNC_DELAYED is set.

https://jira.sw.ru/browse/PSBM-47026

Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
 drivers/block/ploop/dev.c       |   72 ++++++++++++++++++++++++---------------
 drivers/block/ploop/io_direct.c |   28 +++++++++++++++
 include/linux/ploop/ploop.h     |   12 +++++++
 3 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 654b60b..861ce86 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1942,46 +1942,64 @@ err:
 
 /* Main preq state machine */
 
+static inline bool preq_is_special(struct ploop_request * preq)
+{
+	unsigned long state = READ_ONCE(preq->state);
+
+	return state & (PLOOP_REQ_MERGE_FL |
+			PLOOP_REQ_RELOC_A_FL |
+			PLOOP_REQ_RELOC_S_FL |
+			PLOOP_REQ_DISCARD_FL |
+			PLOOP_REQ_ZERO_FL);
+}
+
 static void
 ploop_entry_request(struct ploop_request * preq)
 {
 	struct ploop_device * plo       = preq->plo;
 	struct ploop_delta  * top_delta = ploop_top_delta(plo);
+	struct ploop_io     * top_io    = &top_delta->io;
 	struct ploop_delta  * delta;
 	int level;
 	int err;
 	iblock_t iblk;
 
-	/* Control request. */
-	if (unlikely(preq->bl.head == NULL &&
-		     !test_bit(PLOOP_REQ_MERGE, &preq->state) &&
-		     !test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
-		     !test_bit(PLOOP_REQ_RELOC_S, &preq->state) &&
-		     !test_bit(PLOOP_REQ_DISCARD, &preq->state) &&
-		     !test_bit(PLOOP_REQ_ZERO, &preq->state))) {
-		complete(plo->quiesce_comp);
-		wait_for_completion(&plo->relax_comp);
-		ploop_complete_request(preq);
-		complete(&plo->relaxed_comp);
-		return;
-	}
+	if (!preq_is_special(preq)) {
+		/* Control request */
+		if (unlikely(preq->bl.head == NULL)) {
+			complete(plo->quiesce_comp);
+			wait_for_completion(&plo->relax_comp);
+			ploop_complete_request(preq);
+			complete(&plo->relaxed_comp);
+			return;
+		}
 
-	/* Empty flush. */
-	if (unlikely(preq->req_size == 0 &&
-		     !test_bit(PLOOP_REQ_MERGE, &preq->state) &&
-		     !test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
-		     !test_bit(PLOOP_REQ_RELOC_S, &preq->state) &&
-		     !test_bit(PLOOP_REQ_ZERO, &preq->state))) {
-		if (preq->req_rw & REQ_FLUSH) {
-			if (top_delta->io.ops->issue_flush) {
-				top_delta->io.ops->issue_flush(&top_delta->io, preq);
-				return;
-			}
+		/* Need to fsync before start handling FLUSH */
+		if ((preq->req_rw & REQ_FLUSH) &&
+		    test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state) &&
+		    !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) {
+			spin_lock_irq(&plo->lock);
+			list_add_tail(&preq->list, &top_io->fsync_queue);
+			if (waitqueue_active(&top_io->fsync_waitq))
+				wake_up_interruptible(&top_io->fsync_waitq);
+			spin_unlock_irq(&plo->lock);
+			return;
 		}
 
-		preq->eng_state = PLOOP_E_COMPLETE;
-		ploop_complete_request(preq);
-		return;
+		/* Empty flush or unknown zero-size request */
+		if (preq->req_size == 0) {
+			if (preq->req_rw & REQ_FLUSH &&
+			    !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) {
+				if (top_io->ops->issue_flush) {
+					top_io->ops->issue_flush(top_io, preq);
+					return;
+				}
+			}
+
+			preq->eng_state = PLOOP_E_COMPLETE;
+			ploop_complete_request(preq);
+			return;
+		}
 	}
 
 	if (unlikely(test_bit(PLOOP_REQ_SYNC, &preq->state) &&
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index 8096110..b844a80 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -514,17 +514,32 @@ end_write:
 static void
 dio_post_submit(struct ploop_io *io, struct ploop_request * preq)
 {
+	struct ploop_device *plo = preq->plo;
 	sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log;
 	loff_t clu_siz = 1 << (preq->plo->cluster_log + 9);
 	int err;
 
 	file_start_write(io->files.file);
+
+	/* Here io->io_count is even ... */
+	spin_lock_irq(&plo->lock);
+	io->io_count++;
+	set_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
+	spin_unlock_irq(&plo->lock);
+
 	err = io->files.file->f_op->fallocate(io->files.file,
 					      FALLOC_FL_CONVERT_UNWRITTEN,
 					      (loff_t)sec << 9, clu_siz);
-	if (!err)
+
+	/* highly unlikely case: FUA coming to a block not provisioned yet */
+	if (!err && (preq->req_rw & REQ_FUA))
 		err = io->ops->sync(io);
 
+	spin_lock_irq(&plo->lock);
+	io->io_count++;
+	spin_unlock_irq(&plo->lock);
+	/* and here io->io_count is even (+2) again. */
+
 	file_end_write(io->files.file);
 	if (err) {
 		PLOOP_REQ_SET_ERROR(preq, err);
@@ -782,6 +797,7 @@ static int dio_fsync_thread(void * data)
 {
 	struct ploop_io * io = data;
 	struct ploop_device * plo = io->plo;
+	u64 io_count;
 
 	set_user_nice(current, -20);
 
@@ -808,6 +824,7 @@ static int dio_fsync_thread(void * data)
 
 		INIT_LIST_HEAD(&list);
 		list_splice_init(&io->fsync_queue, &list);
+		io_count = io->io_count;
 		spin_unlock_irq(&plo->lock);
 
 		/* filemap_fdatawrite() has been made already */
@@ -824,12 +841,17 @@ static int dio_fsync_thread(void * data)
 
 		spin_lock_irq(&plo->lock);
 
+		if (io_count == io->io_count && !(io_count & 1))
+			clear_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
+
 		while (!list_empty(&list)) {
 			struct ploop_request * preq;
 			preq = list_entry(list.next, struct ploop_request, list);
 			list_del(&preq->list);
 			if (err)
 				PLOOP_REQ_SET_ERROR(preq, err);
+
+			__set_bit(PLOOP_REQ_FSYNC_DONE, &preq->state);
 			list_add_tail(&preq->list, &plo->ready_queue);
 			io->fsync_qlen--;
 		}
@@ -1522,6 +1544,10 @@ dio_fastmap(struct ploop_io * io, struct bio * orig_bio,
 	struct extent_map * em;
 	int i;
 
+	if (unlikely((orig_bio->bi_rw & (REQ_FLUSH | REQ_FUA)) &&
+		     test_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state)))
+		return 1;
+
 	if (orig_bio->bi_size == 0) {
 		bio->bi_vcnt   = 0;
 		bio->bi_sector = 0;
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index ad36a91..0fba25e 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -87,6 +87,9 @@ struct ploop_file
  * This struct describes how we do real IO on particular backing file.
  */
 
+enum {
+	PLOOP_IO_FSYNC_DELAYED,  /* Must f_op->fsync before FLUSH|FUA */
+};
 
 struct ploop_io
 {
@@ -108,6 +111,8 @@ struct ploop_io
 	struct timer_list	fsync_timer;
 
 	struct ploop_io_ops	*ops;
+	unsigned long		io_state;
+	u64                     io_count;
 };
 
 struct ploop_io_ops
@@ -466,8 +471,15 @@ enum
 	PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
 	PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
 	PLOOP_REQ_ALLOW_READS, /* READs are allowed for given req_cluster */
+	PLOOP_REQ_FSYNC_DONE,  /* fsync_thread() performed f_op->fsync() */
 };
 
+#define PLOOP_REQ_MERGE_FL (1 << PLOOP_REQ_MERGE)
+#define PLOOP_REQ_RELOC_A_FL (1 << PLOOP_REQ_RELOC_A)
+#define PLOOP_REQ_RELOC_S_FL (1 << PLOOP_REQ_RELOC_S)
+#define PLOOP_REQ_DISCARD_FL (1 << PLOOP_REQ_DISCARD)
+#define PLOOP_REQ_ZERO_FL (1 << PLOOP_REQ_ZERO)
+
 enum
 {
 	PLOOP_E_ENTRY,		/* Not yet processed */
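
The io_count bookkeeping above is the subtle part: dio_post_submit bumps the
counter before and after the conversion, so it is odd while a conversion is in
flight and even otherwise, and dio_fsync_thread may clear PLOOP_IO_FSYNC_DELAYED
only if no conversion ran concurrently with the fsync. Below is a minimal,
single-threaded userspace model of that check, illustrative only (the real code
takes plo->lock around the counter accesses):

/* Userspace model of the io_count handshake -- not the kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <assert.h>

struct io_model {
	uint64_t io_count;      /* even: idle, odd: conversion in flight */
	bool     fsync_delayed;
};

/* Writer side (dio_post_submit): wrap the conversion with two increments. */
static void convert_extent(struct io_model *io)
{
	io->io_count++;               /* now odd: conversion in flight */
	io->fsync_delayed = true;
	/* ... fallocate(FALLOC_FL_CONVERT_UNWRITTEN) ... */
	io->io_count++;               /* even again: conversion finished */
}

/* Fsync thread side (dio_fsync_thread): snapshot, fsync, re-check. */
static void fsync_pass(struct io_model *io)
{
	uint64_t snap = io->io_count; /* taken under plo->lock in the patch */

	/* ... the fsync runs here, possibly racing with convert_extent() ... */

	/* The flag may be cleared only if no conversion started or finished
	 * since the snapshot (count unchanged) and none is in flight (even);
	 * otherwise it stays set and the next FLUSH triggers another fsync. */
	if (snap == io->io_count && !(snap & 1))
		io->fsync_delayed = false;
}

int main(void)
{
	struct io_model io = { 0, false };

	convert_extent(&io);   /* a write converts an extent, sets the flag   */
	fsync_pass(&io);       /* quiescent fsync: the flag may be cleared    */
	assert(!io.fsync_delayed);
	return 0;
}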
