[Devel] [PATCH rh7 2/2] ploop: use FALLOC_FL_CONVERT_UNWRITTEN in io_direct

Maxim Patlasov mpatlasov at virtuozzo.com
Sun Mar 13 14:12:09 PDT 2016


The patch implements an optimization of the submit_alloc path for io_direct:
write user data directly to the host block device (according to fiemap info)
and then use fallocate(FALLOC_FL_CONVERT_UNWRITTEN). This avoids the expensive
pagecache_write_begin/copy/pagecache_write_end mechanism (saves ~750usec
per megabyte in my experiments). The feature improves performance
significantly. Before the patch:

# dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync
10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 44.2684 s, 243 MB/s

after the patch:

# dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync
10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 29.3066 s, 366 MB/s

https://jira.sw.ru/browse/PSBM-22381

Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
 drivers/block/ploop/dev.c       |    7 +++++++
 drivers/block/ploop/io_direct.c |   42 ++++++++++++++++++++++++++++++++++++---
 include/linux/ploop/ploop.h     |    6 ++++++
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index feb56c7..1da073c 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2294,7 +2294,14 @@ static void ploop_req_state_process(struct ploop_request * preq)
 		preq->prealloc_size = 0; /* only for sanity */
 	}
 
+	if (test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state)) {
+		preq->eng_io->ops->post_submit(preq->eng_io, preq);
+		clear_bit(PLOOP_REQ_POST_SUBMIT, &preq->state);
+		preq->eng_io = NULL;
+	}
+
 restart:
+	BUG_ON(test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state));
 	__TRACE("ST %p %u %lu\n", preq, preq->req_cluster, preq->eng_state);
 	switch (preq->eng_state) {
 	case PLOOP_E_ENTRY:
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index 514af4b..d6f0f21 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -359,6 +359,10 @@ static inline void bzero_page(struct page *page)
 	kunmap_atomic(kaddr);
 }
 
+static void
+dio_submit_pad(struct ploop_io *io, struct ploop_request * preq,
+	       struct bio_list * sbl, unsigned int size,
+	       struct extent_map *em);
 
 static int
 cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
@@ -371,6 +375,8 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
 	struct bio_iter biter;
 	loff_t new_size;
 	loff_t used_pos;
+	bool may_fallocate = io->files.file->f_op->fallocate &&
+		io->files.flags & EXT4_EXTENTS_FL;
 
 	trace_cached_submit(preq);
 
@@ -379,9 +385,7 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
 	used_pos = (io->alloc_head - 1) << (io->plo->cluster_log + 9);
 
 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24)
-	if (use_prealloc && end_pos > used_pos &&
-	    io->files.file->f_op->fallocate &&
-	    io->files.flags & EXT4_EXTENTS_FL) {
+	if (use_prealloc && end_pos > used_pos && may_fallocate) {
 		if (unlikely(io->prealloced_size < clu_siz)) {
 			loff_t prealloc = end_pos;
 			if (prealloc > PLOOP_MAX_PREALLOC(plo))
@@ -405,6 +409,21 @@ try_again:
 	}
 #endif
 
+	if (may_fallocate) {
+		sector_t sec = (sector_t)iblk << preq->plo->cluster_log;
+		sector_t len = 1 << preq->plo->cluster_log;
+		struct extent_map * em = extent_lookup_create(io, sec, len);
+
+		if (unlikely(IS_ERR(em)))
+			return PTR_ERR(em);
+
+		preq->iblock = iblk;
+		preq->eng_io = io;
+		set_bit(PLOOP_REQ_POST_SUBMIT, &preq->state);
+		dio_submit_pad(io, preq, sbl, size, em);
+		return 0;
+	}
+
 	bio_iter_init(&biter, sbl);
 	mutex_lock(&io->files.inode->i_mutex);
 
@@ -480,6 +499,22 @@ try_again:
 	return err;
 }
 
+static void
+dio_post_submit(struct ploop_io *io, struct ploop_request * preq)
+{
+	sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log;
+	loff_t clu_siz = 1 << (preq->plo->cluster_log + 9);
+	int err;
+
+	err = io->files.file->f_op->fallocate(io->files.file,
+					      FALLOC_FL_CONVERT_UNWRITTEN,
+					      (loff_t)sec << 9, clu_siz);
+	if (err) {
+		PLOOP_REQ_SET_ERROR(preq, err);
+		set_bit(PLOOP_S_ABORT, &preq->plo->state);
+	}
+}
+
 /* Submit the whole cluster. If preq contains only partial data
  * within the cluster, pad the rest of cluster with zeros.
  */
@@ -1854,6 +1889,7 @@ static struct ploop_io_ops ploop_io_ops_direct =
 	.alloc		=	dio_alloc_sync,
 	.submit		=	dio_submit,
 	.submit_alloc	=	dio_submit_alloc,
+	.post_submit	=	dio_post_submit,
 	.disable_merge	=	dio_disable_merge,
 	.fastmap	=	dio_fastmap,
 	.read_page	=	dio_read_page,
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index b8c7130..c9fb1b0 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -142,6 +142,7 @@ struct ploop_io_ops
 			  struct bio_list *sbl, iblock_t iblk, unsigned int size);
 	void	(*submit_alloc)(struct ploop_io *, struct ploop_request *,
 				struct bio_list *sbl, unsigned int size);
+	void	(*post_submit)(struct ploop_io *, struct ploop_request *);
 
 	int	(*disable_merge)(struct ploop_io * io, sector_t isector, unsigned int len);
 	int	(*fastmap)(struct ploop_io * io, struct bio *orig_bio,
@@ -459,6 +460,7 @@ enum
 	PLOOP_REQ_FORCE_FUA,	/*force fua of req write I/O by engine */
 	PLOOP_REQ_FORCE_FLUSH,	/*force flush by engine */
 	PLOOP_REQ_KAIO_FSYNC,	/*force image fsync by KAIO module */
+	PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
 };
 
 enum
@@ -561,6 +563,10 @@ struct ploop_request
 
 	/* # bytes in tail of image file to prealloc on behalf of this preq */
 	loff_t			prealloc_size;
+
+	/* if the engine starts operation on particular io, let's finish
+	 * the operation on the same io (see io.ops->post_submit) */
+	struct ploop_io	       *eng_io;
 };
 
 static inline struct ploop_delta * ploop_top_delta(struct ploop_device * plo)



More information about the Devel mailing list