[Devel] [PATCH rh7 2/2] ploop: use FALLOC_FL_CONVERT_UNWRITTEN in io_direct
Maxim Patlasov
mpatlasov at virtuozzo.com
Sun Mar 13 14:12:09 PDT 2016
This patch implements an optimization of the submit_alloc path for pio_direct:
write user data directly to the host block device (according to fiemap info)
and then use fallocate(FALLOC_FL_CONVERT_UNWRITTEN). This avoids the expensive
pagecache_write_begin/copy/pagecache_write_end mechanism (saving ~750 usec
per megabyte in my experiments). The feature improves performance
significantly. Before the patch:
# dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync
10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 44.2684 s, 243 MB/s
After the patch:
# dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync
10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 29.3066 s, 366 MB/s
https://jira.sw.ru/browse/PSBM-22381
Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
drivers/block/ploop/dev.c | 7 +++++++
drivers/block/ploop/io_direct.c | 42 ++++++++++++++++++++++++++++++++++++---
include/linux/ploop/ploop.h | 6 ++++++
3 files changed, 52 insertions(+), 3 deletions(-)
diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index feb56c7..1da073c 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2294,7 +2294,14 @@ static void ploop_req_state_process(struct ploop_request * preq)
preq->prealloc_size = 0; /* only for sanity */
}
+ if (test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state)) {
+ preq->eng_io->ops->post_submit(preq->eng_io, preq);
+ clear_bit(PLOOP_REQ_POST_SUBMIT, &preq->state);
+ preq->eng_io = NULL;
+ }
+
restart:
+ BUG_ON(test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state));
__TRACE("ST %p %u %lu\n", preq, preq->req_cluster, preq->eng_state);
switch (preq->eng_state) {
case PLOOP_E_ENTRY:
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index 514af4b..d6f0f21 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -359,6 +359,10 @@ static inline void bzero_page(struct page *page)
kunmap_atomic(kaddr);
}
+static void
+dio_submit_pad(struct ploop_io *io, struct ploop_request * preq,
+ struct bio_list * sbl, unsigned int size,
+ struct extent_map *em);
static int
cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
@@ -371,6 +375,8 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
struct bio_iter biter;
loff_t new_size;
loff_t used_pos;
+ bool may_fallocate = io->files.file->f_op->fallocate &&
+ io->files.flags & EXT4_EXTENTS_FL;
trace_cached_submit(preq);
@@ -379,9 +385,7 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
used_pos = (io->alloc_head - 1) << (io->plo->cluster_log + 9);
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24)
- if (use_prealloc && end_pos > used_pos &&
- io->files.file->f_op->fallocate &&
- io->files.flags & EXT4_EXTENTS_FL) {
+ if (use_prealloc && end_pos > used_pos && may_fallocate) {
if (unlikely(io->prealloced_size < clu_siz)) {
loff_t prealloc = end_pos;
if (prealloc > PLOOP_MAX_PREALLOC(plo))
@@ -405,6 +409,21 @@ try_again:
}
#endif
+ if (may_fallocate) {
+ sector_t sec = (sector_t)iblk << preq->plo->cluster_log;
+ sector_t len = 1 << preq->plo->cluster_log;
+ struct extent_map * em = extent_lookup_create(io, sec, len);
+
+ if (unlikely(IS_ERR(em)))
+ return PTR_ERR(em);
+
+ preq->iblock = iblk;
+ preq->eng_io = io;
+ set_bit(PLOOP_REQ_POST_SUBMIT, &preq->state);
+ dio_submit_pad(io, preq, sbl, size, em);
+ return 0;
+ }
+
bio_iter_init(&biter, sbl);
mutex_lock(&io->files.inode->i_mutex);
@@ -480,6 +499,22 @@ try_again:
return err;
}
+static void
+dio_post_submit(struct ploop_io *io, struct ploop_request * preq)
+{
+ sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log;
+ loff_t clu_siz = 1 << (preq->plo->cluster_log + 9);
+ int err;
+
+ err = io->files.file->f_op->fallocate(io->files.file,
+ FALLOC_FL_CONVERT_UNWRITTEN,
+ (loff_t)sec << 9, clu_siz);
+ if (err) {
+ PLOOP_REQ_SET_ERROR(preq, err);
+ set_bit(PLOOP_S_ABORT, &preq->plo->state);
+ }
+}
+
/* Submit the whole cluster. If preq contains only partial data
* within the cluster, pad the rest of cluster with zeros.
*/
@@ -1854,6 +1889,7 @@ static struct ploop_io_ops ploop_io_ops_direct =
.alloc = dio_alloc_sync,
.submit = dio_submit,
.submit_alloc = dio_submit_alloc,
+ .post_submit = dio_post_submit,
.disable_merge = dio_disable_merge,
.fastmap = dio_fastmap,
.read_page = dio_read_page,
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index b8c7130..c9fb1b0 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -142,6 +142,7 @@ struct ploop_io_ops
struct bio_list *sbl, iblock_t iblk, unsigned int size);
void (*submit_alloc)(struct ploop_io *, struct ploop_request *,
struct bio_list *sbl, unsigned int size);
+ void (*post_submit)(struct ploop_io *, struct ploop_request *);
int (*disable_merge)(struct ploop_io * io, sector_t isector, unsigned int len);
int (*fastmap)(struct ploop_io * io, struct bio *orig_bio,
@@ -459,6 +460,7 @@ enum
PLOOP_REQ_FORCE_FUA, /*force fua of req write I/O by engine */
PLOOP_REQ_FORCE_FLUSH, /*force flush by engine */
PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */
+ PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
};
enum
@@ -561,6 +563,10 @@ struct ploop_request
/* # bytes in tail of image file to prealloc on behalf of this preq */
loff_t prealloc_size;
+
+ /* if the engine starts operation on particular io, let's finish
+ * the operation on the same io (see io.ops->post_submit) */
+ struct ploop_io *eng_io;
};
static inline struct ploop_delta * ploop_top_delta(struct ploop_device * plo)
More information about the Devel
mailing list