[Devel] [vzlin-dev] [PATCH rh7 2/2] ploop: use FALLOC_FL_CONVERT_UNWRITTEN in io_direct
Dmitry Monakhov
dmonakhov at openvz.org
Wed Mar 16 05:48:14 PDT 2016
Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
> The patch implements an optimization of submit_alloc path for pio_direct:
> write user data directly to host block-device (according to fiemap info)
> and then use fallocate(FALLOC_FL_CONVERT_UNWRITTEN). This avoids expensive
> pagecache_write_begin/copy/pagecache_write_end mechanism (saves ~750usec
> per megabyte in my experiments). The feature improves performance
> significantly. Before the patch:
Acked-by: Dmitry Monakhov <dmonakhov at parallels.com>
>
> # dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 44.2684 s, 243 MB/s
>
> after the patch:
>
> # dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 29.3066 s, 366 MB/s
>
> https://jira.sw.ru/browse/PSBM-22381
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
> drivers/block/ploop/dev.c | 7 +++++++
> drivers/block/ploop/io_direct.c | 42 ++++++++++++++++++++++++++++++++++++---
> include/linux/ploop/ploop.h | 6 ++++++
> 3 files changed, 52 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index feb56c7..1da073c 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2294,7 +2294,14 @@ static void ploop_req_state_process(struct ploop_request * preq)
> preq->prealloc_size = 0; /* only for sanity */
> }
>
> + if (test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state)) {
> + preq->eng_io->ops->post_submit(preq->eng_io, preq);
> + clear_bit(PLOOP_REQ_POST_SUBMIT, &preq->state);
> + preq->eng_io = NULL;
> + }
> +
> restart:
> + BUG_ON(test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state));
> __TRACE("ST %p %u %lu\n", preq, preq->req_cluster, preq->eng_state);
> switch (preq->eng_state) {
> case PLOOP_E_ENTRY:
> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
> index 514af4b..d6f0f21 100644
> --- a/drivers/block/ploop/io_direct.c
> +++ b/drivers/block/ploop/io_direct.c
> @@ -359,6 +359,10 @@ static inline void bzero_page(struct page *page)
> kunmap_atomic(kaddr);
> }
>
> +static void
> +dio_submit_pad(struct ploop_io *io, struct ploop_request * preq,
> + struct bio_list * sbl, unsigned int size,
> + struct extent_map *em);
>
> static int
> cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
> @@ -371,6 +375,8 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
> struct bio_iter biter;
> loff_t new_size;
> loff_t used_pos;
> + bool may_fallocate = io->files.file->f_op->fallocate &&
> + io->files.flags & EXT4_EXTENTS_FL;
>
> trace_cached_submit(preq);
>
> @@ -379,9 +385,7 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
> used_pos = (io->alloc_head - 1) << (io->plo->cluster_log + 9);
>
> #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24)
> - if (use_prealloc && end_pos > used_pos &&
> - io->files.file->f_op->fallocate &&
> - io->files.flags & EXT4_EXTENTS_FL) {
> + if (use_prealloc && end_pos > used_pos && may_fallocate) {
> if (unlikely(io->prealloced_size < clu_siz)) {
> loff_t prealloc = end_pos;
> if (prealloc > PLOOP_MAX_PREALLOC(plo))
> @@ -405,6 +409,21 @@ try_again:
> }
> #endif
>
> + if (may_fallocate) {
> + sector_t sec = (sector_t)iblk << preq->plo->cluster_log;
> + sector_t len = 1 << preq->plo->cluster_log;
> + struct extent_map * em = extent_lookup_create(io, sec, len);
> +
> + if (unlikely(IS_ERR(em)))
> + return PTR_ERR(em);
> +
> + preq->iblock = iblk;
> + preq->eng_io = io;
> + set_bit(PLOOP_REQ_POST_SUBMIT, &preq->state);
> + dio_submit_pad(io, preq, sbl, size, em);
> + return 0;
> + }
> +
> bio_iter_init(&biter, sbl);
> mutex_lock(&io->files.inode->i_mutex);
>
> @@ -480,6 +499,22 @@ try_again:
> return err;
> }
>
> +static void
> +dio_post_submit(struct ploop_io *io, struct ploop_request * preq)
> +{
> + sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log;
> + loff_t clu_siz = 1 << (preq->plo->cluster_log + 9);
> + int err;
> +
> + err = io->files.file->f_op->fallocate(io->files.file,
> + FALLOC_FL_CONVERT_UNWRITTEN,
> + (loff_t)sec << 9, clu_siz);
> + if (err) {
> + PLOOP_REQ_SET_ERROR(preq, err);
> + set_bit(PLOOP_S_ABORT, &preq->plo->state);
> + }
> +}
> +
> /* Submit the whole cluster. If preq contains only partial data
> * within the cluster, pad the rest of cluster with zeros.
> */
> @@ -1854,6 +1889,7 @@ static struct ploop_io_ops ploop_io_ops_direct =
> .alloc = dio_alloc_sync,
> .submit = dio_submit,
> .submit_alloc = dio_submit_alloc,
> + .post_submit = dio_post_submit,
> .disable_merge = dio_disable_merge,
> .fastmap = dio_fastmap,
> .read_page = dio_read_page,
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index b8c7130..c9fb1b0 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -142,6 +142,7 @@ struct ploop_io_ops
> struct bio_list *sbl, iblock_t iblk, unsigned int size);
> void (*submit_alloc)(struct ploop_io *, struct ploop_request *,
> struct bio_list *sbl, unsigned int size);
> + void (*post_submit)(struct ploop_io *, struct ploop_request *);
>
> int (*disable_merge)(struct ploop_io * io, sector_t isector, unsigned int len);
> int (*fastmap)(struct ploop_io * io, struct bio *orig_bio,
> @@ -459,6 +460,7 @@ enum
> PLOOP_REQ_FORCE_FUA, /*force fua of req write I/O by engine */
> PLOOP_REQ_FORCE_FLUSH, /*force flush by engine */
> PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */
> + PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
> };
>
> enum
> @@ -561,6 +563,10 @@ struct ploop_request
>
> /* # bytes in tail of image file to prealloc on behalf of this preq */
> loff_t prealloc_size;
> +
> + /* if the engine starts operation on particular io, let's finish
> + * the operation on the same io (see io.ops->post_submit) */
> + struct ploop_io *eng_io;
> };
>
> static inline struct ploop_delta * ploop_top_delta(struct ploop_device * plo)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160316/45454032/attachment.sig>
More information about the Devel
mailing list