[Devel] [vzlin-dev] [PATCH rh7 2/2] ploop: use FALLOC_FL_CONVERT_UNWRITTEN in io_direct

Dmitry Monakhov dmonakhov at openvz.org
Wed Mar 16 05:48:14 PDT 2016


Maxim Patlasov <mpatlasov at virtuozzo.com> writes:

> The patch implements an optimization of submit_alloc path for pio_direct:
> write user data directly to host block-device (according to fiemap info)
> and then use fallocate(FALLOC_FL_CONVERT_UNWRITTEN). This avoids expensive
> pagecache_write_begin/copy/pagecache_write_end mechanism (saves ~750usec
> per megabyte in my experiments). The feature improves performance
> significantly. Before the patch:
Acked-by: Dmitry Monakhov <dmonakhov at parallels.com>
>
> # dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 44.2684 s, 243 MB/s
>
> after the patch:
>
> # dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync
> 10240+0 records in
> 10240+0 records out
> 10737418240 bytes (11 GB) copied, 29.3066 s, 366 MB/s
>
> https://jira.sw.ru/browse/PSBM-22381
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c       |    7 +++++++
>  drivers/block/ploop/io_direct.c |   42 ++++++++++++++++++++++++++++++++++++---
>  include/linux/ploop/ploop.h     |    6 ++++++
>  3 files changed, 52 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index feb56c7..1da073c 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2294,7 +2294,14 @@ static void ploop_req_state_process(struct ploop_request * preq)
>  		preq->prealloc_size = 0; /* only for sanity */
>  	}
>  
> +	if (test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state)) {
> +		preq->eng_io->ops->post_submit(preq->eng_io, preq);
> +		clear_bit(PLOOP_REQ_POST_SUBMIT, &preq->state);
> +		preq->eng_io = NULL;
> +	}
> +
>  restart:
> +	BUG_ON(test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state));
>  	__TRACE("ST %p %u %lu\n", preq, preq->req_cluster, preq->eng_state);
>  	switch (preq->eng_state) {
>  	case PLOOP_E_ENTRY:
> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
> index 514af4b..d6f0f21 100644
> --- a/drivers/block/ploop/io_direct.c
> +++ b/drivers/block/ploop/io_direct.c
> @@ -359,6 +359,10 @@ static inline void bzero_page(struct page *page)
>  	kunmap_atomic(kaddr);
>  }
>  
> +static void
> +dio_submit_pad(struct ploop_io *io, struct ploop_request * preq,
> +	       struct bio_list * sbl, unsigned int size,
> +	       struct extent_map *em);
>  
>  static int
>  cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
> @@ -371,6 +375,8 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
>  	struct bio_iter biter;
>  	loff_t new_size;
>  	loff_t used_pos;
> +	bool may_fallocate = io->files.file->f_op->fallocate &&
> +		io->files.flags & EXT4_EXTENTS_FL;
>  
>  	trace_cached_submit(preq);
>  
> @@ -379,9 +385,7 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * preq,
>  	used_pos = (io->alloc_head - 1) << (io->plo->cluster_log + 9);
>  
>  #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24)
> -	if (use_prealloc && end_pos > used_pos &&
> -	    io->files.file->f_op->fallocate &&
> -	    io->files.flags & EXT4_EXTENTS_FL) {
> +	if (use_prealloc && end_pos > used_pos && may_fallocate) {
>  		if (unlikely(io->prealloced_size < clu_siz)) {
>  			loff_t prealloc = end_pos;
>  			if (prealloc > PLOOP_MAX_PREALLOC(plo))
> @@ -405,6 +409,21 @@ try_again:
>  	}
>  #endif
>  
> +	if (may_fallocate) {
> +		sector_t sec = (sector_t)iblk << preq->plo->cluster_log;
> +		sector_t len = 1 << preq->plo->cluster_log;
> +		struct extent_map * em = extent_lookup_create(io, sec, len);
> +
> +		if (unlikely(IS_ERR(em)))
> +			return PTR_ERR(em);
> +
> +		preq->iblock = iblk;
> +		preq->eng_io = io;
> +		set_bit(PLOOP_REQ_POST_SUBMIT, &preq->state);
> +		dio_submit_pad(io, preq, sbl, size, em);
> +		return 0;
> +	}
> +
>  	bio_iter_init(&biter, sbl);
>  	mutex_lock(&io->files.inode->i_mutex);
>  
> @@ -480,6 +499,22 @@ try_again:
>  	return err;
>  }
>  
> +static void
> +dio_post_submit(struct ploop_io *io, struct ploop_request * preq)
> +{
> +	sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log;
> +	loff_t clu_siz = 1 << (preq->plo->cluster_log + 9);
> +	int err;
> +
> +	err = io->files.file->f_op->fallocate(io->files.file,
> +					      FALLOC_FL_CONVERT_UNWRITTEN,
> +					      (loff_t)sec << 9, clu_siz);
> +	if (err) {
> +		PLOOP_REQ_SET_ERROR(preq, err);
> +		set_bit(PLOOP_S_ABORT, &preq->plo->state);
> +	}
> +}
> +
>  /* Submit the whole cluster. If preq contains only partial data
>   * within the cluster, pad the rest of cluster with zeros.
>   */
> @@ -1854,6 +1889,7 @@ static struct ploop_io_ops ploop_io_ops_direct =
>  	.alloc		=	dio_alloc_sync,
>  	.submit		=	dio_submit,
>  	.submit_alloc	=	dio_submit_alloc,
> +	.post_submit	=	dio_post_submit,
>  	.disable_merge	=	dio_disable_merge,
>  	.fastmap	=	dio_fastmap,
>  	.read_page	=	dio_read_page,
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index b8c7130..c9fb1b0 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -142,6 +142,7 @@ struct ploop_io_ops
>  			  struct bio_list *sbl, iblock_t iblk, unsigned int size);
>  	void	(*submit_alloc)(struct ploop_io *, struct ploop_request *,
>  				struct bio_list *sbl, unsigned int size);
> +	void	(*post_submit)(struct ploop_io *, struct ploop_request *);
>  
>  	int	(*disable_merge)(struct ploop_io * io, sector_t isector, unsigned int len);
>  	int	(*fastmap)(struct ploop_io * io, struct bio *orig_bio,
> @@ -459,6 +460,7 @@ enum
>  	PLOOP_REQ_FORCE_FUA,	/*force fua of req write I/O by engine */
>  	PLOOP_REQ_FORCE_FLUSH,	/*force flush by engine */
>  	PLOOP_REQ_KAIO_FSYNC,	/*force image fsync by KAIO module */
> +	PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
>  };
>  
>  enum
> @@ -561,6 +563,10 @@ struct ploop_request
>  
>  	/* # bytes in tail of image file to prealloc on behalf of this preq */
>  	loff_t			prealloc_size;
> +
> +	/* if the engine starts operation on particular io, let's finish
> +	 * the operation on the same io (see io.ops->post_submit) */
> +	struct ploop_io	       *eng_io;
>  };
>  
>  static inline struct ploop_delta * ploop_top_delta(struct ploop_device * plo)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160316/45454032/attachment.sig>


More information about the Devel mailing list