[Devel] [PATCH rh7] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests

Dmitry Monakhov dmonakhov at openvz.org
Wed Jul 6 04:58:32 PDT 2016


Maxim Patlasov <mpatlasov at virtuozzo.com> writes:

> Commit 9f860e606 introduced an engine to delay fsync: doing
> fallocate(FALLOC_FL_CONVERT_UNWRITTEN) dio_post_submit marks
> io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
> later, when incoming FLUSH|FUA comes.
>
> That was deemed as important because (PSBM-47026):
>
>> This optimization becomes more important due to the fact that customers tend to use pcompact heavily => ploop images grow each day.
>
> Now, we can easily re-use the engine to delay fsync for reloc
> requests as well. As explained in the description of commit
> 5aa3fe09:
>
>>     1->read_data_from_old_post
>>     2->write_to_new_pos
>>       ->sumbit_alloc
>>          ->submit_pad
>>      ->post_submit->convert_unwritten
>>     3->update_index ->write_page with FLUSH|FUA
>>     4->nullify_old_pos
>>    5->issue_flush
>
> by the time of step 3 extent coversion is not yet stable because
> belongs to uncommitted transaction. But instead of doing fsync
> inside ->post_submit, we can fsync later, as the very first step
> of write_page for index_update.
NAK from me. What is advantage of this patch?
Does it makes code more optimal? No
Does it makes main ploop more asynchronous? No.

If you want to make optimization then it is reasonable to
queue preq with PLOOP_IO_FSYNC_DELAYED to top_io->fsync_queue
before processing PLOOP_E_DATA_WBI  state for  preq with FUA
So sequence will looks like follows:
->sumbit_alloc
  ->submit_pad
  ->post_submit->convert_unwritten-> tag PLOOP_IO_FSYNC_DELAYED
->ploop_req_state_process
  case PLOOP_E_DATA_WBI:
  if (preq->start & PLOOP_IO_FSYNC_DELAYED_FL) {
      preq->start &= ~PLOOP_IO_FSYNC_DELAYED_FL
      list_add_tail(&preq->list, &top_io->fsync_queue)
      return;
   }
##Let fsync_thread do it's work
->ploop_req_state_process
   case LOOP_E_DATA_WBI:
   update_index->write_page with FUA (FLUSH is not required because we  already done fsync)

>
> https://jira.sw.ru/browse/PSBM-47026
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c       |    4 ++--
>  drivers/block/ploop/io_direct.c |   25 ++++++++++++++++++++++++-
>  drivers/block/ploop/io_kaio.c   |    3 ++-
>  drivers/block/ploop/map.c       |   17 ++++++++++++-----
>  include/linux/ploop/ploop.h     |    3 ++-
>  5 files changed, 42 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index e5f010b..40768b6 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -4097,7 +4097,7 @@ static void ploop_relocate(struct ploop_device * plo)
>  	preq->bl.tail = preq->bl.head = NULL;
>  	preq->req_cluster = 0;
>  	preq->req_size = 0;
> -	preq->req_rw = WRITE_SYNC|REQ_FUA;
> +	preq->req_rw = WRITE_SYNC;
>  	preq->eng_state = PLOOP_E_ENTRY;
>  	preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
>  	preq->error = 0;
> @@ -4401,7 +4401,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
>  		preq->bl.tail = preq->bl.head = NULL;
>  		preq->req_cluster = ~0U; /* uninitialized */
>  		preq->req_size = 0;
> -		preq->req_rw = WRITE_SYNC|REQ_FUA;
> +		preq->req_rw = WRITE_SYNC;
>  		preq->eng_state = PLOOP_E_ENTRY;
>  		preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
>  		preq->error = 0;
> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
> index 1086850..0a5fb15 100644
> --- a/drivers/block/ploop/io_direct.c
> +++ b/drivers/block/ploop/io_direct.c
> @@ -1494,13 +1494,36 @@ dio_read_page(struct ploop_io * io, struct ploop_request * preq,
>  
>  static void
>  dio_write_page(struct ploop_io * io, struct ploop_request * preq,
> -	       struct page * page, sector_t sec, unsigned long rw)
> +	       struct page * page, sector_t sec, unsigned long rw,
> +	       int do_fsync_if_delayed)
>  {
>  	if (!(io->files.file->f_mode & FMODE_WRITE)) {
>  		PLOOP_FAIL_REQUEST(preq, -EBADF);
>  		return;
>  	}
>  
> +	if (do_fsync_if_delayed &&
> +	    test_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state)) {
> +		struct ploop_device * plo = io->plo;
> +		u64 io_count;
> +		int err;
> +
> +		spin_lock_irq(&plo->lock);
> +		io_count = io->io_count;
> +		spin_unlock_irq(&plo->lock);
> +
> +		err = io->ops->sync(io);
> +		if (err) {
> +			PLOOP_FAIL_REQUEST(preq, -EBADF);
> +			return;
> +		}
> +
> +		spin_lock_irq(&plo->lock);
> +		if (io_count == io->io_count && !(io_count & 1))
> +			clear_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
> +		spin_unlock_irq(&plo->lock);
> +	}
> +
>  	dio_io_page(io, rw | WRITE | REQ_SYNC, preq, page, sec);
>  }
>  
> diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
> index 85863df..0d731ef 100644
> --- a/drivers/block/ploop/io_kaio.c
> +++ b/drivers/block/ploop/io_kaio.c
> @@ -614,7 +614,8 @@ kaio_read_page(struct ploop_io * io, struct ploop_request * preq,
>  
>  static void
>  kaio_write_page(struct ploop_io * io, struct ploop_request * preq,
> -		 struct page * page, sector_t sec, unsigned long rw)
> +		struct page * page, sector_t sec, unsigned long rw,
> +		int do_fsync_if_delayed)
>  {
>  	ploop_prepare_tracker(preq, sec);
>  
> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
> index 1883674..96e428b 100644
> --- a/drivers/block/ploop/map.c
> +++ b/drivers/block/ploop/map.c
> @@ -910,6 +910,7 @@ void ploop_index_update(struct ploop_request * preq)
>  	sector_t sec;
>  	unsigned long rw;
>  	unsigned long state = READ_ONCE(preq->state);
> +	int do_fsync_if_delayed = 0;
>  
>  	/* No way back, we are going to initiate index write. */
>  
> @@ -970,10 +971,13 @@ void ploop_index_update(struct ploop_request * preq)
>  	preq->req_rw &= ~REQ_FLUSH;
>  
>  	/* Relocate requires consistent index update */
> -	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> +	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
>  		rw |= (REQ_FLUSH | REQ_FUA);
> +		do_fsync_if_delayed = 1;
> +	}
>  
> -	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
> +	top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw,
> +				      do_fsync_if_delayed);
>  
>  	put_page(page);
>  	return;
> @@ -1096,6 +1100,7 @@ static void map_wb_complete(struct map_node * m, int err)
>  	unsigned int idx;
>  	sector_t sec;
>  	unsigned long rw;
> +	int do_fsync_if_delayed = 0;
>  
>  	/* First, complete processing of written back indices,
>  	 * finally instantiate indices in mapping cache.
> @@ -1193,8 +1198,10 @@ static void map_wb_complete(struct map_node * m, int err)
>  
>  			state = READ_ONCE(preq->state);
>  			/* Relocate requires consistent index update */
> -			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> +			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
>  				rw |= (REQ_FLUSH | REQ_FUA);
> +				do_fsync_if_delayed = 1;
> +			}
>  
>  			preq->eng_state = PLOOP_E_INDEX_WB;
>  			get_page(page);
> @@ -1221,8 +1228,8 @@ static void map_wb_complete(struct map_node * m, int err)
>  	plo->st.map_multi_writes++;
>  	top_delta->ops->map_index(top_delta, m->mn_start, &sec);
>  
> -	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
> -				      rw);
> +	top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, rw,
> +				      do_fsync_if_delayed);
>  	put_page(page);
>  }
>  
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index deee8a7..b03565b 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -164,7 +164,8 @@ struct ploop_io_ops
>  	void	(*read_page)(struct ploop_io * io, struct ploop_request * preq,
>  			     struct page * page, sector_t sec);
>  	void	(*write_page)(struct ploop_io * io, struct ploop_request * preq,
> -			      struct page * page, sector_t sec, unsigned long rw);
> +			      struct page * page, sector_t sec, unsigned long rw,
> +			      int do_fsync_if_delayed);
>  
>  
>  	int	(*sync_read)(struct ploop_io * io, struct page * page,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160706/839cb2ed/attachment-0001.sig>


More information about the Devel mailing list