[Devel] [PATCH rh7 3/3] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests (v3)

Maxim Patlasov mpatlasov at virtuozzo.com
Wed Jul 27 10:55:21 PDT 2016


Dima,


One week elapsed, still no feedback from you. Do you have something 
against this patch?


Thanks,

Maxim


On 07/20/2016 11:21 PM, Maxim Patlasov wrote:
> Commit 9f860e606 introduced an engine to delay fsync: doing
> fallocate(FALLOC_FL_CONVERT_UNWRITTEN) dio_post_submit marks
> io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
> later, when incoming FLUSH|FUA comes.
>
> That was deemed as important because (PSBM-47026):
>
>> This optimization becomes more important due to the fact that customers tend to use pcompact heavily => ploop images grow each day.
> Now, we can easily re-use the engine to delay fsync for reloc
> requests as well. As explained in the description of commit
> 5aa3fe09:
>
>>      1->read_data_from_old_post
>>      2->write_to_new_pos
>>        ->sumbit_alloc
>>           ->submit_pad
>>       ->post_submit->convert_unwritten
>>      3->update_index ->write_page with FLUSH|FUA
>>      4->nullify_old_pos
>>     5->issue_flush
> by the time of step 3 extent coversion is not yet stable because
> belongs to uncommitted transaction. But instead of doing fsync
> inside ->post_submit, we can fsync later, as the very first step
> of write_page for index_update.
>
> Changed in v2:
>   - process delayed fsync asynchronously, via PLOOP_E_FSYNC_PENDED eng_state
>
> Changed in v3:
>   - use extra arg for ploop_index_wb_proceed_or_delay() instead of ad-hoc PLOOP_REQ_FSYNC_IF_DELAYED
>
> https://jira.sw.ru/browse/PSBM-47026
>
> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
> ---
>   drivers/block/ploop/dev.c   |    9 +++++++--
>   drivers/block/ploop/map.c   |   32 ++++++++++++++++++++++++++++----
>   include/linux/ploop/ploop.h |    1 +
>   3 files changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index df3eec9..ed60b1f 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -2720,6 +2720,11 @@ restart:
>   		ploop_index_wb_complete(preq);
>   		break;
>   
> +	case PLOOP_E_FSYNC_PENDED:
> +		/* fsync done */
> +		ploop_index_wb_proceed(preq);
> +		break;
> +
>   	default:
>   		BUG();
>   	}
> @@ -4106,7 +4111,7 @@ static void ploop_relocate(struct ploop_device * plo)
>   	preq->bl.tail = preq->bl.head = NULL;
>   	preq->req_cluster = 0;
>   	preq->req_size = 0;
> -	preq->req_rw = WRITE_SYNC|REQ_FUA;
> +	preq->req_rw = WRITE_SYNC;
>   	preq->eng_state = PLOOP_E_ENTRY;
>   	preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
>   	preq->error = 0;
> @@ -4410,7 +4415,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
>   		preq->bl.tail = preq->bl.head = NULL;
>   		preq->req_cluster = ~0U; /* uninitialized */
>   		preq->req_size = 0;
> -		preq->req_rw = WRITE_SYNC|REQ_FUA;
> +		preq->req_rw = WRITE_SYNC;
>   		preq->eng_state = PLOOP_E_ENTRY;
>   		preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
>   		preq->error = 0;
> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
> index 5f7fd66..715dc15 100644
> --- a/drivers/block/ploop/map.c
> +++ b/drivers/block/ploop/map.c
> @@ -915,6 +915,24 @@ void ploop_index_wb_proceed(struct ploop_request * preq)
>   	put_page(page);
>   }
>   
> +static void ploop_index_wb_proceed_or_delay(struct ploop_request * preq,
> +					    int do_fsync_if_delayed)
> +{
> +	if (do_fsync_if_delayed) {
> +		struct map_node * m = preq->map;
> +		struct ploop_delta * top_delta = map_top_delta(m->parent);
> +		struct ploop_io * top_io = &top_delta->io;
> +
> +		if (test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state)) {
> +			preq->eng_state = PLOOP_E_FSYNC_PENDED;
> +			ploop_add_req_to_fsync_queue(preq);
> +			return;
> +		}
> +	}
> +
> +	ploop_index_wb_proceed(preq);
> +}
> +
>   /* Data write is commited. Now we need to update index. */
>   
>   void ploop_index_update(struct ploop_request * preq)
> @@ -927,6 +945,7 @@ void ploop_index_update(struct ploop_request * preq)
>   	int old_level;
>   	struct page * page;
>   	unsigned long state = READ_ONCE(preq->state);
> +	int do_fsync_if_delayed = 0;
>   
>   	/* No way back, we are going to initiate index write. */
>   
> @@ -985,10 +1004,12 @@ void ploop_index_update(struct ploop_request * preq)
>   	preq->req_rw &= ~REQ_FLUSH;
>   
>   	/* Relocate requires consistent index update */
> -	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> +	if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
>   		preq->req_index_update_rw |= (REQ_FLUSH | REQ_FUA);
> +		do_fsync_if_delayed = 1;
> +	}
>   
> -	ploop_index_wb_proceed(preq);
> +	ploop_index_wb_proceed_or_delay(preq, do_fsync_if_delayed);
>   	return;
>   
>   enomem:
> @@ -1109,6 +1130,7 @@ static void map_wb_complete(struct map_node * m, int err)
>   	int delayed = 0;
>   	unsigned int idx;
>   	unsigned long rw;
> +	int do_fsync_if_delayed = 0;
>   
>   	/* First, complete processing of written back indices,
>   	 * finally instantiate indices in mapping cache.
> @@ -1206,8 +1228,10 @@ static void map_wb_complete(struct map_node * m, int err)
>   
>   			state = READ_ONCE(preq->state);
>   			/* Relocate requires consistent index update */
> -			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> +			if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
>   				rw |= (REQ_FLUSH | REQ_FUA);
> +				do_fsync_if_delayed = 1;
> +			}
>   
>   			preq->eng_state = PLOOP_E_INDEX_WB;
>   			get_page(page);
> @@ -1234,7 +1258,7 @@ static void map_wb_complete(struct map_node * m, int err)
>   	plo->st.map_multi_writes++;
>   
>   	main_preq->req_index_update_rw = rw;
> -	ploop_index_wb_proceed(main_preq);
> +	ploop_index_wb_proceed_or_delay(main_preq, do_fsync_if_delayed);
>   }
>   
>   void
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index d8e01b6..33733e9 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -513,6 +513,7 @@ enum
>   	PLOOP_E_ZERO_INDEX,	/* Zeroing index of free block; original request
>   				   can use .submit on completion */
>   	PLOOP_E_DELTA_ZERO_INDEX,/* the same but for PLOOP_E_DELTA_READ */
> +	PLOOP_E_FSYNC_PENDED,   /* INDEX_WB needs io->ops->sync() to proceed */
>   };
>   
>   #define BIO_BDEV_REUSED	14	/* io_context is stored in bi_bdev */
>



More information about the Devel mailing list