[Devel] [PATCH rh7 3/3] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests (v3)
Dmitry Monakhov
dmonakhov at openvz.org
Fri Jul 29 00:15:42 PDT 2016
Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
> Dima,
>
>
> One week elapsed, still no feedback from you. Do you have something
> against this patch?
Sorry for the delay, Max. I was overloaded by the pending crap I had collected
before my vacation, and lost your email. Again, sorry.
The whole patch looks good. Thank you for your redesign.
BTW: We definitely need regression testing for the original bug (broken
barriers and others). I'm working on that.
>
>
> Thanks,
>
> Maxim
>
>
> On 07/20/2016 11:21 PM, Maxim Patlasov wrote:
>> Commit 9f860e606 introduced an engine to delay fsync: after doing
>> fallocate(FALLOC_FL_CONVERT_UNWRITTEN), dio_post_submit marks
>> the io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
>> later, when an incoming FLUSH|FUA arrives.
>>
>> That was deemed as important because (PSBM-47026):
>>
>>> This optimization becomes more important due to the fact that customers tend to use pcompact heavily => ploop images grow each day.
>> Now, we can easily re-use the engine to delay fsync for reloc
>> requests as well. As explained in the description of commit
>> 5aa3fe09:
>>
>>> 1->read_data_from_old_post
>>> 2->write_to_new_pos
>>> ->submit_alloc
>>> ->submit_pad
>>> ->post_submit->convert_unwritten
>>> 3->update_index ->write_page with FLUSH|FUA
>>> 4->nullify_old_pos
>>> 5->issue_flush
>> by the time of step 3 the extent conversion is not yet stable because it
>> belongs to an uncommitted transaction. But instead of doing fsync
>> inside ->post_submit, we can fsync later, as the very first step
>> of write_page for index_update.
>>
>> Changed in v2:
>> - process delayed fsync asynchronously, via PLOOP_E_FSYNC_PENDED eng_state
>>
>> Changed in v3:
>> - use extra arg for ploop_index_wb_proceed_or_delay() instead of ad-hoc PLOOP_REQ_FSYNC_IF_DELAYED
>>
>> https://jira.sw.ru/browse/PSBM-47026
>>
>> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
>> ---
>> drivers/block/ploop/dev.c | 9 +++++++--
>> drivers/block/ploop/map.c | 32 ++++++++++++++++++++++++++++----
>> include/linux/ploop/ploop.h | 1 +
>> 3 files changed, 36 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
>> index df3eec9..ed60b1f 100644
>> --- a/drivers/block/ploop/dev.c
>> +++ b/drivers/block/ploop/dev.c
>> @@ -2720,6 +2720,11 @@ restart:
>> ploop_index_wb_complete(preq);
>> break;
>>
>> + case PLOOP_E_FSYNC_PENDED:
>> + /* fsync done */
>> + ploop_index_wb_proceed(preq);
>> + break;
>> +
>> default:
>> BUG();
>> }
>> @@ -4106,7 +4111,7 @@ static void ploop_relocate(struct ploop_device * plo)
>> preq->bl.tail = preq->bl.head = NULL;
>> preq->req_cluster = 0;
>> preq->req_size = 0;
>> - preq->req_rw = WRITE_SYNC|REQ_FUA;
>> + preq->req_rw = WRITE_SYNC;
>> preq->eng_state = PLOOP_E_ENTRY;
>> preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
>> preq->error = 0;
>> @@ -4410,7 +4415,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
>> preq->bl.tail = preq->bl.head = NULL;
>> preq->req_cluster = ~0U; /* uninitialized */
>> preq->req_size = 0;
>> - preq->req_rw = WRITE_SYNC|REQ_FUA;
>> + preq->req_rw = WRITE_SYNC;
>> preq->eng_state = PLOOP_E_ENTRY;
>> preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
>> preq->error = 0;
>> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
>> index 5f7fd66..715dc15 100644
>> --- a/drivers/block/ploop/map.c
>> +++ b/drivers/block/ploop/map.c
>> @@ -915,6 +915,24 @@ void ploop_index_wb_proceed(struct ploop_request * preq)
>> put_page(page);
>> }
>>
>> +static void ploop_index_wb_proceed_or_delay(struct ploop_request * preq,
>> + int do_fsync_if_delayed)
>> +{
>> + if (do_fsync_if_delayed) {
>> + struct map_node * m = preq->map;
>> + struct ploop_delta * top_delta = map_top_delta(m->parent);
>> + struct ploop_io * top_io = &top_delta->io;
>> +
>> + if (test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state)) {
>> + preq->eng_state = PLOOP_E_FSYNC_PENDED;
>> + ploop_add_req_to_fsync_queue(preq);
>> + return;
>> + }
>> + }
>> +
>> + ploop_index_wb_proceed(preq);
>> +}
>> +
>> /* Data write is commited. Now we need to update index. */
>>
>> void ploop_index_update(struct ploop_request * preq)
>> @@ -927,6 +945,7 @@ void ploop_index_update(struct ploop_request * preq)
>> int old_level;
>> struct page * page;
>> unsigned long state = READ_ONCE(preq->state);
>> + int do_fsync_if_delayed = 0;
>>
>> /* No way back, we are going to initiate index write. */
>>
>> @@ -985,10 +1004,12 @@ void ploop_index_update(struct ploop_request * preq)
>> preq->req_rw &= ~REQ_FLUSH;
>>
>> /* Relocate requires consistent index update */
>> - if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
>> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
>> preq->req_index_update_rw |= (REQ_FLUSH | REQ_FUA);
>> + do_fsync_if_delayed = 1;
>> + }
>>
>> - ploop_index_wb_proceed(preq);
>> + ploop_index_wb_proceed_or_delay(preq, do_fsync_if_delayed);
>> return;
>>
>> enomem:
>> @@ -1109,6 +1130,7 @@ static void map_wb_complete(struct map_node * m, int err)
>> int delayed = 0;
>> unsigned int idx;
>> unsigned long rw;
>> + int do_fsync_if_delayed = 0;
>>
>> /* First, complete processing of written back indices,
>> * finally instantiate indices in mapping cache.
>> @@ -1206,8 +1228,10 @@ static void map_wb_complete(struct map_node * m, int err)
>>
>> state = READ_ONCE(preq->state);
>> /* Relocate requires consistent index update */
>> - if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
>> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
>> rw |= (REQ_FLUSH | REQ_FUA);
>> + do_fsync_if_delayed = 1;
>> + }
>>
>> preq->eng_state = PLOOP_E_INDEX_WB;
>> get_page(page);
>> @@ -1234,7 +1258,7 @@ static void map_wb_complete(struct map_node * m, int err)
>> plo->st.map_multi_writes++;
>>
>> main_preq->req_index_update_rw = rw;
>> - ploop_index_wb_proceed(main_preq);
>> + ploop_index_wb_proceed_or_delay(main_preq, do_fsync_if_delayed);
>> }
>>
>> void
>> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
>> index d8e01b6..33733e9 100644
>> --- a/include/linux/ploop/ploop.h
>> +++ b/include/linux/ploop/ploop.h
>> @@ -513,6 +513,7 @@ enum
>> PLOOP_E_ZERO_INDEX, /* Zeroing index of free block; original request
>> can use .submit on completion */
>> PLOOP_E_DELTA_ZERO_INDEX,/* the same but for PLOOP_E_DELTA_READ */
>> + PLOOP_E_FSYNC_PENDED, /* INDEX_WB needs io->ops->sync() to proceed */
>> };
>>
>> #define BIO_BDEV_REUSED 14 /* io_context is stored in bi_bdev */
>>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://lists.openvz.org/pipermail/devel/attachments/20160729/941a038d/attachment.sig>
More information about the Devel
mailing list