[Devel] [PATCH rh7] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests
Maxim Patlasov
mpatlasov at virtuozzo.com
Wed Jul 6 11:10:51 PDT 2016
Dima,
On 07/06/2016 04:58 AM, Dmitry Monakhov wrote:
> Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
>
>> Commit 9f860e606 introduced an engine to delay fsync: doing
>> fallocate(FALLOC_FL_CONVERT_UNWRITTEN) dio_post_submit marks
>> io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
>> later, when incoming FLUSH|FUA comes.
>>
>> That was deemed as important because (PSBM-47026):
>>
>>> This optimization becomes more important due to the fact that customers tend to use pcompact heavily => ploop images grow each day.
>> Now, we can easily re-use the engine to delay fsync for reloc
>> requests as well. As explained in the description of commit
>> 5aa3fe09:
>>
>>> 1->read_data_from_old_post
>>> 2->write_to_new_pos
>>> ->sumbit_alloc
>>> ->submit_pad
>>> ->post_submit->convert_unwritten
>>> 3->update_index ->write_page with FLUSH|FUA
>>> 4->nullify_old_pos
>>> 5->issue_flush
>> by the time of step 3 extent coversion is not yet stable because
>> belongs to uncommitted transaction. But instead of doing fsync
>> inside ->post_submit, we can fsync later, as the very first step
>> of write_page for index_update.
> NAK from me. What is advantage of this patch?
The advantage is the following: in case of BAT multi-updates, instead of
doing many fsync-s (one per dio_post_submit), we'll do only one (when
final ->write_page is called).
> Does it makes code more optimal? No
Yes, it does. In the same sense as 9f860e606: saving some fsync-s.
> Does it makes main ploop more asynchronous? No.
Correct, the patch optimizes ploop in the other way. It's not about
making ploop more asynchronous.
>
> If you want to make optimization then it is reasonable to
> queue preq with PLOOP_IO_FSYNC_DELAYED to top_io->fsync_queue
> before processing PLOOP_E_DATA_WBI state for preq with FUA
> So sequence will looks like follows:
> ->sumbit_alloc
> ->submit_pad
> ->post_submit->convert_unwritten-> tag PLOOP_IO_FSYNC_DELAYED
> ->ploop_req_state_process
> case PLOOP_E_DATA_WBI:
> if (preq->start & PLOOP_IO_FSYNC_DELAYED_FL) {
> preq->start &= ~PLOOP_IO_FSYNC_DELAYED_FL
> list_add_tail(&preq->list, &top_io->fsync_queue)
> return;
> }
> ##Let fsync_thread do it's work
> ->ploop_req_state_process
> case LOOP_E_DATA_WBI:
> update_index->write_page with FUA (FLUSH is not required because we already done fsync)
That's another type of optimization: making ploop more asynchronous. I
thought about it, but didn't come to conclusion whether it's worthy
w.r.t. adding more complexity to ploop-state-machine and possible bugs
introduced with that.
Thanks,
Maxim
>
>> https://jira.sw.ru/browse/PSBM-47026
>>
>> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
>> ---
>> drivers/block/ploop/dev.c | 4 ++--
>> drivers/block/ploop/io_direct.c | 25 ++++++++++++++++++++++++-
>> drivers/block/ploop/io_kaio.c | 3 ++-
>> drivers/block/ploop/map.c | 17 ++++++++++++-----
>> include/linux/ploop/ploop.h | 3 ++-
>> 5 files changed, 42 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
>> index e5f010b..40768b6 100644
>> --- a/drivers/block/ploop/dev.c
>> +++ b/drivers/block/ploop/dev.c
>> @@ -4097,7 +4097,7 @@ static void ploop_relocate(struct ploop_device * plo)
>> preq->bl.tail = preq->bl.head = NULL;
>> preq->req_cluster = 0;
>> preq->req_size = 0;
>> - preq->req_rw = WRITE_SYNC|REQ_FUA;
>> + preq->req_rw = WRITE_SYNC;
>> preq->eng_state = PLOOP_E_ENTRY;
>> preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
>> preq->error = 0;
>> @@ -4401,7 +4401,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
>> preq->bl.tail = preq->bl.head = NULL;
>> preq->req_cluster = ~0U; /* uninitialized */
>> preq->req_size = 0;
>> - preq->req_rw = WRITE_SYNC|REQ_FUA;
>> + preq->req_rw = WRITE_SYNC;
>> preq->eng_state = PLOOP_E_ENTRY;
>> preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
>> preq->error = 0;
>> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
>> index 1086850..0a5fb15 100644
>> --- a/drivers/block/ploop/io_direct.c
>> +++ b/drivers/block/ploop/io_direct.c
>> @@ -1494,13 +1494,36 @@ dio_read_page(struct ploop_io * io, struct ploop_request * preq,
>>
>> static void
>> dio_write_page(struct ploop_io * io, struct ploop_request * preq,
>> - struct page * page, sector_t sec, unsigned long rw)
>> + struct page * page, sector_t sec, unsigned long rw,
>> + int do_fsync_if_delayed)
>> {
>> if (!(io->files.file->f_mode & FMODE_WRITE)) {
>> PLOOP_FAIL_REQUEST(preq, -EBADF);
>> return;
>> }
>>
>> + if (do_fsync_if_delayed &&
>> + test_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state)) {
>> + struct ploop_device * plo = io->plo;
>> + u64 io_count;
>> + int err;
>> +
>> + spin_lock_irq(&plo->lock);
>> + io_count = io->io_count;
>> + spin_unlock_irq(&plo->lock);
>> +
>> + err = io->ops->sync(io);
>> + if (err) {
>> + PLOOP_FAIL_REQUEST(preq, -EBADF);
>> + return;
>> + }
>> +
>> + spin_lock_irq(&plo->lock);
>> + if (io_count == io->io_count && !(io_count & 1))
>> + clear_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
>> + spin_unlock_irq(&plo->lock);
>> + }
>> +
>> dio_io_page(io, rw | WRITE | REQ_SYNC, preq, page, sec);
>> }
>>
>> diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
>> index 85863df..0d731ef 100644
>> --- a/drivers/block/ploop/io_kaio.c
>> +++ b/drivers/block/ploop/io_kaio.c
>> @@ -614,7 +614,8 @@ kaio_read_page(struct ploop_io * io, struct ploop_request * preq,
>>
>> static void
>> kaio_write_page(struct ploop_io * io, struct ploop_request * preq,
>> - struct page * page, sector_t sec, unsigned long rw)
>> + struct page * page, sector_t sec, unsigned long rw,
>> + int do_fsync_if_delayed)
>> {
>> ploop_prepare_tracker(preq, sec);
>>
>> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
>> index 1883674..96e428b 100644
>> --- a/drivers/block/ploop/map.c
>> +++ b/drivers/block/ploop/map.c
>> @@ -910,6 +910,7 @@ void ploop_index_update(struct ploop_request * preq)
>> sector_t sec;
>> unsigned long rw;
>> unsigned long state = READ_ONCE(preq->state);
>> + int do_fsync_if_delayed = 0;
>>
>> /* No way back, we are going to initiate index write. */
>>
>> @@ -970,10 +971,13 @@ void ploop_index_update(struct ploop_request * preq)
>> preq->req_rw &= ~REQ_FLUSH;
>>
>> /* Relocate requires consistent index update */
>> - if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
>> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
>> rw |= (REQ_FLUSH | REQ_FUA);
>> + do_fsync_if_delayed = 1;
>> + }
>>
>> - top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
>> + top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw,
>> + do_fsync_if_delayed);
>>
>> put_page(page);
>> return;
>> @@ -1096,6 +1100,7 @@ static void map_wb_complete(struct map_node * m, int err)
>> unsigned int idx;
>> sector_t sec;
>> unsigned long rw;
>> + int do_fsync_if_delayed = 0;
>>
>> /* First, complete processing of written back indices,
>> * finally instantiate indices in mapping cache.
>> @@ -1193,8 +1198,10 @@ static void map_wb_complete(struct map_node * m, int err)
>>
>> state = READ_ONCE(preq->state);
>> /* Relocate requires consistent index update */
>> - if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
>> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
>> rw |= (REQ_FLUSH | REQ_FUA);
>> + do_fsync_if_delayed = 1;
>> + }
>>
>> preq->eng_state = PLOOP_E_INDEX_WB;
>> get_page(page);
>> @@ -1221,8 +1228,8 @@ static void map_wb_complete(struct map_node * m, int err)
>> plo->st.map_multi_writes++;
>> top_delta->ops->map_index(top_delta, m->mn_start, &sec);
>>
>> - top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
>> - rw);
>> + top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, rw,
>> + do_fsync_if_delayed);
>> put_page(page);
>> }
>>
>> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
>> index deee8a7..b03565b 100644
>> --- a/include/linux/ploop/ploop.h
>> +++ b/include/linux/ploop/ploop.h
>> @@ -164,7 +164,8 @@ struct ploop_io_ops
>> void (*read_page)(struct ploop_io * io, struct ploop_request * preq,
>> struct page * page, sector_t sec);
>> void (*write_page)(struct ploop_io * io, struct ploop_request * preq,
>> - struct page * page, sector_t sec, unsigned long rw);
>> + struct page * page, sector_t sec, unsigned long rw,
>> + int do_fsync_if_delayed);
>>
>>
>> int (*sync_read)(struct ploop_io * io, struct page * page,
More information about the Devel
mailing list