[Devel] [PATCH rh7] ploop: io_direct: delay f_op->fsync() until FLUSH|FUA
Maxim Patlasov
mpatlasov at virtuozzo.com
Thu May 26 14:08:01 PDT 2016
On 05/26/2016 07:58 AM, Dmitry Monakhov wrote:
> Maxim Patlasov <mpatlasov at virtuozzo.com> writes:
>
>> Once we convert an extent to initialized, it can be part of an uncompleted
>> journal transaction, so we have to force a transaction commit at some point.
>>
>> Instead of forcing transaction commit immediately, the patch delays it
>> until an incoming bio with FLUSH|FUA arrives. Then, as the very first
>> step of processing such a bio, we send the corresponding preq to fsync_thread
>> to perform f_op->fsync().
>>
>> As a very unlikely case, it is also possible that processing a FLUSH|FUA
>> bio itself results in converting extents. Then, the patch calls f_op->fsync()
>> immediately after conversion to preserve FUA semantics.
> ACK. With minor comments. See below:
Thank you for thoughtful review, Dima!
>> https://jira.sw.ru/browse/PSBM-47026
>>
>> Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
>> ---
>> drivers/block/ploop/dev.c | 70 ++++++++++++++++++++++++---------------
>> drivers/block/ploop/io_direct.c | 28 +++++++++++++++-
>> include/linux/ploop/ploop.h | 6 +++
>> 3 files changed, 76 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
>> index 654b60b..03fc289 100644
>> --- a/drivers/block/ploop/dev.c
>> +++ b/drivers/block/ploop/dev.c
>> @@ -1942,46 +1942,62 @@ err:
>>
>> /* Main preq state machine */
>>
>> +static inline bool preq_is_special(struct ploop_request * preq)
>> +{
>> + return test_bit(PLOOP_REQ_MERGE, &preq->state) ||
>> + test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
>> + test_bit(PLOOP_REQ_RELOC_S, &preq->state) ||
>> + test_bit(PLOOP_REQ_DISCARD, &preq->state) ||
>> + test_bit(PLOOP_REQ_ZERO, &preq->state);
> Oh. It looks awful. Please use one atomic read here and other places.
> #define PLOOP_REQ_RELOC_A_FL (1 << PLOOP_REQ_RELOC_A)
> #define PLOOP_REQ_RELOC_S_FL (1 << PLOOP_REQ_RELOC_S)
>
> unsigned long state = READ_ONCE(preq->state);
> ...
> if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL) ...
OK. I'll fix this place in v2 of the patch and other places later in a
separate "clean-up" patch.
>> +}
>> +
>> static void
>> ploop_entry_request(struct ploop_request * preq)
>> {
>> struct ploop_device * plo = preq->plo;
>> struct ploop_delta * top_delta = ploop_top_delta(plo);
>> + struct ploop_io * top_io = &top_delta->io;
>> struct ploop_delta * delta;
>> int level;
>> int err;
>> iblock_t iblk;
>>
>> - /* Control request. */
>> - if (unlikely(preq->bl.head == NULL &&
>> - !test_bit(PLOOP_REQ_MERGE, &preq->state) &&
>> - !test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
>> - !test_bit(PLOOP_REQ_RELOC_S, &preq->state) &&
>> - !test_bit(PLOOP_REQ_DISCARD, &preq->state) &&
>> - !test_bit(PLOOP_REQ_ZERO, &preq->state))) {
>> - complete(plo->quiesce_comp);
>> - wait_for_completion(&plo->relax_comp);
>> - ploop_complete_request(preq);
>> - complete(&plo->relaxed_comp);
>> - return;
>> - }
>> + if (!preq_is_special(preq)) {
>> + /* Control request */
>> + if (unlikely(preq->bl.head == NULL)) {
>> + complete(plo->quiesce_comp);
>> + wait_for_completion(&plo->relax_comp);
>> + ploop_complete_request(preq);
>> + complete(&plo->relaxed_comp);
>> + return;
>> + }
>>
>> - /* Empty flush. */
>> - if (unlikely(preq->req_size == 0 &&
>> - !test_bit(PLOOP_REQ_MERGE, &preq->state) &&
>> - !test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
>> - !test_bit(PLOOP_REQ_RELOC_S, &preq->state) &&
>> - !test_bit(PLOOP_REQ_ZERO, &preq->state))) {
>> - if (preq->req_rw & REQ_FLUSH) {
>> - if (top_delta->io.ops->issue_flush) {
>> - top_delta->io.ops->issue_flush(&top_delta->io, preq);
>> - return;
>> - }
>> + /* Need to fsync before start handling FLUSH */
>> + if ((preq->req_rw & REQ_FLUSH) &&
>> + test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state) &&
>> + !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) {
>> + spin_lock_irq(&plo->lock);
>> + list_add_tail(&preq->list, &top_io->fsync_queue);
>> + if (waitqueue_active(&top_io->fsync_waitq))
>> + wake_up_interruptible(&top_io->fsync_waitq);
>> + spin_unlock_irq(&plo->lock);
>> + return;
>> }
>>
>> - preq->eng_state = PLOOP_E_COMPLETE;
>> - ploop_complete_request(preq);
>> - return;
>> + /* Empty flush or unknown zero-size request */
> Do you know any zero size requests instead of FLUSH?
No. The comment only states explicitly what has always been the behavior in
ploop: such requests are simply ignored (if they ever come to us).
>> + if (preq->req_size == 0) {
>> + if (preq->req_rw & REQ_FLUSH &&
>> + !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) {
>> + if (top_io->ops->issue_flush) {
>> + top_io->ops->issue_flush(top_io, preq);
>> + return;
>> + }
>> + }
>> +
>> + preq->eng_state = PLOOP_E_COMPLETE;
>> + ploop_complete_request(preq);
>> + return;
>> + }
>> }
>>
>> if (unlikely(test_bit(PLOOP_REQ_SYNC, &preq->state) &&
>> diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
>> index 8096110..a0b7a4e 100644
>> --- a/drivers/block/ploop/io_direct.c
>> +++ b/drivers/block/ploop/io_direct.c
>> @@ -514,17 +514,32 @@ end_write:
>> static void
>> dio_post_submit(struct ploop_io *io, struct ploop_request * preq)
>> {
>> + struct ploop_device *plo = preq->plo;
>> sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log;
>> loff_t clu_siz = 1 << (preq->plo->cluster_log + 9);
>> int err;
>>
>> file_start_write(io->files.file);
>> +
>> + /* Here io->io_count is even ... */
>> + spin_lock_irq(&plo->lock);
>> + io->io_count++;
>> + set_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
>> + spin_unlock_irq(&plo->lock);
>> +
>> err = io->files.file->f_op->fallocate(io->files.file,
>> FALLOC_FL_CONVERT_UNWRITTEN,
>> (loff_t)sec << 9, clu_siz);
>> - if (!err)
>> +
>> + /* highly unlikely case: FUA coming to a block not provisioned yet */
>> + if (!err && (preq->req_rw & REQ_FUA))
>> err = io->ops->sync(io);
>>
>> + spin_lock_irq(&plo->lock);
>> + io->io_count++;
>> + spin_unlock_irq(&plo->lock);
> Double entrance is not possible because ->post_submit is called
> synchronously from ploop_thread(). Am I correct?
Yes. ploop_thread is single-threaded and lockless, and the functions it calls
are never double-entered.
>> + /* and here io->io_count is even (+2) again. */
>> +
>> file_end_write(io->files.file);
>> if (err) {
>> PLOOP_REQ_SET_ERROR(preq, err);
>> @@ -782,6 +797,7 @@ static int dio_fsync_thread(void * data)
>> {
>> struct ploop_io * io = data;
>> struct ploop_device * plo = io->plo;
>> + u64 io_count;
>>
>> set_user_nice(current, -20);
>>
>> @@ -808,6 +824,7 @@ static int dio_fsync_thread(void * data)
>>
>> INIT_LIST_HEAD(&list);
>> list_splice_init(&io->fsync_queue, &list);
>> + io_count = io->io_count;
>> spin_unlock_irq(&plo->lock);
>>
>> /* filemap_fdatawrite() has been made already */
>> @@ -824,12 +841,17 @@ static int dio_fsync_thread(void * data)
>>
>> spin_lock_irq(&plo->lock);
>>
>> + if (io_count == io->io_count && !(io_count & 1))
>> + clear_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
>> +
>> while (!list_empty(&list)) {
>> struct ploop_request * preq;
>> preq = list_entry(list.next, struct ploop_request, list);
>> list_del(&preq->list);
>> if (err)
>> PLOOP_REQ_SET_ERROR(preq, err);
>> +
>> + __set_bit(PLOOP_REQ_FSYNC_DONE, &preq->state);
>> list_add_tail(&preq->list, &plo->ready_queue);
>> io->fsync_qlen--;
>> }
>> @@ -1522,6 +1544,10 @@ dio_fastmap(struct ploop_io * io, struct bio * orig_bio,
>> struct extent_map * em;
>> int i;
>>
>> + if (unlikely((orig_bio->bi_rw & REQ_FLUSH) &&
> blk layer implicitly treats REQ_FUA as REQ_FUA + later FLUSH. Let's do the same:
> + if (unlikely((orig_bio->bi_rw & (REQ_FUA|REQ_FLUSH)) &&
OK. Actually, when we see FUA while FSYNC_DELAYED is set, we don't know exactly
what sits inside that not-yet-committed transaction on the host fs. So it's a
good idea to fsync there anyway.
>> + test_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state)))
>> + return 1;
>> +
>> if (orig_bio->bi_size == 0) {
>> bio->bi_vcnt = 0;
>> bio->bi_sector = 0;
>> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
>> index ad36a91..49f3cda 100644
>> --- a/include/linux/ploop/ploop.h
>> +++ b/include/linux/ploop/ploop.h
>> @@ -87,6 +87,9 @@ struct ploop_file
>> * This struct describes how we do real IO on particular backing file.
>> */
>>
>> +enum {
>> + PLOOP_IO_FSYNC_DELAYED, /* Must f_op->fsync before FLUSH|FUA */
>> +};
>>
>> struct ploop_io
>> {
>> @@ -108,6 +111,8 @@ struct ploop_io
>> struct timer_list fsync_timer;
>>
>> struct ploop_io_ops *ops;
>> + unsigned long io_state;
>> + u64 io_count;
>> };
>>
>> struct ploop_io_ops
>> @@ -466,6 +471,7 @@ enum
>> PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
>> PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
>> PLOOP_REQ_ALLOW_READS, /* READs are allowed for given req_cluster */
>> + PLOOP_REQ_FSYNC_DONE, /* fsync_thread() performed f_op->fsync() */
>> };
>>
>> enum
More information about the Devel
mailing list