[Devel] [PATCH rh7 3/3] ploop: io_direct: delay f_op->fsync() until index_update for reloc requests (v2)
Maxim Patlasov
mpatlasov at virtuozzo.com
Wed Jul 20 19:01:59 PDT 2016
Commit 9f860e606 introduced an engine to delay fsync: after doing
fallocate(FALLOC_FL_CONVERT_UNWRITTEN), dio_post_submit marks
the io as PLOOP_IO_FSYNC_DELAYED to ensure that fsync happens
later, when an incoming FLUSH|FUA request arrives.
That was deemed important because (PSBM-47026):
> This optimization becomes more important due to the fact that customers tend to use pcompact heavily => ploop images grow each day.
Now, we can easily re-use the engine to delay fsync for reloc
requests as well. As explained in the description of commit
5aa3fe09:
> 1->read_data_from_old_post
> 2->write_to_new_pos
> ->sumbit_alloc
> ->submit_pad
> ->post_submit->convert_unwritten
> 3->update_index ->write_page with FLUSH|FUA
> 4->nullify_old_pos
> 5->issue_flush
By the time of step 3, the extent conversion is not yet stable because
it belongs to an uncommitted transaction. But instead of doing fsync
inside ->post_submit, we can fsync later, as the very first step
of write_page for index_update.
Changed in v2:
- process delayed fsync asynchronously, via PLOOP_E_FSYNC_PENDED eng_state
https://jira.sw.ru/browse/PSBM-47026
Signed-off-by: Maxim Patlasov <mpatlasov at virtuozzo.com>
---
drivers/block/ploop/dev.c | 9 +++++++--
drivers/block/ploop/map.c | 33 +++++++++++++++++++++++++++++----
include/linux/ploop/ploop.h | 2 ++
3 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index df3eec9..ed60b1f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -2720,6 +2720,11 @@ restart:
ploop_index_wb_complete(preq);
break;
+ case PLOOP_E_FSYNC_PENDED:
+ /* fsync done */
+ ploop_index_wb_proceed(preq);
+ break;
+
default:
BUG();
}
@@ -4106,7 +4111,7 @@ static void ploop_relocate(struct ploop_device * plo)
preq->bl.tail = preq->bl.head = NULL;
preq->req_cluster = 0;
preq->req_size = 0;
- preq->req_rw = WRITE_SYNC|REQ_FUA;
+ preq->req_rw = WRITE_SYNC;
preq->eng_state = PLOOP_E_ENTRY;
preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_A);
preq->error = 0;
@@ -4410,7 +4415,7 @@ static void ploop_relocblks_process(struct ploop_device *plo)
preq->bl.tail = preq->bl.head = NULL;
preq->req_cluster = ~0U; /* uninitialized */
preq->req_size = 0;
- preq->req_rw = WRITE_SYNC|REQ_FUA;
+ preq->req_rw = WRITE_SYNC;
preq->eng_state = PLOOP_E_ENTRY;
preq->state = (1 << PLOOP_REQ_SYNC) | (1 << PLOOP_REQ_RELOC_S);
preq->error = 0;
diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
index 5f7fd66..01e1064 100644
--- a/drivers/block/ploop/map.c
+++ b/drivers/block/ploop/map.c
@@ -915,6 +915,23 @@ void ploop_index_wb_proceed(struct ploop_request * preq)
put_page(page);
}
+static void ploop_index_wb_proceed_or_delay(struct ploop_request * preq)
+{
+ if (test_and_clear_bit(PLOOP_REQ_FSYNC_IF_DELAYED, &preq->state)) {
+ struct map_node * m = preq->map;
+ struct ploop_delta * top_delta = map_top_delta(m->parent);
+ struct ploop_io * top_io = &top_delta->io;
+
+ if (test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state)) {
+ preq->eng_state = PLOOP_E_FSYNC_PENDED;
+ ploop_add_req_to_fsync_queue(preq);
+ return;
+ }
+ }
+
+ ploop_index_wb_proceed(preq);
+}
+
/* Data write is commited. Now we need to update index. */
void ploop_index_update(struct ploop_request * preq)
@@ -985,10 +1002,12 @@ void ploop_index_update(struct ploop_request * preq)
preq->req_rw &= ~REQ_FLUSH;
/* Relocate requires consistent index update */
- if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+ if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
preq->req_index_update_rw |= (REQ_FLUSH | REQ_FUA);
+ set_bit(PLOOP_REQ_FSYNC_IF_DELAYED, &preq->state);
+ }
- ploop_index_wb_proceed(preq);
+ ploop_index_wb_proceed_or_delay(preq);
return;
enomem:
@@ -1109,6 +1128,7 @@ static void map_wb_complete(struct map_node * m, int err)
int delayed = 0;
unsigned int idx;
unsigned long rw;
+ int do_fsync_if_delayed = 0;
/* First, complete processing of written back indices,
* finally instantiate indices in mapping cache.
@@ -1206,8 +1226,10 @@ static void map_wb_complete(struct map_node * m, int err)
state = READ_ONCE(preq->state);
/* Relocate requires consistent index update */
- if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
+ if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) {
rw |= (REQ_FLUSH | REQ_FUA);
+ do_fsync_if_delayed = 1;
+ }
preq->eng_state = PLOOP_E_INDEX_WB;
get_page(page);
@@ -1233,8 +1255,11 @@ static void map_wb_complete(struct map_node * m, int err)
__TRACE("wbi2 %p %u %p\n", main_preq, main_preq->req_cluster, m);
plo->st.map_multi_writes++;
+ if (do_fsync_if_delayed)
+ set_bit(PLOOP_REQ_FSYNC_IF_DELAYED, &main_preq->state);
+
main_preq->req_index_update_rw = rw;
- ploop_index_wb_proceed(main_preq);
+ ploop_index_wb_proceed_or_delay(main_preq);
}
void
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index d8e01b6..fcbafee 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -483,6 +483,7 @@ enum
PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
PLOOP_REQ_FSYNC_DONE, /* fsync_thread() performed f_op->fsync() */
PLOOP_REQ_ISSUE_FLUSH, /* preq needs ->issue_flush before completing */
+ PLOOP_REQ_FSYNC_IF_DELAYED, /* preq needs fsync before index wb */
};
#define PLOOP_REQ_MERGE_FL (1 << PLOOP_REQ_MERGE)
@@ -513,6 +514,7 @@ enum
PLOOP_E_ZERO_INDEX, /* Zeroing index of free block; original request
can use .submit on completion */
PLOOP_E_DELTA_ZERO_INDEX,/* the same but for PLOOP_E_DELTA_READ */
+ PLOOP_E_FSYNC_PENDED, /* INDEX_WB needs io->ops->sync() to proceed */
};
#define BIO_BDEV_REUSED 14 /* io_context is stored in bi_bdev */
More information about the Devel
mailing list