[Devel] [PATCH RHEL9 COMMIT] fuse: sync protocol for accelerated cses
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Nov 1 22:47:17 MSK 2023
The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-284.25.1.vz9.30.8
------>
commit cda38993b3b9843a34ac6af7b5b5b08416cd3406
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date: Fri Oct 6 18:43:37 2023 +0800
fuse: sync protocol for accelerated cses
Syncs are still sent along the chain to user space; there are no
fanouts and no kernel-based syncs. The complexity is that we must
keep the state of the user-space csd and the kernel in sync; the
solution found here is pretty good, though not the best.
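For orientation, here is a minimal userspace sketch (not part of the
patch) of the dirty-bit handshake the hunks below implement. The
kernel's test_and_set_bit()/test_and_clear_bit() are mocked with GCC
atomic builtins and the surrounding structures are reduced to the one
field the protocol touches, so treat it as an illustration of the
idea, not as the kernel API:

    #include <stdio.h>
    #include <stdbool.h>

    #define CSL_SF_DIRTY 0  /* bit number, as in pcs_map.h below */

    struct cs_record {
            unsigned long flags;
    };

    /* Userspace stand-ins for the kernel's atomic bitops. */
    static bool test_and_set_bit(int nr, unsigned long *addr)
    {
            unsigned long mask = 1UL << nr;
            return __atomic_fetch_or(addr, mask, __ATOMIC_SEQ_CST) & mask;
    }

    static bool test_and_clear_bit(int nr, unsigned long *addr)
    {
            unsigned long mask = 1UL << nr;
            return __atomic_fetch_and(addr, ~mask, __ATOMIC_SEQ_CST) & mask;
    }

    /* Accelerated write completed: mark the replica locally dirty and
     * re-evaluate the map's dirty status only on the 0->1 transition,
     * as __pcs_csa_write_final_completion() does below. */
    static void write_completed(struct cs_record *rec)
    {
            if (!test_and_set_bit(CSL_SF_DIRTY, &rec->flags))
                    printf("0->1: pcs_map_reevaluate_dirty_status()\n");
    }

    /* Flush message is being prepared: consume the local bit, force
     * the record dirty vstorage-wise and drop PCS_CS_IO_SEQ so the CS
     * syncs even if the chunk looks clean, as prepare_map_flush_msg()
     * does below. */
    static void prepare_flush(struct cs_record *rec)
    {
            if (test_and_clear_bit(CSL_SF_DIRTY, &rec->flags))
                    printf("1->0: force_dirty(), clear PCS_CS_IO_SEQ\n");
    }

    int main(void)
    {
            struct cs_record rec = { .flags = 0 };

            write_completed(&rec); /* sets bit, triggers reevaluation */
            write_completed(&rec); /* bit already set, nothing extra */
            prepare_flush(&rec);   /* clears bit, forces the sync */
            prepare_flush(&rec);   /* already clean, no-op */
            return 0;
    }

Because the bit lives in struct pcs_cs_record::flags, it survives
cs-list regeneration: transfer_sync_data() below copies flags to the
new list together with the rest of the sync state.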
https://pmc.acronis.work/browse/VSTOR-54040
Signed-off-by: Alexey Kuznetsov <kuznet at acronis.com>
Feature: vStorage
---
fs/fuse/kio/pcs/pcs_cs_accel.c | 7 +++++++
fs/fuse/kio/pcs/pcs_map.c | 46 +++++++++++++++++++++++++++++++++++++++---
fs/fuse/kio/pcs/pcs_map.h | 6 ++++++
3 files changed, 56 insertions(+), 3 deletions(-)
diff --git a/fs/fuse/kio/pcs/pcs_cs_accel.c b/fs/fuse/kio/pcs/pcs_cs_accel.c
index 8a7e7f902ef2..33a60cae936b 100644
--- a/fs/fuse/kio/pcs/pcs_cs_accel.c
+++ b/fs/fuse/kio/pcs/pcs_cs_accel.c
@@ -822,6 +822,11 @@ static void __pcs_csa_write_final_completion(struct pcs_accel_write_req *areq)
th->ts_io = ktime_to_us(ktime_get()) - th->misc;
th->misc &= PCS_CS_TS_MASK;
th->misc |= PCS_CS_IO_CLEAR | PCS_CS_IO_FANOUT;
+ if (!(ireq->dentry->fileinfo.attr.attrib & PCS_FATTR_IMMEDIATE_WRITE) &&
+ !ireq->dentry->no_write_delay) {
+ if (!test_and_set_bit(CSL_SF_DIRTY, &ireq->iochunk.csl->cs[areq->index].flags))
+ pcs_map_reevaluate_dirty_status(ireq->iochunk.map);
+ }
}
csa_complete_acr(ireq);
@@ -833,6 +838,8 @@ static void csa_sync_work(struct work_struct *w)
struct pcs_int_request * ireq = container_of(areq-areq->index, struct pcs_int_request, iochunk.acr.awr[0]);
int res;
+ clear_bit(CSL_SF_DIRTY, &ireq->iochunk.csl->cs[ireq->iochunk.cs_index].flags);
+
res = vfs_fsync(areq->iocb.ki_filp, 1);
if (res) {
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 00489f50bf09..df33b525ba95 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -810,6 +810,7 @@ void transfer_sync_data(struct pcs_cs_list * new_cs_list, struct pcs_cs_list * o
for (i = 0; i < new_cs_list->nsrv; i++) {
for (k = 0; k < old_cs_list->nsrv; k++) {
if (old_cs_list->cs[k].info.id.val == new_cs_list->cs[i].info.id.val) {
+ new_cs_list->cs[i].flags = old_cs_list->cs[k].flags;
new_cs_list->cs[i].sync = old_cs_list->cs[k].sync;
new_cs_list->cs[i].dirty_ts = old_cs_list->cs[k].dirty_ts;
break;
@@ -832,6 +833,23 @@ static int cs_is_dirty(struct cs_sync_state * sync)
return res >= 0;
}
+static void force_dirty(struct pcs_cs_record * rec, struct pcs_map_entry * m)
+{
+ if (!rec->sync.dirty_seq || pcs_sync_seq_compare(rec->sync.dirty_seq, rec->sync.sync_seq) < 0)
+ rec->sync.dirty_seq = rec->sync.sync_seq;
+ if (!rec->sync.dirty_epoch || pcs_sync_seq_compare(rec->sync.dirty_epoch, rec->sync.sync_epoch) < 0)
+ rec->sync.dirty_epoch = rec->sync.sync_epoch;
+ if (!rec->sync.dirty_integrity)
+ rec->sync.dirty_integrity = rec->info.integrity_seq;
+ if (!rec->sync.dirty_integrity || !rec->sync.dirty_epoch || !rec->sync.dirty_seq) {
+ FUSE_KTRACE(cc_from_maps(m->maps)->fc, "cannot dirty "NODE_FMT" [%u/%u,%u/%u,%u/%u]", NODE_ARGS(rec->info.id),
+ rec->sync.dirty_integrity, rec->info.integrity_seq,
+ rec->sync.dirty_epoch, rec->sync.sync_epoch,
+ rec->sync.dirty_seq, rec->sync.sync_seq);
+ WARN_ON(1);
+ }
+}
+
static void evaluate_dirty_status(struct pcs_map_entry * m)
{
int i;
@@ -851,6 +869,9 @@ static void evaluate_dirty_status(struct pcs_map_entry * m)
BUG_ON(rec->info.integrity_seq == 0);
+ if (test_bit(CSL_SF_DIRTY, &rec->flags))
+ force_dirty(rec, m);
+
if (cs_is_dirty(&rec->sync)) {
if (rec->sync.dirty_integrity == rec->info.integrity_seq) {
if (!(m->flags & PCS_MAP_DIRTY)) {
@@ -880,6 +901,15 @@ static void evaluate_dirty_status(struct pcs_map_entry * m)
}
}
+/* Called when we do something that dirties the map */
+void pcs_map_reevaluate_dirty_status(struct pcs_map_entry * m)
+{
+ spin_lock(&m->lock);
+ if (!(m->state & (PCS_MAP_DEAD|PCS_MAP_DIRTY)))
+ evaluate_dirty_status(m);
+ spin_unlock(&m->lock);
+}
+
int pcs_map_encode_req(struct pcs_map_entry*m, struct pcs_ioc_getmap *map, int direction)
{
int i;
@@ -904,7 +934,7 @@ int pcs_map_encode_req(struct pcs_map_entry*m, struct pcs_ioc_getmap *map, int d
map->state = 0;
if (m->state & PCS_MAP_READABLE)
map->state |= PCS_IOC_MAP_S_READ;
- if (m->state & PCS_MAP_WRITEABLE || direction)
+ if ((m->state & PCS_MAP_WRITEABLE) || direction)
map->state |= PCS_IOC_MAP_S_WRITE;
if (m->state & PCS_MAP_NEW)
map->state |= PCS_IOC_MAP_S_NEW;
@@ -920,7 +950,8 @@ int pcs_map_encode_req(struct pcs_map_entry*m, struct pcs_ioc_getmap *map, int d
map->cs_cnt = m->cs_list->nsrv;
for (i = 0; i < m->cs_list->nsrv; i++) {
map->cs[i] = m->cs_list->cs[i].info;
- if (!(m->flags & PCS_MAP_DIRTY) || !cs_is_dirty(&m->cs_list->cs[i].sync))
+ if (!(m->flags & PCS_MAP_DIRTY) || (!cs_is_dirty(&m->cs_list->cs[i].sync) &&
+ !test_bit(CSL_SF_DIRTY, &m->cs_list->cs[i].flags)))
map->cs[i].integrity_seq = 0;
}
}
@@ -3090,6 +3121,15 @@ static void prepare_map_flush_msg(struct pcs_map_entry * m, struct pcs_int_reque
for (i = 0; i < m->cs_list->nsrv; i++) {
struct pcs_cs_record * rec = m->cs_list->cs + i;
+ if (test_and_clear_bit(CSL_SF_DIRTY, &rec->flags)) {
+ /* If the chunk is dirty locally, force it to be dirty vstorage-wise
+ * and clear the magic PCS_CS_IO_SEQ flag to force the CS to sync
+ * even if the chunk already looks synced.
+ */
+ force_dirty(rec, m);
+ ioh->sync.misc &= ~PCS_CS_IO_SEQ;
+ }
+
if (cs_is_dirty(&rec->sync)) {
arr->cs_id = rec->info.id;
arr->sync.integrity_seq = rec->sync.dirty_integrity;
@@ -3194,7 +3234,7 @@ static int prepare_map_flush_ireq(struct pcs_map_entry *m,
sreq->flushreq.csl = NULL;
sreq->complete_cb = pcs_flushreq_complete;
sreq->flushreq.msg = msg;
- FUSE_KTRACE(sreq->cc->fc, "timed FLUSH " MAP_FMT, MAP_ARGS(m));
+ FUSE_KTRACE(sreq->cc->fc, "%s FLUSH " MAP_FMT, timer_sync ? "timed" : "user", MAP_ARGS(m));
if (timer_sync)
m->flags |= PCS_MAP_FLUSHING;
__pcs_map_get(m);
diff --git a/fs/fuse/kio/pcs/pcs_map.h b/fs/fuse/kio/pcs/pcs_map.h
index 6d771387d0cc..f990c9f9defa 100644
--- a/fs/fuse/kio/pcs/pcs_map.h
+++ b/fs/fuse/kio/pcs/pcs_map.h
@@ -83,6 +83,8 @@ enum
PCS_MAP_CLIENT_SIZE = 8, /* chunk size is controlled by client */
PCS_MAP_CLIENT_ALLOC = 0x10, /* chunk allocation is controlled by client */
PCS_MAP_CLIENT_PSIZE = 0x20, /* physical size of chunk on CS must be transmitted to MDS */
+ PCS_MAP_KDIRECT = 0x40, /* map is claimed by kernel side */
+ PCS_MAP_KDIRTY = 0x80, /* map is dirtied by kernel side */
};
struct cs_sync_state
@@ -99,9 +101,12 @@ struct pcs_cs_record
struct pcs_cs_info info;
struct cs_sync_state sync;
abs_time_t dirty_ts;
+ unsigned long flags;
struct pcs_cs_link cslink;
};
+#define CSL_SF_DIRTY 0
+
struct pcs_cs_list
{
struct pcs_map_entry __rcu *map; /* Currently modified under
@@ -220,6 +225,7 @@ void map_truncate_tail(struct pcs_mapping *mapping, u64 offset);
void pcs_cs_truncate_maps(struct pcs_cs *cs);
unsigned long pcs_map_shrink_scan(struct shrinker *, struct shrink_control *sc);
void ireq_drop_tokens(struct pcs_int_request * ireq);
+void pcs_map_reevaluate_dirty_status(struct pcs_map_entry * m);
extern unsigned int cs_io_locality;
extern unsigned int cs_enable_fanout;