[Devel] [PATCH RHEL9 COMMIT] fuse: sync protocol for accelerated cses

Konstantin Khorenko khorenko at virtuozzo.com
Wed Nov 1 22:47:17 MSK 2023


The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-284.25.1.vz9.30.8
------>
commit cda38993b3b9843a34ac6af7b5b5b08416cd3406
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date:   Fri Oct 6 18:43:37 2023 +0800

    fuse: sync protocol for accelerated cses
    
    Syncs are still sent along the chain to user space; there are no
    fanouts and no kernel-based syncs. The complexity is that the state
    of the user-space csd and the kernel must be kept in sync; the
    solution found is pretty good, though not the best.
    
    https://pmc.acronis.work/browse/VSTOR-54040
    
    Signed-off-by: Alexey Kuznetsov <kuznet at acronis.com>
    
    Feature: vStorage
---
 fs/fuse/kio/pcs/pcs_cs_accel.c |  7 +++++++
 fs/fuse/kio/pcs/pcs_map.c      | 46 +++++++++++++++++++++++++++++++++++++++---
 fs/fuse/kio/pcs/pcs_map.h      |  6 ++++++
 3 files changed, 56 insertions(+), 3 deletions(-)
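
In outline, the patch adds one per-replica dirty bit, CSL_SF_DIRTY, with a
set/transfer/promote/clear lifecycle. The standalone sketch below models that
lifecycle with simplified types; only CSL_SF_DIRTY and the set/clear points
come from the patch, everything else is an approximation:

    /* Model of the CSL_SF_DIRTY lifecycle; real code is in
     * fs/fuse/kio/pcs/.  Builds with: cc -std=c11 model.c */
    #include <stdio.h>
    #include <stdatomic.h>

    #define CSL_SF_DIRTY 0                  /* bit number, as in pcs_map.h */

    struct cs_record { atomic_ulong flags; };

    /* test_and_set_bit() analogue */
    static int test_and_set_dirty(struct cs_record *rec)
    {
        unsigned long old = atomic_fetch_or(&rec->flags, 1UL << CSL_SF_DIRTY);
        return !!(old & (1UL << CSL_SF_DIRTY));
    }

    /* 1. Accelerated write completion: mark the replica dirty and, on the
     *    0 -> 1 transition only, re-evaluate the map's dirty status. */
    static void on_write_complete(struct cs_record *rec)
    {
        if (!test_and_set_dirty(rec))
            printf("pcs_map_reevaluate_dirty_status()\n");
    }

    /* 2. Kernel fsync worker: clear the bit *before* syncing, so a racing
     *    write re-marks the record instead of being lost. */
    static void on_sync(struct cs_record *rec)
    {
        atomic_fetch_and(&rec->flags, ~(1UL << CSL_SF_DIRTY));
        /* ... vfs_fsync() would run here ... */
    }

    int main(void)
    {
        struct cs_record rec = { 0 };
        on_write_complete(&rec);    /* sets bit, triggers re-evaluation */
        on_write_complete(&rec);    /* already set: no extra work */
        on_sync(&rec);              /* cleared, then synced */
        return 0;
    }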

diff --git a/fs/fuse/kio/pcs/pcs_cs_accel.c b/fs/fuse/kio/pcs/pcs_cs_accel.c
index 8a7e7f902ef2..33a60cae936b 100644
--- a/fs/fuse/kio/pcs/pcs_cs_accel.c
+++ b/fs/fuse/kio/pcs/pcs_cs_accel.c
@@ -822,6 +822,11 @@ static void __pcs_csa_write_final_completion(struct pcs_accel_write_req *areq)
 		th->ts_io = ktime_to_us(ktime_get()) - th->misc;
 		th->misc &= PCS_CS_TS_MASK;
 		th->misc |= PCS_CS_IO_CLEAR | PCS_CS_IO_FANOUT;
+		if (!(ireq->dentry->fileinfo.attr.attrib & PCS_FATTR_IMMEDIATE_WRITE) &&
+		    !ireq->dentry->no_write_delay) {
+			if (!test_and_set_bit(CSL_SF_DIRTY, &ireq->iochunk.csl->cs[areq->index].flags))
+				pcs_map_reevaluate_dirty_status(ireq->iochunk.map);
+		}
 	}
 
 	csa_complete_acr(ireq);
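
The guard above marks the replica dirty only for delayed-write files: with
PCS_FATTR_IMMEDIATE_WRITE or no_write_delay the data path is effectively
synchronous and there is nothing left to flush later. A restatement of the
predicate (the helper name is ours, not the patch's):

    /* Hypothetical helper: does a completed write leave the replica
     * dirty, i.e. is write-back actually delayed for this file? */
    static inline int write_leaves_replica_dirty(const struct pcs_dentry_info *di)
    {
            return !(di->fileinfo.attr.attrib & PCS_FATTR_IMMEDIATE_WRITE) &&
                   !di->no_write_delay;
    }
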
@@ -833,6 +838,8 @@ static void csa_sync_work(struct work_struct *w)
 	struct pcs_int_request * ireq = container_of(areq-areq->index, struct pcs_int_request, iochunk.acr.awr[0]);
 	int res;
 
+	clear_bit(CSL_SF_DIRTY, &ireq->iochunk.csl->cs[ireq->iochunk.cs_index].flags);
+
 	res = vfs_fsync(areq->iocb.ki_filp, 1);
 
 	if (res) {
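
Note the ordering in csa_sync_work(): the bit is cleared before vfs_fsync(),
not after. A sketch of the race the chosen order avoids (our annotation, not
from the patch):

    /* If the worker cleared the bit *after* the fsync:
     *
     *   syncer                          writer
     *   ------                          ------
     *   vfs_fsync()                      <- durable up to here
     *                                   write completes
     *                                   set_bit(CSL_SF_DIRTY)
     *   clear_bit(CSL_SF_DIRTY)          <- new dirtiness lost
     *
     * Clearing first means any write completing during or after the
     * fsync re-sets the bit, so a later flush round still sees it; the
     * worst case is one redundant sync, never a missed one. */
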
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 00489f50bf09..df33b525ba95 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -810,6 +810,7 @@ void transfer_sync_data(struct pcs_cs_list * new_cs_list, struct pcs_cs_list * o
 	for (i = 0; i < new_cs_list->nsrv; i++) {
 		for (k = 0; k < old_cs_list->nsrv; k++) {
 			if (old_cs_list->cs[k].info.id.val == new_cs_list->cs[i].info.id.val) {
+				new_cs_list->cs[i].flags = old_cs_list->cs[k].flags;
 				new_cs_list->cs[i].sync = old_cs_list->cs[k].sync;
 				new_cs_list->cs[i].dirty_ts = old_cs_list->cs[k].dirty_ts;
 				break;
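
Carrying .flags across here matters: a map revalidation builds a fresh
cs_list, and without this line the kernel-side dirty bit would be silently
dropped together with the old list (our reading of the hunk):

    /* transfer_sync_data() matches replicas by CS id and now copies
     * .flags alongside .sync and .dirty_ts, so CSL_SF_DIRTY survives
     * cs_list replacement just like the user-space sync counters do. */
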
@@ -832,6 +833,23 @@ static int cs_is_dirty(struct cs_sync_state * sync)
 	return res >= 0;
 }
 
+static void force_dirty(struct pcs_cs_record * rec, struct pcs_map_entry * m)
+{
+	if (!rec->sync.dirty_seq || pcs_sync_seq_compare(rec->sync.dirty_seq, rec->sync.sync_seq) < 0)
+		rec->sync.dirty_seq = rec->sync.sync_seq;
+	if (!rec->sync.dirty_epoch || pcs_sync_seq_compare(rec->sync.dirty_epoch, rec->sync.sync_epoch) < 0)
+		rec->sync.dirty_epoch = rec->sync.sync_epoch;
+	if (!rec->sync.dirty_integrity)
+		rec->sync.dirty_integrity = rec->info.integrity_seq;
+	if (!rec->sync.dirty_integrity || !rec->sync.dirty_epoch || !rec->sync.dirty_seq) {
+		FUSE_KTRACE(cc_from_maps(m->maps)->fc, "cannot dirty "NODE_FMT" [%u/%u,%u/%u,%u/%u]", NODE_ARGS(rec->info.id),
+			    rec->sync.dirty_integrity, rec->info.integrity_seq,
+			    rec->sync.dirty_epoch, rec->sync.sync_epoch,
+			    rec->sync.dirty_seq, rec->sync.sync_seq);
+		WARN_ON(1);
+	}
+}
+
 static void evaluate_dirty_status(struct pcs_map_entry * m)
 {
 	int i;
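
force_dirty() promotes the last acknowledged sync counters into the dirty
counters, so a replica written from the kernel looks at least as dirty as its
last known synced state. A minimal standalone model, assuming
pcs_sync_seq_compare() is wraparound-safe serial arithmetic (sign of
(int)(a - b)):

    #include <stdint.h>

    /* stand-in for pcs_sync_seq_compare() */
    static int seq_cmp(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b);
    }

    struct sync_model {
            uint32_t dirty_seq, sync_seq;
            uint32_t dirty_epoch, sync_epoch;
    };

    /* Promote: a zero counter means "unset" and is always overwritten;
     * otherwise only move the dirty mark forward, never backward. */
    static void force_dirty_model(struct sync_model *s)
    {
            if (!s->dirty_seq || seq_cmp(s->dirty_seq, s->sync_seq) < 0)
                    s->dirty_seq = s->sync_seq;
            if (!s->dirty_epoch || seq_cmp(s->dirty_epoch, s->sync_epoch) < 0)
                    s->dirty_epoch = s->sync_epoch;
    }
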
@@ -851,6 +869,9 @@ static void evaluate_dirty_status(struct pcs_map_entry * m)
 
 		BUG_ON(rec->info.integrity_seq == 0);
 
+		if (test_bit(CSL_SF_DIRTY, &rec->flags))
+			force_dirty(rec, m);
+
 		if (cs_is_dirty(&rec->sync)) {
 			if (rec->sync.dirty_integrity == rec->info.integrity_seq) {
 				if (!(m->flags & PCS_MAP_DIRTY)) {
@@ -880,6 +901,15 @@ static void evaluate_dirty_status(struct pcs_map_entry * m)
 	}
 }
 
+/* Called when we make something which dirties map */
+void pcs_map_reevaluate_dirty_status(struct pcs_map_entry * m)
+{
+	spin_lock(&m->lock);
+	if (!(m->state & (PCS_MAP_DEAD|PCS_MAP_DIRTY)))
+		evaluate_dirty_status(m);
+	spin_unlock(&m->lock);
+}
+
 int pcs_map_encode_req(struct pcs_map_entry*m, struct pcs_ioc_getmap *map, int direction)
 {
 	int i;
@@ -904,7 +934,7 @@ int pcs_map_encode_req(struct pcs_map_entry*m, struct pcs_ioc_getmap *map, int d
 	map->state = 0;
 	if (m->state & PCS_MAP_READABLE)
 		map->state |= PCS_IOC_MAP_S_READ;
-	if (m->state & PCS_MAP_WRITEABLE || direction)
+	if ((m->state & PCS_MAP_WRITEABLE) || direction)
 		map->state |= PCS_IOC_MAP_S_WRITE;
 	if (m->state & PCS_MAP_NEW)
 		map->state |= PCS_IOC_MAP_S_NEW;
@@ -920,7 +950,8 @@ int pcs_map_encode_req(struct pcs_map_entry*m, struct pcs_ioc_getmap *map, int d
 		map->cs_cnt = m->cs_list->nsrv;
 		for (i = 0; i < m->cs_list->nsrv; i++) {
 			map->cs[i] = m->cs_list->cs[i].info;
-			if (!(m->flags & PCS_MAP_DIRTY) || !cs_is_dirty(&m->cs_list->cs[i].sync))
+			if (!(m->flags & PCS_MAP_DIRTY) || (!cs_is_dirty(&m->cs_list->cs[i].sync) &&
+							    !test_bit(CSL_SF_DIRTY, &m->cs_list->cs[i].flags)))
 				map->cs[i].integrity_seq = 0;
 		}
 	}
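
In pcs_map_encode_req(), a zeroed integrity_seq is, by our reading, how the
kernel tells the user-space csd "nothing to flush on this replica". With the
added test_bit() the replica is reported dirty when either side thinks it is
(our summary of the condition):

    map PCS_MAP_DIRTY | cs_is_dirty() | CSL_SF_DIRTY | integrity_seq sent
    ------------------+---------------+--------------+--------------------
    clear             | -             | -            | 0 (clean)
    set               | yes           | -            | kept (dirty)
    set               | no            | set          | kept (kernel-dirty)
    set               | no            | clear        | 0 (clean)
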
@@ -3090,6 +3121,15 @@ static void prepare_map_flush_msg(struct pcs_map_entry * m, struct pcs_int_reque
 
 		for (i = 0; i < m->cs_list->nsrv; i++) {
 			struct pcs_cs_record * rec = m->cs_list->cs + i;
+			if (test_and_clear_bit(CSL_SF_DIRTY, &rec->flags)) {
+				/* If chunk is dirty locally, force it to be dirty vstorage-wise
+				 * and clear magic PCS_CS_IO_SEQ flag to enforce CS to make sync
+				 * even if chunk looks already synced.
+				 */
+				force_dirty(rec, m);
+				ioh->sync.misc &= ~PCS_CS_IO_SEQ;
+			}
+
 			if (cs_is_dirty(&rec->sync)) {
 				arr->cs_id = rec->info.id;
 				arr->sync.integrity_seq = rec->sync.dirty_integrity;
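
The PCS_CS_IO_SEQ manipulation is the subtle part of the flush path. Our
reading, as an annotated restatement (the semantics of the flag are assumed,
not spelled out in the patch):

    /* With PCS_CS_IO_SEQ set, a CS may skip the physical sync when its
     * recorded sync sequence already covers the request.  Kernel-side
     * accelerated writes bypass that user-space accounting, so for a
     * record with CSL_SF_DIRTY the chunk can "look" synced while it is
     * not.  Dropping the flag forces the CS to fsync unconditionally. */
    ioh->sync.misc &= ~PCS_CS_IO_SEQ;
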
@@ -3194,7 +3234,7 @@ static int prepare_map_flush_ireq(struct pcs_map_entry *m,
 	sreq->flushreq.csl = NULL;
 	sreq->complete_cb = pcs_flushreq_complete;
 	sreq->flushreq.msg = msg;
-	FUSE_KTRACE(sreq->cc->fc, "timed FLUSH " MAP_FMT, MAP_ARGS(m));
+	FUSE_KTRACE(sreq->cc->fc, "%s FLUSH " MAP_FMT, timer_sync ? "timed" : "user", MAP_ARGS(m));
 	if (timer_sync)
 		m->flags |= PCS_MAP_FLUSHING;
 	__pcs_map_get(m);
diff --git a/fs/fuse/kio/pcs/pcs_map.h b/fs/fuse/kio/pcs/pcs_map.h
index 6d771387d0cc..f990c9f9defa 100644
--- a/fs/fuse/kio/pcs/pcs_map.h
+++ b/fs/fuse/kio/pcs/pcs_map.h
@@ -83,6 +83,8 @@ enum
 	PCS_MAP_CLIENT_SIZE	= 8,	/* chunk size is controlled by client */
 	PCS_MAP_CLIENT_ALLOC	= 0x10,	/* chunk allocation is controlled by client */
 	PCS_MAP_CLIENT_PSIZE	= 0x20, /* physical size of chunk on CS must be transmitted to MDS */
+	PCS_MAP_KDIRECT		= 0x40, /* map is claimed by kernel side */
+	PCS_MAP_KDIRTY		= 0x80, /* map is dirtied by kernel side */
 };
 
 struct cs_sync_state
@@ -99,9 +101,12 @@ struct pcs_cs_record
 	struct pcs_cs_info	info;
 	struct cs_sync_state	sync;
 	abs_time_t		dirty_ts;
+	unsigned long		flags;
 	struct pcs_cs_link	cslink;
 };
 
+#define CSL_SF_DIRTY		0
+
 struct pcs_cs_list
 {
 	struct pcs_map_entry __rcu *map;		/* Currently modified under
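
Note that CSL_SF_DIRTY is a bit number, not a mask: unlike the PCS_MAP_* mask
values in the enum above, it is meant for the atomic bitops on the new flags
word, e.g.:

    if (!test_and_set_bit(CSL_SF_DIRTY, &rec->flags))
            /* first dirtying write: re-evaluate the map */;
    if (test_and_clear_bit(CSL_SF_DIRTY, &rec->flags))
            /* consume the mark, as prepare_map_flush_msg() does */;
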
@@ -220,6 +225,7 @@ void map_truncate_tail(struct pcs_mapping *mapping, u64 offset);
 void pcs_cs_truncate_maps(struct pcs_cs *cs);
 unsigned long pcs_map_shrink_scan(struct shrinker *,  struct shrink_control *sc);
 void ireq_drop_tokens(struct pcs_int_request * ireq);
+void pcs_map_reevaluate_dirty_status(struct pcs_map_entry * m);
 
 extern unsigned int cs_io_locality;
 extern unsigned int cs_enable_fanout;

