[Devel] [PATCH VZ9 6/10] fs/fuse/kio: severe reordering when cs is congested

Alexey Kuznetsov kuznet at virtuozzo.com
Fri Jan 17 21:09:08 MSK 2025


The issue is old; it was found and fixed in user space,
but we forgot to update the kernel as well.

Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
---
 fs/fuse/kio/pcs/pcs_cluster.c      |  1 +
 fs/fuse/kio/pcs/pcs_cluster_core.c | 17 +++++++++++++++++
 fs/fuse/kio/pcs/pcs_cs.c           | 18 ++++++++++++++----
 fs/fuse/kio/pcs/pcs_map.c          | 15 ++++++++++++---
 fs/fuse/kio/pcs/pcs_req.h          |  4 +++-
 5 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_cluster.c b/fs/fuse/kio/pcs/pcs_cluster.c
index c64668a..c87313b 100644
--- a/fs/fuse/kio/pcs/pcs_cluster.c
+++ b/fs/fuse/kio/pcs/pcs_cluster.c
@@ -47,6 +47,7 @@ void pcs_sreq_complete(struct pcs_int_request *sreq)
 			 */
 			if (ireq_check_redo(sreq)) {
 				ireq_retry_inc(ireq);
+				sreq->flags &= ~IREQ_F_REQUEUED;
 				if (sreq->type != PCS_IREQ_CUSTOM) {
 					map_notify_soft_error(sreq);
 
diff --git a/fs/fuse/kio/pcs/pcs_cluster_core.c b/fs/fuse/kio/pcs/pcs_cluster_core.c
index 86fe185..10d61f2 100644
--- a/fs/fuse/kio/pcs/pcs_cluster_core.c
+++ b/fs/fuse/kio/pcs/pcs_cluster_core.c
@@ -250,3 +250,20 @@ void pcs_cc_requeue(struct pcs_cluster_core *cc, struct list_head *q)
 	if (was_idle)
 		queue_work(cc->wq, &cc->main_job);
 }
+
+void pcs_cc_requeue_head(struct pcs_cluster_core *cc, struct list_head *q)
+{
+	unsigned long flags;
+	int was_idle = 0;
+
+	if (list_empty(q))	/* nothing to requeue */
+		return;
+
+	spin_lock_irqsave(&cc->lock, flags);
+	was_idle = list_empty(&cc->work_queue);	/* sampled before the splice, under cc->lock */
+	list_splice_init(q, &cc->work_queue);	/* insert at the head of work_queue; q is left empty */
+	spin_unlock_irqrestore(&cc->lock, flags);
+
+	if (was_idle)	/* work_queue was empty before the splice: schedule main_job */
+		queue_work(cc->wq, &cc->main_job);
+}
diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index b2ad511..0707575 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -314,7 +314,7 @@ void cs_log_io_times(struct pcs_int_request * ireq, struct pcs_msg * resp, unsig
 			th->ino = ireq->dentry->fileinfo.attr.id;
 			th->type = h->hdr.type;
 			th->cses = 1;
-			th->__pad = 0;
+			th->__pad = ((!!(ireq->flags & IREQ_F_REQUEUED)) << 7) | smp_processor_id();
 			th->chid = (unsigned int)h->uid;
 
 			ch->csid = resp->rpc->peer_id.val;
@@ -1549,13 +1549,23 @@ int pcs_cs_cong_enqueue_cond(struct pcs_int_request *ireq, struct pcs_cs *cs)
 	int queued = 0;
 
 	spin_lock(&cs->lock);
-	if (cs->in_flight >= cs->eff_cwnd) {
-		list_add_tail(&ireq->list, &cs->cong_queue);
+	if (cs->in_flight >= cs->eff_cwnd ||
+	    (cs->cong_queue_len && !(ireq->flags & IREQ_F_REQUEUED))) {
+		queued = 1;
+		if (!list_empty(&cs->active_list)) {
+			list_add_tail(&ireq->list, &cs->active_list);
+			queued = 2;
+		} else
+			list_add_tail(&ireq->list, &cs->cong_queue);
 		cs->cong_queue_len++;
 		if (!ireq->qdepth)
 			ireq->qdepth = cs->cong_queue_len;
-		queued = 1;
 	}
+	if (queued && ireq->type == PCS_IREQ_IOCHUNK)
+		FUSE_KDTRACE(ireq->cc->fc, "queued%d {%p} cpu%u %d %u/%u " DENTRY_FMT " %llu+%llu",
+			     queued, ireq, smp_processor_id(), cs->cong_queue_len,
+			     cs->in_flight, cs->eff_cwnd, DENTRY_ARGS(ireq->dentry),
+			     ireq->iochunk.chunk + ireq->iochunk.offset, ireq->iochunk.size);
 	spin_unlock(&cs->lock);
 	return queued;
 }
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 221e9f8..5d1dd5a 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -1235,7 +1235,7 @@ void pcs_map_complete(struct pcs_map_entry *m, struct pcs_ioc_getmap *omap)
 	spin_unlock(&m->lock);
 
 	/* Success, resubmit waiting requests */
-	pcs_cc_requeue(cc_from_map(m), &queue);
+	pcs_cc_requeue_head(cc_from_map(m), &queue);
 	BUG_ON(!list_empty(&queue));
 	pcs_map_put(m);
 
@@ -1575,7 +1575,15 @@ static void pcs_cs_wakeup(struct pcs_cs * cs)
 			sreq = parent;
 		}
 
+		sreq->flags |= IREQ_F_REQUEUED;
+
 		if (sreq->type != PCS_IREQ_FLUSH) {
+			FUSE_KDTRACE(sreq->cc->fc,
+				     "wakeup {%p} cpu%u %d %u/%u " DENTRY_FMT " %llu+%llu",
+				     sreq, smp_processor_id(), cs->cong_queue_len,
+				     cs->in_flight, cs->eff_cwnd, DENTRY_ARGS(sreq->dentry),
+				     sreq->iochunk.chunk + sreq->iochunk.offset,
+				     sreq->iochunk.size);
 			map = pcs_find_get_map(sreq->dentry, sreq->iochunk.chunk +
 						   ((sreq->flags & IREQ_F_MAPPED) ? 0 : sreq->iochunk.offset));
 			if (map) {
@@ -2057,7 +2065,7 @@ static int pcs_cslist_submit_read(struct pcs_int_request *ireq, struct pcs_cs_li
 	allot = cs->eff_cwnd - cs->in_flight;
 	spin_unlock(&cs->lock);
 
-	if (allot < 0) {
+	if (allot < 0 || cs->cong_queue_len) {
 		if (pcs_cs_cong_enqueue_cond(ireq, cs))
 			return 0;
 	}
@@ -2109,7 +2117,7 @@ static int pcs_cslist_submit_read(struct pcs_int_request *ireq, struct pcs_cs_li
 		if (sreq == ireq)
 			return 0;
 
-		if (allot < 0) {
+		if (allot < 0 || !list_empty(&cs->cong_queue)) {
 			if (pcs_cs_cong_enqueue_cond(ireq, cs))
 				return 0;
 		}
@@ -3128,6 +3136,7 @@ static void pcs_flushreq_complete(struct pcs_int_request * sreq)
 			map_notify_error(m, sreq, &ioh->map_version, sreq->flushreq.csl);
 			pcs_deaccount_ireq(sreq, &sreq->error);
 			pcs_clear_error(&sreq->error);
+			sreq->flags &= ~IREQ_F_REQUEUED;
 
 			if (!(sreq->flags & IREQ_F_ONCE)) {
 				sreq->flags |= IREQ_F_ONCE;
diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
index e43d28e..1503e59 100644
--- a/fs/fuse/kio/pcs/pcs_req.h
+++ b/fs/fuse/kio/pcs/pcs_req.h
@@ -130,7 +130,8 @@ struct pcs_int_request
 #define IREQ_F_CRYPT		0x2000
 #define IREQ_F_ACCELERROR	0x4000
 #define IREQ_F_NOACCT		0x8000
-#define IREQ_F_FANOUT	       0x10000
+#define IREQ_F_FANOUT		0x10000
+#define IREQ_F_REQUEUED		0x20000
 
 	atomic_t		iocount;
 
@@ -350,6 +351,7 @@ static inline struct pcs_cluster_core *cc_from_krpc(struct pcs_krpc *krpc)
 
 void pcs_cc_submit(struct pcs_cluster_core *cc, struct pcs_int_request* ireq);
 void pcs_cc_requeue(struct pcs_cluster_core *cc, struct list_head * q);
+void pcs_cc_requeue_head(struct pcs_cluster_core *cc, struct list_head *q);
 void pcs_cc_update_storage_versions(struct pcs_cluster_core *cc, int version);
 /* FROM pcs_cluster.h */
 static inline void pcs_sreq_attach(struct pcs_int_request * sreq, struct pcs_int_request * parent)
-- 
1.8.3.1



More information about the Devel mailing list