[Devel] [PATCH RHEL9 COMMIT] fs/fuse/kio: severe reordering when cs is congested
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jan 23 21:53:24 MSK 2025
The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear at git at bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.4
------>
commit 99c9c1cd191938841ab2280fc6eadde23c4dcf8d
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date: Sat Jan 18 02:09:08 2025 +0800
fs/fuse/kio: severe reordering when cs is congested
The issue is old: it was found and fixed in user space,
but we forgot to update the kernel as well.
Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
Feature: vStorage
---
fs/fuse/kio/pcs/pcs_cluster.c | 1 +
fs/fuse/kio/pcs/pcs_cluster_core.c | 17 +++++++++++++++++
fs/fuse/kio/pcs/pcs_cs.c | 18 ++++++++++++++----
fs/fuse/kio/pcs/pcs_map.c | 15 ++++++++++++---
fs/fuse/kio/pcs/pcs_req.h | 4 +++-
5 files changed, 47 insertions(+), 8 deletions(-)
diff --git a/fs/fuse/kio/pcs/pcs_cluster.c b/fs/fuse/kio/pcs/pcs_cluster.c
index c64668a2a121..c87313b90ab3 100644
--- a/fs/fuse/kio/pcs/pcs_cluster.c
+++ b/fs/fuse/kio/pcs/pcs_cluster.c
@@ -47,6 +47,7 @@ void pcs_sreq_complete(struct pcs_int_request *sreq)
*/
if (ireq_check_redo(sreq)) {
ireq_retry_inc(ireq);
+ sreq->flags &= ~IREQ_F_REQUEUED;
if (sreq->type != PCS_IREQ_CUSTOM) {
map_notify_soft_error(sreq);
diff --git a/fs/fuse/kio/pcs/pcs_cluster_core.c b/fs/fuse/kio/pcs/pcs_cluster_core.c
index 86fe185684be..10d61f203916 100644
--- a/fs/fuse/kio/pcs/pcs_cluster_core.c
+++ b/fs/fuse/kio/pcs/pcs_cluster_core.c
@@ -250,3 +250,20 @@ void pcs_cc_requeue(struct pcs_cluster_core *cc, struct list_head *q)
if (was_idle)
queue_work(cc->wq, &cc->main_job);
}
+
+void pcs_cc_requeue_head(struct pcs_cluster_core *cc, struct list_head *q)
+{ /* Requeue the requests on q ahead of those already in cc->work_queue. */
+ unsigned long flags;
+ int was_idle = 0;
+
+ if (list_empty(q)) /* nothing to requeue */
+ return;
+
+ spin_lock_irqsave(&cc->lock, flags);
+ was_idle = list_empty(&cc->work_queue); /* sampled before the splice */
+ list_splice_init(q, &cc->work_queue); /* q goes to the head and is left empty */
+ spin_unlock_irqrestore(&cc->lock, flags);
+
+ if (was_idle) /* work queue was empty: schedule main_job */
+ queue_work(cc->wq, &cc->main_job);
+}
diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index b2ad5113dc3f..07075759a658 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -314,7 +314,7 @@ void cs_log_io_times(struct pcs_int_request * ireq, struct pcs_msg * resp, unsig
th->ino = ireq->dentry->fileinfo.attr.id;
th->type = h->hdr.type;
th->cses = 1;
- th->__pad = 0;
+ th->__pad = ((!!(ireq->flags & IREQ_F_REQUEUED)) << 7) | smp_processor_id();
th->chid = (unsigned int)h->uid;
ch->csid = resp->rpc->peer_id.val;
@@ -1549,13 +1549,23 @@ int pcs_cs_cong_enqueue_cond(struct pcs_int_request *ireq, struct pcs_cs *cs)
int queued = 0;
spin_lock(&cs->lock);
- if (cs->in_flight >= cs->eff_cwnd) {
- list_add_tail(&ireq->list, &cs->cong_queue);
+ if (cs->in_flight >= cs->eff_cwnd ||
+ (cs->cong_queue_len && !(ireq->flags & IREQ_F_REQUEUED))) {
+ queued = 1;
+ if (!list_empty(&cs->active_list)) {
+ list_add_tail(&ireq->list, &cs->active_list);
+ queued = 2;
+ } else
+ list_add_tail(&ireq->list, &cs->cong_queue);
cs->cong_queue_len++;
if (!ireq->qdepth)
ireq->qdepth = cs->cong_queue_len;
- queued = 1;
}
+ if (queued && ireq->type == PCS_IREQ_IOCHUNK)
+ FUSE_KDTRACE(ireq->cc->fc, "queued%d {%p} cpu%u %d %u/%u " DENTRY_FMT " %llu+%llu",
+ queued, ireq, smp_processor_id(), cs->cong_queue_len,
+ cs->in_flight, cs->eff_cwnd, DENTRY_ARGS(ireq->dentry),
+ ireq->iochunk.chunk + ireq->iochunk.offset, ireq->iochunk.size);
spin_unlock(&cs->lock);
return queued;
}
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 221e9f8e9463..5d1dd5a13b1c 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -1235,7 +1235,7 @@ void pcs_map_complete(struct pcs_map_entry *m, struct pcs_ioc_getmap *omap)
spin_unlock(&m->lock);
/* Success, resubmit waiting requests */
- pcs_cc_requeue(cc_from_map(m), &queue);
+ pcs_cc_requeue_head(cc_from_map(m), &queue);
BUG_ON(!list_empty(&queue));
pcs_map_put(m);
@@ -1575,7 +1575,15 @@ static void pcs_cs_wakeup(struct pcs_cs * cs)
sreq = parent;
}
+ sreq->flags |= IREQ_F_REQUEUED;
+
if (sreq->type != PCS_IREQ_FLUSH) {
+ FUSE_KDTRACE(sreq->cc->fc,
+ "wakeup {%p} cpu%u %d %u/%u " DENTRY_FMT " %llu+%llu",
+ sreq, smp_processor_id(), cs->cong_queue_len,
+ cs->in_flight, cs->eff_cwnd, DENTRY_ARGS(sreq->dentry),
+ sreq->iochunk.chunk + sreq->iochunk.offset,
+ sreq->iochunk.size);
map = pcs_find_get_map(sreq->dentry, sreq->iochunk.chunk +
((sreq->flags & IREQ_F_MAPPED) ? 0 : sreq->iochunk.offset));
if (map) {
@@ -2057,7 +2065,7 @@ static int pcs_cslist_submit_read(struct pcs_int_request *ireq, struct pcs_cs_li
allot = cs->eff_cwnd - cs->in_flight;
spin_unlock(&cs->lock);
- if (allot < 0) {
+ if (allot < 0 || cs->cong_queue_len) {
if (pcs_cs_cong_enqueue_cond(ireq, cs))
return 0;
}
@@ -2109,7 +2117,7 @@ static int pcs_cslist_submit_read(struct pcs_int_request *ireq, struct pcs_cs_li
if (sreq == ireq)
return 0;
- if (allot < 0) {
+ if (allot < 0 || !list_empty(&cs->cong_queue)) {
if (pcs_cs_cong_enqueue_cond(ireq, cs))
return 0;
}
@@ -3128,6 +3136,7 @@ static void pcs_flushreq_complete(struct pcs_int_request * sreq)
map_notify_error(m, sreq, &ioh->map_version, sreq->flushreq.csl);
pcs_deaccount_ireq(sreq, &sreq->error);
pcs_clear_error(&sreq->error);
+ sreq->flags &= ~IREQ_F_REQUEUED;
if (!(sreq->flags & IREQ_F_ONCE)) {
sreq->flags |= IREQ_F_ONCE;
diff --git a/fs/fuse/kio/pcs/pcs_req.h b/fs/fuse/kio/pcs/pcs_req.h
index e43d28e790f4..1503e5972d30 100644
--- a/fs/fuse/kio/pcs/pcs_req.h
+++ b/fs/fuse/kio/pcs/pcs_req.h
@@ -130,7 +130,8 @@ struct pcs_int_request
#define IREQ_F_CRYPT 0x2000
#define IREQ_F_ACCELERROR 0x4000
#define IREQ_F_NOACCT 0x8000
-#define IREQ_F_FANOUT 0x10000
+#define IREQ_F_FANOUT 0x10000
+#define IREQ_F_REQUEUED 0x20000
atomic_t iocount;
@@ -350,6 +351,7 @@ static inline struct pcs_cluster_core *cc_from_krpc(struct pcs_krpc *krpc)
void pcs_cc_submit(struct pcs_cluster_core *cc, struct pcs_int_request* ireq);
void pcs_cc_requeue(struct pcs_cluster_core *cc, struct list_head * q);
+void pcs_cc_requeue_head(struct pcs_cluster_core *cc, struct list_head *q);
void pcs_cc_update_storage_versions(struct pcs_cluster_core *cc, int version);
/* FROM pcs_cluster.h */
static inline void pcs_sreq_attach(struct pcs_int_request * sreq, struct pcs_int_request * parent)
More information about the Devel
mailing list