[Devel] [PATCH VZ9 5/7] fs/fuse/kio: tidy up RPC_AFFINITY_RSS
Alexey Kuznetsov
kuznet at virtuozzo.com
Fri Mar 28 15:01:03 MSK 2025
Also, enable it for RDMA (tested with Mellanox) and unix sockets.
Now it provides essentially perfect affinity: socket contexts never
hit lock contention or cache bouncing, provided RSS and XPS are
configured correctly.
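
For reference, RSS itself is normally configured via ethtool, while XPS
is a per-queue CPU mask in sysfs. Below is a minimal userspace sketch of
the XPS side; the device name, queue id and mask are examples only and
are not part of this patch:

#include <errno.h>
#include <stdio.h>

static int set_xps_mask(const char *dev, int qid, unsigned long mask)
{
	char path[256];
	FILE *f;

	/* XPS mask lives in /sys/class/net/<dev>/queues/tx-<N>/xps_cpus */
	snprintf(path, sizeof(path),
		 "/sys/class/net/%s/queues/tx-%d/xps_cpus", dev, qid);
	f = fopen(path, "w");
	if (!f)
		return -errno;
	fprintf(f, "%lx\n", mask);
	return fclose(f) ? -errno : 0;
}

int main(void)
{
	/* example: steer tx-0 of eth0 to CPUs 0-3 (hex mask 0xf) */
	return set_xps_mask("eth0", 0, 0xf) ? 1 : 0;
}
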
Change fallback when rx_cpu is not available from RPC_AFFINITY_RETENT
to RPC_AFFINITY_FAIR_SPREAD.
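
To illustrate the new selection order, here is a small userspace model
(it ignores the idle time-slice check); pick_cpu() and
fair_spread_pick() are stand-ins for the real pcs_rpc_affinity() and
pcs_rpc_cpu_select() and exist only for this example:

#include <stdio.h>

#define CPU_UNBOUND (-1)
#define NR_MODEL_CPUS 4		/* model assumption, not from the patch */

/* stand-in for pcs_rpc_cpu_select(): simple round robin */
static int fair_spread_pick(void)
{
	static int next;
	return next++ % NR_MODEL_CPUS;
}

/*
 * Prefer the CPU that received the last event (rx_cpu); when it is
 * unknown, fall back to fair spread instead of the old retention
 * behaviour (binding to whichever CPU happened to queue the work).
 */
static int pick_cpu(int rx_cpu, int cur_cpu)
{
	if (rx_cpu != CPU_UNBOUND)
		return rx_cpu;
	if (cur_cpu == CPU_UNBOUND)
		return fair_spread_pick();
	return cur_cpu;
}

int main(void)
{
	printf("rx known: %d, rx unknown: %d\n",
	       pick_cpu(2, CPU_UNBOUND), pick_cpu(CPU_UNBOUND, CPU_UNBOUND));
	return 0;
}
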
Unfortunately, we cannot enable it by default, since configuring
RSS/XPS is an advanced performance tuning step.
Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
---
 fs/fuse/kio/pcs/pcs_rdma_io.c |  1 +
 fs/fuse/kio/pcs/pcs_rpc.c     | 25 ++++++++++++++++---------
 fs/fuse/kio/pcs/pcs_rpc.h     |  1 +
 fs/fuse/kio/pcs/pcs_sock_io.c |  5 ++---
4 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/fs/fuse/kio/pcs/pcs_rdma_io.c b/fs/fuse/kio/pcs/pcs_rdma_io.c
index 2755b13..d50f2c1 100644
--- a/fs/fuse/kio/pcs/pcs_rdma_io.c
+++ b/fs/fuse/kio/pcs/pcs_rdma_io.c
@@ -1096,6 +1096,7 @@ static void pcs_rdma_cq_comp_handler(struct ib_cq *cq, void *private)
set_bit(PCS_RDMA_IO_CQE, &rio->io_flags);
wake_up(&rio->waitq);
+ ep->rx_cpu = smp_processor_id();
pcs_rpc_kick_queue(ep);
}
diff --git a/fs/fuse/kio/pcs/pcs_rpc.c b/fs/fuse/kio/pcs/pcs_rpc.c
index b9774ce1..71c2a3b 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.c
+++ b/fs/fuse/kio/pcs/pcs_rpc.c
@@ -339,6 +339,7 @@ void pcs_rpc_attach_new_ep(struct pcs_rpc * ep, struct pcs_rpc_engine * eng)
atomic_set(&ep->netlat_cnt, 0);
atomic64_set(&ep->netlat_avg, 0);
ep->cpu = WORK_CPU_UNBOUND;
+ ep->rx_cpu = WORK_CPU_UNBOUND;
ep->gc = NULL;
if (eng->max_gc_index)
@@ -863,27 +864,33 @@ static void pcs_rpc_affinity(struct pcs_rpc *ep, bool was_idle)
ep->cpu = WORK_CPU_UNBOUND;
}
break;
- case RPC_AFFINITY_RSS:
- if (!(ep->flags & PCS_RPC_F_LOCAL) && ep->addr.type != PCS_ADDRTYPE_RDMA)
- break;
+ case RPC_AFFINITY_RSS: {
+ int rx_cpu = READ_ONCE(ep->rx_cpu);
+
+ if (rx_cpu != WORK_CPU_UNBOUND && ep->cpu != rx_cpu)
+ ep->cpu = rx_cpu;
fallthrough;
+ }
+ case RPC_AFFINITY_FAIR_SPREAD:
+ if (ep->cpu == WORK_CPU_UNBOUND ||
+ (time_is_before_jiffies(ep->cpu_stamp) && was_idle))
+ pcs_rpc_cpu_select(ep);
+ break;
case RPC_AFFINITY_RETENT:
/* Naive socket-to-cpu binding approach */
- if (time_is_before_jiffies(ep->cpu_stamp) && was_idle) {
+ if (ep->cpu == WORK_CPU_UNBOUND ||
+ (time_is_before_jiffies(ep->cpu_stamp) && was_idle)) {
ep->cpu_stamp = jiffies + rpc_cpu_time_slice;
ep->cpu = smp_processor_id();
}
break;
case RPC_AFFINITY_SPREAD:
- if (time_is_before_jiffies(ep->cpu_stamp) && was_idle) {
+ if (ep->cpu == WORK_CPU_UNBOUND ||
+ (time_is_before_jiffies(ep->cpu_stamp) && was_idle)) {
ep->cpu_stamp = jiffies + rpc_cpu_time_slice;
ep->cpu = pcs_rpc_cpu_next();
}
break;
- case RPC_AFFINITY_FAIR_SPREAD:
- if (time_is_before_jiffies(ep->cpu_stamp) && was_idle)
- pcs_rpc_cpu_select(ep);
- break;
default:
pr_err("Unknown affinity mode: %u\n", rpc_affinity_mode);
}
diff --git a/fs/fuse/kio/pcs/pcs_rpc.h b/fs/fuse/kio/pcs/pcs_rpc.h
index cb18557..0bafc8a 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.h
+++ b/fs/fuse/kio/pcs/pcs_rpc.h
@@ -142,6 +142,7 @@ struct pcs_rpc
int cpu;
unsigned long cpu_stamp;
struct delayed_work cpu_timer_work; /* reset cpu affinity after being idle */
+ int rx_cpu;
struct mutex mutex;
u64 accounted;
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index 7c62f48..805b8f1 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -561,9 +561,8 @@ static void pcs_sk_kick_queue(struct sock *sk)
sio = rcu_dereference_sk_user_data(sk);
if (sio) {
struct pcs_rpc *ep = sio->netio.parent;
- TRACE(PEER_FMT" queue cpu=%d\n", PEER_ARGS(ep), smp_processor_id());
- if (rpc_affinity_mode == RPC_AFFINITY_RSS && !(ep->flags & PCS_RPC_F_LOCAL))
- ep->cpu = smp_processor_id();
+ DTRACE(PEER_FMT" queue cpu=%d\n", PEER_ARGS(ep), smp_processor_id());
+ ep->rx_cpu = smp_processor_id();
pcs_rpc_kick_queue(ep);
}
rcu_read_unlock();
--
1.8.3.1