[Devel] [PATCH RHEL7 COMMIT] fs/fuse kio_pcs: improve rpc to cpu binding

Konstantin Khorenko khorenko at virtuozzo.com
Thu Sep 27 12:41:02 MSK 2018


The commit is pushed to "branch-rh7-3.10.0-862.11.6.vz7.71.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.11.6.vz7.71.15
------>
commit 8408ed8b9cecad2aa85735c24b8b6c809a802d30
Author: Pavel Butsykin <pbutsykin at virtuozzo.com>
Date:   Thu Sep 27 12:41:01 2018 +0300

    fs/fuse kio_pcs: improve rpc to cpu binding
    
    This patch introduces a new strategy for cpu affinity, the main purpose of
    which is to spread rpc connections across all CPUs. Unlike the old behavior,
    it avoids situations where several rpc connections end up bound to the same
    CPU. Such a strategy fits well with the RFS and aRFS features: every
    kernel_recvmsg() call records a hint about which CPU should handle further
    RX-queue processing for this socket.
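    
    For illustration, the spread strategy picks the next online CPU in a
    lock-free round-robin (see pcs_rpc_cpu_next() in the diff below). A minimal
    userspace sketch of the same selection idea, with a plain 0..ncpus-1 range
    standing in for cpu_online_mask, might look like this:
    
        #include <stdatomic.h>
        
        /* Sketch only: pick CPUs round-robin over [0, ncpus), wrapping at
         * the end, mirroring how RPC_AFFINITY_SPREAD walks cpu_online_mask. */
        static int next_cpu_sketch(int ncpus)
        {
                static atomic_int last = ATOMIC_VAR_INIT(-1);
                int old, next;
        
                do {
                        old = atomic_load(&last);
                        next = old + 1;
                        if (next >= ncpus)
                                next = 0;       /* wrap to the first CPU */
                } while (!atomic_compare_exchange_weak(&last, &old, next));
        
                return next;
        }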
    
    Also, this patch makes the rpc affinity mode tunable at runtime through
    sysfs: '/sys/module/fuse_kio_pcs/parameters/rpc_affinity_mode'. The
    supported modes are listed below, followed by a short usage example.
    
    rpc affinity modes:
    0 - rpc affinity disabled
    1 - Old strategy: always bind an rpc to the current CPU.
    2 - New strategy: spread rpc bindings across different CPUs.
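    
    The parameter is created with mode 0644, so the current value can also be
    read back at runtime; and, assuming fuse_kio_pcs is loaded as a loadable
    module rather than built in, the mode can likewise be set at load time.
    For example (the default is 2, RPC_AFFINITY_SPREAD):
    
    [root at kteam03 ~]# cat /sys/module/fuse_kio_pcs/parameters/rpc_affinity_mode
    2
    [root at kteam03 ~]# modprobe fuse_kio_pcs rpc_affinity_mode=1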
    
    [root at kteam03 ~]# vstorage -c k9 stat
    connected to MDS#1
    Cluster 'k9': healthy
    MDS nodes: 1 of 1, epoch uptime: 23h 15m, cluster version: 128
    CS nodes:  6 of 6 (6 avail, 0 inactive, 0 offline), storage version: 128
    
    [root at kteam03 ~]# echo 0 > /sys/module/fuse_kio_pcs/parameters/rpc_affinity_mode
    [root at kteam03 ~]# ~/at_io_iops --read --rand --iops -u 16G -s 4k -p 24 -n 24 --aio -q 128 -t 100 -f /vzt/svg_test
    (C) 2004-2015. Parallels IP Holdings GmbH. All rights reserved.
    time: 100.022896 sec; rate: { 83150.28125 } iops;
    
    [root at kteam03 ~]# echo 1 > /sys/module/fuse_kio_pcs/parameters/rpc_affinity_mode
    [root at kteam03 ~]# ~/at_io_iops --read --rand --iops -u 16G -s 4k -p 24 -n 24 --aio -q 128 -t 100 -f /vzt/svg_test
    (C) 2004-2015. Parallels IP Holdings GmbH. All rights reserved.
    time: 100.023529 sec; rate: { 89197.07031 } iops;
    
    [root at kteam03 ~]# echo 2 > /sys/module/fuse_kio_pcs/parameters/rpc_affinity_mode
    [root at kteam03 ~]# ~/at_io_iops --read --rand --iops -u 16G -s 4k -p 24 -n 24 --aio -q 128 -t 100 -f /vzt/svg_test
    (C) 2004-2015. Parallels IP Holdings GmbH. All rights reserved.
    time: 100.007874 sec; rate: { 226939.37500 } iops;
    
    https://pmc.acronis.com/browse/VSTOR-14031
    
    Signed-off-by: Pavel Butsykin <pbutsykin at virtuozzo.com>
    Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 fs/fuse/kio/pcs/pcs_rpc.c | 58 ++++++++++++++++++++++++++++++++++++++++++-----
 fs/fuse/kio/pcs/pcs_rpc.h |  6 +++++
 2 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_rpc.c b/fs/fuse/kio/pcs/pcs_rpc.c
index 5cba5660850c..9f0dfd017579 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.c
+++ b/fs/fuse/kio/pcs/pcs_rpc.c
@@ -18,6 +18,7 @@
 #include <net/sock.h>
 #include <linux/net.h>
 #include <linux/kthread.h>
+#include <linux/module.h>
 #include <linux/types.h>
 
 
@@ -28,6 +29,11 @@
 #include "log.h"
 #include "fuse_ktrace.h"
 
+
+static unsigned int rpc_affinity_mode = RPC_AFFINITY_SPREAD;
+module_param(rpc_affinity_mode, uint, 0644);
+MODULE_PARM_DESC(rpc_affinity_mode, "RPC affinity mode");
+
 static void timer_work(struct work_struct *w);
 static int rpc_gc_classify(struct pcs_rpc * ep);
 
@@ -619,19 +625,58 @@ void pcs_rpc_kick_queue(struct pcs_rpc * ep)
 	queue_work_on(ep->cpu, cc->wq, &ep->work);
 }
 
+static int pcs_rpc_cpu_next(void)
+{
+	static atomic_t cpu_affinity_num = ATOMIC_INIT(-1);
+
+	int old, new;
+
+	do {
+		old = atomic_read(&cpu_affinity_num);
+		new = cpumask_next(old, cpu_online_mask);
+		if (new >= nr_cpu_ids)
+			new = cpumask_first(cpu_online_mask);
+
+	} while (atomic_cmpxchg(&cpu_affinity_num, old, new) != old);
+
+	return new;
+}
+
+static void pcs_rpc_affinity(struct pcs_rpc *ep, bool was_idle)
+{
+	switch (rpc_affinity_mode) {
+		case RPC_AFFINITY_NONE:
+			if (unlikely(ep->cpu != WORK_CPU_UNBOUND)) {
+				ep->cpu = WORK_CPU_UNBOUND;
+			}
+			break;
+		case RPC_AFFINITY_RETENT:
+			/* Naive socket-to-cpu binding approach */
+			if (time_is_before_jiffies(ep->cpu_stamp) && was_idle) {
+				ep->cpu_stamp = jiffies + PCS_RPC_CPU_SLICE;
+				ep->cpu = smp_processor_id();
+			}
+			break;
+		case RPC_AFFINITY_SPREAD:
+			if (time_is_before_jiffies(ep->cpu_stamp) && was_idle) {
+				ep->cpu_stamp = jiffies + PCS_RPC_CPU_SLICE;
+				ep->cpu = pcs_rpc_cpu_next();
+			}
+			break;
+		default:
+			pr_err("Unknown affinity mode: %u\n", rpc_affinity_mode);
+	}
+}
+
 void pcs_rpc_queue(struct pcs_rpc * ep, struct pcs_msg * msg)
 {
-	int was_idle;
+	bool was_idle;
 
 	spin_lock(&ep->q_lock);
 	was_idle = list_empty(&ep->input_queue);
 	list_add_tail(&msg->list, &ep->input_queue);
 
-	/* Naive socket-to-cpu binding approach */
-	if (time_is_before_jiffies(ep->cpu_stamp) && was_idle) {
-		ep->cpu_stamp = jiffies + PCS_RPC_CPU_SLICE;
-		ep->cpu = smp_processor_id();
-	}
+	pcs_rpc_affinity(ep, was_idle);
 	spin_unlock(&ep->q_lock);
 
 	if (was_idle)
@@ -743,6 +788,7 @@ static void update_xmit_timeout(struct pcs_rpc *ep)
 	mod_delayed_work(cc->wq, &ep->timer_work, timeout);
 
 }
+
 static void rpc_queue_work(struct work_struct *w)
 {
 	LIST_HEAD(input_q);
diff --git a/fs/fuse/kio/pcs/pcs_rpc.h b/fs/fuse/kio/pcs/pcs_rpc.h
index b4bb99f0b3a8..fd2bc29bc52c 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.h
+++ b/fs/fuse/kio/pcs/pcs_rpc.h
@@ -22,6 +22,12 @@ enum
 	PCS_RPC_DESTROY	= 8		/* Destruction in progress */
 };
 
+enum {
+	RPC_AFFINITY_NONE   = 0,
+	RPC_AFFINITY_RETENT = 1,
+	RPC_AFFINITY_SPREAD = 2,
+};
+
 struct pcs_rpc_params
 {
 	unsigned int	alloc_hdr_size;

