[Devel] [PATCH RHEL9 COMMIT] fs/fuse/kio: switch to TCP_NODELAY/MSG_MORE from TCP_CORK

Konstantin Khorenko khorenko at virtuozzo.com
Thu Jan 23 21:53:23 MSK 2025


The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear at git at bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.4
------>
commit 3ba3aabb0b42206cc13cee6ab7e456a50237d80c
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date:   Sat Jan 18 02:08:58 2025 +0800

    fs/fuse/kio: switch to TCP_NODELAY/MSG_MORE from TCP_CORK
    
    In user space we switched to this mode long ago, because
    it saves a syscall per message. In the kernel the syscall cost is not
    an issue, yet we still have to make a downcall into the network stack,
    taking the socket lock etc. So, let us do the same here.
    
    Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
    Feature: vStorage
---
 fs/fuse/kio/pcs/pcs_sock_conn.c | 21 ++++++++++++++++++---
 fs/fuse/kio/pcs/pcs_sock_io.c   | 14 +++++++++++---
 fs/fuse/kio/pcs/pcs_sock_io.h   |  1 +
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_sock_conn.c b/fs/fuse/kio/pcs/pcs_sock_conn.c
index bce2d898fe5d..69cfe0026b98 100644
--- a/fs/fuse/kio/pcs/pcs_sock_conn.c
+++ b/fs/fuse/kio/pcs/pcs_sock_conn.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/tcp.h>
+#include <linux/module.h>
 
 #include "pcs_types.h"
 #include "pcs_sock_io.h"
@@ -18,6 +19,10 @@
 #include "log.h"
 #include "fuse_ktrace.h"
 
+static unsigned int pcs_use_cork;
+module_param(pcs_use_cork, uint, 0644);
+MODULE_PARM_DESC(pcs_use_cork, "Use TCP_CORK instead of TCP_NODELAY");
+
 static inline void pcs_sock_keepalive(struct socket *sock)
 {
 	sock_set_keepalive(sock->sk);
@@ -33,6 +38,11 @@ static inline void pcs_sock_cork(struct socket *sock)
 	tcp_sock_set_cork(sock->sk, true);
 }
 
+static inline void pcs_sock_nodelay(struct socket *sock)
+{
+	tcp_sock_set_nodelay(sock->sk);
+}
+
 static inline void set_sock_parameters(struct socket *sock, struct pcs_cluster_core *cc)
 {
 	if (sock->sk->sk_family == PF_INET || sock->sk->sk_family == PF_INET6) {
@@ -82,7 +92,7 @@ void pcs_sockconnect_start(struct pcs_rpc *ep)
 	iov_iter_kvec(&sio->read_iter, READ, NULL, 0, 0);
 	iov_iter_kvec(&sio->write_iter, WRITE, NULL, 0, 0);
 	sio->hdr_max = sizeof(struct pcs_rpc_hdr);
-	sio->flags = sa->sa_family != AF_UNIX ? PCS_SOCK_F_CORK : 0;
+	sio->flags = 0;
 
 	err = sock_create(sa->sa_family, SOCK_STREAM, 0, &sock);
 	if (err < 0) {
@@ -101,8 +111,13 @@ void pcs_sockconnect_start(struct pcs_rpc *ep)
 	}
 	pcs_sock_keepalive(sock);
 	if (sa->sa_family == PF_INET || sa->sa_family == PF_INET6) {
-		pcs_sock_cork(sock);
-		sio->flags |= PCS_SOCK_F_CORK;
+		if (pcs_use_cork) {
+			pcs_sock_cork(sock);
+			sio->flags |= PCS_SOCK_F_CORK;
+		} else {
+			pcs_sock_nodelay(sock);
+			sio->flags |= PCS_SOCK_F_NODELAY;
+		}
 	}
 	set_sock_parameters(sock, container_of(ep->eng, struct pcs_cluster_core, eng));
 
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index 29a5c272d6fe..7c62f483ea45 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/highmem.h>
 #include <linux/file.h>
+#include <linux/module.h>
 
 #include "pcs_types.h"
 #include "pcs_sock_io.h"
@@ -20,6 +21,9 @@
 #include "log.h"
 #include "fuse_ktrace.h"
 
+static unsigned int pcs_use_eor;
+module_param(pcs_use_eor, uint, 0644);
+MODULE_PARM_DESC(pcs_use_eor, "Use MSG_EOR");
 
 void pcs_msg_sent(struct pcs_msg * msg)
 {
@@ -118,12 +122,15 @@ static bool pcs_should_fail_sock_io(void)
 }
 #endif
 
-static int do_send_one_seg(struct socket *sock, struct iov_iter *it, size_t left)
+static int do_send_one_seg(struct socket *sock, struct iov_iter *it, size_t left, int has_more)
 {
 	int ret = -EIO;
 	size_t size = iov_iter_single_seg_count(it);
 	bool more = (size < left);
-	int flags = (MSG_DONTWAIT | MSG_NOSIGNAL) | (more ? MSG_MORE : MSG_EOR);
+	int flags = (MSG_DONTWAIT | MSG_NOSIGNAL) | ((more || has_more) ? MSG_MORE : 0);
+
+	if (unlikely(pcs_use_eor) && !more)
+		flags |= MSG_EOR;
 
 	DTRACE("sock(%p)  len:%ld, more:%d\n", sock, iov_iter_count(it), more);
 
@@ -372,6 +379,7 @@ static void pcs_sockio_send(struct pcs_sockio *sio)
 		/* TODO: cond resched here? */
 		while (sio->write_offset < msg->size) {
 			size_t left = msg->size - sio->write_offset;
+			int has_more = (msg->list.next != &sio->write_queue);
 			int n;
 
 			TRACE(PEER_FMT "offset:%d msg:%p left:%ld, it->len:%ld\n", PEER_ARGS(ep), sio->write_offset, msg,
@@ -382,7 +390,7 @@ static void pcs_sockio_send(struct pcs_sockio *sio)
 				msg->get_iter(msg, sio->write_offset, it, WRITE);
 			}
 			BUG_ON(iov_iter_count(it) > left);
-			n = do_send_one_seg(sio->socket, it, left);
+			n = do_send_one_seg(sio->socket, it, left, has_more);
 			if (n > 0) {
 				sio->write_offset += n;
 				iov_iter_advance(it, n);
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.h b/fs/fuse/kio/pcs/pcs_sock_io.h
index 872faffefe01..09870b38cdad 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.h
+++ b/fs/fuse/kio/pcs/pcs_sock_io.h
@@ -101,6 +101,7 @@ enum
 	PCS_SOCK_F_EOF			= 8,
 	PCS_SOCK_F_POOLIN		= 0x10,
 	PCS_SOCK_F_POOLOUT		= 0x20,
+	PCS_SOCK_F_NODELAY		= 0x40,
 };
 
 enum


More information about the Devel mailing list