[Devel] [PATCH VZ9 4/10] fs/fuse/kio: switch to TCP_NODELAY/MSG_MORE from TCP_CORK

Alexey Kuznetsov kuznet at virtuozzo.com
Fri Jan 17 21:08:58 MSK 2025


In user space we switched to this mode long ago, because
it saves a syscall per message. In the kernel that is not an issue,
yet we still have to make a downcall into the network stack, taking
the socket lock, etc. So, let us do the same here.

Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
---
 fs/fuse/kio/pcs/pcs_sock_conn.c | 21 ++++++++++++++++++---
 fs/fuse/kio/pcs/pcs_sock_io.c   | 14 +++++++++++---
 fs/fuse/kio/pcs/pcs_sock_io.h   |  1 +
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_sock_conn.c b/fs/fuse/kio/pcs/pcs_sock_conn.c
index bce2d89..69cfe00 100644
--- a/fs/fuse/kio/pcs/pcs_sock_conn.c
+++ b/fs/fuse/kio/pcs/pcs_sock_conn.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/tcp.h>
+#include <linux/module.h>
 
 #include "pcs_types.h"
 #include "pcs_sock_io.h"
@@ -18,6 +19,10 @@
 #include "log.h"
 #include "fuse_ktrace.h"
 
+static unsigned int pcs_use_cork;
+module_param(pcs_use_cork, uint, 0644);
+MODULE_PARM_DESC(pcs_use_cork, "Use TCP_CORK instead of TCP_NODELAY");
+
 static inline void pcs_sock_keepalive(struct socket *sock)
 {
 	sock_set_keepalive(sock->sk);
@@ -33,6 +38,11 @@ static inline void pcs_sock_cork(struct socket *sock)
 	tcp_sock_set_cork(sock->sk, true);
 }
 
+static inline void pcs_sock_nodelay(struct socket *sock)
+{
+	tcp_sock_set_nodelay(sock->sk);
+}
+
 static inline void set_sock_parameters(struct socket *sock, struct pcs_cluster_core *cc)
 {
 	if (sock->sk->sk_family == PF_INET || sock->sk->sk_family == PF_INET6) {
@@ -82,7 +92,7 @@ void pcs_sockconnect_start(struct pcs_rpc *ep)
 	iov_iter_kvec(&sio->read_iter, READ, NULL, 0, 0);
 	iov_iter_kvec(&sio->write_iter, WRITE, NULL, 0, 0);
 	sio->hdr_max = sizeof(struct pcs_rpc_hdr);
-	sio->flags = sa->sa_family != AF_UNIX ? PCS_SOCK_F_CORK : 0;
+	sio->flags = 0;
 
 	err = sock_create(sa->sa_family, SOCK_STREAM, 0, &sock);
 	if (err < 0) {
@@ -101,8 +111,13 @@ void pcs_sockconnect_start(struct pcs_rpc *ep)
 	}
 	pcs_sock_keepalive(sock);
 	if (sa->sa_family == PF_INET || sa->sa_family == PF_INET6) {
-		pcs_sock_cork(sock);
-		sio->flags |= PCS_SOCK_F_CORK;
+		if (pcs_use_cork) {
+			pcs_sock_cork(sock);
+			sio->flags |= PCS_SOCK_F_CORK;
+		} else {
+			pcs_sock_nodelay(sock);
+			sio->flags |= PCS_SOCK_F_NODELAY;
+		}
 	}
 	set_sock_parameters(sock, container_of(ep->eng, struct pcs_cluster_core, eng));
 
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index 29a5c27..7c62f48 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/highmem.h>
 #include <linux/file.h>
+#include <linux/module.h>
 
 #include "pcs_types.h"
 #include "pcs_sock_io.h"
@@ -20,6 +21,9 @@
 #include "log.h"
 #include "fuse_ktrace.h"
 
+static unsigned int pcs_use_eor;
+module_param(pcs_use_eor, uint, 0644);
+MODULE_PARM_DESC(pcs_use_eor, "Use MSG_EOR");
 
 void pcs_msg_sent(struct pcs_msg * msg)
 {
@@ -118,12 +122,15 @@ static bool pcs_should_fail_sock_io(void)
 }
 #endif
 
-static int do_send_one_seg(struct socket *sock, struct iov_iter *it, size_t left)
+static int do_send_one_seg(struct socket *sock, struct iov_iter *it, size_t left, int has_more)
 {
 	int ret = -EIO;
 	size_t size = iov_iter_single_seg_count(it);
 	bool more = (size < left);
-	int flags = (MSG_DONTWAIT | MSG_NOSIGNAL) | (more ? MSG_MORE : MSG_EOR);
+	int flags = (MSG_DONTWAIT | MSG_NOSIGNAL) | ((more || has_more) ? MSG_MORE : 0);
+
+	if (unlikely(pcs_use_eor) && !more)
+		flags |= MSG_EOR;
 
 	DTRACE("sock(%p)  len:%ld, more:%d\n", sock, iov_iter_count(it), more);
 
@@ -372,6 +379,7 @@ static void pcs_sockio_send(struct pcs_sockio *sio)
 		/* TODO: cond resched here? */
 		while (sio->write_offset < msg->size) {
 			size_t left = msg->size - sio->write_offset;
+			int has_more = (msg->list.next != &sio->write_queue);
 			int n;
 
 			TRACE(PEER_FMT "offset:%d msg:%p left:%ld, it->len:%ld\n", PEER_ARGS(ep), sio->write_offset, msg,
@@ -382,7 +390,7 @@ static void pcs_sockio_send(struct pcs_sockio *sio)
 				msg->get_iter(msg, sio->write_offset, it, WRITE);
 			}
 			BUG_ON(iov_iter_count(it) > left);
-			n = do_send_one_seg(sio->socket, it, left);
+			n = do_send_one_seg(sio->socket, it, left, has_more);
 			if (n > 0) {
 				sio->write_offset += n;
 				iov_iter_advance(it, n);
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.h b/fs/fuse/kio/pcs/pcs_sock_io.h
index 872faff..09870b3 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.h
+++ b/fs/fuse/kio/pcs/pcs_sock_io.h
@@ -101,6 +101,7 @@ enum
 	PCS_SOCK_F_EOF			= 8,
 	PCS_SOCK_F_POOLIN		= 0x10,
 	PCS_SOCK_F_POOLOUT		= 0x20,
+	PCS_SOCK_F_NODELAY		= 0x40,
 };
 
 enum
-- 
1.8.3.1



More information about the Devel mailing list