[Devel] [PATCH RHEL9 COMMIT] fs/fuse/kio: switch to TCP_NODELAY/MSG_MORE from TCP_CORK
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jan 23 21:53:23 MSK 2025
The commit has been pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear in git at bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.4
------>
commit 3ba3aabb0b42206cc13cee6ab7e456a50237d80c
Author: Alexey Kuznetsov <kuznet at virtuozzo.com>
Date: Sat Jan 18 02:08:58 2025 +0800
fs/fuse/kio: switch to TCP_NODELAY/MSG_MORE from TCP_CORK
In user space we switched to this mode long ago because
it saves a syscall per message. In the kernel the syscall is not an issue,
yet we still have to make a downcall into the network stack, taking the socket lock, etc.
So let us make the same switch here.
Signed-off-by: Alexey Kuznetsov <kuznet at virtuozzo.com>
Feature: vStorage
---
fs/fuse/kio/pcs/pcs_sock_conn.c | 21 ++++++++++++++++++---
fs/fuse/kio/pcs/pcs_sock_io.c | 14 +++++++++++---
fs/fuse/kio/pcs/pcs_sock_io.h | 1 +
3 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/fs/fuse/kio/pcs/pcs_sock_conn.c b/fs/fuse/kio/pcs/pcs_sock_conn.c
index bce2d898fe5d..69cfe0026b98 100644
--- a/fs/fuse/kio/pcs/pcs_sock_conn.c
+++ b/fs/fuse/kio/pcs/pcs_sock_conn.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/tcp.h>
+#include <linux/module.h>
#include "pcs_types.h"
#include "pcs_sock_io.h"
@@ -18,6 +19,10 @@
#include "log.h"
#include "fuse_ktrace.h"
+static unsigned int pcs_use_cork;
+module_param(pcs_use_cork, uint, 0644);
+MODULE_PARM_DESC(pcs_use_cork, "Use TCP_CORK instead of TCP_NODELAY");
+
static inline void pcs_sock_keepalive(struct socket *sock)
{
sock_set_keepalive(sock->sk);
@@ -33,6 +38,11 @@ static inline void pcs_sock_cork(struct socket *sock)
tcp_sock_set_cork(sock->sk, true);
}
+static inline void pcs_sock_nodelay(struct socket *sock)
+{
+ tcp_sock_set_nodelay(sock->sk);
+}
+
static inline void set_sock_parameters(struct socket *sock, struct pcs_cluster_core *cc)
{
if (sock->sk->sk_family == PF_INET || sock->sk->sk_family == PF_INET6) {
@@ -82,7 +92,7 @@ void pcs_sockconnect_start(struct pcs_rpc *ep)
iov_iter_kvec(&sio->read_iter, READ, NULL, 0, 0);
iov_iter_kvec(&sio->write_iter, WRITE, NULL, 0, 0);
sio->hdr_max = sizeof(struct pcs_rpc_hdr);
- sio->flags = sa->sa_family != AF_UNIX ? PCS_SOCK_F_CORK : 0;
+ sio->flags = 0;
err = sock_create(sa->sa_family, SOCK_STREAM, 0, &sock);
if (err < 0) {
@@ -101,8 +111,13 @@ void pcs_sockconnect_start(struct pcs_rpc *ep)
}
pcs_sock_keepalive(sock);
if (sa->sa_family == PF_INET || sa->sa_family == PF_INET6) {
- pcs_sock_cork(sock);
- sio->flags |= PCS_SOCK_F_CORK;
+ if (pcs_use_cork) {
+ pcs_sock_cork(sock);
+ sio->flags |= PCS_SOCK_F_CORK;
+ } else {
+ pcs_sock_nodelay(sock);
+ sio->flags |= PCS_SOCK_F_NODELAY;
+ }
}
set_sock_parameters(sock, container_of(ep->eng, struct pcs_cluster_core, eng));
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index 29a5c272d6fe..7c62f483ea45 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/file.h>
+#include <linux/module.h>
#include "pcs_types.h"
#include "pcs_sock_io.h"
@@ -20,6 +21,9 @@
#include "log.h"
#include "fuse_ktrace.h"
+static unsigned int pcs_use_eor;
+module_param(pcs_use_eor, uint, 0644);
+MODULE_PARM_DESC(pcs_use_eor, "Use MSG_EOR");
void pcs_msg_sent(struct pcs_msg * msg)
{
@@ -118,12 +122,15 @@ static bool pcs_should_fail_sock_io(void)
}
#endif
-static int do_send_one_seg(struct socket *sock, struct iov_iter *it, size_t left)
+static int do_send_one_seg(struct socket *sock, struct iov_iter *it, size_t left, int has_more)
{
int ret = -EIO;
size_t size = iov_iter_single_seg_count(it);
bool more = (size < left);
- int flags = (MSG_DONTWAIT | MSG_NOSIGNAL) | (more ? MSG_MORE : MSG_EOR);
+ int flags = (MSG_DONTWAIT | MSG_NOSIGNAL) | ((more || has_more) ? MSG_MORE : 0);
+
+ if (unlikely(pcs_use_eor) && !more)
+ flags |= MSG_EOR;
DTRACE("sock(%p) len:%ld, more:%d\n", sock, iov_iter_count(it), more);
@@ -372,6 +379,7 @@ static void pcs_sockio_send(struct pcs_sockio *sio)
/* TODO: cond resched here? */
while (sio->write_offset < msg->size) {
size_t left = msg->size - sio->write_offset;
+ int has_more = (msg->list.next != &sio->write_queue);
int n;
TRACE(PEER_FMT "offset:%d msg:%p left:%ld, it->len:%ld\n", PEER_ARGS(ep), sio->write_offset, msg,
@@ -382,7 +390,7 @@ static void pcs_sockio_send(struct pcs_sockio *sio)
msg->get_iter(msg, sio->write_offset, it, WRITE);
}
BUG_ON(iov_iter_count(it) > left);
- n = do_send_one_seg(sio->socket, it, left);
+ n = do_send_one_seg(sio->socket, it, left, has_more);
if (n > 0) {
sio->write_offset += n;
iov_iter_advance(it, n);
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.h b/fs/fuse/kio/pcs/pcs_sock_io.h
index 872faffefe01..09870b38cdad 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.h
+++ b/fs/fuse/kio/pcs/pcs_sock_io.h
@@ -101,6 +101,7 @@ enum
PCS_SOCK_F_EOF = 8,
PCS_SOCK_F_POOLIN = 0x10,
PCS_SOCK_F_POOLOUT = 0x20,
+ PCS_SOCK_F_NODELAY = 0x40,
};
enum
More information about the Devel
mailing list