[Devel] [PATCH RHEL8 COMMIT] sock: allow reading and changing sk_userlocks with setsockopt

Konstantin Khorenko khorenko at virtuozzo.com
Thu Aug 12 15:01:51 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-305.3.1.vz8.7.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-305.3.1.vz8.7.5
------>
commit 3fbaf74458b4c92fef18edf646f387666460f644
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Thu Aug 12 15:01:51 2021 +0300

    sock: allow reading and changing sk_userlocks with setsockopt
    
    SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK flags disable automatic socket
    buffers adjustment done by kernel (see tcp_fixup_rcvbuf() and
    tcp_sndbuf_expand()). If we've just created a new socket this adjustment
    is enabled on it, but if one changes the socket buffer size by
    setsockopt(SO_{SND,RCV}BUF*) it becomes disabled.
    
    CRIU needs to call setsockopt(SO_{SND,RCV}BUF*) on each socket on
    restore: first to increase buffer sizes so that packet queues can be
    restored, and second to restore the original buffer sizes. So after
    CRIU restore all sockets become non-auto-adjustable, which can
    decrease network performance of restored applications significantly.
    
    CRIU needs to be able to restore sockets with enabled/disabled
    adjustment to the same state they were in before dump, so let's add a
    special setsockopt for it.
    
    Let's also export SOCK_SNDBUF_LOCK and SOCK_RCVBUF_LOCK flags to uAPI so
    that using this interface one can reenable automatic socket buffer
    adjustment on their sockets.
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    Reviewed-by: Eric Dumazet <edumazet at google.com>
    Signed-off-by: David S. Miller <davem at davemloft.net>
    
    https://jira.sw.ru/browse/PSBM-131580
    
    (cherry-picked from net-next commit 04190bf8944d ("sock: allow reading
    and changing sk_userlocks with setsockopt"))
    Reviewed-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 arch/alpha/include/uapi/asm/socket.h  |  2 ++
 arch/mips/include/uapi/asm/socket.h   |  2 ++
 arch/parisc/include/uapi/asm/socket.h |  2 ++
 arch/sparc/include/uapi/asm/socket.h  |  2 ++
 include/net/sock.h                    |  3 +--
 include/uapi/asm-generic/socket.h     |  2 ++
 include/uapi/linux/socket.h           |  5 +++++
 net/core/sock.c                       | 13 +++++++++++++
 8 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 065fb372e355..0d7b45581350 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -115,4 +115,6 @@
 #define SO_TXTIME		61
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BUF_LOCK		72
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 71370fb3ceef..6c4838fa1299 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -126,4 +126,6 @@
 #define SO_TXTIME		61
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BUF_LOCK		72
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 061b9cf2a779..402897123261 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -107,4 +107,6 @@
 #define SO_TXTIME		0x4036
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BUF_LOCK		0x4046
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 7ea35e5601b6..b52f129c2d4f 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -109,4 +109,6 @@
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
 #define SO_SECURITY_ENCRYPTION_NETWORK		0x5004
 
+#define SO_BUF_LOCK              0x0051
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 4bd1a4b43ddf..9005c4da03b0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -73,6 +73,7 @@
 #include <linux/net_tstamp.h>
 #include <net/smc.h>
 #include <net/l3mdev.h>
+#include <uapi/linux/socket.h>
 
 #include <linux/rh_kabi.h>
 
@@ -1417,8 +1418,6 @@ static inline int __sk_prot_rehash(struct sock *sk)
 #define RCV_SHUTDOWN	1
 #define SEND_SHUTDOWN	2
 
-#define SOCK_SNDBUF_LOCK	1
-#define SOCK_RCVBUF_LOCK	2
 #define SOCK_BINDADDR_LOCK	4
 #define SOCK_BINDPORT_LOCK	8
 
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 044b8966c2ce..cfdcc6bc4c41 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -114,4 +114,6 @@
 
 #define SO_DETACH_REUSEPORT_BPF 68
 
+#define SO_BUF_LOCK		72
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index 8eb96021709c..2f9a04de3d8e 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -19,4 +19,9 @@ struct __kernel_sockaddr_storage {
 				/* _SS_MAXSIZE value minus size of ss_family */
 } __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
 
+#define SOCK_SNDBUF_LOCK	1
+#define SOCK_RCVBUF_LOCK	2
+
+#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
+
 #endif /* _UAPI_LINUX_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 577fb736d413..cc1cd215f4ed 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1217,6 +1217,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 		ret = sock_bindtoindex_locked(sk, val);
 		break;
 
+	case SO_BUF_LOCK:
+		if (val & ~SOCK_BUF_LOCK_MASK) {
+			ret = -EINVAL;
+			break;
+		}
+		sk->sk_userlocks = val | (sk->sk_userlocks &
+					  ~SOCK_BUF_LOCK_MASK);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1551,6 +1560,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_bound_dev_if;
 		break;
 
+	case SO_BUF_LOCK:
+		v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).


More information about the Devel mailing list