[Devel] [PATCH vz9] ve/netfilter: Add autoloading of sockopt modules
Nikita Yushchenko
nikita.yushchenko at virtuozzo.com
Thu Nov 11 18:33:21 MSK 2021
Partially based on vz8 commit 3cd26ece5d16 ("ve/netfilter:
Add autoloading of sockopt modules").
Original task: https://jira.sw.ru/browse/PSBM-28910
On netfilter [gs]etsockopt() call, if implementation of the requested
operation is missing in the currently running kernel, try to load the
module containing that implementation.
A hardcoded table is used to look up the name of the module to load.
This table contains information about all in-tree kernel modules that
implement netfilter [gs]etsockopt() operations, as of kernel version 5.14.
Unlike previous versions, CAP_NET_ADMIN is no longer required to
try module loading. Not all [gs]etsockopt() operations require additional
privileges, and issuing one that does not require them shall not fail due
to a missing module. Required permission checks are done inside the operations.
Original task: https://jira.sw.ru/browse/PSBM-28910
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
kernel/kmod.c | 3 +
net/netfilter/nf_sockopt.c | 119 ++++++++++++++++++++++++++++++++++++-
2 files changed, 120 insertions(+), 2 deletions(-)
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 678735dbb969..16563ff101f3 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -214,6 +214,7 @@ static const char * const ve0_allowed_mod[] = {
"ip6table_security",
"ip6table_nat",
"ip6table_mangle",
+ "arp_tables",
"nf-nat",
"nf_conncount",
@@ -270,6 +271,7 @@ static const char * const ve0_allowed_mod[] = {
/* ip_set */
"nfnetlink-subsys-6", /* NFNL_SUBSYS_IPSET */
+ "ip_set",
"ip_set_bitmap:ip",
"ip_set_bitmap:ip,mac",
"ip_set_bitmap:port",
@@ -290,6 +292,7 @@ static const char * const ve0_allowed_mod[] = {
"nfsv4",
/* IPVS */
+ "ip_vs", /* requested by the nf_sockopt autoload table */
"ip_vs_ftp",
"ip_vs_nq",
"ip_vs_wlc",
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index 34afcd03b6f6..4613428a9679 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -7,6 +7,15 @@
#include <linux/mutex.h>
#include <net/sock.h>
+#ifdef CONFIG_VE
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_arp/arp_tables.h>
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/ip_vs.h>
+#endif /* CONFIG_VE */
+
#include "nf_internals.h"
/* Sockopts only registered and called from user context, so
@@ -89,13 +98,119 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
return ops;
}
+#ifdef CONFIG_VE
+/**
+ * nf_sockopt_request_module - load the module that serves a netfilter sockopt
+ * @pf: protocol family the option was issued for
+ * @val: socket option number
+ * @get: non-zero for getsockopt(), zero for setsockopt()
+ *
+ * Looks @pf and @val up in a built-in table of module names and option
+ * ranges and requests the first module whose range covers the option.
+ *
+ * Return: -EOPNOTSUPP if no table entry matches, otherwise the value
+ * returned by request_module().
+ */
+static int nf_sockopt_request_module(u8 pf, int val, int get)
+{
+	/* Normally, the sockopt range served by a module is owned by that
+	 * module and registered via nf_register_sockopt().
+	 *
+	 * Here the module is not loaded yet, so it has to be found by
+	 * sockopt number alone.
+	 *
+	 * TODO: evaluate if module aliases or device tables or whatever
+	 * similar could be used to avoid duplication of that information
+	 * in the below lookup table.
+	 */
+	struct table_entry {
+		const char *name;	/* module name to request */
+		u8 pf;			/* protocol family to match */
+		int get_min;		/* getsockopt range, both ends inclusive */
+		int get_max;
+		int set_min;		/* setsockopt range, both ends inclusive */
+		int set_max;
+	};
+
+/* Entry whose get and set ranges both start at <prefix>_BASE_CTL. */
+#define TABLE_ENTRY(_name, _pf, _prefix) { \
+	.name = _name, \
+	.pf = _pf, \
+	.get_min = _prefix ## _BASE_CTL, \
+	.get_max = _prefix ## _SO_GET_MAX, \
+	.set_min = _prefix ## _BASE_CTL, \
+	.set_max = _prefix ## _SO_SET_MAX, \
+}
+/* Entry serving exactly one getsockopt value. The set range is empty
+ * (min > max), so no setsockopt value can ever match it.
+ */
+#define TABLE_ENTRY_SINGLE_GET(_name, _pf, _val) { \
+	.name = _name, \
+	.pf = _pf, \
+	.get_min = _val, \
+	.get_max = _val, \
+	.set_min = 0, \
+	.set_max = -1, \
+}
+
+	/* An entry is present only when the matching CONFIG_*_MODULE
+	 * symbol is defined. The table is never written, hence const.
+	 */
+	static const struct table_entry table[] = {
+#ifdef CONFIG_IP_NF_IPTABLES_MODULE
+		TABLE_ENTRY("ip_tables", PF_INET, IPT),
+#endif
+#ifdef CONFIG_IP6_NF_IPTABLES_MODULE
+		TABLE_ENTRY("ip6_tables", PF_INET6, IP6T),
+#endif
+#ifdef CONFIG_IP_NF_ARPTABLES_MODULE
+		TABLE_ENTRY("arp_tables", PF_INET, ARPT),
+#endif
+#ifdef CONFIG_BRIDGE_NF_EBTABLES_MODULE
+		TABLE_ENTRY("ebtables", PF_INET, EBT),
+#endif
+#ifdef CONFIG_NF_CONNTRACK_MODULE
+		TABLE_ENTRY_SINGLE_GET("nf_conntrack", PF_INET,
+				       SO_ORIGINAL_DST),
+		TABLE_ENTRY_SINGLE_GET("nf_conntrack", PF_INET6,
+				       IP6T_SO_ORIGINAL_DST),
+#endif
+#ifdef CONFIG_IP_SET_MODULE
+		TABLE_ENTRY_SINGLE_GET("ip_set", PF_INET, SO_IP_SET),
+#endif
+#ifdef CONFIG_IP_VS_MODULE
+		TABLE_ENTRY("ip_vs", PF_INET, IP_VS),
+#endif
+	};
+#undef TABLE_ENTRY
+#undef TABLE_ENTRY_SINGLE_GET
+
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(table); i++) {
+		const struct table_entry *e = &table[i];
+		int min = get ? e->get_min : e->set_min;
+		int max = get ? e->get_max : e->set_max;
+
+		if (e->pf != pf || val < min || val > max)
+			continue;
+
+		/* Pass the name as an argument, never as the format string. */
+		return request_module("%s", e->name);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/* Look up the sockopt handler like nf_sockopt_find() does; when the lookup
+ * fails for a caller that is not in the super VE, try to load the module
+ * implementing the option and look it up once more.
+ *
+ * Returns whatever nf_sockopt_find() returned last: the result of the
+ * first lookup if no module was loaded, otherwise that of the retry.
+ */
+static struct nf_sockopt_ops *nf_sockopt_find_ve(struct sock *sk, u_int8_t pf,
+						 int val, int get)
+{
+	struct nf_sockopt_ops *found;
+
+	found = nf_sockopt_find(sk, pf, val, get);
+	if (!IS_ERR(found))
+		return found;
+
+	if (ve_is_super(get_exec_env()))
+		return found;
+
+	/* Containers cannot load the needed modules from userspace, so
+	 * the load is done here on their behalf. To the container it then
+	 * looks as if the module had already been loaded or the driver
+	 * were built into the kernel.
+	 */
+	if (nf_sockopt_request_module(pf, val, get) != 0)
+		return found;
+
+	return nf_sockopt_find(sk, pf, val, get);
+}
+#else /* !CONFIG_VE */
+#define nf_sockopt_find_ve(sk, pf, val, get) nf_sockopt_find(sk, pf, val, get)
+#endif /* !CONFIG_VE */
+
int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, sockptr_t opt,
unsigned int len)
{
struct nf_sockopt_ops *ops;
int ret;
- ops = nf_sockopt_find(sk, pf, val, 0);
+ ops = nf_sockopt_find_ve(sk, pf, val, 0);
if (IS_ERR(ops))
return PTR_ERR(ops);
ret = ops->set(sk, val, opt, len);
@@ -110,7 +225,7 @@ int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
struct nf_sockopt_ops *ops;
int ret;
- ops = nf_sockopt_find(sk, pf, val, 1);
+ ops = nf_sockopt_find_ve(sk, pf, val, 1);
if (IS_ERR(ops))
return PTR_ERR(ops);
ret = ops->get(sk, val, opt, len);
--
2.30.2
More information about the Devel
mailing list