[Devel] [PATCH vz9 v2] ve/netfilter: Add autoloading of sockopt modules

Konstantin Khorenko khorenko at virtuozzo.com
Tue Nov 16 18:59:51 MSK 2021


ack

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 11.11.2021 19:45, Nikita Yushchenko wrote:
> Partially based on vz8 commit 3cd26ece5d16 ("ve/netfilter:
> Add autoloading of sockopt modules").
> Original task: https://jira.sw.ru/browse/PSBM-28910
> 
> On netfilter [gs]etsockopt() call, if implementation of the requested
> operation is missing in the currently running kernel, try to load the
> module containing that implementation.
> 
> A hardcoded table is used to look up the name of the module to load.
> This table contains information about all in-tree kernel modules that
> provide netfilter [gs]etsockopt() operations, as of kernel 5.14 version.
> 
> Unlike previous versions of this functionality, CAP_NET_ADMIN is no
> longer required to try module loading. Not all [gs]etsockopt()
> operations require additional privileges, and issuing one that does not
> require them shall not fail due to a missing module. The required
> permission checks are performed inside the operations themselves.
> 
> Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
> ---
> v2:
> - fix typos in commit message
> 
>   kernel/kmod.c              |   3 +
>   net/netfilter/nf_sockopt.c | 119 ++++++++++++++++++++++++++++++++++++-
>   2 files changed, 120 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/kmod.c b/kernel/kmod.c
> index 678735dbb969..16563ff101f3 100644
> --- a/kernel/kmod.c
> +++ b/kernel/kmod.c
> @@ -214,6 +214,7 @@ static const char * const ve0_allowed_mod[] = {
>   	"ip6table_security",
>   	"ip6table_nat",
>   	"ip6table_mangle",
> +	"arp_tables",
>   
>   	"nf-nat",
>   	"nf_conncount",
> @@ -270,6 +271,7 @@ static const char * const ve0_allowed_mod[] = {
>   
>   	/* ip_set */
>   	"nfnetlink-subsys-6",		/* NFNL_SUBSYS_IPSET */
> +	"ip_set",
>   	"ip_set_bitmap:ip",
>   	"ip_set_bitmap:ip,mac",
>   	"ip_set_bitmap:port",
> @@ -290,6 +292,7 @@ static const char * const ve0_allowed_mod[] = {
>   	"nfsv4",
>   
>   	/* IPVS */
> +	"ip_vs",
>   	"ip_vs_ftp",
>   	"ip_vs_nq",
>   	"ip_vs_wlc",
> diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
> index 34afcd03b6f6..4613428a9679 100644
> --- a/net/netfilter/nf_sockopt.c
> +++ b/net/netfilter/nf_sockopt.c
> @@ -7,6 +7,15 @@
>   #include <linux/mutex.h>
>   #include <net/sock.h>
>   
> +#ifdef CONFIG_VE
> +#include <linux/netfilter_ipv4/ip_tables.h>
> +#include <linux/netfilter_ipv6/ip6_tables.h>
> +#include <linux/netfilter_arp/arp_tables.h>
> +#include <linux/netfilter_bridge/ebtables.h>
> +#include <linux/netfilter/ipset/ip_set.h>
> +#include <linux/ip_vs.h>
> +#endif /* CONFIG_VE */
> +
>   #include "nf_internals.h"
>   
>   /* Sockopts only registered and called from user context, so
> @@ -89,13 +98,119 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
>   	return ops;
>   }
>   
> +#ifdef CONFIG_VE
> +static int nf_sockopt_request_module(u8 pf, int val, int get)
> +{
> +	/* Map a netfilter sockopt number to the module implementing it and
> +	 * request that module. Normally a sockopt range is owned by its
> +	 * module and registered via nf_register_sockopt(); the module is not
> +	 * loaded yet here, so the ranges are duplicated in the table below.
> +	 * Returns -EOPNOTSUPP if no entry matches (pf, val, get), otherwise
> +	 * the result of request_module().
> +	 * TODO: evaluate if module aliases, device tables or similar could be
> +	 * used to avoid duplicating that information in the lookup table.
> +	 */
> +	struct table_entry {
> +		const char *name;
> +		u8 pf;
> +		int get_min;
> +		int get_max;
> +		int set_min;
> +		int set_max;
> +	};
> +
> +#define TABLE_ENTRY(_name, _pf, _prefix) {		\
> +	.name = _name,					\
> +	.pf = _pf,					\
> +	.get_min = _prefix ## _BASE_CTL,		\
> +	.get_max = _prefix ## _SO_GET_MAX,		\
> +	.set_min = _prefix ## _BASE_CTL,		\
> +	.set_max = _prefix ## _SO_SET_MAX,		\
> +}
> +#define TABLE_ENTRY_SINGLE_GET(_name, _pf, _val) {	\
> +	.name = _name,					\
> +	.pf = _pf,					\
> +	.get_min = _val,				\
> +	.get_max = _val,				\
> +	.set_min = 0,					\
> +	.set_max = -1,					\
> +}
> +
> +	static struct table_entry table[] = {
> +#ifdef CONFIG_IP_NF_IPTABLES_MODULE
> +		TABLE_ENTRY("ip_tables", PF_INET, IPT),
> +#endif
> +#ifdef CONFIG_IP6_NF_IPTABLES_MODULE
> +		TABLE_ENTRY("ip6_tables", PF_INET6, IP6T),
> +#endif
> +#ifdef CONFIG_IP_NF_ARPTABLES_MODULE
> +		TABLE_ENTRY("arp_tables", PF_INET, ARPT),
> +#endif
> +#ifdef CONFIG_BRIDGE_NF_EBTABLES_MODULE
> +		TABLE_ENTRY("ebtables", PF_INET, EBT),
> +#endif
> +#ifdef CONFIG_NF_CONNTRACK_MODULE
> +		TABLE_ENTRY_SINGLE_GET("nf_conntrack", PF_INET,
> +				       SO_ORIGINAL_DST),
> +		TABLE_ENTRY_SINGLE_GET("nf_conntrack", PF_INET6,
> +				       IP6T_SO_ORIGINAL_DST),
> +#endif
> +#ifdef CONFIG_IP_SET_MODULE
> +		TABLE_ENTRY_SINGLE_GET("ip_set", PF_INET, SO_IP_SET),
> +#endif
> +#ifdef CONFIG_IP_VS_MODULE
> +		TABLE_ENTRY("ip_vs", PF_INET, IP_VS),
> +#endif
> +	};
> +#undef TABLE_ENTRY
> +#undef TABLE_ENTRY_SINGLE_GET
> +
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(table); i++) {
> +		if (pf != table[i].pf)
> +			continue;
> +		if (get && val >= table[i].get_min && val <= table[i].get_max)
> +			break;
> +		if (!get && val >= table[i].set_min && val <= table[i].set_max)
> +			break;
> +	}
> +
> +	if (i == ARRAY_SIZE(table))
> +		return -EOPNOTSUPP;
> +
> +	return request_module(table[i].name);
> +}
> +
> +static struct nf_sockopt_ops *nf_sockopt_find_ve(struct sock *sk, u_int8_t pf,
> +						 int val, int get)
> +{
> +	struct nf_sockopt_ops *ops = nf_sockopt_find(sk, pf, val, get);
> +
> +	if (!IS_ERR(ops) || ve_is_super(get_exec_env()))
> +		return ops;
> +
> +	/* The lookup failed and the caller is not the super VE. Containers
> +	 * cannot load modules from userspace, so request the module that
> +	 * provides this sockopt on their behalf and retry the lookup. To the
> +	 * container it looks as if the module was already loaded or built in.
> +	 */
> +	if (nf_sockopt_request_module(pf, val, get) == 0)
> +		ops = nf_sockopt_find(sk, pf, val, get);
> +
> +	return ops;
> +}
> +#else /* !CONFIG_VE */
> +#define nf_sockopt_find_ve(sk, pf, val, get)	nf_sockopt_find(sk, pf, val, get)
> +#endif /* !CONFIG_VE */
> +
>   int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, sockptr_t opt,
>   		  unsigned int len)
>   {
>   	struct nf_sockopt_ops *ops;
>   	int ret;
>   
> -	ops = nf_sockopt_find(sk, pf, val, 0);
> +	ops = nf_sockopt_find_ve(sk, pf, val, 0);
>   	if (IS_ERR(ops))
>   		return PTR_ERR(ops);
>   	ret = ops->set(sk, val, opt, len);
> @@ -110,7 +225,7 @@ int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
>   	struct nf_sockopt_ops *ops;
>   	int ret;
>   
> -	ops = nf_sockopt_find(sk, pf, val, 1);
> +	ops = nf_sockopt_find_ve(sk, pf, val, 1);
>   	if (IS_ERR(ops))
>   		return PTR_ERR(ops);
>   	ret = ops->get(sk, val, opt, len);
> 


More information about the Devel mailing list