[Devel] [PATCH vz9 v2] ve/netfilter: Add autoloading of sockopt modules

Konstantin Khorenko khorenko at virtuozzo.com
Tue Nov 16 19:09:23 MSK 2021


On 11.11.2021 19:45, Nikita Yushchenko wrote:
> Partially based on vz8 commit 3cd26ece5d16 ("ve/netfilter:
> Add autoloading of sockopt modules").
> Original task: https://jira.sw.ru/browse/PSBM-28910
> 
> On netfilter [gs]etsockopt() call, if implementation of the requested
> operation is missing in the currently running kernel, try to load the
> module containing that implementation.
> 
> A hardcoded table is used to look up the name of the module to load.
> This table contains information about all in-tree kernel modules that
> provide netfilter [gs]etsockopt() operations, as of kernel 5.14 version.
> 
> Unlike previous versions of this functionality, CAP_NET_ADMIN is no
> longer required to try module loading. Not all [gs]etsockopt()
> operations require additional privileges, and issuing one not requiring
> them shall not fail due to missing module. Required permission checks
> are performed inside operations.

So, effectively, we previously denied module autoloading by unprivileged processes
(keeping in mind that some getsockopts are available to unprivileged processes on the host) and by ALL
processes from a nested netns.

Now modules are autoloaded by any process (privileged/unprivileged) from any netns, even a nested one.
Note: modules are autoloaded even if the [gs]etsockopt() call subsequently fails its permission checks.

I don't see any problem with the possibly too broad permissions for module autoloading:
we do maintain a whitelist of allowed modules and consider them safe.


> 
> Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
> ---
> v2:
> - fix typos in commit message
> 
>   kernel/kmod.c              |   3 +
>   net/netfilter/nf_sockopt.c | 119 ++++++++++++++++++++++++++++++++++++-
>   2 files changed, 120 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/kmod.c b/kernel/kmod.c
> index 678735dbb969..16563ff101f3 100644
> --- a/kernel/kmod.c
> +++ b/kernel/kmod.c
> @@ -214,6 +214,7 @@ static const char * const ve0_allowed_mod[] = {
>   	"ip6table_security",
>   	"ip6table_nat",
>   	"ip6table_mangle",
> +	"arp_tables",
>   
>   	"nf-nat",
>   	"nf_conncount",
> @@ -270,6 +271,7 @@ static const char * const ve0_allowed_mod[] = {
>   
>   	/* ip_set */
>   	"nfnetlink-subsys-6",		/* NFNL_SUBSYS_IPSET */
> +	"ip_set",
>   	"ip_set_bitmap:ip",
>   	"ip_set_bitmap:ip,mac",
>   	"ip_set_bitmap:port",
> @@ -290,6 +292,7 @@ static const char * const ve0_allowed_mod[] = {
>   	"nfsv4",
>   
>   	/* IPVS */
> +	"ip_vs",
>   	"ip_vs_ftp",
>   	"ip_vs_nq",
>   	"ip_vs_wlc",
> diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
> index 34afcd03b6f6..4613428a9679 100644
> --- a/net/netfilter/nf_sockopt.c
> +++ b/net/netfilter/nf_sockopt.c
> @@ -7,6 +7,15 @@
>   #include <linux/mutex.h>
>   #include <net/sock.h>
>   
> +#ifdef CONFIG_VE
> +#include <linux/netfilter_ipv4/ip_tables.h>
> +#include <linux/netfilter_ipv6/ip6_tables.h>
> +#include <linux/netfilter_arp/arp_tables.h>
> +#include <linux/netfilter_bridge/ebtables.h>
> +#include <linux/netfilter/ipset/ip_set.h>
> +#include <linux/ip_vs.h>
> +#endif /* CONFIG_VE */
> +
>   #include "nf_internals.h"
>   
>   /* Sockopts only registered and called from user context, so
> @@ -89,13 +98,119 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
>   	return ops;
>   }
>   
> +#ifdef CONFIG_VE
> +static int nf_sockopt_request_module(u8 pf, int val, int get)
> +{
> +	/* Normally, information of sockopt range provided by a module is owned
> +	 * by that module, and registered via nf_register_sockopt().
> +	 *
> +	 * But here we need to find a not-yet-loaded module by a sockopt number.
> +	 *
> +	 * TODO: evaluate if module aliases or device tables or whatever
> +	 * similar could be used to avoid duplication of that information
> +	 * in the below lookup table.
> +	 */
> +	struct table_entry {
> +		const char *name;
> +		u8 pf;
> +		int get_min;
> +		int get_max;
> +		int set_min;
> +		int set_max;
> +	};
> +
> +#define TABLE_ENTRY(_name, _pf, _prefix) {		\
> +	.name = _name,					\
> +	.pf = _pf,					\
> +	.get_min = _prefix ## _BASE_CTL,		\
> +	.get_max = _prefix ## _SO_GET_MAX,		\
> +	.set_min = _prefix ## _BASE_CTL,		\
> +	.set_max = _prefix ## _SO_SET_MAX,		\
> +}
> +#define TABLE_ENTRY_SINGLE_GET(_name, _pf, _val) {	\
> +	.name = _name,					\
> +	.pf = _pf,					\
> +	.get_min = _val,				\
> +	.get_max = _val,				\
> +	.set_min = 0,					\
> +	.set_max = -1,					\
> +}
> +
> +	static struct table_entry table[] = {
> +#ifdef CONFIG_IP_NF_IPTABLES_MODULE
> +		TABLE_ENTRY("ip_tables", PF_INET, IPT),
> +#endif
> +#ifdef CONFIG_IP6_NF_IPTABLES_MODULE
> +		TABLE_ENTRY("ip6_tables", PF_INET6, IP6T),
> +#endif
> +#ifdef CONFIG_IP_NF_ARPTABLES_MODULE
> +		TABLE_ENTRY("arp_tables", PF_INET, ARPT),
> +#endif
> +#ifdef CONFIG_BRIDGE_NF_EBTABLES_MODULE
> +		TABLE_ENTRY("ebtables", PF_INET, EBT),
> +#endif
> +#ifdef CONFIG_NF_CONNTRACK_MODULE
> +		TABLE_ENTRY_SINGLE_GET("nf_conntrack", PF_INET,
> +				       SO_ORIGINAL_DST),
> +		TABLE_ENTRY_SINGLE_GET("nf_conntrack", PF_INET6,
> +				       IP6T_SO_ORIGINAL_DST),
> +#endif
> +#ifdef CONFIG_IP_SET_MODULE
> +		TABLE_ENTRY_SINGLE_GET("ip_set", PF_INET, SO_IP_SET),
> +#endif
> +#ifdef CONFIG_IP_VS_MODULE
> +		TABLE_ENTRY("ip_vs", PF_INET, IP_VS),
> +#endif
> +	};
> +#undef TABLE_ENTRY
> +#undef TABLE_ENTRY_SINGLE_GET
> +
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(table); i++) {
> +		if (pf != table[i].pf)
> +			continue;
> +		if (get && val >= table[i].get_min && val <= table[i].get_max)
> +			break;
> +		if (!get && val >= table[i].set_min && val <= table[i].set_max)
> +			break;
> +	}
> +
> +	if (i == ARRAY_SIZE(table))
> +		return -EOPNOTSUPP;
> +
> +	return request_module(table[i].name);
> +}
> +
> +static struct nf_sockopt_ops *nf_sockopt_find_ve(struct sock *sk, u_int8_t pf,
> +						 int val, int get)
> +{
> +	struct nf_sockopt_ops *ops = nf_sockopt_find(sk, pf, val, get);
> +
> +	if (!IS_ERR(ops) || ve_is_super(get_exec_env()))
> +		return ops;
> +
> +	/* Containers are not able to load appropriate modules
> +	 * from userspace. As a trick, we help them here. For containers
> +	 * this looks like module is already loaded or driver
> +	 * is built in kernel.
> +	 */
> +	if (nf_sockopt_request_module(pf, val, get) == 0)
> +		ops = nf_sockopt_find(sk, pf, val, get);
> +
> +	return ops;
> +}
> +#else /* !CONFIG_VE */
> +#define nf_sockopt_find_ve(sk, pf, val, get)	nf_sockopt_find(sk, pf, val, get)
> +#endif /* !CONFIG_VE */
> +
>   int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, sockptr_t opt,
>   		  unsigned int len)
>   {
>   	struct nf_sockopt_ops *ops;
>   	int ret;
>   
> -	ops = nf_sockopt_find(sk, pf, val, 0);
> +	ops = nf_sockopt_find_ve(sk, pf, val, 0);
>   	if (IS_ERR(ops))
>   		return PTR_ERR(ops);
>   	ret = ops->set(sk, val, opt, len);
> @@ -110,7 +225,7 @@ int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt,
>   	struct nf_sockopt_ops *ops;
>   	int ret;
>   
> -	ops = nf_sockopt_find(sk, pf, val, 1);
> +	ops = nf_sockopt_find_ve(sk, pf, val, 1);
>   	if (IS_ERR(ops))
>   		return PTR_ERR(ops);
>   	ret = ops->get(sk, val, opt, len);
> 


More information about the Devel mailing list