[Devel] [PATCH RH7] nat: allow nft NAT and iptables NAT work on the same node

Konstantin Khorenko khorenko at virtuozzo.com
Mon Dec 28 15:12:31 MSK 2020


On 12/28/2020 09:46 AM, Vasily Averin wrote:
> The netfilter NAT core cannot deal with more than one NAT hook
> per hook location (prerouting, input ...), because the NAT hooks install
> a NAT null binding in case the iptables nat table (iptable_nat hooks)
> or the corresponding nftables chain (nft nat hooks) doesn't specify a nat
> transformation.
>
> Currently iptables NAT hook is called in all netns, even if according
> iptables NAT tables (vpv4 and ipv6 have separate tables) are empty and does nothing.
> this block execution of corrsponding nft NAT hook, even if it is present.
> This is true in reverted direction: if nft NAT hook was called first it blocks
> execution of iptbles nat hook, because corresponding conntrack already have
> NAT null binding.
>
> This patch allows nft nat hook to be sure if coresponding NAT kind is enabled
> and is in use in current net namespace.
>
> The patch does not allow nft to add new NAT chains if netns already have another one:
> either iptables nat or an another nft NAT chain with the same priority.
>
> Patch does not block the loading of the iptables NAT if nft NAT is already present,
> because it is quite rare case.
>
> In general this patch allows to work NAT both on host and inside iptables-containers
> and inside centos8 containers where nftables nat is only used without any additional
> configuration.
>
> https://jira.sw.ru/browse/PSBM-123345
> Signed-off-by: Vasily Averin <vvs at virtuozzo.com>

Reviewed-by: Konstantin Khorenko <khorenko at virtuozzo.com>

> ---
>  include/net/netfilter/nf_nat.h           | 24 +++++++++++++++++-------
>  net/ipv4/netfilter/nf_nat_l3proto_ipv4.c |  4 ++--
>  net/ipv6/netfilter/nf_nat_l3proto_ipv6.c |  4 ++--
>  net/netfilter/core.c                     | 13 ++++++++++++-
>  4 files changed, 33 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
> index c9ca6ed..4308180 100644
> --- a/include/net/netfilter/nf_nat.h
> +++ b/include/net/netfilter/nf_nat.h
> @@ -79,23 +79,33 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
>  }
>
>  /*
> - * Check if nft chain's netns fits conntrack netns.
> + * Check if netns have enabled NAT.
>   * Uses ops->is_nft_ops flag to detect nft ops.
> - * If ops is not nft-related, the check is considered passed.
> + * If ops is not nft-related we need to be sure
> + * that according ipt nat table is not empty and can be in use.
>   */
> -#define is_valid_netns(ops, ct) ({					\
> +#define netns_nat_check(ops, pf, __net) ({				\
>  	const struct nft_chain *__chain;				\
>  	const struct net *__chain_net;					\
> -	const struct net *__net;					\
>  	bool __ret;							\
>  									\
>  	if (ops->is_nft_ops) {						\
>  		__chain = ops->priv;					\
>  		__chain_net = read_pnet(&nft_base_chain(__chain)->pnet);\
> -		__net = nf_ct_net(ct);					\
>  		__ret = net_eq(__net, __chain_net);			\
> -	} else								\
> -		__ret = true;						\
> +	} else {							\
> +		struct xt_table_info *__priv = NULL;			\
> +		if (pf == NFPROTO_IPV4 &&				\
> +		    !IS_ERR_OR_NULL(__net->ipv4.nat_table))		\
> +			__priv = __net->ipv4.nat_table->private		\
							       ^^^^^

Add ";".
Just for code beauty. :)

> +		else if (pf == NFPROTO_IPV6 &&				\
> +			 !IS_ERR_OR_NULL(__net->ipv6.ip6table_nat))	\
> +			__priv = __net->ipv6.ip6table_nat->private;	\
> +		if (__priv && __priv->number > __priv->initial_entries)	\
> +			__ret = true;					\
> +		else							\
> +			__ret = false;					\
> +	}								\
>  	__ret;								\
>  })
>  #endif
> diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> index 3a261e2..a202287 100644
> --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> @@ -292,8 +292,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
>  		if (!nf_nat_initialized(ct, maniptype)) {
>  			unsigned int ret;
>
> -			/* Ignore nft chains with wrong netns. */
> -			if (!is_valid_netns(ops, ct))
> +			/* Ignore if nft/ipt NAT is not used in this netns */
> +			if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
>  				return NF_ACCEPT;
>
>  			ret = do_chain(ops, skb, state, ct);
> diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> index fdedb60..99a6799 100644
> --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> @@ -305,8 +305,8 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
>  		if (!nf_nat_initialized(ct, maniptype)) {
>  			unsigned int ret;
>
> -			/* Ignore nft chains with wrong netns. */
> -			if (!is_valid_netns(ops, ct))
> +			/* Ignore if nft/ipt NAT is not used in this netns */
> +			if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
>  				return NF_ACCEPT;
>
>  			ret = do_chain(ops, skb, state, ct);
> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
> index 4ad3d13..74dee8c 100644
> --- a/net/netfilter/core.c
> +++ b/net/netfilter/core.c
> @@ -64,7 +64,8 @@ EXPORT_SYMBOL(nf_hooks_needed);
>  #endif
>
>  static DEFINE_MUTEX(nf_hook_mutex);
> -
> +#include <net/netfilter/nf_tables.h>
> +#include <net/netfilter/nf_nat.h>
>  int nf_register_hook(struct nf_hook_ops *reg)
>  {
>  	struct nf_hook_ops *elem;
> @@ -73,6 +74,16 @@ int nf_register_hook(struct nf_hook_ops *reg)
>  	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
>  		if (reg->priority < elem->priority)
>  			break;
> +		else if ((reg->priority == elem->priority) && reg->is_nft_ops) {
> +			const struct nft_chain *c = reg->priv;
> +			struct net *net = read_pnet(&nft_base_chain(c)->pnet);
> +
> +			/* fail if netns already have enabled nft/ipt nat */
> +			if (netns_nat_check(elem, reg->pf, net)) {
> +				mutex_unlock(&nf_hook_mutex);
> +				return -EBUSY;
> +			}
> +		}
>  	}
>  	list_add_rcu(&reg->list, elem->list.prev);
>  	mutex_unlock(&nf_hook_mutex);
>


More information about the Devel mailing list