[Devel] [PATCH RH7] nat: allow nft NAT and iptables NAT work on the same node
Konstantin Khorenko
khorenko at virtuozzo.com
Mon Dec 28 15:12:31 MSK 2020
On 12/28/2020 09:46 AM, Vasily Averin wrote:
> The netfilter NAT core cannot deal with more than one NAT hook
> per hook location (prerouting, input ...), because the NAT hooks install
> a NAT null binding in case the iptables nat table (iptable_nat hooks)
> or the corresponding nftables chain (nft nat hooks) doesn't specify a nat
> transformation.
>
> Currently iptables NAT hook is called in all netns, even if according
> iptables NAT tables (vpv4 and ipv6 have separate tables) are empty and does nothing.
> this block execution of corrsponding nft NAT hook, even if it is present.
> This is true in reverted direction: if nft NAT hook was called first it blocks
> execution of iptbles nat hook, because corresponding conntrack already have
> NAT null binding.
>
> This patch allows nft nat hook to be sure if coresponding NAT kind is enabled
> and is in use in current net namespace.
>
> The patch does not allow nft to add new NAT chains if netns already have another one:
> either iptables nat or an another nft NAT chain with the same priority.
>
> Patch does not block the loading of the iptables NAT if nft NAT is already present,
> because it is quite rare case.
>
> In general this patch allows to work NAT both on host and inside iptables-containers
> and inside centos8 containers where nftables nat is only used without any additional
> configuration.
>
> https://jira.sw.ru/browse/PSBM-123345
> Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
Reviewed-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> ---
> include/net/netfilter/nf_nat.h | 24 +++++++++++++++++-------
> net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 4 ++--
> net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 ++--
> net/netfilter/core.c | 13 ++++++++++++-
> 4 files changed, 33 insertions(+), 12 deletions(-)
>
> diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
> index c9ca6ed..4308180 100644
> --- a/include/net/netfilter/nf_nat.h
> +++ b/include/net/netfilter/nf_nat.h
> @@ -79,23 +79,33 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
> }
>
> /*
> - * Check if nft chain's netns fits conntrack netns.
> + * Check if netns have enabled NAT.
> * Uses ops->is_nft_ops flag to detect nft ops.
> - * If ops is not nft-related, the check is considered passed.
> + * If ops is not nft-related we need to be sure
> + * that according ipt nat table is not empty and can be in use.
> */
> -#define is_valid_netns(ops, ct) ({ \
> +#define netns_nat_check(ops, pf, __net) ({ \
> const struct nft_chain *__chain; \
> const struct net *__chain_net; \
> - const struct net *__net; \
> bool __ret; \
> \
> if (ops->is_nft_ops) { \
> __chain = ops->priv; \
> __chain_net = read_pnet(&nft_base_chain(__chain)->pnet);\
> - __net = nf_ct_net(ct); \
> __ret = net_eq(__net, __chain_net); \
> - } else \
> - __ret = true; \
> + } else { \
> + struct xt_table_info *__priv = NULL; \
> + if (pf == NFPROTO_IPV4 && \
> + !IS_ERR_OR_NULL(__net->ipv4.nat_table)) \
> + __priv = __net->ipv4.nat_table->private \
^^^^^
Add ";".
Just for code beauty. :)
> + else if (pf == NFPROTO_IPV6 && \
> + !IS_ERR_OR_NULL(__net->ipv6.ip6table_nat)) \
> + __priv = __net->ipv6.ip6table_nat->private; \
> + if (__priv && __priv->number > __priv->initial_entries) \
> + __ret = true; \
> + else \
> + __ret = false; \
> + } \
> __ret; \
> })
> #endif
> diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> index 3a261e2..a202287 100644
> --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
> @@ -292,8 +292,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
> if (!nf_nat_initialized(ct, maniptype)) {
> unsigned int ret;
>
> - /* Ignore nft chains with wrong netns. */
> - if (!is_valid_netns(ops, ct))
> + /* Ignore if nft/ipt NAT is not used in this netns */
> + if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
> return NF_ACCEPT;
>
> ret = do_chain(ops, skb, state, ct);
> diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> index fdedb60..99a6799 100644
> --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
> @@ -305,8 +305,8 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
> if (!nf_nat_initialized(ct, maniptype)) {
> unsigned int ret;
>
> - /* Ignore nft chains with wrong netns. */
> - if (!is_valid_netns(ops, ct))
> + /* Ignore if nft/ipt NAT is not used in this netns */
> + if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
> return NF_ACCEPT;
>
> ret = do_chain(ops, skb, state, ct);
> diff --git a/net/netfilter/core.c b/net/netfilter/core.c
> index 4ad3d13..74dee8c 100644
> --- a/net/netfilter/core.c
> +++ b/net/netfilter/core.c
> @@ -64,7 +64,8 @@ EXPORT_SYMBOL(nf_hooks_needed);
> #endif
>
> static DEFINE_MUTEX(nf_hook_mutex);
> -
> +#include <net/netfilter/nf_tables.h>
> +#include <net/netfilter/nf_nat.h>
> int nf_register_hook(struct nf_hook_ops *reg)
> {
> struct nf_hook_ops *elem;
> @@ -73,6 +74,16 @@ int nf_register_hook(struct nf_hook_ops *reg)
> list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
> if (reg->priority < elem->priority)
> break;
> + else if ((reg->priority == elem->priority) && reg->is_nft_ops) {
> + const struct nft_chain *c = reg->priv;
> + struct net *net = read_pnet(&nft_base_chain(c)->pnet);
> +
> + /* fail if netns already have enabled nft/ipt nat */
> + if (netns_nat_check(elem, reg->pf, net)) {
> + mutex_unlock(&nf_hook_mutex);
> + return -EBUSY;
> + }
> + }
> }
> list_add_rcu(®->list, elem->list.prev);
> mutex_unlock(&nf_hook_mutex);
>
More information about the Devel
mailing list