[Devel] [PATCH RHEL7 COMMIT] nat: allow nft NAT and iptables NAT work on the same node

Vasily Averin vvs at virtuozzo.com
Mon Dec 28 09:52:43 MSK 2020


The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.5
------>
commit bd3f66019d759e8d366bee23679092c7cd30f5ae
Author: Vasily Averin <vvs at virtuozzo.com>
Date:   Mon Dec 28 09:52:43 2020 +0300

    nat: allow nft NAT and iptables NAT work on the same node
    
    The netfilter NAT core cannot deal with more than one NAT hook
    per hook location (prerouting, input ...), because the NAT hooks install
    a NAT null binding in case the iptables nat table (iptable_nat hooks)
    or the corresponding nftables chain (nft nat hooks) doesn't specify a nat
    transformation.
    
    Currently iptables NAT hook is called in all netns, even if according
    iptables NAT tables (vpv4 and ipv6 have separate tables) are empty and does nothing.
    this block execution of corrsponding nft NAT hook, even if it is present.
    This is true in reverted direction: if nft NAT hook was called first it blocks
    execution of iptbles nat hook, because corresponding conntrack already have
    NAT null binding.
    
    This patch allows nft nat hook to be sure if coresponding NAT kind is enabled
    and is in use in current net namespace.
    
    The patch does not allow nft to add new NAT chains if netns already have another one:
    either iptables nat or an another nft NAT chain with the same priority.
    
    Patch does not block the loading of the iptables NAT if nft NAT is already present,
    because it is quite rare case.
    
    In general this patch allows to work NAT both on host and inside iptables-containers
    and inside centos8 containers where nftables nat is only used without any additional
    configuration.
    
    https://jira.sw.ru/browse/PSBM-123345
    Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
---
 include/net/netfilter/nf_nat.h           | 24 +++++++++++++++++-------
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c |  4 ++--
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c |  4 ++--
 net/netfilter/core.c                     | 13 ++++++++++++-
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index c9ca6ed..4308180 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -79,23 +79,33 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
 }
 
 /*
- * Check if nft chain's netns fits conntrack netns.
+ * Check if netns have enabled NAT.
  * Uses ops->is_nft_ops flag to detect nft ops.
- * If ops is not nft-related, the check is considered passed.
+ * If ops is not nft-related we need to be sure
+ * that according ipt nat table is not empty and can be in use.
  */
-#define is_valid_netns(ops, ct) ({					\
+#define netns_nat_check(ops, pf, __net) ({				\
 	const struct nft_chain *__chain;				\
 	const struct net *__chain_net;					\
-	const struct net *__net;					\
 	bool __ret;							\
 									\
 	if (ops->is_nft_ops) {						\
 		__chain = ops->priv;					\
 		__chain_net = read_pnet(&nft_base_chain(__chain)->pnet);\
-		__net = nf_ct_net(ct);					\
 		__ret = net_eq(__net, __chain_net);			\
-	} else								\
-		__ret = true;						\
+	} else {							\
+		struct xt_table_info *__priv = NULL;			\
+		if (pf == NFPROTO_IPV4 &&				\
+		    !IS_ERR_OR_NULL(__net->ipv4.nat_table))		\
+			__priv = __net->ipv4.nat_table->private		\
+		else if (pf == NFPROTO_IPV6 &&				\
+			 !IS_ERR_OR_NULL(__net->ipv6.ip6table_nat))	\
+			__priv = __net->ipv6.ip6table_nat->private;	\
+		if (__priv && __priv->number > __priv->initial_entries)	\
+			__ret = true;					\
+		else							\
+			__ret = false;					\
+	}								\
 	__ret;								\
 })
 #endif
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 3a261e2..a202287 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -292,8 +292,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			/* Ignore nft chains with wrong netns. */
-			if (!is_valid_netns(ops, ct))
+			/* Ignore if nft/ipt NAT is not used in this netns */
+			if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
 				return NF_ACCEPT;
 
 			ret = do_chain(ops, skb, state, ct);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index fdedb60..99a6799 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -305,8 +305,8 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			/* Ignore nft chains with wrong netns. */
-			if (!is_valid_netns(ops, ct))
+			/* Ignore if nft/ipt NAT is not used in this netns */
+			if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
 				return NF_ACCEPT;
 
 			ret = do_chain(ops, skb, state, ct);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 4ad3d13..74dee8c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -64,7 +64,8 @@ EXPORT_SYMBOL(nf_hooks_needed);
 #endif
 
 static DEFINE_MUTEX(nf_hook_mutex);
-
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
 int nf_register_hook(struct nf_hook_ops *reg)
 {
 	struct nf_hook_ops *elem;
@@ -73,6 +74,16 @@ int nf_register_hook(struct nf_hook_ops *reg)
 	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
 		if (reg->priority < elem->priority)
 			break;
+		else if ((reg->priority == elem->priority) && reg->is_nft_ops) {
+			const struct nft_chain *c = reg->priv;
+			struct net *net = read_pnet(&nft_base_chain(c)->pnet);
+
+			/* fail if netns already have enabled nft/ipt nat */
+			if (netns_nat_check(elem, reg->pf, net)) {
+				mutex_unlock(&nf_hook_mutex);
+				return -EBUSY;
+			}
+		}
 	}
 	list_add_rcu(&reg->list, elem->list.prev);
 	mutex_unlock(&nf_hook_mutex);


More information about the Devel mailing list