[Devel] [PATCH RH7] nat: allow nft NAT and iptables NAT work on the same node
Vasily Averin
vvs at virtuozzo.com
Mon Dec 28 09:46:28 MSK 2020
The netfilter NAT core cannot deal with more than one NAT hook
per hook location (prerouting, input ...), because the NAT hooks install
a NAT null binding in case the iptables nat table (iptable_nat hooks)
or the corresponding nftables chain (nft nat hooks) doesn't specify a nat
transformation.
Currently the iptables NAT hook is called in all netns, even if the corresponding
iptables NAT tables (ipv4 and ipv6 have separate tables) are empty and do nothing.
This blocks execution of the corresponding nft NAT hook, even if it is present.
This is also true in the reverse direction: if the nft NAT hook was called first, it blocks
execution of the iptables nat hook, because the corresponding conntrack already has
a NAT null binding.
This patch allows the nft nat hook to check whether the corresponding NAT kind is enabled
and is in use in the current net namespace.
The patch does not allow nft to add new NAT chains if the netns already has another one:
either iptables nat or another nft NAT chain with the same priority.
The patch does not block the loading of the iptables NAT if nft NAT is already present,
because this is quite a rare case.
In general, this patch allows NAT to work on the host, inside iptables containers,
and inside centos8 containers where only nftables nat is used, without any additional
configuration.
https://jira.sw.ru/browse/PSBM-123345
Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
---
include/net/netfilter/nf_nat.h | 24 +++++++++++++++++-------
net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 4 ++--
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 4 ++--
net/netfilter/core.c | 13 ++++++++++++-
4 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index c9ca6ed..4308180 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -79,23 +79,33 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
}
/*
- * Check if nft chain's netns fits conntrack netns.
+ * Check whether NAT is enabled and can be in use in the given netns.
* Uses ops->is_nft_ops flag to detect nft ops.
- * If ops is not nft-related, the check is considered passed.
+ * If ops is not nft-related, make sure that the corresponding
+ * ipt nat table is not empty and therefore can be in use.
*/
-#define is_valid_netns(ops, ct) ({ \
+#define netns_nat_check(ops, pf, __net) ({ \
const struct nft_chain *__chain; \
const struct net *__chain_net; \
- const struct net *__net; \
bool __ret; \
\
if (ops->is_nft_ops) { \
__chain = ops->priv; \
__chain_net = read_pnet(&nft_base_chain(__chain)->pnet);\
- __net = nf_ct_net(ct); \
__ret = net_eq(__net, __chain_net); \
- } else \
- __ret = true; \
+ } else { \
+ struct xt_table_info *__priv = NULL; \
+ if (pf == NFPROTO_IPV4 && \
+ !IS_ERR_OR_NULL(__net->ipv4.nat_table)) \
+ __priv = __net->ipv4.nat_table->private; \
+ else if (pf == NFPROTO_IPV6 && \
+ !IS_ERR_OR_NULL(__net->ipv6.ip6table_nat)) \
+ __priv = __net->ipv6.ip6table_nat->private; \
+ if (__priv && __priv->number > __priv->initial_entries) \
+ __ret = true; \
+ else \
+ __ret = false; \
+ } \
__ret; \
})
#endif
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 3a261e2..a202287 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -292,8 +292,8 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- /* Ignore nft chains with wrong netns. */
- if (!is_valid_netns(ops, ct))
+ /* Ignore if nft/ipt NAT is not used in this netns */
+ if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
return NF_ACCEPT;
ret = do_chain(ops, skb, state, ct);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index fdedb60..99a6799 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -305,8 +305,8 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- /* Ignore nft chains with wrong netns. */
- if (!is_valid_netns(ops, ct))
+ /* Ignore if nft/ipt NAT is not used in this netns */
+ if (!netns_nat_check(ops, ops->pf, nf_ct_net(ct)))
return NF_ACCEPT;
ret = do_chain(ops, skb, state, ct);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 4ad3d13..74dee8c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -64,7 +64,8 @@ EXPORT_SYMBOL(nf_hooks_needed);
#endif
static DEFINE_MUTEX(nf_hook_mutex);
-
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
int nf_register_hook(struct nf_hook_ops *reg)
{
struct nf_hook_ops *elem;
@@ -73,6 +74,16 @@ int nf_register_hook(struct nf_hook_ops *reg)
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
if (reg->priority < elem->priority)
break;
+ else if ((reg->priority == elem->priority) && reg->is_nft_ops) {
+ const struct nft_chain *c = reg->priv;
+ struct net *net = read_pnet(&nft_base_chain(c)->pnet);
+
+ /* fail if netns already have enabled nft/ipt nat */
+ if (netns_nat_check(elem, reg->pf, net)) {
+ mutex_unlock(&nf_hook_mutex);
+ return -EBUSY;
+ }
+ }
}
list_add_rcu(&reg->list, elem->list.prev);
mutex_unlock(&nf_hook_mutex);
--
1.8.3.1
More information about the Devel
mailing list