[Devel] [PATCH RH7 01/10] netfilter: nf_tables: add fib expression

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Thu Feb 4 11:51:53 MSK 2021


From: Florian Westphal <fw at strlen.de>

Add FIB expression, supported for ipv4, ipv6 and inet family (the latter
just dispatches to ipv4 or ipv6 one based on nfproto).

Currently supports fetching output interface index/name and the
rtm_type associated with an address.

This can be used for adding path filtering. rtm_type is useful
to e.g. enforce a strong-end host model where packets
are only accepted if daddr is configured on the interface the
packet arrived on.

The fib expression is a native nftables alternative to the
xtables addrtype and rp_filter matches.

FIB result order for oif/oifname retrieval is as follows:
 - if packet is local (skb has rtable, RTF_LOCAL set, this
   will also catch looped-back multicast packets), set oif to
   the loopback interface.
 - if fib lookup returns an error, or result points to local,
   store zero result.  This means '--local' option of -m rpfilter
   is not supported. It is possible to use 'fib type local' or add
   explicit saddr/daddr matching rules to create exceptions if this
   is really needed.
 - store result in the destination register.
   In case of multiple routes, search set for desired oif in case
   strict matching is requested.

ipv4 and ipv6 behave fib expressions are supposed to behave the same.

[ I have collapsed Arnd Bergmann's ("netfilter: nf_tables: fib warnings")

	http://patchwork.ozlabs.org/patch/688615/

  to address fallout from this patch after rebasing nf-next, that was
  posted to address compilation warnings. --pablo ]

Signed-off-by: Florian Westphal <fw at strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo at netfilter.org>

Rebase changes:
1) replace s/pkt->\(in\|out\|net\|hook\|pf\)/nft_\1(pkt)/
2) remove non-existant FIB_LOOKUP_IGNORE_LINKSTATE

https://jira.sw.ru/browse/PSBM-125002
(cherry picked from commit f6d0cbcf09c506b9b022df8f9d7693a7cec3c732)
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 include/net/netfilter/nft_fib.h          |  31 +++
 include/uapi/linux/netfilter/nf_tables.h |  36 +++
 net/ipv4/netfilter/Kconfig               |   8 +
 net/ipv4/netfilter/Makefile              |   1 +
 net/ipv4/netfilter/nft_fib_ipv4.c        | 238 ++++++++++++++++++++
 net/ipv6/netfilter/Kconfig               |   8 +
 net/ipv6/netfilter/Makefile              |   1 +
 net/ipv6/netfilter/nft_fib_ipv6.c        | 275 +++++++++++++++++++++++
 net/netfilter/Kconfig                    |  13 ++
 net/netfilter/Makefile                   |   2 +
 net/netfilter/nft_fib.c                  | 159 +++++++++++++
 net/netfilter/nft_fib_inet.c             |  82 +++++++
 12 files changed, 854 insertions(+)
 create mode 100644 include/net/netfilter/nft_fib.h
 create mode 100644 net/ipv4/netfilter/nft_fib_ipv4.c
 create mode 100644 net/ipv6/netfilter/nft_fib_ipv6.c
 create mode 100644 net/netfilter/nft_fib.c
 create mode 100644 net/netfilter/nft_fib_inet.c

diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
new file mode 100644
index 0000000000000..cbedda077db2c
--- /dev/null
+++ b/include/net/netfilter/nft_fib.h
@@ -0,0 +1,31 @@
+#ifndef _NFT_FIB_H_
+#define _NFT_FIB_H_
+
+struct nft_fib {
+	enum nft_registers	dreg:8;
+	u8			result;
+	u32			flags;
+};
+
+extern const struct nla_policy nft_fib_policy[];
+
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr);
+int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		 const struct nlattr * const tb[]);
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		     const struct nft_data **data);
+
+
+void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+			const struct nft_pktinfo *pkt);
+void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+		   const struct nft_pktinfo *pkt);
+
+void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+			const struct nft_pktinfo *pkt);
+void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+		   const struct nft_pktinfo *pkt);
+
+void nft_fib_store_result(void *reg, enum nft_fib_result r,
+			  const struct nft_pktinfo *pkt, int index);
+#endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 1c7a759e134c7..d4dc80a47b878 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1043,6 +1043,42 @@ enum nft_gen_attributes {
 };
 #define NFTA_GEN_MAX		(__NFTA_GEN_MAX - 1)
 
+/*
+ * enum nft_fib_attributes - nf_tables fib expression netlink attributes
+ *
+ * @NFTA_FIB_DREG: destination register (NLA_U32)
+ * @NFTA_FIB_RESULT: desired result (NLA_U32)
+ * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32)
+ *
+ * The FIB expression performs a route lookup according
+ * to the packet data.
+ */
+enum nft_fib_attributes {
+	NFTA_FIB_UNSPEC,
+	NFTA_FIB_DREG,
+	NFTA_FIB_RESULT,
+	NFTA_FIB_FLAGS,
+	__NFTA_FIB_MAX
+};
+#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1)
+
+enum nft_fib_result {
+	NFT_FIB_RESULT_UNSPEC,
+	NFT_FIB_RESULT_OIF,
+	NFT_FIB_RESULT_OIFNAME,
+	NFT_FIB_RESULT_ADDRTYPE,
+	__NFT_FIB_RESULT_MAX
+};
+#define NFT_FIB_RESULT_MAX	(__NFT_FIB_RESULT_MAX - 1)
+
+enum nft_fib_flags {
+	NFTA_FIB_F_SADDR	= 1 << 0,	/* look up src */
+	NFTA_FIB_F_DADDR	= 1 << 1,	/* look up dst */
+	NFTA_FIB_F_MARK		= 1 << 2,	/* use skb->mark */
+	NFTA_FIB_F_IIF		= 1 << 3,	/* restrict to iif */
+	NFTA_FIB_F_OIF		= 1 << 4,	/* restrict to oif */
+};
+
 /**
  * enum nft_trace_attributes - nf_tables trace netlink attributes
  *
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0d672e80facd4..e4350958a9ee1 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -64,6 +64,14 @@ config NFT_DUP_IPV4
 	help
 	  This module enables IPv4 packet duplication support for nf_tables.
 
+config NFT_FIB_IPV4
+	select NFT_FIB
+	tristate "nf_tables fib / ip route lookup support"
+	help
+	  This module enables IPv4 FIB lookups, e.g. for reverse path filtering.
+	  It also allows query of the FIB for the route type, e.g. local, unicast,
+	  multicast or blackhole.
+
 endif # NF_TABLES_IPV4
 
 config NF_TABLES_ARP
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 1a5c76d736855..9d415d3fadc35 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
 obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
 obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
 obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
new file mode 100644
index 0000000000000..d4fca6015997f
--- /dev/null
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -0,0 +1,238 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 get_saddr(__be32 addr)
+{
+	if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+	    ipv4_is_zeronet(addr))
+		return 0;
+	return addr;
+}
+
+static bool fib4_is_local(const struct sk_buff *skb)
+{
+	const struct rtable *rt = skb_rtable(skb);
+
+	return rt && (rt->rt_flags & RTCF_LOCAL);
+}
+
+#define DSCP_BITS     0xfc
+
+void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+			const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+	u32 *dst = &regs->data[priv->dreg];
+	const struct net_device *dev = NULL;
+	const struct iphdr *iph;
+	__be32 addr;
+
+	if (priv->flags & NFTA_FIB_F_IIF)
+		dev = nft_in(pkt);
+	else if (priv->flags & NFTA_FIB_F_OIF)
+		dev = nft_out(pkt);
+
+	iph = ip_hdr(pkt->skb);
+	if (priv->flags & NFTA_FIB_F_DADDR)
+		addr = iph->daddr;
+	else
+		addr = iph->saddr;
+
+	*dst = inet_dev_addr_type(nft_net(pkt), dev, addr);
+}
+EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
+
+static int get_ifindex(const struct net_device *dev)
+{
+	return dev ? dev->ifindex : 0;
+}
+
+void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+		   const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+	u32 *dest = &regs->data[priv->dreg];
+	const struct iphdr *iph;
+	struct fib_result res;
+	struct flowi4 fl4 = {
+		.flowi4_scope = RT_SCOPE_UNIVERSE,
+		.flowi4_iif = LOOPBACK_IFINDEX,
+	};
+	const struct net_device *oif;
+	struct net_device *found;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	int i;
+#endif
+
+	/*
+	 * Do not set flowi4_oif, it restricts results (for example, asking
+	 * for oif 3 will get RTN_UNICAST result even if the daddr exits
+	 * on another interface.
+	 *
+	 * Search results for the desired outinterface instead.
+	 */
+	if (priv->flags & NFTA_FIB_F_OIF)
+		oif = nft_out(pkt);
+	else if (priv->flags & NFTA_FIB_F_IIF)
+		oif = nft_in(pkt);
+	else
+		oif = NULL;
+
+	if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) {
+		nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+		return;
+	}
+
+	iph = ip_hdr(pkt->skb);
+	if (ipv4_is_multicast(iph->daddr) &&
+	    ipv4_is_zeronet(iph->saddr) &&
+	    ipv4_is_local_multicast(iph->daddr)) {
+		nft_fib_store_result(dest, priv->result, pkt,
+				     get_ifindex(pkt->skb->dev));
+		return;
+	}
+
+	if (priv->flags & NFTA_FIB_F_MARK)
+		fl4.flowi4_mark = pkt->skb->mark;
+
+	fl4.flowi4_tos = iph->tos & DSCP_BITS;
+
+	if (priv->flags & NFTA_FIB_F_DADDR) {
+		fl4.daddr = iph->daddr;
+		fl4.saddr = get_saddr(iph->saddr);
+	} else {
+		fl4.daddr = iph->saddr;
+		fl4.saddr = get_saddr(iph->daddr);
+	}
+
+	if (fib_lookup(nft_net(pkt), &fl4, &res))
+		return;
+
+	switch (res.type) {
+	case RTN_UNICAST:
+		break;
+	case RTN_LOCAL:	/* should not appear here, see fib4_is_local() above */
+		return;
+	default:
+		break;
+	}
+
+       if (!oif) {
+               found = FIB_RES_DEV(res);
+               goto ok;
+       }
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	for (i = 0; i < res.fi->fib_nhs; i++) {
+		struct fib_nh *nh = &res.fi->fib_nh[i];
+
+		if (nh->nh_dev == oif) {
+			found = nh->nh_dev;
+			goto ok;
+		}
+	}
+	return;
+#else
+	found = FIB_RES_DEV(res);
+	if (found != oif)
+		return;
+#endif
+ok:
+	switch (priv->result) {
+	case NFT_FIB_RESULT_OIF:
+		*dest = found->ifindex;
+		break;
+	case NFT_FIB_RESULT_OIFNAME:
+		strncpy((char *)dest, found->name, IFNAMSIZ);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(nft_fib4_eval);
+
+static struct nft_expr_type nft_fib4_type;
+
+static const struct nft_expr_ops nft_fib4_type_ops = {
+	.type		= &nft_fib4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.eval		= nft_fib4_eval_type,
+	.init		= nft_fib_init,
+	.dump		= nft_fib_dump,
+	.validate	= nft_fib_validate,
+};
+
+static const struct nft_expr_ops nft_fib4_ops = {
+	.type		= &nft_fib4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.eval		= nft_fib4_eval,
+	.init		= nft_fib_init,
+	.dump		= nft_fib_dump,
+	.validate	= nft_fib_validate,
+};
+
+static const struct nft_expr_ops *
+nft_fib4_select_ops(const struct nft_ctx *ctx,
+		    const struct nlattr * const tb[])
+{
+	enum nft_fib_result result;
+
+	if (!tb[NFTA_FIB_RESULT])
+		return ERR_PTR(-EINVAL);
+
+	result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+	switch (result) {
+	case NFT_FIB_RESULT_OIF:
+		return &nft_fib4_ops;
+	case NFT_FIB_RESULT_OIFNAME:
+		return &nft_fib4_ops;
+	case NFT_FIB_RESULT_ADDRTYPE:
+		return &nft_fib4_type_ops;
+	default:
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+}
+
+static struct nft_expr_type nft_fib4_type __read_mostly = {
+	.name		= "fib",
+	.select_ops	= &nft_fib4_select_ops,
+	.policy		= nft_fib_policy,
+	.maxattr	= NFTA_FIB_MAX,
+	.family		= NFPROTO_IPV4,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_fib4_module_init(void)
+{
+	return nft_register_expr(&nft_fib4_type);
+}
+
+static void __exit nft_fib4_module_exit(void)
+{
+	nft_unregister_expr(&nft_fib4_type);
+}
+
+module_init(nft_fib4_module_init);
+module_exit(nft_fib4_module_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw at strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(2, "fib");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 57df69eebe563..3a9337761479a 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -53,6 +53,14 @@ config NFT_DUP_IPV6
 	help
 	  This module enables IPv6 packet duplication support for nf_tables.
 
+config NFT_FIB_IPV6
+	tristate "nf_tables fib / ipv6 route lookup support"
+	select NFT_FIB
+	help
+	  This module enables IPv6 FIB lookups, e.g. for reverse path filtering.
+	  It also allows query of the FIB for the route type, e.g. local, unicast,
+	  multicast or blackhole.
+
 endif # NF_TABLES_IPV6
 endif # NF_TABLES
 
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 9d38868f888ee..75e6d92ad6b00 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
 obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
 obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
 obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
+obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
 
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
new file mode 100644
index 0000000000000..d526bb594956b
--- /dev/null
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -0,0 +1,275 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+static bool fib6_is_local(const struct sk_buff *skb)
+{
+	const struct rt6_info *rt = (const void *)skb_dst(skb);
+
+	return rt && (rt->rt6i_flags & RTF_LOCAL);
+}
+
+static int get_ifindex(const struct net_device *dev)
+{
+	return dev ? dev->ifindex : 0;
+}
+
+static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
+			       const struct nft_pktinfo *pkt,
+			       const struct net_device *dev)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
+	int lookup_flags = 0;
+
+	if (priv->flags & NFTA_FIB_F_DADDR) {
+		fl6->daddr = iph->daddr;
+		fl6->saddr = iph->saddr;
+	} else {
+		fl6->daddr = iph->saddr;
+		fl6->saddr = iph->daddr;
+	}
+
+	if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
+		lookup_flags |= RT6_LOOKUP_F_IFACE;
+		fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+	}
+
+	if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
+		lookup_flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+	if (priv->flags & NFTA_FIB_F_MARK)
+		fl6->flowi6_mark = pkt->skb->mark;
+
+	fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
+
+	return lookup_flags;
+}
+
+static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
+				const struct nft_pktinfo *pkt)
+{
+	const struct net_device *dev = NULL;
+	const struct nf_ipv6_ops *v6ops;
+	const struct nf_afinfo *afinfo;
+	int route_err, addrtype;
+	struct rt6_info *rt;
+	struct flowi6 fl6 = {
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.flowi6_proto = pkt->tprot,
+	};
+	u32 ret = 0;
+
+	afinfo = nf_get_afinfo(NFPROTO_IPV6);
+	if (!afinfo)
+		return RTN_UNREACHABLE;
+
+	if (priv->flags & NFTA_FIB_F_IIF)
+		dev = nft_in(pkt);
+	else if (priv->flags & NFTA_FIB_F_OIF)
+		dev = nft_out(pkt);
+
+	nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+
+	v6ops = nf_get_ipv6_ops();
+	if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+		ret = RTN_LOCAL;
+
+	route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
+				  flowi6_to_flowi(&fl6), false);
+	if (route_err)
+		goto err;
+
+	if (rt->rt6i_flags & RTF_REJECT) {
+		route_err = rt->dst.error;
+		dst_release(&rt->dst);
+		goto err;
+	}
+
+	if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr))
+		ret = RTN_ANYCAST;
+	else if (!dev && rt->rt6i_flags & RTF_LOCAL)
+		ret = RTN_LOCAL;
+
+	dst_release(&rt->dst);
+
+	if (ret)
+		return ret;
+
+	addrtype = ipv6_addr_type(&fl6.daddr);
+
+	if (addrtype & IPV6_ADDR_MULTICAST)
+		return RTN_MULTICAST;
+	if (addrtype & IPV6_ADDR_UNICAST)
+		return RTN_UNICAST;
+
+	return RTN_UNSPEC;
+ err:
+	switch (route_err) {
+	case -EINVAL:
+		return RTN_BLACKHOLE;
+	case -EACCES:
+		return RTN_PROHIBIT;
+	case -EAGAIN:
+		return RTN_THROW;
+	default:
+		break;
+	}
+
+	return RTN_UNREACHABLE;
+}
+
+void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+			const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+	u32 *dest = &regs->data[priv->dreg];
+
+	*dest = __nft_fib6_eval_type(priv, pkt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
+
+void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+		   const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+	const struct net_device *oif = NULL;
+	u32 *dest = &regs->data[priv->dreg];
+	struct flowi6 fl6 = {
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.flowi6_proto = pkt->tprot,
+	};
+	struct rt6_info *rt;
+	int lookup_flags;
+
+	if (priv->flags & NFTA_FIB_F_IIF)
+		oif = nft_in(pkt);
+	else if (priv->flags & NFTA_FIB_F_OIF)
+		oif = nft_out(pkt);
+
+	lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+
+	if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) {
+		nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+		return;
+	}
+
+	*dest = 0;
+ again:
+	rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+	if (rt->dst.error)
+		goto put_rt_err;
+
+	/* Should not see RTF_LOCAL here */
+	if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
+		goto put_rt_err;
+
+	if (oif && oif != rt->rt6i_idev->dev) {
+		/* multipath route? Try again with F_IFACE */
+		if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
+			lookup_flags |= RT6_LOOKUP_F_IFACE;
+			fl6.flowi6_oif = oif->ifindex;
+			ip6_rt_put(rt);
+			goto again;
+		}
+	}
+
+	switch (priv->result) {
+	case NFT_FIB_RESULT_OIF:
+		*dest = rt->rt6i_idev->dev->ifindex;
+		break;
+	case NFT_FIB_RESULT_OIFNAME:
+		strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+ put_rt_err:
+	ip6_rt_put(rt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval);
+
+static struct nft_expr_type nft_fib6_type;
+
+static const struct nft_expr_ops nft_fib6_type_ops = {
+	.type		= &nft_fib6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.eval		= nft_fib6_eval_type,
+	.init		= nft_fib_init,
+	.dump		= nft_fib_dump,
+	.validate	= nft_fib_validate,
+};
+
+static const struct nft_expr_ops nft_fib6_ops = {
+	.type		= &nft_fib6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.eval		= nft_fib6_eval,
+	.init		= nft_fib_init,
+	.dump		= nft_fib_dump,
+	.validate	= nft_fib_validate,
+};
+
+static const struct nft_expr_ops *
+nft_fib6_select_ops(const struct nft_ctx *ctx,
+		    const struct nlattr * const tb[])
+{
+	enum nft_fib_result result;
+
+	if (!tb[NFTA_FIB_RESULT])
+		return ERR_PTR(-EINVAL);
+
+	result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+	switch (result) {
+	case NFT_FIB_RESULT_OIF:
+		return &nft_fib6_ops;
+	case NFT_FIB_RESULT_OIFNAME:
+		return &nft_fib6_ops;
+	case NFT_FIB_RESULT_ADDRTYPE:
+		return &nft_fib6_type_ops;
+	default:
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+}
+
+static struct nft_expr_type nft_fib6_type __read_mostly = {
+	.name		= "fib",
+	.select_ops	= &nft_fib6_select_ops,
+	.policy		= nft_fib_policy,
+	.maxattr	= NFTA_FIB_MAX,
+	.family		= NFPROTO_IPV6,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_fib6_module_init(void)
+{
+	return nft_register_expr(&nft_fib6_type);
+}
+
+static void __exit nft_fib6_module_exit(void)
+{
+	nft_unregister_expr(&nft_fib6_type);
+}
+module_init(nft_fib6_module_init);
+module_exit(nft_fib6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw at strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0b02434fd1fc6..c8e7c5863ae08 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -550,6 +550,19 @@ config NFT_COMPAT
 	  x_tables match/target extensions over the nf_tables
 	  framework.
 
+config NFT_FIB
+	tristate
+
+config NFT_FIB_INET
+	depends on NF_TABLES_INET
+	depends on NFT_FIB_IPV4
+	depends on NFT_FIB_IPV6
+	tristate "Netfilter nf_tables fib inet support"
+	help
+	  This option allows using the FIB expression from the inet table.
+	  The lookup will be delegated to the IPv4 or IPv6 FIB depending
+	  on the protocol of the packet.
+
 endif # NF_TABLES
 
 config NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5a7c589c40ff8..4d8846e79956c 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -88,6 +88,8 @@ obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
 obj-$(CONFIG_NFT_LOG)		+= nft_log.o
 obj-$(CONFIG_NFT_MASQ)		+= nft_masq.o
 obj-$(CONFIG_NFT_REDIR)		+= nft_redir.o
+obj-$(CONFIG_NFT_FIB)		+= nft_fib.o
+obj-$(CONFIG_NFT_FIB_INET)	+= nft_fib_inet.o
 
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
new file mode 100644
index 0000000000000..249c9b80c1507
--- /dev/null
+++ b/net/netfilter/nft_fib.c
@@ -0,0 +1,159 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
+	[NFTA_FIB_DREG]		= { .type = NLA_U32 },
+	[NFTA_FIB_RESULT]	= { .type = NLA_U32 },
+	[NFTA_FIB_FLAGS]	= { .type = NLA_U32 },
+};
+EXPORT_SYMBOL(nft_fib_policy);
+
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+			NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		     const struct nft_data **data)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+	unsigned int hooks;
+
+	switch (priv->result) {
+	case NFT_FIB_RESULT_OIF: /* fallthrough */
+	case NFT_FIB_RESULT_OIFNAME:
+		hooks = (1 << NF_INET_PRE_ROUTING);
+		break;
+	case NFT_FIB_RESULT_ADDRTYPE:
+		if (priv->flags & NFTA_FIB_F_IIF)
+			hooks = (1 << NF_INET_PRE_ROUTING) |
+				(1 << NF_INET_LOCAL_IN) |
+				(1 << NF_INET_FORWARD);
+		else if (priv->flags & NFTA_FIB_F_OIF)
+			hooks = (1 << NF_INET_LOCAL_OUT) |
+				(1 << NF_INET_POST_ROUTING) |
+				(1 << NF_INET_FORWARD);
+		else
+			hooks = (1 << NF_INET_LOCAL_IN) |
+				(1 << NF_INET_LOCAL_OUT) |
+				(1 << NF_INET_FORWARD) |
+				(1 << NF_INET_PRE_ROUTING) |
+				(1 << NF_INET_POST_ROUTING);
+
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+EXPORT_SYMBOL_GPL(nft_fib_validate);
+
+int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		 const struct nlattr * const tb[])
+{
+	struct nft_fib *priv = nft_expr_priv(expr);
+	unsigned int len;
+	int err;
+
+	if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS])
+		return -EINVAL;
+
+	priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
+
+	if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+		return -EINVAL;
+
+	if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
+			   (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR))
+		return -EINVAL;
+	if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) ==
+			   (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF))
+		return -EINVAL;
+	if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
+		return -EINVAL;
+
+	priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+	priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
+
+	switch (priv->result) {
+	case NFT_FIB_RESULT_OIF:
+		if (priv->flags & NFTA_FIB_F_OIF)
+			return -EINVAL;
+		len = sizeof(int);
+		break;
+	case NFT_FIB_RESULT_OIFNAME:
+		if (priv->flags & NFTA_FIB_F_OIF)
+			return -EINVAL;
+		len = IFNAMSIZ;
+		break;
+	case NFT_FIB_RESULT_ADDRTYPE:
+		len = sizeof(u32);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	err = nft_validate_register_store(ctx, priv->dreg, NULL,
+					  NFT_DATA_VALUE, len);
+	if (err < 0)
+		return err;
+
+	return nft_fib_validate(ctx, expr, NULL);
+}
+EXPORT_SYMBOL_GPL(nft_fib_init);
+
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg))
+		return -1;
+
+	if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result)))
+		return -1;
+
+	if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags)))
+		return -1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fib_dump);
+
+void nft_fib_store_result(void *reg, enum nft_fib_result r,
+			  const struct nft_pktinfo *pkt, int index)
+{
+	struct net_device *dev;
+	u32 *dreg = reg;
+
+	switch (r) {
+	case NFT_FIB_RESULT_OIF:
+		*dreg = index;
+		break;
+	case NFT_FIB_RESULT_OIFNAME:
+		dev = dev_get_by_index_rcu(nft_net(pkt), index);
+		strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		*dreg = 0;
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(nft_fib_store_result);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw at strlen.de>");
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
new file mode 100644
index 0000000000000..9120fc7228f4e
--- /dev/null
+++ b/net/netfilter/nft_fib_inet.c
@@ -0,0 +1,82 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_inet_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	const struct nft_fib *priv = nft_expr_priv(expr);
+
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		switch (priv->result) {
+		case NFT_FIB_RESULT_OIF:
+		case NFT_FIB_RESULT_OIFNAME:
+			return nft_fib4_eval(expr, regs, pkt);
+		case NFT_FIB_RESULT_ADDRTYPE:
+			return nft_fib4_eval_type(expr, regs, pkt);
+		}
+		break;
+	case NFPROTO_IPV6:
+		switch (priv->result) {
+		case NFT_FIB_RESULT_OIF:
+		case NFT_FIB_RESULT_OIFNAME:
+			return nft_fib6_eval(expr, regs, pkt);
+		case NFT_FIB_RESULT_ADDRTYPE:
+			return nft_fib6_eval_type(expr, regs, pkt);
+		}
+		break;
+	}
+
+	regs->verdict.code = NF_DROP;
+}
+
+static struct nft_expr_type nft_fib_inet_type;
+static const struct nft_expr_ops nft_fib_inet_ops = {
+	.type		= &nft_fib_inet_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+	.eval		= nft_fib_inet_eval,
+	.init		= nft_fib_init,
+	.dump		= nft_fib_dump,
+	.validate	= nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_inet_type __read_mostly = {
+	.family		= NFPROTO_INET,
+	.name		= "fib",
+	.ops		= &nft_fib_inet_ops,
+	.policy		= nft_fib_policy,
+	.maxattr	= NFTA_FIB_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_fib_inet_module_init(void)
+{
+	return nft_register_expr(&nft_fib_inet_type);
+}
+
+static void __exit nft_fib_inet_module_exit(void)
+{
+	nft_unregister_expr(&nft_fib_inet_type);
+}
+
+module_init(nft_fib_inet_module_init);
+module_exit(nft_fib_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw at strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
-- 
2.26.2



More information about the Devel mailing list