[Devel] [PATCH RH7 01/10] netfilter: nf_tables: add fib expression
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Thu Feb 4 11:52:58 MSK 2021
Sorry for this, please drop.
On 2/4/21 11:51 AM, Pavel Tikhomirov wrote:
> From: Florian Westphal <fw at strlen.de>
>
> Add FIB expression, supported for ipv4, ipv6 and inet family (the latter
> just dispatches to ipv4 or ipv6 one based on nfproto).
>
> Currently supports fetching output interface index/name and the
> rtm_type associated with an address.
>
> This can be used for adding path filtering. rtm_type is useful
> to e.g. enforce a strong-end host model where packets
> are only accepted if daddr is configured on the interface the
> packet arrived on.
>
> The fib expression is a native nftables alternative to the
> xtables addrtype and rp_filter matches.
>
> FIB result order for oif/oifname retrieval is as follows:
> - if packet is local (skb has rtable, RTF_LOCAL set, this
> will also catch looped-back multicast packets), set oif to
> the loopback interface.
> - if fib lookup returns an error, or result points to local,
> store zero result. This means '--local' option of -m rpfilter
> is not supported. It is possible to use 'fib type local' or add
> explicit saddr/daddr matching rules to create exceptions if this
> is really needed.
> - store result in the destination register.
> In case of multiple routes, search set for desired oif in case
> strict matching is requested.
>
> ipv4 and ipv6 fib expressions are supposed to behave the same.
>
> [ I have collapsed Arnd Bergmann's ("netfilter: nf_tables: fib warnings")
>
> http://patchwork.ozlabs.org/patch/688615/
>
> to address fallout from this patch after rebasing nf-next, that was
> posted to address compilation warnings. --pablo ]
>
> Signed-off-by: Florian Westphal <fw at strlen.de>
> Signed-off-by: Pablo Neira Ayuso <pablo at netfilter.org>
>
> Rebase changes:
> 1) replace s/pkt->\(in\|out\|net\|hook\|pf\)/nft_\1(pkt)/
> 2) remove non-existent FIB_LOOKUP_IGNORE_LINKSTATE
>
> https://jira.sw.ru/browse/PSBM-125002
> (cherry picked from commit f6d0cbcf09c506b9b022df8f9d7693a7cec3c732)
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> ---
> include/net/netfilter/nft_fib.h | 31 +++
> include/uapi/linux/netfilter/nf_tables.h | 36 +++
> net/ipv4/netfilter/Kconfig | 8 +
> net/ipv4/netfilter/Makefile | 1 +
> net/ipv4/netfilter/nft_fib_ipv4.c | 238 ++++++++++++++++++++
> net/ipv6/netfilter/Kconfig | 8 +
> net/ipv6/netfilter/Makefile | 1 +
> net/ipv6/netfilter/nft_fib_ipv6.c | 275 +++++++++++++++++++++++
> net/netfilter/Kconfig | 13 ++
> net/netfilter/Makefile | 2 +
> net/netfilter/nft_fib.c | 159 +++++++++++++
> net/netfilter/nft_fib_inet.c | 82 +++++++
> 12 files changed, 854 insertions(+)
> create mode 100644 include/net/netfilter/nft_fib.h
> create mode 100644 net/ipv4/netfilter/nft_fib_ipv4.c
> create mode 100644 net/ipv6/netfilter/nft_fib_ipv6.c
> create mode 100644 net/netfilter/nft_fib.c
> create mode 100644 net/netfilter/nft_fib_inet.c
>
> diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
> new file mode 100644
> index 0000000000000..cbedda077db2c
> --- /dev/null
> +++ b/include/net/netfilter/nft_fib.h
> @@ -0,0 +1,31 @@
> +#ifndef _NFT_FIB_H_
> +#define _NFT_FIB_H_
> +
> +struct nft_fib {
> + enum nft_registers dreg:8;
> + u8 result;
> + u32 flags;
> +};
> +
> +extern const struct nla_policy nft_fib_policy[];
> +
> +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr);
> +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
> + const struct nlattr * const tb[]);
> +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
> + const struct nft_data **data);
> +
> +
> +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
> + const struct nft_pktinfo *pkt);
> +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
> + const struct nft_pktinfo *pkt);
> +
> +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
> + const struct nft_pktinfo *pkt);
> +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
> + const struct nft_pktinfo *pkt);
> +
> +void nft_fib_store_result(void *reg, enum nft_fib_result r,
> + const struct nft_pktinfo *pkt, int index);
> +#endif
> diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
> index 1c7a759e134c7..d4dc80a47b878 100644
> --- a/include/uapi/linux/netfilter/nf_tables.h
> +++ b/include/uapi/linux/netfilter/nf_tables.h
> @@ -1043,6 +1043,42 @@ enum nft_gen_attributes {
> };
> #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
>
> +/*
> + * enum nft_fib_attributes - nf_tables fib expression netlink attributes
> + *
> + * @NFTA_FIB_DREG: destination register (NLA_U32)
> + * @NFTA_FIB_RESULT: desired result (NLA_U32)
> + * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32)
> + *
> + * The FIB expression performs a route lookup according
> + * to the packet data.
> + */
> +enum nft_fib_attributes {
> + NFTA_FIB_UNSPEC,
> + NFTA_FIB_DREG,
> + NFTA_FIB_RESULT,
> + NFTA_FIB_FLAGS,
> + __NFTA_FIB_MAX
> +};
> +#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1)
> +
> +enum nft_fib_result {
> + NFT_FIB_RESULT_UNSPEC,
> + NFT_FIB_RESULT_OIF,
> + NFT_FIB_RESULT_OIFNAME,
> + NFT_FIB_RESULT_ADDRTYPE,
> + __NFT_FIB_RESULT_MAX
> +};
> +#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1)
> +
> +enum nft_fib_flags {
> + NFTA_FIB_F_SADDR = 1 << 0, /* look up src */
> + NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */
> + NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */
> + NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */
> + NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */
> +};
> +
> /**
> * enum nft_trace_attributes - nf_tables trace netlink attributes
> *
> diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
> index 0d672e80facd4..e4350958a9ee1 100644
> --- a/net/ipv4/netfilter/Kconfig
> +++ b/net/ipv4/netfilter/Kconfig
> @@ -64,6 +64,14 @@ config NFT_DUP_IPV4
> help
> This module enables IPv4 packet duplication support for nf_tables.
>
> +config NFT_FIB_IPV4
> + select NFT_FIB
> + tristate "nf_tables fib / ip route lookup support"
> + help
> + This module enables IPv4 FIB lookups, e.g. for reverse path filtering.
> + It also allows query of the FIB for the route type, e.g. local, unicast,
> + multicast or blackhole.
> +
> endif # NF_TABLES_IPV4
>
> config NF_TABLES_ARP
> diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
> index 1a5c76d736855..9d415d3fadc35 100644
> --- a/net/ipv4/netfilter/Makefile
> +++ b/net/ipv4/netfilter/Makefile
> @@ -38,6 +38,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
> obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
> obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
> obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
> +obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
> obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
> obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
> obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
> diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
> new file mode 100644
> index 0000000000000..d4fca6015997f
> --- /dev/null
> +++ b/net/ipv4/netfilter/nft_fib_ipv4.c
> @@ -0,0 +1,238 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/netlink.h>
> +#include <linux/netfilter.h>
> +#include <linux/netfilter/nf_tables.h>
> +#include <net/netfilter/nf_tables_core.h>
> +#include <net/netfilter/nf_tables.h>
> +#include <net/netfilter/nft_fib.h>
> +
> +#include <net/ip_fib.h>
> +#include <net/route.h>
> +
> +/* don't try to find route from mcast/bcast/zeronet */
> +static __be32 get_saddr(__be32 addr)
> +{
> + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
> + ipv4_is_zeronet(addr))
> + return 0;
> + return addr;
> +}
> +
> +static bool fib4_is_local(const struct sk_buff *skb)
> +{
> + const struct rtable *rt = skb_rtable(skb);
> +
> + return rt && (rt->rt_flags & RTCF_LOCAL);
> +}
> +
> +#define DSCP_BITS 0xfc
> +
> +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
> + const struct nft_pktinfo *pkt)
> +{
> + const struct nft_fib *priv = nft_expr_priv(expr);
> + u32 *dst = &regs->data[priv->dreg];
> + const struct net_device *dev = NULL;
> + const struct iphdr *iph;
> + __be32 addr;
> +
> + if (priv->flags & NFTA_FIB_F_IIF)
> + dev = nft_in(pkt);
> + else if (priv->flags & NFTA_FIB_F_OIF)
> + dev = nft_out(pkt);
> +
> + iph = ip_hdr(pkt->skb);
> + if (priv->flags & NFTA_FIB_F_DADDR)
> + addr = iph->daddr;
> + else
> + addr = iph->saddr;
> +
> + *dst = inet_dev_addr_type(nft_net(pkt), dev, addr);
> +}
> +EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
> +
> +static int get_ifindex(const struct net_device *dev)
> +{
> + return dev ? dev->ifindex : 0;
> +}
> +
> +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
> + const struct nft_pktinfo *pkt)
> +{
> + const struct nft_fib *priv = nft_expr_priv(expr);
> + u32 *dest = &regs->data[priv->dreg];
> + const struct iphdr *iph;
> + struct fib_result res;
> + struct flowi4 fl4 = {
> + .flowi4_scope = RT_SCOPE_UNIVERSE,
> + .flowi4_iif = LOOPBACK_IFINDEX,
> + };
> + const struct net_device *oif;
> + struct net_device *found;
> +#ifdef CONFIG_IP_ROUTE_MULTIPATH
> + int i;
> +#endif
> +
> + /*
> + * Do not set flowi4_oif, it restricts results (for example, asking
> + * for oif 3 will get RTN_UNICAST result even if the daddr exists
> + * on another interface).
> + *
> + * Search results for the desired outinterface instead.
> + */
> + if (priv->flags & NFTA_FIB_F_OIF)
> + oif = nft_out(pkt);
> + else if (priv->flags & NFTA_FIB_F_IIF)
> + oif = nft_in(pkt);
> + else
> + oif = NULL;
> +
> + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) {
> + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
> + return;
> + }
> +
> + iph = ip_hdr(pkt->skb);
> + if (ipv4_is_multicast(iph->daddr) &&
> + ipv4_is_zeronet(iph->saddr) &&
> + ipv4_is_local_multicast(iph->daddr)) {
> + nft_fib_store_result(dest, priv->result, pkt,
> + get_ifindex(pkt->skb->dev));
> + return;
> + }
> +
> + if (priv->flags & NFTA_FIB_F_MARK)
> + fl4.flowi4_mark = pkt->skb->mark;
> +
> + fl4.flowi4_tos = iph->tos & DSCP_BITS;
> +
> + if (priv->flags & NFTA_FIB_F_DADDR) {
> + fl4.daddr = iph->daddr;
> + fl4.saddr = get_saddr(iph->saddr);
> + } else {
> + fl4.daddr = iph->saddr;
> + fl4.saddr = get_saddr(iph->daddr);
> + }
> +
> + if (fib_lookup(nft_net(pkt), &fl4, &res))
> + return;
> +
> + switch (res.type) {
> + case RTN_UNICAST:
> + break;
> + case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */
> + return;
> + default:
> + break;
> + }
> +
> + if (!oif) {
> + found = FIB_RES_DEV(res);
> + goto ok;
> + }
> +
> +#ifdef CONFIG_IP_ROUTE_MULTIPATH
> + for (i = 0; i < res.fi->fib_nhs; i++) {
> + struct fib_nh *nh = &res.fi->fib_nh[i];
> +
> + if (nh->nh_dev == oif) {
> + found = nh->nh_dev;
> + goto ok;
> + }
> + }
> + return;
> +#else
> + found = FIB_RES_DEV(res);
> + if (found != oif)
> + return;
> +#endif
> +ok:
> + switch (priv->result) {
> + case NFT_FIB_RESULT_OIF:
> + *dest = found->ifindex;
> + break;
> + case NFT_FIB_RESULT_OIFNAME:
> + strncpy((char *)dest, found->name, IFNAMSIZ);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + break;
> + }
> +}
> +EXPORT_SYMBOL_GPL(nft_fib4_eval);
> +
> +static struct nft_expr_type nft_fib4_type;
> +
> +static const struct nft_expr_ops nft_fib4_type_ops = {
> + .type = &nft_fib4_type,
> + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
> + .eval = nft_fib4_eval_type,
> + .init = nft_fib_init,
> + .dump = nft_fib_dump,
> + .validate = nft_fib_validate,
> +};
> +
> +static const struct nft_expr_ops nft_fib4_ops = {
> + .type = &nft_fib4_type,
> + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
> + .eval = nft_fib4_eval,
> + .init = nft_fib_init,
> + .dump = nft_fib_dump,
> + .validate = nft_fib_validate,
> +};
> +
> +static const struct nft_expr_ops *
> +nft_fib4_select_ops(const struct nft_ctx *ctx,
> + const struct nlattr * const tb[])
> +{
> + enum nft_fib_result result;
> +
> + if (!tb[NFTA_FIB_RESULT])
> + return ERR_PTR(-EINVAL);
> +
> + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
> +
> + switch (result) {
> + case NFT_FIB_RESULT_OIF:
> + return &nft_fib4_ops;
> + case NFT_FIB_RESULT_OIFNAME:
> + return &nft_fib4_ops;
> + case NFT_FIB_RESULT_ADDRTYPE:
> + return &nft_fib4_type_ops;
> + default:
> + return ERR_PTR(-EOPNOTSUPP);
> + }
> +}
> +
> +static struct nft_expr_type nft_fib4_type __read_mostly = {
> + .name = "fib",
> + .select_ops = &nft_fib4_select_ops,
> + .policy = nft_fib_policy,
> + .maxattr = NFTA_FIB_MAX,
> + .family = NFPROTO_IPV4,
> + .owner = THIS_MODULE,
> +};
> +
> +static int __init nft_fib4_module_init(void)
> +{
> + return nft_register_expr(&nft_fib4_type);
> +}
> +
> +static void __exit nft_fib4_module_exit(void)
> +{
> + nft_unregister_expr(&nft_fib4_type);
> +}
> +
> +module_init(nft_fib4_module_init);
> +module_exit(nft_fib4_module_exit);
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Florian Westphal <fw at strlen.de>");
> +MODULE_ALIAS_NFT_AF_EXPR(2, "fib");
> diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
> index 57df69eebe563..3a9337761479a 100644
> --- a/net/ipv6/netfilter/Kconfig
> +++ b/net/ipv6/netfilter/Kconfig
> @@ -53,6 +53,14 @@ config NFT_DUP_IPV6
> help
> This module enables IPv6 packet duplication support for nf_tables.
>
> +config NFT_FIB_IPV6
> + tristate "nf_tables fib / ipv6 route lookup support"
> + select NFT_FIB
> + help
> + This module enables IPv6 FIB lookups, e.g. for reverse path filtering.
> + It also allows query of the FIB for the route type, e.g. local, unicast,
> + multicast or blackhole.
> +
> endif # NF_TABLES_IPV6
> endif # NF_TABLES
>
> diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
> index 9d38868f888ee..75e6d92ad6b00 100644
> --- a/net/ipv6/netfilter/Makefile
> +++ b/net/ipv6/netfilter/Makefile
> @@ -42,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
> obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
> obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
> obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
> +obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
>
> # matches
> obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
> diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
> new file mode 100644
> index 0000000000000..d526bb594956b
> --- /dev/null
> +++ b/net/ipv6/netfilter/nft_fib_ipv6.c
> @@ -0,0 +1,275 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/netlink.h>
> +#include <linux/netfilter.h>
> +#include <linux/netfilter/nf_tables.h>
> +#include <linux/netfilter_ipv6.h>
> +#include <net/netfilter/nf_tables_core.h>
> +#include <net/netfilter/nf_tables.h>
> +#include <net/netfilter/nft_fib.h>
> +
> +#include <net/ip6_fib.h>
> +#include <net/ip6_route.h>
> +
> +static bool fib6_is_local(const struct sk_buff *skb)
> +{
> + const struct rt6_info *rt = (const void *)skb_dst(skb);
> +
> + return rt && (rt->rt6i_flags & RTF_LOCAL);
> +}
> +
> +static int get_ifindex(const struct net_device *dev)
> +{
> + return dev ? dev->ifindex : 0;
> +}
> +
> +static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
> + const struct nft_pktinfo *pkt,
> + const struct net_device *dev)
> +{
> + const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
> + int lookup_flags = 0;
> +
> + if (priv->flags & NFTA_FIB_F_DADDR) {
> + fl6->daddr = iph->daddr;
> + fl6->saddr = iph->saddr;
> + } else {
> + fl6->daddr = iph->saddr;
> + fl6->saddr = iph->daddr;
> + }
> +
> + if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
> + lookup_flags |= RT6_LOOKUP_F_IFACE;
> + fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
> + }
> +
> + if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
> + lookup_flags |= RT6_LOOKUP_F_HAS_SADDR;
> +
> + if (priv->flags & NFTA_FIB_F_MARK)
> + fl6->flowi6_mark = pkt->skb->mark;
> +
> + fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
> +
> + return lookup_flags;
> +}
> +
> +static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
> + const struct nft_pktinfo *pkt)
> +{
> + const struct net_device *dev = NULL;
> + const struct nf_ipv6_ops *v6ops;
> + const struct nf_afinfo *afinfo;
> + int route_err, addrtype;
> + struct rt6_info *rt;
> + struct flowi6 fl6 = {
> + .flowi6_iif = LOOPBACK_IFINDEX,
> + .flowi6_proto = pkt->tprot,
> + };
> + u32 ret = 0;
> +
> + afinfo = nf_get_afinfo(NFPROTO_IPV6);
> + if (!afinfo)
> + return RTN_UNREACHABLE;
> +
> + if (priv->flags & NFTA_FIB_F_IIF)
> + dev = nft_in(pkt);
> + else if (priv->flags & NFTA_FIB_F_OIF)
> + dev = nft_out(pkt);
> +
> + nft_fib6_flowi_init(&fl6, priv, pkt, dev);
> +
> + v6ops = nf_get_ipv6_ops();
> + if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
> + ret = RTN_LOCAL;
> +
> + route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
> + flowi6_to_flowi(&fl6), false);
> + if (route_err)
> + goto err;
> +
> + if (rt->rt6i_flags & RTF_REJECT) {
> + route_err = rt->dst.error;
> + dst_release(&rt->dst);
> + goto err;
> + }
> +
> + if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr))
> + ret = RTN_ANYCAST;
> + else if (!dev && rt->rt6i_flags & RTF_LOCAL)
> + ret = RTN_LOCAL;
> +
> + dst_release(&rt->dst);
> +
> + if (ret)
> + return ret;
> +
> + addrtype = ipv6_addr_type(&fl6.daddr);
> +
> + if (addrtype & IPV6_ADDR_MULTICAST)
> + return RTN_MULTICAST;
> + if (addrtype & IPV6_ADDR_UNICAST)
> + return RTN_UNICAST;
> +
> + return RTN_UNSPEC;
> + err:
> + switch (route_err) {
> + case -EINVAL:
> + return RTN_BLACKHOLE;
> + case -EACCES:
> + return RTN_PROHIBIT;
> + case -EAGAIN:
> + return RTN_THROW;
> + default:
> + break;
> + }
> +
> + return RTN_UNREACHABLE;
> +}
> +
> +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
> + const struct nft_pktinfo *pkt)
> +{
> + const struct nft_fib *priv = nft_expr_priv(expr);
> + u32 *dest = &regs->data[priv->dreg];
> +
> + *dest = __nft_fib6_eval_type(priv, pkt);
> +}
> +EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
> +
> +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
> + const struct nft_pktinfo *pkt)
> +{
> + const struct nft_fib *priv = nft_expr_priv(expr);
> + const struct net_device *oif = NULL;
> + u32 *dest = &regs->data[priv->dreg];
> + struct flowi6 fl6 = {
> + .flowi6_iif = LOOPBACK_IFINDEX,
> + .flowi6_proto = pkt->tprot,
> + };
> + struct rt6_info *rt;
> + int lookup_flags;
> +
> + if (priv->flags & NFTA_FIB_F_IIF)
> + oif = nft_in(pkt);
> + else if (priv->flags & NFTA_FIB_F_OIF)
> + oif = nft_out(pkt);
> +
> + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
> +
> + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) {
> + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
> + return;
> + }
> +
> + *dest = 0;
> + again:
> + rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
> + if (rt->dst.error)
> + goto put_rt_err;
> +
> + /* Should not see RTF_LOCAL here */
> + if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
> + goto put_rt_err;
> +
> + if (oif && oif != rt->rt6i_idev->dev) {
> + /* multipath route? Try again with F_IFACE */
> + if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
> + lookup_flags |= RT6_LOOKUP_F_IFACE;
> + fl6.flowi6_oif = oif->ifindex;
> + ip6_rt_put(rt);
> + goto again;
> + }
> + }
> +
> + switch (priv->result) {
> + case NFT_FIB_RESULT_OIF:
> + *dest = rt->rt6i_idev->dev->ifindex;
> + break;
> + case NFT_FIB_RESULT_OIFNAME:
> + strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + break;
> + }
> +
> + put_rt_err:
> + ip6_rt_put(rt);
> +}
> +EXPORT_SYMBOL_GPL(nft_fib6_eval);
> +
> +static struct nft_expr_type nft_fib6_type;
> +
> +static const struct nft_expr_ops nft_fib6_type_ops = {
> + .type = &nft_fib6_type,
> + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
> + .eval = nft_fib6_eval_type,
> + .init = nft_fib_init,
> + .dump = nft_fib_dump,
> + .validate = nft_fib_validate,
> +};
> +
> +static const struct nft_expr_ops nft_fib6_ops = {
> + .type = &nft_fib6_type,
> + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
> + .eval = nft_fib6_eval,
> + .init = nft_fib_init,
> + .dump = nft_fib_dump,
> + .validate = nft_fib_validate,
> +};
> +
> +static const struct nft_expr_ops *
> +nft_fib6_select_ops(const struct nft_ctx *ctx,
> + const struct nlattr * const tb[])
> +{
> + enum nft_fib_result result;
> +
> + if (!tb[NFTA_FIB_RESULT])
> + return ERR_PTR(-EINVAL);
> +
> + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
> +
> + switch (result) {
> + case NFT_FIB_RESULT_OIF:
> + return &nft_fib6_ops;
> + case NFT_FIB_RESULT_OIFNAME:
> + return &nft_fib6_ops;
> + case NFT_FIB_RESULT_ADDRTYPE:
> + return &nft_fib6_type_ops;
> + default:
> + return ERR_PTR(-EOPNOTSUPP);
> + }
> +}
> +
> +static struct nft_expr_type nft_fib6_type __read_mostly = {
> + .name = "fib",
> + .select_ops = &nft_fib6_select_ops,
> + .policy = nft_fib_policy,
> + .maxattr = NFTA_FIB_MAX,
> + .family = NFPROTO_IPV6,
> + .owner = THIS_MODULE,
> +};
> +
> +static int __init nft_fib6_module_init(void)
> +{
> + return nft_register_expr(&nft_fib6_type);
> +}
> +
> +static void __exit nft_fib6_module_exit(void)
> +{
> + nft_unregister_expr(&nft_fib6_type);
> +}
> +module_init(nft_fib6_module_init);
> +module_exit(nft_fib6_module_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Florian Westphal <fw at strlen.de>");
> +MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
> index 0b02434fd1fc6..c8e7c5863ae08 100644
> --- a/net/netfilter/Kconfig
> +++ b/net/netfilter/Kconfig
> @@ -550,6 +550,19 @@ config NFT_COMPAT
> x_tables match/target extensions over the nf_tables
> framework.
>
> +config NFT_FIB
> + tristate
> +
> +config NFT_FIB_INET
> + depends on NF_TABLES_INET
> + depends on NFT_FIB_IPV4
> + depends on NFT_FIB_IPV6
> + tristate "Netfilter nf_tables fib inet support"
> + help
> + This option allows using the FIB expression from the inet table.
> + The lookup will be delegated to the IPv4 or IPv6 FIB depending
> + on the protocol of the packet.
> +
> endif # NF_TABLES
>
> config NETFILTER_XTABLES
> diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
> index 5a7c589c40ff8..4d8846e79956c 100644
> --- a/net/netfilter/Makefile
> +++ b/net/netfilter/Makefile
> @@ -88,6 +88,8 @@ obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
> obj-$(CONFIG_NFT_LOG) += nft_log.o
> obj-$(CONFIG_NFT_MASQ) += nft_masq.o
> obj-$(CONFIG_NFT_REDIR) += nft_redir.o
> +obj-$(CONFIG_NFT_FIB) += nft_fib.o
> +obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
>
> # generic X tables
> obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
> diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
> new file mode 100644
> index 0000000000000..249c9b80c1507
> --- /dev/null
> +++ b/net/netfilter/nft_fib.c
> @@ -0,0 +1,159 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * Generic part shared by ipv4 and ipv6 backends.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/netlink.h>
> +#include <linux/netfilter.h>
> +#include <linux/netfilter/nf_tables.h>
> +#include <net/netfilter/nf_tables_core.h>
> +#include <net/netfilter/nf_tables.h>
> +#include <net/netfilter/nft_fib.h>
> +
> +const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
> + [NFTA_FIB_DREG] = { .type = NLA_U32 },
> + [NFTA_FIB_RESULT] = { .type = NLA_U32 },
> + [NFTA_FIB_FLAGS] = { .type = NLA_U32 },
> +};
> +EXPORT_SYMBOL(nft_fib_policy);
> +
> +#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
> + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
> +
> +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
> + const struct nft_data **data)
> +{
> + const struct nft_fib *priv = nft_expr_priv(expr);
> + unsigned int hooks;
> +
> + switch (priv->result) {
> + case NFT_FIB_RESULT_OIF: /* fallthrough */
> + case NFT_FIB_RESULT_OIFNAME:
> + hooks = (1 << NF_INET_PRE_ROUTING);
> + break;
> + case NFT_FIB_RESULT_ADDRTYPE:
> + if (priv->flags & NFTA_FIB_F_IIF)
> + hooks = (1 << NF_INET_PRE_ROUTING) |
> + (1 << NF_INET_LOCAL_IN) |
> + (1 << NF_INET_FORWARD);
> + else if (priv->flags & NFTA_FIB_F_OIF)
> + hooks = (1 << NF_INET_LOCAL_OUT) |
> + (1 << NF_INET_POST_ROUTING) |
> + (1 << NF_INET_FORWARD);
> + else
> + hooks = (1 << NF_INET_LOCAL_IN) |
> + (1 << NF_INET_LOCAL_OUT) |
> + (1 << NF_INET_FORWARD) |
> + (1 << NF_INET_PRE_ROUTING) |
> + (1 << NF_INET_POST_ROUTING);
> +
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + return nft_chain_validate_hooks(ctx->chain, hooks);
> +}
> +EXPORT_SYMBOL_GPL(nft_fib_validate);
> +
> +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
> + const struct nlattr * const tb[])
> +{
> + struct nft_fib *priv = nft_expr_priv(expr);
> + unsigned int len;
> + int err;
> +
> + if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS])
> + return -EINVAL;
> +
> + priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
> +
> + if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
> + return -EINVAL;
> +
> + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
> + (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR))
> + return -EINVAL;
> + if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) ==
> + (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF))
> + return -EINVAL;
> + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
> + return -EINVAL;
> +
> + priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
> + priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
> +
> + switch (priv->result) {
> + case NFT_FIB_RESULT_OIF:
> + if (priv->flags & NFTA_FIB_F_OIF)
> + return -EINVAL;
> + len = sizeof(int);
> + break;
> + case NFT_FIB_RESULT_OIFNAME:
> + if (priv->flags & NFTA_FIB_F_OIF)
> + return -EINVAL;
> + len = IFNAMSIZ;
> + break;
> + case NFT_FIB_RESULT_ADDRTYPE:
> + len = sizeof(u32);
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + err = nft_validate_register_store(ctx, priv->dreg, NULL,
> + NFT_DATA_VALUE, len);
> + if (err < 0)
> + return err;
> +
> + return nft_fib_validate(ctx, expr, NULL);
> +}
> +EXPORT_SYMBOL_GPL(nft_fib_init);
> +
> +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
> +{
> + const struct nft_fib *priv = nft_expr_priv(expr);
> +
> + if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg))
> + return -1;
> +
> + if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result)))
> + return -1;
> +
> + if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags)))
> + return -1;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(nft_fib_dump);
> +
> +void nft_fib_store_result(void *reg, enum nft_fib_result r,
> + const struct nft_pktinfo *pkt, int index)
> +{
> + struct net_device *dev;
> + u32 *dreg = reg;
> +
> + switch (r) {
> + case NFT_FIB_RESULT_OIF:
> + *dreg = index;
> + break;
> + case NFT_FIB_RESULT_OIFNAME:
> + dev = dev_get_by_index_rcu(nft_net(pkt), index);
> + strncpy(reg, dev ? dev->name : "", IFNAMSIZ);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + *dreg = 0;
> + break;
> + }
> +}
> +EXPORT_SYMBOL_GPL(nft_fib_store_result);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Florian Westphal <fw at strlen.de>");
> diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
> new file mode 100644
> index 0000000000000..9120fc7228f4e
> --- /dev/null
> +++ b/net/netfilter/nft_fib_inet.c
> @@ -0,0 +1,82 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/netlink.h>
> +#include <linux/netfilter.h>
> +#include <linux/netfilter/nf_tables.h>
> +#include <net/netfilter/nf_tables_core.h>
> +#include <net/netfilter/nf_tables.h>
> +
> +#include <net/netfilter/nft_fib.h>
> +
> +static void nft_fib_inet_eval(const struct nft_expr *expr,
> + struct nft_regs *regs,
> + const struct nft_pktinfo *pkt)
> +{
> + const struct nft_fib *priv = nft_expr_priv(expr);
> +
> + switch (nft_pf(pkt)) {
> + case NFPROTO_IPV4:
> + switch (priv->result) {
> + case NFT_FIB_RESULT_OIF:
> + case NFT_FIB_RESULT_OIFNAME:
> + return nft_fib4_eval(expr, regs, pkt);
> + case NFT_FIB_RESULT_ADDRTYPE:
> + return nft_fib4_eval_type(expr, regs, pkt);
> + }
> + break;
> + case NFPROTO_IPV6:
> + switch (priv->result) {
> + case NFT_FIB_RESULT_OIF:
> + case NFT_FIB_RESULT_OIFNAME:
> + return nft_fib6_eval(expr, regs, pkt);
> + case NFT_FIB_RESULT_ADDRTYPE:
> + return nft_fib6_eval_type(expr, regs, pkt);
> + }
> + break;
> + }
> +
> + regs->verdict.code = NF_DROP;
> +}
> +
> +static struct nft_expr_type nft_fib_inet_type;
> +static const struct nft_expr_ops nft_fib_inet_ops = {
> + .type = &nft_fib_inet_type,
> + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
> + .eval = nft_fib_inet_eval,
> + .init = nft_fib_init,
> + .dump = nft_fib_dump,
> + .validate = nft_fib_validate,
> +};
> +
> +static struct nft_expr_type nft_fib_inet_type __read_mostly = {
> + .family = NFPROTO_INET,
> + .name = "fib",
> + .ops = &nft_fib_inet_ops,
> + .policy = nft_fib_policy,
> + .maxattr = NFTA_FIB_MAX,
> + .owner = THIS_MODULE,
> +};
> +
> +static int __init nft_fib_inet_module_init(void)
> +{
> + return nft_register_expr(&nft_fib_inet_type);
> +}
> +
> +static void __exit nft_fib_inet_module_exit(void)
> +{
> + nft_unregister_expr(&nft_fib_inet_type);
> +}
> +
> +module_init(nft_fib_inet_module_init);
> +module_exit(nft_fib_inet_module_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Florian Westphal <fw at strlen.de>");
> +MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
>
--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
More information about the Devel
mailing list