[Devel] [PATCH RHEL7 COMMIT] vznetfilter/vzprivnet: Combined patch
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Mar 24 08:53:25 PDT 2016
The commit is pushed to "branch-rh7-3.10.0-327.10.1.vz7.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.10.1.vz7.12.3
------>
commit 82d7801524565d4e64ffb54e5215601309717705
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date: Thu Mar 24 19:53:25 2016 +0400
vznetfilter/vzprivnet: Combined patch
Patchset description:
vzprivnet: port feature from VZ6
changes:
1) replace NIPQUAD_FMT
2) make vzprivnet/vzprivnet6 hooks correspond to prototype
3) include inet.h
4) fix rt_cache_flush arguments
5) use skb instead of pskb in vzprivnet6_hook
6) remove second loop cursor in hlist_for_each_entry
7) use ipv6_prefix_equal
8) remove ctl_name
9) use ve from device skb->dev->nd_net->owner_ve
10) also do not filter ipv6 packets inside CT
11) set license GPL
12) zero-init privnet_mark on dst alloc
Pavel Tikhomirov (36):
vznfprivnet: combined
vzfilter: include last ip in range
vzprivnet: split privnet into two parts
vzprivnet: cache filtering on dst
vzprivnet: rework proc layout
vzprivnet: legacy tree core isolation
vzprivnet: netmask byte order toss
vzprivnet: support sparse
vzprivnet: basic statistics
vzprivnet: classifier helper
vzprivnet: flush rtcache
vzprivnet: weak sparse
vzprivnet: addnet leak
vzprivnet: addnet bad retcode
vzprivnet: v6 support prep
vzprivnet: v6 module plain
vzprivnet6: hashsets
vzprivnet6: rh6 compile
vzprivnet: handle bridged skbs
vzprivnet: check for bridge properly
vzprivnet6: fix hash access
vzprivnet: filter host traffic
vzprivnet6: dont handle neigh solicit in bridge
vzprivnet: catch v4 conflict on add
vzprivnet: constants for weakness
vzprivnet: kill weak to weak communications
vzprivnet6: remove hashes
vzprivnet6: switch to radix tree
vzprivnet6: legacy mode support
vzprivnet6: INADDR6_ANY is internet
vzprivnet: classify pne NULL checks fix
vzprivnet: incorrect return value in vzprivnet_hook
vzprivnet: rt cache drop on vzprivnet update
vzprivnet: set GPL license
vzprivnet: netfilter do not do vzprivnet_hook inside CT
vzprivnet: enable module in config
https://jira.sw.ru/browse/PSBM-43685
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Acked-by: Andrey Vagin <avagin at virtuozzo.com>
=======================================================
This patch description:
changes:
1) replace NIPQUAD_FMT with %pI4 in vzprivnet_seq_show
see:
commit cf4ca4874fc4 ("kernel.h: remove unused NIPQUAD and NIPQUAD_FMT")
commit 0df1a84e0ec8 ("staging: gdm72xx: use %pI4 format to print IPv4
address and remove last usage of NIP6")
2) make vzprivnet_hook correspond to nf_hookfn prototype
Port diff-vz-nfprivnet-combined
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
net/ipv4/netfilter/Kconfig | 6 +
net/ipv4/netfilter/Makefile | 1 +
net/ipv4/netfilter/ip_vzprivnet.c | 441 ++++++++++++++++++++++++++++++++++++++
3 files changed, 448 insertions(+)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fb82242..0eacd9f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -397,5 +397,11 @@ config IP_NF_ARP_MANGLE
endif # IP_NF_ARPTABLES
+config VE_IP_NF_VZPRIVNET
+ tristate "VE private networking filtering"
+ default m
+ depends on IP_NF_IPTABLES && m
+ help
+ Filter forwarded IPv4 traffic between VE private subnets (ip_vzprivnet).
endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3f57959..83a7cd5 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+obj-$(CONFIG_VE_IP_NF_VZPRIVNET) += ip_vzprivnet.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
diff --git a/net/ipv4/netfilter/ip_vzprivnet.c b/net/ipv4/netfilter/ip_vzprivnet.c
new file mode 100644
index 0000000..2d66dcc
--- /dev/null
+++ b/net/ipv4/netfilter/ip_vzprivnet.c
@@ -0,0 +1,441 @@
+/*
+ *
+ * Copyright (C) 2010 Parallels
+ *
+ */
+
+/*
+ * This is the implementation of private network filtering.
+ * How it works:
+ *   _______      _______         _______
+ *  |  VE1  |    |  VE2  |       | VE-N  |
+ *  |_______|    |_______|       |_______|
+ *      | venet      | venet         | venet
+ *      |            |               |
+ *      |_______ip_forward__ ... ____|        VE0
+ *              vzprivnet_hook
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/proc_fs.h>
+#include <linux/log2.h>
+#include <linux/ctype.h>
+#include <asm/page.h>
+
+#define VZPRIV_PROCNAME "ip_vzprivnet"
+
+/*
+ * One configured private network. Entries live in the rbroot tree, keyed by
+ * the address range [netip, netip | ~netmask1].
+ */
+struct vzprivnet {
+ /* In big-endian */
+ u32 netip;	/* network base address */
+ u32 netmask1;	/* mask defining the whole network's address range */
+ u32 netmask2;	/* mask compared by the hook for src/dst inside this network */
+ int weak;	/* non-zero: traffic to/from other weak networks is accepted */
+ struct rb_node node;	/* linkage into rbroot */
+};
+
+/* Guards rbroot and every entry linked into it. */
+static DEFINE_RWLOCK(vzprivlock);
+
+/*
+ * Tree helpers
+ */
+
+/* Root of the red-black tree of configured private networks. */
+static struct rb_root rbroot = RB_ROOT;
+
+/*
+ * Catch-all entry returned by tree_search() for addresses that belong to no
+ * configured network. It is weak and has all-zero address and masks; the
+ * fields not named below are zero-initialized because the object is static.
+ */
+static struct vzprivnet vzpriv_internet = {
+	.weak = 1,
+};
+
+/*
+ * tree_search - find the private network that contains an address.
+ * @ip: IPv4 address in big-endian byte order
+ *
+ * Each tree node covers the range [netip, netip | ~netmask1] with both
+ * bounds inclusive, so the last address of a network matches as well.
+ *
+ * Returns the matching entry, or &vzpriv_internet when no configured
+ * network contains @ip; never NULL. No locking is done here: the callers
+ * in this file hold vzprivlock around the call.
+ */
+static struct vzprivnet *tree_search(u32 ip)
+{
+	struct rb_node *node = rbroot.rb_node;
+
+	ip = ntohl(ip);
+	while (node) {
+		struct vzprivnet *p = rb_entry(node, struct vzprivnet, node);
+		u32 start = ntohl(p->netip);
+		u32 end = start | ~ntohl(p->netmask1);
+
+		if (ip < start)
+			node = node->rb_left;
+		else if (ip > end)
+			node = node->rb_right;
+		else
+			return p;	/* start <= ip <= end */
+	}
+
+	return &vzpriv_internet;
+}
+
+/*
+ * tree_insert - link a new private network into the tree.
+ * @data: entry with netip and netmask1 already filled in (big-endian)
+ *
+ * The tree holds disjoint address ranges. The new range is rejected when it
+ * overlaps an existing one in any way: contained in it, containing it, or
+ * partially intersecting it. Checking only the new network's first address
+ * would let a larger network swallow an existing smaller one and leave
+ * lookups ambiguous.
+ *
+ * Returns 0 on success or -EEXIST on overlap. No locking is done here; the
+ * caller in this file holds vzprivlock for writing.
+ */
+static int tree_insert(struct vzprivnet *data)
+{
+	struct rb_node **link = &rbroot.rb_node, *parent = NULL;
+	u32 new_start = ntohl(data->netip);
+	u32 new_end = new_start | ~ntohl(data->netmask1);
+
+	while (*link) {
+		struct vzprivnet *p = rb_entry(*link, struct vzprivnet, node);
+		u32 start = ntohl(p->netip);
+		u32 end = start | ~ntohl(p->netmask1);
+
+		/* Two inclusive ranges intersect iff each starts before the other ends. */
+		if (new_start <= end && start <= new_end)
+			return -EEXIST;
+
+		parent = *link;
+		/* No overlap, so the new range lies entirely on one side. */
+		if (new_start < start)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Add link node and rebalance tree. */
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, &rbroot);
+
+	return 0;
+}
+
+/* Unlink @p from the tree. The entry itself is not freed here. */
+static void tree_delete(struct vzprivnet *p)
+{
+	struct rb_node *victim = &p->node;
+
+	rb_erase(victim, &rbroot);
+}
+
+/* Return the first entry in tree order, or NULL when the tree is empty. */
+static struct vzprivnet *tree_first(void)
+{
+	struct rb_node *first = rb_first(&rbroot);
+
+	return first ? rb_entry(first, struct vzprivnet, node) : NULL;
+}
+
+/* Return the entry following @p in tree order, or NULL when @p is the last. */
+static struct vzprivnet *tree_next(struct vzprivnet *p)
+{
+	struct rb_node *succ = rb_next(&p->node);
+
+	return succ ? rb_entry(succ, struct vzprivnet, node) : NULL;
+}
+
+/*
+ * Generic code
+ */
+/*
+ * vzprivnet_hook - netfilter hook deciding whether a packet may pass.
+ *
+ * Looks up the networks of the source and destination addresses:
+ *  - same network: accept only when both addresses agree under netmask2;
+ *  - different networks: accept only when both networks are weak.
+ *
+ * Only @skb is used; the other arguments are ignored.
+ * Returns NF_ACCEPT or NF_DROP.
+ */
+static unsigned int vzprivnet_hook(const struct nf_hook_ops *ops,
+		struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		const struct nf_hook_state *state)
+{
+	u32 src = ip_hdr(skb)->saddr;
+	u32 dst = ip_hdr(skb)->daddr;
+	struct vzprivnet *src_net, *dst_net;
+	int allowed;
+
+	read_lock(&vzprivlock);
+	src_net = tree_search(src);
+	dst_net = tree_search(dst);
+
+	if (src_net != dst_net)
+		allowed = src_net->weak && dst_net->weak;
+	else
+		allowed = (src & src_net->netmask2) == (dst & src_net->netmask2);
+	read_unlock(&vzprivlock);
+
+	return allowed ? NF_ACCEPT : NF_DROP;
+}
+
+/* Hook registration: IPv4 forwarding path, at the first priority. */
+static struct nf_hook_ops vzprivnet_ops = {
+	.owner		= THIS_MODULE,
+	.hook		= vzprivnet_hook,
+	.pf		= PF_INET,
+	.hooknum	= NF_INET_FORWARD,
+	.priority	= NF_IP_PRI_FIRST,
+};
+
+/*
+ * to_netmask - convert a prefix length into a host-order netmask.
+ * @prefix: number of leading one bits
+ *
+ * The shift is done on an unsigned value (left-shifting the negative int ~0
+ * is undefined) and the prefix is clamped so the shift count always stays
+ * in 0..31: prefix <= 0 gives 0, prefix >= 32 gives all ones.
+ */
+static inline u32 to_netmask(int prefix)
+{
+	if (prefix <= 0)
+		return 0;
+	if (prefix >= 32)
+		return ~(u32)0;
+
+	return ~(u32)0 << (32 - prefix);
+}
+
+/*
+ * to_prefix - convert a host-order contiguous netmask into a prefix length.
+ * @netmask: netmask in host byte order
+ *
+ * A zero mask is handled separately: ~0 + 1 wraps to 0 and ilog2(0) is not
+ * defined, so return prefix 0 directly.
+ */
+static inline unsigned int to_prefix(u32 netmask)
+{
+	if (!netmask)
+		return 0;
+
+	return 32 - ilog2(~netmask + 1);
+}
+
+/*
+ * Skip the rest of the current line and any run of newline characters that
+ * follows it. Returns a pointer to the start of the next non-empty line, or
+ * to the terminating NUL.
+ */
+static char *nextline(char *s)
+{
+	for (; *s != '\0' && *s != '\n'; s++)
+		;
+	for (; *s == '\n'; s++)
+		;
+
+	return s;
+}
+
+/*
+ * vzprivnet_add - allocate a private network entry and insert it.
+ * @net:  network address, big-endian
+ * @m1:   netmask1, big-endian
+ * @m2:   netmask2, big-endian
+ * @weak: weak flag stored in the entry
+ *
+ * Returns 0 on success, -ENOMEM when allocation fails, or the error from
+ * tree_insert(); the entry is freed when insertion fails.
+ */
+static int vzprivnet_add(u32 net, u32 m1, u32 m2, int weak)
+{
+	struct vzprivnet *entry = kmalloc(sizeof(struct vzprivnet), GFP_KERNEL);
+	int rc;
+
+	if (entry == NULL)
+		return -ENOMEM;
+
+	entry->netip = net;
+	entry->netmask1 = m1;
+	entry->netmask2 = m2;
+	entry->weak = weak;
+
+	write_lock_bh(&vzprivlock);
+	rc = tree_insert(entry);
+	write_unlock_bh(&vzprivlock);
+
+	if (rc != 0)
+		kfree(entry);
+	return rc;
+}
+
+/*
+ * vzprivnet_del - remove the private network that contains an address.
+ * @net: address inside the network to delete, big-endian
+ *
+ * Returns 0 on success or -ENOENT when no configured network contains @net.
+ * The entry is freed after the lock has been dropped.
+ */
+static int vzprivnet_del(u32 net)
+{
+	struct vzprivnet *victim;
+	int rc = 0;
+
+	write_lock_bh(&vzprivlock);
+	victim = tree_search(net);
+	if (victim != &vzpriv_internet) {
+		tree_delete(victim);
+	} else {
+		/* Never unlink or free the static catch-all entry. */
+		victim = NULL;
+		rc = -ENOENT;
+	}
+	write_unlock_bh(&vzprivlock);
+
+	kfree(victim);	/* kfree(NULL) is a no-op */
+	return rc;
+}
+
+/* Unlink and free every entry in the tree, under the write lock. */
+static void vzprivnet_cleanup(void)
+{
+	struct vzprivnet *cur;
+
+	write_lock_bh(&vzprivlock);
+	while ((cur = tree_first()) != NULL) {
+		tree_delete(cur);
+		kfree(cur);
+	}
+	write_unlock_bh(&vzprivlock);
+}
+
+/*
+ * parse_param - parse one configuration entry.
+ *
+ * Accepted forms:
+ *	+a.b.c.d/M1/M2		add a network
+ *	+a.b.c.d/M1/M2*		add a weak network
+ *	-a.b.c.d		delete; netmasks don't matter
+ *
+ * @param:    NUL-terminated text starting at the entry to parse
+ * @add:      set to 1 for '+', 0 for '-'
+ * @net:      network address, big-endian; masked with *netmask1 on add
+ * @netmask1: big-endian mask built from prefix M1 (0 on delete)
+ * @netmask2: big-endian mask built from prefix M2 (0 on delete)
+ * @weak:     set to 1 when '*' follows M2, otherwise 0
+ *
+ * The first octet must be 1..255, the others 0..255, and on add both
+ * prefixes must be 1..32. The character right after M2 must be '*' or
+ * whitespace.
+ *
+ * Returns 0 on success or -EINVAL on malformed input. Output parameters may
+ * be partially written when an error is returned.
+ */
+static int parse_param(const char *param, int *add, u32 *net,
+		u32 *netmask1, u32 *netmask2, int *weak)
+{
+	int err;
+	unsigned char ch, e;
+	unsigned int a, b, c, d;
+	unsigned int m1, m2;
+
+	/* An empty string fails here too: NUL is neither '+' nor '-'. */
+	ch = *param;
+	if (ch != '+' && ch != '-')
+		return -EINVAL;
+
+	param++;
+	err = sscanf(param, "%u.%u.%u.%u/%u/%u%c\n",
+			&a, &b, &c, &d, &m1, &m2, &e);
+	if (err < 4 || a == 0 || a > 255 || b > 255 || c > 255 || d > 255)
+		return -EINVAL;
+
+	*weak = 0;
+	if (err == 7) {
+		/*
+		 * The old test "e != '\n' || !isspace(e)" reduced to
+		 * "e != '\n'", so a trailing blank or '\r' was rejected.
+		 */
+		if (e == '*')
+			*weak = 1;
+		else if (!isspace(e))
+			return -EINVAL;
+	}
+
+	*net = htonl((a << 24) + (b << 16) + (c << 8) + d);
+	if (ch == '+') {
+		/* m1/m2 are only read when sscanf() actually filled them. */
+		if (err < 6 || m1 == 0 || m1 > 32 || m2 == 0 || m2 > 32)
+			return -EINVAL;
+
+		*netmask1 = htonl(to_netmask(m1));
+		*netmask2 = htonl(to_netmask(m2));
+		*net &= *netmask1;
+	} else
+		*netmask1 = *netmask2 = 0;
+
+	*add = (ch == '+') ? 1 : 0;
+	return 0;
+}
+
+/*
+ * vzpriv_write - write handler of the proc file.
+ *
+ * Copies at most PAGE_SIZE - 1 bytes from user space and processes the
+ * text one entry per line: each line is parsed with parse_param() and then
+ * applied with vzprivnet_add() or vzprivnet_del().
+ *
+ * Returns the number of bytes consumed when at least one line was applied
+ * (a later failing line ends processing and its error is not reported).
+ * Otherwise returns a negative error: -ENOMEM, -EFAULT when the user buffer
+ * cannot be read, -EINVAL for empty or malformed input, or the error from
+ * the add/delete operation on the first line.
+ */
+static ssize_t vzpriv_write(struct file * file, const char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	char *s, *page;
+	int err;
+	int offset;
+
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	/* Leave room for the terminating NUL. */
+	if (count > (PAGE_SIZE - 1))
+		count = (PAGE_SIZE - 1);
+
+	/*
+	 * copy_from_user() returns the number of bytes NOT copied, which must
+	 * not leak out as a positive "bytes written" result.
+	 */
+	if (copy_from_user(page, buf, count)) {
+		err = -EFAULT;
+		goto free;
+	}
+
+	s = page;
+	s[count] = 0;
+
+	err = -EINVAL;
+	while (*s) {
+		u32 net, m1, m2;
+		int add, weak;
+
+		err = parse_param(s, &add, &net, &m1, &m2, &weak);
+		if (err)
+			goto out;
+
+		if (add)
+			err = vzprivnet_add(net, m1, m2, weak);
+		else
+			err = vzprivnet_del(net);
+
+		if (err)
+			goto out;
+
+		s = nextline(s);
+	}
+out:
+	/* Report partial progress in preference to the error. */
+	offset = s - page;
+	if (offset > 0)
+		err = offset;
+free:
+	free_page((unsigned long)page);
+	return err;
+}
+
+/*
+ * seq_file start callback: take the read lock and return the entry at
+ * position *pos in tree order, or NULL when the position is past the end.
+ * The lock is released in vzprivnet_seq_stop().
+ */
+static void *vzprivnet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	unsigned int skip = *pos;
+	struct vzprivnet *cur;
+
+	read_lock_bh(&vzprivlock);
+
+	cur = tree_first();
+	for (; skip > 0 && cur; skip--)
+		cur = tree_next(cur);
+
+	return cur;
+}
+
+/* seq_file next callback: advance the position and return the following entry. */
+static void *vzprivnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct vzprivnet *cur = v;
+
+	++*pos;
+	return tree_next(cur);
+}
+
+/* seq_file stop callback: drop the read lock taken in vzprivnet_seq_start(). */
+static void vzprivnet_seq_stop(struct seq_file *s, void *v)
+{
+	read_unlock_bh(&vzprivlock);
+}
+
+/*
+ * seq_file show callback: print one entry as "a.b.c.d/P1/P2", followed by
+ * '*' when the network is weak, then a newline.
+ */
+static int vzprivnet_seq_show(struct seq_file *s, void *v)
+{
+	struct vzprivnet *net = v;
+	unsigned int prefix1 = to_prefix(ntohl(net->netmask1));
+	unsigned int prefix2 = to_prefix(ntohl(net->netmask2));
+
+	seq_printf(s, "%pI4/%u/%u%s\n", &net->netip, prefix1, prefix2,
+		   net->weak ? "*" : "");
+	return 0;
+}
+
+/* Iterator callbacks used to dump the tree through the proc file. */
+static struct seq_operations vzprivnet_seq_ops = {
+	.start	= vzprivnet_seq_start,
+	.stop	= vzprivnet_seq_stop,
+	.next	= vzprivnet_seq_next,
+	.show	= vzprivnet_seq_show,
+};
+
+/* Open handler of the proc file: attach the seq_file iterator to @file. */
+static int vzprivnet_seq_open(struct inode *inode, struct file *file)
+{
+	int rc = seq_open(file, &vzprivnet_seq_ops);
+
+	return rc;
+}
+
+/* File operations of the proc entry: seq_file based reads, custom write. */
+static struct file_operations proc_vzprivnet_ops = {
+	.owner		= THIS_MODULE,
+	.open		= vzprivnet_seq_open,
+	.release	= seq_release,
+	.read		= seq_read,
+	.write		= vzpriv_write,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * Module load: create the proc entry in init_net, then register the
+ * netfilter hook. The proc entry is removed again when hook registration
+ * fails.
+ *
+ * Returns 0 on success, -ENOMEM when the proc entry cannot be created, or
+ * the error from nf_register_hook().
+ */
+static int __init iptable_vzprivnet_init(void)
+{
+	int rc;
+
+	if (!proc_net_fops_create(&init_net, VZPRIV_PROCNAME, 0640,
+				  &proc_vzprivnet_ops))
+		return -ENOMEM;
+
+	rc = nf_register_hook(&vzprivnet_ops);
+	if (rc == 0)
+		return 0;
+
+	proc_net_remove(&init_net, VZPRIV_PROCNAME);
+	return rc;
+}
+
+/*
+ * Module unload: unregister the netfilter hook, remove the proc entry, and
+ * only then free every entry left in the tree.
+ */
+static void __exit iptable_vzprivnet_exit(void)
+{
+ nf_unregister_hook(&vzprivnet_ops);
+ proc_net_remove(&init_net, VZPRIV_PROCNAME);
+ vzprivnet_cleanup();
+}
+
+/* Register the module's load and unload entry points. */
+module_init(iptable_vzprivnet_init)
+module_exit(iptable_vzprivnet_exit)
More information about the Devel
mailing list