[Devel] [PATCH RHEL7 COMMIT] vznetfilter/vzprivnet: Combined patch

Konstantin Khorenko khorenko at virtuozzo.com
Thu Mar 24 08:53:25 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.10.1.vz7.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.10.1.vz7.12.3
------>
commit 82d7801524565d4e64ffb54e5215601309717705
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Thu Mar 24 19:53:25 2016 +0400

    vznetfilter/vzprivnet: Combined patch
    
    Patchset description:
    
    vzprivnet: port feature from VZ6
    
    changes:
    1) replace NIPQUAD_FMT
    2) make vzprivnet/vzprivnet6 hooks correspond to prototype
    3) include inet.h
    4) fix rt_cache_flush arguments
    5) use skb instead of pskb in vzprivnet6_hook
    6) remove second loop cursor in hlist_for_each_entry
    7) use ipv6_prefix_equal
    8) remove ctl_name
    9) use ve from device skb->dev->nd_net->owner_ve
    10) also do not filter ipv6 packets inside CT
    11) set license GPL
    12) zero-init privnet_mark on dst alloc
    
    Pavel Tikhomirov (36):
      vznfprivnet: combined
      vzfilter: include last ip in range
      vzprivnet: split privnet into two parts
      vzprivnet: cache filtering on dst
      vzprivnet: rework proc layout
      vzprivnet: legacy tree core isolation
      vzprivnet: netmask byte order toss
      vzprivnet: support sparse
      vzprivnet: basic statistics
      vzprivnet: classifier helper
      vzprivnet: flush rtcache
      vzprivnet: weak sparse
      vzprivnet: addnet leak
      vzprivnet: addnet bad retcode
      vzprivnet: v6 support prep
      vzprivnet: v6 module plain
      vzprivnet6: hashsets
      vzprivnet6: rh6 compile
      vzprivnet: handle bridged skbs
      vzprivnet: check for bridge properly
      vzprivnet6: fix hash access
      vzprivnet: filter host traffic
      vzprivnet6: dont handle neigh solicit in bridge
      vzprivnet: catch v4 conflict on add
      vzprivnet: constants for weakness
      vzprivnet: kill weak to weak communications
      vzprivnet6: remove hashes
      vzprivnet6: switch to radix tree
      vzprivnet6: legacy mode support
      vzprivnet6: INADDR6_ANY is internet
      vzprivnet: classify pne NULL checks fix
      vzprivnet: incorrect return value in vzprivnet_hook
      vzprivnet: rt cache drop on vzprivnet update
      vzprivnet: set GPL license
      vzprivnet: netfilter do not do vzprivnet_hook inside CT
      vzprivnet: enable module in config
    
    https://jira.sw.ru/browse/PSBM-43685
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    Acked-by: Andrey Vagin <avagin at virtuozzo.com>
    
    =======================================================
    This patch description:
    
    changes:
    1) replace NIPQUAD_FMT with %pI4 in vzprivnet_seq_show
    see:
    commit cf4ca4874fc4 ("kernel.h: remove unused NIPQUAD and NIPQUAD_FMT")
    commit 0df1a84e0ec8 ("staging: gdm72xx: use %pI4 format to print IPv4
    address and remove last usage of NIP6")
    2) make vzprivnet_hook correspond to nf_hookfn prototype
    
    Port diff-vz-nfprivnet-combined
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 net/ipv4/netfilter/Kconfig        |   6 +
 net/ipv4/netfilter/Makefile       |   1 +
 net/ipv4/netfilter/ip_vzprivnet.c | 441 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 448 insertions(+)

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fb82242..0eacd9f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -397,5 +397,11 @@ config IP_NF_ARP_MANGLE
 
 endif # IP_NF_ARPTABLES
 
+config VE_IP_NF_VZPRIVNET
+	tristate "VE private networking filtering"
+	default m
+	depends on IP_NF_IPTABLES && m
+	help
+	  This option allows filtering private subnets.
 endmenu
 
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3f57959..83a7cd5 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
 
 # generic IP tables 
 obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+obj-$(CONFIG_VE_IP_NF_VZPRIVNET) += ip_vzprivnet.o
 
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
diff --git a/net/ipv4/netfilter/ip_vzprivnet.c b/net/ipv4/netfilter/ip_vzprivnet.c
new file mode 100644
index 0000000..2d66dcc
--- /dev/null
+++ b/net/ipv4/netfilter/ip_vzprivnet.c
@@ -0,0 +1,441 @@
+/*
+ *
+ *  Copyright (C) 2010  Parallels
+ *
+ */
+
+/*
+ * This is an implementation of private network filtering.
+ * How does it work:
+ *   _______      _______       _______
+ *  |  VE1  |    |  VE2  |     | VE-N  |
+ *  |_______|    |_______|     |_______|
+ *      | venet      | venet       | venet
+ *      |            |             |
+ *      |_______ip_forward__ ... __| VE0
+ *             vzprivnet_hook
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/proc_fs.h>
+#include <linux/log2.h>
+#include <linux/ctype.h>
+#include <asm/page.h>
+
+#define VZPRIV_PROCNAME "ip_vzprivnet"
+
+/* One configured private network; entries are linked into rbroot. */
+struct vzprivnet {
+	/* In big-endian */
+	u32 netip;	/* network address, start of the covered range */
+	u32 netmask1;	/* mask that defines the covered address range */
+	u32 netmask2;	/* mask under which src and dst are compared in the hook */
+	int weak;	/* non-zero: traffic with other weak networks is accepted */
+	struct rb_node node;	/* tree linkage */
+};
+
+/* Taken around every access to rbroot and the entries linked into it. */
+static DEFINE_RWLOCK(vzprivlock);
+
+/*
+ * Tree helpers
+ */
+
+/* Root of the tree holding all configured networks. */
+static struct rb_root rbroot = RB_ROOT;
+/*
+ * Entry returned by tree_search() for addresses that belong to no
+ * configured network.  It is weak and is never linked into the tree.
+ */
+static struct vzprivnet vzpriv_internet = {
+	.netip = 0,
+	.netmask1 = 0,
+	.netmask2 = 0,
+	.weak = 1
+};
+
+/*
+ * Find the configured network that contains an address.
+ *
+ * ip: big-endian IP address
+ *
+ * Returns the entry whose range [netip, netip | ~netmask1] contains ip,
+ * or &vzpriv_internet when no entry matches.  Callers hold vzprivlock.
+ */
+static struct vzprivnet *tree_search(u32 ip)
+{
+	struct rb_node *node = rbroot.rb_node;
+
+	ip = ntohl(ip);
+	while (node) {
+		struct vzprivnet *p = rb_entry(node, struct vzprivnet, node);
+		u32 start, end;
+
+		start = ntohl(p->netip);
+		end = start | ~ntohl(p->netmask1);
+
+		/*
+		 * The range is inclusive on both ends.  Comparing with
+		 * "ip < end" skipped the last address of every network and
+		 * made /32 networks (start == end) impossible to match.
+		 */
+		if (ip > end)
+			node = node->rb_right;
+		else if (ip < start)
+			node = node->rb_left;
+		else
+			return p;
+	}
+	return &vzpriv_internet;
+}
+
+/*
+ * Link a new entry into the tree, ordered by address range.
+ *
+ * data: entry with netip and netmask1 already filled in (big-endian)
+ *
+ * Returns 0 on success or -EEXIST when the new range overlaps a range
+ * that is already in the tree.  Callers hold vzprivlock for writing.
+ */
+static int tree_insert(struct vzprivnet *data)
+{
+	struct rb_node **link = &(rbroot.rb_node), *parent = NULL;
+	u32 new_start = ntohl(data->netip);
+	u32 new_end = new_start | ~ntohl(data->netmask1);
+
+	while (*link) {
+		struct vzprivnet *p = rb_entry(*link, struct vzprivnet, node);
+		u32 start, end;
+
+		start = ntohl(p->netip);
+		end = start | ~ntohl(p->netmask1);
+
+		/*
+		 * Reject any overlap, not just a new start address that
+		 * falls inside an existing range: a wider network that
+		 * encloses an existing one would otherwise be linked in
+		 * and make lookups ambiguous.
+		 */
+		if (new_start <= end && start <= new_end)
+			return -EEXIST;
+
+		/* No overlap, so the new range lies entirely on one side. */
+		parent = *link;
+		if (new_start < start)
+			link = &((*link)->rb_left);
+		else
+			link = &((*link)->rb_right);
+	}
+
+	/* Add link node and rebalance tree. */
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, &rbroot);
+
+	return 0;
+}
+
+/* Unlink an entry from the tree; the entry itself is not freed here. */
+static void tree_delete(struct vzprivnet *p)
+{
+	struct rb_node *victim = &p->node;
+
+	rb_erase(victim, &rbroot);
+}
+
+/* Return the first entry in tree order, or NULL when the tree is empty. */
+static struct vzprivnet *tree_first(void)
+{
+	struct rb_node *first = rb_first(&rbroot);
+
+	return first ? rb_entry(first, struct vzprivnet, node) : NULL;
+}
+
+/* Return the entry following p in tree order, or NULL after the last one. */
+static struct vzprivnet *tree_next(struct vzprivnet *p)
+{
+	struct rb_node *succ = rb_next(&p->node);
+
+	return succ ? rb_entry(succ, struct vzprivnet, node) : NULL;
+}
+
+/*
+ * Generic code
+ */
+/*
+ * Netfilter hook: decide whether a packet may travel between the networks
+ * of its source and destination addresses.
+ *
+ * Both addresses are looked up in the tree under vzprivlock.  When they
+ * resolve to the same entry the packet is accepted only if the addresses
+ * are equal under that entry's netmask2; when they resolve to different
+ * entries it is accepted only if both entries are weak.
+ *
+ * Returns NF_ACCEPT or NF_DROP.  ops, in, out and state are unused.
+ */
+static unsigned int vzprivnet_hook(const struct nf_hook_ops *ops,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  const struct nf_hook_state *state)
+{
+	struct vzprivnet *src_net, *dst_net;
+	u32 src = ip_hdr(skb)->saddr;
+	u32 dst = ip_hdr(skb)->daddr;
+	int allowed;
+
+	read_lock(&vzprivlock);
+	src_net = tree_search(src);
+	dst_net = tree_search(dst);
+
+	if (src_net != dst_net)
+		allowed = src_net->weak && dst_net->weak;
+	else
+		allowed = (src & src_net->netmask2) == (dst & src_net->netmask2);
+	read_unlock(&vzprivlock);
+
+	return allowed ? NF_ACCEPT : NF_DROP;
+}
+
+/* Hook registration: vzprivnet_hook on PF_INET forwarding, first priority. */
+static struct nf_hook_ops vzprivnet_ops = {
+	.owner    = THIS_MODULE,
+	.pf       = PF_INET,
+	.hooknum  = NF_INET_FORWARD,
+	.priority = NF_IP_PRI_FIRST,
+	.hook     = vzprivnet_hook,
+};
+
+/*
+ * Convert a prefix length into a host-order netmask.
+ *
+ * prefix: number of leading one bits; values outside 0..32 are clamped
+ *
+ * The shift is done on an unsigned constant because left-shifting the
+ * negative int ~0 is undefined, and the boundary values are handled
+ * explicitly because a 32-bit shift by 32 or by a negative count is
+ * undefined as well.
+ */
+static inline u32 to_netmask(int prefix)
+{
+	if (prefix <= 0)
+		return 0;
+	if (prefix >= 32)
+		return ~0U;
+
+	return ~0U << (32 - prefix);
+}
+
+/*
+ * Convert a host-order netmask back into a prefix length.
+ * NOTE(review): presumably expects a non-zero contiguous mask - confirm.
+ */
+static inline unsigned int to_prefix(u32 netmask)
+{
+	u32 span = ~netmask + 1;
+
+	return 32 - ilog2(span);
+}
+
+/*
+ * Advance past the rest of the current line and every newline character
+ * that follows it.  Returns a pointer to the next non-newline character
+ * or to the terminating NUL.
+ */
+static char *nextline(char *s)
+{
+	for (; *s != '\0' && *s != '\n'; s++)
+		;
+	for (; *s == '\n'; s++)
+		;
+	return s;
+}
+
+/*
+ * Allocate an entry for a network and insert it into the tree.
+ *
+ * net, m1, m2: network address and the two masks, stored as given
+ * weak: weak flag for the new entry
+ *
+ * Returns 0 on success, -ENOMEM when the allocation fails, or the error
+ * from tree_insert(), in which case the entry is freed again.
+ */
+static int vzprivnet_add(u32 net, u32 m1, u32 m2, int weak)
+{
+	struct vzprivnet *entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	int rc;
+
+	if (entry == NULL)
+		return -ENOMEM;
+
+	entry->netip = net;
+	entry->netmask1 = m1;
+	entry->netmask2 = m2;
+	entry->weak = weak;
+
+	write_lock_bh(&vzprivlock);
+	rc = tree_insert(entry);
+	write_unlock_bh(&vzprivlock);
+
+	if (rc != 0)
+		kfree(entry);
+	return rc;
+}
+
+/*
+ * Remove and free the network that contains the given address.
+ *
+ * net: big-endian IP address
+ *
+ * Returns 0 on success or -ENOENT when the address belongs to no
+ * configured network.
+ */
+static int vzprivnet_del(u32 net)
+{
+	struct vzprivnet *victim;
+	int found;
+
+	write_lock_bh(&vzprivlock);
+	victim = tree_search(net);
+	found = (victim != &vzpriv_internet);
+	if (found)
+		tree_delete(victim);
+	write_unlock_bh(&vzprivlock);
+
+	if (!found)
+		return -ENOENT;
+
+	kfree(victim);
+	return 0;
+}
+
+/* Unlink and free every entry in the tree, holding the write lock. */
+static void vzprivnet_cleanup(void)
+{
+	struct vzprivnet *entry;
+
+	write_lock_bh(&vzprivlock);
+	for (entry = tree_first(); entry != NULL; entry = tree_first()) {
+		tree_delete(entry);
+		kfree(entry);
+	}
+	write_unlock_bh(&vzprivlock);
+}
+
+/*
+ * Parse one configuration line:
+ *
+ *     +a.b.c.d/M1/M2     add a network
+ *     +a.b.c.d/M1/M2*    add a weak network
+ *     -a.b.c.d/M1/M2     delete the network containing a.b.c.d
+ *
+ * param: NUL-terminated text starting at the line to parse
+ * add: set to 0 for delete, 1 for add
+ * net: set to the big-endian address (masked with netmask1 on add)
+ * netmask1, netmask2: set to big-endian masks on add; if delete, the
+ *                     netmasks don't matter and both are set to 0
+ * weak: set to 1 when M2 is directly followed by '*', otherwise 0
+ *
+ * Returns 0 on success or -EINVAL on malformed input; the outputs may
+ * be partially written when -EINVAL is returned.
+ */
+static int parse_param(const char *param, int *add, u32 *net,
+			u32 *netmask1, u32 *netmask2, int *weak)
+{
+	int err;
+	unsigned char ch, e;
+	unsigned int a,b,c,d;
+	unsigned int m1, m2;
+
+	if (!*param)
+		return -EINVAL;
+
+	ch = *param;
+	if (ch != '+' && ch != '-')
+		return -EINVAL;
+
+	param++;
+	/* err holds the number of fields sscanf() converted. */
+	err = sscanf(param, "%u.%u.%u.%u/%u/%u%c\n",
+				&a, &b, &c, &d, &m1, &m2, &e);
+	if (err < 4 || (a == 0 || a > 255 || b > 255 || c > 255 || d > 255))
+		return -EINVAL;
+
+	*weak = 0;
+	if (err == 7) {
+		/*
+		 * The character after M2 is either the weak marker or
+		 * whitespace.  The previous test "e != '\n' || !isspace(e)"
+		 * reduced to "e != '\n'", leaving the isspace() call dead
+		 * and rejecting a trailing blank or carriage return.
+		 */
+		if (e == '*')
+			*weak = 1;
+		else if (!isspace(e))
+			return -EINVAL;
+	}
+
+	*net = htonl((a << 24) + (b << 16) + (c << 8) + d);
+	if (ch == '+') {
+		/* Adding requires both prefix lengths, each in 1..32. */
+		if (err < 6 || m1 == 0 || m1 > 32 || m2 == 0 || m2 > 32)
+			return -EINVAL;
+
+		*netmask1 = htonl(to_netmask(m1));
+		*netmask2 = htonl(to_netmask(m2));
+		*net &= *netmask1;
+	} else
+		*netmask1 = *netmask2 = 0;
+
+	*add = (ch == '+') ? 1 : 0;
+	return 0;
+}
+
+/*
+ * Write handler of the proc file: apply the user's buffer line by line.
+ *
+ * At most PAGE_SIZE - 1 bytes are taken per call.  Each line is parsed
+ * with parse_param() and then added or deleted; processing stops at the
+ * first line that fails.
+ *
+ * Returns the offset of the first unprocessed line (or of the end of the
+ * data) when that offset is non-zero; otherwise a negative errno:
+ * -ENOMEM, -EFAULT, -EINVAL for empty input, or the error of the first
+ * line.
+ */
+static ssize_t vzpriv_write(struct file * file, const char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	char *s, *page;
+	int err;
+	int offset;
+
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	/* Leave room for the terminating NUL. */
+	if (count > (PAGE_SIZE - 1))
+		count = (PAGE_SIZE - 1);
+
+	/*
+	 * copy_from_user() returns the number of bytes it could not copy,
+	 * not an errno.  Returning that positive value would be taken by
+	 * the caller as a successful partial write.
+	 */
+	if (copy_from_user(page, buf, count)) {
+		err = -EFAULT;
+		goto out_free;
+	}
+
+	s = page;
+	s[count] = 0;
+
+	err = -EINVAL;
+	while (*s) {
+		u32 net, m1, m2;
+		int add, weak;
+
+		err = parse_param(s, &add, &net, &m1, &m2, &weak);
+		if (err)
+			goto out;
+
+		if (add)
+			err = vzprivnet_add(net, m1, m2, weak);
+		else
+			err = vzprivnet_del(net);
+
+		if (err)
+			goto out;
+
+		s = nextline(s);
+	}
+out:
+	/* Report progress if at least one line was consumed. */
+	offset = s - page;
+	if (offset > 0)
+		err = offset;
+out_free:
+	free_page((unsigned long)page);
+	return err;
+}
+
+/*
+ * seq_file start: take vzprivlock for reading and return the entry at
+ * position *pos in tree order, or NULL when the position is past the end.
+ * The lock is released in vzprivnet_seq_stop().
+ */
+static void *vzprivnet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	unsigned int skip = *pos;
+	struct vzprivnet *entry;
+
+	read_lock_bh(&vzprivlock);
+
+	/* Walk forward from the first entry, one step per skipped position. */
+	for (entry = tree_first(); skip != 0 && entry != NULL; skip--)
+		entry = tree_next(entry);
+
+	return entry;
+}
+
+/* seq_file next: bump the position and return the entry after v, or NULL. */
+static void *vzprivnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct vzprivnet *cur = v;
+
+	++*pos;
+	return tree_next(cur);
+}
+
+/* seq_file stop: release the read lock taken in vzprivnet_seq_start(). */
+static void vzprivnet_seq_stop(struct seq_file *seq, void *entry)
+{
+	read_unlock_bh(&vzprivlock);
+}
+
+/*
+ * seq_file show: print one entry as "a.b.c.d/M1/M2", followed by '*'
+ * when the entry is weak, and a newline.  Always returns 0.
+ */
+static int vzprivnet_seq_show(struct seq_file *s, void *v)
+{
+	struct vzprivnet *entry = v;
+	unsigned int bits1 = to_prefix(ntohl(entry->netmask1));
+	unsigned int bits2 = to_prefix(ntohl(entry->netmask2));
+
+	seq_printf(s, "%pI4/%u/%u", &entry->netip, bits1, bits2);
+	if (!entry->weak)
+		seq_printf(s, "\n");
+	else
+		seq_printf(s, "*\n");
+	return 0;
+}
+
+/* Iterator callbacks used when the proc file is read. */
+static struct seq_operations vzprivnet_seq_ops = {
+	.start = vzprivnet_seq_start,
+	.stop  = vzprivnet_seq_stop,
+	.next  = vzprivnet_seq_next,
+	.show  = vzprivnet_seq_show,
+};
+
+/* Open handler: attach the vzprivnet iterator to the file via seq_open(). */
+static int vzprivnet_seq_open(struct inode *inode, struct file *file)
+{
+	int rc = seq_open(file, &vzprivnet_seq_ops);
+
+	return rc;
+}
+
+/* File operations of the proc entry: seq_file reads, vzpriv_write writes. */
+static struct file_operations proc_vzprivnet_ops = {
+	.owner   = THIS_MODULE,
+	.open    = vzprivnet_seq_open,
+	.release = seq_release,
+	.read    = seq_read,
+	.write   = vzpriv_write,
+	.llseek  = seq_lseek,
+};
+
+/*
+ * Module init: create the proc entry, then register the netfilter hook.
+ *
+ * Returns 0 on success, -ENOMEM when the proc entry cannot be created,
+ * or the error from nf_register_hook(), after removing the proc entry.
+ */
+static int __init iptable_vzprivnet_init(void)
+{
+	int rc;
+
+	if (!proc_net_fops_create(&init_net, VZPRIV_PROCNAME, 0640,
+				  &proc_vzprivnet_ops))
+		return -ENOMEM;
+
+	rc = nf_register_hook(&vzprivnet_ops);
+	if (rc == 0)
+		return 0;
+
+	/* Hook registration failed: undo the proc entry. */
+	proc_net_remove(&init_net, VZPRIV_PROCNAME);
+	return rc;
+}
+
+/*
+ * Module exit: unregister the hook and remove the proc entry before
+ * freeing the tree with vzprivnet_cleanup().
+ */
+static void __exit iptable_vzprivnet_exit(void)
+{
+	nf_unregister_hook(&vzprivnet_ops);
+	proc_net_remove(&init_net, VZPRIV_PROCNAME);
+	vzprivnet_cleanup();
+}
+
+module_init(iptable_vzprivnet_init)
+module_exit(iptable_vzprivnet_exit)
+
+/* Declare the license; a module without one taints the kernel on load. */
+MODULE_LICENSE("GPL");


More information about the Devel mailing list