[Devel] [PATCH RHEL7 COMMIT] vzprivnet6: Switch to radix tree
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Mar 24 08:53:41 PDT 2016
The commit is pushed to "branch-rh7-3.10.0-327.10.1.vz7.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.10.1.vz7.12.3
------>
commit 0974dabfa3975674cf127e6e4a2569b672ff2af6
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date: Thu Mar 24 19:53:41 2016 +0400
vzprivnet6: Switch to radix tree
Port diff-vzprivnet6-switch-to-radix-tree
VZPIVNET6: insert radix tree
Inserted simplified radix tree, taken from ipv6 routing.
This tree is now used to store vzprivnets.
Also all storage structures explicitely renamed to sparse6* since legacy
mode
will be added soon.
https://jira.sw.ru/browse/PCLIN-28951
Ported from rhel5
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
net/ipv6/netfilter/ip6_vzprivnet.c | 277 ++++++++++++++++++++++++++++++++++---
1 file changed, 257 insertions(+), 20 deletions(-)
diff --git a/net/ipv6/netfilter/ip6_vzprivnet.c b/net/ipv6/netfilter/ip6_vzprivnet.c
index 5999d6e..7c35bfd 100644
--- a/net/ipv6/netfilter/ip6_vzprivnet.c
+++ b/net/ipv6/netfilter/ip6_vzprivnet.c
@@ -17,15 +17,39 @@ struct vzprivnet {
struct list_head entries;
};
-static LIST_HEAD(vzprivnets);
+static LIST_HEAD(sparse6_vzprivnets);
struct vzprivnet_entry {
__u32 ip[4];
unsigned preflen;
struct vzprivnet *pn;
+ struct vzprivnet6_node *n;
struct list_head list;
};
+struct vzprivnet6_node
+{
+ struct vzprivnet6_node *parent;
+ struct vzprivnet6_node *left;
+ struct vzprivnet6_node *right;
+
+ struct vzprivnet_entry *entry;
+
+ __u16 fn_bit; /* bit key */
+ __u16 fn_flags;
+};
+
+#define RTN_RTINFO 1
+
+static struct vzprivnet_entry sparse6_null_entry = {
+ .preflen = 128,
+};
+
+static struct vzprivnet6_node sparse6_root_node = {
+ .entry = &sparse6_null_entry,
+ .fn_flags = RTN_RTINFO,
+};
+
static inline int ip6_match(u32 *net, unsigned plen, u32 *ip)
{
return ipv6_prefix_equal((const struct in6_addr *)net, (const struct in6_addr *)ip, plen);
@@ -36,8 +60,48 @@ static inline int ip6_intersect(u32 *ip1, unsigned len1, u32 *ip2, unsigned len2
return ip6_match(ip1, len1, ip2) || ip6_match(ip2, len2, ip1);
}
-static struct vzprivnet_entry *vzprivnet6_lookup(u32 *ip)
+static __inline__ int addr_bit_set(void *ip, int fn_bit)
{
+ __u32 *addr = ip;
+
+ return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
+}
+
+static __inline__ void vzprivnet6_node_free(struct vzprivnet6_node * fn)
+{
+ kfree(fn);
+}
+
+static __inline__ struct vzprivnet6_node * vzprivnet6_node_alloc(void)
+{
+ return kzalloc(sizeof(struct vzprivnet6_node), GFP_ATOMIC);
+}
+
+static struct vzprivnet6_node * radix_tree_search(struct vzprivnet6_node *root,
+ struct in6_addr *addr)
+{
+ struct vzprivnet6_node *fn;
+ int dir;
+
+ fn = root;
+
+ for (;;) {
+ struct vzprivnet6_node *next;
+
+ dir = addr_bit_set(addr, fn->fn_bit);
+
+ next = dir ? fn->right : fn->left;
+ if (next) {
+ fn = next;
+ continue;
+ }
+
+ break;
+ }
+
+ if (ip6_match(fn->entry->ip, fn->entry->preflen, (u32 *)addr))
+ return fn;
+
return NULL;
}
@@ -45,11 +109,20 @@ struct vzprivnet internet = {
.weak = VZPRIVNET_INET,
};
+static struct vzprivnet_entry *vzprivnet6_lookup(struct vzprivnet6_node *root,
+ u32 *ip)
+{
+ struct vzprivnet6_node *n;
+
+ n = radix_tree_search(root, (struct in6_addr *)ip);
+ return (n) ? n->entry : NULL;
+}
+
static inline struct vzprivnet *vzprivnet6_lookup_net(u32 *ip)
{
struct vzprivnet_entry *pne;
- pne = vzprivnet6_lookup(ip);
+ pne = vzprivnet6_lookup(&sparse6_root_node, ip);
if (pne != NULL)
return pne->pn;
else
@@ -61,11 +134,120 @@ static inline int noip(u32 *ip)
return (ip[0] | ip[1] | ip[2] | ip[3]) == 0;
}
+static struct vzprivnet6_node * radix_tree_add(void *addr, unsigned plen,
+ struct vzprivnet6_node *root)
+{
+ struct vzprivnet6_node *fn, *in, *ln;
+ struct vzprivnet6_node *pn = NULL;
+ struct vzprivnet_entry *pne = NULL;
+ int bit;
+ int dir = 0;
+
+ /* insert node in tree */
+
+ fn = root;
+
+ do {
+ pne = fn->entry;
+ if (ip6_intersect(pne->ip, pne->preflen, (u32 *)addr, plen))
+ return ERR_PTR(-EEXIST);
+
+ /*
+ * Prefix match
+ */
+ if (plen < fn->fn_bit ||
+ !ipv6_prefix_equal((struct in6_addr *)pne->ip, addr, fn->fn_bit))
+ goto insert_intermediate_node;
+
+ dir = addr_bit_set(addr, fn->fn_bit);
+ pn = fn;
+ fn = dir ? fn->right : fn->left;
+ } while (fn);
+
+ /*
+ * We walked to the bottom of tree.
+ * Create new leaf node without children.
+ */
+
+ ln = vzprivnet6_node_alloc();
+ if (ln == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ ln->fn_bit = plen;
+ ln->parent = pn;
+
+ if (dir)
+ pn->right = ln;
+ else
+ pn->left = ln;
+
+ return ln;
+
+insert_intermediate_node:
+
+ pn = fn->parent;
+
+ bit = ipv6_addr_diff(addr, (struct in6_addr *)pne->ip);
+
+ BUG_ON(plen <= bit);
+
+ /*
+ * (intermediate)[in]
+ * / \
+ * (new leaf node)[ln] (old node)[fn]
+ */
+ in = vzprivnet6_node_alloc();
+ ln = vzprivnet6_node_alloc();
+
+ if (in == NULL || ln == NULL) {
+ if (in)
+ vzprivnet6_node_free(in);
+ if (ln)
+ vzprivnet6_node_free(ln);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /*
+ * new intermediate node.
+ * RTN_RTINFO will be off
+ */
+
+ in->fn_bit = bit;
+
+ in->parent = pn;
+
+ /* update parent pointer */
+ if (dir)
+ pn->right = in;
+ else
+ pn->left = in;
+
+ ln->fn_bit = plen;
+
+ ln->parent = in;
+ fn->parent = in;
+
+ if (addr_bit_set(addr, bit)) {
+ in->right = ln;
+ in->left = fn;
+ } else {
+ in->left = ln;
+ in->right = fn;
+ }
+
+ return ln;
+}
+
+static struct vzprivnet6_node * sparse6_add_subnet(void *addr, unsigned plen)
+{
+ return radix_tree_add(addr, plen, &sparse6_root_node);
+}
+
static int sparse6_add(unsigned netid, u32 *ip, unsigned preflen, int weak)
{
int err;
struct vzprivnet *pn = NULL, *epn = NULL;
- struct vzprivnet_entry *pne = NULL, *tmp;
+ struct vzprivnet_entry *pne = NULL;
err = -ENOMEM;
pn = kzalloc(sizeof(*pn), GFP_KERNEL);
@@ -77,7 +259,7 @@ static int sparse6_add(unsigned netid, u32 *ip, unsigned preflen, int weak)
goto out;
write_lock_bh(&vzpriv6lock);
- list_for_each_entry(epn, &vzprivnets, list)
+ list_for_each_entry(epn, &sparse6_vzprivnets, list)
if (epn->netid == netid) {
kfree(pn);
pn = epn;
@@ -90,15 +272,22 @@ static int sparse6_add(unsigned netid, u32 *ip, unsigned preflen, int weak)
found_net:
if (!noip(ip)) {
- err = -EEXIST;
- list_for_each_entry(tmp, &pn->entries, list)
- if (ip6_intersect(ip, preflen, tmp->ip, tmp->preflen))
- goto out_unlock;
+ struct vzprivnet6_node *n;
+
+ n = sparse6_add_subnet(ip, preflen);
+ if (IS_ERR(n)) {
+ err = PTR_ERR(n);
+ goto out_unlock;
+ }
+
+ n->entry = pne;
+ n->fn_flags |= RTN_RTINFO;
memcpy(pne->ip, ip, sizeof(pne->ip));
pne->preflen = preflen;
pne->pn = pn;
list_add_tail(&pne->list, &pn->entries);
+ pne->n = n;
pne = NULL;
} else if (weak == VZPRIVNET_WEAK) {
pn->weak = VZPRIVNET_WEAK;
@@ -108,7 +297,7 @@ found_net:
}
if (pn != epn) {
- list_add_tail(&pn->list, &vzprivnets);
+ list_add_tail(&pn->list, &sparse6_vzprivnets);
pn = NULL;
}
@@ -124,13 +313,61 @@ out:
return err;
}
+static void radix_tree_del(struct vzprivnet6_node *fn)
+{
+ int children;
+ struct vzprivnet6_node *child, *pn;
+
+ BUG_ON(fn->parent == NULL);
+
+ for (;;) {
+ children = 0;
+ child = NULL;
+
+ if (fn->right) {
+ child = fn->right;
+ children |= 1;
+ }
+ if (fn->left) {
+ child = fn->left;
+ children |= 2;
+ }
+
+ if (children == 3)
+ return;
+
+ pn = fn->parent;
+ if (pn->right == fn)
+ pn->right = child;
+ else if (pn->left == fn)
+ pn->left = child;
+
+ if (child)
+ child->parent = pn;
+
+ vzprivnet6_node_free(fn);
+ if (pn->fn_flags & RTN_RTINFO)
+ return;
+
+ fn = pn;
+ }
+}
+
+
+static void vzprivnet6_del_entry(struct vzprivnet_entry *pne)
+{
+ radix_tree_del(pne->n);
+}
+
+
static void sparse6_free_entry(struct vzprivnet_entry *pne)
{
list_del(&pne->list);
+ vzprivnet6_del_entry(pne);
kfree(pne);
}
-static void sparse6_del_one(struct vzprivnet *pn)
+static void vzprivnet6_del_one(struct vzprivnet *pn)
{
struct vzprivnet_entry *pne;
@@ -150,10 +387,10 @@ static void vzprivnet6_cleanup(void)
struct vzprivnet *pn;
write_lock_bh(&vzpriv6lock);
- while (!list_empty(&vzprivnets)) {
- pn = list_first_entry(&vzprivnets,
+ while (!list_empty(&sparse6_vzprivnets)) {
+ pn = list_first_entry(&sparse6_vzprivnets,
struct vzprivnet, list);
- sparse6_del_one(pn);
+ vzprivnet6_del_one(pn);
}
write_unlock_bh(&vzpriv6lock);
}
@@ -162,14 +399,14 @@ static int sparse6_del_net(unsigned netid, int weak)
{
struct vzprivnet *pn;
- list_for_each_entry(pn, &vzprivnets, list) {
+ list_for_each_entry(pn, &sparse6_vzprivnets, list) {
if (pn->netid != netid)
continue;
if (weak == VZPRIVNET_WEAK)
pn->weak = VZPRIVNET_STRONG;
else
- sparse6_del_one(pn);
+ vzprivnet6_del_one(pn);
return 0;
}
@@ -181,7 +418,7 @@ static int sparse6_del_ip(u32 *ip)
{
struct vzprivnet_entry *pne;
- pne = vzprivnet6_lookup(ip);
+ pne = vzprivnet6_lookup(&sparse6_root_node, ip);
if (pne == NULL)
return -ENOENT;
@@ -454,7 +691,7 @@ static void *sparse6_seq_start(struct seq_file *seq, loff_t *ppos)
loff_t pos = *ppos;
read_lock(&vzpriv6lock);
- list_for_each(lh, &vzprivnets)
+ list_for_each(lh, &sparse6_vzprivnets)
if (pos-- == 0)
return lh;
@@ -467,7 +704,7 @@ static void *sparse6_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
lh = ((struct list_head *)v)->next;
++*ppos;
- return lh == &vzprivnets ? NULL : lh;
+ return lh == &sparse6_vzprivnets ? NULL : lh;
}
static void sparse6_seq_stop(struct seq_file *s, void *v)
@@ -550,7 +787,7 @@ static int classify6_seq_show(struct seq_file *s, void *v)
}
read_lock(&vzpriv6lock);
- pne = vzprivnet6_lookup(ip);
+ pne = vzprivnet6_lookup(&sparse6_root_node, ip);
if (pne == NULL) {
seq_printf(s, "internet\n");
goto out;
More information about the Devel
mailing list