[Devel] [PATCH RHEL9 COMMIT] ve/net/neighbour: per-ct limit for neighbour entries

Konstantin Khorenko khorenko at virtuozzo.com
Wed Oct 20 11:40:34 MSK 2021


The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-4.vz9.10.12
------>
commit 9e3e62bfa850c01dccefc14cf4e2694b9d92e6eb
Author: Vasily Averin <vvs at virtuozzo.com>
Date:   Wed Oct 20 11:40:34 2021 +0300

    ve/net/neighbour: per-ct limit for neighbour entries
    
    Based on patch from Konstantin Khorenko <khorenko at virtuozzo.com>
    
    Currently neighbours do not have per-namespace structures and use
    common hash, locks, counters and global limits.
    
    Total number of neigh entries on a Node can easily exceed the
    global gc_thresh3 on the Node. (We've got a Node with 67 active
    Containers which required ~6000 active neighbour entries).
    
    Let's introduce additional counters for neigh entries per Container and
    check max limit against them. Global counters for neigh entries are left
    as is, they will be used for periodical and forced cleanups as it works
    now.
    
    https://jira.sw.ru/browse/PSBM-87155
    
    Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
    
    Acked-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    (cherry picked from vz7 commit c75e42c15e7d ("ve/net/neighbour: per-ct limit for
    neighbour entries"))
    
    VZ 8 rebase part https://jira.sw.ru/browse/PSBM-127837
    
    Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
    
    (cherry picked from vz8 commit 690ab3c461b97672274252713a7f5684f3f86bfd)
    Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 include/linux/ve.h   |  2 ++
 kernel/ve/ve.c       |  4 ++++
 net/core/neighbour.c | 46 +++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index d3499853e6dd..60947c200e8d 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -76,6 +76,8 @@ struct ve_struct {
 
 	int			_randomize_va_space;
 
+	atomic_t		arp_neigh_nr;
+	atomic_t		nd_neigh_nr;
 	unsigned long		meminfo_val;
 
 	atomic_t		mnt_nr; /* number of present VE mounts */
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 847179044066..2f4b746f39d5 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -68,6 +68,8 @@ struct ve_struct ve0 = {
 					2,
 #endif
 
+	.arp_neigh_nr		= ATOMIC_INIT(0),
+	.nd_neigh_nr		= ATOMIC_INIT(0),
 	.mnt_nr			= ATOMIC_INIT(0),
 	.meminfo_val		= VE_MEMINFO_SYSTEM,
 	.vdso_64		= (struct vdso_image*)&vdso_image_64,
@@ -720,6 +722,8 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	INIT_LIST_HEAD(&ve->ve_list);
 	kmapset_init_key(&ve->sysfs_perms_key);
 
+	atomic_set(&ve->arp_neigh_nr, 0);
+	atomic_set(&ve->nd_neigh_nr, 0);
 	atomic_set(&ve->mnt_nr, 0);
 
 #ifdef CONFIG_COREDUMP
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5941bb6f2367..dc801734c83b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -39,6 +39,7 @@
 #include <linux/log2.h>
 #include <linux/inetdevice.h>
 #include <net/addrconf.h>
+#include <linux/ve.h>
 
 #include <trace/events/neigh.h>
 
@@ -222,7 +223,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
 	return false;
 }
 
-static int neigh_forced_gc(struct neigh_table *tbl)
+static int neigh_forced_gc(struct neigh_table *tbl, struct ve_struct *ve)
 {
 	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
 	unsigned long tref = jiffies - 5 * HZ;
@@ -236,6 +237,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
 		if (refcount_read(&n->refcnt) == 1) {
 			bool remove = false;
+			bool same_ve = (dev_net(n->dev)->owner_ve == ve);
 
 			write_lock(&n->lock);
 			if ((n->nud_state == NUD_FAILED) ||
@@ -247,7 +249,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 			write_unlock(&n->lock);
 
 			if (remove && neigh_remove_one(n, tbl))
-				shrunk++;
+				if (same_ve)
+					shrunk++;
 			if (shrunk >= max_clean)
 				break;
 		}
@@ -378,22 +381,44 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 }
 EXPORT_SYMBOL(neigh_ifdown);
 
+static inline atomic_t *get_perve_tbl_entries_counter(struct neigh_table *tbl,
+							struct ve_struct *ve)
+{
+	switch (tbl->family) {
+	case AF_INET:
+		return &ve->arp_neigh_nr;
+	case AF_INET6:
+		return &ve->nd_neigh_nr;
+	}
+	return NULL;
+}
+
 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
 				     struct net_device *dev,
 				     bool exempt_from_gc)
 {
 	struct neighbour *n = NULL;
 	unsigned long now = jiffies;
-	int entries;
+	int entries, glob_entries;
+	atomic_t *cnt;
+	struct ve_struct *ve = dev_net(dev)->owner_ve;
 
 	if (exempt_from_gc)
 		goto do_alloc;
 
-	entries = atomic_inc_return(&tbl->gc_entries) - 1;
+	/* If a per-VE counter of neighbour entries exists,
+	 * it is limited by tbl->gc_thresh3, and the corresponding
+	 * global counter (tbl->entries) becomes unlimited.
+	 */
+
+	glob_entries = atomic_inc_return(&tbl->entries) - 1;
+	cnt = get_perve_tbl_entries_counter(tbl, ve);
+	entries = cnt ? atomic_inc_return(cnt) - 1 : glob_entries;
+
 	if (entries >= tbl->gc_thresh3 ||
-	    (entries >= tbl->gc_thresh2 &&
+	    (glob_entries >= tbl->gc_thresh2 &&
 	     time_after(now, tbl->last_flush + 5 * HZ))) {
-		if (!neigh_forced_gc(tbl) &&
+		if (!neigh_forced_gc(tbl, ve) &&
 		    entries >= tbl->gc_thresh3) {
 			net_info_ratelimited("%s: neighbor table overflow!\n",
 					     tbl->id);
@@ -428,8 +453,11 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl,
 	return n;
 
 out_entries:
-	if (!exempt_from_gc)
+	if (!exempt_from_gc) {
 		atomic_dec(&tbl->gc_entries);
+		if (cnt)
+			atomic_dec(cnt);
+	}
 	goto out;
 }
 
@@ -835,6 +863,8 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
 void neigh_destroy(struct neighbour *neigh)
 {
 	struct net_device *dev = neigh->dev;
+	struct ve_struct *ve = dev_net(dev)->owner_ve;
+	atomic_t *cnt = get_perve_tbl_entries_counter(neigh->tbl, ve);
 
 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
 
@@ -860,6 +890,8 @@ void neigh_destroy(struct neighbour *neigh)
 
 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
 
+	if (cnt)
+		 atomic_dec(cnt);
 	atomic_dec(&neigh->tbl->entries);
 	kfree_rcu(neigh, rcu);
 }


More information about the Devel mailing list