[Devel] [PATCH 7/7] ve/net/neighbour: per-ct limit for neighbour entries

Alexander Mikhalitsyn alexander.mikhalitsyn at virtuozzo.com
Wed Jun 2 18:59:36 MSK 2021


From: Vasily Averin <vvs at virtuozzo.com>

Based on a patch from Konstantin Khorenko <khorenko at virtuozzo.com>

Currently neighbours do not have per-namespace structures and use
a common hash, locks, counters and global limits.

The total number of neigh entries on a Node can easily exceed the
global gc_thresh3 on the Node. (We've got a Node with 67 active
Containers which required ~6000 active neighbour entries).

Let's introduce additional per-Container counters for neigh entries and
check the max limit against them. Global counters for neigh entries are
left as is; they will still be used for periodic and forced cleanups, as
they are now.

https://jira.sw.ru/browse/PSBM-87155

Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
Acked-by: Konstantin Khorenko <khorenko at virtuozzo.com>
(cherry picked from commit c75e42c15e7d3f73777de7c932c60ae3b96fa025)

VZ 8 rebase part https://jira.sw.ru/browse/PSBM-127837

Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
---
 include/linux/ve.h   |  2 ++
 kernel/ve/ve.c       |  4 ++++
 net/core/neighbour.c | 43 +++++++++++++++++++++++++++++++++++++------
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index 9c553ac96072..d0dcf6371717 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -85,6 +85,8 @@ struct ve_struct {
 	struct kthread_worker	umh_worker;
 	struct task_struct	*umh_task;
 
+	atomic_t		arp_neigh_nr;
+	atomic_t		nd_neigh_nr;
 	unsigned long		meminfo_val;
 
 	atomic_t		mnt_nr;	/* number of present VE mounts */
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 2283e234af5f..43bb8d292233 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -73,6 +73,8 @@ struct ve_struct ve0 = {
 #else
 					2,
 #endif
+	.arp_neigh_nr		= ATOMIC_INIT(0),
+	.nd_neigh_nr		= ATOMIC_INIT(0),
 	.mnt_nr			= ATOMIC_INIT(0),
 	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
 	.netns_max_nr		= INT_MAX,
@@ -937,6 +939,8 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	INIT_LIST_HEAD(&ve->ve_list);
 	kmapset_init_key(&ve->sysfs_perms_key);
 
+	atomic_set(&ve->arp_neigh_nr, 0);
+	atomic_set(&ve->nd_neigh_nr, 0);
 	atomic_set(&ve->mnt_nr, 0);
 
 #ifdef CONFIG_COREDUMP
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 95090d1e8197..f3f790019ada 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -24,6 +24,7 @@
 #include <linux/socket.h>
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
+#include <linux/ve.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -165,7 +166,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
 	return false;
 }
 
-static int neigh_forced_gc(struct neigh_table *tbl)
+static int neigh_forced_gc(struct neigh_table *tbl, struct ve_struct *ve)
 {
 	int shrunk = 0;
 	int i;
@@ -183,13 +184,15 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 		np = &nht->hash_buckets[i];
 		while ((n = rcu_dereference_protected(*np,
 					lockdep_is_held(&tbl->lock))) != NULL) {
+			bool same_ve = (dev_net(n->dev)->owner_ve == ve);
 			/* Neighbour record may be discarded if:
 			 * - nobody refers to it.
 			 * - it is not permanent
 			 */
 			if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
 				      tbl)) {
-				shrunk = 1;
+				if (same_ve)
+					shrunk = 1;
 				continue;
 			}
 			np = &n->next;
@@ -322,17 +325,39 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 }
 EXPORT_SYMBOL(neigh_ifdown);
 
+static inline atomic_t *get_perve_tbl_entries_counter(struct neigh_table *tbl,
+							struct ve_struct *ve)
+{
+	switch (tbl->family) {
+	case AF_INET:
+		return &ve->arp_neigh_nr;
+	case AF_INET6:
+		return &ve->nd_neigh_nr;
+	}
+	return NULL;
+}
+
 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
 {
 	struct neighbour *n = NULL;
 	unsigned long now = jiffies;
-	int entries;
+	int entries, glob_entries;
+	atomic_t *cnt;
+	struct ve_struct *ve = dev_net(dev)->owner_ve;
+
+	/* If a per-VE counter of neighbour entries exists,
+	 * it is limited by tbl->gc_thresh3
+	 * and the corresponding global counter (tbl->entries) becomes unlimited.
+	 */
+
+	glob_entries = atomic_inc_return(&tbl->entries) - 1;
+	cnt = get_perve_tbl_entries_counter(tbl, ve);
+	entries = cnt ? atomic_inc_return(cnt) - 1 : glob_entries;
 
-	entries = atomic_inc_return(&tbl->entries) - 1;
 	if (entries >= tbl->gc_thresh3 ||
-	    (entries >= tbl->gc_thresh2 &&
+	    (glob_entries >= tbl->gc_thresh2 &&
 	     time_after(now, tbl->last_flush + 5 * HZ))) {
-		if (!neigh_forced_gc(tbl) &&
+		if (!neigh_forced_gc(tbl, ve) &&
 		    entries >= tbl->gc_thresh3) {
 			net_info_ratelimited("%s: neighbor table overflow!\n",
 					     tbl->id);
@@ -364,6 +389,8 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
 
 out_entries:
 	atomic_dec(&tbl->entries);
+	if (cnt)
+		atomic_dec(cnt);
 	goto out;
 }
 
@@ -750,6 +777,8 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
 void neigh_destroy(struct neighbour *neigh)
 {
 	struct net_device *dev = neigh->dev;
+	struct ve_struct *ve = dev_net(dev)->owner_ve;
+	atomic_t *cnt = get_perve_tbl_entries_counter(neigh->tbl, ve);
 
 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
 
@@ -775,6 +804,8 @@ void neigh_destroy(struct neighbour *neigh)
 
 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
 
+	if (cnt)
+		 atomic_dec(cnt);
 	atomic_dec(&neigh->tbl->entries);
 	kfree_rcu(neigh, rcu);
 }
-- 
2.28.0



More information about the Devel mailing list