[Devel] [PATCH RH9] ve/net: restrict number of net devices for CT

Andrey Zhadchenko andrey.zhadchenko at virtuozzo.com
Wed Jan 26 16:01:37 MSK 2022


From: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

In default_device_exit_batch we call dev->rtnl_link_ops->dellink for
each link on cleanup_net, so the more links a CT has, the slower it stops.
With ~10000 network (veth) interfaces, CT stop time is ~1min.

Also kernel messages like "stop ploop26375 failed (holders=1)" might
appear if a CT takes too long to stop.

Add a netif_avail_nr/netif_max_nr pair to the ve cgroup to restrict the
number of interfaces in a CT, analogous to the netns limit. With the default
of 256 network devices (most of them veth pairs), container stop time will
be ~3sec (~1sec for removing network interfaces).

https://jira.sw.ru/browse/PSBM-51354

Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Acked-by: Andrew Vagin <avagin at virtuozzo.com>

++++++
ve/net: allow to change max number of net devices for running CT

If a Container hits "ve::netif_max_nr", we want to fix the situation by
increasing the limit without a Container restart, so allow this.

Note: setting too low a value can make "ve::netif_avail_nr" negative;
that's OK, the code is ready for that.

https://jira.sw.ru/browse/PSBM-92132

Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

++++++
ve/net: warn in case a Container hits ve::netif_max_nr

Without a warning it's tricky to find out the reason for
-ENOMEM on new network interface creation.

Warning is printed both on host and inside the Container.

https://jira.sw.ru/browse/PSBM-92132

Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>

Rebased to vz9 and merged
 - vz7 commit f3ca20f88d2d ("ve/net: restrict number of net devices for CT")
 - vz7 commit 727b6a3616f1 ("ve/net: allow to change max number of net devices for running CT")
 - vz7 commit 0925ea68bf56 ("ve/net: warn in case a Container hits ve::netif_max_nr")

https://jira.sw.ru/browse/PSBM-130514
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 include/linux/ve.h |  6 ++++++
 kernel/ve/ve.c     | 44 ++++++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c     | 27 +++++++++++++++++++++++++--
 3 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index 4c8f7d308829..5be8b8c5fc43 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -72,6 +72,9 @@ struct ve_struct {
 	atomic_t		netns_avail_nr;
 	int			netns_max_nr;
 
+	atomic_t		netif_avail_nr;
+	int			netif_max_nr;
+
 	u64			_uevent_seqnum;
 
 	int			_randomize_va_space;
@@ -142,6 +146,8 @@ extern int nr_ve;
 
 #define NETNS_MAX_NR_DEFAULT	256	/* number of net-namespaces per-VE */
 
+#define NETIF_MAX_NR_DEFAULT	256	/* number of net-interfaces per-VE */
+
 #define capable_setveid() \
 	(ve_is_super(get_exec_env()) && capable(CAP_SYS_ADMIN))
 
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 557a14f216c4..95ab131f0363 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -70,6 +70,8 @@ struct ve_struct ve0 = {
 	.sched_lat_ve.cur	= &ve0_lat_stats,
 	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
 	.netns_max_nr		= INT_MAX,
+	.netif_avail_nr		= ATOMIC_INIT(INT_MAX),
+	.netif_max_nr		= INT_MAX,
 	.fsync_enable		= FSYNC_FILTERED,
 	._randomize_va_space	=
 #ifdef CONFIG_COMPAT_BRK
@@ -937,6 +939,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
 	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
 
+	atomic_set(&ve->netif_avail_nr, NETIF_MAX_NR_DEFAULT);
+	ve->netif_max_nr = NETIF_MAX_NR_DEFAULT;
+
 	err = ve_log_init(ve);
 	if (err)
 		goto err_log;
@@ -1321,6 +1326,35 @@ static u64 ve_netns_avail_nr_read(struct cgroup_subsys_state *css, struct cftype
 	return atomic_read(&css_to_ve(css)->netns_avail_nr);
 }
 
+static u64 ve_netif_max_nr_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return css_to_ve(css)->netif_max_nr;
+}
+
+static int ve_netif_max_nr_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val)
+{
+	struct ve_struct *ve = css_to_ve(css);
+	int delta;
+
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	if (val > INT_MAX)
+		return -EOVERFLOW;
+
+	down_write(&ve->op_sem);
+	delta = val - ve->netif_max_nr;
+	ve->netif_max_nr = val;
+	atomic_add(delta, &ve->netif_avail_nr);
+	up_write(&ve->op_sem);
+	return 0;
+}
+
+static s64 ve_netif_avail_nr_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return atomic_read(&css_to_ve(css)->netif_avail_nr);
+}
+
 static int ve_os_release_read(struct seq_file *sf, void *v)
 {
 	struct cgroup_subsys_state *css = seq_css(sf);
@@ -1778,6 +1812,16 @@ static struct cftype ve_cftypes[] = {
 		.name			= "netns_avail_nr",
 		.read_u64		= ve_netns_avail_nr_read,
 	},
+	{
+		.name			= "netif_max_nr",
+		.flags			= CFTYPE_NOT_ON_ROOT,
+		.read_u64		= ve_netif_max_nr_read,
+		.write_u64		= ve_netif_max_nr_write,
+	},
+	{
+		.name			= "netif_avail_nr",
+		.read_s64		= ve_netif_avail_nr_read,
+	},
 	{
 		.name			= "os_release",
 		.max_write_len		= __NEW_UTS_LEN + 1,
diff --git a/net/core/dev.c b/net/core/dev.c
index b6dbbe7bf34b..4b357cd42808 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10307,17 +10307,26 @@ int register_netdevice(struct net_device *dev)
 	if (ret)
 		return ret;
 
+	ret = -ENOMEM;
+	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0) {
+		ve_pr_warn_ratelimited(VE_LOG_BOTH,
+			"CT%s: hits max number of network devices, "
+			"increase ve::netif_max_nr parameter\n",
+			net->owner_ve->ve_name);
+		return ret;
+	}
+
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
 
 	ret = dev_get_valid_name(net, dev, dev->name);
 	if (ret < 0)
-		goto out;
+		goto out_inc;
 
 	ret = -ENOMEM;
 	dev->name_node = netdev_name_node_head_alloc(dev);
 	if (!dev->name_node)
-		goto out;
+		goto out_inc;
 
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
@@ -10448,6 +10457,8 @@ int register_netdevice(struct net_device *dev)
 		dev->priv_destructor(dev);
 err_free_name:
 	netdev_name_node_free(dev->name_node);
+out_inc:
+	atomic_inc(&net->owner_ve->netif_avail_nr);
 	goto out;
 }
 EXPORT_SYMBOL(register_netdevice);
@@ -10689,6 +10700,8 @@ void netdev_run_todo(void)
 		if (dev->needs_free_netdev)
 			free_netdev(dev);
 
+		atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
+
 		/* Report a network device has been unregistered */
 		rtnl_lock();
 		dev_net(dev)->dev_unreg_count--;
@@ -11255,6 +11268,16 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	if (new_ifindex && __dev_get_by_index(net, new_ifindex))
 		goto out;
 
+	err = -ENOMEM;
+	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0) {
+		ve_pr_warn_ratelimited(VE_LOG_BOTH,
+			"CT%s: hits max number of network devices, "
+			"increase ve::netif_max_nr parameter\n",
+			net->owner_ve->ve_name);
+		goto out;
+	}
+	atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
+
 	/*
 	 * And now a mini version of register_netdevice unregister_netdevice.
 	 */
-- 
2.35.0.rc2



More information about the Devel mailing list