[Devel] [PATCH RHEL9 COMMIT] ve/net: restrict number of net devices for CT

Konstantin Khorenko khorenko at virtuozzo.com
Thu Jan 27 20:08:52 MSK 2022


The commit is pushed to "branch-rh9-5.14.0-4.vz9.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-4.vz9.12.2
------>
commit 008ec0aa9522b9f63d500379162f1ea20b374d84
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Thu Jan 27 20:08:52 2022 +0300

    ve/net: restrict number of net devices for CT
    
    In default_device_exit_batch we call dev->rtnl_link_ops->dellink for
    each link on cleanup_net. So the more links a CT has, the slower it stops.
    With ~10000 network (veth) interfaces, CT stop time is ~1min.
    
    Also kernel messages like "stop ploop26375 failed (holders=1)" might
    appear if a CT takes too long to stop.
    
    Add a netif_avail_nr/netif_max_nr pair to the ve cgroup to restrict the
    number of interfaces in a CT, analogous to the netns limit. With the default
    of 256 network devices (most of them veth pairs), container stop time will
    be ~3sec (~1sec for removing network interfaces).
    
    https://jira.sw.ru/browse/PSBM-51354
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    Acked-by: Andrew Vagin <avagin at virtuozzo.com>
    Feature: net: per-CT limit number of network devices
    
    ++++++
    ve/net: allow to change max number of net devices for running CT
    
    If a Container hits "ve::netif_max_nr", we want to fix the situation by
    increasing the limit without a Container restart, so allow this.
    
    Note: setting too low a value can make "ve::netif_avail_nr" negative;
    that's OK, the code is ready for that.
    
    https://jira.sw.ru/browse/PSBM-92132
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    ++++++
    ve/net: warn in case a Container hits ve::netif_max_nr
    
    Without a warning it's tricky to find out the reason for
    -ENOMEM on new network interface creation.
    
    Warning is printed both on host and inside the Container.
    
    https://jira.sw.ru/browse/PSBM-92132
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    Rebased to vz9 and merged
     - vz7 commit f3ca20f88d2d ("ve/net: restrict number of net devices for CT")
     - vz7 commit 727b6a3616f1 ("ve/net: allow to change max number of net devices for running CT")
     - vz7 commit 0925ea68bf56 ("ve/net: warn in case a Container hits ve::netif_max_nr")
    
    https://jira.sw.ru/browse/PSBM-130514
    Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 include/linux/ve.h |  5 +++++
 kernel/ve/ve.c     | 44 ++++++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c     | 27 +++++++++++++++++++++++++--
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index dee08fbc34c0..5691bd624a30 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -72,6 +72,9 @@ struct ve_struct {
 	atomic_t		netns_avail_nr;
 	int			netns_max_nr;
 
+	atomic_t		netif_avail_nr;
+	int			netif_max_nr;
+
 	u64			_uevent_seqnum;
 
 	int			_randomize_va_space;
@@ -142,6 +145,8 @@ extern int nr_ve;
 
 #define NETNS_MAX_NR_DEFAULT	256	/* number of net-namespaces per-VE */
 
+#define NETIF_MAX_NR_DEFAULT	256	/* number of net-interfaces per-VE */
+
 #define capable_setveid() \
 	(ve_is_super(get_exec_env()) && capable(CAP_SYS_ADMIN))
 
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 557a14f216c4..95ab131f0363 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -70,6 +70,8 @@ struct ve_struct ve0 = {
 	.sched_lat_ve.cur	= &ve0_lat_stats,
 	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
 	.netns_max_nr		= INT_MAX,
+	.netif_avail_nr		= ATOMIC_INIT(INT_MAX),
+	.netif_max_nr		= INT_MAX,
 	.fsync_enable		= FSYNC_FILTERED,
 	._randomize_va_space	=
 #ifdef CONFIG_COMPAT_BRK
@@ -937,6 +939,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
 	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
 
+	atomic_set(&ve->netif_avail_nr, NETIF_MAX_NR_DEFAULT);
+	ve->netif_max_nr = NETIF_MAX_NR_DEFAULT;
+
 	err = ve_log_init(ve);
 	if (err)
 		goto err_log;
@@ -1321,6 +1326,35 @@ static u64 ve_netns_avail_nr_read(struct cgroup_subsys_state *css, struct cftype
 	return atomic_read(&css_to_ve(css)->netns_avail_nr);
 }
 
+static u64 ve_netif_max_nr_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return css_to_ve(css)->netif_max_nr;
+}
+
+static int ve_netif_max_nr_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val)
+{
+	struct ve_struct *ve = css_to_ve(css);
+	int delta;
+
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	if (val > INT_MAX)
+		return -EOVERFLOW;
+
+	down_write(&ve->op_sem);
+	delta = val - ve->netif_max_nr;
+	ve->netif_max_nr = val;
+	atomic_add(delta, &ve->netif_avail_nr);
+	up_write(&ve->op_sem);
+	return 0;
+}
+
+static s64 ve_netif_avail_nr_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return atomic_read(&css_to_ve(css)->netif_avail_nr);
+}
+
 static int ve_os_release_read(struct seq_file *sf, void *v)
 {
 	struct cgroup_subsys_state *css = seq_css(sf);
@@ -1778,6 +1812,16 @@ static struct cftype ve_cftypes[] = {
 		.name			= "netns_avail_nr",
 		.read_u64		= ve_netns_avail_nr_read,
 	},
+	{
+		.name			= "netif_max_nr",
+		.flags			= CFTYPE_NOT_ON_ROOT,
+		.read_u64		= ve_netif_max_nr_read,
+		.write_u64		= ve_netif_max_nr_write,
+	},
+	{
+		.name			= "netif_avail_nr",
+		.read_s64		= ve_netif_avail_nr_read,
+	},
 	{
 		.name			= "os_release",
 		.max_write_len		= __NEW_UTS_LEN + 1,
diff --git a/net/core/dev.c b/net/core/dev.c
index b6dbbe7bf34b..df8bb05ad4a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10307,17 +10307,26 @@ int register_netdevice(struct net_device *dev)
 	if (ret)
 		return ret;
 
+	ret = -ENOMEM;
+	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0) {
+		ve_pr_warn_ratelimited(VE_LOG_BOTH,
+			"CT%s: hits max number of network devices, "
+			"increase ve::netif_max_nr parameter\n",
+			net->owner_ve->ve_name);
+		return ret;
+	}
+
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
 
 	ret = dev_get_valid_name(net, dev, dev->name);
 	if (ret < 0)
-		goto out;
+		goto out_inc;
 
 	ret = -ENOMEM;
 	dev->name_node = netdev_name_node_head_alloc(dev);
 	if (!dev->name_node)
-		goto out;
+		goto out_inc;
 
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
@@ -10448,6 +10457,8 @@ int register_netdevice(struct net_device *dev)
 		dev->priv_destructor(dev);
 err_free_name:
 	netdev_name_node_free(dev->name_node);
+out_inc:
+	atomic_inc(&net->owner_ve->netif_avail_nr);
 	goto out;
 }
 EXPORT_SYMBOL(register_netdevice);
@@ -10689,6 +10700,8 @@ void netdev_run_todo(void)
 		if (dev->needs_free_netdev)
 			free_netdev(dev);
 
+		atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
+
 		/* Report a network device has been unregistered */
 		rtnl_lock();
 		dev_net(dev)->dev_unreg_count--;
@@ -11255,6 +11268,16 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	if (new_ifindex && __dev_get_by_index(net, new_ifindex))
 		goto out;
 
+	err = -ENOMEM;
+	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0) {
+		ve_pr_warn_ratelimited(VE_LOG_BOTH,
+			"CT%s: hits max number of network devices, "
+			"increase ve::netif_max_nr parameter\n",
+			net->owner_ve->ve_name);
+		goto out;
+	}
+	atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
+
 	/*
 	 * And now a mini version of register_netdevice unregister_netdevice.
 	 */


More information about the Devel mailing list