[Devel] [PATCH RH7 2/2] ve/net: restrict number of net devices for CT

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Mon Mar 20 08:09:34 PDT 2017


Description from Solar:
"Adding thousands of network interfaces (aliases) in a container causes
moderately slow container stop times (1 minute observed) with many
kernel messages like "stop ploop26375 failed (holders=1)" appearing.
A safety limit on the number of network interfaces in a container
should be introduced by default, especially considering that there
might be (or might be introduced later) ways to add network interfaces
faster (thereby making the attack more powerful and quicker to carry
out)."

In default_device_exit_batch we call dev->rtnl_link_ops->dellink for
each link on cleanup_net, so the more links a CT has, the slower its
stop. With ~10000 network (veth) interfaces, CT stop time is ~1min.

Add a netif_avail_nr/netif_max_nr pair on the ve cgroup to restrict the
number of interfaces in a CT, analogous to the netns limit. With the
default of 256 network devices (most of them veth pairs), container stop
time will be ~3sec (~1sec for removing network interfaces).

https://jira.sw.ru/browse/PSBM-51354
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 include/linux/ve.h |  3 +++
 kernel/ve/ve.c     | 28 ++++++++++++++++++++++++++++
 net/core/dev.c     | 17 +++++++++++++++--
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index edff7e4..708c6d3 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -120,6 +120,8 @@ struct ve_struct {
 #endif
 	atomic_t		netns_avail_nr;
 	int			netns_max_nr;
+	atomic_t		netif_avail_nr;
+	int			netif_max_nr;
 	/* Number of mounts. May become unbalanced if VE0 mounts something
 	 * and the VE unmounts it. This is acceptable.
 	 */
@@ -138,6 +140,7 @@ struct ve_devmnt {
 };
 
 #define NETNS_MAX_NR_DEFAULT	256	/* number of net-namespaces per-VE */
+#define NETIF_MAX_NR_DEFAULT	256	/* number of net-interfaces per-VE */
 
 #define VE_MEMINFO_DEFAULT      1       /* default behaviour */
 #define VE_MEMINFO_SYSTEM       0       /* disable meminfo virtualization */
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 67b7882..a17b048 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -81,6 +81,8 @@ struct ve_struct ve0 = {
 	.mnt_nr			= 0,
 	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
 	.netns_max_nr		= INT_MAX,
+	.netif_avail_nr		= ATOMIC_INIT(INT_MAX),
+	.netif_max_nr		= INT_MAX,
 };
 EXPORT_SYMBOL(ve0);
 
@@ -653,6 +655,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
 	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
 	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
 
+	atomic_set(&ve->netif_avail_nr, NETIF_MAX_NR_DEFAULT);
+	ve->netif_max_nr = NETIF_MAX_NR_DEFAULT;
+
 do_init:
 	init_rwsem(&ve->op_sem);
 	INIT_LIST_HEAD(&ve->devices);
@@ -1191,6 +1196,8 @@ enum {
 	VE_CF_PID_MAX,
 	VE_CF_NETNS_MAX_NR,
 	VE_CF_NETNS_NR,
+	VE_CF_NETIF_MAX_NR,
+	VE_CF_NETIF_NR,
 };
 
 static int ve_ts_read(struct cgroup *cg, struct cftype *cft, struct seq_file *m)
@@ -1260,6 +1267,10 @@ static u64 ve_read_u64(struct cgroup *cg, struct cftype *cft)
 		return cgroup_ve(cg)->netns_max_nr;
 	else if (cft->private == VE_CF_NETNS_NR)
 		return atomic_read(&cgroup_ve(cg)->netns_avail_nr);
+	else if (cft->private == VE_CF_NETIF_MAX_NR)
+		return cgroup_ve(cg)->netif_max_nr;
+	else if (cft->private == VE_CF_NETIF_NR)
+		return atomic_read(&cgroup_ve(cg)->netif_avail_nr);
 	return 0;
 }
 
@@ -1346,6 +1357,11 @@ static int _ve_write_u64(struct cgroup *cg, struct cftype *cft,
 
 		ve->netns_max_nr = value;
 		atomic_add(delta, &ve->netns_avail_nr);
+	} else if (cft->private == VE_CF_NETIF_MAX_NR) {
+		int delta = value - ve->netif_max_nr;
+
+		ve->netif_max_nr = value;
+		atomic_add(delta, &ve->netif_avail_nr);
 	}
 	up_write(&ve->op_sem);
 	return 0;
@@ -1451,6 +1467,18 @@ static struct cftype ve_cftypes[] = {
 		.read_u64		= ve_read_u64,
 		.private		= VE_CF_NETNS_NR,
 	},
+	{
+		.name			= "netif_max_nr",
+		.flags			= CFTYPE_NOT_ON_ROOT,
+		.read_u64		= ve_read_u64,
+		.write_u64		= ve_write_u64,
+		.private		= VE_CF_NETIF_MAX_NR,
+	},
+	{
+		.name			= "netif_avail_nr",
+		.read_u64		= ve_read_u64,
+		.private		= VE_CF_NETIF_NR,
+	},
 	{ }
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 40e5a97..12de79c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6605,6 +6605,10 @@ int register_netdevice(struct net_device *dev)
 	if (!ve_is_super(net->owner_ve) && ve_is_dev_movable(dev))
 		goto out;
 
+	ret = -ENOMEM;
+	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0)
+		goto out;
+
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
 
@@ -6612,7 +6616,7 @@ int register_netdevice(struct net_device *dev)
 
 	ret = dev_get_valid_name(net, dev, dev->name);
 	if (ret < 0)
-		goto out;
+		goto err_avail;
 
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
@@ -6620,7 +6624,7 @@ int register_netdevice(struct net_device *dev)
 		if (ret) {
 			if (ret > 0)
 				ret = -EIO;
-			goto out;
+			goto err_avail;
 		}
 	}
 
@@ -6734,6 +6738,8 @@ int register_netdevice(struct net_device *dev)
 err_uninit:
 	if (dev->netdev_ops->ndo_uninit)
 		dev->netdev_ops->ndo_uninit(dev);
+err_avail:
+	atomic_inc(&net->owner_ve->netif_avail_nr);
 	goto out;
 }
 EXPORT_SYMBOL(register_netdevice);
@@ -7011,6 +7017,8 @@ void netdev_run_todo(void)
 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
+		atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
+
 		/* It must be the very last action,
 		 * after this 'dev' may point to freed up memory.
 		 */
@@ -7415,6 +7423,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 			goto out;
 	}
 
+	err = -ENOMEM;
+	if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0)
+		goto out;
+	atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
+
 	/*
 	 * And now a mini version of register_netdevice unregister_netdevice.
 	 */
-- 
2.9.3



More information about the Devel mailing list