[Devel] [PATCH RH7 2/2] ve/net: restrict number of net devices for CT
Andrew Vagin
avagin at virtuozzo.com
Tue Mar 28 15:06:55 PDT 2017
Acked-by: Andrew Vagin <avagin at virtuozzo.com>
On Mon, Mar 20, 2017 at 06:09:34PM +0300, Pavel Tikhomirov wrote:
> Description from Solar:
> "Adding thousands of network interfaces (aliases) in a container causes
> moderately slow container stop times (1 minute observed) with many
> kernel messages like "stop ploop26375 failed (holders=1)" appearing.
> A safety limit on the number of network interfaces in a container
> should be introduced by default, especially considering that there
> might be (or might be introduced later) ways to add network interfaces
> faster (thereby making the attack more powerful and quicker to carry
> out)."
>
> In default_device_exit_batch we call dev->rtnl_link_ops->dellink for
> each link on cleanup_net. So more links in CT means it's slower stop.
> On number of network(veth) interfaces ~10000, CT stop time is ~1min.
>
> Make netif_avail_nr/netif_max_nr pair on ve cgroup to restrict number
> of interfaces in CT anologious to netns limit. With default 256 network
> devices (most of them are veth pairs) containter stop time will be ~3sec
> (~1sec for removing network interfaces).
>
> https://jira.sw.ru/browse/PSBM-51354
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> ---
> include/linux/ve.h | 3 +++
> kernel/ve/ve.c | 28 ++++++++++++++++++++++++++++
> net/core/dev.c | 17 +++++++++++++++--
> 3 files changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index edff7e4..708c6d3 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -120,6 +120,8 @@ struct ve_struct {
> #endif
> atomic_t netns_avail_nr;
> int netns_max_nr;
> + atomic_t netif_avail_nr;
> + int netif_max_nr;
> /* Number of mounts. May become unbalanced if VE0 mounts something
> * and the VE unmounts it. This is acceptable.
> */
> @@ -138,6 +140,7 @@ struct ve_devmnt {
> };
>
> #define NETNS_MAX_NR_DEFAULT 256 /* number of net-namespaces per-VE */
> +#define NETIF_MAX_NR_DEFAULT 256 /* number of net-interfaces per-VE */
>
> #define VE_MEMINFO_DEFAULT 1 /* default behaviour */
> #define VE_MEMINFO_SYSTEM 0 /* disable meminfo virtualization */
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index 67b7882..a17b048 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -81,6 +81,8 @@ struct ve_struct ve0 = {
> .mnt_nr = 0,
> .netns_avail_nr = ATOMIC_INIT(INT_MAX),
> .netns_max_nr = INT_MAX,
> + .netif_avail_nr = ATOMIC_INIT(INT_MAX),
> + .netif_max_nr = INT_MAX,
> };
> EXPORT_SYMBOL(ve0);
>
> @@ -653,6 +655,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
> atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
> ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
>
> + atomic_set(&ve->netif_avail_nr, NETIF_MAX_NR_DEFAULT);
> + ve->netif_max_nr = NETIF_MAX_NR_DEFAULT;
> +
> do_init:
> init_rwsem(&ve->op_sem);
> INIT_LIST_HEAD(&ve->devices);
> @@ -1191,6 +1196,8 @@ enum {
> VE_CF_PID_MAX,
> VE_CF_NETNS_MAX_NR,
> VE_CF_NETNS_NR,
> + VE_CF_NETIF_MAX_NR,
> + VE_CF_NETIF_NR,
> };
>
> static int ve_ts_read(struct cgroup *cg, struct cftype *cft, struct seq_file *m)
> @@ -1260,6 +1267,10 @@ static u64 ve_read_u64(struct cgroup *cg, struct cftype *cft)
> return cgroup_ve(cg)->netns_max_nr;
> else if (cft->private == VE_CF_NETNS_NR)
> return atomic_read(&cgroup_ve(cg)->netns_avail_nr);
> + else if (cft->private == VE_CF_NETIF_MAX_NR)
> + return cgroup_ve(cg)->netif_max_nr;
> + else if (cft->private == VE_CF_NETIF_NR)
> + return atomic_read(&cgroup_ve(cg)->netif_avail_nr);
> return 0;
> }
>
> @@ -1346,6 +1357,11 @@ static int _ve_write_u64(struct cgroup *cg, struct cftype *cft,
>
> ve->netns_max_nr = value;
> atomic_add(delta, &ve->netns_avail_nr);
> + } else if (cft->private == VE_CF_NETIF_MAX_NR) {
> + int delta = value - ve->netif_max_nr;
> +
> + ve->netif_max_nr = value;
> + atomic_add(delta, &ve->netif_avail_nr);
> }
> up_write(&ve->op_sem);
> return 0;
> @@ -1451,6 +1467,18 @@ static struct cftype ve_cftypes[] = {
> .read_u64 = ve_read_u64,
> .private = VE_CF_NETNS_NR,
> },
> + {
> + .name = "netif_max_nr",
> + .flags = CFTYPE_NOT_ON_ROOT,
> + .read_u64 = ve_read_u64,
> + .write_u64 = ve_write_u64,
> + .private = VE_CF_NETIF_MAX_NR,
> + },
> + {
> + .name = "netif_avail_nr",
> + .read_u64 = ve_read_u64,
> + .private = VE_CF_NETIF_NR,
> + },
> { }
> };
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 40e5a97..12de79c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6605,6 +6605,10 @@ int register_netdevice(struct net_device *dev)
> if (!ve_is_super(net->owner_ve) && ve_is_dev_movable(dev))
> goto out;
>
> + ret = -ENOMEM;
> + if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0)
> + goto out;
> +
> spin_lock_init(&dev->addr_list_lock);
> netdev_set_addr_lockdep_class(dev);
>
> @@ -6612,7 +6616,7 @@ int register_netdevice(struct net_device *dev)
>
> ret = dev_get_valid_name(net, dev, dev->name);
> if (ret < 0)
> - goto out;
> + goto err_avail;
>
> /* Init, if this function is available */
> if (dev->netdev_ops->ndo_init) {
> @@ -6620,7 +6624,7 @@ int register_netdevice(struct net_device *dev)
> if (ret) {
> if (ret > 0)
> ret = -EIO;
> - goto out;
> + goto err_avail;
> }
> }
>
> @@ -6734,6 +6738,8 @@ int register_netdevice(struct net_device *dev)
> err_uninit:
> if (dev->netdev_ops->ndo_uninit)
> dev->netdev_ops->ndo_uninit(dev);
> +err_avail:
> + atomic_inc(&net->owner_ve->netif_avail_nr);
> goto out;
> }
> EXPORT_SYMBOL(register_netdevice);
> @@ -7011,6 +7017,8 @@ void netdev_run_todo(void)
> WARN_ON(rcu_access_pointer(dev->ip6_ptr));
> WARN_ON(dev->dn_ptr);
>
> + atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
> +
> /* It must be the very last action,
> * after this 'dev' may point to freed up memory.
> */
> @@ -7415,6 +7423,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
> goto out;
> }
>
> + err = -ENOMEM;
> + if (atomic_dec_if_positive(&net->owner_ve->netif_avail_nr) < 0)
> + goto out;
> + atomic_inc(&dev_net(dev)->owner_ve->netif_avail_nr);
> +
> /*
> * And now a mini version of register_netdevice unregister_netdevice.
> */
> --
> 2.9.3
>
More information about the Devel
mailing list