[Devel] [PATCH RH9 11/13] ve/netns: limit number of network namespaces per container

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Tue Sep 21 19:04:29 MSK 2021


Network namespaces are destroyed asynchronously from net_cleanup_work
(cleanup_net()). This work destroys all network namespaces which are
queued in net_kill_list. It's doing this under net_mutex.

This work is executed from the "kworker" kernel thread. We know that there
is an avalanche effect here. If N processes which create network namespaces
don't affect the time of creating new namespaces in other containers, it
doesn't mean that N + 1 processes will not increase the creation time by
a dozen times. This can be explained too.

The longer kworker runs on a cpu, the longer it will wait before
being scheduled the next time, and the more namespaces will be
created during this period of time.

The following script can be used to reproduce the problem:
while :; do
	n=`jobs | wc -l`
	n=$((10000 - $n))
	for i in `seq $n`; do
		unshare -m true &
	done
	wait -n 1
done

https://jira.sw.ru/browse/PSBM-49690

v2: Describe the real reason of the issue.
v3: - don't limit a number of netns for ve0
    - initialize netns before returning an error, otherwise
      net_namespace structures are not freed.
v4: don't allocate a net_namespace structure if a ct hits the limit.

khorenko@: v5: s/netns_nr/netns_avail_nr/g
The variable contains the number of available netns, not currently existing.

Cc: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>

Signed-off-by: Andrey Vagin <avagin at openvz.org>
Reviewed-by: Cyrill Gorcunov <gorcunov at openvz.org>

Rebase 4.14:
Add separate read/write callbacks for cgroup files as for iptables_mask
and features. Add helpers to change netns_avail_nr and handle yet
unhandled error-path for net_alloc().

We still need it, as the ms (mainstream) sysctl max_net_namespaces is
per-user, not per ve/userns, and it sets the same max value for all users;
thus it is not flexible enough for us.

Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

(cherry-picked from vz8 commit 43e6ffb41ba7dccc3d225b0d8a954a9453167e7b)
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 include/linux/ve.h       |  5 +++++
 kernel/ve/ve.c           | 45 +++++++++++++++++++++++++++++++++++++++-
 net/core/net_namespace.c | 25 ++++++++++++++++++++++
 3 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index ba3d1e517152..c8dd71cfa3d9 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -48,10 +48,15 @@ struct ve_struct {
 	__u64			features;
 
 	struct kmapset_key	sysfs_perms_key;
+
+	atomic_t		netns_avail_nr;
+	int			netns_max_nr;
 };
 
 extern int nr_ve;
 
+#define NETNS_MAX_NR_DEFAULT	256	/* number of net-namespaces per-VE */
+
 #define capable_setveid() \
 	(ve_is_super(get_exec_env()) && capable(CAP_SYS_ADMIN))
 
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index ff51f5678a83..f87e3fd53ec7 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -38,6 +38,8 @@ struct ve_struct ve0 = {
 
 	.init_cred		= &init_cred,
 	.features		= -1,
+	.netns_avail_nr		= ATOMIC_INIT(INT_MAX),
+	.netns_max_nr		= INT_MAX,
 };
 EXPORT_SYMBOL(ve0);
 
@@ -327,6 +329,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
 		goto err_ve;
 
 	ve->features = VE_FEATURES_DEF;
+
+	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
+	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
 do_init:
 	init_rwsem(&ve->op_sem);
 	INIT_LIST_HEAD(&ve->ve_list);
@@ -599,6 +604,35 @@ static int ve_reatures_write(struct cgroup_subsys_state *css, struct cftype *cft
 	return 0;
 }
 
+static u64 ve_netns_max_nr_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return css_to_ve(css)->netns_max_nr;
+}
+
+static int ve_netns_max_nr_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val)
+{
+	struct ve_struct *ve = css_to_ve(css);
+	int delta;
+
+	if (!ve_is_super(get_exec_env()))
+		return -EPERM;
+
+	down_write(&ve->op_sem);
+	if (ve->is_running || ve->ve_ns) {
+		up_write(&ve->op_sem);
+		return -EBUSY;
+	}
+	delta = val - ve->netns_max_nr;
+	ve->netns_max_nr = val;
+	atomic_add(delta, &ve->netns_avail_nr);
+	up_write(&ve->op_sem);
+	return 0;
+}
+static u64 ve_netns_avail_nr_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return atomic_read(&css_to_ve(css)->netns_avail_nr);
+}
+
 static struct cftype ve_cftypes[] = {
 
 	{
@@ -625,7 +659,16 @@ static struct cftype ve_cftypes[] = {
 		.read_u64		= ve_reatures_read,
 		.write_u64		= ve_reatures_write,
 	},
-
+	{
+		.name			= "netns_max_nr",
+		.flags			= CFTYPE_NOT_ON_ROOT,
+		.read_u64		= ve_netns_max_nr_read,
+		.write_u64		= ve_netns_max_nr_write,
+	},
+	{
+		.name			= "netns_avail_nr",
+		.read_u64		= ve_netns_avail_nr_read,
+	},
 	{ }
 };
 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e40ad5d30b15..ee0dbebe3bb5 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -455,9 +455,24 @@ void net_drop_ns(void *p)
 		net_free(ns);
 }
 
+#ifdef CONFIG_VE
+static int dec_netns_avail(struct ve_struct *ve)
+{
+	if (atomic_dec_if_positive(&ve->netns_avail_nr) < 0)
+		return -ENOSPC;
+	return 0;
+}
+
+static void inc_netns_avail(struct ve_struct *ve)
+{
+	atomic_inc(&ve->netns_avail_nr);
+}
+#endif
+
 struct net *copy_net_ns(unsigned long flags,
 			struct user_namespace *user_ns, struct net *old_net)
 {
+	struct ve_struct *ve = get_exec_env();
 	struct ucounts *ucounts;
 	struct net *net;
 	int rv;
@@ -469,6 +484,12 @@ struct net *copy_net_ns(unsigned long flags,
 	if (!ucounts)
 		return ERR_PTR(-ENOSPC);
 
+#ifdef CONFIG_VE
+	rv = dec_netns_avail(ve);
+	if (rv < 0)
+		return ERR_PTR(rv);
+#endif
+
 	net = net_alloc();
 	if (!net) {
 		rv = -ENOMEM;
@@ -493,6 +514,9 @@ struct net *copy_net_ns(unsigned long flags,
 		net_drop_ns(net);
 dec_ucounts:
 		dec_net_namespaces(ucounts);
+#ifdef CONFIG_VE
+		inc_netns_avail(ve);
+#endif
 		return ERR_PTR(rv);
 	}
 	return net;
@@ -623,6 +647,7 @@ static void cleanup_net(struct work_struct *work)
 		key_remove_domain(net->key_domain);
 		put_user_ns(net->user_ns);
 #ifdef CONFIG_VE
+		inc_netns_avail(net->owner_ve);
 		put_ve(net->owner_ve);
 #endif
 		net_drop_ns(net);
-- 
2.31.1



More information about the Devel mailing list