[Devel] [PATCH v3 3/3] cgroup/net_prio: virtualize ifpriomap per-ve

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Wed Aug 18 14:00:05 MSK 2021


Ifpriomap is a map of net_prio cgroup id to device prio. Each process is
in some netprio cgroup and all sockets of this process have prio cgroup
id of this cgroup. When packet from such socket goes through network
stack we choose priority for packet on each device we go through based
on these device+id->prio map.

Previously we were able to set map for each net_prio cgroup on the
system, but only for devices of host init network namespace. This patch
adds mapping for ve init netns devices. VE can only get/change device
map for ve init netns, Host can only get/change device map for host's
init netns.

We can have for same cgroup both mappings setup by host for host net
devices and mappings setup by ve for ve net devices.

When new cgroup is created it either copies only mappings for host
network devices if done from host, or copies also mappings for ve
network devices if done from ve.

If ve is not running (ve_ns is NULL), even while in ve we would operate
with host ifpriomap.

https://jira.sw.ru/browse/PSBM-123766

Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

---
v3: remove lockdep assertion as cgroup_mutex is static

---
 net/core/netprio_cgroup.c | 73 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 0d2180df669c..32009cac46e6 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -24,6 +24,7 @@
 #include <net/pkt_cls.h>
 #include <net/sock.h>
 #include <net/netprio_cgroup.h>
+#include <linux/ve.h>
 
 #include <linux/fdtable.h>
 
@@ -144,6 +145,7 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 static int cgrp_css_online(struct cgroup *cgrp)
 {
 	struct cgroup *parent = cgrp->parent;
+	struct ve_struct *ve;
 	struct net_device *dev;
 	int ret = 0;
 
@@ -162,6 +164,38 @@ static int cgrp_css_online(struct cgroup *cgrp)
 		if (ret)
 			break;
 	}
+
+	/* get_exec_env is safe under cgroup_mutex */
+	ve = get_exec_env();
+	/*
+	 * Inherit prios from the parent cgroup in scope of ve init netns.
+	 */
+	if (!ve_is_super(ve)) {
+		struct nsproxy *ve_ns;
+		struct net *net = NULL;
+
+		/*
+		 * Take rcu read lock to check that ve's net is not freed under
+		 * us after we release rcu read lock we still have rtnl lock to
+		 * insure net remains non-freed, pairs with rtnl lock in
+		 * cleanup_net().
+		 */
+		rcu_read_lock();
+		ve_ns = rcu_dereference(ve->ve_ns);
+		if (ve_ns)
+			net = ve_ns->net_ns;
+		rcu_read_unlock();
+
+		if (net && net != &init_net) {
+			for_each_netdev(net, dev) {
+				u32 prio = netprio_prio(parent, dev);
+
+				ret = netprio_set_prio(cgrp, dev, prio);
+				if (ret)
+					break;
+			}
+		}
+	}
 	rtnl_unlock();
 	return ret;
 }
@@ -179,18 +213,37 @@ static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
 static int read_priomap(struct cgroup *cont, struct cftype *cft,
 			struct cgroup_map_cb *cb)
 {
+	struct ve_struct *ve;
+	struct net *net, *_net = NULL;
 	struct net_device *dev;
 
+	ve = get_curr_ve();
+	if (!ve_is_super(ve)) {
+		struct nsproxy *ve_ns;
+
+		rcu_read_lock();
+		ve_ns = rcu_dereference(ve->ve_ns);
+		if (ve_ns)
+			_net = get_net(ve_ns->net_ns);
+		rcu_read_unlock();
+	}
+	put_ve(ve);
+
+	net = _net ? : &init_net;
 	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, dev)
+	for_each_netdev_rcu(net, dev)
 		cb->fill(cb, dev->name, netprio_prio(cont, dev));
 	rcu_read_unlock();
+	if (_net)
+		put_net(_net);
 	return 0;
 }
 
 static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 			 const char *buffer)
 {
+	struct ve_struct *ve;
+	struct net *net, *_net = NULL;
 	char devname[IFNAMSIZ + 1];
 	struct net_device *dev;
 	u32 prio;
@@ -199,7 +252,22 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 	if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
 		return -EINVAL;
 
-	dev = dev_get_by_name(&init_net, devname);
+	ve = get_curr_ve();
+	if (!ve_is_super(ve)) {
+		struct nsproxy *ve_ns;
+
+		rcu_read_lock();
+		ve_ns = rcu_dereference(ve->ve_ns);
+		if (ve_ns)
+			_net = get_net(ve_ns->net_ns);
+		rcu_read_unlock();
+	}
+	put_ve(ve);
+
+	net = _net ? : &init_net;
+	dev = dev_get_by_name(net, devname);
+	if (_net)
+		put_net(_net);
 	if (!dev)
 		return -ENODEV;
 
@@ -241,6 +309,7 @@ static struct cftype ss_files[] = {
 	},
 	{
 		.name = "ifpriomap",
+		.flags = CFTYPE_VE_WRITABLE,
 		.read_map = read_priomap,
 		.write_string = write_priomap,
 	},
-- 
2.31.1



More information about the Devel mailing list