[Devel] [PATCH rh7 6/6] cgroup: fix path mangling for ve cgroups

Vladimir Davydov vdavydov at virtuozzo.com
Mon Jun 20 09:40:16 PDT 2016


Presently, we just cut the first component off the cgroup path when
inside a VE, because all VE cgroups are located at the top level of the
cgroup hierarchy. However, this is going to change - the cgroups are
going to move to machine.slice - so we should introduce a more generic
way of mangling cgroup paths.

This patch does the trick. On VE start, it marks every cgroup that the
VE's init task resides in with a special flag (CGRP_VE_ROOT). Cgroups
marked this way are treated as root when looked at from inside a VE.
As long as we don't have nested VEs, this should work fine.

Note, we don't need to clear these flags on VE destruction, because
vzctl always creates new cgroups on VE start.

https://jira.sw.ru/browse/PSBM-48629

Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 include/linux/cgroup.h |  3 +++
 kernel/cgroup.c        | 27 ++++++++++++++++++++-------
 kernel/ve/ve.c         |  4 ++++
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index aad06e8e0258..730ca9091bfb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -175,6 +175,9 @@ enum {
 	CGRP_CPUSET_CLONE_CHILDREN,
 	/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
 	CGRP_SANE_BEHAVIOR,
+
+	/* The cgroup is root in a VE */
+	CGRP_VE_ROOT,
 };
 
 struct cgroup_name {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dd548853e2eb..581924e7af9e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1791,6 +1791,21 @@ static struct file_system_type cgroup_fs_type = {
 
 static struct kobject *cgroup_kobj;
 
+#ifdef CONFIG_VE
+void cgroup_mark_ve_root(struct ve_struct *ve)
+{
+	struct cgroup *cgrp;
+	struct cgroupfs_root *root;
+
+	mutex_lock(&cgroup_mutex);
+	for_each_active_root(root) {
+		cgrp = task_cgroup_from_root(ve->init_task, root);
+		set_bit(CGRP_VE_ROOT, &cgrp->flags);
+	}
+	mutex_unlock(&cgroup_mutex);
+}
+#endif
+
 /**
  * cgroup_path - generate the path of a cgroup
  * @cgrp: the cgroup in question
@@ -1804,7 +1819,8 @@ static struct kobject *cgroup_kobj;
  * inode's i_mutex, while on the other hand cgroup_path() can be called
  * with some irq-safe spinlocks held.
  */
-int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt)
+static int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen,
+			 bool virt)
 {
 	int ret = -ENAMETOOLONG;
 	char *start;
@@ -1824,14 +1840,11 @@ int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt)
 		int len;
 
 #ifdef CONFIG_VE
-		if (virt && cgrp->parent && !cgrp->parent->parent) {
+		if (virt && test_bit(CGRP_VE_ROOT, &cgrp->flags)) {
 			/*
 			 * Containers cgroups are bind-mounted from node
 			 * so they are like '/' from inside, thus we have
-			 * to mangle cgroup path output. Effectively it is
-			 * enough to remove two topmost cgroups from path.
-			 * e.g. in ct 101: /101/test.slice/test.scope ->
-			 * /test.slice/test.scope
+			 * to mangle cgroup path output.
 			 */
 			if (*start != '/') {
 				if (--start < buf)
@@ -2391,7 +2404,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
 	 * inside a container FS.
 	 */
 	if (!ve_is_super(get_exec_env())
-	    && (!cgrp->parent || !cgrp->parent->parent)
+	    && test_bit(CGRP_VE_ROOT, &cgrp->flags)
 	    && !get_exec_env()->is_pseudosuper
 	    && !(cft->flags & CFTYPE_VE_WRITABLE))
 		return -EPERM;
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 08a15fc02e21..e65130f18bb4 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -454,6 +454,8 @@ static void ve_drop_context(struct ve_struct *ve)
 
 static const struct timespec zero_time = { };
 
+extern void cgroup_mark_ve_root(struct ve_struct *ve);
+
 /* under ve->op_sem write-lock */
 static int ve_start_container(struct ve_struct *ve)
 {
@@ -501,6 +503,8 @@ static int ve_start_container(struct ve_struct *ve)
 	if (err < 0)
 		goto err_iterate;
 
+	cgroup_mark_ve_root(ve);
+
 	ve->is_running = 1;
 
 	printk(KERN_INFO "CT: %s: started\n", ve_name(ve));
-- 
2.1.4



More information about the Devel mailing list