[Devel] [PATCH RHEL7 COMMIT] cgroup: fix path mangling for ve cgroups

Vladimir Davydov vdavydov at virtuozzo.com
Mon Jun 20 10:07:58 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.16
------>
commit 79fa6ee2446a3efe9791378cf9b582bbee0ef7ec
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Mon Jun 20 21:07:58 2016 +0400

    cgroup: fix path mangling for ve cgroups
    
    Presently, we just cut the first component off the cgroup path when
    inside a VE, because all VE cgroups are located at the top level of the
    cgroup hierarchy. However, this is going to change - the cgroups are
    going to move to machine.slice - so we should introduce a more generic
    way of mangling cgroup paths.
    
    This patch does the trick. On VE start, it marks every cgroup that the
    init task of the VE resides in with a special flag (CGRP_VE_ROOT).
    Cgroups marked this way are treated as root when looked at from inside
    a VE. As long as we don't have nested VEs, this should work fine.
    
    Note, we don't need to clear these flags on VE destruction, because
    vzctl always creates new cgroups on VE start.
    
    https://jira.sw.ru/browse/PSBM-48629
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 include/linux/cgroup.h |  3 +++
 kernel/cgroup.c        | 27 ++++++++++++++++++++-------
 kernel/ve/ve.c         |  4 ++++
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index aad06e8e0258..730ca9091bfb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -175,6 +175,9 @@ enum {
 	CGRP_CPUSET_CLONE_CHILDREN,
 	/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
 	CGRP_SANE_BEHAVIOR,
+
+	/* The cgroup is root in a VE */
+	CGRP_VE_ROOT,
 };
 
 struct cgroup_name {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dd548853e2eb..581924e7af9e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1791,6 +1791,21 @@ static struct file_system_type cgroup_fs_type = {
 
 static struct kobject *cgroup_kobj;
 
+#ifdef CONFIG_VE
+void cgroup_mark_ve_root(struct ve_struct *ve)
+{
+	struct cgroup *cgrp;
+	struct cgroupfs_root *root;
+
+	mutex_lock(&cgroup_mutex);
+	for_each_active_root(root) {
+		cgrp = task_cgroup_from_root(ve->init_task, root);
+		set_bit(CGRP_VE_ROOT, &cgrp->flags);
+	}
+	mutex_unlock(&cgroup_mutex);
+}
+#endif
+
 /**
  * cgroup_path - generate the path of a cgroup
  * @cgrp: the cgroup in question
@@ -1804,7 +1819,8 @@ static struct kobject *cgroup_kobj;
  * inode's i_mutex, while on the other hand cgroup_path() can be called
  * with some irq-safe spinlocks held.
  */
-int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt)
+static int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen,
+			 bool virt)
 {
 	int ret = -ENAMETOOLONG;
 	char *start;
@@ -1824,14 +1840,11 @@ int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt)
 		int len;
 
 #ifdef CONFIG_VE
-		if (virt && cgrp->parent && !cgrp->parent->parent) {
+		if (virt && test_bit(CGRP_VE_ROOT, &cgrp->flags)) {
 			/*
 			 * Containers cgroups are bind-mounted from node
 			 * so they are like '/' from inside, thus we have
-			 * to mangle cgroup path output. Effectively it is
-			 * enough to remove two topmost cgroups from path.
-			 * e.g. in ct 101: /101/test.slice/test.scope ->
-			 * /test.slice/test.scope
+			 * to mangle cgroup path output.
 			 */
 			if (*start != '/') {
 				if (--start < buf)
@@ -2391,7 +2404,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
 	 * inside a container FS.
 	 */
 	if (!ve_is_super(get_exec_env())
-	    && (!cgrp->parent || !cgrp->parent->parent)
+	    && test_bit(CGRP_VE_ROOT, &cgrp->flags)
 	    && !get_exec_env()->is_pseudosuper
 	    && !(cft->flags & CFTYPE_VE_WRITABLE))
 		return -EPERM;
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 9904a4ae130e..2459cb53a665 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -452,6 +452,8 @@ static void ve_drop_context(struct ve_struct *ve)
 
 static const struct timespec zero_time = { };
 
+extern void cgroup_mark_ve_root(struct ve_struct *ve);
+
 /* under ve->op_sem write-lock */
 static int ve_start_container(struct ve_struct *ve)
 {
@@ -499,6 +501,8 @@ static int ve_start_container(struct ve_struct *ve)
 	if (err < 0)
 		goto err_iterate;
 
+	cgroup_mark_ve_root(ve);
+
 	ve->is_running = 1;
 
 	printk(KERN_INFO "CT: %s: started\n", ve_name(ve));


More information about the Devel mailing list