[Devel] [PATCH rh7] cgroup: do not virtualize output of cgroup_path

Vladimir Davydov vdavydov at parallels.com
Mon Aug 24 04:58:07 PDT 2015


On Mon, Aug 24, 2015 at 02:46:31PM +0300, Cyrill Gorcunov wrote:
...
> > diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> > index aa3546d93f88..0ff3b5254b5f 100644
> > --- a/kernel/cgroup.c
> > +++ b/kernel/cgroup.c
> > @@ -1804,11 +1804,13 @@ static struct kobject *cgroup_kobj;
> >   * inode's i_mutex, while on the other hand cgroup_path() can be called
> >   * with some irq-safe spinlocks held.
> >   */
> > -int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
> > +int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt)
> >  {
> >  	int ret = -ENAMETOOLONG;
> >  	char *start;
> > -	struct ve_struct *ve = get_exec_env();
> > +
> > +	if (ve_is_super(get_exec_env()))
> > +		virt = false;
> 
> May we not modify the argument here? Won't the following be better?
> 
> cgroup_path_ve
> 	return __cgroup_path(cgrp, buf, buflen, ve_is_super(get_exec_env()) ? false : true);
> 
> and drop ve_is_super from __cgroup_path itself.

As you wish.
---
From: Vladimir Davydov <vdavydov at parallels.com>
Subject: [PATCH rh7] cgroup: do not virtualize output of cgroup_path

When cgroup_path() is called from inside a container, its output is
"virtualized", i.e. cgroup /CTID/A/B is reported as /A/B. This was done
for userspace tools to not get confused by the output of some proc files
(namely, /proc/PID/{cgroup,cpuset}). However, it is wrong to virtualize
cgroup_path() every time it is called from inside a container. For
instance, on OOM it is called from inside a container to dump memcg info
to the system log, in which case mangling its output would be incorrect.

Therefore this patch makes cgroup_path() always return an absolute path.
To get a container-relative path, one should now use cgroup_path_ve().
Currently, cgroup_path_ve() is only used for the output of /proc files
(which seems to be enough for now).

https://jira.sw.ru/browse/PSBM-34852

Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b7eb28ffd0d6..146a924664cf 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -561,6 +561,7 @@ int cgroup_is_removed(const struct cgroup *cgrp);
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
+int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index aa3546d93f88..81851b8fe505 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1804,11 +1804,10 @@ static struct kobject *cgroup_kobj;
  * inode's i_mutex, while on the other hand cgroup_path() can be called
  * with some irq-safe spinlocks held.
  */
-int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt)
 {
 	int ret = -ENAMETOOLONG;
 	char *start;
-	struct ve_struct *ve = get_exec_env();
 
 	if (!cgrp->parent) {
 		if (strlcpy(buf, "/", buflen) >= buflen)
@@ -1825,7 +1824,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 		int len;
 
 #ifdef CONFIG_VE
-		if (!ve_is_super(ve) && cgrp->parent && !cgrp->parent->parent) {
+		if (virt && cgrp->parent && !cgrp->parent->parent) {
 			/*
 			 * Containers cgroups are bind-mounted from node
 			 * so they are like '/' from inside, thus we have
@@ -1860,8 +1859,18 @@ out:
 	rcu_read_unlock();
 	return ret;
 }
+
+int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+{
+	return __cgroup_path(cgrp, buf, buflen, false);
+}
 EXPORT_SYMBOL_GPL(cgroup_path);
 
+int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen)
+{
+	return __cgroup_path(cgrp, buf, buflen, !ve_is_super(get_exec_env()));
+}
+
 /*
  * Control Group taskset
  */
@@ -4927,7 +4936,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
 				   root->name);
 		seq_putc(m, ':');
 		cgrp = task_cgroup_from_root(tsk, root);
-		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
+		retval = cgroup_path_ve(cgrp, buf, PAGE_SIZE);
 		if (retval < 0)
 			goto out_unlock;
 		seq_puts(m, buf);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2400c4e1b002..81030b340dbd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2697,7 +2697,7 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
 
 	rcu_read_lock();
 	css = task_subsys_state(tsk, cpuset_subsys_id);
-	retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
+	retval = cgroup_path_ve(css->cgroup, buf, PAGE_SIZE);
 	rcu_read_unlock();
 	if (retval < 0)
 		goto out_put_task;



More information about the Devel mailing list