[Devel] [PATCH rh7] cgroup: do not virtualize output of cgroup_path
Vladimir Davydov
vdavydov at parallels.com
Mon Aug 24 03:04:35 PDT 2015
When cgroup_path() is called from inside a container, its output is
"virtualized", i.e. cgroup /CTID/A/B is reported as /A/B. This was done
so that userspace tools are not confused by the output of some proc files
(namely, /proc/PID/{cgroup,cpuset}). However, it is wrong to virtualize
the output of cgroup_path() every time it is called from inside a
container. For instance, it is called from inside a container on OOM in
order to dump memcg info to the system log, in which case mangling its
output would be incorrect.
Therefore this patch makes cgroup_path() always return an absolute path.
To get a container-relative path, one should now use cgroup_path_ve().
Currently, cgroup_path_ve() is used only for the output of /proc files
(which seems to be enough for now).
https://jira.sw.ru/browse/PSBM-34852
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
include/linux/cgroup.h | 1 +
kernel/cgroup.c | 20 ++++++++++++++++----
kernel/cpuset.c | 2 +-
3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b7eb28ffd0d6..146a924664cf 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -561,6 +561,7 @@ int cgroup_is_removed(const struct cgroup *cgrp);
bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
+int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen);
int cgroup_task_count(const struct cgroup *cgrp);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index aa3546d93f88..0ff3b5254b5f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1804,11 +1804,13 @@ static struct kobject *cgroup_kobj;
* inode's i_mutex, while on the other hand cgroup_path() can be called
* with some irq-safe spinlocks held.
*/
-int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt)
{
int ret = -ENAMETOOLONG;
char *start;
- struct ve_struct *ve = get_exec_env();
+
+ if (ve_is_super(get_exec_env()))
+ virt = false;
if (!cgrp->parent) {
if (strlcpy(buf, "/", buflen) >= buflen)
@@ -1825,7 +1827,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
int len;
#ifdef CONFIG_VE
- if (!ve_is_super(ve) && cgrp->parent && !cgrp->parent->parent) {
+ if (virt && cgrp->parent && !cgrp->parent->parent) {
/*
* Containers cgroups are bind-mounted from node
* so they are like '/' from inside, thus we have
@@ -1860,8 +1862,18 @@ out:
rcu_read_unlock();
return ret;
}
+
+int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+{
+ return __cgroup_path(cgrp, buf, buflen, false);
+}
EXPORT_SYMBOL_GPL(cgroup_path);
+int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen)
+{
+ return __cgroup_path(cgrp, buf, buflen, true);
+}
+
/*
* Control Group taskset
*/
@@ -4927,7 +4939,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
root->name);
seq_putc(m, ':');
cgrp = task_cgroup_from_root(tsk, root);
- retval = cgroup_path(cgrp, buf, PAGE_SIZE);
+ retval = cgroup_path_ve(cgrp, buf, PAGE_SIZE);
if (retval < 0)
goto out_unlock;
seq_puts(m, buf);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2400c4e1b002..81030b340dbd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2697,7 +2697,7 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
rcu_read_lock();
css = task_subsys_state(tsk, cpuset_subsys_id);
- retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
+ retval = cgroup_path_ve(css->cgroup, buf, PAGE_SIZE);
rcu_read_unlock();
if (retval < 0)
goto out_put_task;
--
2.1.4
More information about the Devel
mailing list