[Devel] [PATCH RHEL7 COMMIT] ve/cgroup: do not virtualize output of cgroup_path
Konstantin Khorenko
khorenko at virtuozzo.com
Tue Sep 1 06:38:00 PDT 2015
The commit is pushed to "branch-rh7-3.10.0-229.7.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.6.5
------>
commit 607a3ac1497d796f267f72c4af27dbfd9d0cc3f0
Author: Vladimir Davydov <vdavydov at parallels.com>
Date: Tue Sep 1 17:38:00 2015 +0400
ve/cgroup: do not virtualize output of cgroup_path
When cgroup_path() is called from inside a container, its output is
"virtualized", i.e. cgroup /CTID/A/B is reported as /A/B. This was done
for userspace tools to not get confused by the output of some proc files
(namely, /proc/PID/{cgroup,cpuset}). However, it is wrong to virtualize
cgroup_path() every time it is called from inside a container. For instance, it is
called from inside a container on OOM in order to dump memcg info to
system log, in which case mangling its output would be incorrect.
Therefore this patch makes cgroup_path() always return an absolute path.
To get a container-relative path, one should now use cgroup_path_ve().
Currently, cgroup_path_ve() is used only for the output of /proc files
(which seems to be sufficient for now).
https://jira.sw.ru/browse/PSBM-34852
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
Acked-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
include/linux/cgroup.h | 1 +
kernel/cgroup.c | 17 +++++++++++++----
kernel/cpuset.c | 2 +-
3 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 44b64c9..ed5e6ac 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -561,6 +561,7 @@ int cgroup_is_removed(const struct cgroup *cgrp);
bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
+int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen);
int cgroup_task_count(const struct cgroup *cgrp);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a07c4e0..ad61c97 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1804,11 +1804,10 @@ static struct kobject *cgroup_kobj;
* inode's i_mutex, while on the other hand cgroup_path() can be called
* with some irq-safe spinlocks held.
*/
-int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt)
{
int ret = -ENAMETOOLONG;
char *start;
- struct ve_struct *ve = get_exec_env();
if (!cgrp->parent) {
if (strlcpy(buf, "/", buflen) >= buflen)
@@ -1825,7 +1824,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
int len;
#ifdef CONFIG_VE
- if (!ve_is_super(ve) && cgrp->parent && !cgrp->parent->parent) {
+ if (virt && cgrp->parent && !cgrp->parent->parent) {
/*
* Containers cgroups are bind-mounted from node
* so they are like '/' from inside, thus we have
@@ -1860,8 +1859,18 @@ out:
rcu_read_unlock();
return ret;
}
+
+int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+{
+ return __cgroup_path(cgrp, buf, buflen, false);
+}
EXPORT_SYMBOL_GPL(cgroup_path);
+int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen)
+{
+ return __cgroup_path(cgrp, buf, buflen, !ve_is_super(get_exec_env()));
+}
+
/*
* Control Group taskset
*/
@@ -4927,7 +4936,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
root->name);
seq_putc(m, ':');
cgrp = task_cgroup_from_root(tsk, root);
- retval = cgroup_path(cgrp, buf, PAGE_SIZE);
+ retval = cgroup_path_ve(cgrp, buf, PAGE_SIZE);
if (retval < 0)
goto out_unlock;
seq_puts(m, buf);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2400c4e..81030b3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2697,7 +2697,7 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
rcu_read_lock();
css = task_subsys_state(tsk, cpuset_subsys_id);
- retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
+ retval = cgroup_path_ve(css->cgroup, buf, PAGE_SIZE);
rcu_read_unlock();
if (retval < 0)
goto out_put_task;
More information about the Devel
mailing list