[Devel] [patch rh7 1/3] cgroups: Drop virtualization code
Cyrill Gorcunov
gorcunov at odin.com
Wed Apr 29 04:56:24 PDT 2015
Here we rip out all the virtualization code we introduced into the kernel to
make it behave similarly to RHEL6.
Because we're trying a new concept (bindmounting from the node) it is
no longer needed.
Now some details:
- drop cgroup_show_path -- we no longer hide the VEID in /proc/self/cgroup
output; showing it doesn't break CRIU, so there is no need to carry this
code. The same applies to the changes in cgroup_path;
- because we drop virtualization of systemd -- disable creation of new
hierarchies in a container: we neither support it nor need it. The
primary reason why we allowed new hierarchies in a container was that
CRIU used to run the restore procedure inside the VE, but now we initiate
the restore from VE0, so we can safely disable new hierarchies;
- in cgroup_addrm_files go back to former RHEL7 code; if we need something
special here it must be reviewed carefully and separately;
- no need to hide /proc/cgroups in a VE: there is no sensitive info present.
Signed-off-by: Cyrill Gorcunov <gorcunov at odin.com>
CC: Vladimir Davydov <vdavydov at odin.com>
CC: Konstantin Khorenko <khorenko at odin.com>
CC: Pavel Emelyanov <xemul at odin.com>
CC: Andrey Vagin <avagin at odin.com>
---
include/linux/cgroup.h | 3 -
kernel/cgroup.c | 116 ++++++++-----------------------------------------
2 files changed, 19 insertions(+), 100 deletions(-)
Index: linux-pcs7.git/include/linux/cgroup.h
===================================================================
--- linux-pcs7.git.orig/include/linux/cgroup.h
+++ linux-pcs7.git/include/linux/cgroup.h
@@ -191,9 +191,6 @@ enum {
/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
CGRP_SANE_BEHAVIOR,
CGRP_SELF_DESTRUCTION,
-
- /* container virtualization */
- CGRP_VE_TOP_CGROUP_VIRTUAL,
};
struct cgroup_name {
Index: linux-pcs7.git/kernel/cgroup.c
===================================================================
--- linux-pcs7.git.orig/kernel/cgroup.c
+++ linux-pcs7.git/kernel/cgroup.c
@@ -1125,18 +1125,6 @@ static int cgroup_show_options(struct se
return 0;
}
-static int cgroup_show_path(struct seq_file *m, struct dentry *root)
-{
- struct ve_struct *ve = get_exec_env();
- struct cgroup *cgrp = __d_cgrp(root);
-
- if (!ve_is_super(ve) && test_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &cgrp->flags))
- seq_puts(m, "/");
- else
- seq_dentry(m, root, " \t\n\\");
- return 0;
-}
-
/*
* Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
* with cgroup_mutex held to protect the subsys[] array. This function takes
@@ -1299,26 +1287,6 @@ static int parse_cgroupfs_options(char *
if (!opts->subsys_mask && !opts->name)
return -EINVAL;
- /* virtualize 'systemd' hierarchy */
- if (!ve_is_super(get_exec_env()) && !opts->subsys_mask && opts->name && !strcmp(opts->name, "systemd"))
- set_bit(CGRP_ROOT_VIRTUAL, &opts->flags);
-
- /* forbid non-virtualized hierarchies in containers */
- if (!ve_is_super(get_exec_env()) && !test_bit(CGRP_ROOT_VIRTUAL, &opts->flags)) {
- WARN_ONCE(1, "Allow non-virtualized hierarchies for CRIU sake\n");
- /*
- * FIXME
- *
- * We need to somehow limit this ability for CRIU only, because
- * we've to run restore procedure from inside of VE cgroup
- * (otherwise a number of get_exec_env() in network code
- * won't work as needed).
- *
- * -- cyrillos
- */
- /* return opts->subsys_mask ? -ENOENT : -EPERM; */
- }
-
/*
* Grab references on all the modules we'll need, so the subsystems
* don't dance around before rebind_subsystems attaches them. This may
@@ -1441,7 +1409,6 @@ static const struct super_operations cgr
.drop_inode = generic_delete_inode,
.show_options = cgroup_show_options,
.remount_fs = cgroup_remount,
- .show_path = cgroup_show_path,
};
static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -1621,7 +1588,6 @@ static struct dentry *cgroup_mount(struc
struct super_block *sb;
struct cgroupfs_root *new_root;
struct inode *inode;
- struct dentry *root_dentry;
/* First find the desired set of subsystems */
if (!(flags & MS_KERNMOUNT)) {
@@ -1668,6 +1634,17 @@ static struct dentry *cgroup_mount(struc
BUG_ON(sb->s_root != NULL);
+#ifdef CONFIG_VE
+ /*
+ * Don't allow to create new hierarchies in container,
+ * we don't support them.
+ */
+ if (!ve_is_super(get_exec_env())) {
+ ret = -EACCES;
+ goto drop_new_super;
+ }
+#endif
+
ret = cgroup_get_rootdir(sb);
if (ret)
goto drop_new_super;
@@ -1727,11 +1704,9 @@ static struct dentry *cgroup_mount(struc
BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
- if (!test_bit(CGRP_ROOT_VIRTUAL, &opts.flags)) {
- root_cgrp->release_agent = opts.release_agent;
- root_cgrp->cgroup_ve = get_exec_env();
- opts.release_agent = NULL;
- }
+ root_cgrp->release_agent = opts.release_agent;
+ root_cgrp->cgroup_ve = get_exec_env();
+ opts.release_agent = NULL;
cred = override_creds(&init_cred);
cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
@@ -1760,40 +1735,9 @@ static struct dentry *cgroup_mount(struc
drop_parsed_module_refcounts(opts.subsys_mask);
}
- if (!test_bit(CGRP_ROOT_VIRTUAL, &opts.flags)) {
- root_dentry = dget(sb->s_root);
- } else {
- struct ve_struct *ve = get_exec_env();
- struct cgroup *top_cgrp;
-
- top_cgrp = cgroup_kernel_open(&root->top_cgroup, 0, ve->ve_name);
- ret = PTR_ERR(top_cgrp);
- if (IS_ERR(top_cgrp))
- goto drop_new_super;
-
- /* create fake root-cgroup in virtualized hierarchy */
- if (top_cgrp == NULL) {
- top_cgrp = cgroup_kernel_open(&root->top_cgroup, CGRP_CREAT, ve->ve_name);
- ret = PTR_ERR(top_cgrp);
- if (IS_ERR(top_cgrp))
- goto drop_new_super;
-
- mutex_lock(&cgroup_mutex);
- top_cgrp->cgroup_ve = ve;
- top_cgrp->release_agent = opts.release_agent;
- opts.release_agent = NULL;
- set_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &top_cgrp->flags);
- mutex_unlock(&cgroup_mutex);
- }
-
- /* mount it as bindmount to fist-level fake root-cgroup */
- root_dentry = dget(top_cgrp->dentry);
- cgroup_kernel_close(top_cgrp);
- }
-
kfree(opts.release_agent);
kfree(opts.name);
- return root_dentry;
+ return dget(sb->s_root);
unlock_drop:
mutex_unlock(&cgroup_root_mutex);
@@ -1881,7 +1825,6 @@ static struct kobject *cgroup_kobj;
*/
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
- struct ve_struct *ve = get_exec_env();
int ret = -ENAMETOOLONG;
char *start;
@@ -1899,16 +1842,6 @@ int cgroup_path(const struct cgroup *cgr
const char *name = cgroup_name(cgrp);
int len;
- /* Hide fake root-cgroup in virtualized hierarchy */
- if (!ve_is_super(ve) && test_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &cgrp->flags)) {
- if (*start != '/') {
- if (--start < buf)
- goto out;
- *start = '/';
- }
- break;
- }
-
len = strlen(name);
if ((start -= len) < buf)
goto out;
@@ -2843,9 +2776,9 @@ static int cgroup_addrm_files(struct cgr
/* does cft->flags tell us to skip this file on @cgrp? */
if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
continue;
- if ((cft->flags & CFTYPE_NOT_ON_ROOT) && &cgrp->root->top_cgroup == cgrp)
+ if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
continue;
- if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && &cgrp->root->top_cgroup != cgrp)
+ if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
continue;
if (is_add) {
@@ -4313,13 +4246,8 @@ static long cgroup_create(struct cgroup
cgrp->parent = parent;
cgrp->root = parent->root;
- if (test_bit(CGRP_ROOT_VIRTUAL, &root->flags) && parent == &root->top_cgroup) {
- cgrp->cgroup_ve = get_exec_env();
- list_add(&cgrp->cgroup_ve_list, &cgrp->cgroup_ve->ve_cgroup_head);
- } else {
- cgrp->cgroup_ve = parent->cgroup_ve;
- list_add(&cgrp->cgroup_ve_list, &parent->cgroup_ve_list);
- }
+ cgrp->cgroup_ve = parent->cgroup_ve;
+ list_add(&cgrp->cgroup_ve_list, &parent->cgroup_ve_list);
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -4926,14 +4854,8 @@ out:
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
int i;
- struct ve_struct *ve = get_exec_env();
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
-
- /* cgset wants to read /proc/cgroups and it's used for starting CT */
- if (!ve_is_super(ve) && ve->is_running)
- return 0;
-
/*
* ideally we don't want subsystems moving around while we do this.
* cgroup_mutex is also necessary to guarantee an atomic snapshot of
More information about the Devel
mailing list