[Devel] [patch rh7 1/3] cgroups: Drop virtualization code

Vladimir Davydov vdavydov at parallels.com
Tue May 5 03:44:12 PDT 2015


On Wed, Apr 29, 2015 at 02:56:24PM +0300, Cyrill Gorcunov wrote:
> Here we rip off all the virtualization code we introduced into kernel to
> behave close to rhel6.
> 
> Because we're trying a new concept (bindmounting from the node) it is
> no longer needed.
> 
> Now some details:
> 
>  - drop cgroup_show_path -- we don't hide VEID in /proc/self/cgroup output,
>    it doesn't break criu so no need to carry it, same applies to changes
>    in cgroup_path;
> 
>  - because we drop virtualization of systemd -- disable creation of new
>    hierarchies in container: we don't support it, nor do we need it. The
>    primary reason why we allowed new hierarchies in container was that
>    CRIU has been running restore procedure inside VE but now we initiate
>    restore from VE0, so we can safely disable new hierarchies;
> 
>  - in cgroup_addrm_files go back to former RHEL7 code; if we need something
>    special here it must be reviewed carefully and separately;
> 
>  - no need to hide /proc/cgroups in VE, there is no sensible info present.

Again, not everything is removed. E.g. we no longer need
cgroup_kernel_destroy and ve->ve_cgroup_head. Please check out
c2ac6df22b20389ae2d0af49c436b00ff3243e89 ("VE: virtualize cgroups") and
fix accordingly.

> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at odin.com>
> CC: Vladimir Davydov <vdavydov at odin.com>
> CC: Konstantin Khorenko <khorenko at odin.com>
> CC: Pavel Emelyanov <xemul at odin.com>
> CC: Andrey Vagin <avagin at odin.com>
> ---
>  include/linux/cgroup.h |    3 -
>  kernel/cgroup.c        |  116 ++++++++-----------------------------------------
>  2 files changed, 19 insertions(+), 100 deletions(-)
> 
> Index: linux-pcs7.git/include/linux/cgroup.h
> ===================================================================
> --- linux-pcs7.git.orig/include/linux/cgroup.h
> +++ linux-pcs7.git/include/linux/cgroup.h
> @@ -191,9 +191,6 @@ enum {
>  	/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
>  	CGRP_SANE_BEHAVIOR,
>  	CGRP_SELF_DESTRUCTION,
> -
> -	/* container virtualization */
> -	CGRP_VE_TOP_CGROUP_VIRTUAL,
>  };
>  
>  struct cgroup_name {
> Index: linux-pcs7.git/kernel/cgroup.c
> ===================================================================
> --- linux-pcs7.git.orig/kernel/cgroup.c
> +++ linux-pcs7.git/kernel/cgroup.c
> @@ -1125,18 +1125,6 @@ static int cgroup_show_options(struct se
>  	return 0;
>  }
>  
> -static int cgroup_show_path(struct seq_file *m, struct dentry *root)
> -{
> -	struct ve_struct *ve = get_exec_env();
> -	struct cgroup *cgrp = __d_cgrp(root);
> -
> -	if (!ve_is_super(ve) && test_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &cgrp->flags))
> -		seq_puts(m, "/");
> -	else
> -		seq_dentry(m, root, " \t\n\\");
> -	return 0;
> -}
> -
>  /*
>   * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
>   * with cgroup_mutex held to protect the subsys[] array. This function takes
> @@ -1299,26 +1287,6 @@ static int parse_cgroupfs_options(char *
>  	if (!opts->subsys_mask && !opts->name)
>  		return -EINVAL;
>  
> -	/* virtualize 'systemd' hierarchy */
> -	if (!ve_is_super(get_exec_env()) && !opts->subsys_mask && opts->name && !strcmp(opts->name, "systemd"))
> -		set_bit(CGRP_ROOT_VIRTUAL, &opts->flags);
> -
> -	/* forbid non-virtualized hierarchies in containers */
> -	if (!ve_is_super(get_exec_env()) && !test_bit(CGRP_ROOT_VIRTUAL, &opts->flags)) {
> -		WARN_ONCE(1, "Allow non-virtualized hierarchies for CRIU sake\n");
> -		/*
> -		 * FIXME
> -		 *
> -		 * We need to somehow limit this ability for CRIU only, because
> -		 * we've to run restore procedure from inside of VE cgroup
> -		 * (otherwise a number of get_exec_env() in network code
> -		 * won't work as needed).
> -		 *
> -		 *   -- cyrillos
> -		 */
> -		/* return opts->subsys_mask ? -ENOENT : -EPERM; */
> -	}
> -
>  	/*
>  	 * Grab references on all the modules we'll need, so the subsystems
>  	 * don't dance around before rebind_subsystems attaches them. This may
> @@ -1441,7 +1409,6 @@ static const struct super_operations cgr
>  	.drop_inode = generic_delete_inode,
>  	.show_options = cgroup_show_options,
>  	.remount_fs = cgroup_remount,
> -	.show_path = cgroup_show_path,
>  };
>  
>  static void init_cgroup_housekeeping(struct cgroup *cgrp)
> @@ -1621,7 +1588,6 @@ static struct dentry *cgroup_mount(struc
>  	struct super_block *sb;
>  	struct cgroupfs_root *new_root;
>  	struct inode *inode;
> -	struct dentry *root_dentry;
>  
>  	/* First find the desired set of subsystems */
>  	if (!(flags & MS_KERNMOUNT)) {
> @@ -1668,6 +1634,17 @@ static struct dentry *cgroup_mount(struc
>  
>  		BUG_ON(sb->s_root != NULL);
>  
> +#ifdef CONFIG_VE
> +		/*
> +		 * Don't allow to create new hierarchies in container,
> +		 * we don't support them.
> +		 */
> +		if (!ve_is_super(get_exec_env())) {
> +			ret = -EACCES;
> +			goto drop_new_super;
> +		}
> +#endif
> +
>  		ret = cgroup_get_rootdir(sb);
>  		if (ret)
>  			goto drop_new_super;
> @@ -1727,11 +1704,9 @@ static struct dentry *cgroup_mount(struc
>  		BUG_ON(!list_empty(&root_cgrp->children));
>  		BUG_ON(root->number_of_cgroups != 1);
>  
> -		if (!test_bit(CGRP_ROOT_VIRTUAL, &opts.flags)) {
> -			root_cgrp->release_agent = opts.release_agent;
> -			root_cgrp->cgroup_ve = get_exec_env();
> -			opts.release_agent = NULL;
> -		}
> +		root_cgrp->release_agent = opts.release_agent;
> +		root_cgrp->cgroup_ve = get_exec_env();
> +		opts.release_agent = NULL;
>  
>  		cred = override_creds(&init_cred);
>  		cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
> @@ -1760,40 +1735,9 @@ static struct dentry *cgroup_mount(struc
>  		drop_parsed_module_refcounts(opts.subsys_mask);
>  	}
>  
> -	if (!test_bit(CGRP_ROOT_VIRTUAL, &opts.flags)) {
> -		root_dentry = dget(sb->s_root);
> -	} else {
> -		struct ve_struct *ve = get_exec_env();
> -		struct cgroup *top_cgrp;
> -
> -		top_cgrp = cgroup_kernel_open(&root->top_cgroup, 0, ve->ve_name);
> -		ret = PTR_ERR(top_cgrp);
> -		if (IS_ERR(top_cgrp))
> -			goto drop_new_super;
> -
> -		/* create fake root-cgroup in virtualized hierarchy */
> -		if (top_cgrp == NULL) {
> -			top_cgrp = cgroup_kernel_open(&root->top_cgroup, CGRP_CREAT, ve->ve_name);
> -			ret = PTR_ERR(top_cgrp);
> -			if (IS_ERR(top_cgrp))
> -				goto drop_new_super;
> -
> -			mutex_lock(&cgroup_mutex);
> -			top_cgrp->cgroup_ve = ve;
> -			top_cgrp->release_agent = opts.release_agent;
> -			opts.release_agent = NULL;
> -			set_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &top_cgrp->flags);
> -			mutex_unlock(&cgroup_mutex);
> -		}
> -
> -		/* mount it as bindmount to fist-level fake root-cgroup */
> -		root_dentry = dget(top_cgrp->dentry);
> -		cgroup_kernel_close(top_cgrp);
> -	}
> -
>  	kfree(opts.release_agent);
>  	kfree(opts.name);
> -	return root_dentry;
> +	return dget(sb->s_root);
>  
>   unlock_drop:
>  	mutex_unlock(&cgroup_root_mutex);
> @@ -1881,7 +1825,6 @@ static struct kobject *cgroup_kobj;
>   */
>  int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
>  {
> -	struct ve_struct *ve = get_exec_env();
>  	int ret = -ENAMETOOLONG;
>  	char *start;
>  
> @@ -1899,16 +1842,6 @@ int cgroup_path(const struct cgroup *cgr
>  		const char *name = cgroup_name(cgrp);
>  		int len;
>  
> -		/* Hide fake root-cgroup in virtualized hierarchy */
> -		if (!ve_is_super(ve) && test_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &cgrp->flags)) {
> -			if (*start != '/') {
> -				if (--start < buf)
> -					goto out;
> -				*start = '/';
> -			}
> -			break;
> -		}
> -
>  		len = strlen(name);
>  		if ((start -= len) < buf)
>  			goto out;
> @@ -2843,9 +2776,9 @@ static int cgroup_addrm_files(struct cgr
>  		/* does cft->flags tell us to skip this file on @cgrp? */
>  		if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
>  			continue;
> -		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && &cgrp->root->top_cgroup == cgrp)
> +		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
>  			continue;
> -		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && &cgrp->root->top_cgroup != cgrp)
> +		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
>  			continue;
>  
>  		if (is_add) {
> @@ -4313,13 +4246,8 @@ static long cgroup_create(struct cgroup
>  	cgrp->parent = parent;
>  	cgrp->root = parent->root;
>  
> -	if (test_bit(CGRP_ROOT_VIRTUAL, &root->flags) && parent == &root->top_cgroup) {
> -		cgrp->cgroup_ve = get_exec_env();
> -		list_add(&cgrp->cgroup_ve_list, &cgrp->cgroup_ve->ve_cgroup_head);
> -	} else {
> -		cgrp->cgroup_ve = parent->cgroup_ve;
> -		list_add(&cgrp->cgroup_ve_list, &parent->cgroup_ve_list);
> -	}
> +	cgrp->cgroup_ve = parent->cgroup_ve;
> +	list_add(&cgrp->cgroup_ve_list, &parent->cgroup_ve_list);
>  
>  	if (notify_on_release(parent))
>  		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
> @@ -4926,14 +4854,8 @@ out:
>  static int proc_cgroupstats_show(struct seq_file *m, void *v)
>  {
>  	int i;
> -	struct ve_struct *ve = get_exec_env();
>  
>  	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
> -
> -	/* cgset wants to read /proc/cgroups and it's used for starting CT */
> -	if (!ve_is_super(ve) && ve->is_running)
> -		return 0;
> -
>  	/*
>  	 * ideally we don't want subsystems moving around while we do this.
>  	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
> 



More information about the Devel mailing list