[Devel] [PATCH rh7] cgroups: Drop virtualization code, v2

Cyrill Gorcunov gorcunov at odin.com
Tue May 5 09:54:37 PDT 2015


Here we rip out all the virtualization code we introduced into the kernel to
behave close to RHEL6.

Because we're trying a new concept (bindmounting from the node) it is
no longer needed.

Now some details:

 - drop cgroup_show_path -- we don't hide VEID in /proc/self/cgroup output,
   it doesn't break criu so no need to carry it, same applies to changes
   in cgroup_path;

 - because we drop virtualization of systemd -- disable creation of new
   hierarchies in container: we neither support nor need it. The
   primary reason why we allowed new hierarchies in container was that
   CRIU used to run the restore procedure inside the VE, but now we initiate
   restore from VE0, so we can safely disable new hierarchies;

 - in cgroup_addrm_files go back to former RHEL7 code; if we need something
   special here it must be reviewed carefully and separately;

 - no need to hide /proc/cgroups in VE, there is no sensitive info present.

v2:

 - take into account commits 38f039db6e023ac14517219ad6f674633c4e99ca
   and c2ac6df22b20389ae2d0af49c436b00ff3243e89 removing cgroup_is_disposable,
   cgroup_kernel_destroy, ve::ve_cgroup_head.

 - drop CGRP_WEAK, CGRP_SELF_DESTRUCTION and CGRP_VE_TOP_CGROUP_VIRTUAL flags,
   which implies that cgroups are no longer auto-cleaned up and a user-space
   tool (read vzctl and friends) should handle cgroup removal

 - because we're moving to native cgroups code we don't virtualize release
   agent anymore

 - the cgroup::cgroup_ve member is still needed because we're using it
   all over the code

Signed-off-by: Cyrill Gorcunov <gorcunov at odin.com>
CC: Vladimir Davydov <vdavydov at odin.com>
CC: Konstantin Khorenko <khorenko at odin.com>
CC: Pavel Emelyanov <xemul at odin.com>
CC: Andrey Vagin <avagin at odin.com>
---

I'll need to drop cgroup 0 as well, but this is going to be
addressed in another patch. I've tested it locally, but more
review would be much appreciated.

 include/linux/cgroup.h  |   15 ---
 include/linux/ve.h      |    1 
 kernel/bc/beancounter.c |   11 --
 kernel/cgroup.c         |  214 ++++++------------------------------------------
 kernel/ve/ve.c          |   19 ----
 kernel/ve/vecalls.c     |    6 -
 6 files changed, 39 insertions(+), 227 deletions(-)

Index: linux-pcs7.git/include/linux/cgroup.h
===================================================================
--- linux-pcs7.git.orig/include/linux/cgroup.h
+++ linux-pcs7.git/include/linux/cgroup.h
@@ -46,7 +46,6 @@ struct cgroup_sb_opts {
 enum cgroup_open_flags {
 	CGRP_CREAT	= 0x0001,	/* create if not found */
 	CGRP_EXCL	= 0x0002,	/* fail if already exist */
-	CGRP_WEAK	= 0x0004,	/* arm cgroup self-destruction */
 };
 
 struct vfsmount *cgroup_kernel_mount(struct cgroup_sb_opts *opts);
@@ -56,7 +55,6 @@ struct cgroup *cgroup_kernel_open(struct
 int cgroup_kernel_remove(struct cgroup *parent, const char *name);
 int cgroup_kernel_attach(struct cgroup *cgrp, struct task_struct *tsk);
 void cgroup_kernel_close(struct cgroup *cgrp);
-void cgroup_kernel_destroy(struct cgroup *cgrp);
 
 extern int cgroup_init_early(void);
 extern int cgroup_init(void);
@@ -190,10 +188,6 @@ enum {
 	CGRP_CPUSET_CLONE_CHILDREN,
 	/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
 	CGRP_SANE_BEHAVIOR,
-	CGRP_SELF_DESTRUCTION,
-
-	/* container virtualization */
-	CGRP_VE_TOP_CGROUP_VIRTUAL,
 };
 
 struct cgroup_name {
@@ -241,12 +235,7 @@ struct cgroup {
 
 	struct cgroupfs_root *root;
 
-	/* The path to use for release notifications. */
-	char *release_agent;
-
-	/* Owner VE for fake cgroup hierarchy */
 	struct ve_struct *cgroup_ve;
-	struct list_head cgroup_ve_list;
 
 	/*
 	 * List of cg_cgroup_links pointing at css_sets with
@@ -325,7 +314,6 @@ enum {
 
 	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
 	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
-	CGRP_ROOT_VIRTUAL	= (1 << 3), /* VE-based root-cgroup virtualization */
 };
 
 /*
@@ -369,6 +357,9 @@ struct cgroupfs_root {
 	/* IDs for cgroups in this hierarchy */
 	struct ida cgroup_ida;
 
+	/* The path to use for release notifications. */
+	char release_agent_path[PATH_MAX];
+
 	/* The name for this hierarchy - may be empty */
 	char name[MAX_CGROUP_ROOT_NAMELEN];
 };
Index: linux-pcs7.git/include/linux/ve.h
===================================================================
--- linux-pcs7.git.orig/include/linux/ve.h
+++ linux-pcs7.git/include/linux/ve.h
@@ -124,7 +124,6 @@ struct ve_struct {
 	struct mutex		sync_mutex;
 
 	struct kmapset_key	ve_sysfs_perms;
-	struct list_head	ve_cgroup_head;
 #if IS_ENABLED(CONFIG_DEVTMPFS)
 	struct path		devtmpfs_root;
 #endif
Index: linux-pcs7.git/kernel/bc/beancounter.c
===================================================================
--- linux-pcs7.git.orig/kernel/bc/beancounter.c
+++ linux-pcs7.git/kernel/bc/beancounter.c
@@ -109,8 +109,7 @@ static int ub_mem_cgroup_attach_task(str
 	struct cgroup *cg;
 	int ret;
 
-	cg = cgroup_kernel_open(mem_cgroup_root,
-				CGRP_CREAT|CGRP_WEAK, ub->ub_name);
+	cg = cgroup_kernel_open(mem_cgroup_root, CGRP_CREAT, ub->ub_name);
 	if (IS_ERR(cg))
 		return PTR_ERR(cg);
 
@@ -134,8 +133,7 @@ static int ub_blkio_cgroup_attach_task(s
 	if (!ubc_ioprio)
 		return 0;
 
-	cg = cgroup_kernel_open(blkio_cgroup_root,
-				CGRP_CREAT|CGRP_WEAK, ub->ub_name);
+	cg = cgroup_kernel_open(blkio_cgroup_root, CGRP_CREAT, ub->ub_name);
 	if (IS_ERR(cg))
 		return PTR_ERR(cg);
 
@@ -152,8 +150,7 @@ static int ub_cgroup_attach_task(struct
 	int ret;
 
 	if (ub != get_ub0()) {
-		cg = cgroup_kernel_open(ub_cgroup_root,
-					CGRP_CREAT|CGRP_WEAK, ub->ub_name);
+		cg = cgroup_kernel_open(ub_cgroup_root, CGRP_CREAT, ub->ub_name);
 		if (IS_ERR(cg))
 			return PTR_ERR(cg);
 	} else
@@ -398,7 +395,7 @@ struct user_beancounter *get_beancounter
 		if (IS_ERR(cg))
 			return NULL;
 		if (!cg) {
-			cg = cgroup_kernel_open(ub_cgroup_root, CGRP_CREAT|CGRP_WEAK, name);
+			cg = cgroup_kernel_open(ub_cgroup_root, CGRP_CREAT, name);
 			if (IS_ERR_OR_NULL(cg))
 				return NULL;
 			pr_warn_once("Allocating UB with syslimits is deprecated!\n");
Index: linux-pcs7.git/kernel/cgroup.c
===================================================================
--- linux-pcs7.git.orig/kernel/cgroup.c
+++ linux-pcs7.git/kernel/cgroup.c
@@ -256,18 +256,11 @@ bool cgroup_is_descendant(struct cgroup
 }
 EXPORT_SYMBOL_GPL(cgroup_is_descendant);
 
-static int cgroup_is_disposable(const struct cgroup *cgrp)
-{
-	return (cgrp->flags & ((1 << CGRP_NOTIFY_ON_RELEASE) |
-				(1 << CGRP_SELF_DESTRUCTION))) > 0;
-}
-
 static int cgroup_is_releasable(const struct cgroup *cgrp)
 {
 	const int bits =
 		(1 << CGRP_RELEASABLE) |
-		(1 << CGRP_NOTIFY_ON_RELEASE) |
-		(1 << CGRP_SELF_DESTRUCTION);
+		(1 << CGRP_NOTIFY_ON_RELEASE);
 	return (cgrp->flags & bits) > (1 << CGRP_RELEASABLE);
 }
 
@@ -422,7 +415,7 @@ static void __put_css_set(struct css_set
 		 */
 		rcu_read_lock();
 		if (atomic_dec_and_test(&cgrp->count) &&
-		    cgroup_is_disposable(cgrp)) {
+		    notify_on_release(cgrp)) {
 			if (taskexit)
 				set_bit(CGRP_RELEASABLE, &cgrp->flags);
 			check_for_release(cgrp);
@@ -854,7 +847,6 @@ static void cgroup_free_fn(struct work_s
 		ss->css_free(cgrp);
 
 	cgrp->root->number_of_cgroups--;
-	kfree(cgrp->release_agent);
 	mutex_unlock(&cgroup_mutex);
 
 	/*
@@ -1095,7 +1087,6 @@ static int rebind_subsystems(struct cgro
 static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
 {
 	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
-	struct cgroup *top_cgrp;
 	struct cgroup_subsys *ss;
 
 	mutex_lock(&cgroup_root_mutex);
@@ -1108,14 +1099,8 @@ static int cgroup_show_options(struct se
 	if (root->flags & CGRP_ROOT_XATTR)
 		seq_puts(seq, ",xattr");
 
-	/* bindmount to attribute file? */
-	if (!S_ISDIR(dentry->d_inode->i_mode))
-		dentry = dentry->d_parent;
-	top_cgrp = dentry->d_fsdata;
-	/* release_agent is stored on top cgroup */
-	top_cgrp = &top_cgrp->root->top_cgroup;
-	if (top_cgrp->release_agent)
-		seq_printf(seq, ",release_agent=%s", top_cgrp->release_agent);
+	if (strlen(root->release_agent_path))
+		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
 
 	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
 		seq_puts(seq, ",clone_children");
@@ -1125,18 +1110,6 @@ static int cgroup_show_options(struct se
 	return 0;
 }
 
-static int cgroup_show_path(struct seq_file *m, struct dentry *root)
-{
-	struct ve_struct *ve = get_exec_env();
-	struct cgroup *cgrp = __d_cgrp(root);
-
-	if (!ve_is_super(ve) && test_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &cgrp->flags))
-		seq_puts(m, "/");
-	else
-		seq_dentry(m, root, " \t\n\\");
-	return 0;
-}
-
 /*
  * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
  * with cgroup_mutex held to protect the subsys[] array. This function takes
@@ -1299,26 +1272,6 @@ static int parse_cgroupfs_options(char *
 	if (!opts->subsys_mask && !opts->name)
 		return -EINVAL;
 
-	/* virtualize 'systemd' hierarchy */
-	if (!ve_is_super(get_exec_env()) && !opts->subsys_mask && opts->name && !strcmp(opts->name, "systemd"))
-		set_bit(CGRP_ROOT_VIRTUAL, &opts->flags);
-
-	/* forbid non-virtualized hierarchies in containers */
-	if (!ve_is_super(get_exec_env()) && !test_bit(CGRP_ROOT_VIRTUAL, &opts->flags)) {
-		WARN_ONCE(1, "Allow non-virtualized hierarchies for CRIU sake\n");
-		/*
-		 * FIXME
-		 *
-		 * We need to somehow limit this ability for CRIU only, because
-		 * we've to run restore procedure from inside of VE cgroup
-		 * (otherwise a number of get_exec_env() in network code
-		 * won't work as needed).
-		 *
-		 *   -- cyrillos
-		 */
-		/* return opts->subsys_mask ? -ENOENT : -EPERM; */
-	}
-
 	/*
 	 * Grab references on all the modules we'll need, so the subsystems
 	 * don't dance around before rebind_subsystems attaches them. This may
@@ -1422,11 +1375,8 @@ static int cgroup_remount(struct super_b
 	/* re-populate subsystem files */
 	cgroup_populate_dir(cgrp, false, added_mask);
 
-	if (opts.release_agent) {
-		kfree(cgrp->release_agent);
-		cgrp->release_agent = opts.release_agent;
-		opts.release_agent = NULL;
-	}
+	if (opts.release_agent)
+		strcpy(root->release_agent_path, opts.release_agent);
  out_unlock:
 	kfree(opts.release_agent);
 	kfree(opts.name);
@@ -1441,7 +1391,6 @@ static const struct super_operations cgr
 	.drop_inode = generic_delete_inode,
 	.show_options = cgroup_show_options,
 	.remount_fs = cgroup_remount,
-	.show_path = cgroup_show_path,
 };
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -1451,7 +1400,6 @@ static void init_cgroup_housekeeping(str
 	INIT_LIST_HEAD(&cgrp->files);
 	INIT_LIST_HEAD(&cgrp->css_sets);
 	INIT_LIST_HEAD(&cgrp->allcg_node);
-	INIT_LIST_HEAD(&cgrp->cgroup_ve_list);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	INIT_WORK(&cgrp->free_work, cgroup_free_fn);
@@ -1540,6 +1488,8 @@ static struct cgroupfs_root *cgroup_root
 	root->subsys_mask = opts->subsys_mask;
 	root->flags = opts->flags;
 	ida_init(&root->cgroup_ida);
+	if (opts->release_agent)
+		strcpy(root->release_agent_path, opts->release_agent);
 	if (opts->name)
 		strcpy(root->name, opts->name);
 	if (opts->cpuset_clone_children)
@@ -1621,7 +1571,6 @@ static struct dentry *cgroup_mount(struc
 	struct super_block *sb;
 	struct cgroupfs_root *new_root;
 	struct inode *inode;
-	struct dentry *root_dentry;
 
 	/* First find the desired set of subsystems */
 	if (!(flags & MS_KERNMOUNT)) {
@@ -1668,6 +1617,17 @@ static struct dentry *cgroup_mount(struc
 
 		BUG_ON(sb->s_root != NULL);
 
+#ifdef CONFIG_VE
+		/*
+		 * Don't allow to create new hierarchies in container,
+		 * we don't support them.
+		 */
+		if (!ve_is_super(get_exec_env())) {
+			ret = -EACCES;
+			goto drop_new_super;
+		}
+#endif
+
 		ret = cgroup_get_rootdir(sb);
 		if (ret)
 			goto drop_new_super;
@@ -1727,11 +1687,7 @@ static struct dentry *cgroup_mount(struc
 		BUG_ON(!list_empty(&root_cgrp->children));
 		BUG_ON(root->number_of_cgroups != 1);
 
-		if (!test_bit(CGRP_ROOT_VIRTUAL, &opts.flags)) {
-			root_cgrp->release_agent = opts.release_agent;
-			root_cgrp->cgroup_ve = get_exec_env();
-			opts.release_agent = NULL;
-		}
+		root_cgrp->cgroup_ve = get_exec_env();
 
 		cred = override_creds(&init_cred);
 		cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
@@ -1760,40 +1716,9 @@ static struct dentry *cgroup_mount(struc
 		drop_parsed_module_refcounts(opts.subsys_mask);
 	}
 
-	if (!test_bit(CGRP_ROOT_VIRTUAL, &opts.flags)) {
-		root_dentry = dget(sb->s_root);
-	} else {
-		struct ve_struct *ve = get_exec_env();
-		struct cgroup *top_cgrp;
-
-		top_cgrp = cgroup_kernel_open(&root->top_cgroup, 0, ve->ve_name);
-		ret = PTR_ERR(top_cgrp);
-		if (IS_ERR(top_cgrp))
-			goto drop_new_super;
-
-		/* create fake root-cgroup in virtualized hierarchy */
-		if (top_cgrp == NULL) {
-			top_cgrp = cgroup_kernel_open(&root->top_cgroup, CGRP_CREAT, ve->ve_name);
-			ret = PTR_ERR(top_cgrp);
-			if (IS_ERR(top_cgrp))
-				goto drop_new_super;
-
-			mutex_lock(&cgroup_mutex);
-			top_cgrp->cgroup_ve = ve;
-			top_cgrp->release_agent = opts.release_agent;
-			opts.release_agent = NULL;
-			set_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &top_cgrp->flags);
-			mutex_unlock(&cgroup_mutex);
-		}
-
-		/* mount it as bindmount to fist-level fake root-cgroup */
-		root_dentry = dget(top_cgrp->dentry);
-		cgroup_kernel_close(top_cgrp);
-	}
-
 	kfree(opts.release_agent);
 	kfree(opts.name);
-	return root_dentry;
+	return dget(sb->s_root);
 
  unlock_drop:
 	mutex_unlock(&cgroup_root_mutex);
@@ -1881,7 +1806,6 @@ static struct kobject *cgroup_kobj;
  */
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 {
-	struct ve_struct *ve = get_exec_env();
 	int ret = -ENAMETOOLONG;
 	char *start;
 
@@ -1899,16 +1823,6 @@ int cgroup_path(const struct cgroup *cgr
 		const char *name = cgroup_name(cgrp);
 		int len;
 
-		/* Hide fake root-cgroup in virtualized hierarchy */
-		if (!ve_is_super(ve) && test_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &cgrp->flags)) {
-			if (*start != '/') {
-				if (--start < buf)
-					goto out;
-				*start = '/';
-			}
-			break;
-		}
-
 		len = strlen(name);
 		if ((start -= len) < buf)
 			goto out;
@@ -2319,23 +2233,16 @@ static int cgroup_procs_write(struct cgr
 static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
 				      const char *buffer)
 {
-	char *release_agent;
+	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
 
 	if (strlen(buffer) >= PATH_MAX)
 		return -EINVAL;
 
-	release_agent = kstrdup(buffer, GFP_KERNEL);
-	if (!release_agent)
-		return -ENOMEM;
-
-	if (!cgroup_lock_live_group(cgrp)) {
-		kfree(release_agent);
+	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
-	}
 
 	mutex_lock(&cgroup_root_mutex);
-	kfree(cgrp->release_agent);
-	cgrp->release_agent = release_agent;
+	strcpy(cgrp->root->release_agent_path, buffer);
 	mutex_unlock(&cgroup_root_mutex);
 	mutex_unlock(&cgroup_mutex);
 	return 0;
@@ -2346,8 +2253,7 @@ static int cgroup_release_agent_show(str
 {
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
-	if (cgrp->release_agent)
-		seq_puts(seq, cgrp->release_agent);
+	seq_puts(seq, cgrp->root->release_agent_path);
 	seq_putc(seq, '\n');
 	mutex_unlock(&cgroup_mutex);
 	return 0;
@@ -2843,9 +2749,9 @@ static int cgroup_addrm_files(struct cgr
 		/* does cft->flags tell us to skip this file on @cgrp? */
 		if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
 			continue;
-		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && &cgrp->root->top_cgroup == cgrp)
+		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
 			continue;
-		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && &cgrp->root->top_cgroup != cgrp)
+		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
 			continue;
 
 		if (is_add) {
@@ -4077,23 +3983,6 @@ static int cgroup_clone_children_write(s
 	return 0;
 }
 
-static u64 cgroup_read_self_destruction(struct cgroup *cgrp,
-		struct cftype *cft)
-{
-	return test_bit(CGRP_SELF_DESTRUCTION, &cgrp->flags);
-}
-
-static int cgroup_write_self_destruction(struct cgroup *cgrp,
-		struct cftype *cft, u64 val)
-{
-	clear_bit(CGRP_RELEASABLE, &cgrp->flags);
-	if (val)
-		set_bit(CGRP_SELF_DESTRUCTION, &cgrp->flags);
-	else
-		clear_bit(CGRP_SELF_DESTRUCTION, &cgrp->flags);
-	return 0;
-}
-
 /*
  * for the common functions, 'private' gives the type of file
  */
@@ -4142,11 +4031,6 @@ static struct cftype files[] = {
 		.write_string = cgroup_release_agent_write,
 		.max_write_len = PATH_MAX,
 	},
-	{
-		.name = "self_destruction",
-		.read_u64 = cgroup_read_self_destruction,
-		.write_u64 = cgroup_write_self_destruction,
-	},
 	{ }	/* terminate */
 };
 
@@ -4313,13 +4197,7 @@ static long cgroup_create(struct cgroup
 	cgrp->parent = parent;
 	cgrp->root = parent->root;
 
-	if (test_bit(CGRP_ROOT_VIRTUAL, &root->flags) && parent == &root->top_cgroup) {
-		cgrp->cgroup_ve = get_exec_env();
-		list_add(&cgrp->cgroup_ve_list, &cgrp->cgroup_ve->ve_cgroup_head);
-	} else {
-		cgrp->cgroup_ve = parent->cgroup_ve;
-		list_add(&cgrp->cgroup_ve_list, &parent->cgroup_ve_list);
-	}
+	cgrp->cgroup_ve = parent->cgroup_ve;
 
 	if (notify_on_release(parent))
 		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -4327,9 +4205,6 @@ static long cgroup_create(struct cgroup
 	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
 
-	if (test_bit(CGRP_SELF_DESTRUCTION, &parent->flags))
-		set_bit(CGRP_SELF_DESTRUCTION, &cgrp->flags);
-
 	for_each_subsys(root, ss) {
 		struct cgroup_subsys_state *css;
 
@@ -4394,7 +4269,6 @@ static long cgroup_create(struct cgroup
 	return 0;
 
 err_free_all:
-	list_del_init(&cgrp->cgroup_ve_list);
 	for_each_subsys(root, ss) {
 		if (cgrp->subsys[ss->subsys_id])
 			ss->css_free(cgrp);
@@ -4475,7 +4349,6 @@ static int cgroup_destroy_locked(struct
 	/* delete this cgroup from parent->children */
 	list_del_rcu(&cgrp->sibling);
 	list_del_init(&cgrp->allcg_node);
-	list_del(&cgrp->cgroup_ve_list);
 
 	dget(d);
 	cgroup_d_remove_dir(d);
@@ -4926,14 +4799,8 @@ out:
 static int proc_cgroupstats_show(struct seq_file *m, void *v)
 {
 	int i;
-	struct ve_struct *ve = get_exec_env();
 
 	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
-
-	/* cgset wants to read /proc/cgroups and it's used for starting CT */
-	if (!ve_is_super(ve) && ve->is_running)
-		return 0;
-
 	/*
 	 * ideally we don't want subsystems moving around while we do this.
 	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
@@ -5232,26 +5099,12 @@ static void cgroup_release_agent(struct
 						    release_list);
 		list_del_init(&cgrp->release_list);
 		raw_spin_unlock(&release_list_lock);
-
-		if (test_bit(CGRP_SELF_DESTRUCTION, &cgrp->flags)) {
-			struct inode *parent = cgrp->dentry->d_parent->d_inode;
-
-			dget(cgrp->dentry);
-			mutex_unlock(&cgroup_mutex);
-			mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
-			vfs_rmdir(parent, cgrp->dentry);
-			mutex_unlock(&parent->i_mutex);
-			dput(cgrp->dentry);
-			mutex_lock(&cgroup_mutex);
-			goto continue_free;
-		}
-
 		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 		if (!pathbuf)
 			goto continue_free;
 		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
 			goto continue_free;
-		agentbuf = kstrdup(cgrp->root->top_cgroup.release_agent, GFP_KERNEL);
+		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
 		if (!agentbuf)
 			goto continue_free;
 
@@ -5713,14 +5566,10 @@ struct cgroup *cgroup_kernel_open(struct
 			ret = -EEXIST;
 		else if (!dentry->d_inode)
 			ret = vfs_mkdir(parent->dentry->d_inode, dentry, 0755);
-		else
-			flags &= ~CGRP_WEAK;
 	}
 	if (!ret && dentry->d_inode) {
 		cgrp = __d_cgrp(dentry);
 		atomic_inc(&cgrp->count);
-		if (flags & CGRP_WEAK)
-			set_bit(CGRP_SELF_DESTRUCTION, &cgrp->flags);
 	} else
 		cgrp = ret ? ERR_PTR(ret) : NULL;
 	dput(dentry);
@@ -5763,9 +5612,7 @@ EXPORT_SYMBOL(cgroup_kernel_attach);
 
 void cgroup_kernel_close(struct cgroup *cgrp)
 {
-	if (!cgroup_is_disposable(cgrp)) {
-		atomic_dec(&cgrp->count);
-	} else if (atomic_dec_and_test(&cgrp->count)) {
+	if (atomic_dec_and_test(&cgrp->count)) {
 		set_bit(CGRP_RELEASABLE, &cgrp->flags);
 		check_for_release(cgrp);
 	}
@@ -5774,7 +5621,6 @@ EXPORT_SYMBOL(cgroup_kernel_close);
 
 void cgroup_kernel_destroy(struct cgroup *cgrp)
 {
-	set_bit(CGRP_SELF_DESTRUCTION, &cgrp->flags);
 	set_bit(CGRP_RELEASABLE, &cgrp->flags);
 	check_for_release(cgrp);
 }
Index: linux-pcs7.git/kernel/ve/ve.c
===================================================================
--- linux-pcs7.git.orig/kernel/ve/ve.c
+++ linux-pcs7.git/kernel/ve/ve.c
@@ -710,7 +710,6 @@ do_init:
 	init_rwsem(&ve->op_sem);
 	mutex_init(&ve->sync_mutex);
 	INIT_LIST_HEAD(&ve->devices);
-	INIT_LIST_HEAD(&ve->ve_cgroup_head);
 	INIT_LIST_HEAD(&ve->ve_list);
 	kmapset_init_key(&ve->ve_sysfs_perms);
 
@@ -728,23 +727,6 @@ err_id:
 	return ERR_PTR(err);
 }
 
-static void ve_offline(struct cgroup *cg)
-{
-	struct ve_struct *ve = cgroup_ve(cg);
-	struct cgroup *cgrp;
-
-	ve_list_del(ve);
-	veid_free(ve->veid);
-
-	while (!list_empty(&ve->ve_cgroup_head)) {
-		cgrp = list_entry(ve->ve_cgroup_head.prev,
-				struct cgroup, cgroup_ve_list);
-		cgrp->cgroup_ve = NULL;
-		list_del_init(&cgrp->cgroup_ve_list);
-		cgroup_kernel_destroy(cgrp);
-	}
-}
-
 static void ve_destroy(struct cgroup *cg)
 {
 	struct ve_struct *ve = cgroup_ve(cg);
@@ -979,7 +961,6 @@ struct cgroup_subsys ve_subsys = {
 	.name		= "ve",
 	.subsys_id	= ve_subsys_id,
 	.css_alloc	= ve_create,
-	.css_offline	= ve_offline,
 	.css_free	= ve_destroy,
 	.can_attach	= ve_can_attach,
 	.attach		= ve_attach,
Index: linux-pcs7.git/kernel/ve/vecalls.c
===================================================================
--- linux-pcs7.git.orig/kernel/ve/vecalls.c
+++ linux-pcs7.git/kernel/ve/vecalls.c
@@ -441,14 +441,12 @@ static int do_env_create(envid_t veid, u
 
 	legacy_veid_to_name(veid, ve_name);
 
-	ve_cgroup = ve_cgroup_open(ve0.css.cgroup,
-			CGRP_CREAT|CGRP_WEAK|CGRP_EXCL, veid);
+	ve_cgroup = ve_cgroup_open(ve0.css.cgroup, CGRP_CREAT|CGRP_EXCL, veid);
 	err = PTR_ERR(ve_cgroup);
 	if (IS_ERR(ve_cgroup))
 		goto err_ve_cgroup;
 
-	dev_cgroup = ve_cgroup_open(devices_root,
-		CGRP_CREAT|CGRP_WEAK, veid);
+	dev_cgroup = ve_cgroup_open(devices_root, CGRP_CREAT, veid);
 	err = PTR_ERR(dev_cgroup);
 	if (IS_ERR(dev_cgroup))
 		goto err_dev_cgroup;



More information about the Devel mailing list