[Devel] [PATCH VZ8 v3 13/14] cgroup/ve: pass cgroup_root to ve_set(get)_release_agent

Valeriy Vdovin valeriy.vdovin at virtuozzo.com
Tue Feb 16 11:44:35 MSK 2021


Due to virtualization of release_agent cgroup property, cgroup1_show_options
has become more complex.
struct cgroup_root is one of the arguments to that function, it was previously
holding the value of release_agent. But now this property is per-ve AND
per-cgroup.  That's why to find the right release_agent value, the code should
convert cgroup_root into one specific cgroup that is a 'virtual cgroup root' of
a container, represented by the current VE. Getting ve is trivial but cgroup
can be found by a helper function that will iterate css_set links under
cgroup_mutex lock. There is a lock inversion problem when using cgroup_mutex
in cgroup1_show_options, lockdep shows cgroup_mutex conflicts with
kernfs_node->dep_map.
This can be solved easily by converting per-cgroup data structure in VE into
per-cgroup-root. This way we can provide ve_set(get)release_agent_path directly
with struct cgroup_root agrument. For each cgroup hierarchy there is only one
root and for each VE there can only be one virtual root either, that's why
it is safe to just use cgroup_root as a key to find the proper release_agent
path in each VE.

Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 include/linux/ve.h        |  8 ++++---
 kernel/cgroup/cgroup-v1.c | 44 +++++++++++----------------------------
 kernel/cgroup/cgroup.c    |  5 +++--
 kernel/ve/ve.c            | 19 +++++++----------
 4 files changed, 27 insertions(+), 49 deletions(-)

diff --git a/include/linux/ve.h b/include/linux/ve.h
index 7cef4b39847e..3b487f8a4a50 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -145,12 +145,14 @@ extern int nr_ve;
 void ve_add_to_release_list(struct cgroup *cgrp);
 void ve_rm_from_release_list(struct cgroup *cgrp);
 
-int ve_set_release_agent_path(struct cgroup *cgroot,
+int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root *cgroot,
 	const char *release_agent);
 
-const char *ve_get_release_agent_path(struct cgroup *cgrp_root);
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+	struct cgroup_root *cgroot);
 
-void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp);
+void ve_cleanup_per_cgroot_data(struct ve_struct *ve,
+	struct cgroup_root *cgrp);
 
 extern struct ve_struct *get_ve(struct ve_struct *ve);
 extern void put_ve(struct ve_struct *ve);
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 46be2f688503..993ac38b895f 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -577,7 +577,8 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
 	}
 
 	if (root_cgrp->ve_owner)
-		ret = ve_set_release_agent_path(root_cgrp, strstrip(buf));
+		ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+			root_cgrp->root, strstrip(buf));
 	else
 		ret = -ENODEV;
 
@@ -598,7 +599,9 @@ static int cgroup_release_agent_show(struct seq_file *seq, void *v)
 	root_cgrp = cgroup_get_local_root(cgrp);
 	if (root_cgrp->ve_owner) {
 		rcu_read_lock();
-		release_agent = ve_get_release_agent_path(root_cgrp);
+		release_agent = ve_get_release_agent_path(
+			rcu_dereference(root_cgrp->ve_owner),
+			root_cgrp->root);
 
 		if (release_agent)
 			seq_puts(seq, release_agent);
@@ -910,7 +913,7 @@ void cgroup1_release_agent(struct work_struct *work)
 			goto continue_free;
 		}
 
-		release_agent = ve_get_release_agent_path(root_cgrp);
+		release_agent = ve_get_release_agent_path(ve, root_cgrp->root);
 
 		*agentbuf = 0;
 		if (release_agent)
@@ -931,7 +934,9 @@ void cgroup1_release_agent(struct work_struct *work)
 		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
 		envp[i] = NULL;
 
+
 		mutex_unlock(&cgroup_mutex);
+
 		err = call_usermodehelper_ve(ve, argv[0], argv,
 			envp, UMH_WAIT_EXEC);
 
@@ -939,6 +944,7 @@ void cgroup1_release_agent(struct work_struct *work)
 			pr_warn_ratelimited("cgroup1_release_agent "
 					    "%s %s failed: %d\n",
 					    agentbuf, pathbuf, err);
+		up_write(&ve->op_sem);
 		mutex_lock(&cgroup_mutex);
 continue_free:
 		kfree(pathbuf);
@@ -989,7 +995,6 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
 	const char *release_agent;
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 	struct cgroup_subsys *ss;
-	struct cgroup *root_cgrp = &root->cgrp;
 	int ssid;
 
 	for_each_subsys(ss, ssid)
@@ -1003,32 +1008,7 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
 		seq_puts(seq, ",cpuset_v2_mode");
 
 	rcu_read_lock();
-#ifdef CONFIG_VE
-	{
-		struct ve_struct *ve = get_exec_env();
-		struct css_set *cset;
-		struct nsproxy *ve_ns;
-
-		if (!ve_is_super(ve)) {
-			/*
-			 * ve->init_task is NULL in case when cgroup is accessed
-			 * before ve_start_container has been called.
-			 *
-			 * ve->init_task is synchronized via ve->ve_ns rcu, see
-			 * ve_grab_context/drop_context.
-			 */
-			ve_ns = rcu_dereference(ve->ve_ns);
-			if (ve_ns) {
-				spin_lock_irq(&css_set_lock);
-				cset = ve_ns->cgroup_ns->root_cset;
-				BUG_ON(!cset);
-				root_cgrp = cset_cgroup_from_root(cset, root);
-				spin_unlock_irq(&css_set_lock);
-			}
-		}
-	}
-#endif
-	release_agent = ve_get_release_agent_path(root_cgrp);
+	release_agent = ve_get_release_agent_path(get_exec_env(), root);
 	if (release_agent && release_agent[0])
 		seq_show_option(seq, "release_agent", release_agent);
 	rcu_read_unlock();
@@ -1226,8 +1206,8 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data)
 
 		root_cgrp = cgroup_get_local_root(&root->cgrp);
 		if (root_cgrp->ve_owner)
-			ret = ve_set_release_agent_path(root_cgrp,
-				opts.release_agent);
+			ret = ve_set_release_agent_path(root_cgrp->ve_owner,
+				root_cgrp->root, opts.release_agent);
 	}
 
 	trace_cgroup_remount(root);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 75997b503d3c..09d328b76dab 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2152,7 +2152,8 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
 
 	root->flags = opts->flags;
 	if (opts->release_agent)
-		ve_set_release_agent_path(cgrp, opts->release_agent);
+		ve_set_release_agent_path(cgrp->ve_owner, root,
+			opts->release_agent);
 
 	if (opts->name)
 		strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
@@ -2360,7 +2361,7 @@ static void cgroup_kill_sb(struct super_block *sb)
 		percpu_ref_kill(&root->cgrp.self.refcnt);
 	cgroup_put(&root->cgrp);
 	kernfs_kill_sb(sb);
-	ve_cleanup_per_cgroot_data(NULL, &root->cgrp);
+	ve_cleanup_per_cgroot_data(&ve0, root);
 }
 
 struct file_system_type cgroup_fs_type = {
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index e8945e10e070..031b104075c8 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -35,7 +35,7 @@ struct per_cgroot_data {
 	/*
 	 * data is related to this cgroup
 	 */
-	struct cgroup *cgroot;
+	struct cgroup_root *cgroot;
 
 	/*
 	 * path to release agent binaray, that should
@@ -217,7 +217,7 @@ int nr_threads_ve(struct ve_struct *ve)
 EXPORT_SYMBOL(nr_threads_ve);
 
 static struct per_cgroot_data *per_cgroot_data_find_locked(
-	struct list_head *per_cgroot_list, struct cgroup *cgroot)
+	struct list_head *per_cgroot_list, struct cgroup_root *cgroot)
 {
 	struct per_cgroot_data *data;
 
@@ -229,7 +229,7 @@ static struct per_cgroot_data *per_cgroot_data_find_locked(
 }
 
 static inline struct per_cgroot_data *per_cgroot_get_or_create(
-	struct ve_struct *ve, struct cgroup *cgroot)
+	struct ve_struct *ve, struct cgroup_root *cgroot)
 {
 	struct per_cgroot_data *data, *other_data;
 	unsigned long flags;
@@ -263,10 +263,9 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create(
 	return data;
 }
 
-int ve_set_release_agent_path(struct cgroup *cgroot,
+int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root *cgroot,
 	const char *release_agent)
 {
-	struct ve_struct *ve;
 	unsigned long flags;
 	struct per_cgroot_data *data;
 	struct cgroup_rcu_string *new_path, *old_path;
@@ -276,7 +275,6 @@ int ve_set_release_agent_path(struct cgroup *cgroot,
 	 * caller should grab cgroup_mutex to safely use
 	 * ve_owner field
 	 */
-	ve = cgroot->ve_owner;
 	BUG_ON(!ve);
 
 	nbytes = strlen(release_agent);
@@ -304,16 +302,15 @@ int ve_set_release_agent_path(struct cgroup *cgroot,
 	return 0;
 }
 
-const char *ve_get_release_agent_path(struct cgroup *cgroot)
+const char *ve_get_release_agent_path(struct ve_struct *ve,
+	struct cgroup_root *cgroot)
 {
 	/* caller must grab rcu_read_lock */
 	const char *result = NULL;
 	struct per_cgroot_data *data;
 	struct cgroup_rcu_string *str;
-	struct ve_struct *ve;
 	unsigned long flags;
 
-	ve = rcu_dereference(cgroot->ve_owner);
 	if (!ve)
 		return NULL;
 
@@ -677,15 +674,13 @@ static inline void per_cgroot_data_free(struct per_cgroot_data *data)
 	kfree(data);
 }
 
-void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp)
+void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup_root *cgrp)
 {
 	struct per_cgroot_data *data, *saved;
 	unsigned long flags;
 
 	BUG_ON(!ve && !cgrp);
 	rcu_read_lock();
-	if (!ve)
-		ve = cgroup_get_ve_owner(cgrp);
 
 	spin_lock_irqsave(&ve->per_cgroot_list_lock, flags);
 	list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) {
-- 
2.27.0



More information about the Devel mailing list