[Devel] [PATCH VZ8 v2 13/14] cgroup/ve: pass cgroup_root to ve_set(get)_release_agent

Kirill Tkhai ktkhai at virtuozzo.com
Mon Feb 15 14:00:38 MSK 2021


On 10.02.2021 13:03, Valeriy Vdovin wrote:
> Due to virtualization of release_agent cgroup property, cgroup1_show_options
> has become more complex.
> struct cgroup_root is one of the arguments to that function, it was previously
> holding the value of release_agent. But now this property is per-ve AND
> per-cgroup.  That's why to find the right release_agent value, the code should
> convert cgroup_root into one specific cgroup that is a 'virtual cgroup root' of
> a container, represented by the current VE. Getting ve is trivial but cgroup
> can be found by a helper function that will iterate css_set links under
> cgroup_mutex lock. There is a lock inversion problem when using cgroup_mutex
> in cgroup1_show_options, lockdep shows cgroup_mutex conflicts with
> kernfs_node->dep_map.
> This can be solved easily by converting per-cgroup data structure in VE into
> per-cgroup-root. This way we can provide ve_set(get)release_agent_path directly
> with struct cgroup_root agrument. For each cgroup hierarchy there is only one
> root and for each VE there can only be one virtual root either, that's why
> it is safe to just use cgroup_root as a key to find the proper release_agent
> path in each VE.
> 
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>

Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>

> ---
>  include/linux/ve.h        |  8 ++++---
>  kernel/cgroup/cgroup-v1.c | 44 +++++++++++----------------------------
>  kernel/cgroup/cgroup.c    |  5 +++--
>  kernel/ve/ve.c            | 19 +++++++----------
>  4 files changed, 27 insertions(+), 49 deletions(-)
> 
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 7cef4b39847e..3b487f8a4a50 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -145,12 +145,14 @@ extern int nr_ve;
>  void ve_add_to_release_list(struct cgroup *cgrp);
>  void ve_rm_from_release_list(struct cgroup *cgrp);
>  
> -int ve_set_release_agent_path(struct cgroup *cgroot,
> +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root *cgroot,
>  	const char *release_agent);
>  
> -const char *ve_get_release_agent_path(struct cgroup *cgrp_root);
> +const char *ve_get_release_agent_path(struct ve_struct *ve,
> +	struct cgroup_root *cgroot);
>  
> -void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp);
> +void ve_cleanup_per_cgroot_data(struct ve_struct *ve,
> +	struct cgroup_root *cgrp);
>  
>  extern struct ve_struct *get_ve(struct ve_struct *ve);
>  extern void put_ve(struct ve_struct *ve);
> diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
> index 46be2f688503..993ac38b895f 100644
> --- a/kernel/cgroup/cgroup-v1.c
> +++ b/kernel/cgroup/cgroup-v1.c
> @@ -577,7 +577,8 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
>  	}
>  
>  	if (root_cgrp->ve_owner)
> -		ret = ve_set_release_agent_path(root_cgrp, strstrip(buf));
> +		ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> +			root_cgrp->root, strstrip(buf));
>  	else
>  		ret = -ENODEV;
>  
> @@ -598,7 +599,9 @@ static int cgroup_release_agent_show(struct seq_file *seq, void *v)
>  	root_cgrp = cgroup_get_local_root(cgrp);
>  	if (root_cgrp->ve_owner) {
>  		rcu_read_lock();
> -		release_agent = ve_get_release_agent_path(root_cgrp);
> +		release_agent = ve_get_release_agent_path(
> +			rcu_dereference(root_cgrp->ve_owner),
> +			root_cgrp->root);
>  
>  		if (release_agent)
>  			seq_puts(seq, release_agent);
> @@ -910,7 +913,7 @@ void cgroup1_release_agent(struct work_struct *work)
>  			goto continue_free;
>  		}
>  
> -		release_agent = ve_get_release_agent_path(root_cgrp);
> +		release_agent = ve_get_release_agent_path(ve, root_cgrp->root);
>  
>  		*agentbuf = 0;
>  		if (release_agent)
> @@ -931,7 +934,9 @@ void cgroup1_release_agent(struct work_struct *work)
>  		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
>  		envp[i] = NULL;
>  
> +
>  		mutex_unlock(&cgroup_mutex);
> +
>  		err = call_usermodehelper_ve(ve, argv[0], argv,
>  			envp, UMH_WAIT_EXEC);
>  
> @@ -939,6 +944,7 @@ void cgroup1_release_agent(struct work_struct *work)
>  			pr_warn_ratelimited("cgroup1_release_agent "
>  					    "%s %s failed: %d\n",
>  					    agentbuf, pathbuf, err);
> +		up_write(&ve->op_sem);
>  		mutex_lock(&cgroup_mutex);
>  continue_free:
>  		kfree(pathbuf);
> @@ -989,7 +995,6 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
>  	const char *release_agent;
>  	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
>  	struct cgroup_subsys *ss;
> -	struct cgroup *root_cgrp = &root->cgrp;
>  	int ssid;
>  
>  	for_each_subsys(ss, ssid)
> @@ -1003,32 +1008,7 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
>  		seq_puts(seq, ",cpuset_v2_mode");
>  
>  	rcu_read_lock();
> -#ifdef CONFIG_VE
> -	{
> -		struct ve_struct *ve = get_exec_env();
> -		struct css_set *cset;
> -		struct nsproxy *ve_ns;
> -
> -		if (!ve_is_super(ve)) {
> -			/*
> -			 * ve->init_task is NULL in case when cgroup is accessed
> -			 * before ve_start_container has been called.
> -			 *
> -			 * ve->init_task is synchronized via ve->ve_ns rcu, see
> -			 * ve_grab_context/drop_context.
> -			 */
> -			ve_ns = rcu_dereference(ve->ve_ns);
> -			if (ve_ns) {
> -				spin_lock_irq(&css_set_lock);
> -				cset = ve_ns->cgroup_ns->root_cset;
> -				BUG_ON(!cset);
> -				root_cgrp = cset_cgroup_from_root(cset, root);
> -				spin_unlock_irq(&css_set_lock);
> -			}
> -		}
> -	}
> -#endif
> -	release_agent = ve_get_release_agent_path(root_cgrp);
> +	release_agent = ve_get_release_agent_path(get_exec_env(), root);
>  	if (release_agent && release_agent[0])
>  		seq_show_option(seq, "release_agent", release_agent);
>  	rcu_read_unlock();
> @@ -1226,8 +1206,8 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data)
>  
>  		root_cgrp = cgroup_get_local_root(&root->cgrp);
>  		if (root_cgrp->ve_owner)
> -			ret = ve_set_release_agent_path(root_cgrp,
> -				opts.release_agent);
> +			ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> +				root_cgrp->root, opts.release_agent);
>  	}
>  
>  	trace_cgroup_remount(root);
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 75997b503d3c..09d328b76dab 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -2152,7 +2152,8 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
>  
>  	root->flags = opts->flags;
>  	if (opts->release_agent)
> -		ve_set_release_agent_path(cgrp, opts->release_agent);
> +		ve_set_release_agent_path(cgrp->ve_owner, root,
> +			opts->release_agent);
>  
>  	if (opts->name)
>  		strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
> @@ -2360,7 +2361,7 @@ static void cgroup_kill_sb(struct super_block *sb)
>  		percpu_ref_kill(&root->cgrp.self.refcnt);
>  	cgroup_put(&root->cgrp);
>  	kernfs_kill_sb(sb);
> -	ve_cleanup_per_cgroot_data(NULL, &root->cgrp);
> +	ve_cleanup_per_cgroot_data(&ve0, root);
>  }
>  
>  struct file_system_type cgroup_fs_type = {
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index e8945e10e070..031b104075c8 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -35,7 +35,7 @@ struct per_cgroot_data {
>  	/*
>  	 * data is related to this cgroup
>  	 */
> -	struct cgroup *cgroot;
> +	struct cgroup_root *cgroot;
>  
>  	/*
>  	 * path to release agent binaray, that should
> @@ -217,7 +217,7 @@ int nr_threads_ve(struct ve_struct *ve)
>  EXPORT_SYMBOL(nr_threads_ve);
>  
>  static struct per_cgroot_data *per_cgroot_data_find_locked(
> -	struct list_head *per_cgroot_list, struct cgroup *cgroot)
> +	struct list_head *per_cgroot_list, struct cgroup_root *cgroot)
>  {
>  	struct per_cgroot_data *data;
>  
> @@ -229,7 +229,7 @@ static struct per_cgroot_data *per_cgroot_data_find_locked(
>  }
>  
>  static inline struct per_cgroot_data *per_cgroot_get_or_create(
> -	struct ve_struct *ve, struct cgroup *cgroot)
> +	struct ve_struct *ve, struct cgroup_root *cgroot)
>  {
>  	struct per_cgroot_data *data, *other_data;
>  	unsigned long flags;
> @@ -263,10 +263,9 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create(
>  	return data;
>  }
>  
> -int ve_set_release_agent_path(struct cgroup *cgroot,
> +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root *cgroot,
>  	const char *release_agent)
>  {
> -	struct ve_struct *ve;
>  	unsigned long flags;
>  	struct per_cgroot_data *data;
>  	struct cgroup_rcu_string *new_path, *old_path;
> @@ -276,7 +275,6 @@ int ve_set_release_agent_path(struct cgroup *cgroot,
>  	 * caller should grab cgroup_mutex to safely use
>  	 * ve_owner field
>  	 */
> -	ve = cgroot->ve_owner;
>  	BUG_ON(!ve);
>  
>  	nbytes = strlen(release_agent);
> @@ -304,16 +302,15 @@ int ve_set_release_agent_path(struct cgroup *cgroot,
>  	return 0;
>  }
>  
> -const char *ve_get_release_agent_path(struct cgroup *cgroot)
> +const char *ve_get_release_agent_path(struct ve_struct *ve,
> +	struct cgroup_root *cgroot)
>  {
>  	/* caller must grab rcu_read_lock */
>  	const char *result = NULL;
>  	struct per_cgroot_data *data;
>  	struct cgroup_rcu_string *str;
> -	struct ve_struct *ve;
>  	unsigned long flags;
>  
> -	ve = rcu_dereference(cgroot->ve_owner);
>  	if (!ve)
>  		return NULL;
>  
> @@ -677,15 +674,13 @@ static inline void per_cgroot_data_free(struct per_cgroot_data *data)
>  	kfree(data);
>  }
>  
> -void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp)
> +void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup_root *cgrp)
>  {
>  	struct per_cgroot_data *data, *saved;
>  	unsigned long flags;
>  
>  	BUG_ON(!ve && !cgrp);
>  	rcu_read_lock();
> -	if (!ve)
> -		ve = cgroup_get_ve_owner(cgrp);
>  
>  	spin_lock_irqsave(&ve->per_cgroot_list_lock, flags);
>  	list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) {
> 



More information about the Devel mailing list