[Devel] [PATCH 6/7] ve/cgroup: Set release_agent_path for root cgroups separately for each ve.

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Thu Apr 2 13:28:23 MSK 2020



On 4/1/20 6:41 PM, Valeriy Vdovin wrote:
> https://jira.sw.ru/browse/PSBM-83887
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
> ---
>   include/linux/cgroup.h |   5 +--
>   include/linux/ve.h     |   7 ++++
>   kernel/cgroup.c        | 108 ++++++++++++++++++++++++++++++++++++++++---------
>   kernel/ve/ve.c         |  61 +++++++++++++++++++++++++++-
>   4 files changed, 158 insertions(+), 23 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 86d3e5d..25e2bbd 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -417,9 +417,6 @@ struct cgroupfs_root {
>   	/* IDs for cgroups in this hierarchy */
>   	struct ida cgroup_ida;
>   
> -	/* The path to use for release notifications. */
> -	char release_agent_path[PATH_MAX];
> -
>   	/* The name for this hierarchy - may be empty */
>   	char name[MAX_CGROUP_ROOT_NAMELEN];
>   };
> @@ -663,7 +660,7 @@ int cgroup_task_count(const struct cgroup *cgrp);
>   void cgroup_release_agent(struct work_struct *work);
>   
>   #ifdef CONFIG_VE
> -void cgroup_mark_ve_root(struct ve_struct *ve);
> +int cgroup_mark_ve_root(struct ve_struct *ve);
>   void cgroup_unbind_roots_from_ve(struct ve_struct *ve);
>   #endif
>   
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 12b5873..bb6602d 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -214,6 +214,13 @@ void do_update_load_avg_ve(void);
>   
>   void ve_add_to_release_list(struct ve_struct *ve, struct cgroup *cgrp);
>   void ve_rm_from_release_list(struct ve_struct *ve, struct cgroup *cgrp);
> +
> +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot,
> +	const char *release_agent);
> +
> +const char *ve_get_release_agent_path(struct ve_struct *ve,
> +	struct cgroup *cgrp_root);
> +
>   extern struct ve_struct *get_ve(struct ve_struct *ve);
>   extern void put_ve(struct ve_struct *ve);
>   
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 680ec8e..b2fa6a2 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1081,10 +1081,16 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>   
>   static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
>   {
> +	const char *release_agent;
>   	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
>   	struct cgroup_subsys *ss;
> +	struct cgroup *root_cgrp = &root->top_cgroup;
>   
> +#ifdef CONFIG_VE
> +	struct ve_struct *ve = get_exec_env();
> +#endif
>   	mutex_lock(&cgroup_root_mutex);
> +
>   	for_each_subsys(root, ss)
>   		seq_printf(seq, ",%s", ss->name);
>   	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
> @@ -1095,9 +1101,18 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
>   		seq_puts(seq, ",xattr");
>   	if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
>   		seq_puts(seq, ",cpuset_v2_mode");
> -	if (strlen(root->release_agent_path))
> -		seq_show_option(seq, "release_agent",
> -				root->release_agent_path);
> +#ifdef CONFIG_VE
> +	if (!ve_is_super(ve)) {
> +		mutex_lock(&cgroup_mutex);
> +		root_cgrp = task_cgroup_from_root(ve->init_task, root);

Why not "css_cgroup_from_root(ve->root_css_set, root);" instead?

> +		mutex_unlock(&cgroup_mutex);
> +	}
> +#endif
> +	rcu_read_lock();
> +	release_agent = ve_get_release_agent_path(root_cgrp->ve_owner, root_cgrp);
> +	if (release_agent && release_agent[0])
> +		seq_show_option(seq, "release_agent", release_agent);
> +	rcu_read_unlock();
>   	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
>   		seq_puts(seq, ",clone_children");
>   	if (strlen(root->name))
> @@ -1375,8 +1390,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
>   	/* re-populate subsystem files */
>   	cgroup_populate_dir(cgrp, false, added_mask);
>   
> -	if (opts.release_agent)
> -		strcpy(root->release_agent_path, opts.release_agent);
> +	if (opts.release_agent) {
> +		struct cgroup *root_cgrp;
> +		root_cgrp = cgroup_get_local_root(cgrp);
> +		if (root_cgrp->ve_owner)
> +			ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> +				root_cgrp, opts.release_agent);
> +	}
>    out_unlock:
>   	kfree(opts.release_agent);
>   	kfree(opts.name);
> @@ -1537,8 +1557,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
>   	root->subsys_mask = opts->subsys_mask;
>   	root->flags = opts->flags;
>   	ida_init(&root->cgroup_ida);
> -	if (opts->release_agent)
> -		strcpy(root->release_agent_path, opts->release_agent);
>   	if (opts->name)
>   		strcpy(root->name, opts->name);
>   	if (opts->cpuset_clone_children)
> @@ -2304,27 +2322,44 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
>   static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
>   				      const char *buffer)
>   {
> -	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
> -
> +	int ret = 0;
> +	struct cgroup *root_cgrp;
>   	if (strlen(buffer) >= PATH_MAX)
>   		return -EINVAL;
>   
>   	if (!cgroup_lock_live_group(cgrp))
>   		return -ENODEV;
>   
> -	mutex_lock(&cgroup_root_mutex);
> -	strcpy(cgrp->root->release_agent_path, buffer);
> -	mutex_unlock(&cgroup_root_mutex);
> +	root_cgrp = cgroup_get_local_root(cgrp);
> +	BUG_ON(!root_cgrp);
> +	if (root_cgrp->ve_owner) {
> +		ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> +			root_cgrp, buffer);
> +	}
> +
>   	mutex_unlock(&cgroup_mutex);
> -	return 0;
> +	return ret;
>   }
>   
>   static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
>   				     struct seq_file *seq)
>   {
> +	const char *release_agent;
> +	struct cgroup *root_cgrp;
> +
>   	if (!cgroup_lock_live_group(cgrp))
>   		return -ENODEV;
> -	seq_puts(seq, cgrp->root->release_agent_path);
> +
> +	root_cgrp = cgroup_get_local_root(cgrp);
> +	if (root_cgrp->ve_owner) {
> +		rcu_read_lock();
> +		release_agent = ve_get_release_agent_path(
> +			root_cgrp->ve_owner, root_cgrp);
> +
> +		if (release_agent)
> +			seq_puts(seq, release_agent);
> +		rcu_read_unlock();
> +	}
>   	seq_putc(seq, '\n');
>   	mutex_unlock(&cgroup_mutex);
>   	return 0;
> @@ -4140,7 +4175,7 @@ static struct cftype files[] = {
>   	},
>   	{
>   		.name = "release_agent",
> -		.flags = CFTYPE_ONLY_ON_ROOT,
> +		.flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_VE_WRITABLE,
>   		.read_seq_string = cgroup_release_agent_show,
>   		.write_string = cgroup_release_agent_write,
>   		.max_write_len = PATH_MAX,
> @@ -4344,22 +4379,53 @@ static int subgroups_count(struct cgroup *cgroup)
>   	return cgrps_count;
>   }
>   
> +static struct cftype *get_cftype_by_name(const char *name)
> +{
> +	struct cftype *cft;
> +	for (cft = files; cft->name[0] != '\0'; cft++) {
> +		if (!strcmp(cft->name, name))
> +			return cft;
> +	}
> +	return NULL;
> +}
> +
> +static int cgroup_add_file_on_mark_ve(struct cgroup *ve_root)
> +{
> +	int err;
> +	struct dentry *dir = ve_root->dentry;
> +	struct cftype *cft = get_cftype_by_name("release_agent");
> +	BUG_ON(!cft);
> +
> +	mutex_lock(&dir->d_inode->i_mutex);
> +	err = cgroup_add_file(ve_root, NULL, cft);
> +	mutex_unlock(&dir->d_inode->i_mutex);
> +	if (err) {
> +		pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
> +			cft->name, err);
> +	}
> +	return err;
> +}
> +
>   #ifdef CONFIG_VE
> -void cgroup_mark_ve_root(struct ve_struct *ve)
> +int cgroup_mark_ve_root(struct ve_struct *ve)
>   {
>   	struct cgroup *cgrp;
>   	struct cgroupfs_root *root;
> +	int err = 0;
>   
>   	mutex_lock(&cgroup_mutex);
>   	for_each_active_root(root) {
>   		cgrp = task_cgroup_from_root(ve->init_task, root);
>   		cgrp->ve_owner = ve;
>   		set_bit(CGRP_VE_ROOT, &cgrp->flags);
> -
> +		err = cgroup_add_file_on_mark_ve(cgrp);
> +		if (err)
> +			break;
>   		if (test_bit(cpu_cgroup_subsys_id, &root->subsys_mask))
>   			link_ve_root_cpu_cgroup(cgrp);
>   	}
>   	mutex_unlock(&cgroup_mutex);
> +	return err;
>   }
>   
>   void cgroup_unbind_roots_from_ve(struct ve_struct *ve)
> @@ -5516,7 +5582,13 @@ void cgroup_release_agent(struct work_struct *work)
>   			goto continue_free;
>   		if (__cgroup_path(cgrp, pathbuf, PAGE_SIZE, 1) < 0)
>   			goto continue_free;
> -		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
> +
> +		rcu_read_lock();
> +		root_cgrp = cgroup_get_local_root(cgrp);
> +		release_agent = ve_get_release_agent_path(ve, root_cgrp);
> +		if (release_agent)
> +			agentbuf = kstrdup(release_agent, GFP_KERNEL);
> +		rcu_read_unlock();
>   		if (!agentbuf)
>   			goto continue_free;
>   
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index 0136bf2..d2edfc0 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -51,6 +51,11 @@ struct per_cgroot_data {
>   	 * data is related to this cgroup
>   	 */
>   	struct cgroup *cgroot;
> +	/*
> +	 * path to release agent binary, that should
> +	 * be spawned for all cgroups under this cgroup root
> +	 */
> +	struct cgroup_rcu_string __rcu *release_agent_path;
>   };
>   
>   extern struct kmapset_set sysfs_ve_perms_set;
> @@ -152,6 +157,58 @@ static struct per_cgroot_data *per_cgroot_data_new_locked(
>   	return data;
>   }
>   
> +int ve_set_release_agent_path(struct ve_struct *ve,
> +	struct cgroup *cgroot, const char *release_agent)
> +{
> +	struct per_cgroot_data *data;
> +	struct cgroup_rcu_string *new_path, *old_path;
> +
> +	new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent));
> +	if (!new_path)
> +		return -ENOMEM;
> +
> +	raw_spin_lock(&ve->per_cgroot_list_lock);
> +	data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot);
> +	if (!data) {
> +		data = per_cgroot_data_new_locked(&ve->per_cgroot_list,
> +			cgroot);
> +		if (IS_ERR(data)) {
> +			kfree_rcu(new_path, rcu_head);
> +			raw_spin_unlock(&ve->per_cgroot_list_lock);
> +			return PTR_ERR(data);
> +		}
> +	}
> +
> +	old_path = data->release_agent_path;
> +	rcu_assign_pointer(data->release_agent_path, new_path);
> +
> +	raw_spin_unlock(&ve->per_cgroot_list_lock);
> +
> +	if (old_path)
> +		kfree_rcu(old_path, rcu_head);
> +	return 0;
> +}
> +
> +const char *ve_get_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot)
> +{
> +	const char *result = NULL;
> +	struct per_cgroot_data *data;
> +	struct cgroup_rcu_string *str;
> +
> +	raw_spin_lock(&ve->per_cgroot_list_lock);
> +
> +	data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot);
> +	if (!data)
> +		goto unlock;
> +
> +	str = rcu_dereference(data->release_agent_path);
> +	if (str)
> +		result = str->val;
> +unlock:
> +	raw_spin_unlock(&ve->per_cgroot_list_lock);
> +	return result;
> +}
> +
>   struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id)
>   {
>   	struct cgroup_subsys_state *css, *tmp;
> @@ -643,7 +700,9 @@ static int ve_start_container(struct ve_struct *ve)
>   	if (err < 0)
>   		goto err_iterate;
>   
> -	cgroup_mark_ve_root(ve);
> +	err = cgroup_mark_ve_root(ve);
> +	if (err)
> +		goto err_mark_ve;
>   
>   	err = ve_workqueue_start(ve);
>   	if (err)
> 

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.


More information about the Devel mailing list