[Devel] [PATCH RHEL7 v19 11/13] ve/cgroup: set release_agent_path for root cgroups separately for each ve.
Kirill Tkhai
ktkhai at virtuozzo.com
Fri Apr 24 12:45:55 MSK 2020
On 23.04.2020 18:45, Valeriy Vdovin wrote:
> This is done so that each container could set it's own release agent.
> Release agent information is now stored in per-cgroup-root data
> structure in ve.
>
> https://jira.sw.ru/browse/PSBM-83887
>
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
> ---
> include/linux/cgroup.h | 3 --
> include/linux/ve.h | 7 ++++
> kernel/cgroup.c | 89 ++++++++++++++++++++++++++++++++++++++++----------
> kernel/ve/ve.c | 61 ++++++++++++++++++++++++++++++++++
> 4 files changed, 139 insertions(+), 21 deletions(-)
>
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 51bd372..a6d50ee 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -394,9 +394,6 @@ struct cgroupfs_root {
> /* IDs for cgroups in this hierarchy */
> struct ida cgroup_ida;
>
> - /* The path to use for release notifications. */
> - char release_agent_path[PATH_MAX];
> -
> /* The name for this hierarchy - may be empty */
> char name[MAX_CGROUP_ROOT_NAMELEN];
> };
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 94a07df..da719b4 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -214,6 +214,13 @@ void do_update_load_avg_ve(void);
>
> void ve_add_to_release_list(struct cgroup *cgrp);
> void ve_rm_from_release_list(struct cgroup *cgrp);
> +
> +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot,
> + const char *release_agent);
> +
> +const char *ve_get_release_agent_path(struct ve_struct *ve,
> + struct cgroup *cgrp_root);
> +
> extern struct ve_struct *get_ve(struct ve_struct *ve);
> extern void put_ve(struct ve_struct *ve);
>
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 0b047b0..d5897a8 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1090,9 +1090,19 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>
> static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
> {
> + const char *release_agent;
> struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
> struct cgroup_subsys *ss;
> + struct cgroup *root_cgrp = &root->top_cgroup;
>
> +#ifdef CONFIG_VE
> + struct ve_struct *ve = get_exec_env();
> + if (!ve_is_super(ve)) {
> + mutex_lock(&cgroup_mutex);
> + root_cgrp = task_cgroup_from_root(ve->init_task, root);
> + mutex_unlock(&cgroup_mutex);
> + }
> +#endif
> mutex_lock(&cgroup_root_mutex);
> for_each_subsys(root, ss)
> seq_printf(seq, ",%s", ss->name);
> @@ -1104,9 +1114,12 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
> seq_puts(seq, ",xattr");
> if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
> seq_puts(seq, ",cpuset_v2_mode");
> - if (strlen(root->release_agent_path))
> - seq_show_option(seq, "release_agent",
> - root->release_agent_path);
> + rcu_read_lock();
> + release_agent = ve_get_release_agent_path(root_cgrp->ve_owner,
> + root_cgrp);
1)ve_owner is __rcu type, isn't it? Here and in other places of this patch.
2)Can ve_owner be NULL here? If so, we will get NULL pointer dereference.
If it can't, why?
> + if (release_agent && release_agent[0])
> + seq_show_option(seq, "release_agent", release_agent);
> + rcu_read_unlock();
> if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
> seq_puts(seq, ",clone_children");
> if (strlen(root->name))
> @@ -1384,8 +1397,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
> /* re-populate subsystem files */
> cgroup_populate_dir(cgrp, false, added_mask);
>
> - if (opts.release_agent)
> - strcpy(root->release_agent_path, opts.release_agent);
> + if (opts.release_agent) {
> + struct cgroup *root_cgrp;
> + root_cgrp = cgroup_get_local_root(cgrp);
> + if (root_cgrp->ve_owner)
> + ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> + root_cgrp, opts.release_agent);
> + }
> out_unlock:
> kfree(opts.release_agent);
> kfree(opts.name);
> @@ -1546,8 +1564,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
> root->subsys_mask = opts->subsys_mask;
> root->flags = opts->flags;
> ida_init(&root->cgroup_ida);
> - if (opts->release_agent)
> - strcpy(root->release_agent_path, opts->release_agent);
> if (opts->name)
> strcpy(root->name, opts->name);
> if (opts->cpuset_clone_children)
> @@ -1745,6 +1761,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
>
> cred = override_creds(&init_cred);
> cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
> + if (opts.release_agent) {
> + ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> + root_cgrp, opts.release_agent);
> + }
> +
> revert_creds(cred);
> mutex_unlock(&cgroup_root_mutex);
> mutex_unlock(&cgroup_mutex);
> @@ -2314,27 +2335,44 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
> static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
> const char *buffer)
> {
> - BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
> -
> + int ret = 0;
> + struct cgroup *root_cgrp;
> if (strlen(buffer) >= PATH_MAX)
> return -EINVAL;
>
> if (!cgroup_lock_live_group(cgrp))
> return -ENODEV;
>
> - mutex_lock(&cgroup_root_mutex);
> - strcpy(cgrp->root->release_agent_path, buffer);
> - mutex_unlock(&cgroup_root_mutex);
> + root_cgrp = cgroup_get_local_root(cgrp);
> + BUG_ON(!root_cgrp);
> + if (root_cgrp->ve_owner) {
> + ret = ve_set_release_agent_path(root_cgrp->ve_owner,
> + root_cgrp, buffer);
> + }
> +
> mutex_unlock(&cgroup_mutex);
> - return 0;
> + return ret;
> }
>
> static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
> struct seq_file *seq)
> {
> + const char *release_agent;
> + struct cgroup *root_cgrp;
> +
> if (!cgroup_lock_live_group(cgrp))
> return -ENODEV;
> - seq_puts(seq, cgrp->root->release_agent_path);
> +
> + root_cgrp = cgroup_get_local_root(cgrp);
> + if (root_cgrp->ve_owner) {
> + rcu_read_lock();
> + release_agent = ve_get_release_agent_path(
> + root_cgrp->ve_owner, root_cgrp);
> +
> + if (release_agent)
> + seq_puts(seq, release_agent);
> + rcu_read_unlock();
> + }
> seq_putc(seq, '\n');
> mutex_unlock(&cgroup_mutex);
> return 0;
> @@ -5431,15 +5469,24 @@ static void check_for_release(struct cgroup *cgrp)
> void cgroup_release_agent(struct work_struct *work)
> {
> struct ve_struct *ve;
> + char *agentbuf;
> +
> + agentbuf = kzalloc(PATH_MAX, GFP_KERNEL);
> + if (!agentbuf) {
> + pr_warn("failed to allocate agentbuf\n");
> + return;
> + }
> +
> ve = container_of(work, struct ve_struct, release_agent_work);
> mutex_lock(&cgroup_mutex);
> raw_spin_lock(&ve->release_list_lock);
> while (!list_empty(&ve->release_list)) {
> char *argv[3], *envp[3];
> int i, err;
> - char *pathbuf = NULL, *agentbuf = NULL;
> + char *pathbuf = NULL;
> struct cgroup *cgrp, *root_cgrp;
> struct task_struct *ve_task;
> + const char *release_agent;
>
> cgrp = list_entry(ve->release_list.next,
> struct cgroup,
> @@ -5467,9 +5514,15 @@ void cgroup_release_agent(struct work_struct *work)
> rcu_read_unlock();
> goto continue_free;
> }
> +
> + release_agent = ve_get_release_agent_path(ve, root_cgrp);
> +
> + *agentbuf = 0;
> + if (release_agent)
> + strcpy(agentbuf, release_agent);
> rcu_read_unlock();
> - agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
> - if (!agentbuf)
> +
> + if (!*agentbuf)
> goto continue_free;
>
> i = 0;
> @@ -5500,11 +5553,11 @@ void cgroup_release_agent(struct work_struct *work)
> mutex_lock(&cgroup_mutex);
> continue_free:
> kfree(pathbuf);
> - kfree(agentbuf);
> raw_spin_lock(&ve->release_list_lock);
> }
> raw_spin_unlock(&ve->release_list_lock);
> mutex_unlock(&cgroup_mutex);
> + kfree(agentbuf);
> }
>
> static int __init cgroup_disable(char *str)
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index 53ac9ea..6d6a9a9 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -51,6 +51,11 @@ struct per_cgroot_data {
> * data is related to this cgroup
> */
> struct cgroup *cgroot;
> + /*
> + * path to release agent binaray, that should
> + * be spawned for all cgroups under this cgroup root
> + */
> + struct cgroup_rcu_string __rcu *release_agent_path;
> };
>
> extern struct kmapset_set sysfs_ve_perms_set;
> @@ -175,6 +180,57 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create(
> return data;
> }
>
> +int ve_set_release_agent_path(struct ve_struct *ve,
> + struct cgroup *cgroot, const char *release_agent)
> +{
> + struct per_cgroot_data *data;
> + struct cgroup_rcu_string *new_path, *old_path;
> + int err = 0;
> +
> + new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent));
> + if (IS_ERR(new_path))
> + return PTR_ERR(new_path);
> +
> + data = per_cgroot_get_or_create(ve, cgroot);
> + if (IS_ERR(data)) {
> + kfree(new_path);
> + return PTR_ERR(data);
> + }
> +
> + raw_spin_lock(&ve->per_cgroot_list_lock);
> +
> + old_path = rcu_dereference_protected(data->release_agent_path,
> + lockdep_is_held(&ve->per_cgroot_list_lock));
> +
> + rcu_assign_pointer(data->release_agent_path, new_path);
> + raw_spin_unlock(&ve->per_cgroot_list_lock);
> +
> + if (old_path)
> + kfree_rcu(old_path, rcu_head);
> +
> + return err;
> +}
> +
> +const char *ve_get_release_agent_path(struct ve_struct *ve,
> + struct cgroup *cgroot)
> +{
> + /* caller must grab rcu_read_lock */
> + const char *result = NULL;
> + struct per_cgroot_data *data;
> + struct cgroup_rcu_string *str;
> +
> + raw_spin_lock(&ve->per_cgroot_list_lock);
> +
> + data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot);
> + if (data) {
> + str = rcu_dereference(data->release_agent_path);
> + if (str)
> + result = str->val;
> + }
> + raw_spin_unlock(&ve->per_cgroot_list_lock);
> + return result;
> +}
> +
> struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id)
> {
> struct cgroup_subsys_state *css, *tmp;
> @@ -672,8 +728,13 @@ err_list:
> static void ve_per_cgroot_free(struct ve_struct *ve)
> {
> struct per_cgroot_data *data, *saved;
> + struct cgroup_rcu_string *release_agent;
> raw_spin_lock(&ve->per_cgroot_list_lock);
> list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) {
> + release_agent = data->release_agent_path;
> + RCU_INIT_POINTER(data->release_agent_path, NULL);
> + if (release_agent)
> + kfree_rcu(release_agent, rcu_head);
> list_del_init(&data->list);
> kfree(data);
> }
>
More information about the Devel
mailing list