[Devel] [PATCH v13 10/12] ve/cgroup: private per-cgroup-root data container
Valeriy Vdovin
valeriy.vdovin at virtuozzo.com
Tue Apr 21 13:47:33 MSK 2020
On 21.04.2020 11:46, Kirill Tkhai wrote:
> On 20.04.2020 15:13, Valeriy Vdovin wrote:
>> As long as each ve is internally attached to a particular
>> css_set via it's init_task, it's good to have container with parameters,
>> which are common to each cgroup subsystem hierarchy, rooting from it's
>> virtual root.
>>
>> Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
>> ---
>> include/linux/ve.h | 7 +++++
>> kernel/ve/ve.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 84 insertions(+)
>>
>> diff --git a/include/linux/ve.h b/include/linux/ve.h
>> index 787a96f..94a07df 100644
>> --- a/include/linux/ve.h
>> +++ b/include/linux/ve.h
>> @@ -137,6 +137,13 @@ struct ve_struct {
>> struct work_struct release_agent_work;
>>
>> /*
>> + * List of data, private for each root cgroup in
>> + * ve's css_set.
>> + */
>> + struct list_head per_cgroot_list;
>> + struct raw_spinlock per_cgroot_list_lock;
>>
>> + /*
>> * All tasks, that belong to this ve, live
>> * in cgroups, that are children to cgroups
>> * that form this css_set.
>> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
>> index eb266ae..a30343f 100644
>> --- a/kernel/ve/ve.c
>> +++ b/kernel/ve/ve.c
>> @@ -45,6 +45,14 @@
>> #include <linux/vziptable_defs.h>
>> #include <net/rtnetlink.h>
>>
>> +struct per_cgroot_data {
>> + struct list_head list;
>> + /*
>> + * data is related to this cgroup
>> + */
>> + struct cgroup *cgroot;
>> +};
>> +
>> extern struct kmapset_set sysfs_ve_perms_set;
>>
>> static struct kmem_cache *ve_cachep;
>> @@ -92,6 +100,9 @@ struct ve_struct ve0 = {
>> .release_list = LIST_HEAD_INIT(ve0.release_list),
>> .release_agent_work = __WORK_INITIALIZER(ve0.release_agent_work,
>> cgroup_release_agent),
>> + .per_cgroot_list = LIST_HEAD_INIT(ve0.per_cgroot_list),
>> + .per_cgroot_list_lock = __RAW_SPIN_LOCK_UNLOCKED(
>> + ve0.per_cgroot_list_lock),
>> };
>> EXPORT_SYMBOL(ve0);
>>
>> @@ -118,6 +129,52 @@ void put_ve(struct ve_struct *ve)
>> }
>> EXPORT_SYMBOL(put_ve);
>>
>> +static struct per_cgroot_data *per_cgroot_data_find_locked(
>> + struct list_head *per_cgroot_list, struct cgroup *cgroot)
>> +{
>> + struct per_cgroot_data *data;
>> +
>> + list_for_each_entry(data, per_cgroot_list, list) {
>> + if (data->cgroot == cgroot)
>> + return data;
>> + }
>> + return NULL;
>> +}
>> +
>> +static inline struct per_cgroot_data *per_cgroot_get_or_create(
>> + struct ve_struct *ve, struct cgroup *cgroot)
>> +{
>> + struct per_cgroot_data *data, *other_data;
>> +
>> + raw_spin_lock(&ve->per_cgroot_list_lock);
>> + data = per_cgroot_data_find_locked(&ve->per_cgroot_list,
>> + cgroot);
>> + raw_spin_unlock(&ve->per_cgroot_list_lock);
>> +
>> + if (data)
>> + return data;
>> +
>> + data = kzalloc(sizeof(struct per_cgroot_data), GFP_KERNEL);
>> + if (!data)
>> + return ERR_PTR(-ENOMEM);
>> +
>> + raw_spin_lock(&ve->per_cgroot_list_lock);
>> + other_data = per_cgroot_data_find_locked(&ve->per_cgroot_list,
>> + cgroot);
>> +
>> + if (other_data) {
>> + raw_spin_unlock(&ve->per_cgroot_list_lock);
>> + kfree(data);
>> + return other_data;
>> + }
>> +
>> + data->cgroot = cgroot;
>> + list_add(&data->list, &ve->per_cgroot_list);
>> +
>> + raw_spin_unlock(&ve->per_cgroot_list_lock);
>> + return data;
>> +}
>> +
>> struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id)
>> {
>> struct cgroup_subsys_state *css, *tmp;
>> @@ -612,6 +669,22 @@ err_list:
>> return err;
>> }
>>
>> +static void per_cgroot_free_all_locked(struct list_head *per_cgroot_list)
>> +{
>> + struct per_cgroot_data *data, *saved;
>> + list_for_each_entry_safe(data, saved, per_cgroot_list, list) {
>> + list_del_init(&data->list);
>> + kfree(data);
>> + }
>> +}
>> +
>> +static void ve_per_cgroot_free(struct ve_struct *ve)
>> +{
>> + raw_spin_lock(&ve->per_cgroot_list_lock);
>> + per_cgroot_free_all_locked(&ve->per_cgroot_list);
>> + raw_spin_unlock(&ve->per_cgroot_list_lock);
>> +}
>> +
>> void ve_stop_ns(struct pid_namespace *pid_ns)
>> {
>> struct ve_struct *ve = current->task_ve;
>> @@ -662,6 +735,8 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
>>
>> ve_workqueue_stop(ve);
>>
>> + ve_per_cgroot_free(ve);
>> +
>> /*
>> * At this point all userspace tasks in container are dead.
>> */
>> @@ -735,6 +810,7 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
>>
>> INIT_WORK(&ve->release_agent_work, cgroup_release_agent);
>> raw_spin_lock_init(&ve->release_list_lock);
>> + raw_spin_lock_init(&ve->per_cgroot_list_lock);
>>
>> ve->_randomize_va_space = ve0._randomize_va_space;
>>
>> @@ -771,6 +847,7 @@ do_init:
>> INIT_LIST_HEAD(&ve->ve_list);
>> INIT_LIST_HEAD(&ve->devmnt_list);
>> INIT_LIST_HEAD(&ve->release_list);
>> + INIT_LIST_HEAD(&ve->per_cgroot_list);
>> mutex_init(&ve->devmnt_mutex);
>>
>> #ifdef CONFIG_AIO
>>
> What is this all for? Do you introduce per-cgroup-root data instead of cgroup::release_agent
> to save memory (avoid distribution release agent pointer to every cgroup)?
struct cgroup is not the place for release agent. You are right, that I
want to avoid mostly unused pointer in 99.99% or cgroups, but also
having release agent in a cgroup is semantically misleading. It's a
cgroup-root property. We could introduce something like cgroupfs_root
but supporting containers or even completely substitute one with the
other, but this is a heavy task.
To me having a per-cgroup-root container in VE structure seems cleaner.
VE is already responsible for managing multiple virtual cgroup roots
(mark/unmark_ve_roots as example), so it can proceed with it's manager
role and start having an associative container that would hold per-root
information (cgroup root is a key) with already established and
well-defined semantics, to where you just insert more data when you need
it. Right now we already need to store release_agent in direct
association with a specific cgroup root, so we would need to have this
release-agent<->root association one way or another.
More information about the Devel
mailing list