[Devel] [PATCH RHEL8 COMMIT] ve/cgroup: Private per-cgroup-root data container
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Mar 3 20:21:13 MSK 2021
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.5
------>
commit 4082809201fbb52062c3cd13e44cbae5f5ff4dc2
Author: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Date: Wed Mar 3 20:21:13 2021 +0300
ve/cgroup: Private per-cgroup-root data container
Since each ve is internally attached to a particular css_set via its
init_task, it is useful to have a container for parameters that are
common to each cgroup subsystem hierarchy rooted at the ve's virtual
root.
Cherry-picked from vz7 commit
4a98f07102fd ("ve/cgroup: private per-cgroup-root data container")
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
=====================
Patchset description:
ve/cgroup: Port release_agent virtualization from vz7
This patchset ports cgroup release_agent virtualization from vz7.
The major challenges of the port are the differences between the vz7 and
vz8 cgroup implementations:
- the transition of cgroups to kernfs
- a slightly changed locking scheme, which relies on css_set_lock in
  places that previously relied on cgroup_mutex.
A small number of patches have been ported without modification, but
most of them required substantial changes due to the factors described
above.
v1:
- original patchset
v2:
- removed the port of CGRP_REMOVED, since VZ8 already uses CSS_ONLINE
  for the same purpose
- changed the ve_set(get)_release_agent_path signature to a more
  optimal one
- added a ve->is_running check before calling the userspace executable
v3:
- use a goto after the ve->is_running check in the last patch
---
include/linux/ve.h | 7 +++++
kernel/ve/ve.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 82 insertions(+)
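The core of the diff below is per_cgroot_get_or_create(), which follows
the usual optimistic find-or-create pattern: look up the entry under the
lock, allocate outside the lock (allocation may block), then re-check
under the lock and discard the allocation if another thread added the
entry first. Below is a minimal userspace sketch of the same pattern; it
is a hypothetical pthread analogue (using a mutex where the kernel code
uses a spinlock, and a plain void pointer key where the kernel code keys
on the cgroup root), not part of the patch itself:

#include <pthread.h>
#include <stdlib.h>

/* Stand-in for struct per_cgroot_data, keyed by its cgroup root. */
struct node {
	struct node *next;
	const void *key;		/* plays the role of 'cgroot' */
};

static struct node *head;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller must hold list_lock, mirroring per_cgroot_data_find_locked(). */
static struct node *find_locked(const void *key)
{
	struct node *n;

	for (n = head; n; n = n->next)
		if (n->key == key)
			return n;
	return NULL;
}

static struct node *get_or_create(const void *key)
{
	struct node *n, *other;

	pthread_mutex_lock(&list_lock);
	n = find_locked(key);
	pthread_mutex_unlock(&list_lock);
	if (n)
		return n;

	/* Allocate outside the lock, since allocation may block. */
	n = calloc(1, sizeof(*n));
	if (!n)
		return NULL;

	pthread_mutex_lock(&list_lock);
	other = find_locked(key);
	if (other) {
		/* Lost the race: free our copy, reuse the existing entry. */
		pthread_mutex_unlock(&list_lock);
		free(n);
		return other;
	}
	n->key = key;
	n->next = head;
	head = n;
	pthread_mutex_unlock(&list_lock);
	return n;
}

In the patch, the entries created this way are torn down in one pass by
ve_per_cgroot_free() when the container stops (called from ve_exit_ns()).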
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 2ab39b607708..44369dddeb24 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -114,6 +114,13 @@ struct ve_struct {
struct workqueue_struct *wq;
struct work_struct release_agent_work;
+
+ /*
+ * List of data, private for each root cgroup in
+ * ve's css_set.
+ */
+ struct list_head per_cgroot_list;
+ spinlock_t per_cgroot_list_lock;
};
struct ve_devmnt {
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 934a5ff1c9bb..a108cb63bc9f 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -30,6 +30,14 @@
#include "../cgroup/cgroup-internal.h" /* For cgroup_task_count() */
+struct per_cgroot_data {
+ struct list_head list;
+ /*
+ * data is related to this cgroup
+ */
+ struct cgroup *cgroot;
+};
+
extern struct kmapset_set sysfs_ve_perms_set;
static struct kmem_cache *ve_cachep;
@@ -67,6 +75,9 @@ struct ve_struct ve0 = {
.release_list = LIST_HEAD_INIT(ve0.release_list),
.release_agent_work = __WORK_INITIALIZER(ve0.release_agent_work,
cgroup1_release_agent),
+ .per_cgroot_list = LIST_HEAD_INIT(ve0.per_cgroot_list),
+ .per_cgroot_list_lock = __SPIN_LOCK_UNLOCKED(
+ ve0.per_cgroot_list_lock),
};
EXPORT_SYMBOL(ve0);
@@ -199,6 +210,53 @@ int nr_threads_ve(struct ve_struct *ve)
}
EXPORT_SYMBOL(nr_threads_ve);
+static struct per_cgroot_data *per_cgroot_data_find_locked(
+ struct list_head *per_cgroot_list, struct cgroup *cgroot)
+{
+ struct per_cgroot_data *data;
+
+ list_for_each_entry(data, per_cgroot_list, list) {
+ if (data->cgroot == cgroot)
+ return data;
+ }
+ return NULL;
+}
+
+static inline struct per_cgroot_data *per_cgroot_get_or_create(
+ struct ve_struct *ve, struct cgroup *cgroot)
+{
+ struct per_cgroot_data *data, *other_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ve->per_cgroot_list_lock, flags);
+ data = per_cgroot_data_find_locked(&ve->per_cgroot_list,
+ cgroot);
+ spin_unlock_irqrestore(&ve->per_cgroot_list_lock, flags);
+
+ if (data)
+ return data;
+
+ data = kzalloc(sizeof(struct per_cgroot_data), GFP_KERNEL);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_irqsave(&ve->per_cgroot_list_lock, flags);
+ other_data = per_cgroot_data_find_locked(&ve->per_cgroot_list,
+ cgroot);
+
+ if (other_data) {
+ spin_unlock_irqrestore(&ve->per_cgroot_list_lock, flags);
+ kfree(data);
+ return other_data;
+ }
+
+ data->cgroot = cgroot;
+ list_add(&data->list, &ve->per_cgroot_list);
+
+ spin_unlock_irqrestore(&ve->per_cgroot_list_lock, flags);
+ return data;
+}
+
struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id)
{
struct cgroup_subsys_state *css;
@@ -533,6 +591,19 @@ static int ve_start_container(struct ve_struct *ve)
return err;
}
+static void ve_per_cgroot_free(struct ve_struct *ve)
+{
+ struct per_cgroot_data *data, *saved;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ve->per_cgroot_list_lock, flags);
+ list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) {
+ list_del_init(&data->list);
+ kfree(data);
+ }
+ spin_unlock_irqrestore(&ve->per_cgroot_list_lock, flags);
+}
+
void ve_stop_ns(struct pid_namespace *pid_ns)
{
struct ve_struct *ve = current->task_ve;
@@ -589,6 +660,8 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
ve_workqueue_stop(ve);
+ ve_per_cgroot_free(ve);
+
/*
* At this point all userspace tasks in container are dead.
*/
@@ -699,6 +772,7 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
INIT_WORK(&ve->release_agent_work, cgroup1_release_agent);
spin_lock_init(&ve->release_list_lock);
+ spin_lock_init(&ve->per_cgroot_list_lock);
ve->_randomize_va_space = ve0._randomize_va_space;
@@ -721,6 +795,7 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
#endif
INIT_LIST_HEAD(&ve->devmnt_list);
INIT_LIST_HEAD(&ve->release_list);
+ INIT_LIST_HEAD(&ve->per_cgroot_list);
mutex_init(&ve->devmnt_mutex);
#ifdef CONFIG_AIO