[Devel] [PATCH VZ10 2/2] ve/cgroup: add vz.slice cgroup to put kernel threads to
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Mon Sep 29 08:13:56 MSK 2025
In cgroup-v2, persistent controllers like those in cgroup-v1 are not
available. Each controller (e.g., "cpu") must be explicitly enabled in
the parent cgroup directory (via `cgroup.subtree_control`) so that the
corresponding resource can be controlled in the child cgroup.
Also, in cgroup-v2 a cgroup that has controllers enabled for its
children cannot be joined by processes; this is called the "no internal
process" constraint. Conversely, a cgroup directory that has processes
inside cannot enable controllers for its child cgroups.
The problem is that our kernel threads for a VE are put into the root
cgroup-v2 directory of that VE, and thus prevent any cgroup controllers
from being enabled there. So let's move them into a special "vz.slice"
sub-cgroup to fix that.
https://virtuozzo.atlassian.net/browse/VSTOR-115868
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
include/linux/cgroup.h | 2 ++
kernel/cgroup/cgroup.c | 52 ++++++++++++++++++++++++++++++++++++++++++
kernel/ve/ve.c | 10 ++++++++
3 files changed, 64 insertions(+)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 986ce1d519b60..86ee6604a5207 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -791,6 +791,8 @@ void cgroup1_release_agent(struct work_struct *work);
#ifdef CONFIG_VE
extern int cgroup_mark_ve_roots(struct ve_struct *ve);
void cgroup_unmark_ve_roots(struct ve_struct *ve);
+int cgroup_join_vz_slice(struct ve_struct *ve);
+int cgroup_leave_vz_slice(struct ve_struct *ve);
int ve_release_agent_setup(struct ve_struct *ve);
void ve_release_agent_teardown(struct ve_struct *ve);
struct ve_struct *cgroup_ve_owner(struct cgroup *cgrp);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 9d75e0a7b675c..8e22aea876505 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2246,6 +2246,58 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
synchronize_rcu();
}
+int cgroup_join_vz_slice(struct ve_struct *ve)
+{
+ struct kernfs_node *kn;
+ struct css_set *cset;
+ struct cgroup *cgrp;
+ int ret;
+
+ cset = rcu_dereference_protected(ve->ve_ns, 1)->cgroup_ns->root_cset;
+ cgrp = cset_cgroup_from_root(cset, &cgrp_dfl_root);
+
+ if (!is_virtualized_cgroup(cgrp) ||
+ !test_bit(CGRP_VE_ROOT, &cgrp->flags))
+ return 0;
+
+ kn = kernfs_find_and_get(cgrp->kn, "vz.slice");
+ if (!kn) {
+ ret = cgroup_mkdir(cgrp->kn, "vz.slice", 0755);
+ if (ret)
+ return ret;
+
+ kn = kernfs_find_and_get(cgrp->kn, "vz.slice");
+ if (!kn)
+ return -ENOENT;
+ }
+
+ cgrp = cgroup_kn_lock_live(kn, true);
+ if (!cgrp) {
+ kernfs_put(kn);
+ return -ENODEV;
+ }
+
+ ret = cgroup_attach_task(cgrp, current, true);
+ cgroup_kn_unlock(kn);
+ kernfs_put(kn);
+ return ret;
+}
+
+int cgroup_leave_vz_slice(struct ve_struct *ve)
+{
+ struct css_set *cset;
+ struct cgroup *cgrp;
+
+ cset = rcu_dereference_protected(ve->ve_ns, 1)->cgroup_ns->root_cset;
+ cgrp = cset_cgroup_from_root(cset, &cgrp_dfl_root);
+
+ if (!is_virtualized_cgroup(cgrp) ||
+ !test_bit(CGRP_VE_ROOT, &cgrp->flags))
+ return 0;
+
+ return cgroup_attach_task(cgrp, current, true);
+}
+
static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
struct cftype *cft, bool activate);
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 91128fbfacffd..cb77f7b7e4cd1 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -729,6 +729,10 @@ static int ve_start_container(struct ve_struct *ve)
if (err)
goto err_mark_ve;
+ err = cgroup_join_vz_slice(ve);
+ if (err)
+ goto err_vz_slice_join;
+
err = ve_start_kthreadd(ve);
if (err)
goto err_kthreadd;
@@ -741,6 +745,10 @@ static int ve_start_container(struct ve_struct *ve)
if (err)
goto err_workqueue;
+ err = cgroup_leave_vz_slice(ve);
+ if (err)
+ goto err_vz_slice_leave;
+
err = ve_hook_iterate_init(VE_SS_CHAIN, ve);
if (err < 0)
goto err_iterate;
@@ -760,12 +768,14 @@ static int ve_start_container(struct ve_struct *ve)
err_release_agent_setup:
ve_hook_iterate_fini(VE_SS_CHAIN, ve);
err_iterate:
+err_vz_slice_leave:
ve_workqueue_stop(ve);
err_workqueue:
ve_stop_umh(ve);
err_umh:
ve_stop_kthreadd(ve);
err_kthreadd:
+err_vz_slice_join:
cgroup_unmark_ve_roots(ve);
err_mark_ve:
ve_list_del(ve);
--
2.51.0
More information about the Devel
mailing list