[Devel] [PATCH RHEL8 COMMIT] ve/cgroup: Add ve_owner field to cgroup
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Mar 3 20:21:12 MSK 2021
The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.5
------>
commit afe890f53645826d8d53f1ea63e97fdc8b6de33f
Author: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Date: Wed Mar 3 20:21:12 2021 +0300
ve/cgroup: Add ve_owner field to cgroup
Each cgroup representing a host or a container root of a
cgroup subsystem hierarchy will have this field set to
the valid ve_struct that owns this root. This way each
cgroup in the system will be able to know its owning VE.
Non root cgroups will have this field set to NULL, this
is an optimization for cleanup code: at VE destruction
we only need to iterate over all root cgroups to clean
reference to former owning VE, rather than over all
cgroup hierarchy.
Still, any cgroup that wants to know about its owning
VE can find its virtual root cgroup and read its
ve_owner field.
cgroup->ve_owner is declared as RCU pointer, because it fits
RCU semantics - rare writes/often reads. ve_owner will be
read from multiple locations in code in further patches and
is only rarely set at cgroup_mark_ve_root/cgroup_mount.
cgroup_get_ve_owner is a read wrapper for this purpose.
Cherry-picked from vz7 commit
eb9c0bfae39f ("ve/cgroup: Added ve_owner field to cgroup")
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
=====================
Patchset description:
ve/cgroup: Port release_agent virtualization from vz7
This patchset ports cgroup release_agent
virtualization from vz7.
Major challenges of porting are differences between the vz7 and vz8 cgroup
implementations:
- transition of cgroups to kernfs
- slightly changed locking scheme, which relies on css_set_lock in
places that previously relied on cgroup_mutex.
There is a small number of patches that have been ported without
modifications, but most of the patches had suffered a lot of
modification due to the factors described above.
v1:
- original patchset
v2:
- removed port of CGRP_REMOVED due to the use of CSS_ONLINE in VZ8 for
same reason
- changed ve_set(get)_release_agent_path signature to be more optimal
- added ve->is_running check before calling userspace executable
v3:
- use goto after check for ve->is_running in last patch
---
include/linux/cgroup-defs.h | 3 +++
include/linux/cgroup.h | 1 +
kernel/cgroup/cgroup.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 48 insertions(+)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 772fcee71f37..a8eb94d2f97f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -526,6 +526,9 @@ struct cgroup {
u64 subgroups_limit;
+ /* ve_owner, responsible for running release agent. */
+ struct ve_struct __rcu *ve_owner;
+
/* ids of the ancestors at each level including self */
int ancestor_ids[];
};
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 391702cf43bd..17ee29f4071b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -900,6 +900,7 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
#ifdef CONFIG_VE
extern void cgroup_mark_ve_root(struct ve_struct *ve);
void cgroup_unmark_ve_roots(struct ve_struct *ve);
+struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp);
#endif
#else /* !CONFIG_CGROUPS */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index de105e651607..beb26dd7cd88 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -303,6 +303,43 @@ bool cgroup_on_dfl(const struct cgroup *cgrp)
return cgrp->root == &cgrp_dfl_root;
}
+struct cgroup *cgroup_get_local_root(struct cgroup *cgrp)
+{
+ /*
+ * Find nearest root cgroup, which might be host cgroup root
+ * or ve cgroup root.
+ *
+ * <host_root_cgroup> -> local_root
+ * \ ^
+ * <cgroup> |
+ * \ |
+ * <cgroup> ---> from here
+ * \
+ * <ve_root_cgroup> -> local_root
+ * \ ^
+ * <cgroup> |
+ * \ |
+ * <cgroup> ---> from here
+ */
+
+ while (cgrp->kn->parent && !test_bit(CGRP_VE_ROOT, &cgrp->flags))
+ cgrp = cgrp->kn->parent->priv;
+
+ return cgrp;
+}
+
+struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp)
+{
+ struct ve_struct *ve;
+ /* Caller should hold RCU */
+
+ cgrp = cgroup_get_local_root(cgrp);
+ ve = rcu_dereference(cgrp->ve_owner);
+ if (!ve)
+ ve = get_ve0();
+ return ve;
+}
+
/* IDR wrappers which synchronize using cgroup_idr_lock */
static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
gfp_t gfp_mask)
@@ -1900,6 +1937,7 @@ void cgroup_mark_ve_root(struct ve_struct *ve)
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
cgrp = link->cgrp;
+ rcu_assign_pointer(cgrp->ve_owner, ve);
set_bit(CGRP_VE_ROOT, &cgrp->flags);
}
link_ve_root_cpu_cgroup(cset->subsys[cpu_cgrp_id]);
@@ -1907,6 +1945,7 @@ void cgroup_mark_ve_root(struct ve_struct *ve)
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
+ synchronize_rcu();
}
void cgroup_unmark_ve_roots(struct ve_struct *ve)
@@ -1924,12 +1963,15 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
cgrp = link->cgrp;
+ rcu_assign_pointer(cgrp->ve_owner, NULL);
clear_bit(CGRP_VE_ROOT, &cgrp->flags);
}
unlock:
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
+ /* ve_owner == NULL will be visible */
+ synchronize_rcu();
}
struct cgroup *cgroup_get_ve_root1(struct cgroup *cgrp)
@@ -2114,6 +2156,8 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
struct dentry *dentry;
bool new_sb = false;
+ RCU_INIT_POINTER(root->cgrp.ve_owner, &ve0);
+
dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
/*
More information about the Devel
mailing list