[Devel] [PATCH RHEL8 COMMIT] ve/cgroup: Add ve_owner field to cgroup

Konstantin Khorenko khorenko at virtuozzo.com
Wed Mar 3 20:21:12 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.5
------>
commit afe890f53645826d8d53f1ea63e97fdc8b6de33f
Author: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Date:   Wed Mar 3 20:21:12 2021 +0300

    ve/cgroup: Add ve_owner field to cgroup
    
    Each cgroup representing a host or a container root of
    cgroup subsystem hierarchy will have this field set to
    a valid ve_struct, that owns this root. This way each
    cgroup in a system will be able to know its owning VE.
    Non root cgroups will have this field set to NULL, this
    is an optimization for cleanup code: at VE destruction
    we only need to iterate over all root cgroups to clean
    reference to former owning VE, rather than over all
    cgroup hierarchy.
    Still, any cgroup that wants to know about its owning
    VE can find its virtual root cgroup and read its
    ve_owner field.
    
    cgroup->ve_owner is declared as an RCU pointer, because it fits
    RCU semantics - rare writes/frequent reads. ve_owner will be
    read from multiple locations in code in further patches and
    is only rarely set at cgroup_mark_ve_root/cgroup_mount.
    cgroup_get_ve_owner is a read wrapper for this purpose.
    
    Cherry-picked from vz7 commit
    eb9c0bfae39f ("ve/cgroup: Added ve_owner field to cgroup")
    
    Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
    Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    =====================
    Patchset description:
    
    ve/cgroup: Port release_agent virtualization from vz7
    
    This patchset ports the virtualization of cgroup release_agent
    from vz7.
    
    Major challenges of porting are differences between vz7 and vz8 cgroup
    implementations:
    - transition of cgroups to kernfs
    - slightly changed locking scheme, which relies on css_set_lock in
      places, previously relied on cgroup_mutex.
    
    There is a small number of patches that have been ported without
    modifications, but most of the patches required substantial
    modification due to the factors described above.
    
    v1:
      - original patchset
    v2:
      - removed port of CGRP_REMOVED due to the use of CSS_ONLINE in VZ8 for
        same reason
      - changed ve_set(get)_release_agent_path signature for more optimal
      - added ve->is_running check before calling userspace executable
    v3:
      - use goto after check for ve->is_running in last patch
---
 include/linux/cgroup-defs.h |  3 +++
 include/linux/cgroup.h      |  1 +
 kernel/cgroup/cgroup.c      | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 772fcee71f37..a8eb94d2f97f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -526,6 +526,9 @@ struct cgroup {
 
 	u64 subgroups_limit;
 
+	/* ve_owner, responsible for running release agent. */
+	struct ve_struct __rcu *ve_owner;
+
 	/* ids of the ancestors at each level including self */
 	int ancestor_ids[];
 };
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 391702cf43bd..17ee29f4071b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -900,6 +900,7 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
 #ifdef CONFIG_VE
 extern void cgroup_mark_ve_root(struct ve_struct *ve);
 void cgroup_unmark_ve_roots(struct ve_struct *ve);
+struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp);
 #endif
 
 #else /* !CONFIG_CGROUPS */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index de105e651607..beb26dd7cd88 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -303,6 +303,43 @@ bool cgroup_on_dfl(const struct cgroup *cgrp)
 	return cgrp->root == &cgrp_dfl_root;
 }
 
+struct cgroup *cgroup_get_local_root(struct cgroup *cgrp)
+{
+	/*
+	 * Walk up kn->parent to the nearest root cgroup: the host cgroup
+	 * root (no parent) or the first cgroup flagged CGRP_VE_ROOT.
+	 *
+	 *    <host_root_cgroup> -> local_root
+	 *     \                    ^
+	 *      <cgroup>            |
+	 *       \                  |
+	 *        <cgroup>   --->   from here
+	 *        \
+	 *         <ve_root_cgroup> -> local_root
+	 *         \                   ^
+	 *          <cgroup>           |
+	 *          \                  |
+	 *           <cgroup>  --->    from here
+	 */
+
+	while (cgrp->kn->parent && !test_bit(CGRP_VE_ROOT, &cgrp->flags))
+		cgrp = cgrp->kn->parent->priv;
+
+	return cgrp;
+}
+
+struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp)
+{
+	struct ve_struct *ve;
+	/* Caller must be in an RCU read-side section; no owner => get_ve0() */
+
+	cgrp = cgroup_get_local_root(cgrp);
+	ve = rcu_dereference(cgrp->ve_owner);
+	if (!ve)
+		ve = get_ve0();
+	return ve;
+}
+
 /* IDR wrappers which synchronize using cgroup_idr_lock */
 static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
 			    gfp_t gfp_mask)
@@ -1900,6 +1937,7 @@ void cgroup_mark_ve_root(struct ve_struct *ve)
 
 	list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
 		cgrp = link->cgrp;
+		rcu_assign_pointer(cgrp->ve_owner, ve);
 		set_bit(CGRP_VE_ROOT, &cgrp->flags);
 	}
 	link_ve_root_cpu_cgroup(cset->subsys[cpu_cgrp_id]);
@@ -1907,6 +1945,7 @@ void cgroup_mark_ve_root(struct ve_struct *ve)
 	rcu_read_unlock();
 
 	spin_unlock_irq(&css_set_lock);
+	synchronize_rcu();
 }
 
 void cgroup_unmark_ve_roots(struct ve_struct *ve)
@@ -1924,12 +1963,15 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
 
 	list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
 		cgrp = link->cgrp;
+		rcu_assign_pointer(cgrp->ve_owner, NULL);
 		clear_bit(CGRP_VE_ROOT, &cgrp->flags);
 	}
 unlock:
 	rcu_read_unlock();
 
 	spin_unlock_irq(&css_set_lock);
+	/* ve_owner == NULL will be visible */
+	synchronize_rcu();
 }
 
 struct cgroup *cgroup_get_ve_root1(struct cgroup *cgrp)
@@ -2114,6 +2156,8 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
 	struct dentry *dentry;
 	bool new_sb = false;
 
+	RCU_INIT_POINTER(root->cgrp.ve_owner, &ve0);
+
 	dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
 
 	/*


More information about the Devel mailing list