[Devel] [PATCH RHEL7 COMMIT] ms/cgroup: split cgroup destruction into two steps

Konstantin Khorenko khorenko at virtuozzo.com
Fri Aug 28 03:49:26 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-229.7.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.6.3
------>
commit 33f3496e5d1342b4497058d017261d3b3fde0fe1
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Fri Aug 28 14:49:26 2015 +0400

    ms/cgroup: split cgroup destruction into two steps
    
    Patchset description:
    
    Pulling upstream patches converting css refcnt to percpu_ref.
    
    https://jira.sw.ru/browse/PSBM-34174
    
    Kent Overstreet (2):
      percpu: implement generic percpu refcounting
      percpu-refcount: Don't use silly cmpxchg()
    
    Tejun Heo (9):
      percpu-refcount: consistently use plain (non-sched) RCU
      percpu-refcount: cosmetic updates
      percpu-refcount: add __must_check to percpu_ref_init() and don't use
        ACCESS_ONCE() in percpu_ref_kill_rcu()
      percpu-refcount: implement percpu_ref_cancel_init()
      percpu-refcount: implement percpu_tryget() along with
        percpu_ref_kill_and_confirm()
      percpu-refcount: use RCU-sched instead of normal RCU
      cgroup: reorder the operations in cgroup_destroy_locked()
      cgroup: split cgroup destruction into two steps
      cgroup: use percpu refcnt for cgroup_subsys_states
    
    ===
    This patch description:
    
    From: Tejun Heo <tj at kernel.org>
    
    Split cgroup_destroy_locked() into two steps and put the latter half
    into cgroup_offline_fn() which is executed from a work item.  The
    latter half is responsible for offlining the css's, removing the
    cgroup from internal lists, and propagating release notification to
    the parent.  The separation is to allow using percpu refcnt for css.
    
    Note that this allows for other cgroup operations to happen between
    the first and second halves of destruction, including creating a new
    cgroup with the same name.  As the target cgroup is marked DEAD in the
    first half and cgroup internals don't care about the names of cgroups,
    this should be fine.  A comment explaining this will be added by the
    next patch which implements the actual percpu refcnting.
    
    As RCU freeing is guaranteed to happen after the second step of
    destruction, we can use the same work item for both.  This patch
    renames cgroup->free_work to ->destroy_work and uses it for both
    purposes.  INIT_WORK() is now performed right before queueing the work
    item.
    
    Signed-off-by: Tejun Heo <tj at kernel.org>
    Acked-by: Li Zefan <lizefan at huawei.com>
    (cherry picked from commit ea15f8ccdb430af1e8bc9b4e19a230eb4c356777)
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    
    Conflicts:
    	kernel/cgroup.c
---
 include/linux/cgroup.h |  2 +-
 kernel/cgroup.c        | 25 ++++++++++++++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 626bc84..d34c42b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -259,7 +259,7 @@ struct cgroup {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
-	struct work_struct free_work;
+	struct work_struct destroy_work;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 062e0f4..6fd7038 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -213,6 +213,7 @@ static struct cgroup_name root_cgroup_name = { .name = "/" };
  */
 static int need_forkexit_callback __read_mostly;
 
+static void cgroup_offline_fn(struct work_struct *work);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 			      struct cftype cfts[], bool is_add);
@@ -836,7 +837,7 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
 
 static void cgroup_free_fn(struct work_struct *work)
 {
-	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
+	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
 	struct cgroup_subsys *ss;
 
 	mutex_lock(&cgroup_mutex);
@@ -881,7 +882,8 @@ static void cgroup_free_rcu(struct rcu_head *head)
 {
 	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
 
-	queue_work(cgroup_destroy_wq, &cgrp->free_work);
+	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
+	queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
 }
 
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -1416,7 +1418,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->allcg_node);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
-	INIT_WORK(&cgrp->free_work, cgroup_free_fn);
 	mutex_init(&cgrp->pidlist_mutex);
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);
@@ -4355,7 +4356,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
 	struct dentry *d = cgrp->dentry;
-	struct cgroup *parent = cgrp->parent;
 	struct cgroup_event *event, *tmp;
 	struct cgroup_subsys *ss;
 
@@ -4402,6 +4402,21 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	}
 	spin_unlock(&cgrp->event_list_lock);
 
+	INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
+	queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
+
+	return 0;
+};
+
+static void cgroup_offline_fn(struct work_struct *work)
+{
+	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
+	struct cgroup *parent = cgrp->parent;
+	struct dentry *d = cgrp->dentry;
+	struct cgroup_subsys *ss;
+
+	mutex_lock(&cgroup_mutex);
+
 	/* tell subsystems to initate destruction */
 	for_each_subsys(cgrp->root, ss)
 		offline_css(ss, cgrp);
@@ -4425,7 +4440,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	set_bit(CGRP_RELEASABLE, &parent->flags);
 	check_for_release(parent);
 
-	return 0;
+	mutex_unlock(&cgroup_mutex);
 }
 
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)



More information about the Devel mailing list