[Devel] [PATCH RHEL7 COMMIT] ve/cgroup: moved release_agent from system_wq to per-ve workqueues
Vasily Averin
vvs at virtuozzo.com
Thu Aug 6 08:31:43 MSK 2020
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.el7
------>
commit 48ec2f8e0ba238ab142e456e621071e02f77e114
Author: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Date: Thu Aug 6 08:31:42 2020 +0300
ve/cgroup: moved release_agent from system_wq to per-ve workqueues
Each VE should execute release agent notifications within its own
workqueue. This way we achieve a more fine-grained control over
release_agent work flushing at VE destruction.
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
include/linux/cgroup.h | 1 +
include/linux/ve.h | 10 +++++++++
kernel/cgroup.c | 55 +++++++++++++++++++++++---------------------------
kernel/ve/ve.c | 44 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 80 insertions(+), 30 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9b9465c..5f1460d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -671,6 +671,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen);
int cgroup_task_count(const struct cgroup *cgrp);
+void cgroup_release_agent(struct work_struct *work);
#ifdef CONFIG_VE
void cgroup_mark_ve_roots(struct ve_struct *ve);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 542fd08..4dbd216 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -126,7 +126,15 @@ struct ve_struct {
#endif
struct kmapset_key sysfs_perms_key;
+ /*
+ * cgroups, that want to notify about becoming
+ * empty, are linked to this release_list.
+ */
+ struct list_head release_list;
+ struct raw_spinlock release_list_lock;
+
struct workqueue_struct *wq;
+ struct work_struct release_agent_work;
/*
* All tasks, that belong to this ve, live
@@ -197,6 +205,8 @@ call_usermodehelper_ve(struct ve_struct *ve, char *path, char **argv,
}
void do_update_load_avg_ve(void);
+void ve_add_to_release_list(struct cgroup *cgrp);
+void ve_rm_from_release_list(struct cgroup *cgrp);
extern struct ve_struct *get_ve(struct ve_struct *ve);
extern void put_ve(struct ve_struct *ve);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index fe02e92..d0cd1b4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -273,10 +273,6 @@ static bool cgroup_lock_live_group(struct cgroup *cgrp)
/* the list of cgroups eligible for automatic release. Protected by
* release_list_lock */
-static LIST_HEAD(release_list);
-static DEFINE_RAW_SPINLOCK(release_list_lock);
-static void cgroup_release_agent(struct work_struct *work);
-static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
/* Link structure for associating css_set objects with cgroups */
@@ -4576,6 +4572,15 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
mutex_unlock(&cgroup_mutex);
/* ve_owner == NULL will be visible */
synchronize_rcu();
+
+ /*
+ * Anyone already waiting in this wq to execute
+ * cgroup_release_agent doesn't know that ve_owner is NULL,
+ * but we can wait for all of them at flush_workqueue.
+ * After it is complete no other cgroup can seep through
+ * to this ve's workqueue, so it's safe to shutdown ve.
+ */
+ flush_workqueue(ve->wq);
}
struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
@@ -4875,11 +4880,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
set_bit(CGRP_REMOVED, &cgrp->flags);
- raw_spin_lock(&release_list_lock);
- if (!list_empty(&cgrp->release_list))
- list_del_init(&cgrp->release_list);
- raw_spin_unlock(&release_list_lock);
-
+ ve_rm_from_release_list(cgrp);
/*
* Remove @cgrp directory. The removal puts the base ref but we
* aren't quite done with @cgrp yet, so hold onto it.
@@ -5667,17 +5668,7 @@ static void check_for_release(struct cgroup *cgrp)
* already queued for a userspace notification, queue
* it now
*/
- int need_schedule_work = 0;
-
- raw_spin_lock(&release_list_lock);
- if (!cgroup_is_removed(cgrp) &&
- list_empty(&cgrp->release_list)) {
- list_add(&cgrp->release_list, &release_list);
- need_schedule_work = 1;
- }
- raw_spin_unlock(&release_list_lock);
- if (need_schedule_work)
- schedule_work(&release_agent_work);
+ ve_add_to_release_list(cgrp);
}
}
@@ -5704,20 +5695,24 @@ static void check_for_release(struct cgroup *cgrp)
* this routine has no use for the exit status of the release agent
* task, so no sense holding our caller up for that.
*/
-static void cgroup_release_agent(struct work_struct *work)
+void cgroup_release_agent(struct work_struct *work)
{
- BUG_ON(work != &release_agent_work);
+ struct ve_struct *ve;
+ ve = container_of(work, struct ve_struct, release_agent_work);
mutex_lock(&cgroup_mutex);
- raw_spin_lock(&release_list_lock);
- while (!list_empty(&release_list)) {
+ raw_spin_lock(&ve->release_list_lock);
+ while (!list_empty(&ve->release_list)) {
char *argv[3], *envp[3];
int i, err;
char *pathbuf = NULL, *agentbuf = NULL;
- struct cgroup *cgrp = list_entry(release_list.next,
- struct cgroup,
- release_list);
+ struct cgroup *cgrp;
+
+ cgrp = list_entry(ve->release_list.next,
+ struct cgroup,
+ release_list);
+
list_del_init(&cgrp->release_list);
- raw_spin_unlock(&release_list_lock);
+ raw_spin_unlock(&ve->release_list_lock);
pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!pathbuf)
goto continue_free;
@@ -5752,9 +5747,9 @@ static void cgroup_release_agent(struct work_struct *work)
continue_free:
kfree(pathbuf);
kfree(agentbuf);
- raw_spin_lock(&release_list_lock);
+ raw_spin_lock(&ve->release_list_lock);
}
- raw_spin_unlock(&release_list_lock);
+ raw_spin_unlock(&ve->release_list_lock);
mutex_unlock(&cgroup_mutex);
}
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 711050c..9e6bb8b 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -87,6 +87,11 @@ struct ve_struct ve0 = {
.netif_max_nr = INT_MAX,
.arp_neigh_nr = ATOMIC_INIT(0),
.nd_neigh_nr = ATOMIC_INIT(0),
+ .release_list_lock = __RAW_SPIN_LOCK_UNLOCKED(
+ ve0.release_list_lock),
+ .release_list = LIST_HEAD_INIT(ve0.release_list),
+ .release_agent_work = __WORK_INITIALIZER(ve0.release_agent_work,
+ cgroup_release_agent),
};
EXPORT_SYMBOL(ve0);
@@ -503,6 +508,41 @@ static void ve_workqueue_stop(struct ve_struct *ve)
destroy_workqueue(ve->wq);
}
+void ve_add_to_release_list(struct cgroup *cgrp)
+{
+ struct ve_struct *ve;
+ int need_schedule_work = 0;
+
+ rcu_read_lock();
+ ve = cgroup_get_ve_owner(cgrp);
+
+ raw_spin_lock(&ve->release_list_lock);
+ if (!cgroup_is_removed(cgrp) &&
+ list_empty(&cgrp->release_list)) {
+ list_add(&cgrp->release_list, &ve->release_list);
+ need_schedule_work = 1;
+ }
+ raw_spin_unlock(&ve->release_list_lock);
+
+ if (need_schedule_work)
+ queue_work(ve->wq, &ve->release_agent_work);
+
+ rcu_read_unlock();
+}
+
+void ve_rm_from_release_list(struct cgroup *cgrp)
+{
+ struct ve_struct *ve;
+ rcu_read_lock();
+ ve = cgroup_get_ve_owner(cgrp);
+
+ raw_spin_lock(&ve->release_list_lock);
+ if (!list_empty(&cgrp->release_list))
+ list_del_init(&cgrp->release_list);
+ raw_spin_unlock(&ve->release_list_lock);
+ rcu_read_unlock();
+}
+
/* under ve->op_sem write-lock */
static int ve_start_container(struct ve_struct *ve)
{
@@ -698,6 +738,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
if (!ve->ve_name)
goto err_name;
+ INIT_WORK(&ve->release_agent_work, cgroup_release_agent);
+ raw_spin_lock_init(&ve->release_list_lock);
+
ve->_randomize_va_space = ve0._randomize_va_space;
ve->features = VE_FEATURES_DEF;
@@ -732,6 +775,7 @@ do_init:
INIT_LIST_HEAD(&ve->devices);
INIT_LIST_HEAD(&ve->ve_list);
INIT_LIST_HEAD(&ve->devmnt_list);
+ INIT_LIST_HEAD(&ve->release_list);
mutex_init(&ve->devmnt_mutex);
#ifdef CONFIG_AIO
More information about the Devel
mailing list