[Devel] [PATCH v13 08/12] ve/cgroup: moved release_agent from system_wq to per-ve workqueues
Kirill Tkhai
ktkhai at virtuozzo.com
Tue Apr 21 10:44:29 MSK 2020
On 20.04.2020 15:13, Valeriy Vdovin wrote:
> Each VE should execute release agent notifications within its own
> workqueue. This way we achieve a more fine-grained control over
> release_agent work flushing at VE destruction.
>
> Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
> ---
> include/linux/cgroup.h | 1 +
> include/linux/ve.h | 10 +++++++++
> kernel/cgroup.c | 55 +++++++++++++++++++++++---------------------------
> kernel/ve/ve.c | 44 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 80 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 7ed718d..d0ce3cc 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -636,6 +636,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
> int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen);
>
> int cgroup_task_count(const struct cgroup *cgrp);
> +void cgroup_release_agent(struct work_struct *work);
>
> #ifdef CONFIG_VE
> void cgroup_mark_ve_roots(struct ve_struct *ve);
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 02e00e9..787a96f 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -126,7 +126,15 @@ struct ve_struct {
> #endif
> struct kmapset_key sysfs_perms_key;
>
> + /*
> + * cgroups, that want to notify about becoming
> + * empty, are linked to this release_list.
> + */
> + struct list_head release_list;
> + struct raw_spinlock release_list_lock;
> +
> struct workqueue_struct *wq;
> + struct work_struct release_agent_work;
>
> /*
> * All tasks, that belong to this ve, live
> @@ -197,6 +205,8 @@ call_usermodehelper_ve(struct ve_struct *ve, char *path, char **argv,
> }
> void do_update_load_avg_ve(void);
>
> +void ve_add_to_release_list(struct cgroup *cgrp);
> +void ve_rm_from_release_list(struct cgroup *cgrp);
> extern struct ve_struct *get_ve(struct ve_struct *ve);
> extern void put_ve(struct ve_struct *ve);
>
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index b19d749..6437344 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -269,10 +269,6 @@ static bool cgroup_lock_live_group(struct cgroup *cgrp)
>
> /* the list of cgroups eligible for automatic release. Protected by
> * release_list_lock */
> -static LIST_HEAD(release_list);
> -static DEFINE_RAW_SPINLOCK(release_list_lock);
> -static void cgroup_release_agent(struct work_struct *work);
> -static DECLARE_WORK(release_agent_work, cgroup_release_agent);
> static void check_for_release(struct cgroup *cgrp);
>
> /* Link structure for associating css_set objects with cgroups */
> @@ -4316,6 +4312,15 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
> mutex_unlock(&cgroup_mutex);
> /* ve_owner == NULL will be visible */
> synchronize_rcu();
> +
> + /*
> + * Anyone already waiting in this wq to execute
> + * cgroup_release_agent doesn't know that ve_owner is NULL,
> + * but we can wait for all of them at flush_workqueue.
> + * After it is complete no other cgroup can seep through
> + * to this ve's workqueue, so it's safe to shutdown ve.
> + */
> + flush_workqueue(ve->wq);
> }
>
> struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
> @@ -4610,11 +4615,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
>
> set_bit(CGRP_REMOVED, &cgrp->flags);
>
> - raw_spin_lock(&release_list_lock);
> - if (!list_empty(&cgrp->release_list))
> - list_del_init(&cgrp->release_list);
> - raw_spin_unlock(&release_list_lock);
> -
> + ve_rm_from_release_list(cgrp);
> /*
> * Remove @cgrp directory. The removal puts the base ref but we
> * aren't quite done with @cgrp yet, so hold onto it.
> @@ -5398,17 +5399,7 @@ static void check_for_release(struct cgroup *cgrp)
> * already queued for a userspace notification, queue
> * it now
> */
> - int need_schedule_work = 0;
> -
> - raw_spin_lock(&release_list_lock);
> - if (!cgroup_is_removed(cgrp) &&
> - list_empty(&cgrp->release_list)) {
> - list_add(&cgrp->release_list, &release_list);
> - need_schedule_work = 1;
> - }
> - raw_spin_unlock(&release_list_lock);
> - if (need_schedule_work)
> - schedule_work(&release_agent_work);
> + ve_add_to_release_list(cgrp);
> }
> }
>
> @@ -5435,20 +5426,24 @@ static void check_for_release(struct cgroup *cgrp)
> * this routine has no use for the exit status of the release agent
> * task, so no sense holding our caller up for that.
> */
> -static void cgroup_release_agent(struct work_struct *work)
> +void cgroup_release_agent(struct work_struct *work)
> {
> - BUG_ON(work != &release_agent_work);
> + struct ve_struct *ve;
> + ve = container_of(work, struct ve_struct, release_agent_work);
> mutex_lock(&cgroup_mutex);
> - raw_spin_lock(&release_list_lock);
> - while (!list_empty(&release_list)) {
> + raw_spin_lock(&ve->release_list_lock);
> + while (!list_empty(&ve->release_list)) {
> char *argv[3], *envp[3];
> int i, err;
> char *pathbuf = NULL, *agentbuf = NULL;
> - struct cgroup *cgrp = list_entry(release_list.next,
> - struct cgroup,
> - release_list);
> + struct cgroup *cgrp;
> +
> + cgrp = list_entry(ve->release_list.next,
> + struct cgroup,
> + release_list);
> +
> list_del_init(&cgrp->release_list);
> - raw_spin_unlock(&release_list_lock);
> + raw_spin_unlock(&ve->release_list_lock);
> pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
> if (!pathbuf)
> goto continue_free;
> @@ -5483,9 +5478,9 @@ static void cgroup_release_agent(struct work_struct *work)
> continue_free:
> kfree(pathbuf);
> kfree(agentbuf);
> - raw_spin_lock(&release_list_lock);
> + raw_spin_lock(&ve->release_list_lock);
> }
> - raw_spin_unlock(&release_list_lock);
> + raw_spin_unlock(&ve->release_list_lock);
> mutex_unlock(&cgroup_mutex);
> }
>
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index db3b600..eb266ae 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -87,6 +87,11 @@ struct ve_struct ve0 = {
> .netif_max_nr = INT_MAX,
> .arp_neigh_nr = ATOMIC_INIT(0),
> .nd_neigh_nr = ATOMIC_INIT(0),
> + .release_list_lock = __RAW_SPIN_LOCK_UNLOCKED(
> + ve0.release_list_lock),
> + .release_list = LIST_HEAD_INIT(ve0.release_list),
> + .release_agent_work = __WORK_INITIALIZER(ve0.release_agent_work,
> + cgroup_release_agent),
> };
> EXPORT_SYMBOL(ve0);
>
> @@ -498,6 +503,41 @@ static void ve_workqueue_stop(struct ve_struct *ve)
> destroy_workqueue(ve->wq);
> }
>
> +void ve_add_to_release_list(struct cgroup *cgrp)
> +{
> + struct ve_struct *ve;
> + int need_schedule_work = 0;
> +
> + rcu_read_lock();
> + ve = cgroup_get_ve_owner(cgrp);
> +
> + raw_spin_lock(&ve->release_list_lock);
> + if (!cgroup_is_removed(cgrp) &&
> + list_empty(&cgrp->release_list)) {
> + list_add(&cgrp->release_list, &ve->release_list);
> + need_schedule_work = 1;
> + }
> + raw_spin_unlock(&ve->release_list_lock);
> +
> + if (need_schedule_work)
> + queue_work(ve->wq, &ve->release_agent_work);
> +
> + rcu_read_unlock();
> +}
> +
> +void ve_rm_from_release_list(struct cgroup *cgrp)
> +{
> + struct ve_struct *ve;
> + rcu_read_lock();
> + ve = cgroup_get_ve_owner(cgrp);
> +
> + raw_spin_lock(&ve->release_list_lock);
> + if (!list_empty(&cgrp->release_list))
> + list_del_init(&cgrp->release_list);
> + raw_spin_unlock(&ve->release_list_lock);
> + rcu_read_unlock();
> +}
> +
> /* under ve->op_sem write-lock */
> static int ve_start_container(struct ve_struct *ve)
> {
> @@ -693,6 +733,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg)
> if (!ve->ve_name)
> goto err_name;
>
> + INIT_WORK(&ve->release_agent_work, cgroup_release_agent);
> + raw_spin_lock_init(&ve->release_list_lock);
> +
> ve->_randomize_va_space = ve0._randomize_va_space;
>
> ve->features = VE_FEATURES_DEF;
> @@ -727,6 +770,7 @@ do_init:
> INIT_LIST_HEAD(&ve->devices);
> INIT_LIST_HEAD(&ve->ve_list);
> INIT_LIST_HEAD(&ve->devmnt_list);
> + INIT_LIST_HEAD(&ve->release_list);
> mutex_init(&ve->devmnt_mutex);
>
> #ifdef CONFIG_AIO
>
More information about the Devel
mailing list