[Devel] [PATCH RHEL7 COMMIT] ve/cgroup: do not link a CT cpu cgroup twice into ve_root_list
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Sep 5 15:14:45 MSK 2018
Patch dropped, will be reworked.
--
Best regards,
Konstantin Khorenko,
Virtuozzo Linux Kernel Team
On 09/05/2018 01:18 PM, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh7-3.10.0-862.11.6.vz7.71.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh7-3.10.0-862.11.6.vz7.71.8
> ------>
> commit d68530aa0ee943861d9dc8dc898087c58ad4520f
> Author: Konstantin Khorenko <khorenko at virtuozzo.com>
> Date: Wed Sep 5 13:18:31 2018 +0300
>
> ve/cgroup: do not link a CT cpu cgroup twice into ve_root_list
>
>     A Container's cpu cgroup is linked into "ve_root_list" on CT start.
>     But if someone holds the CT's cpu cgroup while the CT is being stopped,
>     the next CT start tries to create the same cpu cgroup (this fails, it
>     already exists) and links this cpu cgroup into "ve_root_list" again,
>     thus corrupting the list.
>
>     As a consequence, calc_load_ve() goes into an endless loop.
>
>     The minimal fix is to add a linked/unlinked marker to struct task_group
>     and skip the redundant linking.
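>
>     A condensed userspace sketch of the idea (the real hunks are in the diff
>     below); the mutex and the hand-rolled circular list stand in for
>     task_group_lock and "ve_root_list":
>
>     #include <pthread.h>
>     #include <stdbool.h>
>     #include <stdio.h>
>
>     struct node {
>             struct node *prev, *next;  /* circular doubly-linked list */
>             bool linked;               /* set while the node is on the list */
>     };
>
>     static struct node root = { &root, &root, false };       /* "ve_root_list" */
>     static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* "task_group_lock" */
>
>     static void link_node(struct node *n)
>     {
>             pthread_mutex_lock(&lock);
>             if (!n->linked) {          /* skip the redundant second link */
>                     n->next = root.next;
>                     n->prev = &root;
>                     root.next->prev = n;
>                     root.next = n;
>                     n->linked = true;
>             }
>             pthread_mutex_unlock(&lock);
>     }
>
>     static void unlink_node(struct node *n)
>     {
>             pthread_mutex_lock(&lock);
>             if (n->linked) {
>                     n->prev->next = n->next;
>                     n->next->prev = n->prev;
>                     n->linked = false;
>             }
>             pthread_mutex_unlock(&lock);
>     }
>
>     int main(void)
>     {
>             struct node tg = { NULL, NULL, false };
>             int count = 0;
>
>             link_node(&tg);            /* first CT start */
>             link_node(&tg);            /* second CT start: a no-op now */
>
>             for (struct node *p = root.next; p != &root; p = p->next)
>                     count++;
>             printf("nodes on list: %d\n", count); /* prints 1, list is sane */
>
>             unlink_node(&tg);          /* CT stop (css_offline) */
>             return 0;
>     }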
>
> How to reproduce:
>
> # vzctl start 200
> # echo $$ > /sys/fs/cgroup/cpu/machine.slice/200/tasks
> # vzctl stop 200
>     // At this moment the VE cgroup has already been destroyed, but the cpu
>     // cgroup is still alive and linked into the "ve_root_list" list
>
> # vzctl start 200
>     // double add of the same tg (same cpu cgroup) to the "ve_root_list" list =>
>     // list corruption => endless loop in the next calc_load_ve() call
>     // (see the illustration below)
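>
>     Illustration (not part of the patch) of why the double add corrupts the
>     list: with the kernel's circular list_add() pointer manipulations, the
>     second add of the same node leaves tg.next pointing at tg itself, so a
>     walk such as the one in calc_load_ve() never gets back to the list head:
>
>     #include <stdio.h>
>
>     struct list_head { struct list_head *next, *prev; };
>
>     /* same pointer manipulations as the kernel's __list_add() */
>     static void list_add(struct list_head *new, struct list_head *head)
>     {
>             struct list_head *next = head->next;
>
>             next->prev = new;
>             new->next = next;
>             new->prev = head;
>             head->next = new;
>     }
>
>     int main(void)
>     {
>             struct list_head head = { &head, &head };
>             struct list_head tg   = { NULL, NULL }; /* the CT's task_group */
>             int steps = 0;
>
>             list_add(&tg, &head);   /* first CT start */
>             list_add(&tg, &head);   /* second CT start: now tg.next == &tg */
>
>             /* bounded here; the real list walk would spin forever */
>             for (struct list_head *p = head.next; p != &head && steps < 10;
>                  p = p->next)
>                     steps++;
>
>             printf("gave up after %d steps, tg.next == &tg: %d\n",
>                    steps, tg.next == &tg); /* 10 steps, 1 */
>             return 0;
>     }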
>
> https://jira.sw.ru/browse/PSBM-88251
>
> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> ---
>  include/linux/sched.h |  5 ++++-
>  kernel/sched/core.c   | 26 ++++++++++++++++++--------
>  kernel/sched/sched.h  |  1 +
>  3 files changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 623572043ece..00d7dbca8ed6 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -3346,6 +3346,9 @@ void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
> #ifdef CONFIG_VE
> struct cgroup;
> extern void link_ve_root_cpu_cgroup(struct cgroup *);
> -#endif
> +extern void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp);
> +#else /* CONFIG_VE */
> +static inline void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp) { }
> +#endif /* CONFIG_VE */
>
> #endif
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index e572d2b14a7f..7b30e17b16e3 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -9018,10 +9018,24 @@ void link_ve_root_cpu_cgroup(struct cgroup *cgrp)
>
> spin_lock_irqsave(&task_group_lock, flags);
> BUG_ON(!(cgrp->subsys[cpu_cgroup_subsys_id]->flags & CSS_ONLINE));
> - list_add_rcu(&tg->ve_root_list, &ve_root_list);
> + if (!tg->linked) {
> + list_add_rcu(&tg->ve_root_list, &ve_root_list);
> + tg->linked = 1;
> + }
> spin_unlock_irqrestore(&task_group_lock, flags);
> }
> -#endif
> +
> +void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp)
> +{
> + struct task_group *tg = cgroup_tg(cgrp);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&task_group_lock, flags);
> + list_del_rcu(&tg->ve_root_list);
> + tg->linked = 0;
> + spin_unlock_irqrestore(&task_group_lock, flags);
> +}
> +#endif /* CONFIG_VE */
>
> static void free_sched_group(struct task_group *tg)
> {
> @@ -9595,6 +9609,7 @@ static int cpu_cgroup_css_online(struct cgroup *cgrp)
>
> #ifdef CONFIG_VE
> INIT_LIST_HEAD(&tg->ve_root_list);
> + /* tg->linked == 0 due to kzalloc() in sched_create_group() */
> #endif
> if (!cgrp->parent)
> return 0;
> @@ -9614,13 +9629,8 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
> static void cpu_cgroup_css_offline(struct cgroup *cgrp)
> {
> struct task_group *tg = cgroup_tg(cgrp);
> -#ifdef CONFIG_VE
> - unsigned long flags;
>
> - spin_lock_irqsave(&task_group_lock, flags);
> - list_del_rcu(&tg->ve_root_list);
> - spin_unlock_irqrestore(&task_group_lock, flags);
> -#endif
> + unlink_ve_root_cpu_cgroup(cgrp);
> sched_offline_group(tg);
> }
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 98a216b6c391..5192e1bef06c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -263,6 +263,7 @@ struct task_group {
>
> #ifdef CONFIG_VE
> struct list_head ve_root_list;
> + bool linked;
> #endif
>
> struct taskstats __percpu *taskstats;
> .
>