[Devel] [PATCH RHEL7 COMMIT] ve/cgroup: do not link a CT cpu cgroup twice into ve_root_list

Konstantin Khorenko khorenko at virtuozzo.com
Wed Sep 5 15:14:45 MSK 2018


Patch dropped, will be reworked.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 09/05/2018 01:18 PM, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh7-3.10.0-862.11.6.vz7.71.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh7-3.10.0-862.11.6.vz7.71.8
> ------>
> commit d68530aa0ee943861d9dc8dc898087c58ad4520f
> Author: Konstantin Khorenko <khorenko at virtuozzo.com>
> Date:   Wed Sep 5 13:18:31 2018 +0300
>
>     ve/cgroup: do not link a CT cpu cgroup twice into ve_root_list
>
>     A Container's cpu cgroup is linked into "ve_root_list" on CT start.
>     But if someone holds the CT's cpu cgroup while the CT is being
>     stopped, the next CT start tries to create the same cpu cgroup
>     (this fails as it already exists) and links this cpu cgroup into
>     "ve_root_list" again, thus corrupting the list.
>
>     As a consequence, calc_load_ve() goes into an endless loop.
>
>     The minimal fix is to add a linked/unlinked marker to task_group
>     and skip the redundant linking.
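
To make the failure mode concrete: below is a small userspace sketch
(illustration only, not part of the patch) that mimics the kernel's
list_add().  Re-adding a node that is already on the list leaves the list
cycling without ever coming back to the head, so a list_for_each-style
walk like the one in calc_load_ve() never terminates.

/* Userspace model of the kernel's circular doubly-linked list. */
#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

/* Same insertion logic as the kernel's list_add(new, head). */
static void list_add(struct list_head *new, struct list_head *head)
{
        struct list_head *next = head->next;

        next->prev = new;
        new->next = next;
        new->prev = head;
        head->next = new;
}

int main(void)
{
        struct list_head head = { &head, &head };
        struct list_head tg;
        struct list_head *pos;
        int steps = 0;

        list_add(&tg, &head);   /* first CT start: tg linked, list is sane   */
        list_add(&tg, &head);   /* second CT start: same tg, tg->next == &tg */

        /* Walk the list the way calc_load_ve() would; cap it so the demo stops. */
        for (pos = head.next; pos != &head && steps < 10; pos = pos->next)
                steps++;

        printf("%d steps, %s\n", steps,
               pos == &head ? "reached list head" : "still looping: list corrupted");
        return 0;
}

With the double add the walk never gets back to the head (the sketch prints
"still looping"), which is exactly what happens to the rcu walk over
"ve_root_list" in calc_load_ve().
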
>
>     How to reproduce:
>
>      # vzctl start 200
>      # echo $$ > /sys/fs/cgroup/cpu/machine.slice/200/tasks
>      # vzctl stop 200
>      // At this moment the VE cgroup has been destroyed, but the cpu cgroup
>      // is still alive and linked into the "ve_root_list" list
>
>      # vzctl start 200
>      // double add of same tg (same cpu cgroup) to "ve_root_list" list =>
>      // list corruption => endless loop in next calc_load_ve() call
>
>     https://jira.sw.ru/browse/PSBM-88251
>
>     Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> ---
>  include/linux/sched.h |  5 ++++-
>  kernel/sched/core.c   | 26 ++++++++++++++++++--------
>  kernel/sched/sched.h  |  1 +
>  3 files changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 623572043ece..00d7dbca8ed6 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -3346,6 +3346,9 @@ void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
>  #ifdef CONFIG_VE
>  struct cgroup;
>  extern void link_ve_root_cpu_cgroup(struct cgroup *);
> -#endif
> +extern void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp);
> +#else /* CONFIG_VE */
> +static inline void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp) { }
> +#endif /* CONFIG_VE */
>
>  #endif
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index e572d2b14a7f..7b30e17b16e3 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -9018,10 +9018,24 @@ void link_ve_root_cpu_cgroup(struct cgroup *cgrp)
>
>  	spin_lock_irqsave(&task_group_lock, flags);
>  	BUG_ON(!(cgrp->subsys[cpu_cgroup_subsys_id]->flags & CSS_ONLINE));
> -	list_add_rcu(&tg->ve_root_list, &ve_root_list);
> +	if (!tg->linked) {
> +		list_add_rcu(&tg->ve_root_list, &ve_root_list);
> +		tg->linked = 1;
> +	}
>  	spin_unlock_irqrestore(&task_group_lock, flags);
>  }
> -#endif
> +
> +void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp)
> +{
> +	struct task_group *tg = cgroup_tg(cgrp);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&task_group_lock, flags);
> +	list_del_rcu(&tg->ve_root_list);
> +	tg->linked = 0;
> +	spin_unlock_irqrestore(&task_group_lock, flags);
> +}
> +#endif /* CONFIG_VE */
>
>  static void free_sched_group(struct task_group *tg)
>  {
> @@ -9595,6 +9609,7 @@ static int cpu_cgroup_css_online(struct cgroup *cgrp)
>
>  #ifdef CONFIG_VE
>  	INIT_LIST_HEAD(&tg->ve_root_list);
> +	/* tg->linked == 0 due to kzalloc() in sched_create_group() */
>  #endif
>  	if (!cgrp->parent)
>  		return 0;
> @@ -9614,13 +9629,8 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
>  static void cpu_cgroup_css_offline(struct cgroup *cgrp)
>  {
>  	struct task_group *tg = cgroup_tg(cgrp);
> -#ifdef CONFIG_VE
> -	unsigned long flags;
>
> -	spin_lock_irqsave(&task_group_lock, flags);
> -	list_del_rcu(&tg->ve_root_list);
> -	spin_unlock_irqrestore(&task_group_lock, flags);
> -#endif
> +	unlink_ve_root_cpu_cgroup(cgrp);
>  	sched_offline_group(tg);
>  }
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 98a216b6c391..5192e1bef06c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -263,6 +263,7 @@ struct task_group {
>
>  #ifdef CONFIG_VE
>  	struct list_head ve_root_list;
> +	bool linked;
>  #endif
>
>  	struct taskstats __percpu *taskstats;
> .
>

