[Devel] [PATCH vz7] ve/cgroup: do not link a CT cpu cgroup twice into ve_root_list

Kirill Tkhai ktkhai at virtuozzo.com
Wed Sep 5 15:47:21 MSK 2018


On 04.09.2018 18:55, Konstantin Khorenko wrote:
> A Container's cpu cgroup is linked into "ve_root_list" on CT start.
> But if someone still holds the CT's cpu cgroup while the CT is being
> stopped, the next CT start tries to create the same cpu cgroup (which
> fails, since it already exists) and links this cpu cgroup into
> "ve_root_list" again, thus corrupting the list.
> 
> As a consequence, calc_load_ve() goes into an endless loop.
> 
> The minimal fix is to add a marker to task_group that tracks whether
> it is linked, and to skip the redundant linking.
> 
> How to reproduce:
> 
>  # vzctl start 200
>  # echo $$ > /sys/fs/cgroup/cpu/machine.slice/200/tasks
>  # vzctl stop 200
>  // At this point the VE cgroup has been destroyed, but the cpu cgroup
>  // is still alive and linked into the "ve_root_list" list
> 
>  # vzctl start 200
>  // the same tg (same cpu cgroup) is added to "ve_root_list" twice =>
>  // list corruption => endless loop in the next calc_load_ve() call
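
To see why the walk never terminates: list_add() inserts a node
between the head and head->next, so adding an already-linked node
ends up making the node's next pointer point to itself, and any
subsequent walk spins on it forever. A minimal userspace sketch of
the effect (illustrative only; it mirrors the kernel's __list_add()):

	#include <stdio.h>

	/* Simplified circular doubly-linked list, as in linux/list.h */
	struct list_head {
		struct list_head *next, *prev;
	};

	static void list_add(struct list_head *new, struct list_head *head)
	{
		struct list_head *next = head->next;

		next->prev = new;
		new->next = next;
		new->prev = head;
		head->next = new;
	}

	int main(void)
	{
		struct list_head head = { &head, &head };
		struct list_head tg;	/* stands in for the CT's task_group */
		struct list_head *pos;
		int i;

		list_add(&tg, &head);	/* first CT start: fine */
		list_add(&tg, &head);	/* second start of the same tg:
					 * now tg.next == &tg */

		/* A list_for_each()-style walk never gets back to &head;
		 * cap it at 5 steps so the demo terminates.
		 */
		for (pos = head.next, i = 0; pos != &head && i < 5;
		     pos = pos->next, i++)
			printf("step %d: pos == &tg: %d\n", i, pos == &tg);

		return 0;
	}

This self-referencing next pointer is exactly what calc_load_ve()
keeps following when it iterates "ve_root_list".
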
> 
> https://jira.sw.ru/browse/PSBM-88251
> 
> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> ---
>  include/linux/sched.h |  5 ++++-
>  kernel/sched/core.c   | 26 ++++++++++++++++++--------
>  kernel/sched/sched.h  |  1 +
>  3 files changed, 23 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 623572043ece..00d7dbca8ed6 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -3346,6 +3346,9 @@ void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
>  #ifdef CONFIG_VE
>  struct cgroup;
>  extern void link_ve_root_cpu_cgroup(struct cgroup *);
> -#endif
> +extern void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp);
> +#else /* CONFIG_VE */
> +static inline void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp) { }
> +#endif /* CONFIG_VE */
>  
>  #endif
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index e572d2b14a7f..7b30e17b16e3 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -9018,10 +9018,24 @@ void link_ve_root_cpu_cgroup(struct cgroup *cgrp)
>  
>  	spin_lock_irqsave(&task_group_lock, flags);
>  	BUG_ON(!(cgrp->subsys[cpu_cgroup_subsys_id]->flags & CSS_ONLINE));
> -	list_add_rcu(&tg->ve_root_list, &ve_root_list);
> +	if (!tg->linked) {
> +		list_add_rcu(&tg->ve_root_list, &ve_root_list);
> +		tg->linked = 1;

Just an idea: we could rely on INIT_LIST_HEAD() at cgroup allocation
and use list_empty() here instead of the new flag.
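
Something like this on the link side (an untested sketch):

	spin_lock_irqsave(&task_group_lock, flags);
	BUG_ON(!(cgrp->subsys[cpu_cgroup_subsys_id]->flags & CSS_ONLINE));
	/* The list head itself encodes the linked state */
	if (list_empty(&tg->ve_root_list))
		list_add_rcu(&tg->ve_root_list, &ve_root_list);
	spin_unlock_irqrestore(&task_group_lock, flags);

The unlink side would then have to re-initialize the entry after
list_del_rcu(), so that the next list_empty() check sees it as
unlinked again. That re-init must not race with RCU walkers in
calc_load_ve() that may still be standing on the entry, so it needs
some care there.
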

> +	}
>  	spin_unlock_irqrestore(&task_group_lock, flags);
>  }
> -#endif
> +
> +void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp)
> +{
> +	struct task_group *tg = cgroup_tg(cgrp);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&task_group_lock, flags);
> +	list_del_rcu(&tg->ve_root_list);
> +	tg->linked = 0;
> +	spin_unlock_irqrestore(&task_group_lock, flags);
> +}
> +#endif /* CONFIG_VE */
>  
>  static void free_sched_group(struct task_group *tg)
>  {
> @@ -9595,6 +9609,7 @@ static int cpu_cgroup_css_online(struct cgroup *cgrp)
>  
>  #ifdef CONFIG_VE
>  	INIT_LIST_HEAD(&tg->ve_root_list);
> +	/* tg->linked == 0 due to kzalloc() in sched_create_group() */
>  #endif
>  	if (!cgrp->parent)
>  		return 0;
> @@ -9614,13 +9629,8 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
>  static void cpu_cgroup_css_offline(struct cgroup *cgrp)
>  {
>  	struct task_group *tg = cgroup_tg(cgrp);
> -#ifdef CONFIG_VE
> -	unsigned long flags;
>  
> -	spin_lock_irqsave(&task_group_lock, flags);
> -	list_del_rcu(&tg->ve_root_list);
> -	spin_unlock_irqrestore(&task_group_lock, flags);
> -#endif
> +	unlink_ve_root_cpu_cgroup(cgrp);
>  	sched_offline_group(tg);
>  }
>  
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 98a216b6c391..5192e1bef06c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -263,6 +263,7 @@ struct task_group {
>  
>  #ifdef CONFIG_VE
>  	struct list_head ve_root_list;
> +	bool linked;
>  #endif
>  
>  	struct taskstats __percpu *taskstats;
> 

