[Devel] [PATCH vz7] ve/cgroup: do not link a CT cpu cgroup twice into ve_root_list

Konstantin Khorenko khorenko at virtuozzo.com
Tue Sep 4 18:55:38 MSK 2018


A Container's cpu cgroup is linked into the "ve_root_list" on CT start.
But if someone still holds the CT's cpu cgroup while the CT is being
stopped, the next CT start tries to create the same cpu cgroup (which
fails, as it already exists) and links this cpu cgroup into the
"ve_root_list" again, thus corrupting the list.

As a consequence, calc_load_ve() goes into an endless loop.

The minimal fix is to add a linked/unlinked marker to task_group and to
skip the redundant linking.
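
To make the failure mode concrete, below is a minimal userspace sketch
(illustrative only, not the kernel code): it mimics the list_add()
semantics of include/linux/list.h in a simplified, non-RCU form and shows
that a second add of the same node leaves the node pointing at itself, so
a walk of the list never returns to the head, while a "linked"-style
marker skips the redundant add and keeps the list intact. The names used
here (tg_demo, list_init, the "steps" guard) are made up for the demo.

/*
 * Illustrative userspace sketch (NOT the kernel code): a simplified,
 * non-RCU imitation of list_add() from include/linux/list.h, showing
 * why adding the same node twice corrupts the list and how a "linked"
 * marker avoids the second add.
 */
#include <stdio.h>
#include <stdbool.h>

struct list_head {
	struct list_head *next, *prev;
};

static void list_init(struct list_head *head)
{
	head->next = head;
	head->prev = head;
}

/* Insert "new" right after "head" (same effect as the kernel's list_add()). */
static void list_add(struct list_head *new, struct list_head *head)
{
	struct list_head *next = head->next;

	next->prev = new;
	new->next = next;
	new->prev = head;
	head->next = new;
}

/* Stand-in for struct task_group: only the fields relevant to the bug. */
struct tg_demo {
	struct list_head ve_root_list;
	bool linked;			/* the marker added by this patch */
};

int main(void)
{
	struct list_head ve_root_list;	/* plays the role of the global list */
	struct tg_demo tg = { .linked = false };
	struct list_head *pos;
	int steps = 0;

	list_init(&ve_root_list);
	list_init(&tg.ve_root_list);

	/* First CT start: the cpu cgroup's tg gets linked. */
	if (!tg.linked) {
		list_add(&tg.ve_root_list, &ve_root_list);
		tg.linked = true;
	}

	/*
	 * Second CT start while the cpu cgroup (and thus tg) still exists.
	 * Drop the "if" to see the corruption: the node ends up with
	 * ->next pointing to itself, so the walk below never terminates.
	 */
	if (!tg.linked) {
		list_add(&tg.ve_root_list, &ve_root_list);
		tg.linked = true;
	}

	/* Walk the list roughly the way calc_load_ve() walks ve_root_list. */
	for (pos = ve_root_list.next; pos != &ve_root_list; pos = pos->next) {
		if (++steps > 10) {
			printf("list corrupted: endless loop detected\n");
			return 1;
		}
	}
	printf("list intact, walked %d entry\n", steps);
	return 0;
}

In the kernel the equivalent state is the new tg->linked flag, set and
cleared under task_group_lock.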

How to reproduce:

 # vzctl start 200
 # echo $$ > /sys/fs/cgroup/cpu/machine.slice/200/tasks
 # vzctl stop 200
 // At this moment the VE cgroup has been destroyed, but the cpu cgroup is
 // still alive and linked into the "ve_root_list" list

 # vzctl start 200
 // double add of the same tg (same cpu cgroup) to the "ve_root_list" =>
 // list corruption => endless loop in the next calc_load_ve() call

https://jira.sw.ru/browse/PSBM-88251

Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
 include/linux/sched.h |  5 ++++-
 kernel/sched/core.c   | 26 ++++++++++++++++++--------
 kernel/sched/sched.h  |  1 +
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 623572043ece..00d7dbca8ed6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3346,6 +3346,9 @@ void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
 #ifdef CONFIG_VE
 struct cgroup;
 extern void link_ve_root_cpu_cgroup(struct cgroup *);
-#endif
+extern void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp);
+#else /* CONFIG_VE */
+static inline void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp) { }
+#endif /* CONFIG_VE */
 
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e572d2b14a7f..7b30e17b16e3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9018,10 +9018,24 @@ void link_ve_root_cpu_cgroup(struct cgroup *cgrp)
 
 	spin_lock_irqsave(&task_group_lock, flags);
 	BUG_ON(!(cgrp->subsys[cpu_cgroup_subsys_id]->flags & CSS_ONLINE));
-	list_add_rcu(&tg->ve_root_list, &ve_root_list);
+	if (!tg->linked) {
+		list_add_rcu(&tg->ve_root_list, &ve_root_list);
+		tg->linked = 1;
+	}
 	spin_unlock_irqrestore(&task_group_lock, flags);
 }
-#endif
+
+void unlink_ve_root_cpu_cgroup(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	unsigned long flags;
+
+	spin_lock_irqsave(&task_group_lock, flags);
+	list_del_rcu(&tg->ve_root_list);
+	tg->linked = 0;
+	spin_unlock_irqrestore(&task_group_lock, flags);
+}
+#endif /* CONFIG_VE */
 
 static void free_sched_group(struct task_group *tg)
 {
@@ -9595,6 +9609,7 @@ static int cpu_cgroup_css_online(struct cgroup *cgrp)
 
 #ifdef CONFIG_VE
 	INIT_LIST_HEAD(&tg->ve_root_list);
+	/* tg->linked == 0 due to kzalloc() in sched_create_group() */
 #endif
 	if (!cgrp->parent)
 		return 0;
@@ -9614,13 +9629,8 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
 static void cpu_cgroup_css_offline(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
-#ifdef CONFIG_VE
-	unsigned long flags;
 
-	spin_lock_irqsave(&task_group_lock, flags);
-	list_del_rcu(&tg->ve_root_list);
-	spin_unlock_irqrestore(&task_group_lock, flags);
-#endif
+	unlink_ve_root_cpu_cgroup(cgrp);
 	sched_offline_group(tg);
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 98a216b6c391..5192e1bef06c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -263,6 +263,7 @@ struct task_group {
 
 #ifdef CONFIG_VE
 	struct list_head ve_root_list;
+	bool linked;
 #endif
 
 	struct taskstats __percpu *taskstats;
-- 
2.15.1


