[Devel] [PATCH rh7 2/2] sched: Fix double put_prev_task_fair() because of trigger_cpulimit_balance()

Kirill Tkhai <ktkhai at odin.com>
Thu Aug 20 04:27:05 PDT 2015


The scheduler code is written with the assumption that the rq->curr task can't
have already been put. For example, in sched_move_task() we check for

	running = task_current(rq, tsk);

and call put_prev_task() if "running" is true.
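
In this kernel, the relevant fragment of sched_move_task() looks roughly like
this (a sketch of the 3.10-era code, not verbatim):

	running = task_current(rq, tsk);
	on_rq = tsk->on_rq;

	if (on_rq)
		dequeue_task(rq, tsk, 0);
	if (unlikely(running))
		tsk->sched_class->put_prev_task(rq, tsk);

Note that task_current() just compares rq->curr with tsk, so "running" stays
true even after the task has been put.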

When we unlock rq->lock in trigger_cpulimit_balance(), the task has already
been put by put_prev_task_fair(), so a concurrent
cpu_cgroup_attach_task()->sched_move_task() sees "running" as true and puts it
one more time.
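
Schematically (an illustrative interleaving):

	schedule() on CPU X                sched_move_task() on CPU Y
	put_prev_task_fair(rq, prev)
	  trigger_cpulimit_balance(prev)
	    raw_spin_unlock(&rq->lock)
	                                   raw_spin_lock(&rq->lock)
	                                   running = task_current(rq, prev) /* true */
	                                   put_prev_task(rq, prev) /* double put */
	    raw_spin_lock(&rq->lock)

With this change trigger_cpulimit_balance() no longer drops rq->lock: it only
marks the runqueue with ACTIVE_BALANCE_CPULIMIT and requests a resched, and
pre_schedule() becomes the only place which unlocks rq->lock and queues
cpulimit_balance_cpu_stop().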

https://jira.sw.ru/browse/PSBM-35082

Signed-off-by: Kirill Tkhai <ktkhai at odin.com>
---
 kernel/sched/core.c  |    8 ++++++++
 kernel/sched/fair.c  |   34 +++++++++++++++-------------------
 kernel/sched/sched.h |    2 ++
 3 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 30f39a25..bb6ee5a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2209,6 +2209,14 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
 {
 	if (prev->sched_class->pre_schedule)
 		prev->sched_class->pre_schedule(rq, prev);
+
+	if (rq->active_balance == ACTIVE_BALANCE_CPULIMIT) {
+		raw_spin_unlock(&rq->lock);
+		stop_one_cpu_nowait(rq->cpu,
+				    cpulimit_balance_cpu_stop, rq,
+				    &rq->active_balance_work);
+		raw_spin_lock(&rq->lock);
+	}
 }
 
 /* rq->lock is NOT held, but preemption is disabled */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 167d0f6..32188a8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5059,8 +5059,6 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 }
 
 #if defined(CONFIG_SMP) && defined(CONFIG_CFS_CPULIMIT)
-static int cpulimit_balance_cpu_stop(void *data);
-
 static inline void trigger_cpulimit_balance(struct task_struct *p)
 {
 	struct rq *this_rq;
@@ -5068,18 +5066,16 @@ static inline void trigger_cpulimit_balance(struct task_struct *p)
 	int this_cpu, cpu, target_cpu = -1;
 	struct sched_domain *sd;
 
-	if (!p->se.on_rq)
-		return;
-
 	this_rq = rq_of(cfs_rq_of(&p->se));
 	this_cpu = cpu_of(this_rq);
 
+	if (!p->se.on_rq || this_rq->active_balance)
+		return;
+
 	cfs_rq = top_cfs_rq_of(&p->se);
 	if (check_cpulimit_spread(cfs_rq, this_cpu) >= 0)
 		return;
 
-	raw_spin_unlock(&this_rq->lock);
-
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -5096,17 +5092,10 @@ static inline void trigger_cpulimit_balance(struct task_struct *p)
 unlock:
 	rcu_read_unlock();
 
-	raw_spin_lock(&this_rq->lock);
 	if (target_cpu >= 0) {
-		if (!this_rq->active_balance) {
-			this_rq->active_balance = 1;
-			this_rq->push_cpu = target_cpu;
-			raw_spin_unlock(&this_rq->lock);
-			stop_one_cpu_nowait(this_cpu,
-					    cpulimit_balance_cpu_stop, this_rq,
-					    &this_rq->active_balance_work);
-			raw_spin_lock(&this_rq->lock);
-		}
+		this_rq->active_balance = ACTIVE_BALANCE_CPULIMIT;
+		this_rq->push_cpu = target_cpu;
+		resched_task(this_rq->curr);
 	}
 }
 #else
@@ -5777,7 +5766,7 @@ static int do_cpulimit_balance(struct lb_env *env)
 	return pushed;
 }
 
-static int cpulimit_balance_cpu_stop(void *data)
+int cpulimit_balance_cpu_stop(void *data)
 {
 	struct rq *rq = data;
 	int cpu = cpu_of(rq);
@@ -5787,7 +5776,8 @@ static int cpulimit_balance_cpu_stop(void *data)
 
 	raw_spin_lock_irq(&rq->lock);
 
-	if (unlikely(cpu != smp_processor_id() || !rq->active_balance))
+	if (unlikely(cpu != smp_processor_id() || !rq->active_balance ||
+		     !cpu_online(target_cpu)))
 		goto out_unlock;
 
 	if (unlikely(!rq->nr_running))
@@ -7269,6 +7259,11 @@ static int active_load_balance_cpu_stop(void *data)
 	return 0;
 }
 
+static void pre_schedule_fair(struct rq *rq, struct task_struct *prev)
+{
+	trigger_cpulimit_balance(prev);
+}
+
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * idle load balancing details
@@ -8171,6 +8166,7 @@ const struct sched_class fair_sched_class = {
 	.rq_offline		= rq_offline_fair,
 
 	.task_waking		= task_waking_fair,
+	.pre_schedule		= pre_schedule_fair,
 #endif
 
 	.set_curr_task          = set_curr_task_fair,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 44c5ad6..ae01198 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -509,6 +509,7 @@ struct rq {
 	/* For active balancing */
 	int post_schedule;
 	int active_balance;
+#define ACTIVE_BALANCE_CPULIMIT		0x10000
 	int push_cpu;
 	struct cpu_stop_work active_balance_work;
 	/* cpu of this runqueue: */
@@ -1111,6 +1112,7 @@ extern const struct sched_class idle_sched_class;
 
 extern void update_group_power(struct sched_domain *sd, int cpu);
 
+extern int cpulimit_balance_cpu_stop(void *data);
 extern void trigger_load_balance(struct rq *rq, int cpu);
 extern void idle_balance(int this_cpu, struct rq *this_rq);
 