[Devel] [PATCH 3/3] sched: Call calc_load_ve() out of jiffies_lock

Kirill Tkhai ktkhai at virtuozzo.com
Wed Jul 18 13:50:30 MSK 2018


jiffies_lock is a big global seqlock, which is used in many
places. In combination with other actions, such as smp call
functions and readers of this seqlock, the system may hang for
a long time. There have already been a pair of hard lockups caused
by a long iteration in calc_load_ve() with jiffies_lock held,
which made readers of this seqlock spin for a long time.

This patch makes calc_load_ve() use a separate lock, which
relaxes jiffies_lock. I think this should be enough to resolve
the problem, since both crashes I saw contain readers of the
seqlock on parallel cpus, and we won't have to relax further
(say, by moving calc_load_ve() to softirq).

Note that the principal change this patch makes is that
jiffies_lock readers on parallel cpus won't wait till calc_load_ve()
finishes, so instead of (n_readers + 1) cpus waiting till
this function completes, there will be only 1 cpu doing that.

https://jira.sw.ru/browse/PSBM-84967

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
 kernel/sched/core.c       |   13 ++++++++-----
 kernel/time/tick-common.c |    8 +++++++-
 kernel/time/tick-sched.c  |    5 ++++-
 kernel/time/timekeeping.c |    5 ++++-
 4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 693823a1bd36..26f4959f9ab4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2885,10 +2885,16 @@ static LIST_HEAD(ve_root_list);
 
 void calc_load_ve(void)
 {
+	static DEFINE_SPINLOCK(load_ve_lock);
 	unsigned long nr_unint, nr_active;
 	struct task_group *tg;
 	int i;
 
+	/*
+	 * This is called without jiffies_lock, and here we protect
+	 * against very rare parallel execution on two or more cpus.
+	 */
+	spin_lock(&load_ve_lock);
 	rcu_read_lock();
 	list_for_each_entry_rcu(tg, &ve_root_list, ve_root_list) {
 		nr_active = 0;
@@ -2913,16 +2919,13 @@ void calc_load_ve(void)
 	rcu_read_unlock();
 
 	nr_unint = nr_uninterruptible() * FIXED_1;
-	/*
-	 * This is called from do_timer() only, which can't be excuted
-	 * in parallel on two or more cpus. So, we have to protect
-	 * the below modifications from readers only.
-	 */
+
 	write_seqcount_begin(&kstat_glob.nr_unint_avg_seq);
 	CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
 	CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
 	CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
 	write_seqcount_end(&kstat_glob.nr_unint_avg_seq);
+	spin_unlock(&load_ve_lock);
 }
 #endif /* CONFIG_VE */
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ed88d128c5ce..35462b2d236d 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -63,13 +63,19 @@ int tick_is_oneshot_available(void)
 static void tick_periodic(int cpu)
 {
 	if (tick_do_timer_cpu == cpu) {
+		bool calc_ve;
+
 		write_seqlock(&jiffies_lock);
 
 		/* Keep track of the next tick event */
 		tick_next_period = ktime_add(tick_next_period, tick_period);
 
-		do_timer(1);
+		calc_ve = do_timer(1);
 		write_sequnlock(&jiffies_lock);
+
+		if (calc_ve)
+			calc_load_ve();
+
 		update_wall_time();
 	}
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 85c7fe06eace..baba7c990290 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -52,6 +52,7 @@ struct tick_sched *tick_get_tick_sched(int cpu)
 static void tick_do_update_jiffies64(ktime_t now)
 {
 	unsigned long ticks = 0;
+	bool calc_ve = false;
 	ktime_t delta;
 
 	/*
@@ -80,7 +81,7 @@ static void tick_do_update_jiffies64(ktime_t now)
 			last_jiffies_update = ktime_add_ns(last_jiffies_update,
 							   incr * ticks);
 		}
-		do_timer(++ticks);
+		calc_ve = do_timer(++ticks);
 
 		/* Keep the tick_next_period variable up to date */
 		tick_next_period = ktime_add(last_jiffies_update, tick_period);
@@ -89,6 +90,8 @@ static void tick_do_update_jiffies64(ktime_t now)
 		return;
 	}
 	write_sequnlock(&jiffies_lock);
+	if (calc_ve)
+		calc_load_ve();
 	update_wall_time();
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 774651652076..be6dbff71d48 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2151,8 +2151,11 @@ EXPORT_SYMBOL(hardpps);
  */
 void xtime_update(unsigned long ticks)
 {
+	bool calc_ve;
 	write_seqlock(&jiffies_lock);
-	do_timer(ticks);
+	calc_ve = do_timer(ticks);
 	write_sequnlock(&jiffies_lock);
+	if (calc_ve)
+		calc_load_ve();
 	update_wall_time();
 }



More information about the Devel mailing list