[Devel] [PATCH RHEL7 COMMIT] sched: Call calc_load_ve() out of jiffies_lock
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jul 19 15:56:57 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-862.6.3.vz7.62.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.6.3.vz7.62.4
------>
commit e8d11b283ec87d45ea0101430ab226a6f7c34e19
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Thu Jul 19 15:56:57 2018 +0300
sched: Call calc_load_ve() out of jiffies_lock
jiffies_lock is a big global seqlock, which is used in many
places. In combination with other actions, like smp call
functions and readers of this seqlock, the system may hang for
a long time. There have already been a pair of hard lockups caused
by a long iteration in calc_load_ve() with jiffies_lock held,
which made readers of this seqlock spin for a long time.
This patch makes calc_load_ve() use a separate lock,
which relaxes jiffies_lock. I think this should be enough
to resolve the problem, since both of the crashes I saw contain
readers of the seqlock on parallel cpus, and we won't have
to relax further (say, by moving calc_load_ve() to softirq).
Note that the principal change this patch makes is that
jiffies_lock readers on parallel cpus won't wait till calc_load_ve()
finishes, so instead of (n_readers + 1) cpus waiting till
this function completes, there will be only 1 cpu doing that.
https://jira.sw.ru/browse/PSBM-84967
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
=========================
Patchset description:
Make calc_load_ve() be executed out of jiffies_lock
https://jira.sw.ru/browse/PSBM-84967
Kirill Tkhai (3):
sched: Make calc_global_load() return true when it's need to update ve statistic
sched: Export calc_load_ve()
sched: Call calc_load_ve() out of jiffies_lock
---
kernel/sched/core.c | 13 ++++++++-----
kernel/time/tick-common.c | 8 +++++++-
kernel/time/tick-sched.c | 5 ++++-
kernel/time/timekeeping.c | 5 ++++-
4 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 693823a1bd36..26f4959f9ab4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2885,10 +2885,16 @@ static LIST_HEAD(ve_root_list);
void calc_load_ve(void)
{
+ static DEFINE_SPINLOCK(load_ve_lock);
unsigned long nr_unint, nr_active;
struct task_group *tg;
int i;
+ /*
+ * This is called without jiffies_lock, and here we protect
+ * against very rare parallel execution on two or more cpus.
+ */
+ spin_lock(&load_ve_lock);
rcu_read_lock();
list_for_each_entry_rcu(tg, &ve_root_list, ve_root_list) {
nr_active = 0;
@@ -2913,16 +2919,13 @@ void calc_load_ve(void)
rcu_read_unlock();
nr_unint = nr_uninterruptible() * FIXED_1;
- /*
- * This is called from do_timer() only, which can't be excuted
- * in parallel on two or more cpus. So, we have to protect
- * the below modifications from readers only.
- */
+
write_seqcount_begin(&kstat_glob.nr_unint_avg_seq);
CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
write_seqcount_end(&kstat_glob.nr_unint_avg_seq);
+ spin_unlock(&load_ve_lock);
}
#endif /* CONFIG_VE */
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ed88d128c5ce..35462b2d236d 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -63,13 +63,19 @@ int tick_is_oneshot_available(void)
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) {
+ bool calc_ve;
+
write_seqlock(&jiffies_lock);
/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);
- do_timer(1);
+ calc_ve = do_timer(1);
write_sequnlock(&jiffies_lock);
+
+ if (calc_ve)
+ calc_load_ve();
+
update_wall_time();
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 85c7fe06eace..baba7c990290 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -52,6 +52,7 @@ struct tick_sched *tick_get_tick_sched(int cpu)
static void tick_do_update_jiffies64(ktime_t now)
{
unsigned long ticks = 0;
+ bool calc_ve = false;
ktime_t delta;
/*
@@ -80,7 +81,7 @@ static void tick_do_update_jiffies64(ktime_t now)
last_jiffies_update = ktime_add_ns(last_jiffies_update,
incr * ticks);
}
- do_timer(++ticks);
+ calc_ve = do_timer(++ticks);
/* Keep the tick_next_period variable up to date */
tick_next_period = ktime_add(last_jiffies_update, tick_period);
@@ -89,6 +90,8 @@ static void tick_do_update_jiffies64(ktime_t now)
return;
}
write_sequnlock(&jiffies_lock);
+ if (calc_ve)
+ calc_load_ve();
update_wall_time();
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 774651652076..be6dbff71d48 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2151,8 +2151,11 @@ EXPORT_SYMBOL(hardpps);
*/
void xtime_update(unsigned long ticks)
{
+ bool calc_ve;
write_seqlock(&jiffies_lock);
- do_timer(ticks);
+ calc_ve = do_timer(ticks);
write_sequnlock(&jiffies_lock);
+ if (calc_ve)
+ calc_load_ve();
update_wall_time();
}
More information about the Devel
mailing list