[Devel] [PATCH RHEL7 COMMIT] Revert "ve/sched: port boosting hacks against prio inversion"
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Jan 17 12:34:48 MSK 2020
The commit is pushed to "branch-rh7-3.10.0-1062.7.1.vz7.130.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.7.1.vz7.130.10
------>
commit 28a9251d7129c280d500734f9303e5af18c6c4ae
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date: Fri Jan 17 12:34:45 2020 +0300
Revert "ve/sched: port boosting hacks against prio inversion"
This reverts commit 6a052cbf8aefba3deaba6cf93072c0810f38de92.
The boosting hacks don't work well with the optimized
pick_next_task_fair() from commit 678d5718d8d0 ("sched/fair: Optimize
cgroup pick_next_task_fair()") and actually cause performance
degradation, so let's remove them.
https://jira.sw.ru/browse/PSBM-100188
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>
---
include/linux/sched.h | 6 --
kernel/sched/core.c | 14 +---
kernel/sched/fair.c | 186 ++----------------------------------------------
kernel/sched/features.h | 3 -
kernel/sched/sched.h | 6 +-
5 files changed, 6 insertions(+), 209 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 07f9954483b80..f0d976c3800a1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1258,11 +1258,6 @@ struct sched_entity {
struct list_head cfs_rq_node;
unsigned int on_rq;
-#ifdef CONFIG_CFS_BANDWIDTH
- unsigned int boosted;
- struct list_head boost_node;
-#endif
-
u64 exec_start;
u64 sum_exec_runtime;
u64 vruntime;
@@ -1496,7 +1491,6 @@ struct task_struct {
unsigned sched_contributes_to_load:1;
RH_KABI_FILL_HOLE(unsigned sched_remote_wakeup:1)
unsigned sched_interruptible_sleep:1;
- unsigned woken_while_running:1;
unsigned sched_iothrottled_sleep:1;
unsigned :0; /* force alignment to the next boundary */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f7f76cbe6f271..f264c209b1e77 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1776,7 +1776,6 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
rq = __task_rq_lock(p);
if (task_on_rq_queued(p)) {
ttwu_do_wakeup(rq, p, wake_flags);
- p->woken_while_running = 1;
ret = 1;
}
__task_rq_unlock(rq);
@@ -2102,10 +2101,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node);
-#ifdef CONFIG_CFS_BANDWIDTH
- p->se.boosted = 0;
-#endif
-
#ifdef CONFIG_SCHEDSTATS
/* Even if schedstat is disabled, there should not be garbage */
p->se.statistics = &p->statistics;
@@ -3789,7 +3784,6 @@ static void __sched __schedule(void)
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
- int resched_next;
int cpu;
need_resched:
@@ -3880,14 +3874,8 @@ static void __sched __schedule(void)
balance_callback(rq);
- resched_next = READ_ONCE(rq->resched_next);
- if (resched_next) {
- set_tsk_need_resched(current);
- rq->resched_next = 0;
- }
-
sched_preempt_enable_no_resched();
- if (!resched_next && need_resched())
+ if (need_resched())
goto need_resched;
}
STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0e7ab2050af04..0d874caab7bd0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -454,16 +454,6 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
return cfs_bandwidth_used() && cfs_rq->throttled;
}
-static inline int cfs_rq_has_boosted_entities(struct cfs_rq *cfs_rq)
-{
- return !list_empty(&cfs_rq->boosted_entities);
-}
-
-static inline int entity_boosted(struct sched_entity *se)
-{
- return se->boosted;
-}
-
#else /* !CONFIG_CFS_BANDWIDTH */
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
@@ -471,16 +461,6 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
return 0;
}
-static inline int cfs_rq_has_boosted_entities(struct cfs_rq *cfs_rq)
-{
- return 0;
-}
-
-static inline int entity_boosted(struct sched_entity *se)
-{
- return 0;
-}
-
#endif /* CONFIG_CFS_BANDWIDTH */
#ifdef CONFIG_CFS_CPULIMIT
@@ -994,100 +974,6 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->exec_start = rq_clock_task(rq_of(cfs_rq));
}
-#ifdef CONFIG_CFS_BANDWIDTH
-static inline void update_entity_boost(struct sched_entity *se)
-{
- if (!entity_is_task(se))
- se->boosted = cfs_rq_has_boosted_entities(group_cfs_rq(se));
- else {
- struct task_struct *p = task_of(se);
-
- if (unlikely(p != current))
- return;
-
- if (!(preempt_count() & PREEMPT_ACTIVE)) {
- se->boosted = sched_feat(BOOST_WAKEUPS) &&
- p->woken_while_running;
- p->woken_while_running = 0;
- } else
- se->boosted = sched_feat(BOOST_PREEMPT) &&
- !p->may_throttle;
- }
-}
-
-static int check_enqueue_boost(struct rq *rq, struct task_struct *p, int flags)
-{
- if (sched_feat(BOOST_WAKEUPS) && (flags & ENQUEUE_WAKEUP))
- p->se.boosted = 1;
- return p->se.boosted;
-}
-
-static inline void __enqueue_boosted_entity(struct cfs_rq *cfs_rq,
- struct sched_entity *se)
-{
- list_add(&se->boost_node, &cfs_rq->boosted_entities);
-}
-
-static inline void __dequeue_boosted_entity(struct cfs_rq *cfs_rq,
- struct sched_entity *se)
-{
- list_del(&se->boost_node);
-}
-
-static int enqueue_boosted_entity(struct cfs_rq *cfs_rq,
- struct sched_entity *se)
-{
- if (entity_is_task(se) || !entity_boosted(se)) {
- if (se != cfs_rq->curr)
- __enqueue_boosted_entity(cfs_rq, se);
- se->boosted = 1;
- return 1;
- }
-
- return 0;
-}
-
-static int dequeue_boosted_entity(struct cfs_rq *cfs_rq,
- struct sched_entity *se)
-{
- if (entity_is_task(se) ||
- !cfs_rq_has_boosted_entities(group_cfs_rq(se))) {
- if (se != cfs_rq->curr)
- __dequeue_boosted_entity(cfs_rq, se);
- if (!entity_is_task(se))
- se->boosted = 0;
- return 1;
- }
-
- return 0;
-}
-#else
-static inline void update_entity_boost(struct sched_entity *se) {}
-
-static inline int check_enqueue_boost(struct rq *rq,
- struct task_struct *p, int flags)
-{
- return 0;
-}
-
-static inline void __enqueue_boosted_entity(struct cfs_rq *cfs_rq,
- struct sched_entity *se) {}
-static inline void __dequeue_boosted_entity(struct cfs_rq *cfs_rq,
- struct sched_entity *se) {}
-
-static inline int enqueue_boosted_entity(struct cfs_rq *cfs_rq,
- struct sched_entity *se)
-{
- return 0;
-}
-
-static inline int dequeue_boosted_entity(struct cfs_rq *cfs_rq,
- struct sched_entity *se)
-{
- return 0;
-}
-#endif
-
/**************************************************
* Scheduling class queueing methods:
*/
@@ -3105,7 +2991,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
se->vruntime = max_vruntime(se->vruntime, vruntime);
}
-static void check_enqueue_throttle(struct cfs_rq *cfs_rq, int flags);
+static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
static inline void check_schedstat_required(void)
{
@@ -3206,7 +3092,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (cfs_rq->nr_running == 1) {
list_add_leaf_cfs_rq(cfs_rq);
- check_enqueue_throttle(cfs_rq, flags);
+ check_enqueue_throttle(cfs_rq);
}
}
@@ -3361,8 +3247,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (schedstat_enabled())
update_stats_wait_end(cfs_rq, se);
__dequeue_entity(cfs_rq, se);
- if (entity_boosted(se))
- __dequeue_boosted_entity(cfs_rq, se);
}
update_stats_curr_start(cfs_rq, se);
@@ -3439,20 +3323,6 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
se = cfs_rq->next;
-#ifdef CONFIG_CFS_BANDWIDTH
- /*
- * Give boosted tasks a chance to finish their kernel-mode execution in
- * order to avoid prio inversion in case they hold a lock, but resched
- * them asap for the sake of fairness.
- */
- if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0) {
- if (cfs_rq_has_boosted_entities(cfs_rq))
- se = list_first_entry(&cfs_rq->boosted_entities,
- struct sched_entity, boost_node);
- rq_of(cfs_rq)->resched_next = 1;
- }
-#endif
-
clear_buddies(cfs_rq, se);
return se;
@@ -3469,14 +3339,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
if (prev->on_rq)
update_curr(cfs_rq);
- update_entity_boost(prev);
- if (entity_boosted(prev) && prev->on_rq) {
- __enqueue_boosted_entity(cfs_rq, prev);
- if (unlikely(cfs_rq_throttled(cfs_rq)))
- /* prev was moved to throttled cfs_rq */
- unthrottle_cfs_rq(cfs_rq);
- }
-
/* throttle cfs_rqs exceeding runtime */
check_cfs_rq_runtime(cfs_rq);
@@ -4083,7 +3945,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
* expired/exceeded, otherwise it may be allowed to steal additional ticks of
* runtime as update_curr() throttling can not not trigger until it's on-rq.
*/
-static void check_enqueue_throttle(struct cfs_rq *cfs_rq, int flags)
+static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
{
if (!cfs_bandwidth_used())
return;
@@ -4108,9 +3970,6 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq, int flags)
}
}
- if (flags & ENQUEUE_BOOST)
- return;
-
/* an active group must be handled by the update_curr()->put() path */
if (!cfs_rq->runtime_enabled || cfs_rq->curr)
return;
@@ -4158,9 +4017,6 @@ static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
if (cfs_rq_throttled(cfs_rq))
return true;
- if (cfs_rq_has_boosted_entities(cfs_rq))
- return false;
-
throttle_cfs_rq(cfs_rq);
return true;
}
@@ -4247,7 +4103,6 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
cfs_rq->runtime_enabled = 0;
INIT_LIST_HEAD(&cfs_rq->throttled_list);
- INIT_LIST_HEAD(&cfs_rq->boosted_entities);
#ifdef CONFIG_CFS_CPULIMIT
hrtimer_init(&cfs_rq->active_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cfs_rq->active_timer.function = sched_cfs_active_timer;
@@ -4310,7 +4165,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
-static void check_enqueue_throttle(struct cfs_rq *cfs_rq, int flags) {}
+static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
static inline void sync_throttle(struct task_group *tg, int cpu) {}
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
@@ -4402,14 +4257,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
- int boost = check_enqueue_boost(rq, p, flags);
for_each_sched_entity(se) {
if (se->on_rq)
break;
cfs_rq = cfs_rq_of(se);
- if (boost)
- flags |= ENQUEUE_BOOST;
enqueue_entity(cfs_rq, se, flags);
/*
@@ -4422,9 +4274,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
break;
cfs_rq->h_nr_running++;
- if (boost)
- boost = enqueue_boosted_entity(cfs_rq, se);
-
flags = ENQUEUE_WAKEUP;
}
@@ -4435,9 +4284,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;
- if (boost)
- boost = enqueue_boosted_entity(cfs_rq, se);
-
update_cfs_shares(cfs_rq);
update_entity_load_avg(se, 1);
}
@@ -4445,14 +4291,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!se) {
update_rq_runnable_avg(rq, rq->nr_running);
inc_nr_running(rq);
- } else if (boost) {
- for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
- if (!enqueue_boosted_entity(cfs_rq, se))
- break;
- if (cfs_rq_throttled(cfs_rq))
- unthrottle_cfs_rq(cfs_rq);
- }
}
hrtick_update(rq);
}
@@ -4468,7 +4306,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
- int boosted = entity_boosted(se);
int task_sleep = flags & DEQUEUE_SLEEP;
if (task_sleep)
@@ -4488,9 +4325,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
break;
cfs_rq->h_nr_running--;
- if (boosted)
- boosted = dequeue_boosted_entity(cfs_rq, se);
-
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
/* Avoid re-evaluating load for this entity: */
@@ -4513,9 +4347,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;
- if (boosted)
- boosted = dequeue_boosted_entity(cfs_rq, se);
-
update_cfs_shares(cfs_rq);
update_entity_load_avg(se, 1);
}
@@ -5493,15 +5324,6 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
if (hrtick_enabled(rq))
hrtick_start_fair(rq, p);
- if (rq->resched_next && !entity_boosted(&p->se)) {
- /*
- * seems boosted tasks have gone from the throttled cfs_rq,
- * pick another task then
- */
- resched_curr(rq);
- rq->resched_next = 0;
- }
-
return p;
idle:
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 5694d74b7bcf9..57497b6d0dfb5 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -82,6 +82,3 @@ SCHED_FEAT(NUMA_FAVOUR_HIGHER, true)
*/
SCHED_FEAT(NUMA_RESIST_LOWER, false)
#endif
-
-SCHED_FEAT(BOOST_WAKEUPS, true)
-SCHED_FEAT(BOOST_PREEMPT, true)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e8d099b7cb284..86181a147b941 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -477,8 +477,6 @@ struct cfs_rq {
u64 throttled_clock_task_time;
int throttled, throttle_count, throttle_uptodate;
struct list_head throttled_list;
-
- struct list_head boosted_entities;
#endif /* CONFIG_CFS_BANDWIDTH */
#ifdef CONFIG_SMP
RH_KABI_EXTEND(u64 last_h_load_update)
@@ -678,8 +676,7 @@ struct rq {
#ifdef CONFIG_NO_HZ_FULL
unsigned long last_sched_tick;
#endif
- signed char skip_clock_update;
- unsigned char resched_next;
+ int skip_clock_update;
/* capture load from *all* tasks on this cpu: */
struct load_weight load;
@@ -1313,7 +1310,6 @@ static const u32 prio_to_wmult[40] = {
#endif
#define ENQUEUE_HEAD 0x08
#define ENQUEUE_REPLENISH 0x10
-#define ENQUEUE_BOOST 0x20
#define DEQUEUE_SLEEP 0x01
#define DEQUEUE_SAVE 0x02