[Devel] [PATCH RH9 04/12] sched: Count rq::nr_sleeping and cfs_rq::nr_unint
Alexander Mikhalitsyn
alexander.mikhalitsyn at virtuozzo.com
Thu Sep 23 14:31:28 MSK 2021
From: Kirill Tkhai <ktkhai at virtuozzo.com>
Extracted from "Initial patch".
Note: it would be better to move nr_unint
to struct task_group in the future.
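For illustration only (nothing below is part of this series): with the
counter living in cfs_rq, a per-group total has to be summed over the
group's per-CPU runqueues. A hypothetical reader, assuming the usual
tg->cfs_rq[cpu] layout under CONFIG_FAIR_GROUP_SCHED:

  /* Hypothetical helper, not in this patch: racy, lockless sampling read. */
  static unsigned int tg_nr_unint(struct task_group *tg)
  {
          unsigned int sum = 0;
          int cpu;

          for_each_possible_cpu(cpu)
                  sum += tg->cfs_rq[cpu]->nr_unint;
          return sum;
  }

With the counter in struct task_group this would become a single read,
at the cost of cross-CPU updates on the shared field.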
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
Rebase to RHEL8.3 kernel-4.18.0-240.1.1.el8_3 notes:
khorenko@:
I've substituted task_contributes_to_load() with
the tsk->sched_contributes_to_load check in sched_move_task()
(see the sketch below).
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
See also e6fe3f422 ("sched: Make multiple runqueue task counters 32-bit"):
    unsigned long -> unsigned int

https://jira.sw.ru/browse/PSBM-133986

task->state -> READ_ONCE(task->__state)
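For context, a rough sketch of what the rebase replaces (the old macro body
is reproduced from memory of the 4.18-era kernel and may differ in detail;
the new-side snippet mirrors the sched_move_task() hunk below):

  /* Pre-RH9 helper, since removed upstream; approximately: */
  #define task_contributes_to_load(task)                        \
          (((task)->state & TASK_UNINTERRUPTIBLE) != 0 &&       \
           ((task)->flags & PF_FROZEN) == 0 &&                  \
           ((task)->state & TASK_NOLOAD) == 0)

  /*
   * RH9: task->state is gone, reads go through READ_ONCE(task->__state),
   * and sched_move_task() can rely on the flag cached by __schedule():
   */
  if (tsk->sched_contributes_to_load)
          task_cfs_rq(tsk)->nr_unint--;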
(cherry picked from commit 4833c2e3239ec8c0bfeebe375d54b5b767b8967e)
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
---
include/linux/sched.h | 2 +-
kernel/sched/core.c | 41 +++++++++++++++++++++++++++++++++++++++--
kernel/sched/sched.h | 3 +++
3 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d67bb65bf77..31e9e41b9d9d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -817,7 +817,7 @@ struct task_struct {
#ifdef CONFIG_PSI
unsigned sched_psi_wake_requeue:1;
#endif
-
+ unsigned sched_interruptible_sleep:1;
/* Force alignment to the next boundary: */
unsigned :0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bdff24034916..b7a1d5d09ade 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1224,6 +1224,18 @@ static void set_load_weight(struct task_struct *p, bool update_load)
}
}
+static inline void check_inc_sleeping(struct rq *rq, struct task_struct *t)
+{
+ if (READ_ONCE(t->__state) == TASK_INTERRUPTIBLE)
+ rq->nr_sleeping++;
+}
+
+static inline void check_dec_sleeping(struct rq *rq, struct task_struct *t)
+{
+ if (READ_ONCE(t->__state) == TASK_INTERRUPTIBLE)
+ rq->nr_sleeping--;
+}
+
#ifdef CONFIG_UCLAMP_TASK
/*
* Serializes updates of utilization clamp values
@@ -1237,6 +1249,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
*/
static DEFINE_MUTEX(uclamp_mutex);
+
/* Max allowed minimum utilization */
unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
@@ -1991,6 +2004,7 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
+ check_inc_sleeping(rq, p);
p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;
dequeue_task(rq, p, flags);
@@ -3362,8 +3376,10 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
lockdep_assert_rq_held(rq);
- if (p->sched_contributes_to_load)
+ if (p->sched_contributes_to_load) {
rq->nr_uninterruptible--;
+ task_cfs_rq(p)->nr_unint--;
+ }
#ifdef CONFIG_SMP
if (wake_flags & WF_MIGRATED)
@@ -3375,6 +3391,9 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
atomic_dec(&task_rq(p)->nr_iowait);
}
+ if (p->sched_interruptible_sleep)
+ rq->nr_sleeping--;
+
activate_task(rq, p, en_flags);
ttwu_do_wakeup(rq, p, wake_flags, rf);
}
@@ -5963,9 +5982,13 @@ static void __sched notrace __schedule(bool preempt)
(prev_state & TASK_UNINTERRUPTIBLE) &&
!(prev_state & TASK_NOLOAD) &&
!(prev->flags & PF_FROZEN);
+ prev->sched_interruptible_sleep =
+ (prev_state == TASK_INTERRUPTIBLE);
- if (prev->sched_contributes_to_load)
+ if (prev->sched_contributes_to_load) {
rq->nr_uninterruptible++;
+ task_cfs_rq(prev)->nr_unint++;
+ }
/*
* __schedule() ttwu()
@@ -9522,6 +9545,13 @@ void sched_move_task(struct task_struct *tsk)
if (queued)
dequeue_task(rq, tsk, queue_flags);
+ else {
+ if (tsk->sched_contributes_to_load)
+ task_cfs_rq(tsk)->nr_unint--;
+
+ check_dec_sleeping(rq, tsk);
+ }
+
if (running)
put_prev_task(rq, tsk);
@@ -9529,6 +9559,13 @@ void sched_move_task(struct task_struct *tsk)
if (queued)
enqueue_task(rq, tsk, queue_flags);
+ else {
+ if (tsk->sched_contributes_to_load)
+ task_cfs_rq(tsk)->nr_unint++;
+
+ check_inc_sleeping(rq, tsk);
+ }
+
if (running) {
set_next_task(rq, tsk);
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ddefb0419d7a..649210b93e11 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -525,6 +525,8 @@ struct cfs_rq {
unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
+ unsigned int nr_unint; /* load-contributing (uninterruptible) sleepers */
+
u64 exec_clock;
u64 min_vruntime;
#ifdef CONFIG_SCHED_CORE
@@ -962,6 +964,7 @@ struct rq {
* it on another CPU. Always updated under the runqueue lock:
*/
unsigned int nr_uninterruptible;
+ unsigned int nr_sleeping; /* tasks that went to sleep in TASK_INTERRUPTIBLE */
struct task_struct __rcu *curr;
struct task_struct *idle;
--
2.31.1