[Devel] [PATCH RH9 04/12] sched: Count rq::nr_sleeping and cfs_rq::nr_unint

Alexander Mikhalitsyn alexander.mikhalitsyn at virtuozzo.com
Thu Sep 23 14:31:28 MSK 2021


From: Kirill Tkhai <ktkhai at virtuozzo.com>

Extracted from "Initial patch".

Note: it would be better to move nr_unint
      into struct task_group in the future.
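
A rough sketch of what that move would buy (hypothetical helper, not
part of this patch): with the counter kept in cfs_rq, a per-group
total has to be summed over the group's per-CPU runqueues:

    /*
     * Hypothetical helper, for illustration only: approximate number
     * of uninterruptible tasks directly in @tg, summed over its
     * per-CPU cfs_rq's (assumes CONFIG_FAIR_GROUP_SCHED).
     */
    static unsigned int tg_nr_unint(struct task_group *tg)
    {
            unsigned int sum = 0;
            int cpu;

            for_each_possible_cpu(cpu)
                    sum += tg->cfs_rq[cpu]->nr_unint;

            return sum;
    }

A single counter in struct task_group would avoid this walk, at the
cost of cross-CPU updates to a shared field.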

Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>

Rebase to RHEL8.3 kernel-4.18.0-240.1.1.el8_3 notes:
khorenko@:
I have substituted the task_contributes_to_load() check with the
tsk->sched_contributes_to_load check in sched_move_task().
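
For reference, a sketch of what task_contributes_to_load() looked like
in kernels that still had it (illustration only, not part of this
patch); the sched_contributes_to_load bit computed in __schedule()
captures the same condition at the moment the task went to sleep:

    /* old helper (approximate form), shown for reference only */
    #define task_contributes_to_load(task)                          \
            ((task->state & TASK_UNINTERRUPTIBLE) != 0 &&           \
             (task->flags & PF_FROZEN) == 0 &&                      \
             (task->state & TASK_NOLOAD) == 0)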

Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>

See also upstream commit
e6fe3f422 ("sched: Make multiple runqueue task counters 32-bit"):
the new counters are declared unsigned int rather than unsigned long.

https://jira.sw.ru/browse/PSBM-133986

task->state is now accessed as READ_ONCE(task->__state).
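
A minimal before/after illustration of that adaptation (the "before"
form is assumed from the vz8 original, the "after" form is what this
patch uses):

    /* vz8 / older kernels (assumed original form) */
    if (t->state == TASK_INTERRUPTIBLE)
            rq->nr_sleeping++;

    /* RH9: task_struct::state became __state, read with READ_ONCE() */
    if (READ_ONCE(t->__state) == TASK_INTERRUPTIBLE)
            rq->nr_sleeping++;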

(cherry picked from commit 4833c2e3239ec8c0bfeebe375d54b5b767b8967e)
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
---
 include/linux/sched.h |  2 +-
 kernel/sched/core.c   | 41 +++++++++++++++++++++++++++++++++++++++--
 kernel/sched/sched.h  |  3 +++
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d67bb65bf77..31e9e41b9d9d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -817,7 +817,7 @@ struct task_struct {
 #ifdef CONFIG_PSI
 	unsigned			sched_psi_wake_requeue:1;
 #endif
-
+	unsigned			sched_interruptible_sleep:1;
 	/* Force alignment to the next boundary: */
 	unsigned			:0;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bdff24034916..b7a1d5d09ade 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1224,6 +1224,18 @@ static void set_load_weight(struct task_struct *p, bool update_load)
 	}
 }
 
+static inline void check_inc_sleeping(struct rq *rq, struct task_struct *t)
+{
+	if (READ_ONCE(t->__state) == TASK_INTERRUPTIBLE)
+		rq->nr_sleeping++;
+}
+
+static inline void check_dec_sleeping(struct rq *rq, struct task_struct *t)
+{
+	if (READ_ONCE(t->__state) == TASK_INTERRUPTIBLE)
+		rq->nr_sleeping--;
+}
+
 #ifdef CONFIG_UCLAMP_TASK
 /*
  * Serializes updates of utilization clamp values
@@ -1237,6 +1249,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
  */
 static DEFINE_MUTEX(uclamp_mutex);
 
+
 /* Max allowed minimum utilization */
 unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
 
@@ -1991,6 +2004,7 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
 
 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
+	check_inc_sleeping(rq, p);
 	p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;
 
 	dequeue_task(rq, p, flags);
@@ -3362,8 +3376,10 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 
 	lockdep_assert_rq_held(rq);
 
-	if (p->sched_contributes_to_load)
+	if (p->sched_contributes_to_load) {
 		rq->nr_uninterruptible--;
+		task_cfs_rq(p)->nr_unint--;
+	}
 
 #ifdef CONFIG_SMP
 	if (wake_flags & WF_MIGRATED)
@@ -3375,6 +3391,9 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 		atomic_dec(&task_rq(p)->nr_iowait);
 	}
 
+	if (p->sched_interruptible_sleep)
+		rq->nr_sleeping--;
+
 	activate_task(rq, p, en_flags);
 	ttwu_do_wakeup(rq, p, wake_flags, rf);
 }
@@ -5963,9 +5982,13 @@ static void __sched notrace __schedule(bool preempt)
 				(prev_state & TASK_UNINTERRUPTIBLE) &&
 				!(prev_state & TASK_NOLOAD) &&
 				!(prev->flags & PF_FROZEN);
+			prev->sched_interruptible_sleep =
+				(prev_state == TASK_INTERRUPTIBLE);
 
-			if (prev->sched_contributes_to_load)
+			if (prev->sched_contributes_to_load) {
 				rq->nr_uninterruptible++;
+				task_cfs_rq(prev)->nr_unint++;
+			}
 
 			/*
 			 * __schedule()			ttwu()
@@ -9522,6 +9545,13 @@ void sched_move_task(struct task_struct *tsk)
 
 	if (queued)
 		dequeue_task(rq, tsk, queue_flags);
+	else {
+		if (tsk->sched_contributes_to_load)
+			task_cfs_rq(tsk)->nr_unint--;
+
+		check_dec_sleeping(rq, tsk);
+	}
+
 	if (running)
 		put_prev_task(rq, tsk);
 
@@ -9529,6 +9559,13 @@ void sched_move_task(struct task_struct *tsk)
 
 	if (queued)
 		enqueue_task(rq, tsk, queue_flags);
+	else {
+		if (tsk->sched_contributes_to_load)
+			task_cfs_rq(tsk)->nr_unint++;
+
+		check_inc_sleeping(rq, tsk);
+	}
+
 	if (running) {
 		set_next_task(rq, tsk);
 		/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ddefb0419d7a..649210b93e11 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -525,6 +525,8 @@ struct cfs_rq {
 	unsigned int		h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
+	unsigned int nr_unint;
+
 	u64			exec_clock;
 	u64			min_vruntime;
 #ifdef CONFIG_SCHED_CORE
@@ -962,6 +964,7 @@ struct rq {
 	 * it on another CPU. Always updated under the runqueue lock:
 	 */
 	unsigned int		nr_uninterruptible;
+	unsigned int		nr_sleeping;
 
 	struct task_struct __rcu	*curr;
 	struct task_struct	*idle;
-- 
2.31.1


