[Devel] [PATCH RHEL8 COMMIT] sched/stat: account ctxsw per task group
Konstantin Khorenko
khorenko at virtuozzo.com
Tue Nov 3 16:09:59 MSK 2020
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.15
------>
commit b8f0da164311bd1acf6b78e3cbaa73ab8c2875f3
Author: Vladimir Davydov <vdavydov.dev at gmail.com>
Date: Thu Mar 14 21:00:44 2013 +0400
sched/stat: account ctxsw per task group
This is a backport of diff-sched-account-ctxsw-per-task-group:
Subject: sched: account ctxsw per task group
Date: Fri, 28 Dec 2012 15:09:45 +0400
* [sched] the number of context switches should be reported correctly
inside a CT in /proc/stat (PSBM-18113)
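This is needed for /proc/stat:ctxt to be correct inside containers: context switches are now counted per cfs_rq, and the sum over the task group's cfs_rqs is reported instead of the system-wide nr_context_switches().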
https://jira.sw.ru/browse/PSBM-18113
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
(cherry picked from vz7 commit d388f0bf64adb74cd62c4deff58e181bd63d62ac)
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
kernel/sched/cpuacct.c | 4 +++-
kernel/sched/fair.c | 14 ++++++++++++--
kernel/sched/sched.h | 3 +++
3 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index aafaee1f0722..8756560d0b4f 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -647,6 +647,7 @@ int cpu_cgroup_proc_stat(struct cgroup_subsys_state *cpu_css,
struct kernel_cpustat *kcpustat;
unsigned long tg_nr_running = 0;
unsigned long tg_nr_iowait = 0;
+ unsigned long long tg_nr_switches = 0;
getboottime64(&boottime);
@@ -665,6 +666,7 @@ int cpu_cgroup_proc_stat(struct cgroup_subsys_state *cpu_css,
#ifdef CONFIG_FAIR_GROUP_SCHED
tg_nr_running += tg->cfs_rq[i]->h_nr_running;
tg_nr_iowait += tg->cfs_rq[i]->nr_iowait;
+ tg_nr_switches += tg->cfs_rq[i]->nr_switches;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
tg_nr_running += tg->rt_rq[i]->rt_nr_running;
@@ -738,7 +740,7 @@ int cpu_cgroup_proc_stat(struct cgroup_subsys_state *cpu_css,
"processes %lu\n"
"procs_running %lu\n"
"procs_blocked %lu\n",
- nr_context_switches(),
+ tg_nr_switches,
(unsigned long long)boot_sec,
total_forks,
tg_nr_running,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6546d8511417..0b9bb108625a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4153,6 +4153,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
clear_buddies(cfs_rq, se);
+ if (cfs_rq->prev == se)
+ cfs_rq->prev = NULL;
+
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
se->on_rq = 0;
@@ -4167,8 +4170,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (!(flags & DEQUEUE_SLEEP))
se->vruntime -= cfs_rq->min_vruntime;
- /* return excess runtime on last dequeue */
- return_cfs_rq_runtime(cfs_rq);
+ if (!cfs_rq->nr_running) {
+ /* return excess runtime on last dequeue */
+ return_cfs_rq_runtime(cfs_rq);
+ /* account switch to idle task */
+ cfs_rq->nr_switches++;
+ }
update_cfs_group(se);
@@ -4242,6 +4249,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
update_stats_curr_start(cfs_rq, se);
cfs_rq->curr = se;
+ if (cfs_rq->prev != se)
+ cfs_rq->nr_switches++;
/*
* Track our maximum slice length, if the CPU's load is at
@@ -4341,6 +4350,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
__enqueue_entity(cfs_rq, prev);
/* in !on_rq case, update occurred at dequeue */
update_load_avg(cfs_rq, prev, 0);
+ cfs_rq->prev = prev;
}
cfs_rq->curr = NULL;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d8331e5b4c4f..3d55b45f1ea6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -542,6 +542,9 @@ struct cfs_rq {
struct sched_entity *next;
struct sched_entity *last;
struct sched_entity *skip;
+ struct sched_entity *prev;
+
+ u64 nr_switches;
#ifdef CONFIG_SCHED_DEBUG
unsigned int nr_spread_over;
More information about the Devel
mailing list