[Devel] [PATCH vz9 04/20] /proc/<pid>/vz_latency: Add scheduling stats

Nikita Yushchenko nikita.yushchenko at virtuozzo.com
Wed Oct 13 18:26:15 MSK 2021


From: Andrey Ryabinin <aryabinin at virtuozzo.com>

Add scheduling latencies to /proc/<pid>/vz_latency.
They are the same as alloc latencies - total cumulative
latency, number of schedule events, and latency maximum
in the last 2 minutes.

The sysctl kernel.sched_schedstats must be enabled to
see these stats.

https://jira.sw.ru/browse/PSBM-87797

Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Cc: Pavel Borzenkov <Pavel.Borzenkov at acronis.com>
Reviewed-by: Denis V. Lunev <den at openvz.org>

Rebase to vz8:
 - Add new argument to update_sched_lat. It was reworked during previous
   ports to have only time delta, which is insufficient for
   update_maxlat.

(cherry-picked from vz7 commit 0ed8df6d0d18 ("/proc/<pid>/vz_latency:
Add scheduling stats"))

Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>

(cherry-picked from vz8 commit f11900f53ba3 ("/proc/<pid>/vz_latency:
Add scheduling stats"))

Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 fs/proc/base.c               | 18 ++++++++++++++++--
 include/linux/kstat.h        |  5 +++++
 include/linux/sched.h        |  2 +-
 include/linux/sched/signal.h |  2 +-
 kernel/exit.c                |  6 ++----
 kernel/sched/fair.c          | 13 +++++++++----
 6 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5a98e4bdd5d3..b1609703b90a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -558,13 +558,18 @@ static const char *alloc_descr[] = {
 	"allocatomic:",
 	"alloc:",
 	"allocmp:",
+	"scheduling:",
 };
 static const int alloc_types[] = {
 	KSTAT_ALLOCSTAT_ATOMIC,
 	KSTAT_ALLOCSTAT_LOW,
 	KSTAT_ALLOCSTAT_LOW_MP,
+	KSTAT_SCHED,
 };
 
+extern struct static_key sched_schedstats;
+# define schedstat_enabled()		static_key_false(&sched_schedstats)
+
 static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task)
 {
@@ -573,9 +578,13 @@ static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 	seq_printf(m, "%-12s %20s %20s %20s\n",
 			"Type", "Total_lat", "Calls", "Max (2min)");
 
-	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
+	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+		if (alloc_types[i] == KSTAT_SCHED && !schedstat_enabled())
+			continue;
 		lastlat_seq_show(m, alloc_descr[i],
 				&task->alloc_lat[alloc_types[i]]);
+	}
+
 	return 0;
 }
 
@@ -615,15 +624,20 @@ static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 				maxlats[i] = maxlat;
 
 		}
+
 		unlock_task_sighand(task, &flags);
 	}
 
 	seq_printf(m, "%-12s %20s %20s %20s\n",
 			"Type", "Total_lat", "Calls", "Max (2min)");
 
-	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
+	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+		if (alloc_types[i] == KSTAT_SCHED && !schedstat_enabled())
+			continue;
+
 		seq_printf(m, "%-12s %20Lu %20Lu %20Lu\n", alloc_descr[i],
 			lat[i], count[i], maxlats[i]);
+	}
 
 	return 0;
 }
diff --git a/include/linux/kstat.h b/include/linux/kstat.h
index c25de162a00b..e33f70fbed42 100644
--- a/include/linux/kstat.h
+++ b/include/linux/kstat.h
@@ -17,6 +17,8 @@ enum {
 	KSTAT_ALLOCSTAT_LOW_MP,
 	KSTAT_ALLOCSTAT_HIGH_MP,
 	KSTAT_ALLOCSTAT_NR,
+	KSTAT_SCHED = KSTAT_ALLOCSTAT_NR,
+	KSTAT_NR,
 };
 
 struct kstat_perf_snap_struct {
@@ -55,4 +57,7 @@ struct kstat_lat_pcpu_struct {
 	u64 avg[3];
 };
 
+void update_maxlat(struct kstat_lat_snap_struct *alloc_lat,
+				u64 lat, unsigned long time);
+
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e0a562d76aa1..81ae96f909a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1244,7 +1244,7 @@ struct task_struct {
 	struct tlbflush_unmap_batch	tlb_ubc;
 
 #ifdef CONFIG_VE
-	struct kstat_lat_snap_struct	alloc_lat[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_snap_struct	alloc_lat[KSTAT_NR];
 #endif
 
 	union {
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b48fba3fc9dd..9e5b8add41d1 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -209,7 +209,7 @@ struct signal_struct {
 	struct taskstats *stats;
 #endif
 #ifdef CONFIG_VE
-	struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_snap_struct alloc_lat[KSTAT_NR];
 #endif
 #ifdef CONFIG_AUDIT
 	unsigned audit_tty;
diff --git a/kernel/exit.c b/kernel/exit.c
index 67a26b302e61..1185fc9027af 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -750,16 +750,13 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-void update_maxlat(struct kstat_lat_snap_struct *alloc_lat,
-				u64 lat, unsigned long time);
-
 void kstat_add_dying(struct task_struct *tsk)
 {
 #ifdef CONFIG_VE
 	int i;
 
 	spin_lock_irq(&tsk->sighand->siglock);
-	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+	for (i = 0; i < KSTAT_NR; i++) {
 		int j;
 
 		tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
@@ -773,6 +770,7 @@ void kstat_add_dying(struct task_struct *tsk)
 			}
 		}
 	}
+
 	spin_unlock_irq(&tsk->sighand->siglock);
 #endif
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index db7eb593e9db..2afe9f705aa6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -937,11 +937,15 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	__schedstat_set(se->statistics.wait_start, wait_start);
 }
 
-static inline void update_sched_lat(struct task_struct *t, u64 delta)
+static inline void update_sched_lat(struct task_struct *t, u64 delta, u64 now)
 {
 #ifdef CONFIG_VE
 	KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat, delta);
 	KSTAT_LAT_PCPU_ADD(&t->task_ve->sched_lat_ve, delta);
+
+	t->alloc_lat[KSTAT_SCHED].totlat += delta;
+	t->alloc_lat[KSTAT_SCHED].count++;
+	update_maxlat(&t->alloc_lat[KSTAT_SCHED], delta, now);
 #endif
 }
 
@@ -949,7 +953,7 @@ static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
-	u64 delta;
+	u64 delta, now;
 
 	if (!schedstat_enabled())
 		return;
@@ -963,7 +967,8 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	if (unlikely(!schedstat_val(se->statistics.wait_start)))
 		return;
 
-	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
+	now = rq_clock(rq_of(cfs_rq));
+	delta = now - schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -977,7 +982,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
-		update_sched_lat(p, delta);
+		update_sched_lat(p, delta, now);
 	}
 
 	__schedstat_set(se->statistics.wait_max,
-- 
2.30.2



More information about the Devel mailing list