[Devel] [PATCH RHEL8 COMMIT] /proc/<pid>/vz_latency: Add scheduling stats

Konstantin Khorenko khorenko at virtuozzo.com
Mon May 24 20:14:43 MSK 2021


The commit is pushed to "branch-rh8-4.18.0-240.1.1.vz8.5.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-240.1.1.vz8.5.32
------>
commit df815b15987078ae598fd7cd80e63da8bcd717eb
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date:   Mon May 24 20:04:10 2021 +0300

    /proc/<pid>/vz_latency: Add scheduling stats
    
    Add scheduling latencies to /proc/<pid>/vz_latency.
    They are the same as alloc latencies - total cumulative
    latency, number of schedule events, and latency maximum
    in the last 2 minutes.
    
    The sysctl kernel.sched_schedstats must be enabled to
    see these stats.
    
    https://jira.sw.ru/browse/PSBM-87797
    
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
    Cc: Pavel Borzenkov <Pavel.Borzenkov at acronis.com>
    Reviewed-by: Denis V. Lunev <den at openvz.org>
    
    Rebase to vz8:
     - Add new argument to update_sched_lat. It was reworked during previous
       ports to have only time delta, which is insufficient for
       update_maxlat.
    
    (cherry-picked from vz7 commit 0ed8df6d0d18 ("/proc/<pid>/vz_latency:
    Add scheduling stats"))
    
    Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
 fs/proc/base.c               | 18 ++++++++++++++++--
 include/linux/kstat.h        |  5 +++++
 include/linux/sched.h        |  2 +-
 include/linux/sched/signal.h |  2 +-
 kernel/exit.c                |  6 ++----
 kernel/sched/fair.c          | 13 +++++++++----
 6 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index cf8afe1efe02..4860efc2d509 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -538,13 +538,18 @@ static const char *alloc_descr[] = {
 	"allocatomic:",
 	"alloc:",
 	"allocmp:",
+	"scheduling:",
 };
 static const int alloc_types[] = {
 	KSTAT_ALLOCSTAT_ATOMIC,
 	KSTAT_ALLOCSTAT_LOW,
 	KSTAT_ALLOCSTAT_LOW_MP,
+	KSTAT_SCHED,
 };
 
+extern struct static_key sched_schedstats;
+# define schedstat_enabled()		static_key_false(&sched_schedstats)
+
 static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task)
 {
@@ -553,9 +558,13 @@ static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 	seq_printf(m, "%-12s %20s %20s %20s\n",
 			"Type", "Total_lat", "Calls", "Max (2min)");
 
-	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
+	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+		if (alloc_types[i] == KSTAT_SCHED && !schedstat_enabled())
+			continue;
 		lastlat_seq_show(m, alloc_descr[i],
 				&task->alloc_lat[alloc_types[i]]);
+	}
+
 	return 0;
 }
 
@@ -595,15 +604,20 @@ static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 				maxlats[i] = maxlat;
 
 		}
+
 		unlock_task_sighand(task, &flags);
 	}
 
 	seq_printf(m, "%-12s %20s %20s %20s\n",
 			"Type", "Total_lat", "Calls", "Max (2min)");
 
-	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
+	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+		if (alloc_types[i] == KSTAT_SCHED && !schedstat_enabled())
+			continue;
+
 		seq_printf(m, "%-12s %20Lu %20Lu %20Lu\n", alloc_descr[i],
 			lat[i], count[i], maxlats[i]);
+	}
 
 	return 0;
 }
diff --git a/include/linux/kstat.h b/include/linux/kstat.h
index 4a4933767eae..97a0f9b70456 100644
--- a/include/linux/kstat.h
+++ b/include/linux/kstat.h
@@ -8,6 +8,8 @@ enum {
 	KSTAT_ALLOCSTAT_LOW_MP,
 	KSTAT_ALLOCSTAT_HIGH_MP,
 	KSTAT_ALLOCSTAT_NR,
+	KSTAT_SCHED = KSTAT_ALLOCSTAT_NR,
+	KSTAT_NR,
 };
 
 struct kstat_perf_snap_struct {
@@ -46,4 +48,7 @@ struct kstat_lat_pcpu_struct {
 	u64 avg[3];
 };
 
+void update_maxlat(struct kstat_lat_snap_struct *alloc_lat,
+				u64 lat, unsigned long time);
+
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 72b3f40623b4..f8a0e14ece41 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1126,7 +1126,7 @@ struct task_struct {
 	struct tlbflush_unmap_batch	tlb_ubc;
 
 #ifdef CONFIG_VE
-	struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_snap_struct alloc_lat[KSTAT_NR];
 #endif
 
 	RH_KABI_REPLACE(struct rcu_head rcu, union {
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index adfe9cc9bd88..1f4a0c55a700 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -212,7 +212,7 @@ struct signal_struct {
 	struct taskstats *stats;
 #endif
 #ifdef CONFIG_VE
-	struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+	struct kstat_lat_snap_struct alloc_lat[KSTAT_NR];
 #endif
 #ifdef CONFIG_AUDIT
 	unsigned audit_tty;
diff --git a/kernel/exit.c b/kernel/exit.c
index d1050444f428..55c1e50b24d8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -723,16 +723,13 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-void update_maxlat(struct kstat_lat_snap_struct *alloc_lat,
-				u64 lat, unsigned long time);
-
 void kstat_add_dying(struct task_struct *tsk)
 {
 #ifdef CONFIG_VE
 	int i;
 
 	spin_lock_irq(&tsk->sighand->siglock);
-	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+	for (i = 0; i < KSTAT_NR; i++) {
 		int j;
 
 		tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
@@ -746,6 +743,7 @@ void kstat_add_dying(struct task_struct *tsk)
 			}
 		}
 	}
+
 	spin_unlock_irq(&tsk->sighand->siglock);
 #endif
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8f22066ddd82..da2e976a6c12 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -947,11 +947,15 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	__schedstat_set(se->statistics.wait_start, wait_start);
 }
 
-static inline void update_sched_lat(struct task_struct *t, u64 delta)
+static inline void update_sched_lat(struct task_struct *t, u64 delta, u64 now)
 {
 #ifdef CONFIG_VE
 	KSTAT_LAT_PCPU_ADD(&kstat_glob.sched_lat, delta);
 	KSTAT_LAT_PCPU_ADD(&t->task_ve->sched_lat_ve, delta);
+
+	t->alloc_lat[KSTAT_SCHED].totlat += delta;
+	t->alloc_lat[KSTAT_SCHED].count++;
+	update_maxlat(&t->alloc_lat[KSTAT_SCHED], delta, now);
 #endif
 }
 
@@ -959,12 +963,13 @@ static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
-	u64 delta;
+	u64 delta, now;
 
 	if (!schedstat_enabled())
 		return;
 
-	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
+	now = rq_clock(rq_of(cfs_rq));
+	delta = now - schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -978,7 +983,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
-		update_sched_lat(p, delta);
+		update_sched_lat(p, delta, now);
 	}
 
 	__schedstat_set(se->statistics.wait_max,


More information about the Devel mailing list