[Devel] [PATCH RHEL9 COMMIT] /proc/<pid>/vz_latency: Show maximal allocation latency in the last 2min.

Konstantin Khorenko khorenko at virtuozzo.com
Wed Oct 20 11:39:26 MSK 2021


The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-4.vz9.10.12
------>
commit c7a66cc5308b7d85669b63d09e86d500bac8d506
Author: Andrey Ryabinin <ryabinin.a.a at gmail.com>
Date:   Wed Oct 20 11:39:25 2021 +0300

    /proc/<pid>/vz_latency: Show maximal allocation latency in the last 2min.
    
    Add a column to '/proc/<pid>/vz_latency' with the maximal latency the task
    has seen in the last 2 minutes.
    
    E.g.:
    
    cat /proc/1/vz_latency
    Type                    Total_lat                Calls           Max (2min)
    allocatomic:                    0                  294                    0
    alloc:                    3000000                43394                    0
    allocmp:                        0                 1018                    0
    
    AFAICS this changes the output format but shouldn't break the only user of
    this interface - pstorage. According to the pstorage code, it reads this
    file line by line, reads the 'Total_lat' and 'Calls' fields and skips to the
    next line. Thus adding a new field shouldn't break it.
    
    https://jira.sw.ru/browse/PSBM-87797
    
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
    
    Cc: Pavel Borzenkov <Pavel.Borzenkov at acronis.com>
    Reviewed-by: Denis V. Lunev <den at openvz.org>
    
    (cherry-picked from vz7 commit 1914c29eb875 ("/proc/<pid>/vz_latency: Show
    maximal allocation latency in the last second."))
    
    Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
    
    +++
    proc/vestat: show correct maxlat in /proc/vz/vestat
    
    Don't show a pointer as a latency value, it does not look valid: 'maxlat'
    is now an array, so it must be printed as 'maxlat[0]'.
    
    https://jira.sw.ru/browse/PSBM-93675
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    
    Added to VZ8 in the scope of https://jira.sw.ru/browse/PSBM-127844.
    Signed-off-by: Evgenii Shatokhin <eshatokhin at virtuozzo.com>
    
    (cherry-picked from vz8 commit 9fd73423306f ("/proc/<pid>/vz_latency: Show
    maximal allocation latency in the last second."))
    
    Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 fs/proc/base.c          | 28 ++++++++++++++++++++--------
 include/linux/kstat.h   |  3 ++-
 include/linux/vzstat.h  | 15 +++++++++++++++
 kernel/exit.c           | 13 +++++++++++++
 kernel/ve/vecalls.c     |  2 +-
 kernel/ve/vzstat.c      |  2 +-
 kernel/ve/vzstat_core.c |  6 +++---
 mm/page_alloc.c         | 35 +++++++++++++++++++++++++++++++++++
 8 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index f395ec139587..5a98e4bdd5d3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -82,6 +82,7 @@
 #include <linux/audit.h>
 #include <linux/poll.h>
 #include <linux/nsproxy.h>
+#include <linux/vzstat.h>
 #include <linux/oom.h>
 #include <linux/elf.h>
 #include <linux/pid_namespace.h>
@@ -550,8 +551,8 @@ static void lastlat_seq_show(struct seq_file *m,
 		const char *name,
 		struct kstat_lat_snap_struct *snap)
 {
-	seq_printf(m, "%-12s %20Lu %20lu\n", name,
-			snap->totlat, snap->count);
+	seq_printf(m, "%-12s %20Lu %20lu %20Lu\n", name,
+			snap->totlat, snap->count, get_max_lat(snap));
 }
 static const char *alloc_descr[] = {
 	"allocatomic:",
@@ -569,8 +570,8 @@ static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 {
 	int i;
 
-	seq_printf(m, "%-12s %20s %20s\n",
-			"Type", "Total_lat", "Calls");
+	seq_printf(m, "%-12s %20s %20s %20s\n",
+			"Type", "Total_lat", "Calls", "Max (2min)");
 
 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
 		lastlat_seq_show(m, alloc_descr[i],
@@ -585,33 +586,44 @@ static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 	unsigned long flags;
 	u64 lat[ARRAY_SIZE(alloc_types)];
 	u64 count[ARRAY_SIZE(alloc_types)];
+	u64 maxlats[ARRAY_SIZE(alloc_types)];
 
 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
 		lat[i] = task->alloc_lat[alloc_types[i]].totlat;
 		count[i] = task->alloc_lat[alloc_types[i]].count;
+		maxlats[i] = get_max_lat(&task->alloc_lat[alloc_types[i]]);
 	}
 
 	if (lock_task_sighand(task, &flags)) {
 		struct task_struct *t = task;
+		u64 maxlat;
+
 		while_each_thread(task, t) {
 			for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
 				lat[i] += t->alloc_lat[alloc_types[i]].totlat;
 				count[i] += t->alloc_lat[alloc_types[i]].count;
+				maxlat = get_max_lat(&t->alloc_lat[alloc_types[i]]);
+				if (maxlats[i] < maxlat)
+					maxlats[i] = maxlat;
 			}
 		}
 		for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
 			lat[i] += t->signal->alloc_lat[alloc_types[i]].totlat;
 			count[i] += t->signal->alloc_lat[alloc_types[i]].count;
+			maxlat = get_max_lat(&t->signal->alloc_lat[alloc_types[i]]);
+			if (maxlats[i] < maxlat)
+				maxlats[i] = maxlat;
+
 		}
 		unlock_task_sighand(task, &flags);
 	}
 
-	seq_printf(m, "%-12s %20s %20s\n",
-			"Type", "Total_lat", "Calls");
+	seq_printf(m, "%-12s %20s %20s %20s\n",
+			"Type", "Total_lat", "Calls", "Max (2min)");
 
 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
-		seq_printf(m, "%-12s %20Lu %20Lu\n", alloc_descr[i],
-			lat[i], count[i]);
+		seq_printf(m, "%-12s %20Lu %20Lu %20Lu\n", alloc_descr[i],
+			lat[i], count[i], maxlats[i]);
 
 	return 0;
 }
diff --git a/include/linux/kstat.h b/include/linux/kstat.h
index fcf6f0fc4b6f..c25de162a00b 100644
--- a/include/linux/kstat.h
+++ b/include/linux/kstat.h
@@ -38,8 +38,9 @@ struct kstat_perf_pcpu_struct {
 };
 
 struct kstat_lat_snap_struct {
-	u64 maxlat, totlat;
+	u64 maxlat[2], totlat;
 	unsigned long count;
+	unsigned long time[2];
 };
 
 struct kstat_lat_pcpu_snap_struct {
diff --git a/include/linux/vzstat.h b/include/linux/vzstat.h
index 36408f06a2e3..505487117ca4 100644
--- a/include/linux/vzstat.h
+++ b/include/linux/vzstat.h
@@ -10,6 +10,7 @@
 #ifndef __VZSTAT_H__
 #define __VZSTAT_H__
 
+#include <linux/jiffies.h>
 #include <linux/mmzone.h>
 #include <linux/kstat.h>
 
@@ -64,6 +65,20 @@ extern void KSTAT_PERF_ADD(struct kstat_perf_pcpu_struct *ptr, u64 real_time,
 	sleep_time = current->se.statistics->sum_sleep_runtime - sleep_time; \
 	KSTAT_PERF_ADD(&kstat_glob.name, start, start - sleep_time);
 
+#define KSTAT_ALLOC_MAX_LAT_PERIOD (120*HZ)
+
+static inline u64 get_max_lat(struct kstat_lat_snap_struct *snap)
+{
+	int i;
+	u64 max = 0;
+
+	for (i = 0; i < 2; i++) {
+		if (time_before(jiffies, snap->time[i] + KSTAT_ALLOC_MAX_LAT_PERIOD))
+			max = max > snap->maxlat[i] ? max : snap->maxlat[i];
+	}
+	return max;
+}
+
 extern void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, u64 dur);
 extern void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 5a0607ee41b1..67a26b302e61 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -750,6 +750,9 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
+void update_maxlat(struct kstat_lat_snap_struct *alloc_lat,
+				u64 lat, unsigned long time);
+
 void kstat_add_dying(struct task_struct *tsk)
 {
 #ifdef CONFIG_VE
@@ -757,8 +760,18 @@ void kstat_add_dying(struct task_struct *tsk)
 
 	spin_lock_irq(&tsk->sighand->siglock);
 	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+		int j;
+
 		tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
 		tsk->signal->alloc_lat[i].count += tsk->alloc_lat[i].count;
+		for (j = 0; j < 2; j++) {
+			if (time_after(tsk->signal->alloc_lat[i].time[j],
+					tsk->alloc_lat[i].time[0])) {
+				update_maxlat(&tsk->alloc_lat[i],
+					tsk->signal->alloc_lat[i].maxlat[j],
+					tsk->signal->alloc_lat[i].time[j]);
+			}
+		}
 	}
 	spin_unlock_irq(&tsk->sighand->siglock);
 #endif
diff --git a/kernel/ve/vecalls.c b/kernel/ve/vecalls.c
index 1b23181acc56..8f057aa79b11 100644
--- a/kernel/ve/vecalls.c
+++ b/kernel/ve/vecalls.c
@@ -160,7 +160,7 @@ static int vestat_seq_show(struct seq_file *m, void *v)
 		   (unsigned long long)strv_time,
 		   (unsigned long long)uptime_cycles,
 		   (unsigned long long)used,
-		   (unsigned long long)ve->sched_lat_ve.last.maxlat,
+		   (unsigned long long)ve->sched_lat_ve.last.maxlat[0],
 		   (unsigned long long)ve->sched_lat_ve.last.totlat,
 		   ve->sched_lat_ve.last.count);
 	return 0;
diff --git a/kernel/ve/vzstat.c b/kernel/ve/vzstat.c
index c278b62e830d..a7ef426b1bfc 100644
--- a/kernel/ve/vzstat.c
+++ b/kernel/ve/vzstat.c
@@ -167,7 +167,7 @@ static void lastlat_seq_show(struct seq_file *m,
 		struct kstat_lat_snap_struct *snap)
 {
 	seq_printf(m, "%-11s %20Lu %20Lu %20lu\n", name,
-			snap->maxlat, snap->totlat, snap->count);
+			snap->maxlat[0], snap->totlat, snap->count);
 }
 
 static void avglat_seq_show(struct seq_file *m,
diff --git a/kernel/ve/vzstat_core.c b/kernel/ve/vzstat_core.c
index 26fe8fba2e5b..42f4564285b4 100644
--- a/kernel/ve/vzstat_core.c
+++ b/kernel/ve/vzstat_core.c
@@ -89,11 +89,11 @@ void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
 
 		p->last.count += snap.count;
 		p->last.totlat += snap.totlat;
-		if (p->last.maxlat < snap.maxlat)
-			p->last.maxlat = snap.maxlat;
+		if (p->last.maxlat[0] < snap.maxlat)
+			p->last.maxlat[0] = snap.maxlat;
 	}
 
-	m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
+	m = (p->last.maxlat[0] > p->max_snap ? p->last.maxlat[0] : p->max_snap);
 	p->avg[0] = calc_load(p->avg[0], EXP_1, m);
 	p->avg[1] = calc_load(p->avg[1], EXP_5, m);
 	p->avg[2] = calc_load(p->avg[2], EXP_15, m);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ce726b40fcf6..415d14402226 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5406,6 +5406,40 @@ static __always_inline void warn_high_order(int order, gfp_t gfp_mask)
 	}
 }
 
+void update_maxlat(struct kstat_lat_snap_struct *alloc_lat,
+				u64 lat, unsigned long time)
+{
+	if (time_before(time, alloc_lat->time[0] +
+				KSTAT_ALLOC_MAX_LAT_PERIOD/2)) {
+		if (alloc_lat->maxlat[0] < lat) {
+			alloc_lat->maxlat[0] = lat;
+			alloc_lat->time[0] = time;
+		}
+	} else if (time_before(time, alloc_lat->time[0] +
+				KSTAT_ALLOC_MAX_LAT_PERIOD)) {
+		if (alloc_lat->maxlat[1] < lat) {
+			alloc_lat->maxlat[1] = lat;
+			alloc_lat->time[1] = time;
+		}
+	} else if (time_before(time, alloc_lat->time[0] +
+				KSTAT_ALLOC_MAX_LAT_PERIOD*3/2)) {
+		if (alloc_lat->maxlat[1] < lat) {
+			alloc_lat->maxlat[0] = lat;
+			alloc_lat->time[0] = time;
+		} else {
+			alloc_lat->maxlat[0] = alloc_lat->maxlat[1];
+			alloc_lat->time[0] = alloc_lat->time[1];
+		}
+		alloc_lat->maxlat[1] = 0;
+		alloc_lat->time[1] = 0;
+	} else {
+		alloc_lat->maxlat[0] = lat;
+		alloc_lat->time[0] = time;
+		alloc_lat->maxlat[1] = 0;
+		alloc_lat->time[1] = 0;
+	}
+}
+
 static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
 		struct page *page, u64 time)
 {
@@ -5431,6 +5465,7 @@ static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
 	if (in_task()) {
 		current->alloc_lat[ind].totlat += delta;
 		current->alloc_lat[ind].count++;
+		update_maxlat(&current->alloc_lat[ind], delta, current_clock);
 	}
 
 	if (!page)


More information about the Devel mailing list