[Devel] [PATCH rh7 v2 4/4] /proc/<pid>/vz_latency: Show maximal allocation latency in the last second.

Andrey Ryabinin aryabinin at virtuozzo.com
Mon Aug 20 15:03:08 MSK 2018


Add to '/proc/<pid>/vz_latency' column with maximal latency task have seen
in the last 2 minutes.

E.g.:

cat /proc/1/vz_latency
Type                    Total_lat                Calls           Max (2min)
allocatomic:                    0                  294                    0
alloc:                    3000000                43394                    0
allocmp:                        0                 1018                    0

AFAICS this changes output format but shouldn't break our the only user of
this interface - pstorage. Accordind to the pstorage code it reads this
file line by line, reads 'Total_lat' and 'Calls' fields and skips to the next
line. Thus adding new field shouldn't break it.

Note: technically this is not maximum in last period of time.
It's maximum since the last reset of max latency with reset happening once
per period of time (2 minutes in this patch).
The true max latency in last X-sec/min is technically impossible to implement
as this would require to remember all allocations times and and latencies
in last X-sec/min. However, we can record max latencies every 10 sec for the
past 2min and based on that calculate the max latency in last 2 minutes (+/-10sec).
But the same logic can be implemented in user-space with less performance cost,
since in the kernel we would need to put that complex logic into every allocation,
while in userspace this can be done only for task we want to monitor.

https://jira.sw.ru/browse/PSBM-87797
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Cc: Pavel Borzenkov <Pavel.Borzenkov at acronis.com>
---

Changes since v1:
 - change period from 1sec to 2min

 fs/proc/base.c         | 28 ++++++++++++++++++++--------
 include/linux/kstat.h  |  1 +
 include/linux/vzstat.h | 10 ++++++++++
 kernel/exit.c          |  8 ++++++++
 mm/page_alloc.c        |  8 ++++++++
 5 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 64bbbc387afc..719e93b42ff5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -80,6 +80,7 @@
 #include <linux/audit.h>
 #include <linux/poll.h>
 #include <linux/nsproxy.h>
+#include <linux/vzstat.h>
 #include <linux/oom.h>
 #include <linux/elf.h>
 #include <linux/pid_namespace.h>
@@ -580,8 +581,8 @@ static void lastlat_seq_show(struct seq_file *m,
 		const char *name,
 		struct kstat_lat_snap_struct *snap)
 {
-	seq_printf(m, "%-12s %20Lu %20lu\n", name,
-			snap->totlat, snap->count);
+	seq_printf(m, "%-12s %20Lu %20lu %20Lu\n", name,
+			snap->totlat, snap->count, get_max_lat(snap));
 }
 static const char *alloc_descr[] = {
 	"allocatomic:",
@@ -599,8 +600,8 @@ static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 {
 	int i;
 
-	seq_printf(m, "%-12s %20s %20s\n",
-			"Type", "Total_lat", "Calls");
+	seq_printf(m, "%-12s %20s %20s %20s\n",
+			"Type", "Total_lat", "Calls", "Max (2min)");
 
 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
 		lastlat_seq_show(m, alloc_descr[i],
@@ -615,33 +616,44 @@ static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
 	unsigned long flags;
 	u64 lat[ARRAY_SIZE(alloc_types)];
 	u64 count[ARRAY_SIZE(alloc_types)];
+	u64 maxlats[ARRAY_SIZE(alloc_types)];
 
 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
 		lat[i] = task->alloc_lat[alloc_types[i]].totlat;
 		count[i] = task->alloc_lat[alloc_types[i]].count;
+		maxlats[i] = get_max_lat(&task->alloc_lat[alloc_types[i]]);
 	}
 
 	if (lock_task_sighand(task, &flags)) {
 		struct task_struct *t = task;
+		u64 maxlat;
+
 		while_each_thread(task, t) {
 			for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
 				lat[i] += t->alloc_lat[alloc_types[i]].totlat;
 				count[i] += t->alloc_lat[alloc_types[i]].count;
+				maxlat = get_max_lat(&t->alloc_lat[alloc_types[i]]);
+				if (maxlats[i] < maxlat)
+					maxlats[i] = maxlat;
 			}
 		}
 		for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
 			lat[i] += t->signal->alloc_lat[alloc_types[i]].totlat;
 			count[i] += t->signal->alloc_lat[alloc_types[i]].count;
+			maxlat = get_max_lat(&t->signal->alloc_lat[alloc_types[i]]);
+			if (maxlats[i] < maxlat)
+				maxlats[i] = maxlat;
+
 		}
 		unlock_task_sighand(task, &flags);
 	}
 
-	seq_printf(m, "%-12s %20s %20s\n",
-			"Type", "Total_lat", "Calls");
+	seq_printf(m, "%-12s %20s %20s %20s\n",
+			"Type", "Total_lat", "Calls", "Max (1sec)");
 
 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
-		seq_printf(m, "%-12s %20Lu %20Lu\n", alloc_descr[i],
-			lat[i], count[i]);
+		seq_printf(m, "%-12s %20Lu %20Lu %20Lu\n", alloc_descr[i],
+			lat[i], count[i], maxlats[i]);
 
 	return 0;
 }
diff --git a/include/linux/kstat.h b/include/linux/kstat.h
index b268752f2e15..45bdd52ee7d9 100644
--- a/include/linux/kstat.h
+++ b/include/linux/kstat.h
@@ -32,6 +32,7 @@ struct kstat_perf_pcpu_struct {
 struct kstat_lat_snap_struct {
 	u64 maxlat, totlat;
 	unsigned long count;
+	unsigned long time;
 };
 
 struct kstat_lat_pcpu_snap_struct {
diff --git a/include/linux/vzstat.h b/include/linux/vzstat.h
index f1475b2b763a..9df7d4ff9a17 100644
--- a/include/linux/vzstat.h
+++ b/include/linux/vzstat.h
@@ -9,6 +9,7 @@
 #ifndef __VZSTAT_H__
 #define __VZSTAT_H__
 
+#include <linux/jiffies.h>
 #include <linux/mmzone.h>
 #include <linux/kstat.h>
 
@@ -64,6 +65,15 @@ extern void KSTAT_PERF_ADD(struct kstat_perf_pcpu_struct *ptr, u64 real_time,
 	sleep_time = current->se.statistics->sum_sleep_runtime - sleep_time; \
 	KSTAT_PERF_ADD(&kstat_glob.name, start, start - sleep_time);
 
+#define KSTAT_ALLOC_MAX_LAT_PERIOD (120*HZ)
+
+static inline u64 get_max_lat(struct kstat_lat_snap_struct *snap)
+{
+	if (time_before(jiffies, snap->time + KSTAT_ALLOC_MAX_LAT_PERIOD))
+		return snap->maxlat;
+	return 0;
+}
+
 extern void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, u64 dur);
 extern void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index bf68b87a4d60..c2dc3a2c2c50 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -742,8 +742,16 @@ void kstat_add_dying(struct task_struct *tsk)
 
 	spin_lock_irq(&tsk->sighand->siglock);
 	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+		u64 maxlat_tsk, maxlat_sig;
+
 		tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
 		tsk->signal->alloc_lat[i].count += tsk->alloc_lat[i].count;
+		maxlat_tsk = get_max_lat(&tsk->alloc_lat[i]);
+		maxlat_sig = get_max_lat(&tsk->signal->alloc_lat[i]);
+		if (maxlat_sig < maxlat_tsk) {
+			tsk->signal->alloc_lat[i].maxlat = tsk->alloc_lat[i].maxlat;
+			tsk->signal->alloc_lat[i].time = tsk->alloc_lat[i].time;
+		}
 	}
 	spin_unlock_irq(&tsk->sighand->siglock);
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9d8c9e4eb970..e8b0f16de295 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3241,6 +3241,14 @@ static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
 	if (in_task()) {
 		current->alloc_lat[ind].totlat += time;
 		current->alloc_lat[ind].count++;
+		if (time_before(jiffies, current->alloc_lat[ind].time +
+					KSTAT_ALLOC_MAX_LAT_PERIOD)) {
+			if (current->alloc_lat[ind].maxlat < time)
+				current->alloc_lat[ind].maxlat = time;
+		} else {
+			current->alloc_lat[ind].time = jiffies;
+			current->alloc_lat[ind].maxlat = time;
+		}
 	}
 
 	if (!page)
-- 
2.16.4



More information about the Devel mailing list