[Devel] [PATCH rh7 v2 4/4] /proc/<pid>/vz_latency: Show maximal allocation latency in the last second.

Pavel Borzenkov Pavel.Borzenkov at acronis.com
Mon Aug 20 16:48:03 MSK 2018



> On 20 Aug 2018, at 15:03, Andrey Ryabinin <aryabinin at virtuozzo.com> wrote:
> 
> Add to '/proc/<pid>/vz_latency' column with maximal latency task have seen
> in the last 2 minutes.
> 
> E.g.:
> 
> cat /proc/1/vz_latency
> Type                    Total_lat                Calls           Max (2min)
> allocatomic:                    0                  294                    0
> alloc:                    3000000                43394                    0
> allocmp:                        0                 1018                    0
> 
> AFAICS this changes output format but shouldn't break our the only user of
> this interface - pstorage. Accordind to the pstorage code it reads this
> file line by line, reads 'Total_lat' and 'Calls' fields and skips to the next
> line. Thus adding new field shouldn't break it.
> 
> Note: technically this is not maximum in last period of time.
> It's maximum since the last reset of max latency with reset happening once
> per period of time (2 minutes in this patch).
> The true max latency in last X-sec/min is technically impossible to implement
> as this would require to remember all allocations times and and latencies
> in last X-sec/min. However, we can record max latencies every 10 sec for the
> past 2min and based on that calculate the max latency in last 2 minutes (+/-10sec).
> But the same logic can be implemented in user-space with less performance cost,
> since in the kernel we would need to put that complex logic into every allocation,
> while in userspace this can be done only for task we want to monitor.
> 
> https://jira.sw.ru/browse/PSBM-87797
> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> Cc: Pavel Borzenkov <Pavel.Borzenkov at acronis.com>
> ---
> 
> Changes since v1:
> - change period from 1sec to 2min
> 
> fs/proc/base.c         | 28 ++++++++++++++++++++--------
> include/linux/kstat.h  |  1 +
> include/linux/vzstat.h | 10 ++++++++++
> kernel/exit.c          |  8 ++++++++
> mm/page_alloc.c        |  8 ++++++++
> 5 files changed, 47 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 64bbbc387afc..719e93b42ff5 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -80,6 +80,7 @@
> #include <linux/audit.h>
> #include <linux/poll.h>
> #include <linux/nsproxy.h>
> +#include <linux/vzstat.h>
> #include <linux/oom.h>
> #include <linux/elf.h>
> #include <linux/pid_namespace.h>
> @@ -580,8 +581,8 @@ static void lastlat_seq_show(struct seq_file *m,
> 		const char *name,
> 		struct kstat_lat_snap_struct *snap)
> {
> -	seq_printf(m, "%-12s %20Lu %20lu\n", name,
> -			snap->totlat, snap->count);
> +	seq_printf(m, "%-12s %20Lu %20lu %20Lu\n", name,
> +			snap->totlat, snap->count, get_max_lat(snap));
> }
> static const char *alloc_descr[] = {
> 	"allocatomic:",
> @@ -599,8 +600,8 @@ static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
> {
> 	int i;
> 
> -	seq_printf(m, "%-12s %20s %20s\n",
> -			"Type", "Total_lat", "Calls");
> +	seq_printf(m, "%-12s %20s %20s %20s\n",
> +			"Type", "Total_lat", "Calls", "Max (2min)");
> 
> 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
> 		lastlat_seq_show(m, alloc_descr[i],
> @@ -615,33 +616,44 @@ static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
> 	unsigned long flags;
> 	u64 lat[ARRAY_SIZE(alloc_types)];
> 	u64 count[ARRAY_SIZE(alloc_types)];
> +	u64 maxlats[ARRAY_SIZE(alloc_types)];
> 
> 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> 		lat[i] = task->alloc_lat[alloc_types[i]].totlat;
> 		count[i] = task->alloc_lat[alloc_types[i]].count;
> +		maxlats[i] = get_max_lat(&task->alloc_lat[alloc_types[i]]);
> 	}
> 
> 	if (lock_task_sighand(task, &flags)) {
> 		struct task_struct *t = task;
> +		u64 maxlat;
> +
> 		while_each_thread(task, t) {
> 			for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> 				lat[i] += t->alloc_lat[alloc_types[i]].totlat;
> 				count[i] += t->alloc_lat[alloc_types[i]].count;
> +				maxlat = get_max_lat(&t->alloc_lat[alloc_types[i]]);
> +				if (maxlats[i] < maxlat)
> +					maxlats[i] = maxlat;
> 			}
> 		}
> 		for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> 			lat[i] += t->signal->alloc_lat[alloc_types[i]].totlat;
> 			count[i] += t->signal->alloc_lat[alloc_types[i]].count;
> +			maxlat = get_max_lat(&t->signal->alloc_lat[alloc_types[i]]);
> +			if (maxlats[i] < maxlat)
> +				maxlats[i] = maxlat;
> +
> 		}
> 		unlock_task_sighand(task, &flags);
> 	}
> 
> -	seq_printf(m, "%-12s %20s %20s\n",
> -			"Type", "Total_lat", "Calls");
> +	seq_printf(m, "%-12s %20s %20s %20s\n",
> +			"Type", "Total_lat", "Calls", "Max (1sec)");

Wrong header? Should be 2min

> 
> 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
> -		seq_printf(m, "%-12s %20Lu %20Lu\n", alloc_descr[i],
> -			lat[i], count[i]);
> +		seq_printf(m, "%-12s %20Lu %20Lu %20Lu\n", alloc_descr[i],
> +			lat[i], count[i], maxlats[i]);
> 
> 	return 0;
> }
> diff --git a/include/linux/kstat.h b/include/linux/kstat.h
> index b268752f2e15..45bdd52ee7d9 100644
> --- a/include/linux/kstat.h
> +++ b/include/linux/kstat.h
> @@ -32,6 +32,7 @@ struct kstat_perf_pcpu_struct {
> struct kstat_lat_snap_struct {
> 	u64 maxlat, totlat;
> 	unsigned long count;
> +	unsigned long time;
> };
> 
> struct kstat_lat_pcpu_snap_struct {
> diff --git a/include/linux/vzstat.h b/include/linux/vzstat.h
> index f1475b2b763a..9df7d4ff9a17 100644
> --- a/include/linux/vzstat.h
> +++ b/include/linux/vzstat.h
> @@ -9,6 +9,7 @@
> #ifndef __VZSTAT_H__
> #define __VZSTAT_H__
> 
> +#include <linux/jiffies.h>
> #include <linux/mmzone.h>
> #include <linux/kstat.h>
> 
> @@ -64,6 +65,15 @@ extern void KSTAT_PERF_ADD(struct kstat_perf_pcpu_struct *ptr, u64 real_time,
> 	sleep_time = current->se.statistics->sum_sleep_runtime - sleep_time; \
> 	KSTAT_PERF_ADD(&kstat_glob.name, start, start - sleep_time);
> 
> +#define KSTAT_ALLOC_MAX_LAT_PERIOD (120*HZ)
> +
> +static inline u64 get_max_lat(struct kstat_lat_snap_struct *snap)
> +{
> +	if (time_before(jiffies, snap->time + KSTAT_ALLOC_MAX_LAT_PERIOD))
> +		return snap->maxlat;
> +	return 0;
> +}
> +
> extern void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, u64 dur);
> extern void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p);
> 
> diff --git a/kernel/exit.c b/kernel/exit.c
> index bf68b87a4d60..c2dc3a2c2c50 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -742,8 +742,16 @@ void kstat_add_dying(struct task_struct *tsk)
> 
> 	spin_lock_irq(&tsk->sighand->siglock);
> 	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
> +		u64 maxlat_tsk, maxlat_sig;
> +
> 		tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
> 		tsk->signal->alloc_lat[i].count += tsk->alloc_lat[i].count;
> +		maxlat_tsk = get_max_lat(&tsk->alloc_lat[i]);
> +		maxlat_sig = get_max_lat(&tsk->signal->alloc_lat[i]);
> +		if (maxlat_sig < maxlat_tsk) {
> +			tsk->signal->alloc_lat[i].maxlat = tsk->alloc_lat[i].maxlat;
> +			tsk->signal->alloc_lat[i].time = tsk->alloc_lat[i].time;
> +		}
> 	}
> 	spin_unlock_irq(&tsk->sighand->siglock);
> #endif
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 9d8c9e4eb970..e8b0f16de295 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3241,6 +3241,14 @@ static void __alloc_collect_stats(gfp_t gfp_mask, unsigned int order,
> 	if (in_task()) {
> 		current->alloc_lat[ind].totlat += time;
> 		current->alloc_lat[ind].count++;
> +		if (time_before(jiffies, current->alloc_lat[ind].time +
> +					KSTAT_ALLOC_MAX_LAT_PERIOD)) {
> +			if (current->alloc_lat[ind].maxlat < time)
> +				current->alloc_lat[ind].maxlat = time;
> +		} else {
> +			current->alloc_lat[ind].time = jiffies;
> +			current->alloc_lat[ind].maxlat = time;
> +		}
> 	}
> 
> 	if (!page)
> -- 
> 2.16.4
> 




More information about the Devel mailing list