[Devel] [PATCH RHEL COMMIT] vzstat: Add vzstat module and kstat interfaces
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Sep 24 15:15:55 MSK 2021
The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit 86d9e9f5332fa691e8b361a606424d14dcc2ff53
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Fri Sep 24 15:15:55 2021 +0300
vzstat: Add vzstat module and kstat interfaces
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
+++
vzstat: account cpu total time properly in mm performance stats
/proc/vz/mmperf occasionally accounts and shows the wall total time in both
the "Wall_tot_time" and the "CPU_tot_time" columns. Fix this.
mFixes: c0a20dd32be6 ("vzstat: Add vzstat module and kstat interfaces")
https://pmc.acronis.com/browse/VSTOR-16659
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
(cherry-picked from vz7 commit 306162b35d01 ("vzstat: account cpu total time
properly in mm performance stats"))
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
+++
vzstat: drop double KSTAT_LAT_PCPU_UPDATE declaration
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
mFixes: c0a20dd32be6 ("vzstat: Add vzstat module and kstat interfaces")
(cherry-picked from vz7 commit 8242e706656b ("vzstat: drop double
KSTAT_LAT_PCPU_UPDATE declaration"))
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
+++
ve/proc: increment position in "next" seq_operations callback
The "next" callback of struct "seq_operations" should move the iterator
forward to the next position in the sequence.
Some Virtuozzo-specific proc files skip this increment,
which results in incorrect file output.
https://bugs.openvz.org/browse/OVZ-7158
Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
(cherry-picked from vz7 commit 0a458a0598b4 ("ve/proc: increment position in
"next" seq_operations callback"))
https://jira.sw.ru/browse/PSBM-127849
Signed-off-by: Valeriy Vdovin <valeriy.vdovin at virtuozzo.com>
(cherry picked from vz8 commit 4a5af6067ead98fa516ffcd7404308f67af3ee2a)
Signed-off-by: Andrey Zhadchenko <andrey.zhadchenko at virtuozzo.com>
---
include/linux/vzstat.h | 28 ++
kernel/ve/Makefile | 4 +-
kernel/ve/vzstat.c | 705 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/ve/vzstat_core.c | 103 +++++++
4 files changed, 839 insertions(+), 1 deletion(-)
diff --git a/include/linux/vzstat.h b/include/linux/vzstat.h
index 32f1132404c1..36408f06a2e3 100644
--- a/include/linux/vzstat.h
+++ b/include/linux/vzstat.h
@@ -48,4 +48,32 @@ extern struct kernel_stat_glob kstat_glob ____cacheline_aligned;
extern spinlock_t kstat_glb_lock;
extern void kstat_init(void);
+
+#ifdef CONFIG_VE
+extern void KSTAT_PERF_ADD(struct kstat_perf_pcpu_struct *ptr, u64 real_time,
+ u64 cpu_time);
+/* KSTAT_PERF_ENTER(name): declares locals `start` and `sleep_time` in the enclosing scope and samples ktime_get() and the current task's sum_sleep_runtime; `name` is not used by this macro. */
+#define KSTAT_PERF_ENTER(name) \
+ u64 start, sleep_time; \
+ \
+ start = ktime_to_ns(ktime_get()); \
+ sleep_time = current->se.statistics->sum_sleep_runtime; \
+/* KSTAT_PERF_LEAVE(name): turns both samples into deltas and passes them to KSTAT_PERF_ADD() for kstat_glob.name; the cpu time argument is the wall delta minus the sleep delta. Relies on the locals declared by KSTAT_PERF_ENTER(). */
+#define KSTAT_PERF_LEAVE(name) \
+ start = ktime_to_ns(ktime_get()) - start; \
+ sleep_time = current->se.statistics->sum_sleep_runtime - sleep_time; \
+ KSTAT_PERF_ADD(&kstat_glob.name, start, start - sleep_time);
+
+extern void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, u64 dur);
+extern void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p);
+
+#else /* !CONFIG_VE */
+#define KSTAT_PERF_ADD(ptr, real_time, cpu_time)
+#define KSTAT_PERF_ENTER(name)
+#define KSTAT_PERF_LEAVE(name)
+#define KSTAT_LAT_PCPU_ADD(p, dur)
+#define KSTAT_LAT_PCPU_UPDATE(p)
+#endif /* CONFIG_VE */
+
+
#endif /* __VZSTAT_H__ */
diff --git a/kernel/ve/Makefile b/kernel/ve/Makefile
index e0c23ca9a867..6219357803fb 100644
--- a/kernel/ve/Makefile
+++ b/kernel/ve/Makefile
@@ -6,10 +6,12 @@
# Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
#
-obj-$(CONFIG_VE) = ve.o hooks.o veowner.o
+obj-$(CONFIG_VE) = ve.o hooks.o veowner.o vzstat_core.o
obj-$(CONFIG_VZ_DEV) += vzdev.o
obj-$(CONFIG_VZ_EVENT) += vzevent.o
obj-$(CONFIG_VE_CALLS) += vzmon.o
vzmon-objs = vecalls.o
+
+obj-$(CONFIG_VE_CALLS) += vzstat.o
diff --git a/kernel/ve/vzstat.c b/kernel/ve/vzstat.c
new file mode 100644
index 000000000000..9c1287a94a05
--- /dev/null
+++ b/kernel/ve/vzstat.c
@@ -0,0 +1,705 @@
+/*
+ * kernel/ve/vzstat.c
+ *
+ * Copyright (c) 2015 Parallels IP Holdings GmbH
+ * Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
+ *
+ */
+
+#include <linux/sched/loadavg.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/sched/stat.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/ve.h>
+#include <linux/ve_proto.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/errno.h>
+#include <linux/suspend.h>
+#include <linux/interrupt.h>
+#include <linux/mmzone.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/veowner.h>
+#include <linux/swap.h>
+
+#include <linux/vzstat.h>
+
+/* local variables */
+static struct task_struct *vzstat_thread_tsk;
+
+/*
+ * Row labels for the per-allocation-type statistics.  Indexed the same
+ * way as kstat_glob.alloc_lat[] and kstat_glob.alloc_fails[][].
+ * The table is only ever read, so the pointers are const as well.
+ */
+static const char * const alloc_descr[KSTAT_ALLOCSTAT_NR] = {
+	"alocatomic:",
+	"aloclow:",
+	"alochigh:",
+	"aloclowmp:",
+	"alochighmp:"
+};
+
+/*
+ * ------------------------------------------------------------------------
+ * Kernel protection: kernel code checksumming
+ * ------------------------------------------------------------------------
+ */
+#ifdef CONFIG_VE_KERNEL_CSUM
+
+#ifdef __x86_64__
+/* skip init_level4_pgt */
+#define KERNEL_PROT_START ((unsigned long)(&_stext) + 0x2000)
+#else
+#define KERNEL_PROT_START ((unsigned long)(&_stext))
+#endif
+#define KERNEL_PROT_END ((unsigned long)(&_etext))
+#define CSALIGN(value, size) ((value + (size - 1)) & ~(size - 1))
+
+/*
+ * Checksum the memory between KERNEL_PROT_START and KERNEL_PROT_END and
+ * compare the result with the checksums remembered from the first run.
+ * A mismatch is reported with KERN_EMERG and counted in
+ * kernel_text_csum_broken.  The check is skipped if less than 60 seconds
+ * (in jiffies) have passed since the previous one.
+ */
+void kernel_text_csum_check(void)
+{
+#define CSUM_NR 2
+	static unsigned long text_csum[CSUM_NR], text_csumed, csum_time;
+	unsigned long start, end, ptr, csum[CSUM_NR];
+	int i;
+
+	if (jiffies - csum_time < 60*HZ)
+		return;
+
+	csum_time = jiffies;
+	for (i = 0; i < CSUM_NR; i++)
+		csum[i] = 0;
+	start = CSALIGN(KERNEL_PROT_START, sizeof(csum[0]));
+	end = CSALIGN(KERNEL_PROT_END, sizeof(csum[0]));
+
+	for (ptr = start; ptr < end; ptr += sizeof(csum[0])) {
+		/*
+		 * Named "word" rather than "i" so that it no longer
+		 * shadows the loop index declared above.
+		 */
+		unsigned long word = *(unsigned long *)ptr;
+
+		csum[0] = csum[0] + word;
+		csum[1] = (csum[1] ^ word) + ((csum[1] << 1) + (csum[1] >> 31));
+		cond_resched();
+	}
+
+	/* First run: just remember the reference checksums. */
+	if (!text_csumed) {
+		for (i = 0; i < CSUM_NR; i++)
+			text_csum[i] = csum[i];
+		text_csumed = 1;
+		return;
+	}
+
+	for (i = 0; i < CSUM_NR; i++) {
+		if (text_csum[i] != csum[i]) {
+			printk(KERN_EMERG "Kernel checksum %d changed "
+				"(csum%d=%08lx, onboot csum%d=%08lx)\n",
+				i, i, csum[i], i, text_csum[i]);
+			kernel_text_csum_broken++;
+		}
+	}
+}
+
+#endif
+
+/*
+ * ------------------------------------------------------------------------
+ * Latency update and show functions
+ * ------------------------------------------------------------------------
+ */
+/*
+ * Refresh the allocation, swap-in and page-in latency statistics by
+ * calling KSTAT_LAT_PCPU_UPDATE() on each of them while holding
+ * kstat_glb_lock with interrupts disabled.
+ */
+static void update_alloc_latency(void)
+{
+	int type = 0;
+
+	spin_lock_irq(&kstat_glb_lock);
+
+	while (type < KSTAT_ALLOCSTAT_NR) {
+		KSTAT_LAT_PCPU_UPDATE(&kstat_glob.alloc_lat[type]);
+		type++;
+	}
+	KSTAT_LAT_PCPU_UPDATE(&kstat_glob.swap_in);
+	KSTAT_LAT_PCPU_UPDATE(&kstat_glob.page_in);
+
+	spin_unlock_irq(&kstat_glb_lock);
+}
+
+/*
+ * Print one row of the "Latencies" table: the label @name followed by
+ * the maxlat, totlat and count fields of @snap.
+ */
+static void lastlat_seq_show(struct seq_file *m, const char *name,
+			     struct kstat_lat_snap_struct *snap)
+{
+	seq_printf(m, "%-11s %20Lu %20Lu %20lu\n",
+		   name, snap->maxlat, snap->totlat, snap->count);
+}
+
+/*
+ * Print one row of the "Averages" table: the label @name followed by
+ * the three values avg[0], avg[1] and avg[2].
+ */
+static void avglat_seq_show(struct seq_file *m, const char *name, u64 *avg)
+{
+	seq_printf(m, "%-11s %20Lu %20Lu %20Lu\n",
+		   name, avg[0], avg[1], avg[2]);
+}
+
+/*
+ * ->show() callback of the "latency" proc file.  Prints a version line,
+ * the table of last latency snapshots and the table of averages for
+ * scheduling, every allocation type, swap_in and page_in.
+ * Returns 0; nothing is printed when @v is NULL.
+ */
+static int latency_seq_show(struct seq_file *m, void *v)
+{
+	int type;
+
+	if (v == NULL)
+		return 0;
+
+	seq_puts(m, "Version: 2.5\n");
+
+	/* Table 1: last snapshot of every latency source. */
+	seq_puts(m, "\nLatencies:\n");
+	seq_printf(m, "%-11s %20s %20s %20s\n",
+		   "Type", "Lat", "Total_lat", "Calls");
+	lastlat_seq_show(m, "scheduling:", &kstat_glob.sched_lat.last);
+	for (type = 0; type < KSTAT_ALLOCSTAT_NR; type++) {
+		lastlat_seq_show(m, alloc_descr[type],
+				 &kstat_glob.alloc_lat[type].last);
+	}
+	lastlat_seq_show(m, "swap_in:", &kstat_glob.swap_in.last);
+	lastlat_seq_show(m, "page_in:", &kstat_glob.page_in.last);
+
+	/* Table 2: the avg[] triple of the same sources. */
+	seq_puts(m, "\nAverages:\n");
+	seq_printf(m, "%-11s %20s %20s %20s\n",
+		   "Type", "Avg1", "Avg5", "Avg15");
+	avglat_seq_show(m, "scheduling:", kstat_glob.sched_lat.avg);
+	for (type = 0; type < KSTAT_ALLOCSTAT_NR; type++) {
+		avglat_seq_show(m, alloc_descr[type],
+				kstat_glob.alloc_lat[type].avg);
+	}
+	avglat_seq_show(m, "swap_in:", kstat_glob.swap_in.avg);
+	avglat_seq_show(m, "page_in:", kstat_glob.page_in.avg);
+
+	return 0;
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * General system info: processes, memory, VE
+ * ------------------------------------------------------------------------
+ */
+/*
+ * Feed a new @sample into the three averages avg[0..2] using the
+ * EXP_1, EXP_5 and EXP_15 factors respectively.
+ */
+static void zone_avg_advance(unsigned long *avg, unsigned long sample)
+{
+	avg[0] = calc_load(avg[0], EXP_1, sample);
+	avg[1] = calc_load(avg[1], EXP_5, sample);
+	avg[2] = calc_load(avg[2], EXP_15, sample);
+}
+
+/*
+ * For every zone type, sum the free, active and inactive page counters
+ * over all online nodes and feed the sums into kstat_glob.zone_avg[].
+ * Zone types with no present pages on any node are left untouched.
+ */
+static void update_memory(void)
+{
+	unsigned type;
+
+	for (type = 0; type < MAX_NR_ZONES; type++) {
+		unsigned long free = 0, active = 0, inactive = 0;
+		struct kstat_zone_avg *zavg;
+		unsigned populated = 0;
+		pg_data_t *pgdat;
+
+		for_each_online_pgdat (pgdat) {
+			struct zone *z = pgdat->node_zones + type;
+
+			if (z->present_pages == 0)
+				continue;
+
+			populated++;
+			free += zone_page_state(z, NR_FREE_PAGES);
+			active += zone_page_state(z, NR_ACTIVE_ANON) +
+				zone_page_state(z, NR_ACTIVE_FILE);
+			inactive += zone_page_state(z, NR_INACTIVE_ANON) +
+				zone_page_state(z, NR_INACTIVE_FILE);
+		}
+
+		if (populated == 0)
+			continue;
+
+		zavg = &kstat_glob.zone_avg[type];
+		zone_avg_advance(zavg->free_pages_avg, free);
+		zone_avg_advance(zavg->nr_active_avg, active);
+		zone_avg_advance(zavg->nr_inactive_avg, inactive);
+	}
+}
+
+/*
+ * Print one "ZONE<n> <name> averages" line per zone type that has
+ * present pages on at least one online node.  The name is taken from
+ * the first such zone found; <n> counts printed lines from 0.
+ * @v is unused.
+ */
+static void mem_avg_show(struct seq_file *m, void *v)
+{
+	int printed = 0;
+	unsigned type;
+
+	for (type = 0; type < MAX_NR_ZONES; type++) {
+		struct kstat_zone_avg *zavg;
+		struct zone *populated = NULL;
+		pg_data_t *pgdat;
+
+		/* Look for the first node where this zone type is populated. */
+		for_each_online_pgdat (pgdat) {
+			struct zone *z = pgdat->node_zones + type;
+
+			if (z->present_pages) {
+				populated = z;
+				break;
+			}
+		}
+		if (populated == NULL)
+			continue;
+
+		zavg = &kstat_glob.zone_avg[type];
+		seq_printf(m, "ZONE%u %s averages: "
+			   "active %lu %lu %lu, "
+			   "inactive %lu %lu %lu, "
+			   "free %lu %lu %lu\n",
+			   printed++,
+			   populated->name,
+			   zavg->nr_active_avg[0],
+			   zavg->nr_active_avg[1],
+			   zavg->nr_active_avg[2],
+			   zavg->nr_inactive_avg[0],
+			   zavg->nr_inactive_avg[1],
+			   zavg->nr_inactive_avg[2],
+			   zavg->free_pages_avg[0],
+			   zavg->free_pages_avg[1],
+			   zavg->free_pages_avg[2]);
+	}
+}
+
+/*
+ * Print the "VEs", "Processes" and "Processes avg" lines: the number of
+ * VEs, the current task counts per state and the uninterruptible-task
+ * averages shifted right by FSHIFT.  The stopped count is always
+ * reported as 0.  @v is unused.
+ */
+static void task_counts_seq_show(struct seq_file *m, void *v)
+{
+	unsigned long running, sleeping, unint, zombie, dead, stopped;
+	unsigned long unint_avg[3], seqno;
+
+	running = nr_running();
+	unint = nr_uninterruptible();
+	sleeping = nr_sleeping();
+	zombie = nr_zombie;
+	dead = atomic_read(&nr_dead);
+	stopped = 0; /* Broken since 3.10 */
+
+	/* Retry until the three averages are copied consistently. */
+	do {
+		seqno = read_seqcount_begin(&kstat_glob.nr_unint_avg_seq);
+		memcpy(unint_avg, kstat_glob.nr_unint_avg, sizeof(unint_avg));
+	} while (read_seqcount_retry(&kstat_glob.nr_unint_avg_seq, seqno));
+
+	seq_printf(m, "VEs: %d\n", nr_ve);
+	seq_printf(m, "Processes: R %lu, S %lu, D %lu, "
+		   "Z %lu, T %lu, X %lu\n",
+		   running, sleeping, unint, zombie, stopped, dead);
+	seq_printf(m, "Processes avg: unint %lu %lu %lu\n",
+		   unint_avg[0] >> FSHIFT,
+		   unint_avg[1] >> FSHIFT,
+		   unint_avg[2] >> FSHIFT);
+}
+
+/*
+ * Print the "cycles_per_jiffy" line.  All time slices are measured in
+ * nanoseconds now, so the value is the length of one jiffy in
+ * microseconds multiplied by 1000.  @v is unused.
+ */
+static void cycles_per_jiffy_show(struct seq_file *m, void *v)
+{
+	u64 per_jiffy = ((u64) jiffies_to_usecs(1)) * 1000;
+
+	seq_printf(m, "cycles_per_jiffy: %llu\n", per_jiffy);
+}
+
+/* Print the "jiffies_per_second" line, i.e. the value of HZ.  @v is unused. */
+static void jiffies_per_second_show(struct seq_file *m, void *v)
+{
+	seq_printf(m,
+		   "jiffies_per_second: %u\n",
+		   HZ);
+}
+
+/*
+ * Print the "kernel_text_csum_broken" line.  The value is hard-coded
+ * to 0 here.  @v is unused.
+ */
+static void kernel_text_csum_seq_show(struct seq_file *m, void *v)
+{
+	seq_printf(m,
+		   "kernel_text_csum_broken: %d\n",
+		   0);
+}
+
+/*
+ * Print the "Swap cache" line from the add, delete and lookup counters
+ * of the global swap_cache_info.  @v is unused.
+ */
+static void swap_cache_seq_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "Swap cache: add %lu, del %lu, find %lu/%lu\n",
+		   swap_cache_info.add_total,
+		   swap_cache_info.del_total,
+		   swap_cache_info.find_success,
+		   swap_cache_info.find_total);
+}
+
+/*
+ * Special structure to store statistics summarized per zone type.
+ * 'struct zone' is not used because of its tremendous size.
+ */
+struct zonestat {
+ const char *name; /* zone name, taken from the first named zone seen for this type */
+ unsigned long free_pages; /* sum of NR_FREE_PAGES */
+ unsigned long nr_free[MAX_ORDER]; /* sum of free_area[order].nr_free, per order */
+ unsigned long pages_min; /* sum of min_wmark_pages() */
+ unsigned long pages_low; /* sum of low_wmark_pages() */
+ unsigned long pages_high; /* sum of high_wmark_pages() */
+ unsigned long nr_active; /* sum of NR_ACTIVE_ANON + NR_ACTIVE_FILE */
+ unsigned long nr_inactive; /* sum of NR_INACTIVE_ANON + NR_INACTIVE_FILE */
+ unsigned long present_pages; /* sum of zone->present_pages; 0 means the entry is skipped on output */
+};
+
+/*
+ * Print one line per entry of @zstat (an array of MAX_NR_ZONES
+ * summaries): free pages, the per-order free counts, the watermarks,
+ * the active/inactive counters and the size.  Entries without a name
+ * or without present pages are skipped.
+ */
+static void mem_free_areas_show_zonestat(struct seq_file *m,
+					  struct zonestat *zstat)
+{
+	unsigned type;
+
+	for (type = 0; type < MAX_NR_ZONES; type++) {
+		struct zonestat *zs = zstat + type;
+		unsigned int ord;
+
+		/* Nothing to show for unnamed or empty zones. */
+		if (!zs->name || !zs->present_pages)
+			continue;
+
+		seq_printf(m, "%s free %lu (", zs->name, zs->free_pages);
+
+		for (ord = 0; ord < MAX_ORDER; ord++) {
+			seq_printf(m, "%lu*%lu ", zs->nr_free[ord],
+				   1UL << ord);
+		}
+
+		seq_printf(m, ") min %lu low %lu high %lu "
+			   "active %lu inactive %lu size %lu\n",
+			   zs->pages_min, zs->pages_low, zs->pages_high,
+			   zs->nr_active, zs->nr_inactive,
+			   zs->present_pages);
+	}
+}
+
+/*
+ * Scan all online pgdat's (i.e. memory nodes), summarize the values of
+ * identical zone types into a local zonestat array and print it with
+ * mem_free_areas_show_zonestat().  @v is unused.
+ */
+static void mem_free_areas_show(struct seq_file *m, void *v)
+{
+	pg_data_t *pgdat;
+	struct zonestat zones[MAX_NR_ZONES];
+	struct zonestat *zdst;
+	struct zone *zsrc;
+	int type, order;
+
+	memset(zones, 0, sizeof(zones));
+
+	for_each_online_pgdat (pgdat) {
+		for (type = 0; type < MAX_NR_ZONES; type++) {
+			unsigned long flags;
+
+			zdst = &zones[type];
+			/*
+			 * zsrc points into the node_zones[] array and so can
+			 * never be NULL; only the name needs checking.
+			 */
+			zsrc = pgdat->node_zones + type;
+			if (!zsrc->name)
+				continue;
+
+			if (!zdst->name)
+				zdst->name = zsrc->name;
+			else if (strcmp(zsrc->name, zdst->name))
+				/* This shouldn't happen! */
+				printk(KERN_WARNING "Warning: names mismatch for "
+					"zone %d: %s != %s\n",
+					type, zsrc->name, zdst->name);
+
+			/* The free lists are read under the zone lock. */
+			spin_lock_irqsave(&zsrc->lock, flags);
+			for (order = 0; order < MAX_ORDER; order++)
+				zdst->nr_free[order] += zsrc->free_area[order].nr_free;
+			spin_unlock_irqrestore(&zsrc->lock, flags);
+
+			zdst->nr_active += zone_page_state(zsrc, NR_ACTIVE_ANON) +
+				zone_page_state(zsrc, NR_ACTIVE_FILE);
+			zdst->nr_inactive += zone_page_state(zsrc, NR_INACTIVE_ANON) +
+				zone_page_state(zsrc, NR_INACTIVE_FILE);
+			zdst->pages_min += min_wmark_pages(zsrc);
+			zdst->pages_low += low_wmark_pages(zsrc);
+			zdst->pages_high += high_wmark_pages(zsrc);
+			zdst->present_pages += zsrc->present_pages;
+			zdst->free_pages += zone_page_state(zsrc, NR_FREE_PAGES);
+		}
+	}
+	mem_free_areas_show_zonestat(m, zones);
+}
+
+/*
+ * Print the "Memory fails" table: for every allocation type, the sum of
+ * kstat_glob.alloc_fails[cpu][type] over all online cpus.  @v is unused.
+ */
+static void mem_fails_show(struct seq_file *m, void *v)
+{
+	unsigned long fails[KSTAT_ALLOCSTAT_NR] = { 0 };
+	int type, cpu;
+
+	for_each_online_cpu(cpu) {
+		for (type = 0; type < KSTAT_ALLOCSTAT_NR; type++)
+			fails[type] += kstat_glob.alloc_fails[cpu][type];
+	}
+
+	seq_puts(m, "\nMemory fails:\n");
+	for (type = 0; type < KSTAT_ALLOCSTAT_NR; type++) {
+		seq_printf(m, "%-11s %20lu\n",
+			   alloc_descr[type], fails[type]);
+	}
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * Memory management profiling
+ * ------------------------------------------------------------------------
+ */
+static void KSTAT_PERF_UPDATE(struct kstat_perf_pcpu_struct *p) /* rebuild p->last from the p->cur counters of every online cpu */
+{
+ unsigned i, cpu;
+ struct kstat_perf_pcpu_snap_struct snap, *cur;
+ /* p->last is rebuilt from scratch on every call */
+ memset(&p->last, 0, sizeof(p->last));
+ for_each_online_cpu(cpu) {
+ cur = per_cpu_ptr(p->cur, cpu);
+ do { /* retry until this cpu's counters were copied without a concurrent write */
+ i = read_seqcount_begin(&cur->lock);
+ memcpy(&snap, cur, sizeof(snap));
+ } while (read_seqcount_retry(&cur->lock, i));
+ /* maxima: keep the largest value seen across cpus, then restart the per-cpu maxima */
+ if (p->last.wall_maxdur < snap.wall_maxdur)
+ p->last.wall_maxdur = snap.wall_maxdur;
+ if (p->last.cpu_maxdur < snap.cpu_maxdur)
+ p->last.cpu_maxdur = snap.cpu_maxdur;
+ cur->wall_maxdur = cur->cpu_maxdur = 0; /* NOTE(review): written outside the seqcount write section; presumably losing a concurrent peak is acceptable - confirm */
+ /* count and total times are summed but never reset per cpu here */
+ p->last.count += snap.count;
+ p->last.wall_tottime += snap.wall_tottime;
+ p->last.cpu_tottime += snap.cpu_tottime;
+ }
+}
+
+/*
+ * Refresh the "last" snapshot of every memory management performance
+ * counter shown by mmperf_seq_show().
+ */
+static void update_mmperf(void)
+{
+	struct kstat_perf_pcpu_struct *const counters[] = {
+		&kstat_glob.ttfp,
+		&kstat_glob.cache_reap,
+		&kstat_glob.refill_inact,
+		&kstat_glob.shrink_icache,
+		&kstat_glob.shrink_dcache,
+	};
+	unsigned int n;
+
+	for (n = 0; n < ARRAY_SIZE(counters); n++)
+		KSTAT_PERF_UPDATE(counters[n]);
+}
+
+/*
+ * Print one mmperf row from p->last: label, call count, cpu and wall
+ * maximum durations, then cpu and wall total times, in that order.
+ */
+static void perf_seq_show(struct seq_file *m, const char *name,
+			  struct kstat_perf_pcpu_struct *p)
+{
+	seq_printf(m, "%-14s %10lu %20Lu %20Lu %20Lu %20Lu\n",
+		   name, p->last.count,
+		   p->last.cpu_maxdur, p->last.wall_maxdur,
+		   p->last.cpu_tottime, p->last.wall_tottime);
+}
+
+/*
+ * ->show() callback of the "mmperf" proc file.  Prints a version line,
+ * the column header and one row per performance counter.
+ * Returns 0; nothing is printed when @v is NULL.
+ */
+static int mmperf_seq_show(struct seq_file *m, void *v)
+{
+	if (v == NULL)
+		return 0;
+
+	seq_puts(m, "Version: 2.5.1\n");
+	seq_printf(m, "%-14s %10s %20s %20s %20s %20s\n",
+		   "Type", "Count",
+		   "CPU_max_dur", "Wall_max_dur",
+		   "CPU_tot_time", "Wall_tot_time");
+
+	perf_seq_show(m, "ttfp:", &kstat_glob.ttfp);
+	perf_seq_show(m, "cache_reap:", &kstat_glob.cache_reap);
+	perf_seq_show(m, "refill_inact:", &kstat_glob.refill_inact);
+	perf_seq_show(m, "shrink_icache:", &kstat_glob.shrink_icache);
+	perf_seq_show(m, "shrink_dcache:", &kstat_glob.shrink_dcache);
+
+	return 0;
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * Main loop
+ * ------------------------------------------------------------------------
+ */
+/*
+ * Body of the "vzstat" kernel thread.  Each iteration refreshes the
+ * latency, memory and mmperf statistics and then sleeps for up to
+ * LOAD_FREQ jiffies.  The loop ends when kthread_should_stop() becomes
+ * true.  @data is unused.  Always returns 0.
+ */
+static int vzstat_mon_loop(void* data)
+{
+	while (1) {
+		/*
+		 * NOTE(review): this thread never calls set_freezable(), so
+		 * try_to_freeze() presumably has no effect - confirm intent.
+		 */
+		try_to_freeze();
+#ifdef CONFIG_VE_KERNEL_CSUM
+		kernel_text_csum_check();
+#endif
+		update_alloc_latency();
+		update_memory();
+		update_mmperf();
+
+		/*
+		 * Change state before checking the stop request so that a
+		 * wakeup from kthread_stop() in between is not lost.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop()) {
+			/* Do not leave the thread function in a sleeping state. */
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+		schedule_timeout(LOAD_FREQ);
+	}
+	return 0;
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * default sequential files methods
+ * ------------------------------------------------------------------------
+ */
+/*
+ * ->start() for single-record files: hand out a non-NULL dummy token
+ * at position 0 and NULL (end of sequence) at any other position.
+ */
+static void *empty_seq_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos ? NULL : (void*)1;
+}
+
+/*
+ * ->next() for single-record files: advance the position and report
+ * that there are no further records.
+ */
+static void *empty_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+
+	return NULL;
+}
+
+/* ->stop() for single-record files: there is nothing to release. */
+static void empty_seq_stop(struct seq_file *m, void *v)
+{
+	/* intentionally empty */
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * /proc/vz/latency sequential file methods
+ * ------------------------------------------------------------------------
+ */
+/*
+ * Iterator for the "latency" file: one dummy record whose ->show()
+ * prints the whole file.  Uses C99 designated initializers instead of
+ * the obsolete GNU "field:" form, and is const because it is never
+ * modified.
+ */
+static const struct seq_operations latency_seq_op = {
+	.start	= empty_seq_start,
+	.next	= empty_seq_next,
+	.stop	= empty_seq_stop,
+	.show	= latency_seq_show,
+};
+
+/*
+ * ->proc_open() of the "latency" file: attach latency_seq_op to @file.
+ * Returns the result of seq_open().  @inode is unused.
+ */
+static int latency_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &latency_seq_op);
+
+	return ret;
+}
+
+/*
+ * File operations of the "latency" proc entry: seq_file based and
+ * read-only.  Const because the table is never modified.
+ */
+static const struct proc_ops proc_latency_operations = {
+	.proc_open	= latency_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
+};
+
+/*
+ * ------------------------------------------------------------------------
+ * /proc/vz/stats sequential file methods
+ * ------------------------------------------------------------------------
+ */
+/*
+ * ->show() callback of the "stats" proc file.  Prints a version line,
+ * the timing constants, the "Load info" section and the "Memory info"
+ * section by calling the individual *_show() helpers in order.
+ * Returns 0; nothing is printed when @v is NULL.
+ */
+static int stats_seq_show(struct seq_file *m, void *v)
+{
+	if (v == NULL)
+		return 0;
+
+	seq_puts(m, "Version: 2.6\n");
+	cycles_per_jiffy_show(m, v);
+	jiffies_per_second_show(m, v);
+
+	seq_puts(m, "\nLoad info:\n");
+	task_counts_seq_show(m, v);
+
+	seq_puts(m, "\nMemory info:\n");
+	kernel_text_csum_seq_show(m, v);
+	swap_cache_seq_show(m, v);
+	mem_free_areas_show(m, v);
+	mem_avg_show(m, v);
+	mem_fails_show(m, v);
+
+	return 0;
+}
+
+/*
+ * Iterator for the "stats" file: one dummy record whose ->show()
+ * prints the whole file.  Uses C99 designated initializers instead of
+ * the obsolete GNU "field:" form, and is const because it is never
+ * modified.
+ */
+static const struct seq_operations stats_seq_op = {
+	.start	= empty_seq_start,
+	.next	= empty_seq_next,
+	.stop	= empty_seq_stop,
+	.show	= stats_seq_show,
+};
+
+/*
+ * ->proc_open() of the "stats" file: attach stats_seq_op to @file.
+ * Returns the result of seq_open().  @inode is unused.
+ */
+static int stats_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &stats_seq_op);
+
+	return ret;
+}
+
+/*
+ * File operations of the "stats" proc entry: seq_file based and
+ * read-only.  Const because the table is never modified.
+ */
+static const struct proc_ops proc_stats_operations = {
+	.proc_open	= stats_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
+};
+
+/*
+ * ------------------------------------------------------------------------
+ * /proc/vz/mmperf sequential file methods
+ * ------------------------------------------------------------------------
+ */
+/*
+ * Iterator for the "mmperf" file: one dummy record whose ->show()
+ * prints the whole file.  Uses C99 designated initializers instead of
+ * the obsolete GNU "field:" form, and is const because it is never
+ * modified.
+ */
+static const struct seq_operations mmperf_seq_op = {
+	.start	= empty_seq_start,
+	.next	= empty_seq_next,
+	.stop	= empty_seq_stop,
+	.show	= mmperf_seq_show,
+};
+
+/*
+ * ->proc_open() of the "mmperf" file: attach mmperf_seq_op to @file.
+ * Returns the result of seq_open().  @inode is unused.
+ */
+static int mmperf_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &mmperf_seq_op);
+
+	return ret;
+}
+
+/*
+ * File operations of the "mmperf" proc entry: seq_file based and
+ * read-only.  Const because the table is never modified.
+ */
+static const struct proc_ops proc_mmperf_operations = {
+	.proc_open	= mmperf_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= seq_release,
+};
+
+/*
+ * ------------------------------------------------------------------------
+ * module init/exit code
+ * ------------------------------------------------------------------------
+ */
+
+/*
+ * Module init: create the "latency", "stats" and "mmperf" entries under
+ * proc_vz_dir and start the "vzstat" kernel thread.
+ *
+ * Returns 0 on success.  If a proc entry cannot be created the result
+ * is -EBUSY; if the thread cannot be started the error reported by
+ * kthread_run() is returned.  Everything created so far is removed
+ * again on failure.
+ */
+int __init vzstat_mon_init(void)
+{
+	struct proc_dir_entry *entry;
+	int err = -EBUSY;
+
+	entry = proc_create("latency", S_IRUGO, proc_vz_dir, &proc_latency_operations);
+	if (entry == NULL) {
+		printk(KERN_WARNING "VZSTAT: can't make proc entry\n");
+		goto fail_lat;
+	}
+
+	entry = proc_create("stats", S_IRUGO, proc_vz_dir, &proc_stats_operations);
+	if (!entry) {
+		printk(KERN_WARNING "VZSTAT: can't make proc entry\n");
+		goto fail_stat;
+	}
+
+	entry = proc_create("mmperf", S_IRUGO, proc_vz_dir, &proc_mmperf_operations);
+	if (!entry) {
+		printk(KERN_WARNING "VZSTAT: can't make proc entry\n");
+		goto fail_perf;
+	}
+
+	vzstat_thread_tsk = kthread_run(vzstat_mon_loop, NULL, "vzstat");
+	if (IS_ERR(vzstat_thread_tsk)) {
+		/* Report the real reason instead of failing silently. */
+		err = PTR_ERR(vzstat_thread_tsk);
+		printk(KERN_WARNING "VZSTAT: can't start vzstat thread (%d)\n",
+		       err);
+		goto fail_thread;
+	}
+
+	printk(KERN_INFO "VZSTAT: initialized successfully\n");
+
+	return 0;
+
+	/* Unwind in the reverse order of creation. */
+fail_thread:
+	remove_proc_entry("mmperf", proc_vz_dir);
+fail_perf:
+	remove_proc_entry("stats", proc_vz_dir);
+fail_stat:
+	remove_proc_entry("latency", proc_vz_dir);
+fail_lat:
+	return err;
+}
+
+/*
+ * Module exit: stop the "vzstat" thread first, then remove the three
+ * proc entries in the reverse order of their creation.
+ */
+void __exit vzstat_mon_exit(void)
+{
+	/* Stop the updater before tearing down the files. */
+	kthread_stop(vzstat_thread_tsk);
+
+	remove_proc_entry("mmperf", proc_vz_dir);
+	remove_proc_entry("stats", proc_vz_dir);
+	remove_proc_entry("latency", proc_vz_dir);
+}
+
+module_init(vzstat_mon_init);
+module_exit(vzstat_mon_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Virtuozzo <devel at openvz.org>");
diff --git a/kernel/ve/vzstat_core.c b/kernel/ve/vzstat_core.c
new file mode 100644
index 000000000000..26fe8fba2e5b
--- /dev/null
+++ b/kernel/ve/vzstat_core.c
@@ -0,0 +1,103 @@
+/*
+ * kernel/ve/vzstat_core.c
+ *
+ * Copyright (c) 2015 Parallels IP Holdings GmbH
+ * Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
+ *
+ */
+
+#include <linux/sched/loadavg.h>
+#include <linux/vzstat.h>
+#include <linux/sched.h>
+
+void KSTAT_PERF_ADD(struct kstat_perf_pcpu_struct *ptr, u64 real_time, u64 cpu_time) /* account one sample (wall and cpu duration) in the current cpu's counters of @ptr */
+{
+ struct kstat_perf_pcpu_snap_struct *cur = get_cpu_ptr(ptr->cur);
+ /* updates happen inside the write section of cur->lock; KSTAT_PERF_UPDATE() reads under the same seqcount */
+ write_seqcount_begin(&cur->lock);
+ cur->count++;
+ if (cur->wall_maxdur < real_time) /* track the longest wall duration */
+ cur->wall_maxdur = real_time;
+ cur->wall_tottime += real_time;
+ if (cur->cpu_maxdur < cpu_time) /* track the longest cpu duration */
+ cur->cpu_maxdur = cpu_time;
+ cur->cpu_tottime += cpu_time;
+ write_seqcount_end(&cur->lock);
+ put_cpu_ptr(cur); /* pairs with get_cpu_ptr() above */
+}
+
+/*
+ * Account one latency sample @dur in the current cpu's counters of @p.
+ * Must be called with interrupts disabled to avoid this 3-way deadlock:
+ *
+ * timer interrupt:
+ * write_seqlock(&xtime_lock);
+ * spin_lock_irqsave(&kstat_glb_lock);
+ *
+ * update_schedule_latency():
+ * spin_lock_irq(&kstat_glb_lock);
+ * read_seqcount_begin(&cur->lock)
+ *
+ * some-interrupt during KSTAT_LAT_PCPU_ADD()
+ * KSTAT_LAT_PCPU_ADD()
+ * write_seqcount_begin(&cur->lock);
+ * <interrupt>
+ * ktime_get()
+ * read_seqcount_begin(&xtime_lock);
+ */
+void KSTAT_LAT_PCPU_ADD(struct kstat_lat_pcpu_struct *p, u64 dur)
+{
+ struct kstat_lat_pcpu_snap_struct *cur;
+ seqcount_t *seq;
+ /* counters come from @p, the seqcount is this cpu's kstat_pcpu_seq */
+ cur = this_cpu_ptr(p->cur);
+ seq = this_cpu_ptr(&kstat_pcpu_seq);
+ /* write section: KSTAT_LAT_PCPU_UPDATE() reads under the same seqcount */
+ write_seqcount_begin(seq);
+ cur->count++;
+ if (cur->maxlat < dur) /* track the largest sample */
+ cur->maxlat = dur;
+ cur->totlat += dur;
+ write_seqcount_end(seq);
+}
+
+/*
+ * Rebuild p->last from the per-cpu counters, reset the per-cpu maxlat and
+ * update the p->avg[] averages. Serialization is the caller's duty.
+ */
+void KSTAT_LAT_PCPU_UPDATE(struct kstat_lat_pcpu_struct *p)
+{
+ struct kstat_lat_pcpu_snap_struct snap, *cur;
+ unsigned i, cpu;
+ seqcount_t *seq;
+ u64 m;
+ /* p->last is rebuilt from scratch on every call */
+ memset(&p->last, 0, sizeof(p->last));
+ for_each_online_cpu(cpu) {
+ cur = per_cpu_ptr(p->cur, cpu);
+ seq = per_cpu_ptr(&kstat_pcpu_seq, cpu);
+ do { /* retry until this cpu's counters were copied without a concurrent write */
+ i = read_seqcount_begin(seq);
+ memcpy(&snap, cur, sizeof(snap));
+ } while (read_seqcount_retry(seq, i));
+ /*
+ * read above and this update of maxlat is not atomic,
+ * but this is OK, since it happens rarely and losing
+ * a couple of peaks is not essential. xemul
+ */
+ cur->maxlat = 0;
+ /* count and totlat are summed over cpus; maxlat is the largest per-cpu value */
+ p->last.count += snap.count;
+ p->last.totlat += snap.totlat;
+ if (p->last.maxlat < snap.maxlat)
+ p->last.maxlat = snap.maxlat;
+ }
+ /* the averages are fed with the larger of the fresh maximum and p->max_snap */
+ m = (p->last.maxlat > p->max_snap ? p->last.maxlat : p->max_snap);
+ p->avg[0] = calc_load(p->avg[0], EXP_1, m);
+ p->avg[1] = calc_load(p->avg[1], EXP_5, m);
+ p->avg[2] = calc_load(p->avg[2], EXP_15, m);
+ /* reset max_snap to calculate it correctly next time */
+ p->max_snap = 0;
+}
+EXPORT_SYMBOL(KSTAT_LAT_PCPU_UPDATE);
More information about the Devel
mailing list