[Devel] [PATCH RH7 draft] loadavg: add avencgmtxwait to show average cgroup_mutex wait queue size

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Wed Apr 6 18:40:12 MSK 2022


Show the average cgroup_mutex wait queue size over the last 1, 5 and 15
minutes in /proc/loadavg, computed the same way as the load averages
already reported in that file.

https://jira.sw.ru/browse/PSBM-139206

Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 fs/proc/loadavg.c   | 11 +++++++++--
 kernel/cgroup.c     | 12 ++++++++++++
 kernel/sched/core.c | 29 +++++++++++++++++++++++++++--
 3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 40d8a90b0f13..71e5a5519b08 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -11,9 +11,12 @@
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
+void get_avencgmtxwait(unsigned long *waites, unsigned long offset, int shift);
+
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
 	unsigned long avnrun[3];
+	unsigned long waites[3];
 	struct ve_struct *ve;
 
 	ve = get_exec_env();
@@ -25,13 +28,17 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
 	}
 
 	get_avenrun(avnrun, FIXED_1/200, 0);
+	get_avencgmtxwait(waites, FIXED_1/200, 0);
 
-	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
+	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n%lu.%02lu %lu.%02lu %lu.%02lu\n",
 		LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
 		LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
 		LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
 		nr_running(), nr_threads,
-		task_active_pid_ns(current)->last_pid);
+		task_active_pid_ns(current)->last_pid,
+		LOAD_INT(waites[0]), LOAD_FRAC(waites[0]),
+		LOAD_INT(waites[1]), LOAD_FRAC(waites[1]),
+		LOAD_INT(waites[2]), LOAD_FRAC(waites[2]));
 	return 0;
 }
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 90490017ac0f..87ac4418de62 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6475,3 +6475,35 @@ void cgroup_kernel_close(struct cgroup *cgrp)
 		check_for_release(cgrp);
 	}
 }
+
+/**
+ * cgmtx_get_waiters - sample the length of the cgroup_mutex wait queue
+ *
+ * The wait list is modified under cgroup_mutex.wait_lock, so it is walked
+ * with that lock held.  calc_global_load() samples this from the timer
+ * tick, where spinning on a lock that the interrupted context may hold
+ * would deadlock; the lock is therefore only try-locked and the previous
+ * sample is reported when it is busy.
+ *
+ * Returns the number of entries on cgroup_mutex.wait_list, or the last
+ * counted value when wait_lock could not be taken.
+ */
+long cgmtx_get_waiters(void)
+{
+	static long last_waiters;
+	struct list_head *lh;
+	unsigned long flags;
+	long waiters = 0;
+
+	if (!spin_trylock_irqsave(&cgroup_mutex.wait_lock, flags))
+		return last_waiters;
+
+	list_for_each(lh, &cgroup_mutex.wait_list)
+		waiters++;
+	last_waiters = waiters;
+
+	spin_unlock_irqrestore(&cgroup_mutex.wait_lock, flags);
+
+	return waiters;
+}
+EXPORT_SYMBOL_GPL(cgmtx_get_waiters);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f5c2545abef5..118572d6bd85 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2899,6 +2899,8 @@ static atomic_long_t calc_load_tasks;
 static unsigned long calc_load_update;
 unsigned long avenrun[3];
 EXPORT_SYMBOL(avenrun); /* should be removed */
+unsigned long avencgmtxwait[3];
+EXPORT_SYMBOL(avencgmtxwait);
 
 /**
  * get_avenrun - get the load average array
@@ -2915,6 +2917,20 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 	loads[2] = (avenrun[2] + offset) << shift;
 }
 
+/**
+ * get_avencgmtxwait - get the cgroup_mutex waiter average array
+ * @waites:	pointer to a three-element array receiving the values
+ * @offset:	offset to add to each value
+ * @shift:	left shift applied after adding @offset
+ */
+void get_avencgmtxwait(unsigned long *waites, unsigned long offset, int shift)
+{
+	int i;
+
+	for (i = 0; i < 3; i++)
+		waites[i] = (avencgmtxwait[i] + offset) << shift;
+}
+
 void get_avenrun_ve(unsigned long *loads, unsigned long offset, int shift)
 {
 	struct task_group *tg = task_group(current);
@@ -3187,6 +3196,8 @@ calc_load_n(unsigned long load, unsigned long exp,
 	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
 }
 
+extern long cgmtx_get_waiters(void);
+
 /*
  * NO_HZ can leave us missing all per-cpu ticks calling
  * calc_load_account_active(), but since an idle CPU folds its delta into
@@ -3198,7 +3209,7 @@ calc_load_n(unsigned long load, unsigned long exp,
  */
 static void calc_global_nohz(void)
 {
-	long delta, active, n;
+	long delta, active, n, waiters;
 
 	if (!time_before(jiffies, calc_load_update + 10)) {
 		/*
@@ -3214,6 +3225,13 @@ static void calc_global_nohz(void)
 		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
 		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
 
+		waiters = cgmtx_get_waiters();
+		waiters = waiters > 0 ? waiters * FIXED_1 : 0;
+
+		avencgmtxwait[0] = calc_load_n(avencgmtxwait[0], EXP_1, waiters, n);
+		avencgmtxwait[1] = calc_load_n(avencgmtxwait[1], EXP_5, waiters, n);
+		avencgmtxwait[2] = calc_load_n(avencgmtxwait[2], EXP_15, waiters, n);
+
 		calc_load_update += n * LOAD_FREQ;
 	}
 
@@ -3240,7 +3258,7 @@ static inline void calc_global_nohz(void) { }
  */
 bool calc_global_load(unsigned long ticks)
 {
-	long active, delta;
+	long active, delta, waiters;
 
 	if (time_before(jiffies, calc_load_update + 10))
 		return false;
@@ -3259,6 +3277,13 @@ bool calc_global_load(unsigned long ticks)
 	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
 	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
 
+	waiters = cgmtx_get_waiters();
+	waiters = waiters > 0 ? waiters * FIXED_1 : 0;
+
+	avencgmtxwait[0] = calc_load(avencgmtxwait[0], EXP_1, waiters);
+	avencgmtxwait[1] = calc_load(avencgmtxwait[1], EXP_5, waiters);
+	avencgmtxwait[2] = calc_load(avencgmtxwait[2], EXP_15, waiters);
+
 	calc_load_update += LOAD_FREQ;
 
 	/*
-- 
2.35.1



More information about the Devel mailing list