[Devel] [PATCH RHEL8 COMMIT] ve/proc/loadavg: Virtualize /proc/loadavg in Containers
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Oct 28 19:21:32 MSK 2020
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.14
------>
commit 41f6bed2b424afbd51364f262ed7473b706f24b5
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date: Wed Oct 21 16:01:31 2020 +0300
ve/proc/loadavg: Virtualize /proc/loadavg in Containers
The patch is based on the following vz7 commits:
ecdce58b214c ("sched: Export per task_group statistics_work")
a58fb58bff1c ("Use ve init task's css instead of opening cgroup via vfs")
5f2a49a05629 ("sched/ve: Use cfs_rq::h_nr_running to count loadavg")
vz8 rebase notes:
1) the vz-specific cpu cgroup file "proc.loadavg" has been dropped;
2) the "nr_running" field in /proc/loadavg inside a CT includes running
realtime tasks (although they are not allowed to run inside a CT)
and tasks in D state (as on the Host)
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
fs/proc/loadavg.c | 10 ++++++++++
include/linux/ve.h | 8 ++++++++
kernel/sched/core.c | 40 ++++++++++++++++++++++++++++++++++++++++
kernel/ve/ve.c | 16 ++++++++++++++++
4 files changed, 74 insertions(+)
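
For reference, the virtualized /proc/loadavg keeps the standard field layout
produced by the seq_printf() in cpu_cgroup_proc_loadavg() below: three
fixed-point load averages, "nr_running/nr_threads" computed from the CT's cpu
cgroup, and a last-PID value taken from the CT's pid namespace IDR cursor.
A minimal user-space sketch of reading those fields inside a Container
(illustrative only, not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long a1i, a1f, a5i, a5f, a15i, a15f;
	int nr_running, nr_threads, last_pid;
	FILE *f = fopen("/proc/loadavg", "r");

	if (!f)
		return 1;
	/* Matches the seq_printf() format in cpu_cgroup_proc_loadavg() */
	if (fscanf(f, "%lu.%lu %lu.%lu %lu.%lu %d/%d %d",
		   &a1i, &a1f, &a5i, &a5f, &a15i, &a15f,
		   &nr_running, &nr_threads, &last_pid) != 9) {
		fclose(f);
		return 1;
	}
	fclose(f);

	/* Inside a CT these are per-Container values; on the host, system-wide */
	printf("load1=%lu.%02lu runnable=%d threads=%d last_pid=%d\n",
	       a1i, a1f, nr_running, nr_threads, last_pid);
	return 0;
}
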
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 40467c3ade86..b884a1a59a3d 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -9,10 +9,20 @@
#include <linux/seq_file.h>
#include <linux/seqlock.h>
#include <linux/time.h>
+#include <linux/ve.h>
static int loadavg_proc_show(struct seq_file *m, void *v)
{
	unsigned long avnrun[3];
+	struct ve_struct *ve;
+
+	ve = get_exec_env();
+	if (!ve_is_super(ve)) {
+		int ret;
+		ret = ve_show_loadavg(ve, m);
+		if (ret != -ENOSYS)
+			return ret;
+	}
	get_avenrun(avnrun, FIXED_1/200, 0);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index ec7dc522ac1f..0341bb915923 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -176,4 +176,12 @@ static inline void monotonic_ve_to_abs(clockid_t which_clock,
#endif /* CONFIG_VE */
+struct seq_file;
+
+#if defined(CONFIG_VE) && defined(CONFIG_CGROUP_SCHED)
+int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p);
+#else
+static inline int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p) { return -ENOSYS; }
+#endif
+
#endif /* _LINUX_VE_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a6100bf3f625..0116742de578 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -41,6 +41,8 @@ const_debug unsigned int sysctl_sched_features =
#undef SCHED_FEAT
#endif
+#include "../cgroup/cgroup-internal.h" /* For cgroup_task_count() */
+
/*
* Number of tasks to iterate in a single balance run.
* Limited because this is done with IRQs disabled.
@@ -7134,6 +7136,44 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
}
#endif /* CONFIG_RT_GROUP_SCHED */
+int cpu_cgroup_proc_loadavg(struct cgroup_subsys_state *css,
+			    struct seq_file *p)
+{
+	struct cgroup *cgrp = css->cgroup;
+	struct task_group *tg = css_tg(css);
+	unsigned long avnrun[3];
+	int nr_running = 0;
+	int i;
+
+	avnrun[0] = tg->avenrun[0] + FIXED_1/200;
+	avnrun[1] = tg->avenrun[1] + FIXED_1/200;
+	avnrun[2] = tg->avenrun[2] + FIXED_1/200;
+
+	for_each_possible_cpu(i) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		nr_running += tg->cfs_rq[i]->h_nr_running;
+		/*
+		 * We do not export nr_unint to parent task groups
+		 * like we do for h_nr_running, as it gives additional
+		 * overhead for activate/deactivate operations. So, we
+		 * don't account child cgroup unint tasks here.
+		 */
+		nr_running += tg->cfs_rq[i]->nr_unint;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+		nr_running += tg->rt_rq[i]->rt_nr_running;
+#endif
+	}
+
+	seq_printf(p, "%lu.%02lu %lu.%02lu %lu.%02lu %d/%d %d\n",
+		   LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
+		   LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
+		   LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
+		   nr_running, cgroup_task_count(cgrp),
+		   idr_get_cursor(&task_active_pid_ns(current)->idr));
+	return 0;
+}
+
static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
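
A note on the fixed-point arithmetic used in cpu_cgroup_proc_loadavg() above:
the kernel stores load averages with FSHIFT == 11 bits of fraction
(FIXED_1 == 1 << 11), the FIXED_1/200 term rounds to the nearest hundredth,
and LOAD_INT()/LOAD_FRAC() split the value into integer and two-digit
fractional parts for printing. A small stand-alone sketch of the same
conversion (the macros mirror the kernel definitions; the sample avenrun
value is made up):

#include <stdio.h>

#define FSHIFT		11			/* bits of fractional precision */
#define FIXED_1		(1 << FSHIFT)		/* 1.0 in fixed-point */
#define LOAD_INT(x)	((x) >> FSHIFT)
#define LOAD_FRAC(x)	LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

int main(void)
{
	unsigned long avenrun = 3 * FIXED_1 / 2;	 /* sample load of 1.50 */
	unsigned long rounded = avenrun + FIXED_1 / 200; /* +0.005 for rounding */

	printf("%lu.%02lu\n", LOAD_INT(rounded), LOAD_FRAC(rounded)); /* "1.50" */
	return 0;
}
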
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 43e37b27e887..193fdb95daab 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -1147,3 +1147,19 @@ int vz_security_protocol_check(struct net *net, int protocol)
}
}
EXPORT_SYMBOL_GPL(vz_security_protocol_check);
+
+#ifdef CONFIG_CGROUP_SCHED
+int cpu_cgroup_proc_loadavg(struct cgroup_subsys_state *css,
+			    struct seq_file *p);
+
+int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p)
+{
+	struct cgroup_subsys_state *css;
+	int err;
+
+	css = ve_get_init_css(ve, cpu_cgrp_id);
+	err = cpu_cgroup_proc_loadavg(css, p);
+	css_put(css);
+	return err;
+}
+#endif /* CONFIG_CGROUP_SCHED */
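
Taken together, the call chain inside a Container is loadavg_proc_show() ->
ve_show_loadavg() -> cpu_cgroup_proc_loadavg() on the CT's cpu cgroup css
obtained via ve_get_init_css(); when CONFIG_VE or CONFIG_CGROUP_SCHED is off,
the ve_show_loadavg() stub returns -ENOSYS and the original host-wide path
runs unchanged. A condensed user-space sketch of that fallback pattern (all
names below are stand-ins for illustration, not kernel code):

/*
 * HAVE_PER_CT_LOADAVG plays the role of CONFIG_VE && CONFIG_CGROUP_SCHED.
 */
#include <errno.h>
#include <stdio.h>

static int ct_show_loadavg(void)
{
#ifdef HAVE_PER_CT_LOADAVG
	printf("per-CT loadavg from the CT's cpu cgroup\n");
	return 0;
#else
	return -ENOSYS;			/* like the static-inline stub in ve.h */
#endif
}

static int show_loadavg(int in_container)
{
	if (in_container) {
		int ret = ct_show_loadavg();

		if (ret != -ENOSYS)
			return ret;	/* per-CT path handled it (or failed) */
	}
	printf("host-wide loadavg\n");	/* original code path, unchanged */
	return 0;
}

int main(void)
{
	return show_loadavg(1);
}
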