[Devel] [PATCH RHEL COMMIT] ve/proc/loadavg: Virtualize /proc/loadavg in Containers
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Oct 1 19:38:45 MSK 2021
The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit fedfe80301da8a2c7cda4475bae8a601e7089fd3
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date: Fri Oct 1 19:38:45 2021 +0300
ve/proc/loadavg: Virtualize /proc/loadavg in Containers
The patch is based on the following vz7 commits:
ecdce58b214c ("sched: Export per task_group statistics_work")
a58fb58bff1c ("Use ve init task's css instead of opening cgroup via vfs")
5f2a49a05629 ("sched/ve: Use cfs_rq::h_nr_running to count loadavg")
vz8 rebase notes:
1) the vz-specific cpu cgroup file "proc.loadavg" has been dropped
2) the "nr_running" field in /proc/loadavg inside a CT includes running
realtime tasks (even though realtime tasks are not allowed to run
inside a CT) and tasks in D state (just as on the Host); the field
layout itself is unchanged, see the decoding sketch below
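For reference, the virtualized file keeps the standard /proc/loadavg
layout: three fixed-point load averages, an "nr_running/nr_total" pair
and a pid cursor. A minimal standalone sketch of the fixed-point
decoding (macro values mirror include/linux/sched/loadavg.h; the
sample value is made up):
/* Standalone sketch: decode a fixed-point load average the way
 * loadavg_proc_show() / cpu_cgroup_proc_loadavg() print it. */
#include <stdio.h>
#define FSHIFT 11 /* bits of precision */
#define FIXED_1 (1 << FSHIFT) /* 1.0 in fixed-point */
#define LOAD_INT(x) ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)
int main(void)
{
	unsigned long avenrun = 1536; /* made-up sample: 0.75 */
	unsigned long v = avenrun + FIXED_1 / 200; /* +1/200 rounding, as in the patch */
	printf("%lu.%02lu\n", LOAD_INT(v), LOAD_FRAC(v)); /* prints "0.75" */
	return 0;
}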
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
(cherry-picked from vz8 commit e0012c83f2a8 ("ve/proc/loadavg:
Virtualize /proc/loadavg in Containers"))
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
fs/proc/loadavg.c | 10 ++++++++++
include/linux/ve.h | 8 ++++++++
kernel/sched/core.c | 40 ++++++++++++++++++++++++++++++++++++++++
kernel/ve/ve.c | 16 ++++++++++++++++
4 files changed, 74 insertions(+)
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index c651c6a2d285..32148d6f66ef 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -9,10 +9,20 @@
#include <linux/seq_file.h>
#include <linux/seqlock.h>
#include <linux/time.h>
+#include <linux/ve.h>

static int loadavg_proc_show(struct seq_file *m, void *v)
{
unsigned long avnrun[3];
+ struct ve_struct *ve;
+
+ ve = get_exec_env();
+ if (!ve_is_super(ve)) {
+ int ret;
+ ret = ve_show_loadavg(ve, m);
+ if (ret != -ENOSYS)
+ return ret;
+ }

get_avenrun(avnrun, FIXED_1/200, 0);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 19a590bc86d4..95dcd99267df 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -150,4 +150,12 @@ static inline void monotonic_ve_to_abs(clockid_t which_clock,
#endif /* CONFIG_VE */

+struct seq_file;
+
+#if defined(CONFIG_VE) && defined(CONFIG_CGROUP_SCHED)
+int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p);
+#else
+static inline int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p) { return -ENOSYS; }
+#endif
+
#endif /* _LINUX_VE_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62a31f1b9cc9..4c5eb09b4888 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -72,6 +72,8 @@ __read_mostly int sysctl_resched_latency_warn_ms = 100;
__read_mostly int sysctl_resched_latency_warn_once = 1;
#endif /* CONFIG_SCHED_DEBUG */

+#include "../cgroup/cgroup-internal.h" /* For cgroup_task_count() */
+
/*
* Number of tasks to iterate in a single balance run.
* Limited because this is done with IRQs disabled.
@@ -10538,6 +10540,44 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
}
#endif /* CONFIG_RT_GROUP_SCHED */
+int cpu_cgroup_proc_loadavg(struct cgroup_subsys_state *css,
+ struct seq_file *p)
+{
+ struct cgroup *cgrp = css->cgroup;
+ struct task_group *tg = css_tg(css);
+ unsigned long avnrun[3];
+ int nr_running = 0;
+ int i;
+
+ avnrun[0] = tg->avenrun[0] + FIXED_1/200;
+ avnrun[1] = tg->avenrun[1] + FIXED_1/200;
+ avnrun[2] = tg->avenrun[2] + FIXED_1/200;
+
+ for_each_possible_cpu(i) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ nr_running += tg->cfs_rq[i]->h_nr_running;
+ /*
+ * We do not export nr_unint to parent task groups
+ * like we do for h_nr_running, as it gives additional
+ * overhead for activate/deactivate operations. So, we
+ * don't account child cgroup unint tasks here.
+ */
+ nr_running += tg->cfs_rq[i]->nr_unint;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+ nr_running += tg->rt_rq[i]->rt_nr_running;
+#endif
+ }
+
+ seq_printf(p, "%lu.%02lu %lu.%02lu %lu.%02lu %d/%d %d\n",
+ LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
+ LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
+ LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
+ nr_running, cgroup_task_count(cgrp),
+ idr_get_cursor(&task_active_pid_ns(current)->idr));
+ return 0;
+}
+
static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index f3df12f8638b..178aa658b50b 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -1095,3 +1095,19 @@ static int __init ve_subsys_init(void)
return 0;
}
late_initcall(ve_subsys_init);
+
+#ifdef CONFIG_CGROUP_SCHED
+int cpu_cgroup_proc_loadavg(struct cgroup_subsys_state *css,
+ struct seq_file *p);
+
+int ve_show_loadavg(struct ve_struct *ve, struct seq_file *p)
+{
+ struct cgroup_subsys_state *css;
+ int err;
+
+ css = ve_get_init_css(ve, cpu_cgrp_id);
+ err = cpu_cgroup_proc_loadavg(css, p);
+ css_put(css);
+ return err;
+}
+#endif /* CONFIG_CGROUP_SCHED */
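As a quick functional check (a hypothetical snippet, not part of the
patch): run the following on the Host and inside a CT; with the patch
applied, the CT reports its own load averages and task counts rather
than the Host's.
/* Hypothetical check, not part of the patch: print /proc/loadavg. */
#include <stdio.h>
int main(void)
{
	char buf[128];
	FILE *f = fopen("/proc/loadavg", "r");
	if (!f) {
		perror("/proc/loadavg");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		fputs(buf, stdout); /* e.g. "0.75 0.50 0.25 1/42 1337" */
	fclose(f);
	return 0;
}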