[Devel] [PATCH RHEL7 COMMIT] ve/sched: take nr_cpus and cpu_rate from ve root task group
Konstantin Khorenko
khorenko at virtuozzo.com
Tue Nov 7 12:06:21 MSK 2017
The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.1.1.vz7.37.21
------>
commit e661261a0f8af475ae0dd7980bd73555ff7724a1
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date: Tue Nov 7 12:06:21 2017 +0300
ve/sched: take nr_cpus and cpu_rate from ve root task group
Patchset description:
ve: properly handle nr_cpus and cpu_rate for nested cgroups
https://jira.sw.ru/browse/PSBM-69678
Pavel Tikhomirov (3):
cgroup: remove rcu_read_lock from cgroup_get_ve_root
cgroup: make cgroup_get_ve_root visible in kernel/sched/core.c
sched: take nr_cpus and cpu_rate from ve root task group
=============================================================
This patch description:
The CPU view inside a container should depend only on the root cpu
cgroup's nr_cpus/rate configuration. So replace tg->xxx references
with tg_xxx(tg) helpers that get xxx from the root ve cgroup. We
still allow setting/reading rate and nr_cpus directly in nested
cgroups, but these are just converted to the corresponding cfs_period
and cfs_quota setup, and do _not_ influence the in-container view
of cpus and their stats.
Also remove the excessive rcu_read_lock/unlock, as we have no rcu
dereference in between; it looks like a leftover from task_group(),
which differs between VZ6 and VZ7.
https://jira.sw.ru/browse/PSBM-69678
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
include/linux/sched.h | 2 ++
kernel/sched/core.c | 56 +++++++++++++++++++++++++++++++++++++++++----------
kernel/sched/fair.c | 9 +++++----
3 files changed, 52 insertions(+), 15 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 84fe6cd..03c06ff6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3182,6 +3182,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
#endif /* CONFIG_SMP */
+extern unsigned int tg_cpu_rate(struct task_group *tg);
+extern unsigned int tg_nr_cpus(struct task_group *tg);
#ifdef CONFIG_CFS_CPULIMIT
extern unsigned int task_nr_cpus(struct task_struct *p);
extern unsigned int task_vcpu_id(struct task_struct *p);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7a40fa8..5b3daa1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -340,15 +340,40 @@ __read_mostly int scheduler_running;
*/
int sysctl_sched_rt_runtime = 950000;
+static inline struct task_group *cgroup_tg(struct cgroup *cgrp);
+
+static struct task_group *ve_root_tg(struct task_group *tg) {
+ struct cgroup *cg;
+
+ if (!tg)
+ return NULL;
+
+ cg = cgroup_get_ve_root(tg->css.cgroup);
+ WARN_ONCE(!cg, "Failed to find ve root cgroup, possible container configuration problem.\n");
+ return cg ? cgroup_tg(cg) : NULL;
+}
+
+unsigned int tg_cpu_rate(struct task_group *tg)
+{
+ unsigned int cpu_rate = 0;
#ifdef CONFIG_CFS_CPULIMIT
-unsigned int task_nr_cpus(struct task_struct *p)
+ tg = ve_root_tg(tg);
+ if (tg)
+ cpu_rate = tg->cpu_rate;
+#endif
+ return cpu_rate;
+}
+
+unsigned int tg_nr_cpus(struct task_group *tg)
{
unsigned int nr_cpus = 0;
unsigned int max_nr_cpus = num_online_cpus();
- rcu_read_lock();
- nr_cpus = task_group(p)->nr_cpus;
- rcu_read_unlock();
+#ifdef CONFIG_CFS_CPULIMIT
+ tg = ve_root_tg(tg);
+ if (tg)
+ nr_cpus = tg->nr_cpus;
+#endif
if (!nr_cpus || nr_cpus > max_nr_cpus)
nr_cpus = max_nr_cpus;
@@ -356,6 +381,17 @@ unsigned int task_nr_cpus(struct task_struct *p)
return nr_cpus;
}
+#ifdef CONFIG_CFS_CPULIMIT
+unsigned int task_nr_cpus(struct task_struct *p)
+{
+ return tg_nr_cpus(task_group(p));
+}
+
+static unsigned int task_cpu_rate(struct task_struct *p)
+{
+ return tg_cpu_rate(task_group(p));
+}
+
unsigned int task_vcpu_id(struct task_struct *p)
{
return task_cpu(p) % task_nr_cpus(p);
@@ -370,9 +406,7 @@ unsigned int sched_cpulimit_scale_cpufreq(unsigned int freq)
if (!sysctl_sched_cpulimit_scale_cpufreq)
return freq;
- rcu_read_lock();
- rate = task_group(current)->cpu_rate;
- rcu_read_unlock();
+ rate = task_cpu_rate(current);
max_rate = num_online_vcpus() * MAX_CPU_RATE;
if (!rate || rate >= max_rate)
@@ -9919,8 +9953,8 @@ static void cpu_cgroup_update_vcpustat(struct cgroup *cgrp)
spin_lock(&tg->vcpustat_lock);
now = ktime_get();
- nr_vcpus = tg->nr_cpus ?: num_online_cpus();
- vcpu_rate = DIV_ROUND_UP(tg->cpu_rate, nr_vcpus);
+ nr_vcpus = tg_nr_cpus(tg);
+ vcpu_rate = DIV_ROUND_UP(tg_cpu_rate(tg), nr_vcpus);
if (!vcpu_rate || vcpu_rate > MAX_CPU_RATE)
vcpu_rate = MAX_CPU_RATE;
@@ -10005,7 +10039,7 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
struct timespec boottime;
struct task_group *tg = cgroup_tg(cgrp);
bool virt = !ve_is_super(get_exec_env()) && tg != &root_task_group;
- int nr_vcpus = tg->nr_cpus ?: num_online_cpus();
+ int nr_vcpus = tg_nr_cpus(tg);
struct kernel_cpustat *kcpustat;
unsigned long tg_nr_running = 0;
unsigned long tg_nr_iowait = 0;
@@ -10132,7 +10166,7 @@ int cpu_cgroup_proc_loadavg(struct cgroup *cgrp, struct cftype *cft,
int cpu_cgroup_get_stat(struct cgroup *cgrp, struct kernel_cpustat *kstat)
{
struct task_group *tg = cgroup_tg(cgrp);
- int nr_vcpus = tg->nr_cpus ?: num_online_cpus();
+ int nr_vcpus = tg_nr_cpus(tg);
int i;
kernel_cpustat_zero(kstat);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c677e93..5697778 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -524,11 +524,12 @@ static enum hrtimer_restart sched_cfs_active_timer(struct hrtimer *timer)
static inline int check_cpulimit_spread(struct task_group *tg, int target_cpu)
{
int nr_cpus_active = atomic_read(&tg->nr_cpus_active);
- int nr_cpus_limit = DIV_ROUND_UP(tg->cpu_rate, MAX_CPU_RATE);
+ int nr_cpus_limit = DIV_ROUND_UP(tg_cpu_rate(tg), MAX_CPU_RATE);
+ int nr_vcpus = tg_nr_cpus(tg);
- nr_cpus_limit = nr_cpus_limit && tg->nr_cpus ?
- min_t(int, nr_cpus_limit, tg->nr_cpus) :
- max_t(int, nr_cpus_limit, tg->nr_cpus);
+ nr_cpus_limit = nr_cpus_limit && nr_vcpus ?
+ min_t(int, nr_cpus_limit, nr_vcpus) :
+ max_t(int, nr_cpus_limit, nr_vcpus);
if (!nr_cpus_limit || nr_cpus_active < nr_cpus_limit)
return 1;
More information about the Devel
mailing list