[Devel] [PATCH RHEL7 COMMIT] sched: Revert "SCHED: rework cputime accounting (v2)"
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jun 4 05:58:28 PDT 2015
The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.9
------>
commit ed523ec92064c7b792fcbfea3a01cf9f1e80dd63
Author: Vladimir Davydov <vdavydov at parallels.com>
Date: Thu Jun 4 16:58:28 2015 +0400
sched: Revert "SCHED: rework cputime accounting (v2)"
This reverts commit 6071473d0440fcfd128f3243dfb82d19f6aef668.
The above-mentioned commit dramatically complicates porting of cpu acct
patches from RH6, so revert it. The next patch will fix cpu accounting
once again.
Related to https://jira.sw.ru/browse/PSBM-33642
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
Conflicts:
kernel/sched/core.c
---
drivers/iommu/amd_iommu.c | 4 +-
include/linux/fairsched.h | 3 +-
include/linux/kernel_stat.h | 1 +
include/linux/sched.h | 3 --
kernel/sched/core.c | 92 +++++++++++++++++++++++++--------------------
kernel/sched/cputime.c | 15 --------
kernel/sched/sched.h | 1 +
7 files changed, 57 insertions(+), 62 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index c1eefe2..6dc6594 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4227,7 +4227,7 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask,
return 0;
}
-static int amd_iommu_free_irq(int irq)
+static int free_irq(int irq)
{
struct irq_2_irte *irte_info;
struct irq_cfg *cfg;
@@ -4352,7 +4352,7 @@ struct irq_remap_ops amd_iommu_irq_ops = {
.enable_faulting = amd_iommu_enable_faulting,
.setup_ioapic_entry = setup_ioapic_entry,
.set_affinity = set_affinity,
- .free_irq = amd_iommu_free_irq,
+ .free_irq = free_irq,
.compose_msi_msg = compose_msi_msg,
.msi_alloc_irq = msi_alloc_irq,
.msi_setup_irq = msi_setup_irq,
diff --git a/include/linux/fairsched.h b/include/linux/fairsched.h
index 12bbc5b..e242c0d 100644
--- a/include/linux/fairsched.h
+++ b/include/linux/fairsched.h
@@ -18,6 +18,8 @@
#ifdef __KERNEL__
+struct kernel_cpustat;
+
#ifdef CONFIG_VZ_FAIRSCHED
#define FSCHWEIGHT_MAX ((1 << 16) - 1)
@@ -79,7 +81,6 @@ static inline int fairsched_get_cpu_stat(const char *name, struct kernel_cpustat
#endif /* CONFIG_VZ_FAIRSCHED */
-struct kernel_cpustat;
void cpu_cgroup_get_stat(struct cgroup *cgrp, struct kernel_cpustat *kstat);
#endif /* __KERNEL__ */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index a63a497..d105ab3 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -6,6 +6,7 @@
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/interrupt.h>
+#include <linux/sched.h>
#include <linux/vtime.h>
#include <asm/irq.h>
#include <asm/cputime.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e62dc2b..f4a5e3d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -53,7 +53,6 @@ struct sched_param {
#include <linux/uidgid.h>
#include <linux/gfp.h>
#include <linux/ve_proto.h>
-#include <linux/kernel_stat.h>
#include <asm/processor.h>
@@ -976,8 +975,6 @@ struct sched_avg {
#ifdef CONFIG_SCHEDSTATS
struct sched_statistics {
- u64 cpustat[NR_STATS];
-
u64 wait_start;
u64 wait_max;
u64 wait_count;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0e8c921..50273af 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7394,6 +7394,7 @@ void __init sched_init(void)
#endif /* CONFIG_CPUMASK_OFFSTACK */
}
+ root_task_group.cpustat = alloc_percpu(struct kernel_cpustat);
root_task_group.taskstats = alloc_percpu(struct taskstats);
#ifdef CONFIG_SMP
@@ -7694,6 +7695,7 @@ static void free_sched_group(struct task_group *tg)
free_fair_sched_group(tg);
free_rt_sched_group(tg);
autogroup_free(tg);
+ free_percpu(tg->cpustat);
free_percpu(tg->taskstats);
kfree(tg);
}
@@ -7713,6 +7715,10 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_rt_sched_group(tg, parent))
goto err;
+ tg->cpustat = alloc_percpu(struct kernel_cpustat);
+ if (!tg->cpustat)
+ goto err;
+
tg->taskstats = alloc_percpu(struct taskstats);
if (!tg->taskstats)
goto err;
@@ -8661,16 +8667,34 @@ static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
}
#endif /* CONFIG_RT_GROUP_SCHED */
-static void __task_group_get_cpu_stat(struct task_group *tg, int cpu,
- struct kernel_cpustat *kcpustat)
+static u64 cpu_cgroup_usage_cpu(struct task_group *tg, int i)
+{
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SCHEDSTATS)
+ /* root_task_group has no sched entities */
+ if (tg == &root_task_group)
+ return cpu_rq(i)->rq_cpu_time;
+
+ return tg->se[i]->sum_exec_runtime;
+#else
+ return 0;
+#endif
+}
+
+static void cpu_cgroup_update_stat(struct task_group *tg, int i)
{
#if defined(CONFIG_SCHEDSTATS) && defined(CONFIG_FAIR_GROUP_SCHED)
- struct sched_entity *se = tg->se[cpu];
- u64 now = cpu_clock(cpu);
+ struct sched_entity *se = tg->se[i];
+ struct kernel_cpustat *kcpustat = per_cpu_ptr(tg->cpustat, i);
+ u64 now = cpu_clock(i);
u64 delta, idle, iowait;
+ /* root_task_group has no sched entities */
+ if (tg == &root_task_group)
+ return;
+
iowait = se->statistics.iowait_sum;
idle = se->statistics.sum_sleep_runtime;
+ kcpustat->cpustat[CPUTIME_STEAL] = se->statistics.wait_sum;
if (idle > iowait)
idle -= iowait;
@@ -8691,28 +8715,13 @@ static void __task_group_get_cpu_stat(struct task_group *tg, int cpu,
kcpustat->cpustat[CPUTIME_STEAL] += delta;
}
- kcpustat->cpustat[CPUTIME_USER] = se->statistics.cpustat[CPUTIME_USER];
- kcpustat->cpustat[CPUTIME_NICE] = se->statistics.cpustat[CPUTIME_NICE];
- kcpustat->cpustat[CPUTIME_SYSTEM] =
- se->statistics.cpustat[CPUTIME_SYSTEM];
kcpustat->cpustat[CPUTIME_IDLE] =
max(kcpustat->cpustat[CPUTIME_IDLE], idle);
kcpustat->cpustat[CPUTIME_IOWAIT] =
max(kcpustat->cpustat[CPUTIME_IOWAIT], iowait);
- kcpustat->cpustat[CPUTIME_STEAL] = se->statistics.wait_sum;
- kcpustat->cpustat[CPUTIME_USED] = se->sum_exec_runtime;
-#endif
-}
-static void task_group_get_cpu_stat(struct task_group *tg, int cpu,
- struct kernel_cpustat *kcpustat)
-{
- if (tg == &root_task_group) {
- memcpy(kcpustat, &kcpustat_cpu(cpu), sizeof(*kcpustat));
- return;
- }
- memset(kcpustat, 0, sizeof(*kcpustat));
- __task_group_get_cpu_stat(tg, cpu, kcpustat);
+ kcpustat->cpustat[CPUTIME_USED] = cpu_cgroup_usage_cpu(tg, i);
+#endif
}
int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
@@ -8724,7 +8733,7 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
u64 user, nice, system, idle, iowait, steal;
struct timespec boottime;
struct task_group *tg = cgroup_tg(cgrp);
- struct kernel_cpustat st;
+ struct kernel_cpustat *kcpustat;
unsigned long tg_nr_running = 0;
unsigned long tg_nr_iowait = 0;
unsigned long long tg_nr_switches = 0;
@@ -8736,14 +8745,16 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
user = nice = system = idle = iowait = steal = 0;
for_each_possible_cpu(i) {
- task_group_get_cpu_stat(tg, i, &st);
+ kcpustat = per_cpu_ptr(tg->cpustat, i);
- user += st.cpustat[CPUTIME_USER];
- nice += st.cpustat[CPUTIME_NICE];
- system += st.cpustat[CPUTIME_SYSTEM];
- idle += st.cpustat[CPUTIME_IDLE];
- iowait += st.cpustat[CPUTIME_IOWAIT];
- steal += st.cpustat[CPUTIME_STEAL];
+ cpu_cgroup_update_stat(tg, i);
+
+ user += kcpustat->cpustat[CPUTIME_USER];
+ nice += kcpustat->cpustat[CPUTIME_NICE];
+ system += kcpustat->cpustat[CPUTIME_SYSTEM];
+ idle += kcpustat->cpustat[CPUTIME_IDLE];
+ iowait += kcpustat->cpustat[CPUTIME_IOWAIT];
+ steal += kcpustat->cpustat[CPUTIME_STEAL];
/* root task group has autogrouping, so this doesn't hold */
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8770,15 +8781,14 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
for_each_online_cpu(j) {
if (j % nr_ve_vcpus != i)
continue;
-
- task_group_get_cpu_stat(tg, i, &st);
-
- user += st.cpustat[CPUTIME_USER];
- nice += st.cpustat[CPUTIME_NICE];
- system += st.cpustat[CPUTIME_SYSTEM];
- idle += st.cpustat[CPUTIME_IDLE];
- iowait += st.cpustat[CPUTIME_IOWAIT];
- steal += st.cpustat[CPUTIME_STEAL];
+ kcpustat = per_cpu_ptr(tg->cpustat, j);
+
+ user += kcpustat->cpustat[CPUTIME_USER];
+ nice += kcpustat->cpustat[CPUTIME_NICE];
+ system += kcpustat->cpustat[CPUTIME_SYSTEM];
+ idle += kcpustat->cpustat[CPUTIME_IDLE];
+ iowait += kcpustat->cpustat[CPUTIME_IOWAIT];
+ steal += kcpustat->cpustat[CPUTIME_STEAL];
}
seq_printf(p,
"cpu%d %llu %llu %llu %llu %llu 0 0 %llu\n",
@@ -8845,12 +8855,12 @@ void cpu_cgroup_get_stat(struct cgroup *cgrp, struct kernel_cpustat *kstat)
memset(kstat, 0, sizeof(struct kernel_cpustat));
for_each_possible_cpu(i) {
- struct kernel_cpustat st;
+ struct kernel_cpustat *st = per_cpu_ptr(tg->cpustat, i);
- task_group_get_cpu_stat(tg, i, &st);
+ cpu_cgroup_update_stat(tg, i);
for (j = 0; j < NR_STATS; j++)
- kstat->cpustat[j] += st.cpustat[j];
+ kstat->cpustat[j] += st->cpustat[j];
}
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3ad9fa8..ba4bfc0 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -112,19 +112,6 @@ static int irqtime_account_si_update(void)
#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
-static inline void update_stats_account_cpu_time(struct task_struct *p,
- int index, u64 tmp)
-{
-#if defined(CONFIG_SCHEDSTATS) && defined(CONFIG_FAIR_GROUP_SCHED)
- struct sched_entity *se = &p->se;
-
- do {
- se->statistics.cpustat[index] += tmp;
- se = se->parent;
- } while (se);
-#endif
-}
-
static inline void task_group_account_field(struct task_struct *p, int index,
u64 tmp)
{
@@ -136,8 +123,6 @@ static inline void task_group_account_field(struct task_struct *p, int index,
*/
__get_cpu_var(kernel_cpustat).cpustat[index] += tmp;
- update_stats_account_cpu_time(p, index, tmp);
-
cpuacct_account_field(p, index, tmp);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e4f92a5..e0c03d8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -167,6 +167,7 @@ struct task_group {
struct autogroup *autogroup;
#endif
+ struct kernel_cpustat __percpu *cpustat;
struct taskstats __percpu *taskstats;
unsigned long avenrun[3]; /* loadavg data */
struct timespec start_time;
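
For reference, a minimal standalone sketch (not taken from the tree; the example_* names are hypothetical) of the percpu pattern this revert restores for task_group::cpustat: one struct kernel_cpustat per possible CPU, allocated with alloc_percpu() as in sched_create_group(), read through per_cpu_ptr() when aggregating as in cpu_cgroup_proc_stat(), and released with free_percpu() as in free_sched_group():

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Hypothetical illustration only -- mirrors the hunks above. */
static struct kernel_cpustat __percpu *example_stat;

static int example_alloc(void)
{
	/* One kernel_cpustat slot per possible CPU, as tg->cpustat is allocated. */
	example_stat = alloc_percpu(struct kernel_cpustat);
	return example_stat ? 0 : -ENOMEM;
}

static u64 example_sum(int index)
{
	u64 sum = 0;
	int cpu;

	/* Aggregate one counter (e.g. CPUTIME_USER) across all CPUs. */
	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(example_stat, cpu)->cpustat[index];
	return sum;
}

static void example_free(void)
{
	free_percpu(example_stat);
}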