[Devel] [PATCH RHEL COMMIT] sched: Add cpulimit cgroup interfaces
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Sep 24 14:49:33 MSK 2021
The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit 4b54146a90dcbae0f09d5904a597675e38735a61
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date: Fri Sep 24 14:49:33 2021 +0300
sched: Add cpulimit cgroup interfaces
Add the CONFIG_CFS_CPULIMIT cpu cgroup files: cpu.rate and cpu.nr_cpus.
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
https://jira.sw.ru/browse/PSBM-133986
See f4183717b ("sched/fair: Introduce the burstable CFS controller")
(cherry picked from commit 0e0e0bfbf884f8fb0347e8cb6ed27aa2bf991c91)
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
---
kernel/sched/core.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 153 insertions(+)
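A minimal userspace sketch of how the two new knobs are meant to be driven
(reviewer note, not part of the patch; the cgroup mount point, the group
name and the MAX_CPU_RATE scale of 1024 == one full cpu are assumptions,
not taken from this diff):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int write_knob(const char *path, unsigned long val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	dprintf(fd, "%lu", val);	/* each knob parses a single u64 */
	return close(fd);
}

int main(void)
{
	/* Allow 1.5 cpus worth of time (1536 / 1024)... */
	write_knob("/sys/fs/cgroup/cpu/test/cpu.rate", 1536);
	/* ...spread across at most two cpus. */
	write_knob("/sys/fs/cgroup/cpu/test/cpu.nr_cpus", 2);
	return 0;
}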
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2159edeb0e02..a32a7626f458 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9517,6 +9517,8 @@ struct task_group *sched_create_group(struct task_group *parent)
return ERR_PTR(-ENOMEM);
}
+static void tg_update_topmost_limited_ancestor(struct task_group *tg);
+
void sched_online_group(struct task_group *tg, struct task_group *parent)
{
unsigned long flags;
@@ -9530,6 +9532,9 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
tg->parent = parent;
INIT_LIST_HEAD(&tg->children);
list_add_rcu(&tg->siblings, &parent->children);
+#ifdef CONFIG_CFS_BANDWIDTH
+ tg_update_topmost_limited_ancestor(tg);
+#endif
spin_unlock_irqrestore(&task_group_lock, flags);
online_fair_sched_group(tg);
@@ -9968,6 +9973,7 @@ static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
+static void tg_limit_toggled(struct task_group *tg);
static int __tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
u64 burst)
@@ -10046,6 +10052,8 @@ static int __tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
unthrottle_cfs_rq(cfs_rq);
rq_unlock_irq(rq, &rf);
}
+ if (runtime_enabled != runtime_was_enabled)
+ tg_limit_toggled(tg);
if (runtime_was_enabled && !runtime_enabled)
cfs_bandwidth_usage_dec();
out:
@@ -10053,6 +10061,8 @@ static int __tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
return ret;
}
+static void tg_update_cpu_limit(struct task_group *tg);
+
static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
u64 burst)
{
@@ -10061,6 +10071,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
get_online_cpus();
mutex_lock(&cfs_constraints_mutex);
ret = __tg_set_cfs_bandwidth(tg, period, quota, burst);
+ tg_update_cpu_limit(tg);
mutex_unlock(&cfs_constraints_mutex);
put_online_cpus();
@@ -10284,6 +10295,136 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
return 0;
}
+
+#ifdef CONFIG_CFS_CPULIMIT
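+/*
+ * tg->topmost_limited_ancestor caches, for each task group, its highest
+ * ancestor with a CFS bandwidth limit set, or the group itself when no
+ * such ancestor exists. It is refreshed when a group comes online and
+ * re-propagated down the subtree whenever a limit is toggled.
+ */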
+static int __tg_update_topmost_limited_ancestor(struct task_group *tg, void *unused)
+{
+ struct task_group *parent = tg->parent;
+
+ /*
+ * Neither the parent nor any of its ancestors is limited? Then this task
+ * group becomes its own topmost limited ancestor (it will act as one once
+ * a limit is set on it). Otherwise, inherit the topmost limited ancestor
+ * from the parent.
+ */
+ if (parent->topmost_limited_ancestor == parent &&
+ parent->cfs_bandwidth.quota == RUNTIME_INF)
+ tg->topmost_limited_ancestor = tg;
+ else
+ tg->topmost_limited_ancestor = parent->topmost_limited_ancestor;
+ return 0;
+}
+
+static void tg_update_topmost_limited_ancestor(struct task_group *tg)
+{
+ __tg_update_topmost_limited_ancestor(tg, NULL);
+}
+
+static void tg_limit_toggled(struct task_group *tg)
+{
+ if (tg->topmost_limited_ancestor != tg) {
+ /*
+ * This task group is not a topmost limited ancestor, so both
+ * it and all its children must already point to their topmost
+ * limited ancestor, and we have nothing to do.
+ */
+ return;
+ }
+
+ /*
+ * This task group is a topmost limited ancestor. Walk over all its
+ * children and update their pointers to the topmost limited ancestor.
+ */
+
+ spin_lock_irq(&task_group_lock);
+ walk_tg_tree_from(tg, __tg_update_topmost_limited_ancestor, tg_nop, NULL);
+ spin_unlock_irq(&task_group_lock);
+}
+
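+/*
+ * Mirror a direct cpu.cfs_quota_us / cpu.cfs_period_us write into cpu_rate;
+ * an explicit bandwidth setting overrides and clears any nr_cpus value.
+ */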
+static void tg_update_cpu_limit(struct task_group *tg)
+{
+ long quota, period;
+ unsigned long rate = 0;
+
+ quota = tg_get_cfs_quota(tg);
+ period = tg_get_cfs_period(tg);
+
+ if (quota > 0 && period > 0) {
+ rate = quota * MAX_CPU_RATE / period;
+ rate = max(rate, 1UL);
+ }
+
+ tg->cpu_rate = rate;
+ tg->nr_cpus = 0;
+}
+
+static int tg_set_cpu_limit(struct task_group *tg,
+ unsigned long cpu_rate, unsigned int nr_cpus)
+{
+ int ret;
+ unsigned long rate;
+ u64 quota = RUNTIME_INF;
+ u64 burst = tg_get_cfs_burst(tg);
+ u64 period = default_cfs_period();
+
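+ /*
+ * If both cpu_rate and nr_cpus are set, the effective rate is the
+ * smaller of the two; if only one is set, the other operand below is
+ * zero and max() picks the configured value.
+ */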
+ rate = (cpu_rate && nr_cpus) ?
+ min_t(unsigned long, cpu_rate, nr_cpus * MAX_CPU_RATE) :
+ max_t(unsigned long, cpu_rate, nr_cpus * MAX_CPU_RATE);
+ if (rate) {
+ quota = div_u64(period * rate, MAX_CPU_RATE);
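+ /* Keep tiny rates schedulable: no less than min_cfs_quota_period (1ms). */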
+ quota = max(quota, min_cfs_quota_period);
+ }
+
+ mutex_lock(&cfs_constraints_mutex);
+ ret = __tg_set_cfs_bandwidth(tg, period, quota, burst);
+ if (!ret) {
+ tg->cpu_rate = cpu_rate;
+ tg->nr_cpus = nr_cpus;
+ }
+ mutex_unlock(&cfs_constraints_mutex);
+
+ return ret;
+}
+
+static u64 cpu_rate_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return css_tg(css)->cpu_rate;
+}
+
+static int cpu_rate_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 rate)
+{
+ struct task_group *tg = css_tg(css);
+
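+ /* Cap the rate at the combined capacity of all online cpus. */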
+ if (rate > num_online_cpus() * MAX_CPU_RATE)
+ rate = num_online_cpus() * MAX_CPU_RATE;
+ return tg_set_cpu_limit(tg, rate, tg->nr_cpus);
+}
+
+static u64 nr_cpus_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return css_tg(css)->nr_cpus;
+}
+
+static int nr_cpus_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 nr_cpus)
+{
+ struct task_group *tg = css_tg(css);
+
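+ /* A group cannot be limited to more cpus than are currently online. */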
+ if (nr_cpus > num_online_cpus())
+ nr_cpus = num_online_cpus();
+ return tg_set_cpu_limit(tg, tg->cpu_rate, nr_cpus);
+}
+#else
+static void tg_update_topmost_limited_ancestor(struct task_group *tg)
+{
+}
+static void tg_limit_toggled(struct task_group *tg)
+{
+}
+static void tg_update_cpu_limit(struct task_group *tg)
+{
+}
+#endif /* CONFIG_CFS_CPULIMIT */
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -10342,6 +10483,18 @@ static struct cftype cpu_legacy_files[] = {
.seq_show = cpu_cfs_stat_show,
},
#endif
+#ifdef CONFIG_CFS_CPULIMIT
+ {
+ .name = "rate",
+ .read_u64 = cpu_rate_read_u64,
+ .write_u64 = cpu_rate_write_u64,
+ },
+ {
+ .name = "nr_cpus",
+ .read_u64 = nr_cpus_read_u64,
+ .write_u64 = nr_cpus_write_u64,
+ },
+#endif
#ifdef CONFIG_RT_GROUP_SCHED
{
.name = "rt_runtime_us",