[Devel] [PATCH RHEL COMMIT] sched: Add cpulimit cgroup interfaces

Konstantin Khorenko khorenko at virtuozzo.com
Fri Sep 24 14:49:33 MSK 2021


The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit 4b54146a90dcbae0f09d5904a597675e38735a61
Author: Kirill Tkhai <ktkhai at virtuozzo.com>
Date:   Fri Sep 24 14:49:33 2021 +0300

    sched: Add cpulimit cgroup interfaces
    
    Add CONFIG_CFS_CPULIMIT cpu cgroup files.
    
    Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
    
    https://jira.sw.ru/browse/PSBM-133986
    
    See
    f4183717b ("sched/fair: Introduce the burstable CFS controller")
    
    (cherry picked from commit 0e0e0bfbf884f8fb0347e8cb6ed27aa2bf991c91)
    Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
---
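With this applied, the legacy cpu controller gains two knobs, cpu.rate
and cpu.nr_cpus. A minimal usage sketch; the mount point, the group
name "CT101" and MAX_CPU_RATE == 1024 (one full CPU) are illustrative
assumptions, not guarantees of this patch:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_knob(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) != (ssize_t)strlen(val)) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Cap the group at half a CPU: 512 out of an assumed
	 * MAX_CPU_RATE of 1024 rate units per full CPU.
	 */
	write_knob("/sys/fs/cgroup/cpu/CT101/cpu.rate", "512");

	/* Independently, cap it at two full CPUs worth of bandwidth. */
	write_knob("/sys/fs/cgroup/cpu/CT101/cpu.nr_cpus", "2");
	return 0;
}

Out-of-range writes are clamped rather than rejected: cpu.rate is
capped at num_online_cpus() * MAX_CPU_RATE and cpu.nr_cpus at
num_online_cpus(). Writing 0 to both knobs drops the limit (the quota
goes back to RUNTIME_INF).
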
 kernel/sched/core.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 153 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2159edeb0e02..a32a7626f458 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9517,6 +9517,8 @@ struct task_group *sched_create_group(struct task_group *parent)
 	return ERR_PTR(-ENOMEM);
 }
 
+static void tg_update_topmost_limited_ancestor(struct task_group *tg);
+
 void sched_online_group(struct task_group *tg, struct task_group *parent)
 {
 	unsigned long flags;
@@ -9530,6 +9532,9 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 	tg->parent = parent;
 	INIT_LIST_HEAD(&tg->children);
 	list_add_rcu(&tg->siblings, &parent->children);
+#ifdef CONFIG_CFS_BANDWIDTH
+	tg_update_topmost_limited_ancestor(tg);
+#endif
 	spin_unlock_irqrestore(&task_group_lock, flags);
 
 	online_fair_sched_group(tg);
@@ -9968,6 +9973,7 @@ static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
 static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
 
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
+static void tg_limit_toggled(struct task_group *tg);
 
 static int __tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
 				u64 burst)
@@ -10046,6 +10052,8 @@ static int __tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
 			unthrottle_cfs_rq(cfs_rq);
 		rq_unlock_irq(rq, &rf);
 	}
+	if (runtime_enabled != runtime_was_enabled)
+		tg_limit_toggled(tg);
 	if (runtime_was_enabled && !runtime_enabled)
 		cfs_bandwidth_usage_dec();
 out:
@@ -10053,6 +10061,8 @@ static int __tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
 	return ret;
 }
 
+static void tg_update_cpu_limit(struct task_group *tg);
+
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
 				u64 burst)
 {
@@ -10061,6 +10071,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
 	get_online_cpus();
 	mutex_lock(&cfs_constraints_mutex);
 	ret = __tg_set_cfs_bandwidth(tg, period, quota, burst);
+	tg_update_cpu_limit(tg);
 	mutex_unlock(&cfs_constraints_mutex);
 	put_online_cpus();
 
@@ -10284,6 +10295,136 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 
 	return 0;
 }
+
+#ifdef CONFIG_CFS_CPULIMIT
+static int __tg_update_topmost_limited_ancestor(struct task_group *tg, void *unused)
+{
+	struct task_group *parent = tg->parent;
+
+	/*
+	 * Neither the parent nor any of its ancestors is limited? Then the
+	 * task group becomes a topmost limited ancestor itself, provided it
+	 * has a limit set. Otherwise inherit it from the parent.
+	 */
+	if (parent->topmost_limited_ancestor == parent &&
+	    parent->cfs_bandwidth.quota == RUNTIME_INF)
+		tg->topmost_limited_ancestor = tg;
+	else
+		tg->topmost_limited_ancestor = parent->topmost_limited_ancestor;
+	return 0;
+}
+
+static void tg_update_topmost_limited_ancestor(struct task_group *tg)
+{
+	__tg_update_topmost_limited_ancestor(tg, NULL);
+}
+
+static void tg_limit_toggled(struct task_group *tg)
+{
+	if (tg->topmost_limited_ancestor != tg) {
+		/*
+		 * This task group is not a topmost limited ancestor, so both
+		 * it and all its children must already point to their topmost
+		 * limited ancestor, and we have nothing to do.
+		 */
+		return;
+	}
+
+	/*
+	 * This task group is a topmost limited ancestor. Walk over all its
+	 * children and update their pointers to the topmost limited ancestor.
+	 */
+
+	spin_lock_irq(&task_group_lock);
+	walk_tg_tree_from(tg, __tg_update_topmost_limited_ancestor, tg_nop, NULL);
+	spin_unlock_irq(&task_group_lock);
+}
+
+static void tg_update_cpu_limit(struct task_group *tg)
+{
+	long quota, period;
+	unsigned long rate = 0;
+
+	quota = tg_get_cfs_quota(tg);
+	period = tg_get_cfs_period(tg);
+
+	if (quota > 0 && period > 0) {
+		rate = quota * MAX_CPU_RATE / period;
+		rate = max(rate, 1UL);
+	}
+
+	tg->cpu_rate = rate;
+	tg->nr_cpus = 0;
+}
+
+static int tg_set_cpu_limit(struct task_group *tg,
+			    unsigned long cpu_rate, unsigned int nr_cpus)
+{
+	int ret;
+	unsigned long rate;
+	u64 quota = RUNTIME_INF;
+	u64 burst = tg_get_cfs_burst(tg);
+	u64 period = default_cfs_period();
+
+	rate = (cpu_rate && nr_cpus) ?
+		min_t(unsigned long, cpu_rate, nr_cpus * MAX_CPU_RATE) :
+		max_t(unsigned long, cpu_rate, nr_cpus * MAX_CPU_RATE);
+	if (rate) {
+		quota = div_u64(period * rate, MAX_CPU_RATE);
+		quota = max(quota, min_cfs_quota_period);
+	}
+
+	mutex_lock(&cfs_constraints_mutex);
+	ret = __tg_set_cfs_bandwidth(tg, period, quota, burst);
+	if (!ret) {
+		tg->cpu_rate = cpu_rate;
+		tg->nr_cpus = nr_cpus;
+	}
+	mutex_unlock(&cfs_constraints_mutex);
+
+	return ret;
+}
+
+static u64 cpu_rate_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return css_tg(css)->cpu_rate;
+}
+
+static int cpu_rate_write_u64(struct cgroup_subsys_state *css,
+			      struct cftype *cftype, u64 rate)
+{
+	struct task_group *tg = css_tg(css);
+
+	if (rate > num_online_cpus() * MAX_CPU_RATE)
+		rate = num_online_cpus() * MAX_CPU_RATE;
+	return tg_set_cpu_limit(tg, rate, tg->nr_cpus);
+}
+
+static u64 nr_cpus_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return css_tg(css)->nr_cpus;
+}
+
+static int nr_cpus_write_u64(struct cgroup_subsys_state *css,
+			     struct cftype *cftype, u64 nr_cpus)
+{
+	struct task_group *tg = css_tg(css);
+
+	if (nr_cpus > num_online_cpus())
+		nr_cpus = num_online_cpus();
+	return tg_set_cpu_limit(tg, tg->cpu_rate, nr_cpus);
+}
+#else
+static void tg_update_topmost_limited_ancestor(struct task_group *tg)
+{
+}
+static void tg_limit_toggled(struct task_group *tg)
+{
+}
+static void tg_update_cpu_limit(struct task_group *tg)
+{
+}
+#endif /* CONFIG_CFS_CPULIMIT */
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -10342,6 +10483,18 @@ static struct cftype cpu_legacy_files[] = {
 		.seq_show = cpu_cfs_stat_show,
 	},
 #endif
+#ifdef CONFIG_CFS_CPULIMIT
+	{
+		.name = "rate",
+		.read_u64 = cpu_rate_read_u64,
+		.write_u64 = cpu_rate_write_u64,
+	},
+	{
+		.name = "nr_cpus",
+		.read_u64 = nr_cpus_read_u64,
+		.write_u64 = nr_cpus_write_u64,
+	},
+#endif
 #ifdef CONFIG_RT_GROUP_SCHED
 	{
 		.name = "rt_runtime_us",

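A note on the topmost_limited_ancestor bookkeeping above: every task
group caches a pointer to the highest group at or above it that has a
CFS quota set, falling back to itself when nothing above is limited,
presumably so later cpulimit code can find the governing limit without
climbing the hierarchy. A standalone model of the update rule (the
struct and helper names are illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

struct grp {
	struct grp *parent;
	struct grp *topmost;	/* highest limited ancestor, or self */
	bool limited;		/* kernel: cfs_bandwidth.quota != RUNTIME_INF */
};

/* Same rule as __tg_update_topmost_limited_ancestor(): applied
 * top-down over a subtree, so the parent's pointer is already up to
 * date when a child is visited.
 */
static void update_topmost(struct grp *g)
{
	struct grp *p = g->parent;

	if (p->topmost == p && !p->limited)
		g->topmost = g;		 /* nothing limited above */
	else
		g->topmost = p->topmost; /* inherit the limited ancestor */
}

int main(void)
{
	struct grp root = { .topmost = &root };
	struct grp a = { .parent = &root, .limited = true };
	struct grp b = { .parent = &a };

	update_topmost(&a);	/* root unlimited: a points to itself */
	update_topmost(&b);	/* a is limited: b points to a */
	printf("a->self: %d, b->a: %d\n",
	       a.topmost == &a, b.topmost == &a);
	return 0;
}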

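The mapping between the knobs and CFS bandwidth in tg_set_cpu_limit()
is linear: when both knobs are set, the stricter of cpu_rate and
nr_cpus * MAX_CPU_RATE wins (the min_t()); when only one is set, the
max_t() simply picks the nonzero value; the result is then scaled onto
a quota over the default period. A standalone sketch of the
arithmetic, assuming MAX_CPU_RATE == 1024 and a 100ms default period
(illustrative values, not taken from this patch):

#include <stdint.h>
#include <stdio.h>

#define MAX_CPU_RATE	1024ULL		/* assumed: 1024 == one full CPU */
#define PERIOD_US	100000ULL	/* assumed default CFS period */

/* Mirrors the effective-rate selection in tg_set_cpu_limit(). */
static uint64_t effective_rate(uint64_t cpu_rate, uint64_t nr_cpus)
{
	uint64_t cap = nr_cpus * MAX_CPU_RATE;

	if (cpu_rate && nr_cpus)
		return cpu_rate < cap ? cpu_rate : cap;
	return cpu_rate > cap ? cpu_rate : cap;
}

int main(void)
{
	/* cpu.rate = 1536 (1.5 CPUs) combined with cpu.nr_cpus = 1:
	 * the stricter limit wins, so the quota is one full period.
	 */
	uint64_t rate = effective_rate(1536, 1);
	uint64_t quota = PERIOD_US * rate / MAX_CPU_RATE;

	printf("rate=%llu -> quota=%lluus per %lluus period\n",
	       (unsigned long long)rate,
	       (unsigned long long)quota,
	       (unsigned long long)PERIOD_US);
	return 0;
}

The inverse lives in tg_update_cpu_limit(): whenever cfs_quota_us or
cfs_period_us is written directly, cpu_rate is recomputed as
quota * MAX_CPU_RATE / period and nr_cpus is reset to 0, so the two
views stay consistent.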