[Devel] [PATCH RHEL7 COMMIT] sched: Port diff-fairsched-cpuset-add-fake-cpuset-for-containers
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jun 4 06:14:09 PDT 2015
The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.9
------>
commit 66ae81139f560aa67e1c14e10acb3d22f301e01b
Author: Vladimir Davydov <vdavydov at parallels.com>
Date: Thu Jun 4 17:14:09 2015 +0400
sched: Port diff-fairsched-cpuset-add-fake-cpuset-for-containers
Author: Pavel Tikhomirov
Email: ptikhomirov at parallels.com
Subject: cpuset: add fake cpuset for containers
Date: Tue, 27 Jan 2015 15:40:12 +0300
If a container wants to write/read the cpumask or nodemask of a cpuset
through cgroupfs for an in-container cgroup, fake it - add special ve_*
fields to the cpuset structure and operate on them. We don't want to
validate the change since it is just fake, so allow any value.
For flags, relax_domain_level and mem_migration_pending, do not
allow access from the container at all.
Needed for the Docker integration-cli test: TestRunWithCpuset
https://jira.sw.ru/browse/PSBM-30878
v2: add for mems, cpus_allowed, mems_allowed; simplify checks in
update_cpumask/update_nodemask, no excessive code in alloc_trial_cpuset
and change naming for masks
v3: do not take the callback_mutex for printing ve_cpus_allowed,
do not permit r/w to cpuset_cpus_allowed, cpuset_mems_allowed,
add ve_flags and ve_relax_domain_level.
v4: leave only ve_cpus/mems_allowed, others are not faked,
block access to others from CT
v5: cleanup code
Signed-off-by: Pavel Tikhomirov <ptikhomirov at parallels.com>
Reviewed-by: Vladimir Davydov <vdavydov at parallels.com>
=============================================================================
Related to https://jira.sw.ru/browse/PSBM-33642
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
kernel/cpuset.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3c4355e..ef08c19 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -87,6 +87,9 @@ struct cpuset {
cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
+ cpumask_var_t ve_cpus_allowed;
+ nodemask_t ve_mems_allowed;
+
struct fmeter fmeter; /* memory_pressure filter */
/*
@@ -866,6 +869,15 @@ static int __update_cpumask(struct cpuset *cs,
if (cs == &top_cpuset)
return -EACCES;
+ /*
+ * If we are in CT use fake cpu mask
+ * can set and read, but no effect
+ */
+ if (!ve_is_super(get_exec_env())) {
+ cpumask_copy(cs->ve_cpus_allowed, cpus_allowed);
+ return 0;
+ }
+
if (!cpumask_subset(cpus_allowed, cpu_active_mask))
return -EINVAL;
@@ -1127,6 +1139,16 @@ static int __update_nodemask(struct cpuset *cs,
goto done;
}
+ /*
+ * If we are in CT use fake node mask
+ * can set and read, but no effect
+ */
+ if (!ve_is_super(get_exec_env())) {
+ cs->ve_mems_allowed = *mems_allowed;
+ retval = 0;
+ goto done;
+ }
+
if (!nodes_subset(*mems_allowed, node_states[N_MEMORY])) {
retval = -EINVAL;
goto done;
@@ -1563,6 +1585,9 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
cpuset_filetype_t type = cft->private;
int retval = 0;
+ if (!ve_is_super(get_exec_env()))
+ return -EACCES;
+
mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs)) {
retval = -ENODEV;
@@ -1612,6 +1637,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
cpuset_filetype_t type = cft->private;
int retval = -ENODEV;
+ if (!ve_is_super(get_exec_env()))
+ return -EACCES;
+
mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs))
goto out_unlock;
@@ -1693,6 +1721,9 @@ static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
size_t count;
+ if (!ve_is_super(get_exec_env()))
+ return cpulist_scnprintf(page, PAGE_SIZE, cs->ve_cpus_allowed);
+
mutex_lock(&callback_mutex);
count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
mutex_unlock(&callback_mutex);
@@ -1704,6 +1735,9 @@ static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
size_t count;
+ if (!ve_is_super(get_exec_env()))
+ return nodelist_scnprintf(page, PAGE_SIZE, cs->ve_mems_allowed);
+
mutex_lock(&callback_mutex);
count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
mutex_unlock(&callback_mutex);
@@ -1751,6 +1785,10 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
{
struct cpuset *cs = cgroup_cs(cont);
cpuset_filetype_t type = cft->private;
+
+ if (!ve_is_super(get_exec_env()))
+ return 0;
+
switch (type) {
case FILE_CPU_EXCLUSIVE:
return is_cpu_exclusive(cs);
@@ -1782,6 +1820,10 @@ static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
{
struct cpuset *cs = cgroup_cs(cont);
cpuset_filetype_t type = cft->private;
+
+ if (!ve_is_super(get_exec_env()))
+ return 0;
+
switch (type) {
case FILE_SCHED_RELAX_DOMAIN_LEVEL:
return cs->relax_domain_level;
@@ -1909,10 +1951,17 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
kfree(cs);
return ERR_PTR(-ENOMEM);
}
+ if (!alloc_cpumask_var(&cs->ve_cpus_allowed, GFP_KERNEL)) {
+ free_cpumask_var(cs->cpus_allowed);
+ kfree(cs);
+ return ERR_PTR(-ENOMEM);
+ }
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
cpumask_clear(cs->cpus_allowed);
nodes_clear(cs->mems_allowed);
+ cpumask_clear(cs->ve_cpus_allowed);
+ nodes_clear(cs->ve_mems_allowed);
fmeter_init(&cs->fmeter);
INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn);
cs->relax_domain_level = -1;
@@ -2000,6 +2049,7 @@ static void cpuset_css_free(struct cgroup *cont)
struct cpuset *cs = cgroup_cs(cont);
free_cpumask_var(cs->cpus_allowed);
+ free_cpumask_var(cs->ve_cpus_allowed);
kfree(cs);
}
@@ -2029,10 +2079,15 @@ int __init cpuset_init(void)
if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
BUG();
+ if (!alloc_cpumask_var(&top_cpuset.ve_cpus_allowed, GFP_KERNEL))
+ BUG();
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
+ cpumask_clear(top_cpuset.ve_cpus_allowed);
+ nodes_clear(top_cpuset.ve_mems_allowed);
+
fmeter_init(&top_cpuset.fmeter);
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
top_cpuset.relax_domain_level = -1;
More information about the Devel
mailing list