[Devel] [PATCH RHEL7 COMMIT] sched: Port diff-fairsched-cpuset-add-fake-cpuset-for-containers

Konstantin Khorenko khorenko at virtuozzo.com
Thu Jun 4 06:14:09 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.9
------>
commit 66ae81139f560aa67e1c14e10acb3d22f301e01b
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Thu Jun 4 17:14:09 2015 +0400

    sched: Port diff-fairsched-cpuset-add-fake-cpuset-for-containers
    
    Author: Pavel Tikhomirov
    Email: ptikhomirov at parallels.com
    Subject: cpuset: add fake cpuset for containers
    Date: Tue, 27 Jan 2015 15:40:12 +0300
    
    If a container wants to read or write the cpumask or nodemask of a
    cpuset through cgroupfs for an in-container cgroup, fake it: add special
    ve_* fields to the cpuset structure and operate on them. We don't
    validate the change, since it is just a fake, so any value is allowed.
    Access to flags, relax_domain_level and mem_migration_pending is not
    allowed from inside a container.
    
    Needed for the Docker integration-cli test TestRunWithCpuset:
    https://jira.sw.ru/browse/PSBM-30878
    
    v2: add for mems, cpus_allowed, mems_allowed; simplify checks in
    update_cpumask/update_nodemask, no excessive code in alloc_trial_cpuset
    and change naming for masks
    v3: do not take the callback_mutex for printing ve_cpus_allowed,
    do not permit r/w to cpuset_cpus_allowed, cpuset_mems_allowed,
    add ve_flags and ve_relax_domain_level.
    v4: leave only ve_cpus/mems_allowed, others are not faked,
    block access to others from CT
    v5: cleanup code
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at parallels.com>
    
    Reviewed-by: Vladimir Davydov <vdavydov at parallels.com>
    =============================================================================
    
    Related to https://jira.sw.ru/browse/PSBM-33642
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
---
 kernel/cpuset.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3c4355e..ef08c19 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -87,6 +87,9 @@ struct cpuset {
 	cpumask_var_t cpus_allowed;	/* CPUs allowed to tasks in cpuset */
 	nodemask_t mems_allowed;	/* Memory Nodes allowed to tasks */
 
+	cpumask_var_t ve_cpus_allowed;
+	nodemask_t ve_mems_allowed;
+
 	struct fmeter fmeter;		/* memory_pressure filter */
 
 	/*
@@ -866,6 +869,15 @@ static int __update_cpumask(struct cpuset *cs,
 	if (cs == &top_cpuset)
 		return -EACCES;
 
+	/*
+	 * If we are in CT use fake cpu mask
+	 * can set and read, but no effect
+	 */
+	if (!ve_is_super(get_exec_env())) {
+		cpumask_copy(cs->ve_cpus_allowed, cpus_allowed);
+		return 0;
+	}
+
 	if (!cpumask_subset(cpus_allowed, cpu_active_mask))
 		return -EINVAL;
 
@@ -1127,6 +1139,16 @@ static int __update_nodemask(struct cpuset *cs,
 		goto done;
 	}
 
+	/*
+	 * If we are in CT use fake node mask
+	 * can set and read, but no effect
+	 */
+	if (!ve_is_super(get_exec_env())) {
+		cs->ve_mems_allowed = *mems_allowed;
+		retval = 0;
+		goto done;
+	}
+
 	if (!nodes_subset(*mems_allowed, node_states[N_MEMORY])) {
 		retval = -EINVAL;
 		goto done;
@@ -1563,6 +1585,9 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
 	cpuset_filetype_t type = cft->private;
 	int retval = 0;
 
+	if (!ve_is_super(get_exec_env()))
+		return -EACCES;
+
 	mutex_lock(&cpuset_mutex);
 	if (!is_cpuset_online(cs)) {
 		retval = -ENODEV;
@@ -1612,6 +1637,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
 	cpuset_filetype_t type = cft->private;
 	int retval = -ENODEV;
 
+	if (!ve_is_super(get_exec_env()))
+		return -EACCES;
+
 	mutex_lock(&cpuset_mutex);
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
@@ -1693,6 +1721,9 @@ static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
 	size_t count;
 
+	if (!ve_is_super(get_exec_env()))
+		return cpulist_scnprintf(page, PAGE_SIZE, cs->ve_cpus_allowed);
+
 	mutex_lock(&callback_mutex);
 	count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
 	mutex_unlock(&callback_mutex);
@@ -1704,6 +1735,9 @@ static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
 	size_t count;
 
+	if (!ve_is_super(get_exec_env()))
+		return nodelist_scnprintf(page, PAGE_SIZE, cs->ve_mems_allowed);
+
 	mutex_lock(&callback_mutex);
 	count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
 	mutex_unlock(&callback_mutex);
@@ -1751,6 +1785,10 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
 {
 	struct cpuset *cs = cgroup_cs(cont);
 	cpuset_filetype_t type = cft->private;
+
+	if (!ve_is_super(get_exec_env()))
+		return 0;
+
 	switch (type) {
 	case FILE_CPU_EXCLUSIVE:
 		return is_cpu_exclusive(cs);
@@ -1782,6 +1820,10 @@ static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
 {
 	struct cpuset *cs = cgroup_cs(cont);
 	cpuset_filetype_t type = cft->private;
+
+	if (!ve_is_super(get_exec_env()))
+		return 0;
+
 	switch (type) {
 	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
 		return cs->relax_domain_level;
@@ -1909,10 +1951,17 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
 		kfree(cs);
 		return ERR_PTR(-ENOMEM);
 	}
+	if (!alloc_cpumask_var(&cs->ve_cpus_allowed, GFP_KERNEL)) {
+		free_cpumask_var(cs->cpus_allowed);
+		kfree(cs);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	cpumask_clear(cs->cpus_allowed);
 	nodes_clear(cs->mems_allowed);
+	cpumask_clear(cs->ve_cpus_allowed);
+	nodes_clear(cs->ve_mems_allowed);
 	fmeter_init(&cs->fmeter);
 	INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn);
 	cs->relax_domain_level = -1;
@@ -2000,6 +2049,7 @@ static void cpuset_css_free(struct cgroup *cont)
 	struct cpuset *cs = cgroup_cs(cont);
 
 	free_cpumask_var(cs->cpus_allowed);
+	free_cpumask_var(cs->ve_cpus_allowed);
 	kfree(cs);
 }
 
@@ -2029,10 +2079,15 @@ int __init cpuset_init(void)
 
 	if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
 		BUG();
+	if (!alloc_cpumask_var(&top_cpuset.ve_cpus_allowed, GFP_KERNEL))
+		BUG();
 
 	cpumask_setall(top_cpuset.cpus_allowed);
 	nodes_setall(top_cpuset.mems_allowed);
 
+	cpumask_clear(top_cpuset.ve_cpus_allowed);
+	nodes_clear(top_cpuset.ve_mems_allowed);
+
 	fmeter_init(&top_cpuset.fmeter);
 	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
 	top_cpuset.relax_domain_level = -1;



More information about the Devel mailing list