[Devel] [PATCH rh7] cpuset: revert changes that allowed attaching to empty cpusets
Vladimir Davydov
vdavydov at virtuozzo.com
Fri Jan 29 04:18:32 PST 2016
Now that PSBM-34089 is done, there is no need for the hacks that allowed
us to attach tasks to cpuset cgroups with an empty cpuset.cpus or
cpuset.mems, so let's revert them.
https://jira.sw.ru/browse/PSBM-42087
Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
kernel/cpuset.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 75 insertions(+), 17 deletions(-)
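Note for reviewers: with the hacks reverted, the userspace-visible
contract is restored - moving a task into a cpuset whose cpuset.cpus or
cpuset.mems is empty fails again with -ENOSPC (from cpuset_can_attach()
and validate_change() below). A minimal sketch of that behaviour,
assuming a v1 cpuset hierarchy mounted at /sys/fs/cgroup/cpuset and a
pre-created child cgroup named "empty" whose masks were left blank (the
mount point and cgroup name are assumptions of the example, not part of
the patch):

/* attach-empty.c - expect ENOSPC when attaching to an empty cpuset.
 * Assumes a v1 cpuset mount at /sys/fs/cgroup/cpuset and an existing
 * child "empty" with blank cpuset.cpus/cpuset.mems (hypothetical).
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *tasks = "/sys/fs/cgroup/cpuset/empty/tasks";
	char pid[16];
	int fd, n;

	fd = open(tasks, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = snprintf(pid, sizeof(pid), "%d", getpid());
	/* With this revert applied, the write is expected to fail with
	 * ENOSPC because the target cpuset has no cpus/mems. */
	if (write(fd, pid, n) < 0)
		printf("attach failed as expected: %s\n", strerror(errno));
	else
		printf("attach unexpectedly succeeded\n");
	close(fd);
	return 0;
}
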
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 81030b340dbd..123cdc5b58cf 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -268,14 +268,6 @@ static DEFINE_MUTEX(cpuset_mutex);
static DEFINE_MUTEX(callback_mutex);
/*
- * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach()
- * but we can't allocate it dynamically there. Define it global and
- * allocate from cpuset_init().
- */
-static cpumask_var_t cpus_attach;
-
-
-/*
* CPU / memory hotplug is handled asynchronously.
*/
static struct workqueue_struct *cpuset_propagate_hotplug_wq;
@@ -491,6 +483,16 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
goto out;
}
+ /*
+ * Cpusets with tasks - existing or newly being attached - can't
+ * have empty cpus_allowed or mems_allowed.
+ */
+ ret = -ENOSPC;
+ if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) &&
+ (cpumask_empty(trial->cpus_allowed) ||
+ nodes_empty(trial->mems_allowed)))
+ goto out;
+
ret = 0;
out:
rcu_read_unlock();
@@ -812,7 +814,8 @@ void rebuild_sched_domains(void)
static int cpuset_test_cpumask(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
- return !cpumask_equal(&tsk->cpus_allowed, cpus_attach);
+ return !cpumask_equal(&tsk->cpus_allowed,
+ (cgroup_cs(scan->cg))->cpus_allowed);
}
/**
@@ -829,7 +832,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
static void cpuset_change_cpumask(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
- set_cpus_allowed_ptr(tsk, cpus_attach);
+ set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
}
/**
@@ -849,7 +852,6 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
{
struct cgroup_scanner scan;
- guarantee_online_cpus(cs, cpus_attach);
scan.cg = cs->css.cgroup;
scan.test_task = cpuset_test_cpumask;
scan.process_task = cpuset_change_cpumask;
@@ -935,8 +937,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
return -ENOMEM;
/*
+ * An empty cpus_allowed is ok only if the cpuset has no tasks.
* Since cpulist_parse() fails on an empty mask, we special case
- * that parsing.
+ * that parsing. The validate_change() call ensures that cpusets
+ * with tasks have cpus.
*/
if (!*buf)
cpumask_clear(cpus_allowed);
@@ -1059,9 +1063,9 @@ static void cpuset_change_nodemask(struct task_struct *p,
migrate = is_memory_migrate(cs);
- mpol_rebind_mm(mm, &newmems);
+ mpol_rebind_mm(mm, &cs->mems_allowed);
if (migrate)
- cpuset_migrate_mm(mm, oldmem, &newmems);
+ cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
mmput(mm);
}
@@ -1162,7 +1166,7 @@ static int __update_nodemask(struct cpuset *cs,
trialcs->mems_allowed = *mems_allowed;
- guarantee_online_mems(cs, oldmem);
+ *oldmem = cs->mems_allowed;
if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
@@ -1198,8 +1202,10 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
return -ENOMEM;
/*
+ * An empty mems_allowed is ok iff there are no tasks in the cpuset.
* Since nodelist_parse() fails on an empty mask, we special case
- * that parsing.
+ * that parsing. The validate_change() call ensures that cpusets
+ * with tasks have memory.
*/
if (!*buf)
nodes_clear(*mems_allowed);
@@ -1438,6 +1444,10 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
mutex_lock(&cpuset_mutex);
+ ret = -ENOSPC;
+ if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+ goto out_unlock;
+
cgroup_taskset_for_each(task, cgrp, tset) {
/*
* Kthreads which disallow setaffinity shouldn't be moved
@@ -1475,6 +1485,13 @@ static void cpuset_cancel_attach(struct cgroup *cgrp,
mutex_unlock(&cpuset_mutex);
}
+/*
+ * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach()
+ * but we can't allocate it dynamically there. Define it global and
+ * allocate from cpuset_init().
+ */
+static cpumask_var_t cpus_attach;
+
static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
/* static bufs protected by cpuset_mutex */
@@ -2103,18 +2120,48 @@ int __init cpuset_init(void)
return 0;
}
+/*
+ * If CPU and/or memory hotplug handlers, below, unplug any CPUs
+ * or memory nodes, we need to walk over the cpuset hierarchy,
+ * removing that CPU or node from all cpusets. If this removes the
+ * last CPU or node from a cpuset, then move the tasks in the empty
+ * cpuset to its next-highest non-empty parent.
+ */
+static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
+{
+ struct cpuset *parent;
+
+ /*
+ * Find its next-highest non-empty parent, (top cpuset
+ * has online cpus, so can't be empty).
+ */
+ parent = parent_cs(cs);
+ while (cpumask_empty(parent->cpus_allowed) ||
+ nodes_empty(parent->mems_allowed))
+ parent = parent_cs(parent);
+
+ if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
+ rcu_read_lock();
+ printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
+ cgroup_name(cs->css.cgroup));
+ rcu_read_unlock();
+ }
+}
+
/**
* cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
* @cs: cpuset in interest
*
* Compare @cs's cpu and mem masks against top_cpuset and if some have gone
- * offline, update @cs accordingly.
+ * offline, update @cs accordingly. If @cs ends up with no CPU or memory,
+ * all its tasks are moved to the nearest ancestor with both resources.
*/
static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
{
static cpumask_t off_cpus;
static nodemask_t off_mems, tmp_mems;
struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
+ bool is_empty;
mutex_lock(&cpuset_mutex);
@@ -2138,8 +2185,19 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
update_tasks_nodemask(cs, &tmp_mems, NULL);
}
+ is_empty = cpumask_empty(cs->cpus_allowed) ||
+ nodes_empty(cs->mems_allowed);
+
mutex_unlock(&cpuset_mutex);
+ /*
+ * If @cs became empty, move tasks to the nearest ancestor with
+ * execution resources. This is full cgroup operation which will
+ * also call back into cpuset. Should be done outside any lock.
+ */
+ if (is_empty)
+ remove_tasks_in_empty_cpuset(cs);
+
/* the following may free @cs, should be the last operation */
css_put(&cs->css);
}
--
2.1.4
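
A companion illustration for the update_cpumask()/update_nodemask()
hunks: an empty mask string is still parsed specially (cpumask_clear/
nodes_clear), but validate_change() must now refuse it while the cpuset
holds tasks. A minimal sketch, again assuming a v1 cpuset mount at
/sys/fs/cgroup/cpuset and a hypothetical child "busy" that was set up
beforehand with valid cpuset.cpus and cpuset.mems (both assumptions of
the example):

/* clear-cpus.c - expect ENOSPC when emptying cpuset.cpus of a
 * populated cpuset. Paths and the "busy" cgroup are hypothetical.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *s)
{
	int fd = open(path, O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, s, strlen(s));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	char pid[16];

	snprintf(pid, sizeof(pid), "%d", getpid());
	/* Put ourselves into the "busy" cpuset first. */
	if (write_str("/sys/fs/cgroup/cpuset/busy/tasks", pid)) {
		perror("attach");
		return 1;
	}
	/* Equivalent of `echo "" > cpuset.cpus`: the trailing newline
	 * is stripped, leaving an empty mask, which validate_change()
	 * should now reject with ENOSPC while tasks remain. */
	if (write_str("/sys/fs/cgroup/cpuset/busy/cpuset.cpus", "\n"))
		printf("clearing cpus failed as expected: %s\n",
		       strerror(errno));
	else
		printf("clearing cpus unexpectedly succeeded\n");
	return 0;
}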