[Devel] [PATCH rh7] cpuset: revert changes allowing to attach to empty cpusets

Vladimir Davydov vdavydov at virtuozzo.com
Fri Jan 29 04:18:32 PST 2016


Now that PSBM-34089 is done, there is no need for the hacks that
allowed us to attach tasks to cpuset cgroups with an empty cpuset.cpus
or cpuset.mems. So let's revert them.

https://jira.sw.ru/browse/PSBM-42087

Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 kernel/cpuset.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 17 deletions(-)
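
For context, this restores the vanilla behavior where moving a task into
a cpuset with an empty cpuset.cpus or cpuset.mems is rejected from
cpuset_can_attach() with -ENOSPC. A minimal userspace sketch of that
check (not part of the patch; the v1 mount point and cgroup name are
assumptions):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/*
	 * Assumes an empty cpuset was prepared beforehand, e.g.:
	 *   mkdir /sys/fs/cgroup/cpuset/test
	 * (in a v1 hierarchy cpuset.cpus and cpuset.mems start out empty)
	 */
	const char *path = "/sys/fs/cgroup/cpuset/test/tasks";
	char pid[16];
	int fd;

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	snprintf(pid, sizeof(pid), "%d", getpid());

	/* with this patch the attach is refused by cpuset_can_attach() */
	if (write(fd, pid, strlen(pid)) < 0 && errno == ENOSPC)
		printf("attach rejected with ENOSPC, as expected\n");

	close(fd);
	return 0;
}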

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 81030b340dbd..123cdc5b58cf 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -268,14 +268,6 @@ static DEFINE_MUTEX(cpuset_mutex);
 static DEFINE_MUTEX(callback_mutex);
 
 /*
- * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
- * but we can't allocate it dynamically there.  Define it global and
- * allocate from cpuset_init().
- */
-static cpumask_var_t cpus_attach;
-
-
-/*
  * CPU / memory hotplug is handled asynchronously.
  */
 static struct workqueue_struct *cpuset_propagate_hotplug_wq;
@@ -491,6 +483,16 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 			goto out;
 	}
 
+	/*
+	 * Cpusets with tasks - existing or newly being attached - can't
+	 * have empty cpus_allowed or mems_allowed.
+	 */
+	ret = -ENOSPC;
+	if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) &&
+	    (cpumask_empty(trial->cpus_allowed) ||
+	     nodes_empty(trial->mems_allowed)))
+		goto out;
+
 	ret = 0;
 out:
 	rcu_read_unlock();
@@ -812,7 +814,8 @@ void rebuild_sched_domains(void)
 static int cpuset_test_cpumask(struct task_struct *tsk,
 			       struct cgroup_scanner *scan)
 {
-	return !cpumask_equal(&tsk->cpus_allowed, cpus_attach);
+	return !cpumask_equal(&tsk->cpus_allowed,
+			(cgroup_cs(scan->cg))->cpus_allowed);
 }
 
 /**
@@ -829,7 +832,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
 static void cpuset_change_cpumask(struct task_struct *tsk,
 				  struct cgroup_scanner *scan)
 {
-	set_cpus_allowed_ptr(tsk, cpus_attach);
+	set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
 }
 
 /**
@@ -849,7 +852,6 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
 {
 	struct cgroup_scanner scan;
 
-	guarantee_online_cpus(cs, cpus_attach);
 	scan.cg = cs->css.cgroup;
 	scan.test_task = cpuset_test_cpumask;
 	scan.process_task = cpuset_change_cpumask;
@@ -935,8 +937,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
 		return -ENOMEM;
 
 	/*
+	 * An empty cpus_allowed is ok only if the cpuset has no tasks.
 	 * Since cpulist_parse() fails on an empty mask, we special case
-	 * that parsing.
+	 * that parsing.  The validate_change() call ensures that cpusets
+	 * with tasks have cpus.
 	 */
 	if (!*buf)
 		cpumask_clear(cpus_allowed);
@@ -1059,9 +1063,9 @@ static void cpuset_change_nodemask(struct task_struct *p,
 
 	migrate = is_memory_migrate(cs);
 
-	mpol_rebind_mm(mm, &newmems);
+	mpol_rebind_mm(mm, &cs->mems_allowed);
 	if (migrate)
-		cpuset_migrate_mm(mm, oldmem, &newmems);
+		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
 	mmput(mm);
 }
 
@@ -1162,7 +1166,7 @@ static int __update_nodemask(struct cpuset *cs,
 
 	trialcs->mems_allowed = *mems_allowed;
 
-	guarantee_online_mems(cs, oldmem);
+	*oldmem = cs->mems_allowed;
 	if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
 		retval = 0;		/* Too easy - nothing to do */
 		goto done;
@@ -1198,8 +1202,10 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
 		return -ENOMEM;
 
 	/*
+	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
 	 * Since nodelist_parse() fails on an empty mask, we special case
-	 * that parsing.
+	 * that parsing.  The validate_change() call ensures that cpusets
+	 * with tasks have memory.
 	 */
 	if (!*buf)
 		nodes_clear(*mems_allowed);
@@ -1438,6 +1444,10 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 
 	mutex_lock(&cpuset_mutex);
 
+	ret = -ENOSPC;
+	if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+		goto out_unlock;
+
 	cgroup_taskset_for_each(task, cgrp, tset) {
 		/*
 		 * Kthreads which disallow setaffinity shouldn't be moved
@@ -1475,6 +1485,13 @@ static void cpuset_cancel_attach(struct cgroup *cgrp,
 	mutex_unlock(&cpuset_mutex);
 }
 
+/*
+ * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
+ * but we can't allocate it dynamically there.  Define it global and
+ * allocate from cpuset_init().
+ */
+static cpumask_var_t cpus_attach;
+
 static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 {
 	/* static bufs protected by cpuset_mutex */
@@ -2103,18 +2120,48 @@ int __init cpuset_init(void)
 	return 0;
 }
 
+/*
+ * If CPU and/or memory hotplug handlers, below, unplug any CPUs
+ * or memory nodes, we need to walk over the cpuset hierarchy,
+ * removing that CPU or node from all cpusets.  If this removes the
+ * last CPU or node from a cpuset, then move the tasks in the empty
+ * cpuset to its next-highest non-empty parent.
+ */
+static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
+{
+	struct cpuset *parent;
+
+	/*
+	 * Find its next-highest non-empty parent, (top cpuset
+	 * has online cpus, so can't be empty).
+	 */
+	parent = parent_cs(cs);
+	while (cpumask_empty(parent->cpus_allowed) ||
+			nodes_empty(parent->mems_allowed))
+		parent = parent_cs(parent);
+
+	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
+		rcu_read_lock();
+		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
+		       cgroup_name(cs->css.cgroup));
+		rcu_read_unlock();
+	}
+}
+
 /**
  * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
  * @cs: cpuset in interest
  *
  * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
- * offline, update @cs accordingly.
+ * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
+ * all its tasks are moved to the nearest ancestor with both resources.
  */
 static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
 {
 	static cpumask_t off_cpus;
 	static nodemask_t off_mems, tmp_mems;
 	struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
+	bool is_empty;
 
 	mutex_lock(&cpuset_mutex);
 
@@ -2138,8 +2185,19 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
 		update_tasks_nodemask(cs, &tmp_mems, NULL);
 	}
 
+	is_empty = cpumask_empty(cs->cpus_allowed) ||
+		nodes_empty(cs->mems_allowed);
+
 	mutex_unlock(&cpuset_mutex);
 
+	/*
+	 * If @cs became empty, move tasks to the nearest ancestor with
+	 * execution resources.  This is full cgroup operation which will
+	 * also call back into cpuset.  Should be done outside any lock.
+	 */
+	if (is_empty)
+		remove_tasks_in_empty_cpuset(cs);
+
 	/* the following may free @cs, should be the last operation */
 	css_put(&cs->css);
 }
-- 
2.1.4
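
For completeness, the validate_change() hunk above is the other half of
the restored semantics: once a cpuset has tasks (or an attach in
progress), clearing its last cpu or node is refused. A companion sketch
under the same hypothetical paths:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* assumes /sys/fs/cgroup/cpuset/test now holds at least one task */
	const char *path = "/sys/fs/cgroup/cpuset/test/cpuset.cpus";
	int fd;

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * An empty mask parses fine (see the special case in
	 * update_cpumask() above), but validate_change() then fails the
	 * write with -ENOSPC because the cpuset still has tasks.
	 */
	if (write(fd, "\n", 1) < 0 && errno == ENOSPC)
		printf("emptying cpuset.cpus rejected with ENOSPC\n");

	close(fd);
	return 0;
}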