[Devel] [PATCH rh7] cpuset: revert changes that allowed attaching to empty cpusets

Konstantin Khorenko khorenko at virtuozzo.com
Fri Jan 29 06:54:18 PST 2016


Andrey, please review the patch.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 01/29/2016 03:18 PM, Vladimir Davydov wrote:
> Now that PSBM-34089 is done, there is no need for the hacks that
> allowed us to attach tasks to cpuset cgroups with empty cpuset.cpus
> or cpuset.mems. So let's revert them.
>
> https://jira.sw.ru/browse/PSBM-42087
>
> Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
> ---
>   kernel/cpuset.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++-----------
>   1 file changed, 75 insertions(+), 17 deletions(-)
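
A note for review: the user-visible effect of this revert is that moving a
task into a cpuset whose cpuset.cpus or cpuset.mems is still empty fails
again with -ENOSPC (the check re-added to cpuset_can_attach() below). A
minimal sketch of that behavior, assuming a cgroup-v1 cpuset hierarchy
mounted at /sys/fs/cgroup/cpuset and a freshly created, still-unconfigured
child cpuset named "test" (both hypothetical):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[16];
            int fd, n;

            fd = open("/sys/fs/cgroup/cpuset/test/tasks", O_WRONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* Attach ourselves by writing our pid to the v1 "tasks"
             * file.  With cpuset.cpus/cpuset.mems still empty,
             * cpuset_can_attach() should reject this with -ENOSPC
             * after the revert. */
            n = snprintf(buf, sizeof(buf), "%d", getpid());
            if (write(fd, buf, n) < 0 && errno == ENOSPC)
                    printf("attach rejected with ENOSPC, as expected\n");
            close(fd);
            return 0;
    }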
>
> diff --git a/kernel/cpuset.c b/kernel/cpuset.c
> index 81030b340dbd..123cdc5b58cf 100644
> --- a/kernel/cpuset.c
> +++ b/kernel/cpuset.c
> @@ -268,14 +268,6 @@ static DEFINE_MUTEX(cpuset_mutex);
>   static DEFINE_MUTEX(callback_mutex);
>
>   /*
> - * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
> - * but we can't allocate it dynamically there.  Define it global and
> - * allocate from cpuset_init().
> - */
> -static cpumask_var_t cpus_attach;
> -
> -
> -/*
>    * CPU / memory hotplug is handled asynchronously.
>    */
>   static struct workqueue_struct *cpuset_propagate_hotplug_wq;
> @@ -491,6 +483,16 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
>   			goto out;
>   	}
>
> +	/*
> +	 * Cpusets with tasks - existing or newly being attached - can't
> +	 * have empty cpus_allowed or mems_allowed.
> +	 */
> +	ret = -ENOSPC;
> +	if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) &&
> +	    (cpumask_empty(trial->cpus_allowed) ||
> +	     nodes_empty(trial->mems_allowed)))
> +		goto out;
> +
>   	ret = 0;
>   out:
>   	rcu_read_unlock();
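
The hunk above restores the other half of the old semantics: once a cpuset
is populated, its masks can no longer be emptied. Another sketch against
the same hypothetical "test" cpuset, this time with at least one task
already attached:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/fs/cgroup/cpuset/test/cpuset.cpus",
                          O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* An empty write clears the mask; since "test" still holds
             * tasks, validate_change() should now fail the write with
             * -ENOSPC. */
            if (write(fd, "\n", 1) < 0 && errno == ENOSPC)
                    printf("emptying cpuset.cpus rejected, as expected\n");
            close(fd);
            return 0;
    }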
> @@ -812,7 +814,8 @@ void rebuild_sched_domains(void)
>   static int cpuset_test_cpumask(struct task_struct *tsk,
>   			       struct cgroup_scanner *scan)
>   {
> -	return !cpumask_equal(&tsk->cpus_allowed, cpus_attach);
> +	return !cpumask_equal(&tsk->cpus_allowed,
> +			(cgroup_cs(scan->cg))->cpus_allowed);
>   }
>
>   /**
> @@ -829,7 +832,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
>   static void cpuset_change_cpumask(struct task_struct *tsk,
>   				  struct cgroup_scanner *scan)
>   {
> -	set_cpus_allowed_ptr(tsk, cpus_attach);
> +	set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
>   }
>
>   /**
> @@ -849,7 +852,6 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
>   {
>   	struct cgroup_scanner scan;
>
> -	guarantee_online_cpus(cs, cpus_attach);
>   	scan.cg = cs->css.cgroup;
>   	scan.test_task = cpuset_test_cpumask;
>   	scan.process_task = cpuset_change_cpumask;
> @@ -935,8 +937,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
>   		return -ENOMEM;
>
>   	/*
> +	 * An empty cpus_allowed is ok only if the cpuset has no tasks.
>   	 * Since cpulist_parse() fails on an empty mask, we special case
> -	 * that parsing.
> +	 * that parsing.  The validate_change() call ensures that cpusets
> +	 * with tasks have cpus.
>   	 */
>   	if (!*buf)
>   		cpumask_clear(cpus_allowed);
> @@ -1059,9 +1063,9 @@ static void cpuset_change_nodemask(struct task_struct *p,
>
>   	migrate = is_memory_migrate(cs);
>
> -	mpol_rebind_mm(mm, &newmems);
> +	mpol_rebind_mm(mm, &cs->mems_allowed);
>   	if (migrate)
> -		cpuset_migrate_mm(mm, oldmem, &newmems);
> +		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
>   	mmput(mm);
>   }
>
> @@ -1162,7 +1166,7 @@ static int __update_nodemask(struct cpuset *cs,
>
>   	trialcs->mems_allowed = *mems_allowed;
>
> -	guarantee_online_mems(cs, oldmem);
> +	*oldmem = cs->mems_allowed;
>   	if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
>   		retval = 0;		/* Too easy - nothing to do */
>   		goto done;
> @@ -1198,8 +1202,10 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
>   		return -ENOMEM;
>
>   	/*
> +	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
>   	 * Since nodelist_parse() fails on an empty mask, we special case
> -	 * that parsing.
> +	 * that parsing.  The validate_change() call ensures that cpusets
> +	 * with tasks have memory.
>   	 */
>   	if (!*buf)
>   		nodes_clear(*mems_allowed);
> @@ -1438,6 +1444,10 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
>
>   	mutex_lock(&cpuset_mutex);
>
> +	ret = -ENOSPC;
> +	if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
> +		goto out_unlock;
> +
>   	cgroup_taskset_for_each(task, cgrp, tset) {
>   		/*
>   		 * Kthreads which disallow setaffinity shouldn't be moved
> @@ -1475,6 +1485,13 @@ static void cpuset_cancel_attach(struct cgroup *cgrp,
>   	mutex_unlock(&cpuset_mutex);
>   }
>
> +/*
> + * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
> + * but we can't allocate it dynamically there.  Define it global and
> + * allocate from cpuset_init().
> + */
> +static cpumask_var_t cpus_attach;
> +
>   static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
>   {
>   	/* static bufs protected by cpuset_mutex */
> @@ -2103,18 +2120,48 @@ int __init cpuset_init(void)
>   	return 0;
>   }
>
> +/*
> + * If CPU and/or memory hotplug handlers, below, unplug any CPUs
> + * or memory nodes, we need to walk over the cpuset hierarchy,
> + * removing that CPU or node from all cpusets.  If this removes the
> + * last CPU or node from a cpuset, then move the tasks in the empty
> + * cpuset to its next-highest non-empty parent.
> + */
> +static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
> +{
> +	struct cpuset *parent;
> +
> +	/*
> +	 * Find its next-highest non-empty parent, (top cpuset
> +	 * has online cpus, so can't be empty).
> +	 */
> +	parent = parent_cs(cs);
> +	while (cpumask_empty(parent->cpus_allowed) ||
> +			nodes_empty(parent->mems_allowed))
> +		parent = parent_cs(parent);
> +
> +	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
> +		rcu_read_lock();
> +		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
> +		       cgroup_name(cs->css.cgroup));
> +		rcu_read_unlock();
> +	}
> +}
> +
>   /**
>    * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
>    * @cs: cpuset in interest
>    *
>    * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
> - * offline, update @cs accordingly.
> + * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
> + * all its tasks are moved to the nearest ancestor with both resources.
>    */
>   static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
>   {
>   	static cpumask_t off_cpus;
>   	static nodemask_t off_mems, tmp_mems;
>   	struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
> +	bool is_empty;
>
>   	mutex_lock(&cpuset_mutex);
>
> @@ -2138,8 +2185,19 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
>   		update_tasks_nodemask(cs, &tmp_mems, NULL);
>   	}
>
> +	is_empty = cpumask_empty(cs->cpus_allowed) ||
> +		nodes_empty(cs->mems_allowed);
> +
>   	mutex_unlock(&cpuset_mutex);
>
> +	/*
> +	 * If @cs became empty, move tasks to the nearest ancestor with
> +	 * execution resources.  This is full cgroup operation which will
> +	 * also call back into cpuset.  Should be done outside any lock.
> +	 */
> +	if (is_empty)
> +		remove_tasks_in_empty_cpuset(cs);
> +
>   	/* the following may free @cs, should be the last operation */
>   	css_put(&cs->css);
>   }
>
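
Finally, on the restored hotplug behavior: if unplugging the last CPU or
memory node leaves a cpuset empty, cpuset_propagate_hotplug_workfn() now
hands its tasks to remove_tasks_in_empty_cpuset(), which walks up to the
nearest ancestor that still has both resources. A rough way to observe
this, assuming the hypothetical "test" cpuset was pinned to CPU 1 before
that CPU was offlined through /sys/devices/system/cpu/cpu1/online:

    #include <stdio.h>

    /* Dump a cgroup "tasks" file; after the hotplug above, the empty
     * cpuset's file should list nothing, while its parent's should
     * list the migrated tasks. */
    static void dump(const char *path)
    {
            char line[64];
            FILE *f = fopen(path, "r");

            if (!f) {
                    perror(path);
                    return;
            }
            printf("%s:\n", path);
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
    }

    int main(void)
    {
            dump("/sys/fs/cgroup/cpuset/test/tasks");
            dump("/sys/fs/cgroup/cpuset/tasks");
            return 0;
    }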

