[Devel] [PATCH rh7] cpuset: revert changes that allowed attaching to empty cpusets
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Jan 29 06:54:18 PST 2016
Andrey, please review the patch.
--
Best regards,
Konstantin Khorenko,
Virtuozzo Linux Kernel Team
On 01/29/2016 03:18 PM, Vladimir Davydov wrote:
> Now that PSBM-34089 is done, there is no need for the hacks that
> allowed us to attach tasks to cpuset cgroups with an empty
> cpuset.cpus or cpuset.mems, so let's revert them.
>
> https://jira.sw.ru/browse/PSBM-42087
>
> Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
> ---
> kernel/cpuset.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 75 insertions(+), 17 deletions(-)
>
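[Review context: with the hacks gone, the pre-hack mainline semantics are back, i.e. a task cannot be attached while either mask is empty. A minimal userspace sketch of the restored behaviour, not part of the patch; it assumes a cpuset v1 hierarchy mounted at /sys/fs/cgroup/cpuset, and the child cgroup name "empty" is hypothetical.]

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	char pid[16];
	int fd;

	/* A freshly created cpuset has empty cpus/mems masks. */
	mkdir("/sys/fs/cgroup/cpuset/empty", 0755);

	fd = open("/sys/fs/cgroup/cpuset/empty/tasks", O_WRONLY);
	if (fd < 0)
		return 1;

	snprintf(pid, sizeof(pid), "%d", getpid());
	/* cpuset_can_attach() now rejects this with -ENOSPC. */
	if (write(fd, pid, strlen(pid)) < 0 && errno == ENOSPC)
		printf("attach rejected: empty cpuset (ENOSPC)\n");

	close(fd);
	return 0;
}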
> diff --git a/kernel/cpuset.c b/kernel/cpuset.c
> index 81030b340dbd..123cdc5b58cf 100644
> --- a/kernel/cpuset.c
> +++ b/kernel/cpuset.c
> @@ -268,14 +268,6 @@ static DEFINE_MUTEX(cpuset_mutex);
> static DEFINE_MUTEX(callback_mutex);
>
> /*
> - * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach()
> - * but we can't allocate it dynamically there. Define it global and
> - * allocate from cpuset_init().
> - */
> -static cpumask_var_t cpus_attach;
> -
> -
> -/*
> * CPU / memory hotplug is handled asynchronously.
> */
> static struct workqueue_struct *cpuset_propagate_hotplug_wq;
> @@ -491,6 +483,16 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
> goto out;
> }
>
> + /*
> + * Cpusets with tasks - existing or newly being attached - can't
> + * have empty cpus_allowed or mems_allowed.
> + */
> + ret = -ENOSPC;
> + if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) &&
> + (cpumask_empty(trial->cpus_allowed) ||
> + nodes_empty(trial->mems_allowed)))
> + goto out;
> +
> ret = 0;
> out:
> rcu_read_unlock();
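[The restored validate_change() check is also visible from userspace: once a cpuset contains tasks, it can no longer be emptied. A minimal sketch, assuming the current task was attached to a hypothetical cpuset "ct0" beforehand; the same applies to cpuset.mems.]

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/fs/cgroup/cpuset/ct0/cpuset.cpus", O_WRONLY);

	if (fd < 0)
		return 1;
	/*
	 * A bare newline parses as the empty mask; validate_change()
	 * now refuses it with -ENOSPC because the cpuset has tasks.
	 */
	if (write(fd, "\n", 1) < 0 && errno == ENOSPC)
		printf("cannot empty cpuset.cpus of a populated cpuset\n");
	close(fd);
	return 0;
}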
> @@ -812,7 +814,8 @@ void rebuild_sched_domains(void)
> static int cpuset_test_cpumask(struct task_struct *tsk,
> struct cgroup_scanner *scan)
> {
> - return !cpumask_equal(&tsk->cpus_allowed, cpus_attach);
> + return !cpumask_equal(&tsk->cpus_allowed,
> + (cgroup_cs(scan->cg))->cpus_allowed);
> }
>
> /**
> @@ -829,7 +832,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
> static void cpuset_change_cpumask(struct task_struct *tsk,
> struct cgroup_scanner *scan)
> {
> - set_cpus_allowed_ptr(tsk, cpus_attach);
> + set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
> }
>
> /**
> @@ -849,7 +852,6 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
> {
> struct cgroup_scanner scan;
>
> - guarantee_online_cpus(cs, cpus_attach);
> scan.cg = cs->css.cgroup;
> scan.test_task = cpuset_test_cpumask;
> scan.process_task = cpuset_change_cpumask;
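[With guarantee_online_cpus() gone from update_tasks_cpumask(), member tasks now receive the cpuset's mask exactly as written, rather than a copy narrowed to online CPUs. A rough userspace check, assuming the calling task already sits in the hypothetical "ct0" cpuset and CPUs 0-1 are online.]

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t set;
	int fd = open("/sys/fs/cgroup/cpuset/ct0/cpuset.cpus", O_WRONLY);

	if (fd < 0 || write(fd, "0-1", 3) < 0)
		return 1;
	close(fd);

	/*
	 * cpuset_change_cpumask() applies cs->cpus_allowed directly,
	 * so this task's affinity should now be exactly CPUs 0-1.
	 */
	if (sched_getaffinity(0, sizeof(set), &set) == 0)
		printf("affinity spans %d cpus\n", CPU_COUNT(&set));
	return 0;
}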
> @@ -935,8 +937,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
> return -ENOMEM;
>
> /*
> + * An empty cpus_allowed is ok only if the cpuset has no tasks.
> * Since cpulist_parse() fails on an empty mask, we special case
> - * that parsing.
> + * that parsing. The validate_change() call ensures that cpusets
> + * with tasks have cpus.
> */
> if (!*buf)
> cpumask_clear(cpus_allowed);
> @@ -1059,9 +1063,9 @@ static void cpuset_change_nodemask(struct task_struct *p,
>
> migrate = is_memory_migrate(cs);
>
> - mpol_rebind_mm(mm, &newmems);
> + mpol_rebind_mm(mm, &cs->mems_allowed);
> if (migrate)
> - cpuset_migrate_mm(mm, oldmem, &newmems);
> + cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
> mmput(mm);
> }
>
> @@ -1162,7 +1166,7 @@ static int __update_nodemask(struct cpuset *cs,
>
> trialcs->mems_allowed = *mems_allowed;
>
> - guarantee_online_mems(cs, oldmem);
> + *oldmem = cs->mems_allowed;
> if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
> retval = 0; /* Too easy - nothing to do */
> goto done;
> @@ -1198,8 +1202,10 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
> return -ENOMEM;
>
> /*
> + * An empty mems_allowed is ok iff there are no tasks in the cpuset.
> * Since nodelist_parse() fails on an empty mask, we special case
> - * that parsing.
> + * that parsing. The validate_change() call ensures that cpusets
> + * with tasks have memory.
> */
> if (!*buf)
> nodes_clear(*mems_allowed);
> @@ -1438,6 +1444,10 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
>
> mutex_lock(&cpuset_mutex);
>
> + ret = -ENOSPC;
> + if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
> + goto out_unlock;
> +
> cgroup_taskset_for_each(task, cgrp, tset) {
> /*
> * Kthreads which disallow setaffinity shouldn't be moved
> @@ -1475,6 +1485,13 @@ static void cpuset_cancel_attach(struct cgroup *cgrp,
> mutex_unlock(&cpuset_mutex);
> }
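[Correspondingly, the attach succeeds once both masks are populated. A hedged companion to the earlier sketch, with the same assumed mount point and a hypothetical "ct0" cgroup.]

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

static int write_file(const char *path, const char *buf)
{
	int fd = open(path, O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;
	ret = write(fd, buf, strlen(buf));
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	char pid[16];

	mkdir("/sys/fs/cgroup/cpuset/ct0", 0755);
	/* Populate both masks first: cpuset_can_attach() demands it. */
	if (write_file("/sys/fs/cgroup/cpuset/ct0/cpuset.cpus", "0") ||
	    write_file("/sys/fs/cgroup/cpuset/ct0/cpuset.mems", "0"))
		return 1;

	snprintf(pid, sizeof(pid), "%d", getpid());
	if (write_file("/sys/fs/cgroup/cpuset/ct0/tasks", pid) == 0)
		printf("attached after populating cpus and mems\n");
	return 0;
}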
>
> +/*
> + * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach()
> + * but we can't allocate it dynamically there. Define it global and
> + * allocate from cpuset_init().
> + */
> +static cpumask_var_t cpus_attach;
> +
> static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
> {
> /* static bufs protected by cpuset_mutex */
> @@ -2103,18 +2120,48 @@ int __init cpuset_init(void)
> return 0;
> }
>
> +/*
> + * If CPU and/or memory hotplug handlers, below, unplug any CPUs
> + * or memory nodes, we need to walk over the cpuset hierarchy,
> + * removing that CPU or node from all cpusets. If this removes the
> + * last CPU or node from a cpuset, then move the tasks in the empty
> + * cpuset to its next-highest non-empty parent.
> + */
> +static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
> +{
> + struct cpuset *parent;
> +
> + /*
> +	 * Find its next-highest non-empty parent (top cpuset
> + * has online cpus, so can't be empty).
> + */
> + parent = parent_cs(cs);
> + while (cpumask_empty(parent->cpus_allowed) ||
> + nodes_empty(parent->mems_allowed))
> + parent = parent_cs(parent);
> +
> + if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
> + rcu_read_lock();
> + printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
> + cgroup_name(cs->css.cgroup));
> + rcu_read_unlock();
> + }
> +}
> +
> /**
> * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
> * @cs: cpuset in interest
> *
> * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
> - * offline, update @cs accordingly.
> + * offline, update @cs accordingly. If @cs ends up with no CPU or memory,
> + * all its tasks are moved to the nearest ancestor with both resources.
> */
> static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
> {
> static cpumask_t off_cpus;
> static nodemask_t off_mems, tmp_mems;
> struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
> + bool is_empty;
>
> mutex_lock(&cpuset_mutex);
>
> @@ -2138,8 +2185,19 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
> update_tasks_nodemask(cs, &tmp_mems, NULL);
> }
>
> + is_empty = cpumask_empty(cs->cpus_allowed) ||
> + nodes_empty(cs->mems_allowed);
> +
> mutex_unlock(&cpuset_mutex);
>
> + /*
> + * If @cs became empty, move tasks to the nearest ancestor with
> +	 * execution resources. This is a full cgroup operation which will
> + * also call back into cpuset. Should be done outside any lock.
> + */
> + if (is_empty)
> + remove_tasks_in_empty_cpuset(cs);
> +
> /* the following may free @cs, should be the last operation */
> css_put(&cs->css);
> }
>
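[The hotplug fallback restored at the end of the patch is easiest to see in isolation. Below is a toy, userspace-compilable model of the ancestor walk in remove_tasks_in_empty_cpuset(); the struct and flags are simplified stand-ins, not the kernel types.]

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Simplified stand-in for struct cpuset: a parent link plus
 * "is anything left" flags for cpus_allowed/mems_allowed.
 */
struct toy_cpuset {
	struct toy_cpuset *parent;
	bool has_cpus;
	bool has_mems;
	const char *name;
};

/*
 * Mirror of the restored walk: climb until an ancestor has both
 * CPUs and memory. The top cpuset always has online CPUs, so the
 * loop terminates.
 */
static struct toy_cpuset *nearest_nonempty(struct toy_cpuset *cs)
{
	struct toy_cpuset *parent = cs->parent;

	while (!parent->has_cpus || !parent->has_mems)
		parent = parent->parent;
	return parent;
}

int main(void)
{
	struct toy_cpuset top = { NULL, true, true, "/" };
	struct toy_cpuset mid = { &top, false, true, "/mid" };
	struct toy_cpuset leaf = { &mid, false, false, "/mid/leaf" };

	/* Tasks in the emptied leaf would be transferred here. */
	printf("tasks move to %s\n", nearest_nonempty(&leaf)->name);
	return 0;
}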