[Devel] [PATCH RHEL7 COMMIT] ms/memcg: make mem_cgroup_read_{stat|event}() iterate possible cpus instead of online

Konstantin Khorenko khorenko at virtuozzo.com
Fri May 13 08:13:55 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.10.1.vz7.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.10.1.vz7.12.19
------>
commit d2802d03dd59b5a3c0b240a025b02f8d6a1cd368
Author: Tejun Heo <tj at kernel.org>
Date:   Fri May 13 19:13:55 2016 +0400

    ms/memcg: make mem_cgroup_read_{stat|event}() iterate possible cpus instead of online
    
    vdavydov@: The following patch needs to call mem_cgroup_read_stat() from
    a non-sleepable context, hence this patch.
    
    cpu_possible_mask represents the CPUs which are actually possible
    during that boot instance.  For systems which don't support CPU
    hotplug, this will match cpu_online_mask exactly in most cases.  Even
    for systems which support CPU hotplug, the number of possible CPU
    slots is highly unlikely to diverge greatly from the number of online
    CPUs.  The only cases where the difference between possible and online
    caused problems were when the boot code failed to initialize the
    possible mask and left it fully set at NR_CPUS - 1.
    
    As such, most per-cpu constructs allocate for all possible CPUs and
    often iterate over the possibles, which also has the benefit of
    avoiding the blocking CPU hotplug synchronization.
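
    A minimal sketch of the difference (the names counter, val and cpu are
    illustrative, not taken from this patch):

        /* online walk: CPU hotplug must be blocked for the duration */
        get_online_cpus();
        for_each_online_cpu(cpu)
                val += per_cpu(counter, cpu);
        put_online_cpus();

        /* possible walk: the mask is fixed after boot, no locking needed */
        for_each_possible_cpu(cpu)
                val += per_cpu(counter, cpu);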
    
    memcg open codes per-cpu stat counting for mem_cgroup_read_stat() and
    mem_cgroup_read_events(), which iterate over online CPUs and handle
    CPU hotplug operations explicitly.  This complexity doesn't actually
    buy anything.  Switch to iterating over the possibles and drop the
    explicit CPU hotplug handling.
    
    Eventually, we want to convert memcg from its own custom implementation
    to percpu_counter, which additionally offers quick approximate reads
    without summing, for cases where a larger error margin is acceptable.
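
    For reference, a hedged sketch of that percpu_counter usage (the
    counter and variable names are made up; note that percpu_counter_init()
    gained a gfp_t argument only in v3.18, after this kernel):

        #include <linux/percpu_counter.h>

        struct percpu_counter nr_pages;
        s64 approx, exact;

        percpu_counter_init(&nr_pages, 0);
        percpu_counter_add(&nr_pages, 1);

        /* fast, approximate: reads the central count, skips per-cpu deltas */
        approx = percpu_counter_read(&nr_pages);
        /* slow, exact: sums every CPU's delta under the counter lock */
        exact = percpu_counter_sum(&nr_pages);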
    
    This will allow mem_cgroup_read_stat() to be called from non-sleepable
    contexts which will be used by cgroup writeback.
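
    A hedged illustration of what that enables (wb_lock and idx are made-up
    names, not from this patch): a caller may now hold a spinlock across
    the read,

        spin_lock_irq(&wb_lock);                        /* atomic context */
        val = mem_cgroup_read_stat(memcg, idx);         /* no longer sleeps */
        spin_unlock_irq(&wb_lock);

    which was previously a bug, because get_online_cpus() takes a mutex and
    may sleep.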
    
    Signed-off-by: Tejun Heo <tj at kernel.org>
    Cc: Michal Hocko <mhocko at suse.cz>
    Acked-by: Johannes Weiner <hannes at cmpxchg.org>
    Signed-off-by: Jens Axboe <axboe at fb.com>
    (cherry picked from commit 733a572e66d2a23c852fdce34dba5bbd40667817)
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
    Reviewed-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
    
    Conflicts:
    	mm/memcontrol.c
---
 mm/memcontrol.c | 49 ++-----------------------------------------------
 1 file changed, 2 insertions(+), 47 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f603758..f52cd8e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -364,11 +364,6 @@ struct mem_cgroup {
 	 * percpu counter.
 	 */
 	struct mem_cgroup_stat_cpu __percpu *stat;
-	/*
-	 * used when a cpu is offlined or other synchronizations
-	 * See mem_cgroup_read_stat().
-	 */
-	struct mem_cgroup_stat_cpu nocpu_base;
 	spinlock_t pcp_counter_lock;
 
 	atomic_t	dead_count;
@@ -931,15 +926,8 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
 	long val = 0;
 	int cpu;
 
-	get_online_cpus();
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		val += per_cpu(memcg->stat->count[idx], cpu);
-#ifdef CONFIG_HOTPLUG_CPU
-	spin_lock(&memcg->pcp_counter_lock);
-	val += memcg->nocpu_base.count[idx];
-	spin_unlock(&memcg->pcp_counter_lock);
-#endif
-	put_online_cpus();
 	return val;
 }
 
@@ -982,13 +970,8 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 	unsigned long val = 0;
 	int cpu;
 
-	for_each_online_cpu(cpu)
+	for_each_possible_cpu(cpu)
 		val += per_cpu(memcg->stat->events[idx], cpu);
-#ifdef CONFIG_HOTPLUG_CPU
-	spin_lock(&memcg->pcp_counter_lock);
-	val += memcg->nocpu_base.events[idx];
-	spin_unlock(&memcg->pcp_counter_lock);
-#endif
 	return val;
 }
 
@@ -2717,37 +2700,12 @@ static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
 	mutex_unlock(&percpu_charge_mutex);
 }
 
-/*
- * This function drains percpu counter value from DEAD cpu and
- * move it to local cpu. Note that this function can be preempted.
- */
-static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
-{
-	int i;
-
-	spin_lock(&memcg->pcp_counter_lock);
-	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-		long x = per_cpu(memcg->stat->count[i], cpu);
-
-		per_cpu(memcg->stat->count[i], cpu) = 0;
-		memcg->nocpu_base.count[i] += x;
-	}
-	for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
-		unsigned long x = per_cpu(memcg->stat->events[i], cpu);
-
-		per_cpu(memcg->stat->events[i], cpu) = 0;
-		memcg->nocpu_base.events[i] += x;
-	}
-	spin_unlock(&memcg->pcp_counter_lock);
-}
-
 static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 					unsigned long action,
 					void *hcpu)
 {
 	int cpu = (unsigned long)hcpu;
 	struct memcg_stock_pcp *stock;
-	struct mem_cgroup *iter;
 
 	if (action == CPU_ONLINE)
 		return NOTIFY_OK;
@@ -2755,9 +2713,6 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
 		return NOTIFY_OK;
 
-	for_each_mem_cgroup(iter)
-		mem_cgroup_drain_pcp_counter(iter, cpu);
-
 	stock = &per_cpu(memcg_stock, cpu);
 	drain_stock(stock);
 	return NOTIFY_OK;

