[Devel] [PATCH rh7 19/21] ms/mm: memcontrol: revert use of root_mem_cgroup res_counter
Andrey Ryabinin
aryabinin at virtuozzo.com
Tue Nov 1 02:21:38 PDT 2016
From: Johannes Weiner <hannes at cmpxchg.org>
Dave Hansen reports a massive scalability regression in an uncontained
page fault benchmark with more than 30 concurrent threads, which he
bisected down to 05b843012335 ("mm: memcontrol: use root_mem_cgroup
res_counter") and pin-pointed on res_counter spinlock contention.
That change relied on the per-cpu charge caches to mostly swallow the
res_counter costs, but it's apparent that the caches don't scale yet.
Revert memcg back to bypassing res_counters on the root level in order
to restore performance for uncontained workloads.
Reported-by: Dave Hansen <dave at sr71.net>
Signed-off-by: Johannes Weiner <hannes at cmpxchg.org>
Tested-by: Dave Hansen <dave.hansen at intel.com>
Acked-by: Michal Hocko <mhocko at suse.cz>
Acked-by: Vladimir Davydov <vdavydov at parallels.com>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
https://jira.sw.ru/browse/PSBM-51558
(cherry picked from commit ce00a967377baadf2481521e131771adc7652856)
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
mm/memcontrol.c | 82 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 43 insertions(+), 39 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 11c06d3..cb7657e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2814,13 +2814,14 @@ done:
static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- if (!mem_cgroup_is_root(memcg)) {
- unsigned long bytes = nr_pages * PAGE_SIZE;
+ unsigned long bytes = nr_pages * PAGE_SIZE;
- res_counter_uncharge(&memcg->res, bytes);
- if (do_swap_account)
- res_counter_uncharge(&memcg->memsw, bytes);
- }
+ if (mem_cgroup_is_root(memcg))
+ return;
+
+ res_counter_uncharge(&memcg->res, bytes);
+ if (do_swap_account)
+ res_counter_uncharge(&memcg->memsw, bytes);
}
/*
@@ -4183,7 +4184,6 @@ out:
return retval;
}
-
static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
@@ -4199,6 +4199,30 @@ static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
return val;
}
+static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
+{
+ u64 val;
+
+ if (!mem_cgroup_is_root(memcg)) {
+ if (!swap)
+ return res_counter_read_u64(&memcg->res, RES_USAGE);
+ else
+ return res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ }
+
+ /*
+ * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
+ * as well as in MEM_CGROUP_STAT_RSS_HUGE.
+ */
+ val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+ val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
+
+ if (swap)
+ val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
+
+ return val << PAGE_SHIFT;
+}
+
void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi)
{
int nid;
@@ -4236,30 +4260,6 @@ int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages)
return free < pages ? -ENOMEM : 0;
}
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
-{
- u64 val;
-
- if (!mem_cgroup_is_root(memcg)) {
- if (!swap)
- return res_counter_read_u64(&memcg->res, RES_USAGE);
- else
- return res_counter_read_u64(&memcg->memsw, RES_USAGE);
- }
-
- /*
- * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
- * as well as in MEM_CGROUP_STAT_RSS_HUGE.
- */
- val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
-
- if (swap)
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
-
- return val << PAGE_SHIFT;
-}
-
static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
@@ -6429,7 +6429,7 @@ static void __mem_cgroup_clear_mc(void)
/* uncharge swap account from the old cgroup */
if (!mem_cgroup_is_root(mc.from))
res_counter_uncharge(&mc.from->memsw,
- PAGE_SIZE * mc.moved_swap);
+ PAGE_SIZE * mc.moved_swap);
for (i = 0; i < mc.moved_swap; i++)
css_put(&mc.from->css);
@@ -6811,7 +6811,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
if (memcg) {
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ if (!mem_cgroup_is_root(memcg))
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_swap_statistics(memcg, false);
css_put(&memcg->css);
}
@@ -6970,12 +6971,15 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
{
unsigned long flags;
- if (nr_mem)
- res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
- if (nr_memsw)
- res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
-
- memcg_oom_recover(memcg);
+ if (!mem_cgroup_is_root(memcg)) {
+ if (nr_mem)
+ res_counter_uncharge(&memcg->res,
+ nr_mem * PAGE_SIZE);
+ if (nr_memsw)
+ res_counter_uncharge(&memcg->memsw,
+ nr_memsw * PAGE_SIZE);
+ memcg_oom_recover(memcg);
+ }
local_irq_save(flags);
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
--
2.7.3
More information about the Devel
mailing list