[Devel] [PATCH rh7 2/2] mm/memcg: improve mem_cgroup_dcache_is_low() performance.
Andrey Ryabinin
aryabinin at virtuozzo.com
Tue Jul 25 14:28:36 MSK 2017
mem_cgroup_dcache_is_low() is called during memory reclaim for every
mem cgroup, but it's awfully slow. It iterates through every possible
cpu to collect anon, file and slab reclaimable counters.
Switch the anon, file and reclaimable-slab counters to percpu_counter.
This allows reading them with a single load instead of iterating
over all CPUs.
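
(Not part of the patch, just an illustration of where the win comes from:
percpu_counter keeps an approximate global sum alongside the per-CPU deltas,
so a read becomes a single load instead of an O(nr_cpus) walk. The function
names below are made up for the sketch and do not appear in the patch.)

#include <linux/percpu.h>
#include <linux/percpu_counter.h>

/* Old pattern: fold the raw per-CPU slots on every read, O(nr_possible_cpus). */
static unsigned long read_stat_slow(long __percpu *count)
{
	long val = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		val += *per_cpu_ptr(count, cpu);
	return val < 0 ? 0 : val;
}

/* New pattern: updates are batched per CPU and periodically folded into the
 * shared counter, so a read is one load of the (slightly stale) global value. */
static unsigned long read_stat_fast(struct percpu_counter *fbc)
{
	return percpu_counter_read_positive(fbc);
}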
https://jira.sw.ru/browse/PSBM-68644
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
mm/memcontrol.c | 108 +++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 84 insertions(+), 24 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9678957bf22..11f9bc07e41 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -95,28 +95,38 @@ enum mem_cgroup_stat_index {
/*
* For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
*/
- MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
- MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */
MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */
MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */
MEM_CGROUP_STAT_SHMEM, /* # of charged shmem pages */
- MEM_CGROUP_STAT_SLAB_RECLAIMABLE, /* # of reclaimable slab pages */
MEM_CGROUP_STAT_SLAB_UNRECLAIMABLE, /* # of unreclaimable slab pages */
MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */
MEM_CGROUP_STAT_NSTATS,
};
+enum mem_cgroup_stat2_index {
+ /*
+ * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
+ */
+ MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
+ MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */
+ MEM_CGROUP_STAT_SLAB_RECLAIMABLE, /* # of reclaimable slab pages */
+ MEM_CGROUP_STAT2_NSTATS,
+};
+
static const char * const mem_cgroup_stat_names[] = {
- "cache",
- "rss",
"rss_huge",
"mapped_file",
"shmem",
- "slab_reclaimable",
"slab_unreclaimable",
"swap",
};
+static const char * const mem_cgroup_stat2_names[] = {
+ "cache",
+ "rss",
+ "slab_reclaimable",
+};
+
enum mem_cgroup_events_index {
MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */
MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */
@@ -167,6 +177,10 @@ struct mem_cgroup_stat_cpu {
unsigned long targets[MEM_CGROUP_NTARGETS];
};
+struct mem_cgroup_stat2_cpu {
+ struct percpu_counter counters[MEM_CGROUP_STAT2_NSTATS];
+};
+
struct mem_cgroup_reclaim_iter {
/*
* last scanned hierarchy member. Valid only if last_dead_count
@@ -368,6 +382,7 @@ struct mem_cgroup {
* percpu counter.
*/
struct mem_cgroup_stat_cpu __percpu *stat;
+ struct mem_cgroup_stat2_cpu stat2;
spinlock_t pcp_counter_lock;
atomic_t dead_count;
@@ -956,6 +971,11 @@ mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
val = 0;
return val;
}
+static inline unsigned long
+mem_cgroup_read_stat2(struct mem_cgroup *memcg, enum mem_cgroup_stat2_index idx)
+{
+ return percpu_counter_read_positive(&memcg->stat2.counters[idx]);
+}
static void mem_cgroup_update_swap_max(struct mem_cgroup *memcg)
{
@@ -1013,10 +1033,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
* counted as CACHE even if it's on ANON LRU.
*/
if (PageAnon(page))
- __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
+ percpu_counter_add(&memcg->stat2.counters[MEM_CGROUP_STAT_RSS],
nr_pages);
else {
- __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
+ percpu_counter_add(&memcg->stat2.counters[MEM_CGROUP_STAT_CACHE],
nr_pages);
if (PageSwapBacked(page))
__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SHMEM],
@@ -1593,9 +1613,9 @@ bool mem_cgroup_dcache_is_low(struct mem_cgroup *memcg)
if (vfs_cache_min_ratio <= 0)
return false;
- anon = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS);
- file = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
- dcache = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SLAB_RECLAIMABLE);
+ anon = mem_cgroup_read_stat2(memcg, MEM_CGROUP_STAT_RSS);
+ file = mem_cgroup_read_stat2(memcg, MEM_CGROUP_STAT_CACHE);
+ dcache = mem_cgroup_read_stat2(memcg, MEM_CGROUP_STAT_SLAB_RECLAIMABLE);
return dcache / vfs_cache_min_ratio <
(anon + file + dcache) / 100;
@@ -1979,6 +1999,10 @@ done:
pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
K(mem_cgroup_read_stat(iter, i)));
}
+ for (i = 0; i < MEM_CGROUP_STAT2_NSTATS; i++) {
+ pr_cont(" %s:%luKB", mem_cgroup_stat2_names[i],
+ K(mem_cgroup_read_stat2(iter, i)));
+ }
for (i = 0; i < NR_LRU_LISTS; i++)
pr_cont(" %s:%luKB", mem_cgroup_lru_names[i],
@@ -3120,10 +3144,11 @@ int __memcg_charge_slab(struct kmem_cache *s, gfp_t gfp, unsigned int nr_pages)
if (s->flags & SLAB_RECLAIM_ACCOUNT) {
page_counter_charge(&memcg->dcache, nr_pages);
idx = MEM_CGROUP_STAT_SLAB_RECLAIMABLE;
- } else
+ percpu_counter_add(&memcg->stat2.counters[idx], nr_pages);
+ } else {
idx = MEM_CGROUP_STAT_SLAB_UNRECLAIMABLE;
-
- this_cpu_add(memcg->stat->count[idx], nr_pages);
+ this_cpu_add(memcg->stat->count[idx], nr_pages);
+ }
return 0;
}
@@ -3139,10 +3164,11 @@ void __memcg_uncharge_slab(struct kmem_cache *s, unsigned int nr_pages)
if (s->flags & SLAB_RECLAIM_ACCOUNT) {
page_counter_uncharge(&memcg->dcache, nr_pages);
idx = MEM_CGROUP_STAT_SLAB_RECLAIMABLE;
- } else
+ percpu_counter_sub(&memcg->stat2.counters[idx], nr_pages);
+ } else {
idx = MEM_CGROUP_STAT_SLAB_UNRECLAIMABLE;
-
- this_cpu_sub(memcg->stat->count[idx], nr_pages);
+ this_cpu_sub(memcg->stat->count[idx], nr_pages);
+ }
}
/*
@@ -4195,6 +4221,17 @@ static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
return val;
}
+static unsigned long mem_cgroup_recursive_stat2(struct mem_cgroup *memcg,
+ enum mem_cgroup_stat2_index idx)
+{
+ struct mem_cgroup *iter;
+ unsigned long val = 0;
+
+ for_each_mem_cgroup_tree(iter, memcg)
+ val += mem_cgroup_read_stat2(iter, idx);
+
+ return val;
+}
static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
{
@@ -4211,8 +4248,8 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
* Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
* as well as in MEM_CGROUP_STAT_RSS_HUGE.
*/
- val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
+ val = mem_cgroup_recursive_stat2(memcg, MEM_CGROUP_STAT_CACHE);
+ val += mem_cgroup_recursive_stat2(memcg, MEM_CGROUP_STAT_RSS);
if (swap)
val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
@@ -4228,11 +4265,11 @@ void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi)
for_each_online_node(nid)
mem_cgroup_get_nr_pages(memcg, nid, mi->pages);
- mi->slab_reclaimable = mem_cgroup_recursive_stat(memcg,
+ mi->slab_reclaimable = mem_cgroup_recursive_stat2(memcg,
MEM_CGROUP_STAT_SLAB_RECLAIMABLE);
mi->slab_unreclaimable = mem_cgroup_recursive_stat(memcg,
MEM_CGROUP_STAT_SLAB_UNRECLAIMABLE);
- mi->cached = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+ mi->cached = mem_cgroup_recursive_stat2(memcg, MEM_CGROUP_STAT_CACHE);
mi->shmem = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SHMEM);
}
@@ -4247,7 +4284,7 @@ int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages)
free += page_counter_read(&memcg->dcache);
/* assume file cache is reclaimable */
- free += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+ free += mem_cgroup_recursive_stat2(memcg, MEM_CGROUP_STAT_CACHE);
/* but do not count shmem pages as they can't be purged,
* only swapped out */
@@ -5094,6 +5131,10 @@ static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
}
+ for (i = 0; i < MEM_CGROUP_STAT2_NSTATS; i++) {
+ seq_printf(m, "%s %lu\n", mem_cgroup_stat2_names[i],
+ mem_cgroup_read_stat2(memcg, i) * PAGE_SIZE);
+ }
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++)
seq_printf(m, "%s %lu\n", mem_cgroup_events_names[i],
@@ -5124,6 +5165,13 @@ static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val);
}
+ for (i = 0; i < MEM_CGROUP_STAT2_NSTATS; i++) {
+ unsigned long long val = 0;
+
+ for_each_mem_cgroup_tree(mi, memcg)
+ val += mem_cgroup_read_stat2(mi, i) * PAGE_SIZE;
+ seq_printf(m, "total_%s %llu\n", mem_cgroup_stat2_names[i], val);
+ }
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
unsigned long long val = 0;
@@ -5858,6 +5906,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
{
struct mem_cgroup *memcg;
size_t size;
+ int i, ret;
size = sizeof(struct mem_cgroup);
size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
@@ -5869,9 +5918,20 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
if (!memcg->stat)
goto out_free;
+
+ for (i = 0; i < MEM_CGROUP_STAT2_NSTATS; i++) {
+ ret = percpu_counter_init(&memcg->stat2.counters[i], 0, GFP_KERNEL);
+ if (ret)
+ goto out_pcpu_free;
+ }
spin_lock_init(&memcg->pcp_counter_lock);
return memcg;
+out_pcpu_free:
+ while (--i >= 0)
+ percpu_counter_destroy(&memcg->stat2.counters[i]);
+
+ free_percpu(memcg->stat);
out_free:
kfree(memcg);
return NULL;
@@ -7017,8 +7077,8 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
}
local_irq_save(flags);
- __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
- __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
+ percpu_counter_sub(&memcg->stat2.counters[MEM_CGROUP_STAT_RSS], nr_anon);
+ percpu_counter_sub(&memcg->stat2.counters[MEM_CGROUP_STAT_CACHE], nr_file);
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_SHMEM], nr_shmem);
__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
--
2.13.0