[Devel] [RFC][PATCH] memory cgroup enhancements updated [10/10] NUMA aware account
KAMEZAWA Hiroyuki
kamezawa.hiroyu at jp.fujitsu.com
Fri Oct 19 02:37:30 PDT 2007
This patch is a trial at making the memory cgroup statistics NUMA-aware:

* Divide the per-cpu stat counters by node id (nid).
* Add nid information to page_cgroup.
* Because the init routine now has to call kmalloc, early_init is set to 0.

This is just a trial at this stage; any comments are welcome.
It works well on my fake NUMA system.
I think we can add a "numastat" file based on this (see the sketch below).
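As an illustration, here is a minimal sketch of what the read side of such a
numastat file might look like, given the per-cpu/per-node counter layout
introduced in this patch. mem_cgroup_node_stat() is a hypothetical helper,
not part of the patch:

	/*
	 * Hypothetical helper (not in this patch): total of one statistic
	 * for a single node, summed over all possible CPUs, using the
	 * cpustat[cpu]->count[nid][idx] layout introduced below.
	 */
	static s64 mem_cgroup_node_stat(struct mem_cgroup_stat *stat,
					enum mem_cgroup_stat_index idx, int nid)
	{
		unsigned int cpu;
		s64 val = 0;

		for_each_possible_cpu(cpu)
			val += stat->cpustat[cpu]->count[nid][idx];
		return val;
	}

A numastat show function would then just loop this over
for_each_node_state(nid, N_POSSIBLE) and print one line per node.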
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu at jp.fujitsu.com>
mm/memcontrol.c | 97 ++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 71 insertions(+), 26 deletions(-)
Index: devel-2.6.23-mm1/mm/memcontrol.c
===================================================================
--- devel-2.6.23-mm1.orig/mm/memcontrol.c
+++ devel-2.6.23-mm1/mm/memcontrol.c
@@ -29,6 +29,7 @@
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
@@ -59,12 +60,18 @@ enum mem_cgroup_stat_index {
MEM_CGROUP_STAT_NSTATS,
};
+#ifndef CONFIG_NUMA
struct mem_cgroup_stat_cpu {
- s64 count[MEM_CGROUP_STAT_NSTATS];
+ s64 count[1][MEM_CGROUP_STAT_NSTATS];
} ____cacheline_aligned_in_smp;
+#else
+struct mem_cgroup_stat_cpu {
+ s64 count[MAX_NUMNODES][MEM_CGROUP_STAT_NSTATS];
+} ____cacheline_aligned_in_smp;
+#endif
struct mem_cgroup_stat {
- struct mem_cgroup_stat_cpu cpustat[NR_CPUS];
+ struct mem_cgroup_stat_cpu *cpustat[NR_CPUS];
};
/*
@@ -72,28 +79,28 @@ struct mem_cgroup_stat {
* MUST be called under preempt_disable().
*/
static inline void __mem_cgroup_stat_add(struct mem_cgroup_stat *stat,
- enum mem_cgroup_stat_index idx, int val)
+ enum mem_cgroup_stat_index idx, int nid, int val)
{
int cpu = smp_processor_id();
#ifdef CONFIG_PREEMPT
VM_BUG_ON(preempt_count() == 0);
#endif
- stat->cpustat[cpu].count[idx] += val;
+ stat->cpustat[cpu]->count[nid][idx] += val;
}
static inline void mem_cgroup_stat_inc(struct mem_cgroup_stat *stat,
- enum mem_cgroup_stat_index idx)
+ enum mem_cgroup_stat_index idx, int nid)
{
preempt_disable();
- __mem_cgroup_stat_add(stat, idx, 1);
+ __mem_cgroup_stat_add(stat, idx, nid, 1);
preempt_enable();
}
static inline void mem_cgroup_stat_dec(struct mem_cgroup_stat *stat,
- enum mem_cgroup_stat_index idx)
+ enum mem_cgroup_stat_index idx, int nid)
{
preempt_disable();
- __mem_cgroup_stat_add(stat, idx, -1);
+ __mem_cgroup_stat_add(stat, idx, nid, -1);
preempt_enable();
}
@@ -149,6 +156,7 @@ struct page_cgroup {
struct list_head lru; /* per cgroup LRU list */
struct page *page;
struct mem_cgroup *mem_cgroup;
+ int nid;
atomic_t ref_cnt; /* Helpful when pages move b/w */
/* mapped and cached states */
int flags;
@@ -169,21 +177,23 @@ enum {
* We have to modify several values at charge/uncharge..
*/
static inline void
-mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags, int charge)
+mem_cgroup_charge_statistics(struct mem_cgroup *mem, int nid,
+ int flags, int charge)
{
int val = (charge)? 1 : -1;
struct mem_cgroup_stat *stat = &mem->stat;
preempt_disable();
if (flags & PCGF_PAGECACHE)
- __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_PAGECACHE, val);
+ __mem_cgroup_stat_add(stat,
+ MEM_CGROUP_STAT_PAGECACHE, nid, val);
else
- __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_RSS, val);
+ __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_RSS, nid, val);
if (flags & PCGF_ACTIVE)
- __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_ACTIVE, val);
+ __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_ACTIVE, nid, val);
else
- __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_INACTIVE, val);
+ __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_INACTIVE, nid, val);
preempt_enable();
}
@@ -315,8 +325,10 @@ static void __mem_cgroup_move_lists(stru
if (moved) {
struct mem_cgroup_stat *stat = &mem->stat;
preempt_disable();
- __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_ACTIVE, moved);
- __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_INACTIVE, -moved);
+ __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_ACTIVE,
+ pc->nid, moved);
+ __mem_cgroup_stat_add(stat, MEM_CGROUP_STAT_INACTIVE,
+ pc->nid, -moved);
preempt_enable();
}
if (active) {
@@ -493,10 +505,10 @@ retry:
bool is_atomic = gfp_mask & GFP_ATOMIC;
if (is_cache)
mem_cgroup_stat_inc(&mem->stat,
- MEM_CGROUP_STAT_FAIL_CACHE);
+ MEM_CGROUP_STAT_FAIL_CACHE, page_to_nid(page));
else
mem_cgroup_stat_inc(&mem->stat,
- MEM_CGROUP_STAT_FAIL_RSS);
+ MEM_CGROUP_STAT_FAIL_RSS, page_to_nid(page));
/*
* We cannot reclaim under GFP_ATOMIC, fail the charge
*/
@@ -537,6 +549,7 @@ noreclaim:
atomic_set(&pc->ref_cnt, 1);
pc->mem_cgroup = mem;
pc->page = page;
+ pc->nid = page_to_nid(page);
if (is_cache)
pc->flags = PCGF_PAGECACHE | PCGF_ACTIVE;
else
@@ -554,7 +567,7 @@ noreclaim:
}
/* Update statistics vector */
- mem_cgroup_charge_statistics(mem, pc->flags, true);
+ mem_cgroup_charge_statistics(mem, pc->nid, pc->flags, true);
spin_lock_irqsave(&mem->lru_lock, flags);
list_add(&pc->lru, &mem->active_list);
@@ -621,7 +634,8 @@ void mem_cgroup_uncharge(struct page_cgr
spin_lock_irqsave(&mem->lru_lock, flags);
list_del_init(&pc->lru);
spin_unlock_irqrestore(&mem->lru_lock, flags);
- mem_cgroup_charge_statistics(mem, pc->flags, false);
+ mem_cgroup_charge_statistics(mem, pc->nid, pc->flags,
+ false);
kfree(pc);
}
}
@@ -657,6 +671,7 @@ void mem_cgroup_end_migration(struct pag
void mem_cgroup_page_migration(struct page *page, struct page *newpage)
{
struct page_cgroup *pc;
+ struct mem_cgroup *mem;
retry:
pc = page_get_page_cgroup(page);
if (!pc)
@@ -664,6 +679,11 @@ retry:
if (clear_page_cgroup(page, pc) != pc)
goto retry;
pc->page = newpage;
+ pc->nid = page_to_nid(newpage);
+ mem = pc->mem_cgroup;
+ mem_cgroup_charge_statistics(mem, page_to_nid(page), pc->flags, false);
+ mem_cgroup_charge_statistics(mem,
+ page_to_nid(newpage), pc->flags, true);
lock_page_cgroup(newpage);
page_assign_page_cgroup(newpage, pc);
unlock_page_cgroup(newpage);
@@ -697,7 +717,8 @@ retry:
css_put(&mem->css);
res_counter_uncharge(&mem->res, PAGE_SIZE);
list_del_init(&pc->lru);
- mem_cgroup_charge_statistics(mem, pc->flags, false);
+ mem_cgroup_charge_statistics(mem, pc->nid, pc->flags,
+ false);
kfree(pc);
} else /* being uncharged ? ...do relax */
break;
@@ -872,13 +893,16 @@ static int mem_control_stat_show(struct
struct mem_cgroup_stat *stat = &mem_cont->stat;
int i;
- for (i = 0; i < ARRAY_SIZE(stat->cpustat[0].count); i++) {
+ for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
unsigned int cpu;
+ int node;
s64 val;
val = 0;
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- val += stat->cpustat[cpu].count[i];
+ for_each_possible_cpu(cpu)
+ for_each_node_state(node, N_POSSIBLE)
+ val += stat->cpustat[cpu]->count[node][i];
+
val *= mem_cgroup_stat_desc[i].unit;
seq_printf(m, "%s %lld\n", mem_cgroup_stat_desc[i].msg, val);
}
@@ -941,12 +965,14 @@ static struct cgroup_subsys_state *
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
struct mem_cgroup *mem;
+ int cpu;
if (unlikely((cont->parent) == NULL)) {
mem = &init_mem_cgroup;
init_mm.mem_cgroup = mem;
- } else
+ } else {
mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL);
+ }
if (mem == NULL)
return NULL;
@@ -956,6 +982,17 @@ mem_cgroup_create(struct cgroup_subsys *
INIT_LIST_HEAD(&mem->inactive_list);
spin_lock_init(&mem->lru_lock);
mem->control_type = MEM_CGROUP_TYPE_ALL;
+
+ for_each_possible_cpu(cpu) {
+ int nid = cpu_to_node(cpu);
+ struct mem_cgroup_stat_cpu *mcsc;
+ if (sizeof(*mcsc) < PAGE_SIZE)
+ mcsc = kmalloc_node(sizeof(*mcsc), GFP_KERNEL, nid);
+ else
+ mcsc = vmalloc_node(sizeof(*mcsc), nid);
+ memset(mcsc, 0, sizeof(*mcsc));
+ mem->stat.cpustat[cpu] = mcsc;
+ }
return &mem->css;
}
@@ -969,7 +1006,15 @@ static void mem_cgroup_pre_destroy(struc
static void mem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
- kfree(mem_cgroup_from_cont(cont));
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+ int cpu;
+ for_each_possible_cpu(cpu) {
+ if (sizeof(struct mem_cgroup_stat_cpu) < PAGE_SIZE)
+ kfree(mem->stat.cpustat[cpu]);
+ else
+ vfree(mem->stat.cpustat[cpu]);
+ }
+ kfree(mem);
}
static int mem_cgroup_populate(struct cgroup_subsys *ss,
@@ -1021,5 +1066,5 @@ struct cgroup_subsys mem_cgroup_subsys =
.destroy = mem_cgroup_destroy,
.populate = mem_cgroup_populate,
.attach = mem_cgroup_move_task,
- .early_init = 1,
+ .early_init = 0,
};
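Note that the per-cpu allocation loop in mem_cgroup_create() above does not
check the result of kmalloc_node()/vmalloc_node() before the memset(). A
hedged sketch of that loop with failure handling added; the out_free label
(which would free the blocks allocated so far and bail out) is illustrative
and not in the patch:

	/*
	 * Illustrative variant of the allocation loop in
	 * mem_cgroup_create(), with NULL checks added.
	 */
	for_each_possible_cpu(cpu) {
		int nid = cpu_to_node(cpu);
		struct mem_cgroup_stat_cpu *mcsc;

		if (sizeof(*mcsc) < PAGE_SIZE)
			mcsc = kmalloc_node(sizeof(*mcsc), GFP_KERNEL, nid);
		else
			mcsc = vmalloc_node(sizeof(*mcsc), nid);
		if (!mcsc)
			goto out_free;	/* hypothetical unwind path */
		memset(mcsc, 0, sizeof(*mcsc));
		mem->stat.cpustat[cpu] = mcsc;
	}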