[Devel] [PATCH rh7 18/21] ms/mm: memcontrol: use page lists for uncharge batching
Andrey Ryabinin
aryabinin at virtuozzo.com
Tue Nov 1 02:21:37 PDT 2016
From: Johannes Weiner <hannes at cmpxchg.org>
Pages are now uncharged at release time, and all sources of batched
uncharges operate on lists of pages. Directly use those lists, and
get rid of the per-task batching state.
This also batches statistics accounting, in addition to the res
counter charges, to reduce IRQ-disabling and re-enabling.
Signed-off-by: Johannes Weiner <hannes at cmpxchg.org>
Acked-by: Michal Hocko <mhocko at suse.cz>
Cc: Hugh Dickins <hughd at google.com>
Cc: Tejun Heo <tj at kernel.org>
Cc: Vladimir Davydov <vdavydov at parallels.com>
Cc: Naoya Horiguchi <n-horiguchi at ah.jp.nec.com>
Cc: Vladimir Davydov <vdavydov at parallels.com>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
https://jira.sw.ru/browse/PSBM-51558
(cherry picked from commit 747db954cab64c6b7a95b121b517165f34751898)
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
include/linux/memcontrol.h | 12 +--
include/linux/sched.h | 6 --
kernel/fork.c | 4 -
mm/memcontrol.c | 206 ++++++++++++++++++++++++---------------------
mm/swap.c | 6 +-
mm/vmscan.c | 12 ++-
6 files changed, 117 insertions(+), 129 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 08f165b..59f477f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -51,12 +51,8 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
bool lrucare);
void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
-
void mem_cgroup_uncharge(struct page *page);
-
-/* Batched uncharging */
-void mem_cgroup_uncharge_start(void);
-void mem_cgroup_uncharge_end(void);
+void mem_cgroup_uncharge_list(struct list_head *page_list);
void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
bool lrucare);
@@ -244,11 +240,7 @@ static inline void mem_cgroup_uncharge(struct page *page)
{
}
-static inline void mem_cgroup_uncharge_start(void)
-{
-}
-
-static inline void mem_cgroup_uncharge_end(void)
+static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
{
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ab5cfed..33b9cea 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1563,12 +1563,6 @@ struct task_struct {
struct ve_struct *task_ve;
#endif
#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
- struct memcg_batch_info {
- int do_batch; /* incremented when batch uncharge started */
- struct mem_cgroup *memcg; /* target memcg of uncharge */
- unsigned long nr_pages; /* uncharged usage */
- unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
- } memcg_batch;
unsigned int memcg_kmem_skip_account;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/kernel/fork.c b/kernel/fork.c
index 1e7b897..69fb02d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1368,10 +1368,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
-#ifdef CONFIG_MEMCG
- p->memcg_batch.do_batch = 0;
- p->memcg_batch.memcg = NULL;
-#endif
#ifdef CONFIG_BCACHE
p->sequential_io = 0;
p->sequential_io_avg = 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5efb5d0..11c06d3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3637,53 +3637,6 @@ out:
return ret;
}
-/*
- * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
- * In that cases, pages are freed continuously and we can expect pages
- * are in the same memcg. All these calls itself limits the number of
- * pages freed at once, then uncharge_start/end() is called properly.
- * This may be called prural(2) times in a context,
- */
-
-void mem_cgroup_uncharge_start(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- current->memcg_batch.do_batch++;
- /* We can do nest. */
- if (current->memcg_batch.do_batch == 1) {
- current->memcg_batch.memcg = NULL;
- current->memcg_batch.nr_pages = 0;
- current->memcg_batch.memsw_nr_pages = 0;
- }
- local_irq_restore(flags);
-}
-
-void mem_cgroup_uncharge_end(void)
-{
- struct memcg_batch_info *batch = &current->memcg_batch;
- unsigned long flags;
-
- local_irq_save(flags);
- VM_BUG_ON(!batch->do_batch);
- if (--batch->do_batch) /* If stacked, do nothing */
- goto out;
- /*
- * This "batch->memcg" is valid without any css_get/put etc...
- * bacause we hide charges behind us.
- */
- if (batch->nr_pages)
- res_counter_uncharge(&batch->memcg->res,
- batch->nr_pages * PAGE_SIZE);
- if (batch->memsw_nr_pages)
- res_counter_uncharge(&batch->memcg->memsw,
- batch->memsw_nr_pages * PAGE_SIZE);
- memcg_oom_recover(batch->memcg);
-out:
- local_irq_restore(flags);
-}
-
#ifdef CONFIG_MEMCG_SWAP
static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
bool charge)
@@ -7010,6 +6963,98 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
cancel_charge(memcg, nr_pages);
}
+static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+ unsigned long nr_mem, unsigned long nr_memsw,
+ unsigned long nr_anon, unsigned long nr_file,
+ unsigned long nr_huge, struct page *dummy_page)
+{
+ unsigned long flags;
+
+ if (nr_mem)
+ res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
+ if (nr_memsw)
+ res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
+
+ memcg_oom_recover(memcg);
+
+ local_irq_save(flags);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
+ __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
+ __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
+ __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+ memcg_check_events(memcg, dummy_page);
+ local_irq_restore(flags);
+}
+
+static void uncharge_list(struct list_head *page_list)
+{
+ struct mem_cgroup *memcg = NULL;
+ unsigned long nr_memsw = 0;
+ unsigned long nr_anon = 0;
+ unsigned long nr_file = 0;
+ unsigned long nr_huge = 0;
+ unsigned long pgpgout = 0;
+ unsigned long nr_mem = 0;
+ struct list_head *next;
+ struct page *page;
+
+ next = page_list->next;
+ do {
+ unsigned int nr_pages = 1;
+ struct page_cgroup *pc;
+
+ page = list_entry(next, struct page, lru);
+ next = page->lru.next;
+
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(page_count(page), page);
+
+ pc = lookup_page_cgroup(page);
+ if (!PageCgroupUsed(pc))
+ continue;
+
+ /*
+ * Nobody should be changing or seriously looking at
+ * pc->mem_cgroup and pc->flags at this point, we have
+ * fully exclusive access to the page.
+ */
+
+ if (memcg != pc->mem_cgroup) {
+ if (memcg) {
+ uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+ nr_anon, nr_file, nr_huge, page);
+ pgpgout = nr_mem = nr_memsw = 0;
+ nr_anon = nr_file = nr_huge = 0;
+ }
+ memcg = pc->mem_cgroup;
+ }
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ nr_huge += nr_pages;
+ }
+
+ if (PageAnon(page))
+ nr_anon += nr_pages;
+ else
+ nr_file += nr_pages;
+
+ if (pc->flags & PCG_MEM)
+ nr_mem += nr_pages;
+ if (pc->flags & PCG_MEMSW)
+ nr_memsw += nr_pages;
+ pc->flags = 0;
+
+ pgpgout++;
+ } while (next != page_list);
+
+ if (memcg)
+ uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+ nr_anon, nr_file, nr_huge, page);
+}
+
/**
* mem_cgroup_uncharge - uncharge a page
* @page: page to uncharge
@@ -7019,67 +7064,34 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
*/
void mem_cgroup_uncharge(struct page *page)
{
- struct memcg_batch_info *batch;
- unsigned int nr_pages = 1;
- struct mem_cgroup *memcg;
struct page_cgroup *pc;
- unsigned long pc_flags;
- unsigned long flags;
-
- VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(page_count(page), page);
if (mem_cgroup_disabled())
return;
+ /* Don't touch page->lru of any random page, pre-check: */
pc = lookup_page_cgroup(page);
-
- /* Every final put_page() ends up here */
if (!PageCgroupUsed(pc))
return;
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- }
- /*
- * Nobody should be changing or seriously looking at
- * pc->mem_cgroup and pc->flags at this point, we have fully
- * exclusive access to the page.
- */
- memcg = pc->mem_cgroup;
- pc_flags = pc->flags;
- pc->flags = 0;
-
- local_irq_save(flags);
+ INIT_LIST_HEAD(&page->lru);
+ uncharge_list(&page->lru);
+}
- if (nr_pages > 1)
- goto direct;
- if (unlikely(test_thread_flag(TIF_MEMDIE)))
- goto direct;
- batch = &current->memcg_batch;
- if (!batch->do_batch)
- goto direct;
- if (batch->memcg && batch->memcg != memcg)
- goto direct;
- if (!batch->memcg)
- batch->memcg = memcg;
- if (pc_flags & PCG_MEM)
- batch->nr_pages++;
- if (pc_flags & PCG_MEMSW)
- batch->memsw_nr_pages++;
- goto out;
-direct:
- if (pc_flags & PCG_MEM)
- res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
- if (pc_flags & PCG_MEMSW)
- res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
- memcg_oom_recover(memcg);
-out:
- mem_cgroup_charge_statistics(memcg, page, -nr_pages);
- memcg_check_events(memcg, page);
+/**
+ * mem_cgroup_uncharge_list - uncharge a list of page
+ * @page_list: list of pages to uncharge
+ *
+ * Uncharge a list of pages previously charged with
+ * mem_cgroup_try_charge() and mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge_list(struct list_head *page_list)
+{
+ if (mem_cgroup_disabled())
+ return;
- local_irq_restore(flags);
+ if (!list_empty(page_list))
+ uncharge_list(page_list);
}
/**
diff --git a/mm/swap.c b/mm/swap.c
index 2a334e2..38dc666 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -891,8 +891,6 @@ void release_pages(struct page **pages, int nr, bool cold)
struct lruvec *lruvec;
unsigned long uninitialized_var(flags);
- mem_cgroup_uncharge_start();
-
for (i = 0; i < nr; i++) {
struct page *page = pages[i];
@@ -924,7 +922,6 @@ void release_pages(struct page **pages, int nr, bool cold)
__ClearPageLRU(page);
del_page_from_lru_list(page, lruvec, page_off_lru(page));
}
- mem_cgroup_uncharge(page);
/* Clear Active bit in case of parallel mark_page_accessed */
ClearPageActive(page);
@@ -934,8 +931,7 @@ void release_pages(struct page **pages, int nr, bool cold)
if (zone)
spin_unlock_irqrestore(&zone->lru_lock, flags);
- mem_cgroup_uncharge_end();
-
+ mem_cgroup_uncharge_list(&pages_to_free);
free_hot_cold_page_list(&pages_to_free, cold);
}
EXPORT_SYMBOL(release_pages);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 356e411..df57ff0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -844,7 +844,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
cond_resched();
- mem_cgroup_uncharge_start();
while (!list_empty(page_list)) {
struct address_space *mapping;
struct page *page;
@@ -1122,7 +1121,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
__clear_page_locked(page);
free_it:
- mem_cgroup_uncharge(page);
nr_reclaimed++;
/*
@@ -1152,8 +1150,8 @@ keep:
list_add(&page->lru, &ret_pages);
VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
}
- mem_cgroup_uncharge_end();
+ mem_cgroup_uncharge_list(&free_pages);
free_hot_cold_page_list(&free_pages, true);
list_splice(&ret_pages, page_list);
@@ -1474,10 +1472,9 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
__ClearPageActive(page);
del_page_from_lru_list(page, lruvec, lru);
- mem_cgroup_uncharge(page);
-
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
@@ -1572,6 +1569,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge_list(&page_list);
free_hot_cold_page_list(&page_list, true);
/*
@@ -1683,10 +1681,9 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
__ClearPageActive(page);
del_page_from_lru_list(page, lruvec, lru);
- mem_cgroup_uncharge(page);
-
if (unlikely(PageCompound(page))) {
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge(page);
(*get_compound_page_dtor(page))(page);
spin_lock_irq(&zone->lru_lock);
} else
@@ -1796,6 +1793,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
+ mem_cgroup_uncharge_list(&l_hold);
free_hot_cold_page_list(&l_hold, true);
KSTAT_PERF_LEAVE(refill_inact);
--
2.7.3
More information about the Devel
mailing list