[Devel] [PATCH RHEL7 COMMIT] mm: charge/uncharge kmemcg from generic page allocator paths

Konstantin Khorenko khorenko at virtuozzo.com
Tue May 31 02:14:09 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.9
------>
commit 1265d34743919f73ae85326474b67cc192b58b1b
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Tue May 31 13:14:09 2016 +0400

    mm: charge/uncharge kmemcg from generic page allocator paths
    
    Currently, to charge a non-slab allocation to kmemcg one has to use
    the alloc_kmem_pages helper with the __GFP_ACCOUNT flag. A page
    allocated with this helper must eventually be freed with
    free_kmem_pages, otherwise it won't be uncharged.
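
    For example, a user of the old scheme pairs the two helpers (a
    minimal sketch; the error handling is illustrative):

        struct page *page;

        page = alloc_kmem_pages(GFP_KERNEL | __GFP_ACCOUNT, 0);
        if (!page)
                return -ENOMEM;
        /* ... use the page ... */
        free_kmem_pages((unsigned long)page_address(page), 0);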
    
    This API suits its current users fine, but it turns out to be
    impossible to use together with page reference counting, i.e. when an
    allocation is supposed to be freed with put_page, as is the case with
    pipe or unix socket buffers.
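
    The pattern that cannot be accommodated looks roughly like this (a
    sketch using the pre-patch helpers):

        page = alloc_kmem_pages(GFP_KERNEL | __GFP_ACCOUNT, 0);
        /* hand the page over, e.g. install it in a pipe buffer */

        /* Later, the last reference is dropped by generic code that
         * knows nothing about kmemcg, so free_kmem_pages() is never
         * called and the charge would leak:
         */
        put_page(page);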
    
    To overcome this limitation, this patch moves charging/uncharging to
    the generic page allocator paths, i.e. to __alloc_pages_nodemask and
    free_pages_prepare, and zaps the alloc/free_kmem_pages helpers. This
    way, any of the available page allocation functions can be used to
    get the allocated page charged to kmemcg - it's enough to pass
    __GFP_ACCOUNT, just as with kmalloc and friends. A charged page is
    automatically uncharged on free.
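
    For instance, a reference-counted buffer can then be handled with the
    stock allocator calls (a sketch; note that put_page() ends up in
    free_pages_prepare(), where the uncharge now happens):

        struct page *page;

        page = alloc_page(GFP_KERNEL | __GFP_ACCOUNT);
        if (!page)
                return -ENOMEM;
        /* ... pass the page around by get_page()/put_page() ... */
        put_page(page);        /* uncharged automatically on last put */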
    
    To make this possible, we need to mark pages charged to kmemcg
    somehow. To avoid introducing a new page flag, we make use of
    page->_mapcount for marking such pages. Since pages charged to kmemcg
    are not supposed to be mapped to userspace, this works just fine.
    There are other (ab)users of page->_mapcount, namely buddy and
    balloon pages, but we don't conflict with them.
    
    In case kmemcg is compiled out or not used at runtime, this patch
    introduces no overhead to the generic page allocator paths. If kmemcg
    is used, it adds one gfp flags check on alloc and one page->_mapcount
    check on free, which shouldn't hurt performance, because the data
    accessed are hot.
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 include/linux/memcontrol.h |  3 ++-
 include/linux/page-flags.h | 19 +++++++++++++++++++
 mm/memcontrol.c            |  4 ++++
 mm/page_alloc.c            |  6 ++++++
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d26adf1..48bf2ca 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -23,6 +23,7 @@
 #include <linux/vm_event_item.h>
 #include <linux/hardirq.h>
 #include <linux/jump_label.h>
+#include <linux/page-flags.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -617,7 +618,7 @@ memcg_kmem_newpage_charge(struct page *page, gfp_t gfp, int order)
 static inline void
 memcg_kmem_uncharge_pages(struct page *page, int order)
 {
-	if (memcg_kmem_enabled())
+	if (memcg_kmem_enabled() && PageKmemcg(page))
 		__memcg_kmem_uncharge_pages(page, order);
 }
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index cdf83ec..d15d20d 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -523,6 +523,25 @@ static inline void __ClearPageBalloon(struct page *page)
 	atomic_set(&page->_mapcount, -1);
 }
 
+#define PAGE_KMEMCG_MAPCOUNT_VALUE (-256)
+
+static inline int PageKmemcg(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_KMEMCG_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageKmemcg(struct page *page)
+{
+	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
+	atomic_set(&page->_mapcount, PAGE_KMEMCG_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageKmemcg(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageKmemcg(page), page);
+	atomic_set(&page->_mapcount, -1);
+}
+
 /*
  * If network-based swap is enabled, sl*b must keep track of whether pages
  * were allocated from pfmemalloc reserves.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8eb4807..1c3fbb2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3555,6 +3555,8 @@ __memcg_kmem_newpage_charge(struct page *page, gfp_t gfp, int order)
 	SetPageCgroupUsed(pc);
 	unlock_page_cgroup(pc);
 
+	__SetPageKmemcg(page);
+
 	return true;
 }
 
@@ -3588,6 +3590,8 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 
 	VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
 	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+
+	__ClearPageKmemcg(page);
 }
 
 struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9f02d80..2b04f36 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -748,6 +748,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 
 	if (PageAnon(page))
 		page->mapping = NULL;
+	memcg_kmem_uncharge_pages(page, order);
 	for (i = 0; i < (1 << order); i++) {
 		bad += free_pages_check(page + i);
 		if (static_key_false(&zero_free_pages))
@@ -2804,6 +2805,11 @@ out:
 	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
 		goto retry_cpuset;
 
+	if (page && !memcg_kmem_newpage_charge(page, gfp_mask, order)) {
+		__free_pages(page, order);
+		page = NULL;
+	}
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);

