[Devel] [PATCH RHEL7 COMMIT] arch: x86: charge page tables to memcg/kmem

Vladimir Davydov vdavydov at odin.com
Mon Sep 7 03:15:48 PDT 2015


The commit is pushed to "branch-rh7-3.10.0-229.7.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.6.7
------>
commit a9a6c8559647338a7eec15fa5bde09c2bcb9fa61
Author: Vladimir Davydov <vdavydov at parallels.com>
Date:   Mon Sep 7 14:15:48 2015 +0400

    arch: x86: charge page tables to memcg/kmem
    
    Note that the implementation is as simple as possible: I only insert
    memcg_kmem_newpage_charge and memcg_kmem_uncharge_pages into the
    pte/pmd/pud/pgd alloc/free paths. There is no precharging!
    
    If our performance team reports a noticeable performance degradation
    due to this, I will implement batched charge/uncharge for the
    memcg/kmem resource counter to overcome it, but not until then.
    
    https://jira.sw.ru/browse/PSBM-33840
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    Reviewed-by: Kirill Tkhai <ktkhai at odin.com>
---
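Note for reviewers (not part of the commit message): the patch relies on
the alloc_kmem_pages/free_kmem_pages family doing the actual charging.
As a rough sketch of how these helpers work, modeled on the mainline
3.15 implementation in mm/page_alloc.c (the vzkernel variant may differ
in detail):

	struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
	{
		struct page *page;
		struct mem_cgroup *memcg = NULL;

		/* Charge the memcg/kmem counter; fail the allocation on overlimit. */
		if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
			return NULL;
		page = alloc_pages(gfp_mask, order);
		/* Bind the charge to the page, or cancel it if allocation failed. */
		memcg_kmem_commit_charge(page, memcg, order);
		return page;
	}

	void __free_kmem_pages(struct page *page, unsigned int order)
	{
		memcg_kmem_uncharge_pages(page, order);
		__free_pages(page, order);
	}

The explicit memcg_kmem_uncharge_pages() calls added to the
___p??_free_tlb() paths below are needed because pages handed to
tlb_remove_page() are freed later through the mmu_gather batch with
plain __free_pages(), bypassing __free_kmem_pages(), so the charge must
be dropped before the page goes into the batch.
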
 arch/x86/include/asm/pgalloc.h | 13 +++++++------
 arch/x86/mm/pgtable.c          | 24 +++++++++++++++---------
 2 files changed, 22 insertions(+), 15 deletions(-)
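
On the batched charge/uncharge mentioned in the commit message, purely
as a hypothetical sketch (nothing below is in this patch;
pgtable_charge_stock and pgtable_stock_get are invented names): the
idea would be to hit the shared memcg counter once per batch instead of
once per page-table page, e.g.:

	/*
	 * HYPOTHETICAL -- illustration only, not part of the patch.
	 * Charge the memcg counter for a whole power-of-two batch up
	 * front, then satisfy individual page-table allocations from
	 * the local stock, avoiding one counter update per pte/pmd page.
	 */
	#define PGTABLE_CHARGE_BATCH	8	/* must be a power of two */

	struct pgtable_charge_stock {
		struct mem_cgroup *memcg;	/* memcg the stock is charged to */
		unsigned int nr;		/* precharged pages still unused */
	};

	static bool pgtable_stock_get(struct pgtable_charge_stock *s, gfp_t gfp)
	{
		if (s->nr > 0) {
			s->nr--;		/* serve from the local stock */
			return true;
		}
		/* Refill: one counter hit covers the next BATCH allocations. */
		if (!memcg_kmem_newpage_charge(gfp, &s->memcg,
					       ilog2(PGTABLE_CHARGE_BATCH)))
			return false;
		s->nr = PGTABLE_CHARGE_BATCH - 1;
		return true;
	}

Unused stock would have to be returned on process exit and invalidated
when the task migrates to another memcg, which is where most of the
real complexity would be.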

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index c4412e972bbd..758a6a7c527a 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -48,7 +48,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
 	pgtable_page_dtor(pte);
-	__free_page(pte);
+	__free_kmem_pages(pte, 0);
 }
 
 extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
@@ -81,11 +81,11 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	struct page *page;
-	page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+	page = alloc_kmem_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
 	if (!page)
 		return NULL;
 	if (!pgtable_pmd_page_ctor(page)) {
-		__free_pages(page, 0);
+		__free_kmem_pages(page, 0);
 		return NULL;
 	}
 	return (pmd_t *)page_address(page);
@@ -95,7 +95,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
 	pgtable_pmd_page_dtor(virt_to_page(pmd));
-	free_page((unsigned long)pmd);
+	free_kmem_pages((unsigned long)pmd, 0);
 }
 
 extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
@@ -125,13 +125,14 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pud_t *)__get_free_kmem_pages(GFP_KERNEL|__GFP_REPEAT|
+					      __GFP_ZERO, 0);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
 	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-	free_page((unsigned long)pud);
+	free_kmem_pages((unsigned long)pud, 0);
 }
 
 extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d5aa594e6332..3715dda0c41b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,5 +1,6 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/memcontrol.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
@@ -24,11 +25,11 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
-	pte = alloc_pages(__userpte_alloc_gfp, 0);
+	pte = alloc_kmem_pages(__userpte_alloc_gfp, 0);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
-		__free_page(pte);
+		__free_kmem_pages(pte, 0);
 		return NULL;
 	}
 	return pte;
@@ -55,6 +56,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	pgtable_page_dtor(pte);
 	paravirt_release_pte(page_to_pfn(pte));
+	memcg_kmem_uncharge_pages(pte, 0);
 	tlb_remove_page(tlb, pte);
 }
 
@@ -71,14 +73,18 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 	tlb->need_flush_all = 1;
 #endif
 	pgtable_pmd_page_dtor(page);
+	memcg_kmem_uncharge_pages(page, 0);
 	tlb_remove_page(tlb, page);
 }
 
 #if PAGETABLE_LEVELS > 3
 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
+	struct page *page = virt_to_page(pud);
+
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
-	tlb_remove_page(tlb, virt_to_page(pud));
+	memcg_kmem_uncharge_pages(page, 0);
+	tlb_remove_page(tlb, page);
 }
 #endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
@@ -197,7 +203,7 @@ static void free_pmds(pmd_t *pmds[])
 	for(i = 0; i < PREALLOCATED_PMDS; i++)
 		if (pmds[i]) {
 			pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
-			free_page((unsigned long)pmds[i]);
+			free_kmem_pages((unsigned long)pmds[i], 0);
 		}
 }
 
@@ -207,11 +213,11 @@ static int preallocate_pmds(pmd_t *pmds[])
 	bool failed = false;
 
 	for(i = 0; i < PREALLOCATED_PMDS; i++) {
-		pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		pmd_t *pmd = (pmd_t *)__get_free_kmem_pages(PGALLOC_GFP, 0);
 		if (!pmd)
 			failed = true;
 		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
-			free_page((unsigned long)pmd);
+			free_kmem_pages((unsigned long)pmd, 0);
 			pmd = NULL;
 			failed = true;
 		}
@@ -278,7 +284,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
 
-	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+	pgd = (pgd_t *)__get_free_kmem_pages(PGALLOC_GFP, 0);
 
 	if (pgd == NULL)
 		goto out;
@@ -308,7 +314,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 out_free_pmds:
 	free_pmds(pmds);
 out_free_pgd:
-	free_page((unsigned long)pgd);
+	free_kmem_pages((unsigned long)pgd, 0);
 out:
 	return NULL;
 }
@@ -318,7 +324,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	pgd_mop_up_pmds(mm, pgd);
 	pgd_dtor(pgd);
 	paravirt_pgd_free(mm, pgd);
-	free_page((unsigned long)pgd);
+	free_kmem_pages((unsigned long)pgd, 0);
 }
 
 /*


