[Devel] [PATCH RHEL7 COMMIT] Revert "ms/mm/proc: add kpageidle file"

Konstantin Khorenko khorenko at virtuozzo.com
Thu Nov 5 05:36:20 PST 2015


The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.9.6
------>
commit ca69ff90172ee578b38a1b4d4e86db6e94472280
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Thu Nov 5 17:36:20 2015 +0400

    Revert "ms/mm/proc: add kpageidle file"
    
    https://jira.sw.ru/browse/PSBM-32460
    
    The mainstream version of this feature has been updated since we
    pulled it, so we need to revert our copy and pull it again.
    
    This reverts commit 3be71b71067ac697512d10da567ccf761cb720d4.
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 Documentation/vm/pagemap.txt |  12 +---
 fs/proc/page.c               | 168 -------------------------------------------
 fs/proc/task_mmu.c           |   3 +-
 include/linux/mm.h           |  50 -------------
 include/linux/page-flags.h   |   9 ---
 mm/Kconfig                   |  12 ----
 mm/page_alloc.c              |   4 --
 mm/rmap.c                    |   9 ---
 mm/swap.c                    |   2 -
 9 files changed, 2 insertions(+), 267 deletions(-)

diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index a4fe9b2..e37cff9 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -5,7 +5,7 @@ pagemap is a new (as of 2.6.25) set of interfaces in the kernel that allow
 userspace programs to examine the page tables and related information by
 reading files in /proc.
 
-There are five components to pagemap:
+There are four components to pagemap:
 
  * /proc/pid/pagemap.  This file lets a userspace process find out which
    physical frame each virtual page is mapped to.  It contains one 64-bit
@@ -67,16 +67,6 @@ There are five components to pagemap:
    memory cgroup each page is charged to, indexed by PFN. Only available when
    CONFIG_MEMCG is set.
 
- * /proc/kpageidle.  This file implements a bitmap where each bit corresponds
-   to a page, indexed by PFN. When the bit is set, the corresponding page is
-   idle. A page is considered idle if it has not been accessed since it was
-   marked idle. To mark a page idle one should set the bit corresponding to the
-   page by writing to the file. A value written to the file is OR-ed with the
-   current bitmap value. Only user memory pages can be marked idle, for other
-   page types input is silently ignored. Writing to this file beyond max PFN
-   results in the ENXIO error. Only available when CONFIG_IDLE_PAGE_TRACKING is
-   set.
-
 Short descriptions to the page flags:
 
  0. LOCKED
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 9829f49..f198973 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -15,7 +15,6 @@
 
 #define KPMSIZE sizeof(u64)
 #define KPMMASK (KPMSIZE - 1)
-#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
 
 /* /proc/kpagecount - an array exposing page counts
  *
@@ -263,169 +262,6 @@ static const struct file_operations proc_kpagecgroup_operations = {
 };
 #endif /* CONFIG_MEMCG */
 
-#ifdef CONFIG_IDLE_PAGE_TRACKING
-/*
- * Idle page tracking only considers user memory pages, for other types of
- * pages the idle flag is always unset and an attempt to set it is silently
- * ignored.
- *
- * We treat a page as a user memory page if it is on an LRU list, because it is
- * always safe to pass such a page to page_referenced(), which is essential for
- * idle page tracking. With such an indicator of user pages we can skip
- * isolated pages, but since there are not usually many of them, it will hardly
- * affect the overall result.
- *
- * This function tries to get a user memory page by pfn as described above.
- */
-static struct page *kpageidle_get_page(unsigned long pfn)
-{
-	struct page *page;
-	struct zone *zone;
-
-	if (!pfn_valid(pfn))
-		return NULL;
-
-	page = pfn_to_page(pfn);
-	if (!page || !PageLRU(page))
-		return NULL;
-	if (!get_page_unless_zero(page))
-		return NULL;
-
-	zone = page_zone(page);
-	spin_lock_irq(&zone->lru_lock);
-	if (unlikely(!PageLRU(page))) {
-		put_page(page);
-		page = NULL;
-	}
-	spin_unlock_irq(&zone->lru_lock);
-	return page;
-}
-
-/*
- * This function calls page_referenced() to clear the referenced bit for all
- * mappings to a page. Since the latter also clears the page idle flag if the
- * page was referenced, it can be used to update the idle flag of a page.
- */
-static void kpageidle_clear_pte_refs(struct page *page)
-{
-	unsigned long dummy;
-
-	if (page_referenced(page, 0, NULL, &dummy))
-		/*
-		 * We cleared the referenced bit in a mapping to this page. To
-		 * avoid interference with the reclaimer, mark it young so that
-		 * the next call to page_referenced() will also return > 0 (see
-		 * page_referenced_one())
-		 */
-		set_page_young(page);
-}
-
-static ssize_t kpageidle_read(struct file *file, char __user *buf,
-			      size_t count, loff_t *ppos)
-{
-	u64 __user *out = (u64 __user *)buf;
-	struct page *page;
-	unsigned long pfn, end_pfn;
-	ssize_t ret = 0;
-	u64 idle_bitmap = 0;
-	int bit;
-
-	if (*ppos & KPMMASK || count & KPMMASK)
-		return -EINVAL;
-
-	pfn = *ppos * BITS_PER_BYTE;
-	if (pfn >= max_pfn)
-		return 0;
-
-	end_pfn = pfn + count * BITS_PER_BYTE;
-	if (end_pfn > max_pfn)
-		end_pfn = ALIGN(max_pfn, KPMBITS);
-
-	for (; pfn < end_pfn; pfn++) {
-		bit = pfn % KPMBITS;
-		page = kpageidle_get_page(pfn);
-		if (page) {
-			if (page_is_idle(page)) {
-				/*
-				 * The page might have been referenced via a
-				 * pte, in which case it is not idle. Clear
-				 * refs and recheck.
-				 */
-				kpageidle_clear_pte_refs(page);
-				if (page_is_idle(page))
-					idle_bitmap |= 1ULL << bit;
-			}
-			put_page(page);
-		}
-		if (bit == KPMBITS - 1) {
-			if (put_user(idle_bitmap, out)) {
-				ret = -EFAULT;
-				break;
-			}
-			idle_bitmap = 0;
-			out++;
-		}
-	}
-
-	*ppos += (char __user *)out - buf;
-	if (!ret)
-		ret = (char __user *)out - buf;
-	return ret;
-}
-
-static ssize_t kpageidle_write(struct file *file, const char __user *buf,
-			       size_t count, loff_t *ppos)
-{
-	const u64 __user *in = (const u64 __user *)buf;
-	struct page *page;
-	unsigned long pfn, end_pfn;
-	ssize_t ret = 0;
-	u64 idle_bitmap = 0;
-	int bit;
-
-	if (*ppos & KPMMASK || count & KPMMASK)
-		return -EINVAL;
-
-	pfn = *ppos * BITS_PER_BYTE;
-	if (pfn >= max_pfn)
-		return -ENXIO;
-
-	end_pfn = pfn + count * BITS_PER_BYTE;
-	if (end_pfn > max_pfn)
-		end_pfn = ALIGN(max_pfn, KPMBITS);
-
-	for (; pfn < end_pfn; pfn++) {
-		bit = pfn % KPMBITS;
-		if (bit == 0) {
-			if (get_user(idle_bitmap, in)) {
-				ret = -EFAULT;
-				break;
-			}
-			in++;
-		}
-		if (idle_bitmap >> bit & 1) {
-			page = kpageidle_get_page(pfn);
-			if (page) {
-				kpageidle_clear_pte_refs(page);
-				set_page_idle(page);
-				put_page(page);
-			}
-		}
-	}
-
-	*ppos += (const char __user *)in - buf;
-	if (!ret)
-		ret = (const char __user *)in - buf;
-	return ret;
-}
-
-static const struct file_operations proc_kpageidle_operations = {
-	.llseek = mem_lseek,
-	.read = kpageidle_read,
-	.write = kpageidle_write,
-};
-#endif /* CONFIG_IDLE_PAGE_TRACKING */
-
 static int __init proc_page_init(void)
 {
 	proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
@@ -433,10 +269,6 @@ static int __init proc_page_init(void)
 #ifdef CONFIG_MEMCG
 	proc_create("kpagecgroup", S_IRUSR, NULL, &proc_kpagecgroup_operations);
 #endif
-#ifdef CONFIG_IDLE_PAGE_TRACKING
-	proc_create("kpageidle", S_IRUSR | S_IWUSR, NULL,
-		    &proc_kpageidle_operations);
-#endif
 	return 0;
 }
 module_init(proc_page_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 45179ba..d0e311f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -480,7 +480,7 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr,
 
 	mss->resident += ptent_size;
 	/* Accumulate the size in pages that have been accessed. */
-	if (pte_young(ptent) || page_is_young(page) || PageReferenced(page))
+	if (pte_young(ptent) || PageReferenced(page))
 		mss->referenced += ptent_size;
 	mapcount = page_mapcount(page);
 	if (mapcount >= 2) {
@@ -776,7 +776,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 
 		/* Clear accessed and referenced bits. */
 		ptep_test_and_clear_young(vma, addr, pte);
-		clear_page_young(page);
 		ClearPageReferenced(page);
 	}
 	pte_unmap_unlock(pte - 1, ptl);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 163d3d8..0036352 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2106,55 +2106,5 @@ void __init setup_nr_node_ids(void);
 static inline void setup_nr_node_ids(void) {}
 #endif
 
-#ifdef CONFIG_IDLE_PAGE_TRACKING
-static inline bool page_is_young(struct page *page)
-{
-	return PageYoung(page);
-}
-
-static inline void set_page_young(struct page *page)
-{
-	SetPageYoung(page);
-}
-
-static inline void clear_page_young(struct page *page)
-{
-	ClearPageYoung(page);
-}
-
-static inline bool page_is_idle(struct page *page)
-{
-	return PageIdle(page);
-}
-
-static inline void set_page_idle(struct page *page)
-{
-	SetPageIdle(page);
-}
-
-static inline void clear_page_idle(struct page *page)
-{
-	ClearPageIdle(page);
-}
-#else /* !CONFIG_IDLE_PAGE_TRACKING */
-static inline bool page_is_young(struct page *page)
-{
-	return false;
-}
-
-static inline void clear_page_young(struct page *page)
-{
-}
-
-static inline bool page_is_idle(struct page *page)
-{
-	return false;
-}
-
-static inline void clear_page_idle(struct page *page)
-{
-}
-#endif /* CONFIG_IDLE_PAGE_TRACKING */
-
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d532eab..b529ed7 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -109,10 +109,6 @@ enum pageflags {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	PG_compound_lock,
 #endif
-#ifdef CONFIG_IDLE_PAGE_TRACKING
-	PG_young,
-	PG_idle,
-#endif
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -279,11 +275,6 @@ PAGEFLAG_FALSE(HWPoison)
 #define __PG_HWPOISON 0
 #endif
 
-#ifdef CONFIG_IDLE_PAGE_TRACKING
-PAGEFLAG(Young, young)
-PAGEFLAG(Idle, idle)
-#endif
-
 u64 stable_page_flags(struct page *page);
 
 static inline int PageUptodate(struct page *page)
diff --git a/mm/Kconfig b/mm/Kconfig
index e26abe9..41d8ec7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -572,15 +572,3 @@ config TSWAP
 	  stores reclaimed pages in memory without any modifications. It is
 	  only worth enabling if used along with memory cgroups in order to
 	  cache pages which were reclaimed on local pressure.
-
-config IDLE_PAGE_TRACKING
-	bool "Enable idle page tracking"
-	depends on 64BIT
-	select PROC_PAGE_MONITOR
-	help
-	  This feature allows to estimate the amount of user pages that have
-	  not been touched during a given period of time. This information can
-	  be useful to tune memory cgroup limits and/or for job placement
-	  within a compute cluster.
-
-	  See Documentation/vm/pagemap.txt for more details.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f70c5f4..f271a75 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6471,10 +6471,6 @@ static const struct trace_print_flags pageflag_names[] = {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	{1UL << PG_compound_lock,	"compound_lock"	},
 #endif
-#ifdef CONFIG_IDLE_PAGE_TRACKING
-	{1UL << PG_young,		"young"		},
-	{1UL << PG_idle,		"idle"		},
-#endif
 };
 
 static void dump_page_flags(unsigned long flags)
diff --git a/mm/rmap.c b/mm/rmap.c
index c92f05c..d68b071 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -723,15 +723,6 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		pte_unmap_unlock(pte, ptl);
 	}
 
-	if (referenced && page_is_idle(page))
-		clear_page_idle(page);
-
-	if (page_is_young(page)) {
-		clear_page_young(page);
-		if (!referenced)
-			referenced++;
-	}
-
 	(*mapcount)--;
 
 	if (referenced)
diff --git a/mm/swap.c b/mm/swap.c
index eb3add7..89a690a 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -612,8 +612,6 @@ void mark_page_accessed(struct page *page)
 	} else if (!PageReferenced(page)) {
 		SetPageReferenced(page);
 	}
-	if (page_is_idle(page))
-		clear_page_idle(page);
 }
 EXPORT_SYMBOL(mark_page_accessed);
 



More information about the Devel mailing list