[Devel] [PATCH RHEL7 COMMIT] ms/mm/rmap: extend rmap_walk_xxx() to cope with different cases

Konstantin Khorenko khorenko at virtuozzo.com
Tue Dec 8 06:15:56 PST 2015


The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.9.15
------>
commit b0b85ef542d082b5eaf67e6c666fb2e56dd3b099
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Tue Dec 8 18:15:56 2015 +0400

    ms/mm/rmap: extend rmap_walk_xxx() to cope with different cases
    
    Patchset description:
    
    rmap_walk() as present in RH7 requires the caller to either hold
    mmap_sem or pin the page's anon_vma. page_idle_clear_pte_refs() does
    neither. As a result, it might end up trying to lock/unlock an
    anon_vma which has already been freed and possibly reallocated. This
    won't do any good.
    
    Let's pull the new version of rmap_walk() from upstream, which allows
    the caller to specify a custom anon_vma lock function, and use it in
    the page_idle code to avoid this issue. This also brings page_idle in
    sync with upstream.
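    
    For reference, a minimal sketch of how such a caller can plug in a
    safe lock function (the callback name page_idle_clear_pte_refs_one
    is taken from the follow-up page_idle patch in this series):
    
    	/*
    	 * page_lock_anon_vma_read() re-checks page_mapped() and takes
    	 * the anon_vma lock under the RCU read lock, so it is safe to
    	 * call without mmap_sem or a pin on the anon_vma.
    	 */
    	static struct rmap_walk_control rwc = {
    		.rmap_one  = page_idle_clear_pte_refs_one,
    		.anon_lock = page_lock_anon_vma_read,
    	};
    
    	rmap_walk(page, &rwc);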
    
    I hope this will fix:
    
    https://jira.sw.ru/browse/PSBM-42015
    
    Joonsoo Kim (3):
      mm/rmap: factor lock function out of rmap_walk_anon()
      mm/rmap: make rmap_walk to get the rmap_walk_control argument
      mm/rmap: extend rmap_walk_xxx() to cope with different cases
    
    Vladimir Davydov (1):
      mm: page_idle: look up page anon_vma carefully when checking references
    
    ============================
    This patch description:
    
    From: Joonsoo Kim <iamjoonsoo.kim at lge.com>
    
    There are a lot of common parts in the traversal functions, but there
    are also a few differences between them. By assigning the proper
    function pointers in each rmap_walk_control, we can handle these
    differences correctly.
    
    The differences we need to handle are the following:
    
    1. the choice of lock function in the anon mapping case
    2. nonlinear handling in the file mapping case
    3. precheck conditions:
    	checking memcg in page_referenced(),
    	checking VM_SHARED in page_mkclean(),
    	checking temporary vma in try_to_unmap()
    4. exit condition:
    	checking page_mapped() in try_to_unmap()
    
    So, in this patch, I introduce four function pointers to handle the
    above differences (see the illustrative sketch below).
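    
    For illustration only (the actual conversion of try_to_unmap() to
    rmap_walk() happens in later upstream patches and is not part of this
    backport; try_to_unmap_one() is assumed here to have been adapted to
    the rmap_one() signature, and try_to_unmap_nonlinear() is likewise
    hypothetical at this point), such a caller could cover cases 1, 2
    and 4 above entirely through the new hooks:
    
    	/* done() hook: stop walking once the page is fully unmapped */
    	static int page_not_mapped(struct page *page)
    	{
    		return !page_mapped(page);		/* case 4 */
    	}
    
    	/* inside a try_to_unmap()-style function taking 'flags' */
    	struct rmap_walk_control rwc = {
    		.rmap_one	= try_to_unmap_one,
    		.arg		= (void *)flags,
    		.done		= page_not_mapped,	   /* case 4 */
    		.file_nonlinear	= try_to_unmap_nonlinear,  /* case 2 */
    		.anon_lock	= page_lock_anon_vma_read, /* case 1 */
    	};
    
    	/* a precheck like case 3 would go into ->invalid_vma() */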
    
    Signed-off-by: Joonsoo Kim <iamjoonsoo.kim at lge.com>
    Cc: Naoya Horiguchi <n-horiguchi at ah.jp.nec.com>
    Cc: Mel Gorman <mgorman at suse.de>
    Cc: Hugh Dickins <hughd at google.com>
    Cc: Rik van Riel <riel at redhat.com>
    Cc: Ingo Molnar <mingo at kernel.org>
    Cc: Hillf Danton <dhillf at gmail.com>
    Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
    (cherry picked from commit 0dd1c7bbce8d1d142bb25aefaa50262dfd77cb78)
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
---
 include/linux/rmap.h | 15 +++++++++++++++
 mm/ksm.c             |  7 +++++++
 mm/rmap.c            | 37 +++++++++++++++++++++++++++++--------
 3 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 083b1df..2491eae 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -237,10 +237,25 @@ extern struct anon_vma *page_lock_anon_vma_read(struct page *page);
 extern void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
+/*
+ * rmap_walk_control: To control rmap traversing for specific needs
+ *
+ * arg: passed to rmap_one() and invalid_vma()
+ * rmap_one: executed on each vma where page is mapped
+ * done: for checking traversing termination condition
+ * file_nonlinear: for handling file nonlinear mapping
+ * anon_lock: for getting anon_lock by optimized way rather than default
+ * invalid_vma: for skipping uninterested vma
+ */
 struct rmap_walk_control {
 	void *arg;
 	int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
 					unsigned long addr, void *arg);
+	int (*done)(struct page *page);
+	int (*file_nonlinear)(struct page *, struct address_space *,
+					struct vm_area_struct *vma);
+	struct anon_vma *(*anon_lock)(struct page *page);
+	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
 };
 
 /*
diff --git a/mm/ksm.c b/mm/ksm.c
index ff2d5a3..e6bedcc 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2038,12 +2038,19 @@ again:
 			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
 				continue;
 
+			if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+				continue;
+
 			ret = rwc->rmap_one(page, vma,
 					rmap_item->address, rwc->arg);
 			if (ret != SWAP_AGAIN) {
 				anon_vma_unlock_read(anon_vma);
 				goto out;
 			}
+			if (rwc->done && rwc->done(page)) {
+				anon_vma_unlock_read(anon_vma);
+				goto out;
+			}
 		}
 		anon_vma_unlock_read(anon_vma);
 	}
diff --git a/mm/rmap.c b/mm/rmap.c
index 362c5e8..3eda1ed 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1749,10 +1749,14 @@ void __put_anon_vma(struct anon_vma *anon_vma)
 }
 
 #ifdef CONFIG_MIGRATION
-static struct anon_vma *rmap_walk_anon_lock(struct page *page)
+static struct anon_vma *rmap_walk_anon_lock(struct page *page,
+					struct rmap_walk_control *rwc)
 {
 	struct anon_vma *anon_vma;
 
+	if (rwc->anon_lock)
+		return rwc->anon_lock(page);
+
 	/*
 	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
 	 * because that depends on page_mapped(); but not all its usages
@@ -1778,16 +1782,22 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
 	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
-	anon_vma = rmap_walk_anon_lock(page);
+	anon_vma = rmap_walk_anon_lock(page, rwc);
 	if (!anon_vma)
 		return ret;
 
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
 		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
+
+		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+			continue;
+
 		ret = rwc->rmap_one(page, vma, address, rwc->arg);
 		if (ret != SWAP_AGAIN)
 			break;
+		if (rwc->done && rwc->done(page))
+			break;
 	}
 	anon_vma_unlock_read(anon_vma);
 	return ret;
@@ -1805,15 +1815,26 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 	mutex_lock(&mapping->i_mmap_mutex);
 	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
+
+		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+			continue;
+
 		ret = rwc->rmap_one(page, vma, address, rwc->arg);
 		if (ret != SWAP_AGAIN)
-			break;
+			goto done;
+		if (rwc->done && rwc->done(page))
+			goto done;
 	}
-	/*
-	 * No nonlinear handling: being always shared, nonlinear vmas
-	 * never contain migration ptes.  Decide what to do about this
-	 * limitation to linear when we need rmap_walk() on nonlinear.
-	 */
+
+	if (!rwc->file_nonlinear)
+		goto done;
+
+	if (list_empty(&mapping->i_mmap_nonlinear))
+		goto done;
+
+	ret = rwc->file_nonlinear(page, mapping, vma);
+
+done:
 	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }

