[Devel] [PATCH RHEL7 COMMIT] ms/huge pagecache: extend mremap pmd rmap lockout to files
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jan 10 14:00:28 MSK 2019
The commit is pushed to "branch-rh7-3.10.0-957.1.3.vz7.83.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.1.3.vz7.83.4
------>
commit 4fd2d9451ae7d60ce3f7c3d39484b143d428702e
Author: Hugh Dickins <hughd at google.com>
Date: Thu May 19 17:12:57 2016 -0700
ms/huge pagecache: extend mremap pmd rmap lockout to files
Whatever huge pagecache implementation we go with, file rmap locking
must be added to anon rmap locking, when mremap's move_page_tables()
finds a pmd_trans_huge pmd entry: a simple change, let's do it now.
Factor out take_rmap_locks() and drop_rmap_locks() to handle the locking
for move_ptes() and move_page_tables(), and delete the
VM_BUG_ON_VMA which rejected vm_file and required anon_vma.
Signed-off-by: Hugh Dickins <hughd at google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov at linux.intel.com>
Cc: Andrea Arcangeli <aarcange at redhat.com>
Cc: Andres Lagar-Cavilla <andreslc at google.com>
Cc: Yang Shi <yang.shi at linaro.org>
Cc: Ning Qu <quning at gmail.com>
Cc: Mel Gorman <mgorman at techsingularity.net>
Cc: Andres Lagar-Cavilla <andreslc at google.com>
Cc: Konstantin Khlebnikov <koct9i at gmail.com>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
(cherry picked from commit 1d069b7dd56728a0eb6acb138dce0d37600dee00)
Signed-off-by: Kirill Tkhai <ktkhai at virtuozzo.com>
ms commit 1d069b7dd567
Ported w/o removing the VM_BUG_ON_VMA() check -- we still need it,
since hugepages are used for anon mappings only in our kernel.
Patchset description:
Patch "mm: speed up mremap by 20x on large regions" introduces
optimization: when a moved region has source and destination addresses
and size equal to multiple of PMD_SIZE; PTEs are not really copyed.
Instead of this, new PMD pointer is changed to point to old PTEs, while
old PMD is cleared.
This may be useful, when CRIU remaps large memory areas on restore (but
really, alignment to PMD_SIZE is not very often, though possible).
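Below is a minimal user-space sketch (not part of the patch) of the
scenario the patchset description refers to: mremap() of a large region
whose size is a multiple of PMD_SIZE. It assumes a 2 MiB PMD_SIZE (as on
x86_64), and the region size is illustrative only; whether the kernel
actually takes the PMD-level move path also depends on the source and
destination addresses that mmap()/mremap() happen to pick being
PMD_SIZE-aligned.

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>

	#define PMD_SIZE	(2UL << 20)		/* assumption: 2 MiB (x86_64) */
	#define REGION_SIZE	(256 * PMD_SIZE)	/* 512 MiB, multiple of PMD_SIZE */

	int main(void)
	{
		void *src, *dst;

		src = mmap(NULL, REGION_SIZE, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (src == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		memset(src, 0xab, REGION_SIZE);		/* populate the page tables */

		/*
		 * MREMAP_MAYMOVE lets the kernel choose a new location; with
		 * PMD_SIZE-aligned old and new addresses the optimized path can
		 * relink whole page tables instead of copying every PTE.
		 */
		dst = mremap(src, REGION_SIZE, REGION_SIZE, MREMAP_MAYMOVE);
		if (dst == MAP_FAILED) {
			perror("mremap");
			return 1;
		}
		printf("moved %lu MiB from %p to %p\n", REGION_SIZE >> 20, src, dst);
		return munmap(dst, REGION_SIZE);
	}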
---
mm/mremap.c | 40 ++++++++++++++++++++++------------------
1 file changed, 22 insertions(+), 18 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index 4e705fe3c747..9e65d144853f 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -74,6 +74,22 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
return pmd;
}
+static void take_rmap_locks(struct vm_area_struct *vma)
+{
+ if (vma->vm_file)
+ mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+ if (vma->anon_vma)
+ anon_vma_lock_write(vma->anon_vma);
+}
+
+static void drop_rmap_locks(struct vm_area_struct *vma)
+{
+ if (vma->anon_vma)
+ anon_vma_unlock_write(vma->anon_vma);
+ if (vma->vm_file)
+ mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+}
+
static pte_t move_soft_dirty_pte(pte_t pte)
{
/*
@@ -94,8 +110,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
{
- struct address_space *mapping = NULL;
- struct anon_vma *anon_vma = NULL;
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
@@ -120,16 +134,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
* serialize access to individual ptes, but only rmap traversal
* order guarantees that we won't miss both the old and new ptes).
*/
- if (need_rmap_locks) {
- if (vma->vm_file) {
- mapping = vma->vm_file->f_mapping;
- mutex_lock(&mapping->i_mmap_mutex);
- }
- if (vma->anon_vma) {
- anon_vma = vma->anon_vma;
- anon_vma_lock_write(anon_vma);
- }
- }
+ if (need_rmap_locks)
+ take_rmap_locks(vma);
/*
* We don't have to worry about the ordering of src and dst
@@ -172,10 +178,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
else
*need_flush = true;
pte_unmap_unlock(old_pte - 1, old_ptl);
- if (anon_vma)
- anon_vma_unlock_write(anon_vma);
- if (mapping)
- mutex_unlock(&mapping->i_mmap_mutex);
+ if (need_rmap_locks)
+ drop_rmap_locks(vma);
}
#define LATENCY_LIMIT (64 * PAGE_SIZE)
@@ -217,13 +221,13 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
VM_BUG_ON(vma->vm_file || !vma->anon_vma);
/* See comment in move_ptes() */
if (need_rmap_locks)
- anon_vma_lock_write(vma->anon_vma);
+ take_rmap_locks(vma);
err = move_huge_pmd(vma, new_vma, old_addr,
new_addr, old_end,
old_pmd, new_pmd,
&need_flush);
if (need_rmap_locks)
- anon_vma_unlock_write(vma->anon_vma);
+ drop_rmap_locks(vma);
}
if (err > 0)
continue;