[Devel] [PATCH RHEL8 COMMIT] khugepaged: skip collapse if uffd-wp detected

Konstantin Khorenko khorenko at virtuozzo.com
Mon Apr 20 10:34:43 MSK 2020


The commit is pushed to "branch-rh8-4.18.0-80.1.2.vz8.3.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-80.1.2.vz8.3.6
------>
commit 2078ade991b2b244064b681f6be8a72cf3dfcc38
Author: Peter Xu <peterx at redhat.com>
Date:   Mon Apr 20 10:34:42 2020 +0300

    khugepaged: skip collapse if uffd-wp detected
    
    Don't collapse the huge PMD if there is any userfault write protected
    small PTEs.  The problem is that the write protection is in small page
    granularity and there's no way to keep all these write protection
    information if the small pages are going to be merged into a huge PMD.
    
    The same thing needs to be considered for swap entries and migration
    entries.  So do the check as well disregarding khugepaged_max_ptes_swap.
    
    Signed-off-by: Peter Xu <peterx at redhat.com>
    Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
    Reviewed-by: Jerome Glisse <jglisse at redhat.com>
    Reviewed-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
    Cc: Andrea Arcangeli <aarcange at redhat.com>
    Cc: Bobby Powers <bobbypowers at gmail.com>
    Cc: Brian Geffon <bgeffon at google.com>
    Cc: David Hildenbrand <david at redhat.com>
    Cc: Denis Plotnikov <dplotnikov at virtuozzo.com>
    Cc: "Dr . David Alan Gilbert" <dgilbert at redhat.com>
    Cc: Hugh Dickins <hughd at google.com>
    Cc: Johannes Weiner <hannes at cmpxchg.org>
    Cc: "Kirill A . Shutemov" <kirill at shutemov.name>
    Cc: Martin Cracauer <cracauer at cons.org>
    Cc: Marty McFadden <mcfadden8 at llnl.gov>
    Cc: Maya Gokhale <gokhale2 at llnl.gov>
    Cc: Mel Gorman <mgorman at suse.de>
    Cc: Mike Kravetz <mike.kravetz at oracle.com>
    Cc: Pavel Emelyanov <xemul at openvz.org>
    Cc: Rik van Riel <riel at redhat.com>
    Cc: Shaohua Li <shli at fb.com>
    Link: http://lkml.kernel.org/r/20200220163112.11409-12-peterx@redhat.com
    Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
    
    https://jira.sw.ru/browse/PSBM-102938
    (cherry picked from commit e1e267c7928fe387e5e1cffeafb0de2d0473663a)
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 include/trace/events/huge_memory.h |  1 +
 mm/khugepaged.c                    | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index dd4db334bd63..2d7bad9cb976 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -13,6 +13,7 @@
 	EM( SCAN_PMD_NULL,		"pmd_null")			\
 	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
 	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
+	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\
 	EM( SCAN_PAGE_RO,		"no_writable_page")		\
 	EM( SCAN_LACK_REFERENCED_PAGE,	"lack_referenced_page")		\
 	EM( SCAN_PAGE_NULL,		"page_null")			\
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index dff5c75d5310..66616e73bb00 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -29,6 +29,7 @@ enum scan_result {
 	SCAN_PMD_NULL,
 	SCAN_EXCEED_NONE_PTE,
 	SCAN_PTE_NON_PRESENT,
+	SCAN_PTE_UFFD_WP,
 	SCAN_PAGE_RO,
 	SCAN_LACK_REFERENCED_PAGE,
 	SCAN_PAGE_NULL,
@@ -1124,6 +1125,15 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		pte_t pteval = *_pte;
 		if (is_swap_pte(pteval)) {
 			if (++unmapped <= khugepaged_max_ptes_swap) {
+				/*
+				 * Always be strict with uffd-wp
+				 * enabled swap entries.  Please see
+				 * comment below for pte_uffd_wp().
+				 */
+				if (pte_swp_uffd_wp(pteval)) {
+					result = SCAN_PTE_UFFD_WP;
+					goto out_unmap;
+				}
 				continue;
 			} else {
 				result = SCAN_EXCEED_SWAP_PTE;
@@ -1143,6 +1153,19 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 			result = SCAN_PTE_NON_PRESENT;
 			goto out_unmap;
 		}
+		if (pte_uffd_wp(pteval)) {
+			/*
+			 * Don't collapse the page if any of the small
+			 * PTEs are armed with uffd write protection.
+			 * Here we can also mark the new huge pmd as
+			 * write protected if any of the small ones is
+			 * marked but that could bring uknown
+			 * userfault messages that falls outside of
+			 * the registered range.  So, just be simple.
+			 */
+			result = SCAN_PTE_UFFD_WP;
+			goto out_unmap;
+		}
 		if (pte_write(pteval))
 			writable = true;
 


More information about the Devel mailing list