[Devel] [PATCH RHEL7 COMMIT] ms/Revert "mm: page_alloc: skip over regions of invalid pfns where possible"
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Apr 27 13:00:56 MSK 2018
The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.47.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.47.1
------>
commit 70b9d7d5791d062e18e47565f463ad6face86623
Author: Daniel Vacek <neelx at redhat.com>
Date: Fri Apr 27 13:00:55 2018 +0300
ms/Revert "mm: page_alloc: skip over regions of invalid pfns where possible"
This reverts commit b92df1de5d28 ("mm: page_alloc: skip over regions of
invalid pfns where possible"). The commit is meant to be a boot init
speed up skipping the loop in memmap_init_zone() for invalid pfns.
But given some specific memory mapping on x86_64 (or more generally
theoretically anywhere but on arm with CONFIG_HAVE_ARCH_PFN_VALID) the
implementation also skips valid pfns which is plain wrong and causes
'kernel BUG at mm/page_alloc.c:1389!'
crash> log | grep -e BUG -e RIP -e Call.Trace -e move_freepages_block -e rmqueue -e freelist -A1
kernel BUG at mm/page_alloc.c:1389!
invalid opcode: 0000 [#1] SMP
--
RIP: 0010: move_freepages+0x15e/0x160
--
Call Trace:
move_freepages_block+0x73/0x80
__rmqueue+0x263/0x460
get_page_from_freelist+0x7e1/0x9e0
__alloc_pages_nodemask+0x176/0x420
--
crash> page_init_bug -v | grep RAM
<struct resource 0xffff88067fffd2f8> 1000 - 9bfff System RAM (620.00 KiB)
<struct resource 0xffff88067fffd3a0> 100000 - 430bffff System RAM ( 1.05 GiB = 1071.75 MiB = 1097472.00 KiB)
<struct resource 0xffff88067fffd410> 4b0c8000 - 4bf9cfff System RAM ( 14.83 MiB = 15188.00 KiB)
<struct resource 0xffff88067fffd480> 4bfac000 - 646b1fff System RAM (391.02 MiB = 400408.00 KiB)
<struct resource 0xffff88067fffd560> 7b788000 - 7b7fffff System RAM (480.00 KiB)
<struct resource 0xffff88067fffd640> 100000000 - 67fffffff System RAM ( 22.00 GiB)
crash> page_init_bug | head -6
<struct resource 0xffff88067fffd560> 7b788000 - 7b7fffff System RAM (480.00 KiB)
<struct page 0xffffea0001ede200> 1fffff00000000 0 <struct pglist_data 0xffff88047ffd9000> 1 <struct zone 0xffff88047ffd9800> DMA32 4096 1048575
<struct page 0xffffea0001ede200> 505736 505344 <struct page 0xffffea0001ed8000> 505855 <struct page 0xffffea0001edffc0>
<struct page 0xffffea0001ed8000> 0 0 <struct pglist_data 0xffff88047ffd9000> 0 <struct zone 0xffff88047ffd9000> DMA 1 4095
<struct page 0xffffea0001edffc0> 1fffff00000400 0 <struct pglist_data 0xffff88047ffd9000> 1 <struct zone 0xffff88047ffd9800> DMA32 4096 1048575
BUG, zones differ!
crash> kmem -p 77fff000 78000000 7b5ff000 7b600000 7b787000 7b788000
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea0001e00000 78000000 0 0 0 0
ffffea0001ed7fc0 7b5ff000 0 0 0 0
ffffea0001ed8000 7b600000 0 0 0 0 <<<<
ffffea0001ede1c0 7b787000 0 0 0 0
ffffea0001ede200 7b788000 0 0 1 1fffff00000000
Link: http://lkml.kernel.org/r/20180316143855.29838-1-neelx@redhat.com
Fixes: b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible")
Signed-off-by: Daniel Vacek <neelx at redhat.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
Acked-by: Michal Hocko <mhocko at suse.com>
Reviewed-by: Andrew Morton <akpm at linux-foundation.org>
Cc: Vlastimil Babka <vbabka at suse.cz>
Cc: Mel Gorman <mgorman at techsingularity.net>
Cc: Pavel Tatashin <pasha.tatashin at oracle.com>
Cc: Paul Burton <paul.burton at imgtec.com>
Cc: <stable at vger.kernel.org>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
Just an additional fix while investigating:
https://jira.sw.ru/browse/PSBM-83746
(cherry picked from commit f59f1caf72ba00d519c793c3deb32cd3be32edc2)
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
include/linux/memblock.h | 1 -
mm/memblock.c | 28 ----------------------------
mm/page_alloc.c | 11 +----------
3 files changed, 1 insertion(+), 39 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5a439c937c3c..dad171f37f66 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -186,7 +186,6 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
unsigned long *end_pfn);
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
unsigned long *out_end_pfn, int *out_nid);
-unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
/**
* for_each_mem_pfn_range - early memory pfn range iterator
diff --git a/mm/memblock.c b/mm/memblock.c
index fbc8071d9f43..e08475cd1c0a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1061,34 +1061,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
*out_nid = r->nid;
}
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
- unsigned long max_pfn)
-{
- struct memblock_type *type = &memblock.memory;
- unsigned int right = type->cnt;
- unsigned int mid, left = 0;
- phys_addr_t addr = PFN_PHYS(pfn + 1);
-
- do {
- mid = (right + left) / 2;
-
- if (addr < type->regions[mid].base)
- right = mid;
- else if (addr >= (type->regions[mid].base +
- type->regions[mid].size))
- left = mid + 1;
- else {
- /* addr is within the region, so pfn + 1 is valid */
- return min(pfn + 1, max_pfn);
- }
- } while (left < right);
-
- if (right == type->cnt)
- return max_pfn;
- else
- return min(PHYS_PFN(type->regions[right].base), max_pfn);
-}
-
/**
* memblock_set_node - set node ID on memblock regions
* @base: base of area to set node ID for
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7fa5f026434c..e658abb6ff10 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4717,17 +4717,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (context != MEMMAP_EARLY)
goto not_early;
- if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
- /*
- * Skip to the pfn preceding the next valid one (or
- * end_pfn), such that we hit a valid pfn (or end_pfn)
- * on our next iteration of the loop.
- */
- pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+ if (!early_pfn_valid(pfn))
continue;
- }
if (!early_pfn_in_nid(pfn, nid))
continue;
if (!update_defer_init(pgdat, pfn, size, end_pfn,
More information about the Devel
mailing list