[Devel] [PATCH RH7 1/2] backport: arch/x86/mm/numa.c: fix boot failure when all nodes are hotpluggable

Konstantin Khorenko khorenko at virtuozzo.com
Wed Feb 3 07:41:10 PST 2016


This fix is already present in the RH7.2 kernel (kernel-3.10.0-327.3.1.el7).

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 01/30/2016 03:01 PM, Dmitry Safonov wrote:
> From: Xishi Qiu <qiuxishi at huawei.com>
> commit bd5cfb8977fbb49d9350f7c81cf1516142e35a6a
> arch/x86/mm/numa.c: fix boot failure when all nodes are hotpluggable
>
> If all the nodes are marked hotpluggable, alloc node data will fail.
> Because __next_mem_range_rev() will skip the hotpluggable memory
> regions.  numa_clear_kernel_node_hotplug() is called after alloc node
> data.
>
> numa_init()
>      ...
>      ret = init_func();  // this will mark hotpluggable flag from SRAT
>      ...
>      memblock_set_bottom_up(false);
>      ...
>      ret = numa_register_memblks(&numa_meminfo);  // this will alloc node data(pglist_data)
>      ...
>      numa_clear_kernel_node_hotplug();  // in case all the nodes are hotpluggable
>      ...
>
> numa_register_memblks()
>      setup_node_data()
>          memblock_find_in_range_node()
>              __memblock_find_range_top_down()
>                  for_each_mem_range_rev()
>                      __next_mem_range_rev()
>
> This patch moves numa_clear_kernel_node_hotplug() into
> numa_register_memblks(), clear kernel node hotpluggable flag before
> alloc node data, then alloc node data won't fail even all the nodes
> are hotpluggable.
>
> [akpm at linux-foundation.org: coding-style fixes]
> Signed-off-by: Xishi Qiu <qiuxishi at huawei.com>
> Cc: Dave Jones <davej at redhat.com>
> Cc: Tang Chen <tangchen at cn.fujitsu.com>
> Cc: Gu Zheng <guz.fnst at cn.fujitsu.com>
> Cc: Ingo Molnar <mingo at elte.hu>
> Cc: "H. Peter Anvin" <hpa at zytor.com>
> Cc: Thomas Gleixner <tglx at linutronix.de>
> Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
>
> [backport from mainline]
> https://jira.sw.ru/browse/PSBM-43010
> Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
> ---
>   arch/x86/mm/numa.c | 89 +++++++++++++++++++++++++++---------------------------
>   1 file changed, 45 insertions(+), 44 deletions(-)
>
> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> index 9e0f45e..f11060b 100644
> --- a/arch/x86/mm/numa.c
> +++ b/arch/x86/mm/numa.c
> @@ -473,6 +473,42 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
>   	return true;
>   }
>
> +static void __init numa_clear_kernel_node_hotplug(void)
> +{
> +	int i, nid;
> +	nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
> +	unsigned long start, end;
> +	struct memblock_region *r;
> +
> +	/*
> +	 * At this time, all memory regions reserved by memblock are
> +	 * used by the kernel. Set the nid in memblock.reserved will
> +	 * mark out all the nodes the kernel resides in.
> +	 */
> +	for (i = 0; i < numa_meminfo.nr_blks; i++) {
> +		struct numa_memblk *mb = &numa_meminfo.blk[i];
> +
> +		memblock_set_node(mb->start, mb->end - mb->start,
> +				  &memblock.reserved, mb->nid);
> +	}
> +
> +	/* Mark all kernel nodes. */
> +	for_each_memblock(reserved, r)
> +		node_set(r->nid, numa_kernel_nodes);
> +
> +	/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
> +	for (i = 0; i < numa_meminfo.nr_blks; i++) {
> +		nid = numa_meminfo.blk[i].nid;
> +		if (!node_isset(nid, numa_kernel_nodes))
> +			continue;
> +
> +		start = numa_meminfo.blk[i].start;
> +		end = numa_meminfo.blk[i].end;
> +
> +		memblock_clear_hotplug(start, end - start);
> +	}
> +}
> +
>   static int __init numa_register_memblks(struct numa_meminfo *mi)
>   {
>   	unsigned long uninitialized_var(pfn_align);
> @@ -491,6 +527,15 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
>   	}
>
>   	/*
> +	 * At very early time, the kernel have to use some memory such as
> +	 * loading the kernel image. We cannot prevent this anyway. So any
> +	 * node the kernel resides in should be un-hotpluggable.
> +	 *
> +	 * And when we come here, alloc node data won't fail.
> +	 */
> +	numa_clear_kernel_node_hotplug();
> +
> +	/*
>   	 * If sections array is gonna be used for pfn -> nid mapping, check
>   	 * whether its granularity is fine enough.
>   	 */
> @@ -549,41 +594,6 @@ static void __init numa_init_array(void)
>   	}
>   }
>
> -static void __init numa_clear_kernel_node_hotplug(void)
> -{
> -	int i, nid;
> -	nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
> -	unsigned long start, end;
> -	struct memblock_type *type = &memblock.reserved;
> -
> -	/*
> -	 * At this time, all memory regions reserved by memblock are
> -	 * used by the kernel. Set the nid in memblock.reserved will
> -	 * mark out all the nodes the kernel resides in.
> -	 */
> -	for (i = 0; i < numa_meminfo.nr_blks; i++) {
> -		struct numa_memblk *mb = &numa_meminfo.blk[i];
> -		memblock_set_node(mb->start, mb->end - mb->start,
> -				  &memblock.reserved, mb->nid);
> -	}
> -
> -	/* Mark all kernel nodes. */
> -	for (i = 0; i < type->cnt; i++)
> -		node_set(type->regions[i].nid, numa_kernel_nodes);
> -
> -	/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
> -	for (i = 0; i < numa_meminfo.nr_blks; i++) {
> -		nid = numa_meminfo.blk[i].nid;
> -		if (!node_isset(nid, numa_kernel_nodes))
> -			continue;
> -
> -		start = numa_meminfo.blk[i].start;
> -		end = numa_meminfo.blk[i].end;
> -
> -		memblock_clear_hotplug(start, end - start);
> -	}
> -}
> -
>   static int __init numa_init(int (*init_func)(void))
>   {
>   	int i;
> @@ -638,15 +648,6 @@ static int __init numa_init(int (*init_func)(void))
>   	}
>   	numa_init_array();
>
> -	/*
> -	 * At very early time, the kernel have to use some memory such as
> -	 * loading the kernel image. We cannot prevent this anyway. So any
> -	 * node the kernel resides in should be un-hotpluggable.
> -	 *
> -	 * And when we come here, numa_init() won't fail.
> -	 */
> -	numa_clear_kernel_node_hotplug();
> -
>   	return 0;
>   }
>
>


More information about the Devel mailing list