Re: Panic on 8-node system in memblock_virt_alloc_try_nid()
From: Yinghai Lu
Date: Fri Jan 24 2014 - 02:46:35 EST
On Thu, Jan 23, 2014 at 11:22 PM, Santosh Shilimkar
<santosh.shilimkar@xxxxxx> wrote:
> On Friday 24 January 2014 02:04 AM, Yinghai Lu wrote:
>> On Thu, Jan 23, 2014 at 10:56 PM, Santosh Shilimkar
>> <santosh.shilimkar@xxxxxx> wrote:
>>> On Friday 24 January 2014 01:38 AM, Santosh Shilimkar wrote:
>>
>>> The patch which is now commit 457ff1d {lib/swiotlb.c: use
>>> memblock apis for early memory allocations} was breaking the
>>> boot on Andrew's machine. Now if I look back at the patch, based on your
>>> above description, I believe the hunk below was/is the culprit.
>>>
>>> @@ -172,8 +172,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
>>> /*
>>> * Get the overflow emergency buffer
>>> */
>>> - v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
>>> - PAGE_ALIGN(io_tlb_overflow));
>>> + v_overflow_buffer = memblock_virt_alloc_nopanic(
>>> + PAGE_ALIGN(io_tlb_overflow),
>>> + PAGE_SIZE);
>>> if (!v_overflow_buffer)
>>> return -ENOMEM;
>>>
>>>
>>> Looks like 'v_overflow_buffer' must be allocated from low memory in this
>>> case. Is that correct ?
>>
>> yes.
>>
>> but should the change like following
>>
>> commit 457ff1de2d247d9b8917c4664c2325321a35e313
>> Author: Santosh Shilimkar <santosh.shilimkar@xxxxxx>
>> Date: Tue Jan 21 15:50:30 2014 -0800
>>
>> lib/swiotlb.c: use memblock apis for early memory allocations
>>
>>
>> @@ -215,13 +220,13 @@ swiotlb_init(int verbose)
>> bytes = io_tlb_nslabs << IO_TLB_SHIFT;
>>
>> /* Get IO TLB memory from the low pages */
>> - vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
>> + vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
>> if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
>> return;
>>
> OK. So we need a memblock API equivalent to '__alloc_bootmem_low()'. We will
> try to come up with a patch for the same. Thanks for the inputs.
Yes,
Andrew, can you try the two attached patches on your setup?
I assume your system does not have Intel IOMMU support?
Thanks
Yinghai
Subject: [PATCH] x86: Fix NUMA by reverting wrong memblock setting.
Dave reported that NUMA on x86 is broken on a system with 1T of memory.
It turns out
| commit 5b6e529521d35e1bcaa0fe43456d1bbb335cae5d
| Author: Santosh Shilimkar <santosh.shilimkar@xxxxxx>
| Date: Tue Jan 21 15:50:03 2014 -0800
|
| x86: memblock: set current limit to max low memory address
wrongly sets the limit to low memory.
max_low_pfn_mapped is different from max_pfn_mapped:
max_low_pfn_mapped is always under 4G.
That makes all memblock_alloc_nid() allocations go under 4G.
Revert that offending patch.
Reported-by: Dave Hansen <dave.hansen@xxxxxxxxx>
Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>
---
arch/x86/include/asm/page_types.h | 4 ++--
arch/x86/kernel/setup.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
Index: linux-2.6/arch/x86/include/asm/page_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_types.h
+++ linux-2.6/arch/x86/include/asm/page_types.h
@@ -51,9 +51,9 @@ extern int devmem_is_allowed(unsigned lo
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
-static inline phys_addr_t get_max_low_mapped(void)
+static inline phys_addr_t get_max_mapped(void)
{
- return (phys_addr_t)max_low_pfn_mapped << PAGE_SHIFT;
+ return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
}
bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -1173,7 +1173,7 @@ void __init setup_arch(char **cmdline_p)
setup_real_mode();
- memblock_set_current_limit(get_max_low_mapped());
+ memblock_set_current_limit(get_max_mapped());
dma_contiguous_reserve(0);
/*
---
arch/arm/kernel/setup.c | 2 +-
include/linux/bootmem.h | 37 +++++++++++++++++++++++++++++++++++++
lib/swiotlb.c | 4 ++--
3 files changed, 40 insertions(+), 3 deletions(-)
Index: linux-2.6/include/linux/bootmem.h
===================================================================
--- linux-2.6.orig/include/linux/bootmem.h
+++ linux-2.6/include/linux/bootmem.h
@@ -175,6 +175,27 @@ static inline void * __init memblock_vir
NUMA_NO_NODE);
}
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
+#endif
+
+static inline void * __init memblock_virt_alloc_low(
+ phys_addr_t size, phys_addr_t align)
+{
+ return memblock_virt_alloc_try_nid(size, align,
+ BOOTMEM_LOW_LIMIT,
+ ARCH_LOW_ADDRESS_LIMIT,
+ NUMA_NO_NODE);
+}
+static inline void * __init memblock_virt_alloc_low_nopanic(
+ phys_addr_t size, phys_addr_t align)
+{
+ return memblock_virt_alloc_try_nid_nopanic(size, align,
+ BOOTMEM_LOW_LIMIT,
+ ARCH_LOW_ADDRESS_LIMIT,
+ NUMA_NO_NODE);
+}
+
static inline void * __init memblock_virt_alloc_from_nopanic(
phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
{
@@ -238,6 +259,22 @@ static inline void * __init memblock_vir
return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
}
+static inline void * __init memblock_virt_alloc_low(
+ phys_addr_t size, phys_addr_t align)
+{
+ if (!align)
+ align = SMP_CACHE_BYTES;
+ return __alloc_bootmem_low(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_low_nopanic(
+ phys_addr_t size, phys_addr_t align)
+{
+ if (!align)
+ align = SMP_CACHE_BYTES;
+ return __alloc_bootmem_low_nopanic(size, align, BOOTMEM_LOW_LIMIT);
+}
+
static inline void * __init memblock_virt_alloc_from_nopanic(
phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
{
Index: linux-2.6/lib/swiotlb.c
===================================================================
--- linux-2.6.orig/lib/swiotlb.c
+++ linux-2.6/lib/swiotlb.c
@@ -172,7 +172,7 @@ int __init swiotlb_init_with_tbl(char *t
/*
* Get the overflow emergency buffer
*/
- v_overflow_buffer = memblock_virt_alloc_nopanic(
+ v_overflow_buffer = memblock_virt_alloc_low_nopanic(
PAGE_ALIGN(io_tlb_overflow),
PAGE_SIZE);
if (!v_overflow_buffer)
@@ -220,7 +220,7 @@ swiotlb_init(int verbose)
bytes = io_tlb_nslabs << IO_TLB_SHIFT;
/* Get IO TLB memory from the low pages */
- vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
+ vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
return;
Index: linux-2.6/arch/arm/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/arm/kernel/setup.c
+++ linux-2.6/arch/arm/kernel/setup.c
@@ -717,7 +717,7 @@ static void __init request_standard_reso
kernel_data.end = virt_to_phys(_end - 1);
for_each_memblock(memory, region) {
- res = memblock_virt_alloc(sizeof(*res), 0);
+ res = memblock_virt_alloc_low(sizeof(*res), 0);
res->name = "System RAM";
res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;