[PATCH 23/50] x86/boot/e820: Move the memblock_find_dma_reserve() function and rename it to memblock_set_dma_reserve()

From: Ingo Molnar
Date: Sat Jan 28 2017 - 17:22:50 EST


We introduced memblock_find_dma_reserve() in this commit:

6f2a75369e75 x86, memblock: Use memblock_memory_size()/memblock_free_memory_size() to get correct dma_reserve

But there's several problems with it:

- The changelog is full of typos and is incomprehensible in general, and
the comments in the code are not much better either.

- The function was inexplicably placed into e820.c, while it has very
little connection to the E820 table: when we call
memblock_find_dma_reserve() then memblock is already set up and we
are not using the E820 table anymore.

- The function is a wrapper around set_dma_reserve(), but changed the 'set'
name to 'find' - actively misleading about its primary purpose, which is
still to set the DMA-reserve value.

- The function is limited to 64-bit systems, but neither the changelog nor
the comments explain why. The change would appear to be relevant to
32-bit systems as well, as the ISA DMA zone is the first 16 MB of RAM.

So address some of these problems:

- Move it into arch/x86/mm/init.c, next to the other zone setup related
functions.

- Clean up the code flow and names of local variables a bit.

- Rename it to memblock_set_dma_reserve()

- Improve the comments.

No change in functionality. Enabling it for 32-bit systems is left
for a separate patch.

Cc: Alex Thorlton <athorlton@xxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Brian Gerst <brgerst@xxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Denys Vlasenko <dvlasenk@xxxxxxxxxx>
Cc: H. Peter Anvin <hpa@xxxxxxxxx>
Cc: Huang, Ying <ying.huang@xxxxxxxxx>
Cc: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Paul Jackson <pj@xxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rafael J. Wysocki <rjw@xxxxxxx>
Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Wei Yang <richard.weiyang@xxxxxxxxx>
Cc: Yinghai Lu <yinghai@xxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/x86/include/asm/e820/api.h | 1 -
arch/x86/include/asm/pgtable.h | 1 +
arch/x86/kernel/e820.c | 31 -------------------------------
arch/x86/mm/init.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 01698548fbc8..7c9e4bb84059 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -22,7 +22,6 @@ extern unsigned long e820_end_of_ram_pfn(void);
extern unsigned long e820_end_of_low_ram_pfn(void);
extern u64 early_reserve_e820(u64 sizet, u64 align);
extern void e820__memblock_setup(void);
-extern void memblock_find_dma_reserve(void);
extern void finish_e820_parsing(void);
extern void e820_reserve_resources(void);
extern void e820_reserve_resources_late(void);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index ffaefb24891c..2d8116136f3e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -733,6 +733,7 @@ static inline int pgd_none(pgd_t pgd)
extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);
+extern void memblock_find_dma_reserve(void);

#ifdef CONFIG_X86_64
/* Realmode trampoline initialization. */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 4a8d905226c9..5f8ba898e29d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1172,34 +1172,3 @@ void __init e820__memblock_setup(void)

memblock_dump_all();
}
-
-void __init memblock_find_dma_reserve(void)
-{
-#ifdef CONFIG_X86_64
- u64 nr_pages = 0, nr_free_pages = 0;
- unsigned long start_pfn, end_pfn;
- phys_addr_t start, end;
- int i;
- u64 u;
-
- /*
- * need to find out used area below MAX_DMA_PFN
- * need to use memblock to get free size in [0, MAX_DMA_PFN]
- * at first, and assume boot_mem will not take below MAX_DMA_PFN
- */
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
- start_pfn = min(start_pfn, MAX_DMA_PFN);
- end_pfn = min(end_pfn, MAX_DMA_PFN);
- nr_pages += end_pfn - start_pfn;
- }
-
- for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) {
- start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
- end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
- if (start_pfn < end_pfn)
- nr_free_pages += end_pfn - start_pfn;
- }
-
- set_dma_reserve(nr_pages - nr_free_pages);
-#endif
-}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index b7b4ad569d13..922671d3af85 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -724,6 +724,53 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
}
#endif

+/*
+ * Calculate the precise size of the DMA zone (first 16 MB of RAM),
+ * and pass it to the MM layer - to help it set zone watermarks more
+ * accurately.
+ *
+ * Done on 64-bit systems only for the time being, although 32-bit systems
+ * might benefit from this as well.
+ */
+void __init memblock_find_dma_reserve(void)
+{
+#ifdef CONFIG_X86_64
+ u64 nr_pages = 0, nr_free_pages = 0;
+ unsigned long start_pfn, end_pfn;
+ phys_addr_t start_addr, end_addr;
+ int i;
+ u64 u;
+
+ /*
+ * Iterate over all memory ranges (free and reserved ones alike),
+ * to calculate the total number of pages in the first 16 MB of RAM:
+ */
+ nr_pages = 0;
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
+ start_pfn = min(start_pfn, MAX_DMA_PFN);
+ end_pfn = min(end_pfn, MAX_DMA_PFN);
+
+ nr_pages += end_pfn - start_pfn;
+ }
+
+ /*
+ * Iterate over free memory ranges to calculate the number of free
+ * pages in the DMA zone, while not counting potential partial
+ * pages at the beginning or the end of the range:
+ */
+ nr_free_pages = 0;
+ for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start_addr, &end_addr, NULL) {
+ start_pfn = min_t(unsigned long, PFN_UP(start_addr), MAX_DMA_PFN);
+ end_pfn = min_t(unsigned long, PFN_DOWN(end_addr), MAX_DMA_PFN);
+
+ if (start_pfn < end_pfn)
+ nr_free_pages += end_pfn - start_pfn;
+ }
+
+ set_dma_reserve(nr_pages - nr_free_pages);
+#endif
+}
+
void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
--
2.7.4