Re: [PATCH 4.14 023/159] mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y

From: Mike Galbraith
Date: Sun Jan 07 2018 - 00:27:17 EST


On Fri, 2017-12-22 at 09:45 +0100, Greg Kroah-Hartman wrote:
> 4.14-stable review patch. If anyone has any objections, please let me know.

FYI, this broke kdump, or rather the makedumpfile part thereof.
Forward-looking wreckage is par for the kdump course, but...

> ------------------
>
> From: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
>
> commit 83e3c48729d9ebb7af5a31a504f3fd6aff0348c4 upstream.
>
> Size of the mem_section[] array depends on the size of the physical address space.
>
> In preparation for boot-time switching between paging modes on x86-64
> we need to make the allocation of mem_section[] dynamic, because otherwise
> we waste a lot of RAM: with CONFIG_NODES_SHIFT=10, mem_section[] size is 32kB
> for 4-level paging and 2MB for 5-level paging mode.
>
> The patch allocates the array on the first call to sparse_memory_present_with_active_regions().
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
> Cc: Borislav Petkov <bp@xxxxxxx>
> Cc: Cyrill Gorcunov <gorcunov@xxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: linux-mm@xxxxxxxxx
> Link: http://lkml.kernel.org/r/20170929140821.37654-2-kirill.shutemov@xxxxxxxxxxxxxxx
> Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
>
> ---
> include/linux/mmzone.h | 6 +++++-
> mm/page_alloc.c | 10 ++++++++++
> mm/sparse.c | 17 +++++++++++------
> 3 files changed, 26 insertions(+), 7 deletions(-)
>
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1152,13 +1152,17 @@ struct mem_section {
> #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
>
> #ifdef CONFIG_SPARSEMEM_EXTREME
> -extern struct mem_section *mem_section[NR_SECTION_ROOTS];
> +extern struct mem_section **mem_section;
> #else
> extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
> #endif
>
> static inline struct mem_section *__nr_to_section(unsigned long nr)
> {
> +#ifdef CONFIG_SPARSEMEM_EXTREME
> + if (!mem_section)
> + return NULL;
> +#endif
> if (!mem_section[SECTION_NR_TO_ROOT(nr)])
> return NULL;
> return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5651,6 +5651,16 @@ void __init sparse_memory_present_with_a
> unsigned long start_pfn, end_pfn;
> int i, this_nid;
>
> +#ifdef CONFIG_SPARSEMEM_EXTREME
> + if (!mem_section) {
> + unsigned long size, align;
> +
> + size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
> + align = 1 << (INTERNODE_CACHE_SHIFT);
> + mem_section = memblock_virt_alloc(size, align);
> + }
> +#endif
> +
> for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
> memory_present(this_nid, start_pfn, end_pfn);
> }
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -23,8 +23,7 @@
> * 1) mem_section - memory sections, mem_map's for valid memory
> */
> #ifdef CONFIG_SPARSEMEM_EXTREME
> -struct mem_section *mem_section[NR_SECTION_ROOTS]
> - ____cacheline_internodealigned_in_smp;
> +struct mem_section **mem_section;
> #else
> struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
> ____cacheline_internodealigned_in_smp;
> @@ -101,7 +100,7 @@ static inline int sparse_index_init(unsi
> int __section_nr(struct mem_section* ms)
> {
> unsigned long root_nr;
> - struct mem_section* root;
> + struct mem_section *root = NULL;
>
> for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
> root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
> @@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms)
> break;
> }
>
> - VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
> + VM_BUG_ON(!root);
>
> return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
> }
> @@ -330,11 +329,17 @@ again:
> static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
> {
> unsigned long usemap_snr, pgdat_snr;
> - static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
> - static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
> + static unsigned long old_usemap_snr;
> + static unsigned long old_pgdat_snr;
> struct pglist_data *pgdat = NODE_DATA(nid);
> int usemap_nid;
>
> + /* First call */
> + if (!old_usemap_snr) {
> + old_usemap_snr = NR_MEM_SECTIONS;
> + old_pgdat_snr = NR_MEM_SECTIONS;
> + }
> +
> usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
> pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
> if (usemap_snr == pgdat_snr)
>
>