Re: [PATCH v2 08/22] memblock: Introduce a generic phys_addr_to_target_node()

From: Mike Rapoport
Date: Mon Jul 13 2020 - 03:04:50 EST


Hi Dan,

On Sun, Jul 12, 2020 at 09:26:48AM -0700, Dan Williams wrote:
> Similar to how generic memory_add_physaddr_to_nid() interrogates
> memblock data for numa information, introduce
> get_reserved_pfn_range_from_nid() to enable the same operation for
> reserved memory ranges. Example memory ranges that are reserved, but
> still have associated numa-info are persistent memory or Soft Reserved
> (EFI_MEMORY_SP) memory.

Here again, I would prefer to add a weak default for
phys_to_target_node() because the "generic" implementation is not really
generic.

The fallback to reserved ranges is x86 specfic because on x86 most of the
reserved areas is not in memblock.memory. AFAIK, no other architecture
does this.

And x86 anyway has implementation of phys_to_target_node().

> Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx>
> Cc: Jia He <justin.he@xxxxxxx>
> Cc: Will Deacon <will@xxxxxxxxxx>
> Cc: David Hildenbrand <david@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
> ---
> include/linux/memblock.h | 4 +++
> include/linux/mm.h | 2 +
> include/linux/numa.h | 2 +
> mm/memblock.c | 22 ++++++++++++++--
> mm/page_alloc.c | 63 +++++++++++++++++++++++++++++++++++++++++++++-
> 5 files changed, 87 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index 017fae833d4a..0655e8376c72 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -234,6 +234,10 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
> for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
> i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
>
> +#define for_each_reserved_pfn_range(i, nid, p_start, p_end, p_nid) \
> + for (i = -1, __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid); \
> + i >= 0; __next_reserved_pfn_range(&i, nid, p_start, p_end, p_nid))
> +
> #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
> void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
> unsigned long *out_spfn,
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 1e76ee5da20b..82dac9f42c46 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2438,6 +2438,8 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
>
> extern void get_pfn_range_for_nid(unsigned int nid,
> unsigned long *start_pfn, unsigned long *end_pfn);
> +extern void get_reserved_pfn_range_for_nid(unsigned int nid,
> + unsigned long *start_pfn, unsigned long *end_pfn);
> extern unsigned long find_min_pfn_with_active_regions(void);
> extern void sparse_memory_present_with_active_regions(int nid);
>
> diff --git a/include/linux/numa.h b/include/linux/numa.h
> index 5d25c5de1322..52b2430bc759 100644
> --- a/include/linux/numa.h
> +++ b/include/linux/numa.h
> @@ -19,7 +19,7 @@ int numa_map_to_online_node(int node);
>
> /*
> * Optional architecture specific implementation, users need a "depends
> - * on $ARCH"
> + * on $ARCH" or depends on CONFIG_MEMBLOCK_NUMA_INFO
> */
> int phys_to_target_node(phys_addr_t addr);
> #else
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 39aceafc57f6..43c3abab705e 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1200,11 +1200,11 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
> /*
> * Common iterator interface used to define for_each_mem_pfn_range().
> */
> -void __init_memblock __next_mem_pfn_range(int *idx, int nid,
> +static void __init_memblock __next_memblock_pfn_range(int *idx, int nid,
> unsigned long *out_start_pfn,
> - unsigned long *out_end_pfn, int *out_nid)
> + unsigned long *out_end_pfn, int *out_nid,
> + struct memblock_type *type)
> {
> - struct memblock_type *type = &memblock.memory;
> struct memblock_region *r;
> int r_nid;
>
> @@ -1230,6 +1230,22 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
> *out_nid = r_nid;
> }
>
> +void __init_memblock __next_mem_pfn_range(int *idx, int nid,
> + unsigned long *out_start_pfn,
> + unsigned long *out_end_pfn, int *out_nid)
> +{
> + __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid,
> + &memblock.memory);
> +}
> +
> +void __init_memblock __next_reserved_pfn_range(int *idx, int nid,
> + unsigned long *out_start_pfn,
> + unsigned long *out_end_pfn, int *out_nid)
> +{
> + __next_memblock_pfn_range(idx, nid, out_start_pfn, out_end_pfn, out_nid,
> + &memblock.reserved);
> +}
> +
> /**
> * memblock_set_node - set node ID on memblock regions
> * @base: base of area to set node ID for
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index df8bd169dbb4..94ad77c0c338 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6369,12 +6369,39 @@ void __init_or_memblock get_pfn_range_for_nid(unsigned int nid,
> *start_pfn = 0;
> }
>
> +/**
> + * get_reserved_pfn_range_for_nid - Return the start and end page frames for a node
> + * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
> + * @start_pfn: Passed by reference. On return, it will have the node start_pfn.
> + * @end_pfn: Passed by reference. On return, it will have the node end_pfn.
> + *
> + * Mostly identical to get_pfn_range_for_nid() except it operates on
> + * reserved ranges rather than online memory.
> + */
> +void __init_or_memblock get_reserved_pfn_range_for_nid(unsigned int nid,
> + unsigned long *start_pfn, unsigned long *end_pfn)
> +{
> + unsigned long this_start_pfn, this_end_pfn;
> + int i;
> +
> + *start_pfn = -1UL;
> + *end_pfn = 0;
> +
> + for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
> + *start_pfn = min(*start_pfn, this_start_pfn);
> + *end_pfn = max(*end_pfn, this_end_pfn);
> + }
> +
> + if (*start_pfn == -1UL)
> + *start_pfn = 0;
> +}
> +
> /*
> * Generic implementation of memory_add_physaddr_to_nid() depends on
> * architecture using memblock data for numa information.
> */
> #ifdef CONFIG_MEMBLOCK_NUMA_INFO
> -int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
> +static int __init_or_memblock __memory_add_physaddr_to_nid(u64 addr)
> {
> unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
> int nid;
> @@ -6384,10 +6411,42 @@ int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
> if (pfn >= start_pfn && pfn <= end_pfn)
> return nid;
> }
> + return NUMA_NO_NODE;
> +}
> +
> +int __init_or_memblock memory_add_physaddr_to_nid(u64 addr)
> +{
> + int nid = __memory_add_physaddr_to_nid(addr);
> +
> /* Default to node0 as not all callers are prepared for this to fail */
> - return 0;
> + if (nid == NUMA_NO_NODE)
> + return 0;
> + return nid;
> }
> EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
> +
> +int __init_or_memblock phys_to_target_node(u64 addr)
> +{
> + unsigned long start_pfn, end_pfn, pfn = PHYS_PFN(addr);
> + int nid = __memory_add_physaddr_to_nid(addr);
> +
> + if (nid != NUMA_NO_NODE)
> + return nid;
> +
> + /*
> + * Search reserved memory ranges since the memory address does
> + * not appear to be online
> + */
> + for_each_possible_node(nid) {
> + if (node_online(nid))
> + continue;
> + get_reserved_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
> + if (pfn >= start_pfn && pfn <= end_pfn)
> + return nid;
> + }
> + return NUMA_NO_NODE;
> +}
> +EXPORT_SYMBOL_GPL(phys_to_target_node);
> #endif /* CONFIG_MEMBLOCK_NUMA_INFO */
>
> /*
>

--
Sincerely yours,
Mike.