Re: [PATCH 2/2] mm, memory_hotplug: provide a more generic restrictions for memory hotplug

From: David Hildenbrand
Date: Thu Apr 04 2019 - 11:02:51 EST


On 04.04.19 16:57, David Hildenbrand wrote:
> On 04.04.19 14:59, Oscar Salvador wrote:
>> From: Michal Hocko <mhocko@xxxxxxxx>
>>
>> arch_add_memory, __add_pages take a want_memblock which controls whether
>> the newly added memory should get the sysfs memblock user API (e.g.
>> ZONE_DEVICE users do not want/need this interface). Some callers even
>> want to control where do we allocate the memmap from by configuring
>> altmap.
>>
>> Add a more generic hotplug context for arch_add_memory and __add_pages.
>> struct mhp_restrictions contains flags which contains additional
>> features to be enabled by the memory hotplug (MHP_MEMBLOCK_API
>> currently) and altmap for alternative memmap allocator.
>>
>> This patch shouldn't introduce any functional change.
>>
>> Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
>> Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
>> ---
>> arch/arm64/mm/mmu.c | 6 +++---
>> arch/ia64/mm/init.c | 6 +++---
>> arch/powerpc/mm/mem.c | 6 +++---
>> arch/s390/mm/init.c | 6 +++---
>> arch/sh/mm/init.c | 6 +++---
>> arch/x86/mm/init_32.c | 6 +++---
>> arch/x86/mm/init_64.c | 10 +++++-----
>> include/linux/memory_hotplug.h | 29 +++++++++++++++++++++++------
>> kernel/memremap.c | 10 +++++++---
>> mm/memory_hotplug.c | 10 ++++++----
>> 10 files changed, 59 insertions(+), 36 deletions(-)
>>
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index e6acfa7be4c7..db509550329d 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -1046,8 +1046,8 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
>> }
>>
>> #ifdef CONFIG_MEMORY_HOTPLUG
>> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> +int arch_add_memory(int nid, u64 start, u64 size,
>> + struct mhp_restrictions *restrictions)
>
> Should the restrictions be marked const?
>
>> {
>> int flags = 0;
>>
>> @@ -1058,6 +1058,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> size, PAGE_KERNEL, pgd_pgtable_alloc, flags);
>>
>> return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
>> - altmap, want_memblock);
>> + restrictions);
>
> Again, some strange alignment thingies going on here :)
>
>> }
>> #endif
>> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
>> index e49200e31750..379eb1f9adc9 100644
>> --- a/arch/ia64/mm/init.c
>> +++ b/arch/ia64/mm/init.c
>> @@ -666,14 +666,14 @@ mem_init (void)
>> }
>>
>> #ifdef CONFIG_MEMORY_HOTPLUG
>> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> +int arch_add_memory(int nid, u64 start, u64 size,
>> + struct mhp_restrictions *restrictions)
>> {
>> unsigned long start_pfn = start >> PAGE_SHIFT;
>> unsigned long nr_pages = size >> PAGE_SHIFT;
>> int ret;
>>
>> - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
>> + ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
>> if (ret)
>> printk("%s: Problem encountered in __add_pages() as ret=%d\n",
>> __func__, ret);
>> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
>> index 1aa27aac73c5..76deaa8525db 100644
>> --- a/arch/powerpc/mm/mem.c
>> +++ b/arch/powerpc/mm/mem.c
>> @@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
>> return -ENODEV;
>> }
>>
>> -int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> +int __meminit arch_add_memory(int nid, u64 start, u64 size,
>> + struct mhp_restrictions *restrictions)
>> {
>> unsigned long start_pfn = start >> PAGE_SHIFT;
>> unsigned long nr_pages = size >> PAGE_SHIFT;
>> @@ -127,7 +127,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
>> }
>> flush_inval_dcache_range(start, start + size);
>>
>> - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
>> + return __add_pages(nid, start_pfn, nr_pages, restrictions);
>> }
>>
>> #ifdef CONFIG_MEMORY_HOTREMOVE
>> diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
>> index 25e3113091ea..f5db961ad792 100644
>> --- a/arch/s390/mm/init.c
>> +++ b/arch/s390/mm/init.c
>> @@ -216,8 +216,8 @@ device_initcall(s390_cma_mem_init);
>>
>> #endif /* CONFIG_CMA */
>>
>> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> +int arch_add_memory(int nid, u64 start, u64 size,
>> + struct mhp_restrictions *restrictions)
>> {
>> unsigned long start_pfn = PFN_DOWN(start);
>> unsigned long size_pages = PFN_DOWN(size);
>> @@ -227,7 +227,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> if (rc)
>> return rc;
>>
>> - rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock);
>> + rc = __add_pages(nid, start_pfn, size_pages, restrictions);
>> if (rc)
>> vmem_remove_mapping(start, size);
>> return rc;
>> diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
>> index 8e004b2f1a6a..168d3a6b9358 100644
>> --- a/arch/sh/mm/init.c
>> +++ b/arch/sh/mm/init.c
>> @@ -404,15 +404,15 @@ void __init mem_init(void)
>> }
>>
>> #ifdef CONFIG_MEMORY_HOTPLUG
>> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> +int arch_add_memory(int nid, u64 start, u64 size,
>> + struct mhp_restrictions *restrictions)
>> {
>> unsigned long start_pfn = PFN_DOWN(start);
>> unsigned long nr_pages = size >> PAGE_SHIFT;
>> int ret;
>>
>> /* We only have ZONE_NORMAL, so this is easy.. */
>> - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
>> + ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
>> if (unlikely(ret))
>> printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
>>
>> diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
>> index 85c94f9a87f8..755dbed85531 100644
>> --- a/arch/x86/mm/init_32.c
>> +++ b/arch/x86/mm/init_32.c
>> @@ -850,13 +850,13 @@ void __init mem_init(void)
>> }
>>
>> #ifdef CONFIG_MEMORY_HOTPLUG
>> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> +int arch_add_memory(int nid, u64 start, u64 size,
>> + struct mhp_restrictions *restrictions)
>> {
>> unsigned long start_pfn = start >> PAGE_SHIFT;
>> unsigned long nr_pages = size >> PAGE_SHIFT;
>>
>> - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
>> + return __add_pages(nid, start_pfn, nr_pages, restrictions);
>> }
>>
>> #ifdef CONFIG_MEMORY_HOTREMOVE
>> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
>> index bccff68e3267..db42c11b48fb 100644
>> --- a/arch/x86/mm/init_64.c
>> +++ b/arch/x86/mm/init_64.c
>> @@ -777,11 +777,11 @@ static void update_end_of_memory_vars(u64 start, u64 size)
>> }
>>
>> int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
>> - struct vmem_altmap *altmap, bool want_memblock)
>> + struct mhp_restrictions *restrictions)
>> {
>> int ret;
>>
>> - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
>> + ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
>> WARN_ON_ONCE(ret);
>>
>> /* update max_pfn, max_low_pfn and high_memory */
>> @@ -791,15 +791,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
>> return ret;
>> }
>>
>> -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> +int arch_add_memory(int nid, u64 start, u64 size,
>> + struct mhp_restrictions *restrictions)
>> {
>> unsigned long start_pfn = start >> PAGE_SHIFT;
>> unsigned long nr_pages = size >> PAGE_SHIFT;
>>
>> init_memory_mapping(start, start + size);
>>
>> - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
>> + return add_pages(nid, start_pfn, nr_pages, restrictions);
>> }
>>
>> #define PAGE_INUSE 0xFD
>> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
>> index 3c8cf347804c..5bd4b56f639c 100644
>> --- a/include/linux/memory_hotplug.h
>> +++ b/include/linux/memory_hotplug.h
>> @@ -118,20 +118,37 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
>> unsigned long nr_pages, struct vmem_altmap *altmap);
>> #endif /* CONFIG_MEMORY_HOTREMOVE */
>>
>> +/*
>> + * Do we want sysfs memblock files created. This will allow userspace to online
>> + * and offline memory explicitly. Lack of this bit means that the caller has to
>> + * call move_pfn_range_to_zone to finish the initialization.
>> + */
>
> I think you can be more precise here.
>
> "Create memory block devices for added pages. This is usually the case
> for all system ram (and only system ram), as only this way memory can be
> onlined/offlined by user space and kdump to correctly detect the new
> memory using udev events."
>
> Maybe we should even go a step further and call this
>
> MHP_SYSTEM_RAM
>
> Because that is what it is right now.
>
>> +
>> +#define MHP_MEMBLOCK_API (1<<0)
>> +
>> +/*
>> + * Restrictions for the memory hotplug:
>> + * flags: MHP_ flags
>> + * altmap: alternative allocator for memmap array
>> + */
>> +struct mhp_restrictions {
>> + unsigned long flags;
>> + struct vmem_altmap *altmap;
>> +};
>> +
>> /* reasonably generic interface to expand the physical pages */
>> extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
>> - struct vmem_altmap *altmap, bool want_memblock);
>> + struct mhp_restrictions *restrictions);
>>
>> #ifndef CONFIG_ARCH_HAS_ADD_PAGES
>> static inline int add_pages(int nid, unsigned long start_pfn,
>> - unsigned long nr_pages, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> + unsigned long nr_pages, struct mhp_restrictions *restrictions)
>> {
>> - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
>> + return __add_pages(nid, start_pfn, nr_pages, restrictions);
>> }
>> #else /* ARCH_HAS_ADD_PAGES */
>> int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
>> - struct vmem_altmap *altmap, bool want_memblock);
>> + struct mhp_restrictions *restrictions);
>
> dito alignment. You have tabs configured to 8 characters, right?
>
>> #endif /* ARCH_HAS_ADD_PAGES */
>>
>> #ifdef CONFIG_NUMA
>> @@ -333,7 +350,7 @@ extern int __add_memory(int nid, u64 start, u64 size);
>> extern int add_memory(int nid, u64 start, u64 size);
>> extern int add_memory_resource(int nid, struct resource *resource);
>> extern int arch_add_memory(int nid, u64 start, u64 size,
>> - struct vmem_altmap *altmap, bool want_memblock);
>> + struct mhp_restrictions *restrictions);
>> extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>> unsigned long nr_pages, struct vmem_altmap *altmap);
>> extern bool is_memblock_offlined(struct memory_block *mem);
>> diff --git a/kernel/memremap.c b/kernel/memremap.c
>> index a856cb5ff192..cc5e3e34417d 100644
>> --- a/kernel/memremap.c
>> +++ b/kernel/memremap.c
>> @@ -149,6 +149,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
>> struct resource *res = &pgmap->res;
>> struct dev_pagemap *conflict_pgmap;
>> pgprot_t pgprot = PAGE_KERNEL;
>> + struct mhp_restrictions restrictions = {};
>> int error, nid, is_ram;
>>
>> if (!pgmap->ref || !pgmap->kill)
>> @@ -199,6 +200,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
>> if (error)
>> goto err_pfn_remap;
>>
>> + /* We do not want any optional features only our own memmap */
>> + restrictions.altmap = altmap;
>> +> mem_hotplug_begin();
>>
>> /*
>> @@ -214,7 +218,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
>> */
>> if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
>> error = add_pages(nid, align_start >> PAGE_SHIFT,
>> - align_size >> PAGE_SHIFT, NULL, false);
>> + align_size >> PAGE_SHIFT, &restrictions);
>> } else {
>> error = kasan_add_zero_shadow(__va(align_start), align_size);
>> if (error) {
>> @@ -222,8 +226,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
>> goto err_kasan;
>> }
>>
>> - error = arch_add_memory(nid, align_start, align_size, altmap,
>> - false);
>> + error = arch_add_memory(nid, align_start, align_size,
>> + &restrictions);
>
> dito alignment
>
>> }
>>
>> if (!error) {
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index d8a3e9554aec..50f77e059457 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -274,12 +274,12 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
>> * add the new pages.
>> */
>> int __ref __add_pages(int nid, unsigned long phys_start_pfn,
>> - unsigned long nr_pages, struct vmem_altmap *altmap,
>> - bool want_memblock)
>> + unsigned long nr_pages, struct mhp_restrictions *restrictions)
>> {
>> unsigned long i;
>> int err = 0;
>> int start_sec, end_sec;
>> + struct vmem_altmap *altmap = restrictions->altmap;
>>
>> /* during initialize mem_map, align hot-added range to section */
>> start_sec = pfn_to_section_nr(phys_start_pfn);
>> @@ -300,7 +300,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
>>
>> for (i = start_sec; i <= end_sec; i++) {
>> err = __add_section(nid, section_nr_to_pfn(i), altmap,
>> - want_memblock);
>> + restrictions->flags & MHP_MEMBLOCK_API);
>>
>> /*
>> * EEXIST is finally dealt with by ioresource collision
>> @@ -1102,6 +1102,7 @@ int __ref add_memory_resource(int nid, struct resource *res)
>> u64 start, size;
>> bool new_node = false;
>> int ret;
>> + struct mhp_restrictions restrictions = {};
>
> I'd make this the very first variable.
>
> Also eventually
>
> struct mhp_restrictions restrictions = {
> .restrictions = MHP_MEMBLOCK_API
> };
>
>>
>> start = res->start;
>> size = resource_size(res);
>> @@ -1126,7 +1127,8 @@ int __ref add_memory_resource(int nid, struct resource *res)
>> new_node = ret;
>>
>> /* call arch's memory hotadd */
>> - ret = arch_add_memory(nid, start, size, NULL, true);
>> + restrictions.flags = MHP_MEMBLOCK_API;
>> + ret = arch_add_memory(nid, start, size, &restrictions);
>> if (ret < 0)
>> goto error;
>>
>>
>
>

s/alignment/indentation/

I think I should take a nap :)

--

Thanks,

David / dhildenb