Re: [RFC PATCH 0/5] mm, memory_hotplug: allocate memmap from hotadded memory

From: Michal Hocko
Date: Fri Jul 28 2017 - 08:19:52 EST


On Thu 27-07-17 08:56:52, Michal Hocko wrote:
> On Wed 26-07-17 17:06:59, Jerome Glisse wrote:
> [...]
> > This does not seem to be an opt-in change, i.e. if I am reading patch 3
> > correctly, if an altmap is not provided to __add_pages() you fall back
> > to allocating from the beginning of the zone. This will not work with
> > HMM, i.e. device private memory. So at the very least I would like to see
> > some way to opt out of this. Maybe a new argument like bool forbid_altmap?
>
> OK, I see! I will think about how to make a sane api for that.

This is what I came up with. The s390 guys mentioned that the new range
cannot simply be used at this stage yet, so this will probably need some
other changes, but I guess we want an opt-in approach with an arch veto
in general.

So what do you think about the following? Only x86 is updated for now,
and I will split it into two parts, but the idea should be clear at least.
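
To illustrate against Jerome's concern, a hypothetical caller (the names
below are made up, but it mirrors what devm_memremap_pages does in the
patch) which does not want the memmap carved out of the hotadded range
simply provides its own altmap, and __add_pages will then use that one
instead:

/*
 * Hypothetical caller sketch, not part of the patch: supply an explicit
 * altmap so that __add_pages() allocates the memmap from a caller chosen
 * pfn range rather than from the hotadded range, and leave
 * MHP_MEMBLOCK_API clear to skip the sysfs memblock files.
 */
static int example_hotadd_with_altmap(int nid, u64 start, u64 size,
		struct vmem_altmap *altmap)
{
	struct mhp_restrictions restrictions = {
		.flags = 0,		/* no sysfs memblock files */
		.altmap = altmap,	/* memmap backed by the caller */
	};

	return arch_add_memory(nid, start, size, &restrictions);
}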
---
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e4f749e5652f..a4a29af28bcf 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -772,7 +772,8 @@ static void update_end_of_memory_vars(u64 start, u64 size)
}
}

-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size,
+ struct mhp_restrictions *restrictions)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -780,7 +781,9 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)

init_memory_mapping(start, start + size);

- ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ /* newly hotadded memory range is ready to be used for the memmap */
+ restrictions->flags |= MHP_RANGE_ACCESSIBLE;
+ ret = __add_pages(nid, start_pfn, nr_pages, restrictions);
WARN_ON_ONCE(ret);

/* update max_pfn, max_low_pfn and high_memory */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index f2636ad2d00f..928d93e2a555 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -129,9 +129,29 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages);
#endif /* CONFIG_MEMORY_HOTREMOVE */

+/*
+ * Do we want sysfs memblock files created? They allow userspace to online
+ * and offline memory explicitly. Lack of this bit means that the caller
+ * has to call move_pfn_range_to_zone to finish the initialization.
+ */
+#define MHP_MEMBLOCK_API (1UL << 0)
+
+/*
+ * Is the hotadded memory accessible directly or does it need special
+ * handling? If the bit is set we will try to allocate the memmap for
+ * the range from within the added memory.
+ */
+#define MHP_RANGE_ACCESSIBLE (1UL << 1)
+
+/* Restrictions for the memory hotplug */
+struct mhp_restrictions {
+ unsigned long flags; /* MHP_ flags */
+ struct vmem_altmap *altmap; /* use this alternative allocator for memmaps */
+};
+
/* reasonably generic interface to expand the physical pages */
extern int __add_pages(int nid, unsigned long start_pfn,
- unsigned long nr_pages, bool want_memblock);
+ unsigned long nr_pages, struct mhp_restrictions *restrictions);

#ifdef CONFIG_NUMA
extern int memory_add_physaddr_to_nid(u64 start);
@@ -306,7 +326,8 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource, bool online);
-extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
+extern int arch_add_memory(int nid, u64 start, u64 size,
+ struct mhp_restrictions *restrictions);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index a72eb5932d2f..cf0998cfcb13 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -286,6 +286,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
struct dev_pagemap *pgmap;
struct page_map *page_map;
int error, nid, is_ram;
+ struct mhp_restrictions restrictions = {};
unsigned long pfn;

align_start = res->start & ~(SECTION_SIZE - 1);
@@ -357,8 +358,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (error)
goto err_pfn_remap;

+ /* We do not want any optional features, only our own memmap */
+ restrictions.altmap = to_vmem_altmap((unsigned long) pfn_to_page(align_start >> PAGE_SHIFT));
+
mem_hotplug_begin();
- error = arch_add_memory(nid, align_start, align_size, false);
+ error = arch_add_memory(nid, align_start, align_size, &restrictions);
if (!error)
move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
align_start >> PAGE_SHIFT,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 19037d0191e5..9d11c3b5b448 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -287,12 +287,13 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
* add the new pages.
*/
int __ref __add_pages(int nid, unsigned long phys_start_pfn,
- unsigned long nr_pages, bool want_memblock)
+ unsigned long nr_pages,
+ struct mhp_restrictions *restrictions)
{
unsigned long i;
int err = 0;
int start_sec, end_sec;
- struct vmem_altmap *altmap;
+ struct vmem_altmap *altmap = restrictions->altmap;
struct vmem_altmap __section_altmap = {.base_pfn = phys_start_pfn};

/* during initialize mem_map, align hot-added range to section */
@@ -301,10 +302,9 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,

/*
* Check device specific altmap and fallback to allocating from the
- * begining of the section otherwise
+ * beginning of the added range otherwise
*/
- altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
- if (!altmap) {
+ if (!altmap && (restrictions->flags & MHP_RANGE_ACCESSIBLE)) {
__section_altmap.free = nr_pages;
__section_altmap.flush_alloc_pfns = mark_vmemmap_pages;
altmap = &__section_altmap;
@@ -324,7 +324,9 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
}

for (i = start_sec; i <= end_sec; i++) {
- err = __add_section(nid, section_nr_to_pfn(i), want_memblock, altmap);
+ err = __add_section(nid, section_nr_to_pfn(i),
+ restrictions->flags & MHP_MEMBLOCK_API,
+ altmap);

/*
* EEXIST is finally dealt with by ioresource collision
@@ -1160,6 +1162,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
bool new_pgdat;
bool new_node;
int ret;
+ struct mhp_restrictions restrictions = {};

start = res->start;
size = resource_size(res);
@@ -1191,8 +1194,10 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
goto error;
}

+ restrictions.flags = MHP_MEMBLOCK_API;
+
/* call arch's memory hotadd */
- ret = arch_add_memory(nid, start, size, true);
+ ret = arch_add_memory(nid, start, size, &restrictions);

if (ret < 0)
goto error;
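
And to illustrate the arch veto mentioned above, this is roughly (a
hypothetical sketch, s390 is not actually updated here) what an
architecture which cannot access the new range at this stage would do -
simply not set MHP_RANGE_ACCESSIBLE, so __add_pages will not place the
memmap inside the hotadded range:

/*
 * Hypothetical arch_add_memory() sketch for an architecture which cannot
 * access the hotadded range yet (e.g. s390). MHP_RANGE_ACCESSIBLE is
 * deliberately left clear so that __add_pages() does not try to allocate
 * the memmap from within the new range.
 */
int arch_add_memory(int nid, u64 start, u64 size,
		struct mhp_restrictions *restrictions)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	/* arch specific preparation of the range would go here */

	return __add_pages(nid, start_pfn, nr_pages, restrictions);
}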
--
Michal Hocko
SUSE Labs