Re: [PATCH v1 2/4] mm/memory_hotplug: Make unregister_memory_section() never fail

From: Oscar Salvador
Date: Wed Apr 17 2019 - 08:46:06 EST


On Tue, 2019-04-09 at 12:01 +0200, David Hildenbrand wrote:
> Failing while removing memory is mostly ignored and cannot really be
> handled. Let's treat errors in unregister_memory_section() in a nice
> way, warning, but continuing.
>
> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> Cc: "Rafael J. Wysocki" <rafael@xxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Andrew Banman <andrew.banman@xxxxxxx>
> Cc: "mike.travis@xxxxxxx" <mike.travis@xxxxxxx>
> Cc: David Hildenbrand <david@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Oscar Salvador <osalvador@xxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxx>
> Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
> Cc: Qian Cai <cai@xxxxxx>
> Cc: Wei Yang <richard.weiyang@xxxxxxxxx>
> Cc: Arun KS <arunks@xxxxxxxxxxxxxx>
> Cc: Mathieu Malaterre <malat@xxxxxxxxxx>
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> ---
> drivers/base/memory.c | 16 +++++-----------
> include/linux/memory.h | 2 +-
> mm/memory_hotplug.c | 4 +---
> 3 files changed, 7 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 0c9e22ffa47a..f180427e48f4 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -734,15 +734,18 @@ unregister_memory(struct memory_block *memory)
> {
> BUG_ON(memory->dev.bus != &memory_subsys);
>
> - /* drop the ref. we got in remove_memory_section() */
> + /* drop the ref. we got via find_memory_block() */
> put_device(&memory->dev);
> device_unregister(&memory->dev);
> }
>
> -static int remove_memory_section(struct mem_section *section)
> +void unregister_memory_section(struct mem_section *section)
> {
> struct memory_block *mem;
>
> + if (WARN_ON_ONCE(!present_section(section)))
> + return;
> +
> mutex_lock(&mem_sysfs_mutex);
>
> /*
> @@ -763,15 +766,6 @@ static int remove_memory_section(struct
> mem_section *section)
>
> out_unlock:
> mutex_unlock(&mem_sysfs_mutex);
> - return 0;
> -}
> -
> -int unregister_memory_section(struct mem_section *section)
> -{
> - if (!present_section(section))
> - return -EINVAL;
> -
> - return remove_memory_section(section);
> }
> #endif /* CONFIG_MEMORY_HOTREMOVE */
>
> diff --git a/include/linux/memory.h b/include/linux/memory.h
> index a6ddefc60517..e1dc1bb2b787 100644
> --- a/include/linux/memory.h
> +++ b/include/linux/memory.h
> @@ -113,7 +113,7 @@ extern int
> register_memory_isolate_notifier(struct notifier_block *nb);
> extern void unregister_memory_isolate_notifier(struct notifier_block
> *nb);
> int hotplug_memory_register(int nid, struct mem_section *section);
> #ifdef CONFIG_MEMORY_HOTREMOVE
> -extern int unregister_memory_section(struct mem_section *);
> +extern void unregister_memory_section(struct mem_section *);
> #endif
> extern int memory_dev_init(void);
> extern int memory_notify(unsigned long val, void *v);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 696ed7ee5e28..b0cb05748f99 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -527,9 +527,7 @@ static int __remove_section(struct zone *zone,
> struct mem_section *ms,
> if (!valid_section(ms))
> return ret;
>
> - ret = unregister_memory_section(ms);
> - if (ret)
> - return ret;
> + unregister_memory_section(ms);

So, technically unregister_memory_section() can __only__ fail in case
the section is not present, returning -EINVAL.

Now, I was checking how the pair valid/present sections work.
Unless I am mistaken, we only mark sections as memmap those sections
that are present.

This can come from two paths:

- Early boot:
* memblocks_present
memory_present - mark sections as present
sparse_init - iterates only over present sections
sparse_init_nid
sparse_init_one_section - mark section as valid

- Hotplug path:
* sparse_add_one_section
section_mark_present - mark section as present
sparse_init_one_section - mark section as valid


During early boot, sparse_init iterates __only__ over present sections,
so only those are marked valid as well, and during hotplug, the section
is both marked present and valid.

All in all, I think that we are safe if we remove the present_section
check in your new unregister_memory_section(), as a valid_section
cannot be non-present, and we do already catch those early in
__remove_section().

Then, the only thing missing to be completely error-less in that
codepath is to make unregister_mem_sect_under_nodes() void return-type.
Not that it matters a lot as we are already ignoring its return code,
but I find that quite disturbing and wrong.

So, would you like to take this patch in your patchset in case you re-
submit?

From: Oscar Salvador <osalvador@xxxxxxx>
Date: Wed, 17 Apr 2019 14:41:32 +0200
Subject: [PATCH] mm,memory_hotplug: Refactor
unregister_mem_sect_under_nodes

Currently, the return code of unregister_mem_sect_under_nodes gets
ignored.
It can only fail in case we are not able to allocate a nodemask_t,
but such allocation is too small, so it is not really clear
we can actually fail.

The maximum a nodemask_t can be is 128 bytes, so we can make the whole
thing more simple if we simply allocate a nodemask_t within the stack.

With this change, we can convert unregister_mem_sect_under_nodes to
be void-return type.

Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
---
drivers/base/node.c | 16 ++++------------
include/linux/node.h | 5 ++---
2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 8598fcbd2a17..fcd0f442e73d 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -802,19 +802,13 @@ int register_mem_sect_under_node(struct
memory_block *mem_blk, void *arg)
}

/* unregister memory section under all nodes that it spans */
-int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
+void unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
unsigned long phys_index)
{
- NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL);
+ nodemask_t unlinked_nodes;
unsigned long pfn, sect_start_pfn, sect_end_pfn;

- if (!mem_blk) {
- NODEMASK_FREE(unlinked_nodes);
- return -EFAULT;
- }
- if (!unlinked_nodes)
- return -ENOMEM;
- nodes_clear(*unlinked_nodes);
+ nodes_clear(unlinked_nodes);

sect_start_pfn = section_nr_to_pfn(phys_index);
sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
@@ -826,15 +820,13 @@ int unregister_mem_sect_under_nodes(struct
memory_block *mem_blk,
continue;
if (!node_online(nid))
continue;
- if (node_test_and_set(nid, *unlinked_nodes))
+ if (node_test_and_set(nid, unlinked_nodes))
continue;
sysfs_remove_link(&node_devices[nid]->dev.kobj,
kobject_name(&mem_blk->dev.kobj));
sysfs_remove_link(&mem_blk->dev.kobj,
kobject_name(&node_devices[nid]->dev.kobj));
}
- NODEMASK_FREE(unlinked_nodes);
- return 0;
}

int link_mem_sections(int nid, unsigned long start_pfn, unsigned long
end_pfn)
diff --git a/include/linux/node.h b/include/linux/node.h
index 1a557c589ecb..094ec9922bf5 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -139,7 +139,7 @@ extern int register_cpu_under_node(unsigned int
cpu, unsigned int nid);
extern int unregister_cpu_under_node(unsigned int cpu, unsigned int
nid);
extern int register_mem_sect_under_node(struct memory_block *mem_blk,
void *arg);
-extern int unregister_mem_sect_under_nodes(struct memory_block
*mem_blk,
+extern void unregister_mem_sect_under_nodes(struct memory_block
*mem_blk,
unsigned long phys_index);

extern int register_memory_node_under_compute_node(unsigned int
mem_nid,
@@ -176,10 +176,9 @@ static inline int
register_mem_sect_under_node(struct memory_block *mem_blk,
{
return 0;
}
-static inline int unregister_mem_sect_under_nodes(struct memory_block
*mem_blk,
+static inline void unregister_mem_sect_under_nodes(struct memory_block
*mem_blk,
unsigned long
phys_index)
{
- return 0;
}

static inline void
register_hugetlbfs_with_node(node_registration_func_t reg,
--
2.12.3


--
Oscar Salvador
SUSE L3