Re: [PATCH v1 2/6] mm/memory_hotplug: make add_memory() take the device_hotplug_lock

From: Rafael J. Wysocki
Date: Tue Sep 18 2018 - 17:20:03 EST


On Tue, Sep 18, 2018 at 1:48 PM David Hildenbrand <david@xxxxxxxxxx> wrote:
>
> add_memory() currently does not take the device_hotplug_lock, however
> it is already called under the lock from
> arch/powerpc/platforms/pseries/hotplug-memory.c
> drivers/acpi/acpi_memhotplug.c
> to synchronize against CPU hot-remove and similar.
>
> In general, we should hold the device_hotplug_lock when adding memory
> to synchronize against online/offline requests (e.g. from user space) -
> which already resulted in lock inversions due to device_lock() and
> mem_hotplug_lock - see 30467e0b3be ("mm, hotplug: fix concurrent memory
> hot-add deadlock"). add_memory()/add_memory_resource() will create memory
> block devices, so this really feels like the right thing to do.
>
> Holding the device_hotplug_lock makes sure that a memory block device
> can really only be accessed (e.g. via .online/.state) from user space,
> once the memory has been fully added to the system.
>
> The lock is not held yet in
> drivers/xen/balloon.c
> arch/powerpc/platforms/powernv/memtrace.c
> drivers/s390/char/sclp_cmd.c
> drivers/hv/hv_balloon.c
> So, let's either use the locked variants or take the lock.
>
> Don't export add_memory_resource(), as it once was exported to be used
> by XEN, which is never built as a module. If somebody requires it, we
> also have to export a locked variant (as device_hotplug_lock is never
> exported).
>
> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
> Cc: Paul Mackerras <paulus@xxxxxxxxx>
> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
> Cc: "Rafael J. Wysocki" <rjw@xxxxxxxxxxxxx>
> Cc: Len Brown <lenb@xxxxxxxxxx>
> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
> Cc: Juergen Gross <jgross@xxxxxxxx>
> Cc: Nathan Fontenot <nfont@xxxxxxxxxxxxxxxxxx>
> Cc: John Allen <jallen@xxxxxxxxxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxx>
> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>
> Cc: Vlastimil Babka <vbabka@xxxxxxx>
> Cc: Oscar Salvador <osalvador@xxxxxxx>
> Cc: Mathieu Malaterre <malat@xxxxxxxxxx>
> Cc: Pavel Tatashin <pavel.tatashin@xxxxxxxxxxxxx>
> Cc: YASUAKI ISHIMATSU <yasu.isimatu@xxxxxxxxx>
> Reviewed-by: Pavel Tatashin <pavel.tatashin@xxxxxxxxxxxxx>
> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
> ---
> .../platforms/pseries/hotplug-memory.c | 2 +-
> drivers/acpi/acpi_memhotplug.c | 2 +-
> drivers/base/memory.c | 9 ++++++--
> drivers/xen/balloon.c | 3 +++
> include/linux/memory_hotplug.h | 1 +
> mm/memory_hotplug.c | 22 ++++++++++++++++---
> 6 files changed, 32 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index b3f54466e25f..2e6f41dc103a 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -702,7 +702,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
> nid = memory_add_physaddr_to_nid(lmb->base_addr);
>
> /* Add the memory */
> - rc = add_memory(nid, lmb->base_addr, block_sz);
> + rc = __add_memory(nid, lmb->base_addr, block_sz);
> if (rc) {
> dlpar_remove_device_tree_lmb(lmb);
> return rc;
> diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
> index 811148415993..8fe0960ea572 100644
> --- a/drivers/acpi/acpi_memhotplug.c
> +++ b/drivers/acpi/acpi_memhotplug.c
> @@ -228,7 +228,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
> if (node < 0)
> node = memory_add_physaddr_to_nid(info->start_addr);
>
> - result = add_memory(node, info->start_addr, info->length);
> + result = __add_memory(node, info->start_addr, info->length);
>
> /*
> * If the memory block has been used by the kernel, add_memory()
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 817320c7c4c1..40cac122ec73 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -519,15 +519,20 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
> if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
> return -EINVAL;
>
> + ret = lock_device_hotplug_sysfs();
> + if (ret)
> + goto out;
> +
> nid = memory_add_physaddr_to_nid(phys_addr);
> - ret = add_memory(nid, phys_addr,
> - MIN_MEMORY_BLOCK_SIZE * sections_per_block);
> + ret = __add_memory(nid, phys_addr,
> + MIN_MEMORY_BLOCK_SIZE * sections_per_block);
>
> if (ret)
> goto out;
>
> ret = count;
> out:
> + unlock_device_hotplug();
> return ret;
> }
>
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index e12bb256036f..6bab019a82b1 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -395,7 +395,10 @@ static enum bp_state reserve_additional_memory(void)
> * callers drop the mutex before trying again.
> */
> mutex_unlock(&balloon_mutex);
> + /* add_memory_resource() requires the device_hotplug lock */
> + lock_device_hotplug();
> rc = add_memory_resource(nid, resource, memhp_auto_online);
> + unlock_device_hotplug();
> mutex_lock(&balloon_mutex);
>
> if (rc) {
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 1f096852f479..ffd9cd10fcf3 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -324,6 +324,7 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
> extern void __ref free_area_init_core_hotplug(int nid);
> extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
> void *arg, int (*func)(struct memory_block *, void *));
> +extern int __add_memory(int nid, u64 start, u64 size);
> extern int add_memory(int nid, u64 start, u64 size);
> extern int add_memory_resource(int nid, struct resource *resource, bool online);
> extern int arch_add_memory(int nid, u64 start, u64 size,
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index b8b1bd970322..ef5444145c88 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1111,7 +1111,12 @@ static int online_memory_block(struct memory_block *mem, void *arg)
> return device_online(&mem->dev);
> }
>
> -/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
> +/*
> + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
> + * and online/offline operations (triggered e.g. by sysfs).
> + *
> + * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
> + */
> int __ref add_memory_resource(int nid, struct resource *res, bool online)
> {
> u64 start, size;
> @@ -1180,9 +1185,9 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
> mem_hotplug_done();
> return ret;
> }
> -EXPORT_SYMBOL_GPL(add_memory_resource);
>
> -int __ref add_memory(int nid, u64 start, u64 size)
> +/* requires device_hotplug_lock, see add_memory_resource() */
> +int __ref __add_memory(int nid, u64 start, u64 size)
> {
> struct resource *res;
> int ret;
> @@ -1196,6 +1201,17 @@ int __ref add_memory(int nid, u64 start, u64 size)
> release_memory_resource(res);
> return ret;
> }
> +
> +int add_memory(int nid, u64 start, u64 size)
> +{
> + int rc;
> +
> + lock_device_hotplug();
> + rc = __add_memory(nid, start, size);
> + unlock_device_hotplug();
> +
> + return rc;
> +}
> EXPORT_SYMBOL_GPL(add_memory);
>
> #ifdef CONFIG_MEMORY_HOTREMOVE
> --

Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx>