[PATCH RFC 7/8] mm: allow to control onlining/offlining of memory by a driver
From: David Hildenbrand
Date: Fri Apr 13 2018 - 09:33:44 EST
Some devices (esp. paravirtualized) might want to control
- when to online/offline a memory block
- how to online memory (MOVABLE/NORMAL)
- in which granularity to online/offline memory
So let's add a new flag "driver_managed" and disallow to change the
state by user space. Device onlining/offlining will still work, however
the memory will not be actually onlined/offlined. That has to be handled
by the device driver that owns the memory.
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
drivers/base/memory.c | 22 ++++++++++++++--------
drivers/xen/balloon.c | 2 +-
include/linux/memory.h | 1 +
include/linux/memory_hotplug.h | 4 +++-
mm/memory_hotplug.c | 34 ++++++++++++++++++++++++++++++++--
5 files changed, 51 insertions(+), 12 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index bffe8616bd55..3b8616551561 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -231,27 +231,28 @@ static bool pages_correctly_probed(unsigned long start_pfn)
* Must already be protected by mem_hotplug_begin().
*/
static int
-memory_block_action(unsigned long phys_index, unsigned long action, int online_type)
+memory_block_action(struct memory_block *mem, unsigned long action)
{
- unsigned long start_pfn;
+ unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
- int ret;
+ int ret = 0;
- start_pfn = section_nr_to_pfn(phys_index);
+ if (mem->driver_managed)
+ return 0;
switch (action) {
case MEM_ONLINE:
if (!pages_correctly_probed(start_pfn))
return -EBUSY;
- ret = online_pages(start_pfn, nr_pages, online_type);
+ ret = online_pages(start_pfn, nr_pages, mem->online_type);
break;
case MEM_OFFLINE:
ret = offline_pages(start_pfn, nr_pages);
break;
default:
WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
- "%ld\n", __func__, phys_index, action, action);
+ "%ld\n", __func__, mem->start_section_nr, action, action);
ret = -EINVAL;
}
@@ -269,8 +270,7 @@ static int memory_block_change_state(struct memory_block *mem,
if (to_state == MEM_OFFLINE)
mem->state = MEM_GOING_OFFLINE;
- ret = memory_block_action(mem->start_section_nr, to_state,
- mem->online_type);
+ ret = memory_block_action(mem, to_state);
mem->state = ret ? from_state_req : to_state;
@@ -350,6 +350,11 @@ store_mem_state(struct device *dev,
*/
mem_hotplug_begin();
+ if (mem->driver_managed) {
+ ret = -EINVAL;
+ goto out;
+ }
+
switch (online_type) {
case MMOP_ONLINE_KERNEL:
case MMOP_ONLINE_MOVABLE:
@@ -364,6 +369,7 @@ store_mem_state(struct device *dev,
ret = -EINVAL; /* should never happen */
}
+out:
mem_hotplug_done();
err:
unlock_device_hotplug();
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 065f0b607373..89981d573c06 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -401,7 +401,7 @@ static enum bp_state reserve_additional_memory(void)
* callers drop the mutex before trying again.
*/
mutex_unlock(&balloon_mutex);
- rc = add_memory_resource(nid, resource, memhp_auto_online);
+ rc = add_memory_resource(nid, resource, memhp_auto_online, false);
mutex_lock(&balloon_mutex);
if (rc) {
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 9f8cd856ca1e..018c5e5ecde1 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -29,6 +29,7 @@ struct memory_block {
unsigned long state; /* serialized by the dev->lock */
int section_count; /* serialized by mem_sysfs_mutex */
int online_type; /* for passing data to online routine */
+ bool driver_managed; /* driver handles online/offline */
int phys_device; /* to which fru does this belong? */
void *hw; /* optional pointer to fw/hw data */
int (*phys_callback)(struct memory_block *);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e0e49b5b1ee1..46c6ceb1110d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -320,7 +320,9 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
-extern int add_memory_resource(int nid, struct resource *resource, bool online);
+extern int add_memory_driver_managed(int nid, u64 start, u64 size);
+extern int add_memory_resource(int nid, struct resource *resource, bool online,
+ bool driver_managed);
extern int arch_add_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap, bool want_memblock);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1d6054edc241..ac14ea772792 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1108,8 +1108,15 @@ static int online_memory_block(struct memory_block *mem, void *arg)
return device_online(&mem->dev);
}
+static int mark_memory_block_driver_managed(struct memory_block *mem, void *arg)
+{
+ mem->driver_managed = true;
+ return 0;
+}
+
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
-int __ref add_memory_resource(int nid, struct resource *res, bool online)
+int __ref add_memory_resource(int nid, struct resource *res, bool online,
+ bool driver_managed)
{
u64 start, size;
pg_data_t *pgdat = NULL;
@@ -1117,6 +1124,9 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
bool new_node;
int ret;
+ if (online && driver_managed)
+ return -EINVAL;
+
start = res->start;
size = resource_size(res);
@@ -1188,6 +1198,9 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
if (online)
walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
NULL, online_memory_block);
+ else if (driver_managed)
+ walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1),
+ NULL, mark_memory_block_driver_managed);
goto out;
@@ -1212,13 +1225,30 @@ int __ref add_memory(int nid, u64 start, u64 size)
if (IS_ERR(res))
return PTR_ERR(res);
- ret = add_memory_resource(nid, res, memhp_auto_online);
+ ret = add_memory_resource(nid, res, memhp_auto_online, false);
if (ret < 0)
release_memory_resource(res);
return ret;
}
EXPORT_SYMBOL_GPL(add_memory);
+int __ref add_memory_driver_managed(int nid, u64 start, u64 size)
+{
+ struct resource *res;
+ int ret;
+
+ res = register_memory_resource(start, size);
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+
+ ret = add_memory_resource(nid, res, false, true);
+ if (ret < 0)
+ release_memory_resource(res);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(add_memory_driver_managed);
+
+
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
* A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
--
2.14.3