[PATCH -mm 1/3] mem-hotplug: turn mem_hotplug_mutex to rwsem

From: Vladimir Davydov
Date: Sun Apr 06 2014 - 11:34:56 EST


{un}lock_memory_hotplug is used to synchronize against memory hotplug.
Currently it is backed by a mutex, which makes it a bit of hammer -
threads that only want to get a stable value of online nodes mask won't
be able to proceed concurrently.

This patch fixes this by turning mem_hotplug_mutex to an rw sempahore so
that lock_memory_hotplug only takes it for reading while the memory
hotplug code locks it for writing. This will allow to invoke code under
{un}lock_memory_hotplug concurrently while still guaranteeing protection
against memory hotplug.

Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
---
include/linux/mmzone.h | 7 +++--
mm/memory_hotplug.c | 70 +++++++++++++++++++++---------------------------
mm/vmscan.c | 2 +-
3 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fac5509c18f0..586cc2d2c25e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -481,9 +481,8 @@ struct zone {
* give them a chance of being in the same cacheline.
*
* Write access to present_pages at runtime should be protected by
- * lock_memory_hotplug()/unlock_memory_hotplug(). Any reader who can't
- * tolerant drift of present_pages should hold memory hotplug lock to
- * get a stable value.
+ * mem_hotplug_sem. Any reader who can't tolerant drift of
+ * present_pages should take it for reading to get a stable value.
*
* Read access to managed_pages should be safe because it's unsigned
* long. Write access to zone->managed_pages and totalram_pages are
@@ -766,7 +765,7 @@ typedef struct pglist_data {
nodemask_t reclaim_nodes; /* Nodes allowed to reclaim from */
wait_queue_head_t kswapd_wait;
wait_queue_head_t pfmemalloc_wait;
- struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
+ struct task_struct *kswapd; /* Protected by mem_hotplug_sem */
int kswapd_max_order;
enum zone_type classzone_idx;
#ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a650db29606f..052db2e72035 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -31,6 +31,7 @@
#include <linux/stop_machine.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
+#include <linux/rwsem.h>

#include <asm/tlbflush.h>

@@ -47,16 +48,16 @@ static void generic_online_page(struct page *page);

static online_page_callback_t online_page_callback = generic_online_page;

-DEFINE_MUTEX(mem_hotplug_mutex);
+static DECLARE_RWSEM(mem_hotplug_sem);

void lock_memory_hotplug(void)
{
- mutex_lock(&mem_hotplug_mutex);
+ down_read(&mem_hotplug_sem);
}

void unlock_memory_hotplug(void)
{
- mutex_unlock(&mem_hotplug_mutex);
+ up_read(&mem_hotplug_sem);
}


@@ -727,14 +728,14 @@ int set_online_page_callback(online_page_callback_t callback)
{
int rc = -EINVAL;

- lock_memory_hotplug();
+ down_write(&mem_hotplug_sem);

if (online_page_callback == generic_online_page) {
online_page_callback = callback;
rc = 0;
}

- unlock_memory_hotplug();
+ up_write(&mem_hotplug_sem);

return rc;
}
@@ -744,14 +745,14 @@ int restore_online_page_callback(online_page_callback_t callback)
{
int rc = -EINVAL;

- lock_memory_hotplug();
+ down_write(&mem_hotplug_sem);

if (online_page_callback == callback) {
online_page_callback = generic_online_page;
rc = 0;
}

- unlock_memory_hotplug();
+ up_write(&mem_hotplug_sem);

return rc;
}
@@ -899,7 +900,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
int ret;
struct memory_notify arg;

- lock_memory_hotplug();
+ down_write(&mem_hotplug_sem);
/*
* This doesn't need a lock to do pfn_to_page().
* The section can't be removed here because of the
@@ -907,23 +908,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
*/
zone = page_zone(pfn_to_page(pfn));

+ ret = -EINVAL;
if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
- !can_online_high_movable(zone)) {
- unlock_memory_hotplug();
- return -EINVAL;
- }
+ !can_online_high_movable(zone))
+ goto out;

if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
- if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) {
- unlock_memory_hotplug();
- return -EINVAL;
- }
+ if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
+ goto out;
}
if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
- if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) {
- unlock_memory_hotplug();
- return -EINVAL;
- }
+ if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
+ goto out;
}

/* Previous code may changed the zone of the pfn range */
@@ -939,8 +935,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
ret = notifier_to_errno(ret);
if (ret) {
memory_notify(MEM_CANCEL_ONLINE, &arg);
- unlock_memory_hotplug();
- return ret;
+ goto out;
}
/*
* If this zone is not populated, then it is not in zonelist.
@@ -964,8 +959,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
(((unsigned long long) pfn + nr_pages)
<< PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
- unlock_memory_hotplug();
- return ret;
+ goto out;
}

zone->present_pages += onlined_pages;
@@ -995,9 +989,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ

if (onlined_pages)
memory_notify(MEM_ONLINE, &arg);
- unlock_memory_hotplug();
-
- return 0;
+out:
+ up_write(&mem_hotplug_sem);
+ return ret;
}
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */

@@ -1055,7 +1049,7 @@ int try_online_node(int nid)
if (node_online(nid))
return 0;

- lock_memory_hotplug();
+ down_write(&mem_hotplug_sem);
pgdat = hotadd_new_pgdat(nid, 0);
if (!pgdat) {
pr_err("Cannot online node %d due to NULL pgdat\n", nid);
@@ -1073,7 +1067,7 @@ int try_online_node(int nid)
}

out:
- unlock_memory_hotplug();
+ up_write(&mem_hotplug_sem);
return ret;
}

@@ -1117,7 +1111,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
new_pgdat = !p;
}

- lock_memory_hotplug();
+ down_write(&mem_hotplug_sem);

new_node = !node_online(nid);
if (new_node) {
@@ -1158,7 +1152,7 @@ error:
release_memory_resource(res);

out:
- unlock_memory_hotplug();
+ up_write(&mem_hotplug_sem);
return ret;
}
EXPORT_SYMBOL_GPL(add_memory);
@@ -1565,7 +1559,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
if (!test_pages_in_a_zone(start_pfn, end_pfn))
return -EINVAL;

- lock_memory_hotplug();
+ down_write(&mem_hotplug_sem);

zone = page_zone(pfn_to_page(start_pfn));
node = zone_to_nid(zone);
@@ -1672,7 +1666,7 @@ repeat:
writeback_set_ratelimit();

memory_notify(MEM_OFFLINE, &arg);
- unlock_memory_hotplug();
+ up_write(&mem_hotplug_sem);
return 0;

failed_removal:
@@ -1684,7 +1678,7 @@ failed_removal:
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);

out:
- unlock_memory_hotplug();
+ up_write(&mem_hotplug_sem);
return ret;
}

@@ -1888,7 +1882,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)

BUG_ON(check_hotplug_memory_range(start, size));

- lock_memory_hotplug();
+ down_write(&mem_hotplug_sem);

/*
* All memory blocks must be offlined before removing memory. Check
@@ -1897,10 +1891,8 @@ void __ref remove_memory(int nid, u64 start, u64 size)
*/
ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
check_memblock_offlined_cb);
- if (ret) {
- unlock_memory_hotplug();
+ if (ret)
BUG();
- }

/* remove memmap entry */
firmware_map_remove(start, start + size, "System RAM");
@@ -1909,7 +1901,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)

try_offline_node(nid);

- unlock_memory_hotplug();
+ up_write(&mem_hotplug_sem);
}
EXPORT_SYMBOL_GPL(remove_memory);
#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 06879ead7380..b6fe28a5dcd1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3418,7 +3418,7 @@ int kswapd_run(int nid)

/*
* Called by memory hotplug when all memory in a node is offlined. Caller must
- * hold lock_memory_hotplug().
+ * hold mem_hotplug_sem for writing.
*/
void kswapd_stop(int nid)
{
--
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/