[RFC PATCH 07/14] mm/hms: register main memory with heterogeneous memory system

From: jglisse
Date: Mon Dec 03 2018 - 18:36:10 EST


From: Jérôme Glisse <jglisse@xxxxxxxxxx>

Register main memory as a target under the HMS scheme. Memory is
registered per node (one target device per node). We also create a
default link to connect the main memory and the CPUs that are in the
same node. For details see Documentation/vm/hms.rst.

This is done to allow applications to use a single API for both regular
memory and device memory.

Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
Cc: Rafael J. Wysocki <rafael@xxxxxxxxxx>
Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
Cc: Haggai Eran <haggaie@xxxxxxxxxxxx>
Cc: Balbir Singh <balbirs@xxxxxxxxxxx>
Cc: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxxxxx>
Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Felix Kuehling <felix.kuehling@xxxxxxx>
Cc: Philip Yang <Philip.Yang@xxxxxxx>
Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Paul Blinzer <Paul.Blinzer@xxxxxxx>
Cc: Logan Gunthorpe <logang@xxxxxxxxxxxx>
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: Ralph Campbell <rcampbell@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxxxx>
Cc: Jonathan Cameron <jonathan.cameron@xxxxxxxxxx>
Cc: Mark Hairgrove <mhairgrove@xxxxxxxxxx>
Cc: Vivek Kini <vkini@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Dave Airlie <airlied@xxxxxxxxxx>
Cc: Ben Skeggs <bskeggs@xxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
---
drivers/base/node.c | 65 +++++++++++++++++++++++++++++++++++++++++++-
include/linux/node.h | 6 ++++
2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 86d6cd92ce3d..05621ba3cf13 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -323,6 +323,11 @@ static int register_node(struct node *node, int num)
if (error)
put_device(&node->dev);
else {
+ hms_link_register(&node->link, &node->dev, 0);
+ hms_target_register(&node->target, &node->dev,
+ num, NULL, 0, 0);
+ hms_link_target(node->link, node->target);
+
hugetlb_register_node(node);

compaction_register_node(node);
@@ -339,6 +344,9 @@ static int register_node(struct node *node, int num)
*/
void unregister_node(struct node *node)
{
+ hms_target_unregister(&node->target);
+ hms_link_unregister(&node->link);
+
hugetlb_unregister_node(node); /* no-op, if memoryless node */

device_unregister(&node->dev);
@@ -415,6 +423,9 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
sect_end_pfn += PAGES_PER_SECTION - 1;
for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+#if defined(CONFIG_HMS)
+ unsigned long size = PAGE_SIZE;
+#endif
int page_nid;

/*
@@ -445,9 +456,35 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
if (ret)
return ret;

- return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
+ ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj,
&node_devices[nid]->dev.kobj,
kobject_name(&node_devices[nid]->dev.kobj));
+ if (ret)
+ return ret;
+
+#if defined(CONFIG_HMS)
+ /*
+ * Right now I do not see any easier way to get the size
+ * in bytes of valid memory that is added to this node.
+ */
+ for (++pfn; pfn <= sect_end_pfn; pfn++) {
+ if (!pfn_present(pfn)) {
+ pfn = round_down(pfn + PAGES_PER_SECTION,
+ PAGES_PER_SECTION) - 1;
+ continue;
+ }
+ page_nid = get_nid_for_pfn(pfn);
+ if (page_nid < 0)
+ continue;
+ if (page_nid != nid)
+ continue;
+ size += PAGE_SIZE;
+ }
+
+ hms_target_add_memory(node_devices[nid]->target, size);
+#endif
+
+ return 0;
}
/* mem section does not span the specified node */
return 0;
@@ -471,6 +508,10 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
sect_start_pfn = section_nr_to_pfn(phys_index);
sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+#if defined(CONFIG_HMS)
+ unsigned long size = 0;
+ int page_nid;
+#endif
int nid;

nid = get_nid_for_pfn(pfn);
@@ -484,6 +525,28 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
kobject_name(&mem_blk->dev.kobj));
sysfs_remove_link(&mem_blk->dev.kobj,
kobject_name(&node_devices[nid]->dev.kobj));
+
+#if defined(CONFIG_HMS)
+ /*
+ * Right now I do not see any easier way to get the size
+ * in bytes of valid memory that is removed from this node.
+ */
+ for (; pfn <= sect_end_pfn; pfn++) {
+ if (!pfn_present(pfn)) {
+ pfn = round_down(pfn + PAGES_PER_SECTION,
+ PAGES_PER_SECTION) - 1;
+ continue;
+ }
+ page_nid = get_nid_for_pfn(pfn);
+ if (page_nid < 0)
+ continue;
+ if (page_nid != nid)
+ break;
+ size += PAGE_SIZE;
+ }
+
+ hms_target_remove_memory(node_devices[nid]->target, size);
+#endif
}
NODEMASK_FREE(unlinked_nodes);
return 0;
diff --git a/include/linux/node.h b/include/linux/node.h
index 257bb3d6d014..297b01d3c1ed 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -15,6 +15,7 @@
#ifndef _LINUX_NODE_H_
#define _LINUX_NODE_H_

+#include <linux/hms.h>
#include <linux/device.h>
#include <linux/cpumask.h>
#include <linux/workqueue.h>
@@ -22,6 +23,11 @@
struct node {
struct device dev;

+#if defined(CONFIG_HMS)
+ struct hms_target *target;
+ struct hms_link *link;
+#endif
+
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
struct work_struct node_work;
#endif
--
2.17.2