[RFC PATCH v3 3/4] node: add interleave weights to node accessor

From: Gregory Price
Date: Mon Oct 30 2023 - 20:38:36 EST


Add a configurable interleave weight to the node for each possible
accessor. The intent of this weight is to enable set_mempolicy() to
distribute memory across nodes based on the accessor and the
effective bandwidth available.

The goal is to maximize the effective use of available bandwidth.

The default weight is 1 for all nodes, which will mimic the current
interleave (basic round-robin).

Signed-off-by: Gregory Price <gregory.price@xxxxxxxxxxxx>
Suggested-by: Ying Huang <ying.huang@xxxxxxxxx>
Suggested-by: Ravi Shankar <ravis.opensrc@xxxxxxxxxx>
---
drivers/base/node.c | 95 ++++++++++++++++++++++++++++++++++++++++++++
include/linux/node.h | 17 ++++++++
2 files changed, 112 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index b09c9c8e6830..29bb3874a885 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -83,9 +83,84 @@ struct node_access_nodes {
#ifdef CONFIG_HMEM_REPORTING
struct node_hmem_attrs hmem_attrs;
#endif
+ unsigned char il_weight;
};
#define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)

+/* Largest value il_weight_store() accepts; 0 and anything above this get -EINVAL. */
+#define MAX_NODE_INTERLEAVE_WEIGHT 100
+/*
+ * sysfs read handler for the "il_weight" attribute.
+ *
+ * Emits the interleave weight held by the node_access_nodes object that
+ * embeds @dev, formatted as an unsigned decimal followed by a newline.
+ * Returns whatever sysfs_emit() returns.
+ */
+static ssize_t il_weight_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct node_access_nodes *access = to_access_nodes(dev);
+
+	return sysfs_emit(buf, "%u\n", access->il_weight);
+}
+
+/*
+ * sysfs write handler for the "il_weight" attribute.
+ *
+ * Parses @buf with kstrtou8() (base 0) and, if the value lies in
+ * [1, MAX_NODE_INTERLEAVE_WEIGHT], stores it as the interleave weight of the
+ * node_access_nodes object that embeds @dev.
+ *
+ * Returns @len on success, the kstrtou8() error code when parsing fails, or
+ * -EINVAL when the parsed value is 0 or above MAX_NODE_INTERLEAVE_WEIGHT.
+ */
+static ssize_t il_weight_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t len)
+{
+	unsigned char weight;
+	int ret;
+
+	ret = kstrtou8(buf, 0, &weight);
+	if (ret)
+		return ret;
+
+	/* Zero is rejected along with anything above the cap. */
+	if (!weight || weight > MAX_NODE_INTERLEAVE_WEIGHT)
+		return -EINVAL;
+
+	to_access_nodes(dev)->il_weight = weight;
+	return len;
+}
+/*
+ * dev_attr_il_weight is referenced only from this file as far as this patch
+ * shows, so give it internal linkage instead of exporting an undeclared
+ * global symbol.
+ */
+static DEVICE_ATTR_RW(il_weight);
+
+/*
+ * node_get_il_weight - look up the interleave weight of a single node
+ * @nid: index into node_devices[] of the node being queried
+ * @access_nid: value matched against the ->access field of each entry on
+ *              that node's access_list
+ *
+ * Returns the il_weight of the first entry whose ->access equals
+ * @access_nid, or 1 when node_devices[@nid] is NULL or no entry matches.
+ */
+unsigned char node_get_il_weight(unsigned int nid, unsigned int access_nid)
+{
+	struct node *target = node_devices[nid];
+	struct node_access_nodes *entry;
+
+	if (target) {
+		list_for_each_entry(entry, &target->access_list, list_node) {
+			if (entry->access == access_nid)
+				return entry->il_weight;
+		}
+	}
+
+	/* Default weight: no node device, or no matching access entry. */
+	return 1;
+}
+
+/*
+ * nodes_get_il_weights - collect interleave weights for a set of nodes
+ * @access_nid: value matched against the ->access field of each node's
+ *              access_list entries (passed through to node_get_il_weight())
+ * @nodes: mask of node ids to query
+ * @weights: output array indexed by node id; only the slots of nodes set in
+ *           @nodes are written, so it must be large enough for the highest
+ *           id in the mask
+ *
+ * Each selected node gets the weight node_get_il_weight() reports for it,
+ * which is 1 when the node has no device or no matching access entry.
+ *
+ * Returns the sum of all weights written to @weights.
+ */
+unsigned int nodes_get_il_weights(unsigned int access_nid, nodemask_t *nodes,
+				  unsigned char *weights)
+{
+	unsigned int total = 0;
+	unsigned int nid;
+
+	for_each_node_mask(nid, *nodes) {
+		/* Reuse the single-node lookup instead of open-coding it. */
+		weights[nid] = node_get_il_weight(nid, access_nid);
+		total += weights[nid];
+	}
+	return total;
+}
+
static struct attribute *node_init_access_node_attrs[] = {
NULL,
};
@@ -116,6 +191,7 @@ static void node_remove_accesses(struct node *node)

list_for_each_entry_safe(c, cnext, &node->access_list, list_node) {
list_del(&c->list_node);
+ device_remove_file(&c->dev, &dev_attr_il_weight);
device_unregister(&c->dev);
}
}
@@ -140,6 +216,7 @@ static struct node_access_nodes *node_init_node_access(struct node *node,
return NULL;

access_node->access = access;
+ access_node->il_weight = 1;
dev = &access_node->dev;
dev->parent = &node->dev;
dev->release = node_access_release;
@@ -150,6 +227,9 @@ static struct node_access_nodes *node_init_node_access(struct node *node,
if (device_register(dev))
goto free_name;

+ if (device_create_file(dev, &dev_attr_il_weight))
+ dev_warn(dev, "failed to add il_weight attribute\n");
+
pm_runtime_no_callbacks(dev);
list_add_tail(&access_node->list_node, &node->access_list);
return access_node;
@@ -363,6 +443,21 @@ static void node_init_caches(unsigned int nid)
#else
static void node_init_caches(unsigned int nid) { }
static void node_remove_caches(struct node *node) { }
+
+/*
+ * Fallback for this #else branch: every node reports an interleave weight
+ * of 1, whatever @nid and @access_nid are.
+ *
+ * NOTE(review): this patch also adds a definition of node_get_il_weight()
+ * earlier in this file. Unless that one is compiled out whenever this branch
+ * is built, the two collide as a redefinition -- confirm the enclosing #if.
+ */
+unsigned char node_get_il_weight(unsigned int nid, unsigned int access_nid)
+{
+ return 1;
+}
+
+/*
+ * Fallback for this #else branch: give every node set in @nodes a weight of 1.
+ * @access_nid: unused
+ * @nodes: mask of node ids to fill in
+ * @weights: output array indexed by node id; only slots of nodes set in
+ *           @nodes are written
+ *
+ * Returns the sum of the weights written, i.e. the number of nodes in @nodes.
+ *
+ * NOTE(review): this patch also adds a definition of nodes_get_il_weights()
+ * earlier in this file. Unless that one is compiled out whenever this branch
+ * is built, the two collide as a redefinition -- confirm the enclosing #if.
+ */
+unsigned int nodes_get_il_weights(unsigned int access_nid, nodemask_t *nodes,
+				  unsigned char *weights)
+{
+	unsigned int nid;
+
+	for_each_node_mask(nid, *nodes)
+		weights[nid] = 1;
+
+	/* nodes_weight() takes the mask itself, not a pointer to it. */
+	return nodes_weight(*nodes);
+}
#endif

#define K(x) ((x) << (PAGE_SHIFT - 10))
diff --git a/include/linux/node.h b/include/linux/node.h
index 427a5975cf40..3c7a6dd2d954 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -138,6 +138,12 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
unsigned access);
+
+/*
+ * Interleave weight of node @nid for the access entry whose ->access value
+ * equals @access_nid; 1 when the node or a matching entry does not exist.
+ */
+extern unsigned char node_get_il_weight(unsigned int nid,
+ unsigned int access_nid);
+/*
+ * Fill @weights[nid] for every node id set in @nodes and return the sum of
+ * the weights written.
+ */
+extern unsigned int nodes_get_il_weights(unsigned int access_nid,
+ nodemask_t *nodes,
+ unsigned char *weights);
#else
static inline void node_dev_init(void)
{
@@ -165,6 +171,17 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
}
+/*
+ * Stub for builds that take this #else branch: no per-node weights exist, so
+ * report the default interleave weight for every (@nid, @access_nid) pair.
+ */
+static inline unsigned char node_get_il_weight(unsigned int nid,
+					       unsigned int access_nid)
+{
+	/*
+	 * 1 is the default weight everywhere else in this patch, and 0 is
+	 * never accepted as a weight, so do not hand 0 back to callers.
+	 */
+	return 1;
+}
+/*
+ * Stub for builds that take this #else branch: give every node set in @nodes
+ * the default weight of 1, mirroring the fallback in drivers/base/node.c.
+ * @access_nid: unused
+ * @nodes: mask of node ids to fill in
+ * @weights: output array indexed by node id; only slots of nodes set in
+ *           @nodes are written
+ *
+ * Returns the sum of the weights written, i.e. the number of nodes in @nodes.
+ */
+static inline unsigned int nodes_get_il_weights(unsigned int access_nid,
+						nodemask_t *nodes,
+						unsigned char *weights)
+{
+	unsigned int nid;
+
+	/* These helpers come from the same header that provides nodemask_t. */
+	for_each_node_mask(nid, *nodes)
+		weights[nid] = 1;
+
+	return nodes_weight(*nodes);
+}
#endif

#define to_node(device) container_of(device, struct node, dev)
--
2.39.1