On Wed, Mar 2, 2016 at 4:55 PM, David Daney <ddaney.cavm@xxxxxxxxx> wrote:[...]
From: David Daney <david.daney@xxxxxxxxxx>
Add device tree parsing for NUMA topology using device
"numa-node-id" property in distance-map and cpu nodes.
This is a complete rewrite of a previous patch by:
Ganapatrao Kulkarni<gkulkarni@xxxxxxxxxxxxxxxxxx>
Signed-off-by: David Daney <david.daney@xxxxxxxxxx>
---
drivers/of/Kconfig | 3 +
drivers/of/Makefile | 1 +
drivers/of/of_numa.c | 200 +++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/of.h | 9 +++
4 files changed, 213 insertions(+)
create mode 100644 drivers/of/of_numa.c
+++ b/drivers/of/of_numa.c
@@ -0,0 +1,200 @@
+/*
+ * OF NUMA Parsing support.
+ *
+ * Copyright (C) 2015 - 2016 Cavium Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/of.h>
+#include <linux/of_fdt.h>
This can be dropped now.
+#include <linux/nodemask.h>
+
+#include <asm/numa.h>
+
+/* define default numa node to 0 */
+#define DEFAULT_NODE 0
+
+/*
+ * Even though we connect cpus to numa domains later in SMP
+ * init, we need to know the node ids now for all cpus.
+*/
+static void __init of_find_cpu_nodes(void)
Perhaps of_parse_cpu_nodes for consistency.
Actually, if all the functions were prefixed with "of_numa_" that
would be better.
+{
+ u32 nid;
+ int r;
+ struct device_node *np = NULL;
+
+ for (;;) {
+ np = of_find_node_by_type(np, "cpu");
+ if (!np)
+ break;
Can't we use the child node iterator for /cpus here?
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ if (r)
+ continue;
+
+ pr_debug("NUMA: CPU on %u\n", nid);
+ if (nid >= MAX_NUMNODES)
+ pr_warn("NUMA: Node id %u exceeds maximum value\n",
+ nid);
+ else
+ node_set(nid, numa_nodes_parsed);
I'm not sure how this works, but don't you need to match this up with
MPIDR of the cpu here?
+ }
+}
+
+static void __init of_parse_memory_nodes(void)
+{
+ struct device_node *np = NULL;
+ int na, ns;
+ const __be32 *prop;
+ unsigned int psize;
+ u32 nid;
+ u64 base, size;
+ int r;
+
+ for (;;) {
+ np = of_find_node_by_type(np, "memory");
+ if (!np)
+ break;
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ if (r)
+ continue;
+
+ prop = of_get_property(np, "reg", &psize);
+ if (!prop)
+ continue;
+
+ psize /= sizeof(__be32);
+ na = of_n_addr_cells(np);
+ ns = of_n_size_cells(np);
+
+ if (psize < na + ns) {
+ pr_err("NUMA: memory reg property too small\n");
+ continue;
+ }
+ base = of_read_number(prop, na);
+ size = of_read_number(prop + na, ns);
You should be able to use of_address_to_resource for all this.
+
+ pr_debug("NUMA: base = %llx len = %llx, node = %u\n",
+ base, size, nid);
+
+ if (numa_add_memblk(nid, base, size) < 0)
+ break;
+ }
+
+ of_node_put(np);
+}
+
+static int __init parse_distance_map_v1(struct device_node *map)
+{
+ const __be32 *matrix;
+ unsigned int matrix_size;
+ int entry_count;
+ int i;
+ int nr_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT;
I believe the defaults are for some old DT files. As this is new, it
should rely on explicit #size-cells in the DT.
OTOH, what is point of using #size-cells at all versus fixing the
sizes to 1 cell. The documentation doesn't indicate that it uses
#size-cells. That also means that the sizes basically follow the cell
size for the memory given that this is at the top-level.
+
+ pr_info("NUMA: parsing numa-distance-map-v1\n");
+
+ matrix = of_get_property(map, "distance-matrix", &matrix_size);
+ if (!matrix) {
+ pr_err("NUMA: No distance-matrix property in distance-map\n");
+ return -EINVAL;
+ }
+
+ entry_count = matrix_size / (sizeof(__be32) * 3 * nr_size_cells);
+
+ for (i = 0; i < entry_count; i++) {
+ u32 nodea, nodeb, distance;
+
+ nodea = of_read_number(matrix, nr_size_cells);
+ matrix += nr_size_cells;
+ nodeb = of_read_number(matrix, nr_size_cells);
+ matrix += nr_size_cells;
+ distance = of_read_number(matrix, nr_size_cells);
+ matrix += nr_size_cells;
Assuming you fix this to 1 cell, you can use
of_property_count_u32_elems and of_property_read_u32_array.
+
+ numa_set_distance(nodea, nodeb, distance);
+ pr_debug("NUMA: distance[node%d -> node%d] = %d\n",
+ nodea, nodeb, distance);
+
+ /* Set default distance of node B->A same as A->B */
+ if (nodeb > nodea)
+ numa_set_distance(nodeb, nodea, distance);
+ }
+
+ return 0;
+}
+
+static int __init of_parse_distance_map(void)
+{
+ int ret = -EINVAL;
+ struct device_node *np = of_find_node_by_name(NULL, "distance-map");
+
+ if (!np)
+ return ret;
+
+ if (of_device_is_compatible(np, "numa-distance-map-v1")) {
You can use of_find_compatible_node() instead of these 2 calls.
+ ret = parse_distance_map_v1(np);
+ goto out;
+ }
+
+ pr_err("NUMA: invalid distance-map device node\n");
+out:
+ of_node_put(np);
+ return ret;
+}
+
+int of_node_to_nid(struct device_node *device)
+{
+ struct device_node *np;
+ u32 nid;
+ int r = -ENODATA;
+
+ np = of_node_get(device);
+
+ while (np) {
+ struct device_node *parent;
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ if (r != -EINVAL)
You want to break for other err values?
+ break;
+
+ /* property doesn't exist in this node, look in parent */
+ parent = of_get_parent(np);
+ of_node_put(np);
+ np = parent;
+ }
+ if (np && r)
+ pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n",
+ np->name);
+ of_node_put(np);
+
+ if (!r) {
+ if (nid >= MAX_NUMNODES)
+ pr_warn("NUMA: Node id %u exceeds maximum value\n",
+ nid);
+ else
+ return nid;
+ }
+
+ return NUMA_NO_NODE;
+}
Needs to be exported?
+
+int __init of_numa_init(void)
+{
+ of_find_cpu_nodes();
+ of_parse_memory_nodes();
+ return of_parse_distance_map();
+}
diff --git a/include/linux/of.h b/include/linux/of.h
index dc6e396..fe67a4c 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -685,6 +685,15 @@ static inline int of_node_to_nid(struct device_node *device)
}
#endif
+#ifdef CONFIG_OF_NUMA
+extern int of_numa_init(void);
+#else
+static inline int of_numa_init(void)
+{
+ return -ENOSYS;
+}
+#endif
+
static inline struct device_node *of_find_matching_node(
struct device_node *from,
const struct of_device_id *matches)
--
1.8.3.1