Re: [PATCH v10 3/8] dt, numa: adding numa dt binding implementation.

From: Rob Herring
Date: Tue Feb 02 2016 - 18:34:33 EST


On Tue, Feb 02, 2016 at 03:39:18PM +0530, Ganapatrao Kulkarni wrote:
> dt node parsing for numa topology is done using device property
> numa-node-id and device node distance-map.
>
> Reviewed-by: Robert Richter <rrichter@xxxxxxxxxx>
> Signed-off-by: Ganapatrao Kulkarni <gkulkarni@xxxxxxxxxxxxxxxxxx>
> ---
> drivers/of/Kconfig | 11 +++
> drivers/of/Makefile | 1 +
> drivers/of/of_numa.c | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/of.h | 4 +
> 4 files changed, 223 insertions(+)
> create mode 100644 drivers/of/of_numa.c
>
> diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
> index e2a4841..8f9cc3a 100644
> --- a/drivers/of/Kconfig
> +++ b/drivers/of/Kconfig
> @@ -112,4 +112,15 @@ config OF_OVERLAY
> While this option is selected automatically when needed, you can
> enable it manually to improve device tree unit test coverage.
>
> +config OF_NUMA
> + bool "Device Tree NUMA support"

Does this need to be user visible?

> + depends on NUMA
> + depends on OF
> + depends on ARM64

drop this (and make sure it compiles on other arches). It will fail
because you also have a dependency on FDT.

> + default y
> + help
> + Enable Device Tree NUMA support.
> + This enables the numa mapping of cpu, memory, io and
> + inter node distances using dt bindings.
> +
> endif # OF
> diff --git a/drivers/of/Makefile b/drivers/of/Makefile
> index 156c072..bee3fa9 100644
> --- a/drivers/of/Makefile
> +++ b/drivers/of/Makefile
> @@ -14,5 +14,6 @@ obj-$(CONFIG_OF_MTD) += of_mtd.o
> obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
> obj-$(CONFIG_OF_RESOLVE) += resolver.o
> obj-$(CONFIG_OF_OVERLAY) += overlay.o
> +obj-$(CONFIG_OF_NUMA) += of_numa.o
>
> obj-$(CONFIG_OF_UNITTEST) += unittest-data/
> diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c
> new file mode 100644
> index 0000000..1142cdb
> --- /dev/null
> +++ b/drivers/of/of_numa.c
> @@ -0,0 +1,207 @@
> +/*
> + * OF NUMA Parsing support.
> + *
> + * Copyright (C) 2015 Cavium Inc.
> + * Author: Ganapatrao Kulkarni <gkulkarni@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <linux/of.h>
> +#include <linux/of_fdt.h>

Surely you need some numa related includes.

> +
> +/* define default numa node to 0 */
> +#define DEFAULT_NODE 0
> +
> +/* Returns nid in the range [0..MAX_NUMNODES-1],
> + * or NUMA_NO_NODE if no valid numa-node-id entry found
> + * or DEFAULT_NODE if no numa-node-id entry exists
> + */
> +static int of_numa_prop_to_nid(const __be32 *of_numa_prop, int length)
> +{
> + int nid;
> +
> + if (!of_numa_prop)
> + return DEFAULT_NODE;
> +
> + if (length != sizeof(*of_numa_prop)) {
> + pr_warn("NUMA: Invalid of_numa_prop length %d found.\n",
> + length);
> + return NUMA_NO_NODE;
> + }
> +
> + nid = of_read_number(of_numa_prop, 1);
> + if (nid >= MAX_NUMNODES) {
> + pr_warn("NUMA: Invalid numa node %d found.\n", nid);
> + return NUMA_NO_NODE;
> + }
> +
> + return nid;
> +}
> +
> +static int __init early_init_of_node_to_nid(unsigned long node)
> +{
> + int length;
> + const __be32 *of_numa_prop;
> +
> + of_numa_prop = of_get_flat_dt_prop(node, "numa-node-id", &length);
> +
> + return of_numa_prop_to_nid(of_numa_prop, length);
> +}
> +
> +/*
> + * Even though we connect cpus to numa domains later in SMP
> + * init, we need to know the node ids now for all cpus.
> +*/
> +static int __init early_init_parse_cpu_node(unsigned long node)
> +{
> + int nid;
> + const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
> +
> + if (type == NULL)
> + return 0;
> +
> + if (strcmp(type, "cpu") != 0)
> + return 0;
> +
> + nid = early_init_of_node_to_nid(node);
> + if (nid == NUMA_NO_NODE)
> + return -EINVAL;
> +
> + node_set(nid, numa_nodes_parsed);
> + return 0;
> +}
> +
> +static int __init early_init_parse_memory_node(unsigned long node)
> +{
> + const __be32 *reg, *endp;
> + int length;
> + int nid;
> + const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
> +
> + if (type == NULL)
> + return 0;
> +
> + if (strcmp(type, "memory") != 0)
> + return 0;
> +
> + nid = early_init_of_node_to_nid(node);
> + if (nid == NUMA_NO_NODE)
> + return -EINVAL;
> +
> + reg = of_get_flat_dt_prop(node, "reg", &length);
> + endp = reg + (length / sizeof(__be32));
> +
> + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
> + u64 base, size;
> +
> + base = dt_mem_next_cell(dt_root_addr_cells, &reg);
> + size = dt_mem_next_cell(dt_root_size_cells, &reg);
> + pr_debug("NUMA: base = %llx , node = %u\n",
> + base, nid);
> +

We already have code to parse memory nodes. Can the NUMA part be
combined somehow.

> + if (numa_add_memblk(nid, base, size) < 0)
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int __init early_init_parse_distance_map_v1(unsigned long node,
> + const char *uname)
> +{
> + const __be32 *prop_dist_matrix;
> + int length = 0, i, matrix_count;
> + int nr_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT;
> +
> + pr_info("NUMA: parsing numa-distance-map-v1\n");
> +
> + prop_dist_matrix =
> + of_get_flat_dt_prop(node, "distance-matrix", &length);
> +
> + if (!length) {
> + pr_err("NUMA: failed to parse distance-matrix\n");
> + return -ENODEV;
> + }
> +
> + matrix_count = ((length / sizeof(__be32)) / (3 * nr_size_cells));
> +
> + if ((matrix_count * sizeof(__be32) * 3 * nr_size_cells) != length) {
> + pr_warn("NUMA: invalid distance-matrix length %d\n", length);
> + return -EINVAL;
> + }
> +
> + for (i = 0; i < matrix_count; i++) {
> + u32 nodea, nodeb, distance;
> +
> + nodea = dt_mem_next_cell(nr_size_cells, &prop_dist_matrix);
> + nodeb = dt_mem_next_cell(nr_size_cells, &prop_dist_matrix);
> + distance = dt_mem_next_cell(nr_size_cells, &prop_dist_matrix);
> + numa_set_distance(nodea, nodeb, distance);
> + pr_debug("NUMA: distance[node%d -> node%d] = %d\n",
> + nodea, nodeb, distance);
> +
> + /* Set default distance of node B->A same as A->B */
> + if (nodeb > nodea)
> + numa_set_distance(nodeb, nodea, distance);
> + }
> +
> + return 0;
> +}
> +
> +static int __init early_init_parse_distance_map(unsigned long node,
> + const char *uname)
> +{
> + if (strcmp(uname, "distance-map") != 0)
> + return 0;
> +
> + if (of_flat_dt_is_compatible(node, "numa-distance-map-v1"))
> + return early_init_parse_distance_map_v1(node, uname);
> +
> + pr_err("NUMA: invalid distance-map device node\n");
> + return -EINVAL;
> +}
> +
> +static int __init early_init_of_scan_numa_map(unsigned long node, const char *uname,
> + int depth, void *data)
> +{
> + int ret;
> +
> + ret = early_init_parse_cpu_node(node);
> + if (ret)
> + return ret;
> +
> + ret = early_init_parse_memory_node(node);
> + if (ret)
> + return ret;
> +
> + return early_init_parse_distance_map(node, uname);
> +}
> +
> +int of_node_to_nid(struct device_node *device)
> +{
> + const __be32 *of_numa_prop;
> + int length;
> +
> + of_numa_prop = of_get_property(device, "numa-node-id", &length);
> + if (of_numa_prop)
> + return of_numa_prop_to_nid(of_numa_prop, length);
> +
> + return NUMA_NO_NODE;
> +}
> +
> +/* DT node mapping is done already early_init_of_scan_memory */
> +int __init of_numa_init(void)
> +{
> + return of_scan_flat_dt(early_init_of_scan_numa_map, NULL);
> +}
> diff --git a/include/linux/of.h b/include/linux/of.h
> index dd10626..23e5bad 100644
> --- a/include/linux/of.h
> +++ b/include/linux/of.h
> @@ -685,6 +685,10 @@ static inline int of_node_to_nid(struct device_node *device)
> }
> #endif
>
> +#ifdef CONFIG_OF_NUMA
> +extern int __init of_numa_init(void);
> +#endif

You don't need an ifdef here unless you need an empty function.

Can this be called internally by the DT code when it does the all
the other flat scanning instead of adding another call from arch code to
DT code. Ideally, the arch code would call DT code once and it would do
all the parsing it needs to.

Rob