Re: [PATCH 09/33] ACPI / MPAM: Parse the MPAM table

From: Fenghua Yu

Date: Wed Nov 12 2025 - 21:16:49 EST


Hi, Ben and James,

On 11/7/25 04:34, Ben Horgan wrote:
From: James Morse <james.morse@xxxxxxx>

Add code to parse the arm64 specific MPAM table, looking up the cache
level from the PPTT and feeding the end result into the MPAM driver.

This happens in two stages. Platform devices are created first for the
MSC devices. Once the driver probes it calls acpi_mpam_parse_resources()
to discover the RIS entries the MSC contains.

For now the MPAM hook mpam_ris_create() is stubbed out, but will update
the MPAM driver with optional discovered data about the RIS entries.

CC: Carl Worth <carl@xxxxxxxxxxxxxxxxxxxxxx>
Link: https://developer.arm.com/documentation/den0065/3-0bet/?lang=en
Reviewed-by: Lorenzo Pieralisi <lpieralisi@xxxxxxxxxx>
Tested-by: Fenghua Yu <fenghuay@xxxxxxxxxx>
Tested-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxxxxx>
Tested-by: Peter Newman <peternewman@xxxxxxxxxx>
Signed-off-by: James Morse <james.morse@xxxxxxx>
Signed-off-by: Ben Horgan <ben.horgan@xxxxxxx>
---
Changes since v3:
return irq from acpi_mpam_register_irq (Jonathan)
err -> len rename (Jonathan)
Move table initialisation after checking (Jonathan)
Add sanity checking in acpi_mpam_count_msc() (Jonathan)
---
arch/arm64/Kconfig | 1 +
drivers/acpi/arm64/Kconfig | 3 +
drivers/acpi/arm64/Makefile | 1 +
drivers/acpi/arm64/mpam.c | 403 ++++++++++++++++++++++++++++++++++++
drivers/acpi/tables.c | 2 +-
include/linux/arm_mpam.h | 47 +++++
6 files changed, 456 insertions(+), 1 deletion(-)
create mode 100644 drivers/acpi/arm64/mpam.c
create mode 100644 include/linux/arm_mpam.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 67015d51f7b5..c5e66d5d72cd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2025,6 +2025,7 @@ config ARM64_TLB_RANGE
config ARM64_MPAM
bool "Enable support for MPAM"
+ select ACPI_MPAM if ACPI
help
Memory System Resource Partitioning and Monitoring (MPAM) is an
optional extension to the Arm architecture that allows each
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
index b3ed6212244c..f2fd79f22e7d 100644
--- a/drivers/acpi/arm64/Kconfig
+++ b/drivers/acpi/arm64/Kconfig
@@ -21,3 +21,6 @@ config ACPI_AGDI
config ACPI_APMT
bool
+
+config ACPI_MPAM
+ bool
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 05ecde9eaabe..9390b57cb564 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI_APMT) += apmt.o
obj-$(CONFIG_ACPI_FFH) += ffh.o
obj-$(CONFIG_ACPI_GTDT) += gtdt.o
obj-$(CONFIG_ACPI_IORT) += iort.o
+obj-$(CONFIG_ACPI_MPAM) += mpam.o
obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o
obj-$(CONFIG_ARM_AMBA) += amba.o
obj-y += dma.o init.o
diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c
new file mode 100644
index 000000000000..c199944862ed
--- /dev/null
+++ b/drivers/acpi/arm64/mpam.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2025 Arm Ltd.
+
+/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */
+
+#define pr_fmt(fmt) "ACPI MPAM: " fmt
+
+#include <linux/acpi.h>
+#include <linux/arm_mpam.h>
+#include <linux/bits.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/platform_device.h>
+
+#include <acpi/processor.h>
+
+/*
+ * Flags for acpi_table_mpam_msc.*_interrupt_flags.
+ * See 2.1.1 Interrupt Flags, Table 5, of DEN0065B_MPAM_ACPI_3.0-bet.
+ */
+#define ACPI_MPAM_MSC_IRQ_MODE BIT(0)
+#define ACPI_MPAM_MSC_IRQ_TYPE_MASK GENMASK(2, 1)
+#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0
+#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_MASK BIT(3)
+#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR 0
+#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR_CONTAINER 1
+#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4)
+
+/*
+ * Encodings for the MSC node body interface type field.
+ * See 2.1 MPAM MSC node, Table 4 of DEN0065B_MPAM_ACPI_3.0-bet.
+ */
+#define ACPI_MPAM_MSC_IFACE_MMIO 0x00
+#define ACPI_MPAM_MSC_IFACE_PCC 0x0a
+
+static bool _is_ppi_partition(u32 flags)
+{
+ u32 aff_type, is_ppi;
+ bool ret;
+
+ is_ppi = FIELD_GET(ACPI_MPAM_MSC_IRQ_AFFINITY_VALID, flags);
+ if (!is_ppi)
+ return false;
+
+ aff_type = FIELD_GET(ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_MASK, flags);
+ ret = (aff_type == ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR_CONTAINER);
+ if (ret)
+ pr_err_once("Partitioned interrupts not supported\n");
+
+ return ret;
+}
+
+static int acpi_mpam_register_irq(struct platform_device *pdev,
+ int intid, u32 flags)
+{
+ int irq;
+ u32 int_type;
+ int trigger;
+
+ if (!intid)
+ return -EINVAL;
+
+ if (_is_ppi_partition(flags))
+ return -EINVAL;
+
+ trigger = FIELD_GET(ACPI_MPAM_MSC_IRQ_MODE, flags);
+ int_type = FIELD_GET(ACPI_MPAM_MSC_IRQ_TYPE_MASK, flags);
+ if (int_type != ACPI_MPAM_MSC_IRQ_TYPE_WIRED)
+ return -EINVAL;
+
+ irq = acpi_register_gsi(&pdev->dev, intid, trigger, ACPI_ACTIVE_HIGH);
+ if (irq <= 0)
+ pr_err_once("Failed to register interrupt 0x%x with ACPI\n", intid);
+
+ return irq;
+}
+
+static void acpi_mpam_parse_irqs(struct platform_device *pdev,
+ struct acpi_mpam_msc_node *tbl_msc,
+ struct resource *res, int *res_idx)
+{
+ u32 flags, intid;
+ int irq;
+
+ intid = tbl_msc->overflow_interrupt;
+ flags = tbl_msc->overflow_interrupt_flags;
+ irq = acpi_mpam_register_irq(pdev, intid, flags);
+ if (irq > 0)
+ res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "overflow");
+
+ intid = tbl_msc->error_interrupt;
+ flags = tbl_msc->error_interrupt_flags;
+ irq = acpi_mpam_register_irq(pdev, intid, flags);
+ if (irq > 0)
+ res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "error");
+}
+
+static int acpi_mpam_parse_resource(struct mpam_msc *msc,
+ struct acpi_mpam_resource_node *res)
+{
+ int level, nid;
+ u32 cache_id;
+
+ switch (res->locator_type) {
+ case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE:
+ cache_id = res->locator.cache_locator.cache_reference;
+ level = find_acpi_cache_level_from_id(cache_id);
+ if (level <= 0) {
+ pr_err_once("Bad level (%d) for cache with id %u\n", level, cache_id);
+ return -EINVAL;
+ }
+ return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE,
+ level, cache_id);
+ case ACPI_MPAM_LOCATION_TYPE_MEMORY:
+ nid = pxm_to_node(res->locator.memory_locator.proximity_domain);
+ if (nid == NUMA_NO_NODE) {
+ pr_debug("Bad proxmity domain %lld, using node 0 instead\n",

Typo.
s/proxmity/proximity/

+ res->locator.memory_locator.proximity_domain);
+ nid = 0;
+ }
+ return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY,
+ 255, nid);
+ default:
+ /* These get discovered later and are treated as unknown */
+ return 0;
+ }
+}
+
+int acpi_mpam_parse_resources(struct mpam_msc *msc,
+ struct acpi_mpam_msc_node *tbl_msc)
+{
+ int i, err;
+ char *ptr, *table_end;
+ struct acpi_mpam_resource_node *resource;
+
+ ptr = (char *)(tbl_msc + 1);
+ table_end = ptr + tbl_msc->length;

tbl_msc->length equals size of the ENTIRE msc node. ptr points to the end of tbl_msc. ptr + tbl_msc->length is past the end of the msc node. This will access data outside of this MSC node.

Better to change to:
+ table_end = (char *)tbl_msc + tbl_msc->length;

+ for (i = 0; i < tbl_msc->num_resource_nodes; i++) {
+ u64 max_deps, remaining_table;
+
+ if (ptr + sizeof(*resource) > table_end)
+ return -EINVAL;
+
+ resource = (struct acpi_mpam_resource_node *)ptr;
+
+ remaining_table = table_end - ptr;
+ max_deps = remaining_table / sizeof(struct acpi_mpam_func_deps);
+ if (resource->num_functional_deps > max_deps) {
+ pr_debug("MSC has impossible number of functional dependencies\n");
+ return -EINVAL;
+ }
+
+ err = acpi_mpam_parse_resource(msc, resource);
+ if (err)
+ return err;
+
+ ptr += sizeof(*resource);
+ ptr += resource->num_functional_deps * sizeof(struct acpi_mpam_func_deps);
+ }
+
+ return 0;
+}
+
+/*
+ * Creates the device power management link and returns true if the
+ * acpi id is valid and usable for cpu affinity. This is the case
+ * when the linked device is a processor or a processor container.
+ */
+static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc,
+ struct platform_device *pdev,
+ u32 *acpi_id)
+{
+ char hid[sizeof(tbl_msc->hardware_id_linked_device) + 1] = { 0 };
+ bool acpi_id_valid = false;
+ struct acpi_device *buddy;
+ char uid[11];
+ int len;
+
+ memcpy(hid, &tbl_msc->hardware_id_linked_device,
+ sizeof(tbl_msc->hardware_id_linked_device));
+
+ if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) {
+ *acpi_id = tbl_msc->instance_id_linked_device;
+ acpi_id_valid = true;
+ }
+
+ len = snprintf(uid, sizeof(uid), "%u",
+ tbl_msc->instance_id_linked_device);
+ if (len >= sizeof(uid)) {
+ pr_debug("Failed to convert uid of device for power management.");
+ return acpi_id_valid;
+ }
+
+ buddy = acpi_dev_get_first_match_dev(hid, uid, -1);
+ if (buddy)
+ device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS);

Refcount leak here?

Refcount of the device object pointed by buddy is not released and refcount leaks.

Better to change to:
+ if (buddy) {
+ device_link_add(...);
+ acpi_dev_put(buddy); <====== release refcount here
+ }

or free refcount automatically:
+DEFINE_FREE(acpi_dev_put, struct acpi_device *, if (_T) acpi_dev_put(_T))
...
+ struct acpi_device *buddy __free(acpi_dev_put);
...

+
+ return acpi_id_valid;
+}
+
+static int decode_interface_type(struct acpi_mpam_msc_node *tbl_msc,
+ enum mpam_msc_iface *iface)
+{
+ switch (tbl_msc->interface_type) {
+ case ACPI_MPAM_MSC_IFACE_MMIO:
+ *iface = MPAM_IFACE_MMIO;
+ return 0;
+ case ACPI_MPAM_MSC_IFACE_PCC:
+ *iface = MPAM_IFACE_PCC;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static struct platform_device * __init acpi_mpam_parse_msc(struct acpi_mpam_msc_node *tbl_msc)
+{
+ struct platform_device *pdev __free(platform_device_put) =
+ platform_device_alloc("mpam_msc", tbl_msc->identifier);
+ int next_res = 0, next_prop = 0, err;
+ /* pcc, nrdy, affinity and a sentinel */
+ struct property_entry props[4] = { 0 };
+ /* mmio, 2xirq, no sentinel. */
+ struct resource res[3] = { 0 };
+ struct acpi_device *companion;
+ enum mpam_msc_iface iface;
+ char uid[16];
+ u32 acpi_id;
+
+ if (!pdev)
+ return ERR_PTR(-ENOMEM);
+
+ /* Some power management is described in the namespace: */
+ err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier);
+ if (err > 0 && err < sizeof(uid)) {
+ companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1);
+ if (companion)
+ ACPI_COMPANION_SET(&pdev->dev, companion);

Ditto. companion's refcount leak here as well?

+ else
+ pr_debug("MSC.%u: missing namespace entry\n",
+ tbl_msc->identifier);
+ }
+
+ if (decode_interface_type(tbl_msc, &iface)) {
+ pr_debug("MSC.%u: unknown interface type\n", tbl_msc->identifier);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (iface == MPAM_IFACE_MMIO)
+ res[next_res++] = DEFINE_RES_MEM_NAMED(tbl_msc->base_address,
+ tbl_msc->mmio_size,
+ "MPAM:MSC");
+ else if (iface == MPAM_IFACE_PCC)
+ props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel",
+ tbl_msc->base_address);
+
+ acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res);
+
+ WARN_ON_ONCE(next_res > ARRAY_SIZE(res));

Not sure if this WARN_ON_ONCE() is really helpful.

Even before this WARN happens, previously res[next_res] accesseing outside of res[] may hit panic or data corruption already.

Maybe it's better to add a helper to access res[] and report error when accessing out of res[] scope. A few places can call the helper to access res[]:

+static int add_resource(struct resource *res, int *idx, int max,
+ struct resource new_res)
+{
+ if (*idx >= max) {
+ pr_err("Too many resources (max %d)\n", max);
+ return -ENOSPC;
+ }
+ res[(*idx)++] = new_res;
+ return 0;
+}

Then can call the helper to replace res[next_res++]:
+ if (iface == MPAM_IFACE_MMIO) {
+ err = add_resource(res, &next_res, ARRAY_SIZE(res),
+ DEFINE_RES_MEM_NAMED(tbl_msc->base_address,
+ tbl_msc, + mmio_size,
+ "MPAM:MSC"));
+ if (err)
+ return ERR_PTR(-ENOSPC);
+ }

+ err = platform_device_add_resources(pdev, res, next_res);
+ if (err)
+ return ERR_PTR(err);
+
+ props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us",
+ tbl_msc->max_nrdy_usec);
+
+ /*
+ * The MSC's CPU affinity is described via its linked power
+ * management device, but only if it points at a Processor or
+ * Processor Container.
+ */
+ if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id))
+ props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity", acpi_id);
+
+ WARN_ON_ONCE(next_prop > ARRAY_SIZE(props));

Ditto for this WARN here?

[SNIP]

Thanks.

-Fenghua