[PATCH v2 6/7] platform/x86/amd/hsmp: ACPI HSMP refcounted sockets and coordinated release

From: Muralidhara M K

Date: Thu Jun 25 2026 - 08:36:08 EST


Replace the global is_probed flag with miscdevice.this_device for misc
registration state, count ACPI socket platform devices with struct kref,
and run hsmp_acpi_sock_release() on the final put to deregister /dev/hsmp
when needed, unmap metric DRAM, and free the socket array. Extend struct
hsmp_plat_device with acpi_sock_kref and acpi_sock_kref_started so ACPI
teardown can coordinate with the refcount, and clear mdev.this_device in
hsmp_misc_deregister() so a later re-probe does not skip registration on
a stale pointer.

Switch ACPI socket storage to kcalloc(), initialize the per-socket metric
mutexes once the array exists, and free the allocation when a probe fails
before any kref reference has been handed out.

Both teardown paths run under the data-plane rwsem via
hsmp_sock_teardown_lock(), so they are serialized against the lock-free
data plane. hsmp_acpi_remove() clears this socket's dev under the write
lock before devres unmaps the mailbox, so a message issued after a
non-final unbind cannot reach an unmapped mailbox on multi-socket systems.

The probe-failure path clears the socket's dev under the same write lock
too. On a multi-socket system a non-first socket can fail inside
init_acpi() after hsmp_parse_acpi_table() has published sock->dev and
mapped the mailbox. remove() is not called for a failed probe, and the
array stays alive (owned by an already-probed socket), so without
clearing sock->dev, devres would unmap the mailbox while a /dev/hsmp
ioctl to that index could still reach it.

Signed-off-by: Muralidhara M K <muralidhara.mk@xxxxxxx>
---
drivers/platform/x86/amd/hsmp/acpi.c | 124 +++++++++++++++++++++++----
drivers/platform/x86/amd/hsmp/hsmp.c | 6 ++
drivers/platform/x86/amd/hsmp/hsmp.h | 4 +-
3 files changed, 118 insertions(+), 16 deletions(-)

diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c
index bf5601229c6c..475f0076d262 100644
--- a/drivers/platform/x86/amd/hsmp/acpi.c
+++ b/drivers/platform/x86/amd/hsmp/acpi.c
@@ -610,6 +610,72 @@ static const struct acpi_device_id amd_hsmp_acpi_ids[] = {
};
MODULE_DEVICE_TABLE(acpi, amd_hsmp_acpi_ids);

+static void hsmp_acpi_sock_release(struct kref *kref)
+{
+ struct hsmp_plat_device *pdev = container_of(kref, struct hsmp_plat_device,
+ acpi_sock_kref);
+
+ /*
+ * The caller (hsmp_acpi_remove()) drops the last reference while
+ * holding hsmp_acpi_probe_mutex, so the get/put and the teardown done
+ * here are fully serialized against a concurrent probe. It also holds
+ * the write side of the data-plane rwsem (hsmp_sock_teardown_lock()),
+ * which has drained any in-flight hsmp_send_message() and keeps new
+ * ones out, so unmapping the metric table DRAM and freeing the socket
+ * array here cannot race the lock-free data plane.
+ */
+ lockdep_assert_held(&hsmp_acpi_probe_mutex);
+
+ if (!IS_ERR_OR_NULL(pdev->mdev.this_device))
+ hsmp_misc_deregister();
+ hsmp_destroy_metric_read_locks(pdev, pdev->num_sockets);
+ kfree(pdev->sock);
+ pdev->sock = NULL;
+ pdev->num_sockets = 0;
+ pdev->proto_ver = 0;
+ pdev->acpi_sock_kref_started = false;
+}
+
+/**
+ * hsmp_acpi_probe_failure_cleanup() - Undo a failed ACPI socket probe.
+ * @dev: Device whose probe failed; its drvdata, if set, is this socket.
+ *
+ * Caller must hold hsmp_acpi_probe_mutex. The cleanup runs under the teardown
+ * rwsem, serializing it against the lock-free data plane (init_acpi() runs
+ * hsmp_test() and an earlier socket may already have exposed /dev/hsmp).
+ *
+ * Always clears this socket's dev: on a probe failure for a socket other than
+ * the first, the socket array stays alive (owned by an already-probed socket)
+ * and remove() is never called for this device, yet devres unmaps its mailbox
+ * once probe() returns. Without clearing dev, a later message to this index
+ * would pass every gate in hsmp_send_message() and reach the unmapped mailbox.
+ *
+ * When no ACPI socket reference has been handed out via kref yet (the first
+ * socket's failure), it also frees the array and destroys the per-socket
+ * mutexes; hsmp_destroy_metric_read_locks() additionally unmaps any metric
+ * table DRAM that init_acpi() may have ioremap()ed, so there is no leak.
+ */
+static void hsmp_acpi_probe_failure_cleanup(struct device *dev)
+{
+ struct hsmp_socket *sock = dev_get_drvdata(dev);
+
+ lockdep_assert_held(&hsmp_acpi_probe_mutex);
+
+ hsmp_sock_teardown_lock();
+
+ if (sock)
+ sock->dev = NULL;
+
+ if (!hsmp_pdev->acpi_sock_kref_started && hsmp_pdev->sock) {
+ hsmp_destroy_metric_read_locks(hsmp_pdev, hsmp_pdev->num_sockets);
+ kfree(hsmp_pdev->sock);
+ hsmp_pdev->sock = NULL;
+ hsmp_pdev->num_sockets = 0;
+ }
+
+ hsmp_sock_teardown_unlock();
+}
+
static int hsmp_acpi_probe(struct platform_device *pdev)
{
int ret;
@@ -620,34 +686,44 @@ static int hsmp_acpi_probe(struct platform_device *pdev)

guard(mutex)(&hsmp_acpi_probe_mutex);

- if (!hsmp_pdev->is_probed) {
+ if (!hsmp_pdev->sock) {
hsmp_pdev->num_sockets = topology_max_packages();
if (!hsmp_pdev->num_sockets) {
dev_err(&pdev->dev, "No CPU sockets detected\n");
return -ENODEV;
}

- hsmp_pdev->sock = devm_kcalloc(&pdev->dev, hsmp_pdev->num_sockets,
- sizeof(*hsmp_pdev->sock),
- GFP_KERNEL);
+ hsmp_pdev->sock = kcalloc(hsmp_pdev->num_sockets,
+ sizeof(*hsmp_pdev->sock),
+ GFP_KERNEL);
if (!hsmp_pdev->sock)
return -ENOMEM;
+
+ hsmp_init_metric_read_locks(hsmp_pdev, hsmp_pdev->num_sockets);
}

ret = init_acpi(&pdev->dev);
if (ret) {
dev_err(&pdev->dev, "Failed to initialize HSMP interface.\n");
+ hsmp_acpi_probe_failure_cleanup(&pdev->dev);
return ret;
}

- if (!hsmp_pdev->is_probed) {
+ if (IS_ERR_OR_NULL(hsmp_pdev->mdev.this_device)) {
ret = hsmp_misc_register(&pdev->dev);
if (ret) {
dev_err(&pdev->dev, "Failed to register misc device\n");
+ hsmp_acpi_probe_failure_cleanup(&pdev->dev);
return ret;
}
- hsmp_pdev->is_probed = true;
- dev_dbg(&pdev->dev, "AMD HSMP ACPI is probed successfully\n");
+ dev_dbg(&pdev->dev, "AMD HSMP ACPI misc device registered\n");
+ }
+
+ if (!hsmp_pdev->acpi_sock_kref_started) {
+ kref_init(&hsmp_pdev->acpi_sock_kref);
+ hsmp_pdev->acpi_sock_kref_started = true;
+ } else {
+ kref_get(&hsmp_pdev->acpi_sock_kref);
}

return 0;
@@ -655,16 +731,34 @@ static int hsmp_acpi_probe(struct platform_device *pdev)

static void hsmp_acpi_remove(struct platform_device *pdev)
{
- mutex_lock(&hsmp_acpi_probe_mutex);
+ struct hsmp_socket *sock = dev_get_drvdata(&pdev->dev);
+
/*
- * We register only one misc_device even on multi-socket system.
- * So, deregister should happen only once.
+ * Serialize the final put (and the teardown it triggers) against a
+ * concurrent probe so the refcount cannot be revived from zero.
*/
- if (hsmp_pdev->is_probed) {
- hsmp_misc_deregister();
- hsmp_pdev->is_probed = false;
- }
- mutex_unlock(&hsmp_acpi_probe_mutex);
+ guard(mutex)(&hsmp_acpi_probe_mutex);
+
+ /*
+ * Drain the lock-free data plane and keep it out for the duration of
+ * the teardown. This covers both the per-socket unbind (this socket's
+ * mailbox is unmapped by devres once we return) and the final put that
+ * frees the socket array in hsmp_acpi_sock_release().
+ */
+ hsmp_sock_teardown_lock();
+
+ /*
+ * Clear this socket's dev so hsmp_send_message() rejects it before
+ * touching the mailbox that devres is about to unmap. On a non-final
+ * unbind the socket array stays alive, so without this a later message
+ * to this index would reach an unmapped iomem region.
+ */
+ if (sock)
+ sock->dev = NULL;
+
+ kref_put(&hsmp_pdev->acpi_sock_kref, hsmp_acpi_sock_release);
+
+ hsmp_sock_teardown_unlock();
}

static struct platform_driver amd_hsmp_driver = {
diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c
index c15acba241c4..2e836124f486 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.c
+++ b/drivers/platform/x86/amd/hsmp/hsmp.c
@@ -546,6 +546,12 @@ EXPORT_SYMBOL_NS_GPL(hsmp_misc_register, "AMD_HSMP");
void hsmp_misc_deregister(void)
{
misc_deregister(&hsmp_pdev.mdev);
+ /*
+ * misc_deregister() leaves mdev.this_device pointing at the now
+ * destroyed device. The ACPI probe path registers only while this_device
+ * is NULL or an error pointer, so clear it to let a re-probe register.
+ */
+ hsmp_pdev.mdev.this_device = NULL;
}
EXPORT_SYMBOL_NS_GPL(hsmp_misc_deregister, "AMD_HSMP");

diff --git a/drivers/platform/x86/amd/hsmp/hsmp.h b/drivers/platform/x86/amd/hsmp/hsmp.h
index 5d0a6d819865..118922785d18 100644
--- a/drivers/platform/x86/amd/hsmp/hsmp.h
+++ b/drivers/platform/x86/amd/hsmp/hsmp.h
@@ -14,6 +14,7 @@
#include <linux/device.h>
#include <linux/hwmon.h>
#include <linux/kconfig.h>
+#include <linux/kref.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/pci.h>
@@ -57,7 +58,8 @@ struct hsmp_plat_device {
struct hsmp_socket *sock;
u32 proto_ver;
u16 num_sockets;
- bool is_probed;
+ struct kref acpi_sock_kref;
+ bool acpi_sock_kref_started;
};

int hsmp_cache_proto_ver(u16 sock_ind);
--
2.43.0