Re: [PATCH v6 12/15] platform/x86/intel/pmc/ssram: Switch to static array with per-index probe state
From: Ilpo Järvinen
Date: Wed Jun 10 2026 - 08:49:30 EST
On Sun, 31 May 2026, David E. Box wrote:
> From: Xi Pardee <xi.pardee@xxxxxxxxxxxxxxx>
>
> Replace devm-allocated pmc_ssram_telems pointer with a fixed-size static
> array and introduce per-index probe state tracking.
>
> This prepares the driver for later per-device probe handling where tying
> the PMC tracking storage to one probed PCI device is no longer suitable.
>
> The previous single global device_probed flag cannot describe the state of
> individual PMC indices when multiple devices can be probed independently.
> Replace it with per-index state (UNPROBED, PROBING, PRESENT, ABSENT) and a
> staging cache that publishes discovered values only after probe completes.
> This avoids races between probe/unbind and concurrent readers.
>
> Use READ_ONCE/WRITE_ONCE for pmc_ssram_state[] accesses to prevent compiler
> optimizations from refetching or tearing the state value across concurrent
> probe/unbind cycles.
>
> Signed-off-by: Xi Pardee <xi.pardee@xxxxxxxxxxxxxxx>
> Signed-off-by: David E. Box <david.e.box@xxxxxxxxxxxxxxx>
> Assisted-by: Claude:claude-sonnet-4-5
> ---
> V6 changes:
> - Squashed patch combining v5 patches 10 ("Use fixed-size static pmc
> array") and 13 ("Refactor memory barrier for reentrant probe"). Both
> patches addressed per-index probe state tracking and reentrant probe
> protection, so they were combined for better logical cohesion.
> - Added per-index probe state enum (UNPROBED, PROBING, PRESENT, ABSENT)
> to replace devid overload where devid was used as both payload and
> probe state indicator. This fixes stale data issues on reprobe,
> distinguishes between -EAGAIN (probe in progress) and -ENODEV (probe
> failed) error semantics, and prevents stale values from being visible
> after failed reprobe (Ilpo/Sashiko/Claude).
> - Added staging cache that publishes devid and base_addr only after probe
> completes successfully to avoid races between probe/unbind and
> concurrent readers.
> - Added .remove callback to handle proper state cleanup on driver unbind.
> - Used READ_ONCE/WRITE_ONCE for pmc_ssram_state[] accesses to prevent
> compiler optimizations from causing issues across concurrent probe/
> unbind cycles.
>
> V5 - No changes (for both original patches)
>
> V4 - No changes (for both original patches)
>
> V3 - No changes (for both original patches)
>
> V2 changes (from original patch 10 "Use fixed-size static pmc array"):
> - Replaced hardcoded array size [3] with MAX_NUM_PMC constant
>
> V2 changes (from original patch 13 "Refactor memory barrier"):
> - Expanded commit message to explain synchronization rationale
> - Remove unused probe_finish label associated with the old global flag
>
> .../platform/x86/intel/pmc/ssram_telemetry.c | 198 ++++++++++++++----
> 1 file changed, 159 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/platform/x86/intel/pmc/ssram_telemetry.c b/drivers/platform/x86/intel/pmc/ssram_telemetry.c
> index e7ddd1788132..ad961ee469b2 100644
> --- a/drivers/platform/x86/intel/pmc/ssram_telemetry.c
> +++ b/drivers/platform/x86/intel/pmc/ssram_telemetry.c
> @@ -5,6 +5,7 @@
> * Copyright (c) 2023, Intel Corporation.
> */
>
> +#include <linux/bitmap.h>
> #include <linux/bits.h>
> #include <linux/cleanup.h>
> #include <linux/device.h>
> @@ -24,6 +25,7 @@
> #define SSRAM_IOE_OFFSET 0x68
> #define SSRAM_DEVID_OFFSET 0x70
> #define SSRAM_BASE_ADDR_MASK GENMASK_ULL(63, 3)
> +#define SSRAM_PCI_PMC_MASK (BIT(PMC_IDX_MAIN) | BIT(PMC_IDX_IOE) | BIT(PMC_IDX_PCH))
>
> DEFINE_FREE(pmc_ssram_telemetry_iounmap, void __iomem *, if (_T) iounmap(_T))
>
> @@ -39,15 +41,33 @@ static const struct ssram_type pci_main = {
> .method = RES_METHOD_PCI,
> };
>
> -static struct pmc_ssram_telemetry *pmc_ssram_telems;
> -static bool device_probed;
> +enum pmc_ssram_state {
> + PMC_SSRAM_UNPROBED,
If I understand correctly, this should be explicitly set to 0
(PMC_SSRAM_UNPROBED = 0), since the code relies on it being the
zero-initialized default state of the static pmc_ssram_state[] array.
> + PMC_SSRAM_PROBING,
> + PMC_SSRAM_PRESENT,
> + PMC_SSRAM_ABSENT,
> +};
> +
> +static enum pmc_ssram_state pmc_ssram_state[MAX_NUM_PMC];
> +static struct pmc_ssram_telemetry pmc_ssram_telems[MAX_NUM_PMC];
> +
> +struct pmc_ssram_probe_cache {
> + struct pmc_ssram_telemetry telems[MAX_NUM_PMC];
> + unsigned long owned_mask;
> + unsigned long valid_mask;
> +};
> +
> +struct pmc_ssram_drvdata {
> + unsigned long owned_mask;
> +};
>
> static inline u64 get_base(void __iomem *addr, u32 offset)
> {
> return lo_hi_readq(addr + offset) & SSRAM_BASE_ADDR_MASK;
> }
>
> -static void pmc_ssram_get_devid_pwrmbase(void __iomem *ssram, unsigned int pmc_idx)
> +static void pmc_ssram_get_devid_pwrmbase(struct pmc_ssram_probe_cache *probe_cache,
> + void __iomem *ssram, unsigned int pmc_idx)
> {
> u64 pwrm_base;
> u16 devid;
> @@ -55,8 +75,46 @@ static void pmc_ssram_get_devid_pwrmbase(void __iomem *ssram, unsigned int pmc_i
> pwrm_base = get_base(ssram, SSRAM_PWRM_OFFSET);
> devid = readw(ssram + SSRAM_DEVID_OFFSET);
>
> - pmc_ssram_telems[pmc_idx].devid = devid;
> - pmc_ssram_telems[pmc_idx].base_addr = pwrm_base;
> + probe_cache->telems[pmc_idx].base_addr = pwrm_base;
> + probe_cache->telems[pmc_idx].devid = devid;
> +}
> +
> +static void pmc_ssram_publish_absent(unsigned int pmc_idx)
> +{
> + /*
> + * Publish only the state without modifying telemetry data. This avoids
> + * a TOCTOU race where a reader that sampled state==PRESENT before unbind
> + * could read modified data after its smp_rmb(). Readers check state first
> + * and return -ENODEV for ABSENT without accessing data.
> + */
> + WRITE_ONCE(pmc_ssram_state[pmc_idx], PMC_SSRAM_ABSENT);
> +}
> +
> +static void pmc_ssram_publish_present(struct pmc_ssram_probe_cache *probe_cache,
> + unsigned int pmc_idx)
> +{
> + /*
> + * The devid and base_addr fields are read from immutable hardware MMIO
> + * registers and do not change across unbind/rebind cycles. A reader
> + * observing PRESENT from an earlier probe can safely read fields being
> + * updated by a concurrent rebind because both probes read identical
> + * values from the same hardware.
> + */
> + pmc_ssram_telems[pmc_idx] = probe_cache->telems[pmc_idx];
> + /*
> + * Barrier ensures telemetry data write completes before PRESENT state
> + * becomes visible. Pairs with smp_rmb() in reader.
> + */
> + smp_wmb();
> + WRITE_ONCE(pmc_ssram_state[pmc_idx], PMC_SSRAM_PRESENT);
> +}
> +
> +static void pmc_ssram_mark_probing(unsigned long mask)
> +{
> + unsigned long bit;
> +
> + for_each_set_bit(bit, &mask, MAX_NUM_PMC)
> + WRITE_ONCE(pmc_ssram_state[bit], PMC_SSRAM_PROBING);
> }
>
> static int
> @@ -96,11 +154,14 @@ pmc_ssram_telemetry_add_pmt(struct pci_dev *pcidev, u64 ssram_base, void __iomem
> }
>
> static int
> -pmc_ssram_telemetry_get_pmc_pci(struct pci_dev *pcidev, unsigned int pmc_idx, u32 offset)
> +pmc_ssram_telemetry_get_pmc_pci(struct pci_dev *pcidev,
> + struct pmc_ssram_probe_cache *probe_cache,
> + unsigned int pmc_idx, u32 offset)
> {
> void __iomem __free(pmc_ssram_telemetry_iounmap) *tmp_ssram = NULL;
> void __iomem __free(pmc_ssram_telemetry_iounmap) *ssram = NULL;
> u64 ssram_base;
> + int ret;
>
> ssram_base = pci_resource_start(pcidev, 0);
> tmp_ssram = ioremap(ssram_base, SSRAM_HDR_SIZE);
> @@ -125,22 +186,38 @@ pmc_ssram_telemetry_get_pmc_pci(struct pci_dev *pcidev, unsigned int pmc_idx, u3
> ssram = no_free_ptr(tmp_ssram);
> }
>
> - pmc_ssram_get_devid_pwrmbase(ssram, pmc_idx);
> + pmc_ssram_get_devid_pwrmbase(probe_cache, ssram, pmc_idx);
>
> /* Find and register and PMC telemetry entries */
> - return pmc_ssram_telemetry_add_pmt(pcidev, ssram_base, ssram);
> + ret = pmc_ssram_telemetry_add_pmt(pcidev, ssram_base, ssram);
> + if (ret)
> + return ret;
> +
> + probe_cache->valid_mask |= BIT(pmc_idx);
> +
> + return 0;
> }
>
> -static int pmc_ssram_telemetry_pci_init(struct pci_dev *pcidev)
> +static int pmc_ssram_telemetry_pci_init(struct pci_dev *pcidev,
> + struct pmc_ssram_probe_cache *probe_cache)
> {
> int ret;
>
> - ret = pmc_ssram_telemetry_get_pmc_pci(pcidev, PMC_IDX_MAIN, 0);
> + ret = pmc_ssram_telemetry_get_pmc_pci(pcidev, probe_cache, PMC_IDX_MAIN, 0);
> if (ret)
> return ret;
>
> - pmc_ssram_telemetry_get_pmc_pci(pcidev, PMC_IDX_IOE, SSRAM_IOE_OFFSET);
> - pmc_ssram_telemetry_get_pmc_pci(pcidev, PMC_IDX_PCH, SSRAM_PCH_OFFSET);
> + /*
> + * If MAIN PMC enumeration is successful but either IOE or PCH fail,
> + * don't fail probe as the MAIN PMC is still useful as it provides the
> + * global reset and slp_s0 counter access. Failed or missing secondary
> + * PMCs are left out of valid_mask and published as absent.
> + */
> + pmc_ssram_telemetry_get_pmc_pci(pcidev, probe_cache, PMC_IDX_IOE,
> + SSRAM_IOE_OFFSET);
> +
> + pmc_ssram_telemetry_get_pmc_pci(pcidev, probe_cache, PMC_IDX_PCH,
> + SSRAM_PCH_OFFSET);
>
> return ret;
> }
> @@ -159,53 +236,86 @@ static int pmc_ssram_telemetry_pci_init(struct pci_dev *pcidev)
> int pmc_ssram_telemetry_get_pmc_info(unsigned int pmc_idx,
> struct pmc_ssram_telemetry *pmc_ssram_telemetry)
> {
> + enum pmc_ssram_state state;
> +
> + if (pmc_idx >= MAX_NUM_PMC)
> + return -EINVAL;
> +
> /*
> * PMCs are discovered in probe function. If this function is called before
> - * probe function complete, the result would be invalid. Use device_probed
> - * variable to avoid this case. Return -EAGAIN to inform the consumer to call
> - * again later.
> + * probe function complete, the result would be invalid. Use per-PMC state
> + * to inform the consumer to call again later.
> */
> - if (!device_probed)
> + state = READ_ONCE(pmc_ssram_state[pmc_idx]);
> + if (state == PMC_SSRAM_UNPROBED || state == PMC_SSRAM_PROBING)
> return -EAGAIN;
>
> + if (state == PMC_SSRAM_ABSENT)
> + return -ENODEV;
> +
> /*
> * Memory barrier is used to ensure the correct read order between
> - * device_probed variable and PMC info.
> + * pmc_ssram_state and PMC info.
> */
> smp_rmb();
> - if (pmc_idx >= MAX_NUM_PMC)
> - return -EINVAL;
> -
> - if (!pmc_ssram_telems || !pmc_ssram_telems[pmc_idx].devid)
> - return -ENODEV;
> -
> pmc_ssram_telemetry->devid = pmc_ssram_telems[pmc_idx].devid;
> pmc_ssram_telemetry->base_addr = pmc_ssram_telems[pmc_idx].base_addr;
> return 0;
> }
> EXPORT_SYMBOL_GPL(pmc_ssram_telemetry_get_pmc_info);
>
> +static void pmc_ssram_publish_absent_mask(unsigned long mask)
> +{
> + unsigned long bit;
> +
> + for_each_set_bit(bit, &mask, MAX_NUM_PMC)
> + pmc_ssram_publish_absent(bit);
> +}
> +
> +static void pmc_ssram_publish_telems(struct pmc_ssram_probe_cache *probe_cache, int ret)
> +{
> + unsigned long bit;
> +
> + if (ret) {
> + pmc_ssram_publish_absent_mask(probe_cache->owned_mask);
> + return;
> + }
> +
> + for_each_set_bit(bit, &probe_cache->owned_mask, MAX_NUM_PMC) {
> + if (probe_cache->valid_mask & BIT(bit))
> + pmc_ssram_publish_present(probe_cache, bit);
> + else
> + pmc_ssram_publish_absent(bit);
> + }
> +}
> +
> static int pmc_ssram_telemetry_probe(struct pci_dev *pcidev, const struct pci_device_id *id)
> {
> + struct pmc_ssram_probe_cache probe_cache = {};
> + struct pmc_ssram_drvdata *drvdata;
> const struct ssram_type *ssram_type;
> enum resource_method method;
> int ret;
>
> - pmc_ssram_telems = devm_kzalloc(&pcidev->dev, sizeof(*pmc_ssram_telems) * MAX_NUM_PMC,
> - GFP_KERNEL);
> - if (!pmc_ssram_telems) {
> - ret = -ENOMEM;
> - goto probe_finish;
> - }
> -
> ssram_type = (const struct ssram_type *)id->driver_data;
> if (!ssram_type) {
> dev_dbg(&pcidev->dev, "missing driver data\n");
> - ret = -EINVAL;
> - goto probe_finish;
> + return -EINVAL;
> }
>
> method = ssram_type->method;
> + if (method == RES_METHOD_PCI)
> + probe_cache.owned_mask = SSRAM_PCI_PMC_MASK;
> + else
> + return -EINVAL;
> +
> + pmc_ssram_mark_probing(probe_cache.owned_mask);
> +
> + drvdata = devm_kzalloc(&pcidev->dev, sizeof(*drvdata), GFP_KERNEL);
> + if (!drvdata) {
> + ret = -ENOMEM;
> + goto probe_finish;
> + }
>
> ret = pcim_enable_device(pcidev);
> if (ret) {
> @@ -214,20 +324,29 @@ static int pmc_ssram_telemetry_probe(struct pci_dev *pcidev, const struct pci_de
> }
>
> if (method == RES_METHOD_PCI)
> - ret = pmc_ssram_telemetry_pci_init(pcidev);
> + ret = pmc_ssram_telemetry_pci_init(pcidev, &probe_cache);
> else
> ret = -EINVAL;
>
> + if (!ret) {
> + drvdata->owned_mask = probe_cache.owned_mask;
> + pci_set_drvdata(pcidev, drvdata);
> + }
> +
> probe_finish:
> - /*
> - * Memory barrier is used to ensure the correct write order between PMC info
> - * and device_probed variable.
> - */
> - smp_wmb();
> - device_probed = true;
> + pmc_ssram_publish_telems(&probe_cache, ret);
> +
> return ret;
> }
>
> +static void pmc_ssram_telemetry_remove(struct pci_dev *pcidev)
> +{
> + struct pmc_ssram_drvdata *drvdata = pci_get_drvdata(pcidev);
> +
> + if (drvdata)
> + pmc_ssram_publish_absent_mask(drvdata->owned_mask);
> +}
> +
> static const struct pci_device_id pmc_ssram_telemetry_pci_ids[] = {
> { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_MTL_SOCM),
> .driver_data = (kernel_ulong_t)&pci_main },
> @@ -251,6 +370,7 @@ static struct pci_driver pmc_ssram_telemetry_driver = {
> .name = "intel_pmc_ssram_telemetry",
> .id_table = pmc_ssram_telemetry_pci_ids,
> .probe = pmc_ssram_telemetry_probe,
> + .remove = pmc_ssram_telemetry_remove,
> };
> module_pci_driver(pmc_ssram_telemetry_driver);
>
>
--
i.