Re: [PATCH] iommu/arm-smmu: Use pm_runtime in fault handlers

From: Robin Murphy

Date: Tue Jan 27 2026 - 11:07:01 EST


On 2026-01-27 12:11 pm, Prakash Gupta wrote:
Commit d4a44f0750bb ("iommu/arm-smmu: Invoke pm_runtime across the driver")
enabled pm_runtime for the arm-smmu device. On systems where the SMMU
sits in a power domain, all register accesses must be done while the
device is runtime-resumed to avoid unclocked register reads and
potential NoC errors.

So far, this has not been an issue for most SMMU clients because
stall-on-fault is enabled by default. While a translation fault is
being handled, the SMMU stalls further translations for that context
bank, so the fault handler would not race with a powered-down
SMMU.

Adreno SMMU now disables stall-on-fault in the presence of fault
storms to avoid saturating SMMU resources and hanging the GMU. With
stall-on-fault disabled, the SMMU can generate faults while its power
domain may no longer be enabled, which makes unclocked accesses to
fault-status registers in the SMMU fault handlers possible.

At face value, that sounds wrong - how does an SMMU generate a fault, or indeed do anything, when it's powered off? In principle it's possible that the SMMU might signal an interrupt, and is _then_ suspended (with the interrupt line somehow remaining asserted, so probably more clock-gated than completely powered down) before the interrupt hander runs, but we rather assume that we're not going to have an unhandled hardware IRQ hanging around for longer than the autosuspend delay.

So, judging by the diff below, I guess what you really mean is that in the case of a threaded context IRQ handler, it can take long enough between handling the hardware IRQ and the thread actually running that the SMMU may have suspended in between?

Guard the context and global fault handlers with arm_smmu_rpm_get() /
arm_smmu_rpm_put() so that all SMMU fault register accesses are done
with the SMMU powered.

Fixes: b13044092c1e ("drm/msm: Temporarily disable stall-on-fault after a page fault")
Co-developed-by: Pratyush Brahma <pratyush.brahma@xxxxxxxxxxxxxxxx>
Signed-off-by: Pratyush Brahma <pratyush.brahma@xxxxxxxxxxxxxxxx>
Signed-off-by: Prakash Gupta <prakash.gupta@xxxxxxxxxxxxxxxx>
---
drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 5 ++-
drivers/iommu/arm/arm-smmu/arm-smmu.c | 53 ++++++++++++++++++++++--------
2 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 573085349df3..2d03df72612d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -317,6 +317,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
const struct of_device_id *client_match;
+ const struct arm_smmu_impl *impl = qsmmu->data->impl;
int cbndx = smmu_domain->cfg.cbndx;
struct adreno_smmu_priv *priv;
@@ -350,10 +351,12 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
priv->get_ttbr1_cfg = qcom_adreno_smmu_get_ttbr1_cfg;
priv->set_ttbr0_cfg = qcom_adreno_smmu_set_ttbr0_cfg;
priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
- priv->set_stall = qcom_adreno_smmu_set_stall;
priv->set_prr_bit = NULL;
priv->set_prr_addr = NULL;
+ if (impl->context_fault_needs_threaded_irq)
+ priv->set_stall = qcom_adreno_smmu_set_stall;
+
if (of_device_is_compatible(np, "qcom,smmu-500") &&
!of_device_is_compatible(np, "qcom,sm8250-smmu-500") &&
of_device_is_compatible(np, "qcom,adreno-smmu")) {
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 5e690cf85ec9..183f12e45b02 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -462,10 +462,23 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
int idx = smmu_domain->cfg.cbndx;
int ret;
+ if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq) {

Why is this conditional on being threaded, if the global fault handler that can never be threaded at all apparently needs it unconditionally?

Thanks,
Robin.

+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return IRQ_NONE;
+ }
+
+ if (smmu->impl && smmu->impl->context_fault) {
+ ret = smmu->impl->context_fault(irq, dev);
+ goto out_power_off;
+ }
+
arm_smmu_read_context_fault_info(smmu, idx, &cfi);
- if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
- return IRQ_NONE;
+ if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) {
+ ret = IRQ_NONE;
+ goto out_power_off;
+ }
ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
@@ -480,7 +493,14 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE);
}
- return IRQ_HANDLED;
+ ret = IRQ_HANDLED;
+
+out_power_off:
+
+ if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq)
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
}
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
@@ -489,14 +509,21 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
struct arm_smmu_device *smmu = dev;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
+ int ret;
+
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return IRQ_NONE;
gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
- if (!gfsr)
- return IRQ_NONE;
+ if (!gfsr) {
+ ret = IRQ_NONE;
+ goto out_power_off;
+ }
if (__ratelimit(&rs)) {
if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
@@ -513,7 +540,11 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
}
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
- return IRQ_HANDLED;
+ ret = IRQ_HANDLED;
+
+out_power_off:
+ arm_smmu_rpm_put(smmu);
+ return ret;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
@@ -683,7 +714,6 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
enum io_pgtable_fmt fmt;
struct iommu_domain *domain = &smmu_domain->domain;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- irqreturn_t (*context_fault)(int irq, void *dev);
mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu)
@@ -850,19 +880,14 @@ static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain,
*/
irq = smmu->irqs[cfg->irptndx];
- if (smmu->impl && smmu->impl->context_fault)
- context_fault = smmu->impl->context_fault;
- else
- context_fault = arm_smmu_context_fault;
-
if (smmu->impl && smmu->impl->context_fault_needs_threaded_irq)
ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
- context_fault,
+ arm_smmu_context_fault,
IRQF_ONESHOT | IRQF_SHARED,
"arm-smmu-context-fault",
smmu_domain);
else
- ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED,
+ ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault, IRQF_SHARED,
"arm-smmu-context-fault", smmu_domain);
if (ret < 0) {

---
base-commit: fcb70a56f4d81450114034b2c61f48ce7444a0e2
change-id: 20251208-smmu-rpm-8bd67db93dca

Best regards,
--
Prakash Gupta <prakash.gupta@xxxxxxxxxxxxxxxx>