[PATCH v2 2/2] iommu/arm-smmu-v3: Issue CFGI/TLBI twice on Tegra264
From: Ashish Mhetre
Date: Fri May 29 2026 - 10:10:19 EST
Apply the workaround for Tegra264 erratum by issuing every CFGI/TLBI
command twice on affected SMMU instances, with CMD_SYNC after each.
The erratum requires this exact sequencing:
TLBI/CFGI ... CMD_SYNC TLBI/CFGI ... CMD_SYNC
To get this sequence with minimal surgery, hook the workaround into
arm_smmu_cmdq_issue_cmdlist(). Rename the original function to
__arm_smmu_cmdq_issue_cmdlist() and add a thin wrapper that, on
affected SMMUs and when @sync is true, re-issues the same cmdlist a
second time.
A new arm_smmu_cmd_needs_tlbi_twice() helper classifies which opcodes
need the doubling: CFGI_* and TLBI_*.
For batches that exceed CMDQ_BATCH_ENTRIES commands,
arm_smmu_cmdq_batch_add_cmd_p() normally flushes the full buffer with
sync=false, deferring the SYNC to the eventual batch_submit(). On
affected SMMUs this would leave the first chunk's commands issued
only once, since the WAR hook in arm_smmu_cmdq_issue_cmdlist() only
fires on synced submissions. Force a SYNC on the capacity rollover
when the buffer carries CFGI/TLBI commands so every flushed chunk is
correctly doubled.
Signed-off-by: Ashish Mhetre <amhetre@xxxxxxxxxx>
Reviewed-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 66 +++++++++++++++++++--
1 file changed, 61 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 88296c0a5337..38d45f175a2c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -698,10 +698,10 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq,
* insert their own list of commands then all of the commands from one
* CPU will appear before any of the commands from the other CPU.
*/
-int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
- struct arm_smmu_cmdq *cmdq,
- struct arm_smmu_cmd *cmds, int n,
- bool sync)
+static int __arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmd *cmds, int n,
+ bool sync)
{
struct arm_smmu_cmd cmd_sync;
u32 prod;
@@ -820,6 +820,52 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
return ret;
}
+/*
+ * Returns true if @opcode is a CFGI_* or TLBI_* command, i.e. one of the
+ * invalidations covered by Tegra264 erratum (see ARM_SMMU_OPT_TLBI_TWICE).
+ */
+static bool arm_smmu_cmd_needs_tlbi_twice(u8 opcode)
+{
+ switch (opcode) {
+ case CMDQ_OP_CFGI_STE:
+ case CMDQ_OP_CFGI_ALL:
+ case CMDQ_OP_CFGI_CD:
+ case CMDQ_OP_CFGI_CD_ALL:
+ case CMDQ_OP_TLBI_NH_ALL:
+ case CMDQ_OP_TLBI_NH_ASID:
+ case CMDQ_OP_TLBI_NH_VA:
+ case CMDQ_OP_TLBI_NH_VAA:
+ case CMDQ_OP_TLBI_EL2_ALL:
+ case CMDQ_OP_TLBI_EL2_ASID:
+ case CMDQ_OP_TLBI_EL2_VA:
+ case CMDQ_OP_TLBI_S12_VMALL:
+ case CMDQ_OP_TLBI_S2_IPA:
+ case CMDQ_OP_TLBI_NSNH_ALL:
+ return true;
+ default:
+ return false;
+ }
+}
+
+int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+ struct arm_smmu_cmdq *cmdq,
+ struct arm_smmu_cmd *cmds, int n,
+ bool sync)
+{
+ int ret = __arm_smmu_cmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
+
+ /*
+ * The driver's batch invariants keep a single submission's
+ * opcode class uniform, so checking the first command is enough.
+ */
+ if (!ret && sync && (smmu->options & ARM_SMMU_OPT_TLBI_TWICE) &&
+ arm_smmu_cmd_needs_tlbi_twice(FIELD_GET(CMDQ_0_OP,
+ cmds[0].data[0])))
+ ret = __arm_smmu_cmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
+
+ return ret;
+}
+
static int arm_smmu_cmdq_issue_cmd_p(struct arm_smmu_device *smmu,
struct arm_smmu_cmd *cmd, bool sync)
{
@@ -863,8 +909,18 @@ static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
+ /*
+ * Force a SYNC only when the batch carries commands that
+ * have to be doubled (see ARM_SMMU_OPT_TLBI_TWICE).
+ * The batch holds a uniform opcode class, so checking
+ * the first command is sufficient.
+ */
+ bool need_sync = (smmu->options & ARM_SMMU_OPT_TLBI_TWICE) &&
+ arm_smmu_cmd_needs_tlbi_twice(FIELD_GET(CMDQ_0_OP,
+ cmds->cmds[0].data[0]));
+
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
- cmds->num, false);
+ cmds->num, need_sync);
arm_smmu_cmdq_batch_init_cmd(smmu, cmds, cmd);
}
--
2.50.1