Re: [PATCH v7 18/23] iommu/smmuv3: Report non recoverable faults

From: Robin Murphy
Date: Mon May 13 2019 - 09:48:45 EST


On 13/05/2019 13:32, Auger Eric wrote:
Hi Robin,
On 5/13/19 1:54 PM, Robin Murphy wrote:
On 13/05/2019 08:46, Auger Eric wrote:
Hi Robin,

On 5/8/19 7:20 PM, Robin Murphy wrote:
On 08/04/2019 13:19, Eric Auger wrote:
When a stage 1 related fault event is read from the event queue,
let's propagate it to potential external fault listeners, ie. users
who registered a fault handler.

Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx>

---
v4 -> v5:
- s/IOMMU_FAULT_PERM_INST/IOMMU_FAULT_PERM_EXEC
---
ÂÂ drivers/iommu/arm-smmu-v3.c | 169
+++++++++++++++++++++++++++++++++---
ÂÂ 1 file changed, 158 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 8044445bc32a..1fd320788dcb 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -167,6 +167,26 @@
ÂÂ #define ARM_SMMU_PRIQ_IRQ_CFG1ÂÂÂÂÂÂÂ 0xd8
ÂÂ #define ARM_SMMU_PRIQ_IRQ_CFG2ÂÂÂÂÂÂÂ 0xdc
ÂÂ +/* Events */
+#define ARM_SMMU_EVT_F_UUTÂÂÂÂÂÂÂ 0x01
+#define ARM_SMMU_EVT_C_BAD_STREAMIDÂÂÂ 0x02
+#define ARM_SMMU_EVT_F_STE_FETCHÂÂÂ 0x03
+#define ARM_SMMU_EVT_C_BAD_STEÂÂÂÂÂÂÂ 0x04
+#define ARM_SMMU_EVT_F_BAD_ATS_TREQÂÂÂ 0x05
+#define ARM_SMMU_EVT_F_STREAM_DISABLEDÂÂÂ 0x06
+#define ARM_SMMU_EVT_F_TRANSL_FORBIDDENÂÂÂ 0x07
+#define ARM_SMMU_EVT_C_BAD_SUBSTREAMIDÂÂÂ 0x08
+#define ARM_SMMU_EVT_F_CD_FETCHÂÂÂÂÂÂÂ 0x09
+#define ARM_SMMU_EVT_C_BAD_CDÂÂÂÂÂÂÂ 0x0a
+#define ARM_SMMU_EVT_F_WALK_EABTÂÂÂ 0x0b
+#define ARM_SMMU_EVT_F_TRANSLATIONÂÂÂ 0x10
+#define ARM_SMMU_EVT_F_ADDR_SIZEÂÂÂ 0x11
+#define ARM_SMMU_EVT_F_ACCESSÂÂÂÂÂÂÂ 0x12
+#define ARM_SMMU_EVT_F_PERMISSIONÂÂÂ 0x13
+#define ARM_SMMU_EVT_F_TLB_CONFLICTÂÂÂ 0x20
+#define ARM_SMMU_EVT_F_CFG_CONFLICTÂÂÂ 0x21
+#define ARM_SMMU_EVT_E_PAGE_REQUESTÂÂÂ 0x24
+
ÂÂ /* Common MSI config fields */
ÂÂ #define MSI_CFG0_ADDR_MASKÂÂÂÂÂÂÂ GENMASK_ULL(51, 2)
ÂÂ #define MSI_CFG2_SHÂÂÂÂÂÂÂÂÂÂÂ GENMASK(5, 4)
@@ -332,6 +352,15 @@
ÂÂ #define EVTQ_MAX_SZ_SHIFTÂÂÂÂÂÂÂ 7
ÂÂ Â #define EVTQ_0_IDÂÂÂÂÂÂÂÂÂÂÂ GENMASK_ULL(7, 0)
+#define EVTQ_0_SSVÂÂÂÂÂÂÂÂÂÂÂ GENMASK_ULL(11, 11)
+#define EVTQ_0_SUBSTREAMIDÂÂÂÂÂÂÂ GENMASK_ULL(31, 12)
+#define EVTQ_0_STREAMIDÂÂÂÂÂÂÂÂÂÂÂ GENMASK_ULL(63, 32)
+#define EVTQ_1_PNUÂÂÂÂÂÂÂÂÂÂÂ GENMASK_ULL(33, 33)
+#define EVTQ_1_INDÂÂÂÂÂÂÂÂÂÂÂ GENMASK_ULL(34, 34)
+#define EVTQ_1_RNWÂÂÂÂÂÂÂÂÂÂÂ GENMASK_ULL(35, 35)
+#define EVTQ_1_S2ÂÂÂÂÂÂÂÂÂÂÂ GENMASK_ULL(39, 39)
+#define EVTQ_1_CLASSÂÂÂÂÂÂÂÂÂÂÂ GENMASK_ULL(40, 41)
+#define EVTQ_3_FETCH_ADDRÂÂÂÂÂÂÂ GENMASK_ULL(51, 3)
ÂÂ Â /* PRI queue */
ÂÂ #define PRIQ_ENT_DWORDSÂÂÂÂÂÂÂÂÂÂÂ 2
@@ -639,6 +668,64 @@ struct arm_smmu_domain {
ÂÂÂÂÂÂ spinlock_tÂÂÂÂÂÂÂÂÂÂÂ devices_lock;
ÂÂ };
ÂÂ +/* fault propagation */
+
+#define IOMMU_FAULT_F_FIELDSÂÂÂ (IOMMU_FAULT_UNRECOV_PASID_VALID | \
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_UNRECOV_PERM_VALID | \
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_UNRECOV_ADDR_VALID)
+
+struct arm_smmu_fault_propagation_data {
+ÂÂÂ enum iommu_fault_reason reason;
+ÂÂÂ bool s1_check;
+ÂÂÂ u32 fields; /* IOMMU_FAULT_UNRECOV_*_VALID bits */
+};
+
+/*
+ * Describes how SMMU faults translate into generic IOMMU faults
+ * and if they need to be reported externally
+ */
+static const struct arm_smmu_fault_propagation_data
fault_propagation[] = {
+[ARM_SMMU_EVT_F_UUT]ÂÂÂÂÂÂÂÂÂÂÂ = { },
+[ARM_SMMU_EVT_C_BAD_STREAMID]ÂÂÂÂÂÂÂ = { },
+[ARM_SMMU_EVT_F_STE_FETCH]ÂÂÂÂÂÂÂ = { },
+[ARM_SMMU_EVT_C_BAD_STE]ÂÂÂÂÂÂÂ = { },
+[ARM_SMMU_EVT_F_BAD_ATS_TREQ]ÂÂÂÂÂÂÂ = { },
+[ARM_SMMU_EVT_F_STREAM_DISABLED]ÂÂÂ = { },
+[ARM_SMMU_EVT_F_TRANSL_FORBIDDEN]ÂÂÂ = { },
+[ARM_SMMU_EVT_C_BAD_SUBSTREAMID]ÂÂÂ =
{IOMMU_FAULT_REASON_PASID_INVALID,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ false,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_UNRECOV_PASID_VALID
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ },
+[ARM_SMMU_EVT_F_CD_FETCH]ÂÂÂÂÂÂÂ = {IOMMU_FAULT_REASON_PASID_FETCH,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ false,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_UNRECOV_PASID_VALID |

It doesn't make sense to presume validity here, or in any of the faults
below...



+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ },
+[ARM_SMMU_EVT_C_BAD_CD]ÂÂÂÂÂÂÂÂÂÂÂ =
{IOMMU_FAULT_REASON_BAD_PASID_ENTRY,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ false,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_UNRECOV_PASID_VALID
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ },
+[ARM_SMMU_EVT_F_WALK_EABT]ÂÂÂÂÂÂÂ = {IOMMU_FAULT_REASON_WALK_EABT,
true,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_F_FIELDS |
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ },
+[ARM_SMMU_EVT_F_TRANSLATION]ÂÂÂÂÂÂÂ = {IOMMU_FAULT_REASON_PTE_FETCH,
true,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_F_FIELDS
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ },
+[ARM_SMMU_EVT_F_ADDR_SIZE]ÂÂÂÂÂÂÂ = {IOMMU_FAULT_REASON_OOR_ADDRESS,
true,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_F_FIELDS
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ },
+[ARM_SMMU_EVT_F_ACCESS]ÂÂÂÂÂÂÂÂÂÂÂ = {IOMMU_FAULT_REASON_ACCESS, true,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_F_FIELDS
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ },
+[ARM_SMMU_EVT_F_PERMISSION]ÂÂÂÂÂÂÂ = {IOMMU_FAULT_REASON_PERMISSION,
true,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ IOMMU_FAULT_F_FIELDS
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ },
+[ARM_SMMU_EVT_F_TLB_CONFLICT]ÂÂÂÂÂÂÂ = { },
+[ARM_SMMU_EVT_F_CFG_CONFLICT]ÂÂÂÂÂÂÂ = { },
+[ARM_SMMU_EVT_E_PAGE_REQUEST]ÂÂÂÂÂÂÂ = { },
+};
+
ÂÂ struct arm_smmu_option_prop {
ÂÂÂÂÂÂ u32 opt;
ÂÂÂÂÂÂ const char *prop;
@@ -1258,7 +1345,6 @@ static int arm_smmu_init_l2_strtab(struct
arm_smmu_device *smmu, u32 sid)
ÂÂÂÂÂÂ return 0;
ÂÂ }
ÂÂ -__maybe_unused
ÂÂ static struct arm_smmu_master_data *
ÂÂ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
ÂÂ {
@@ -1284,24 +1370,85 @@ arm_smmu_find_master(struct arm_smmu_device
*smmu, u32 sid)
ÂÂÂÂÂÂ return master;
ÂÂ }
ÂÂ +/* Populates the record fields according to the input SMMU event */
+static bool arm_smmu_transcode_fault(u64 *evt, u8 type,
+ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ struct iommu_fault_unrecoverable *record)
+{
+ÂÂÂ const struct arm_smmu_fault_propagation_data *data;
+ÂÂÂ u32 fields;
+
+ÂÂÂ if (type >= ARRAY_SIZE(fault_propagation))
+ÂÂÂÂÂÂÂ return false;
+
+ÂÂÂ data = &fault_propagation[type];
+ÂÂÂ if (!data->reason)
+ÂÂÂÂÂÂÂ return false;
+
+ÂÂÂ fields = data->fields;
+
+ÂÂÂ if (data->s1_check & FIELD_GET(EVTQ_1_S2, evt[1]))
+ÂÂÂÂÂÂÂ return false; /* S2 related fault, don't propagate */
+
+ÂÂÂ if (fields & IOMMU_FAULT_UNRECOV_PASID_VALID) {
+ÂÂÂÂÂÂÂ if (FIELD_GET(EVTQ_0_SSV, evt[0]))
+ÂÂÂÂÂÂÂÂÂÂÂ record->pasid = FIELD_GET(EVTQ_0_SUBSTREAMID, evt[0]);
+ÂÂÂÂÂÂÂ else
+ÂÂÂÂÂÂÂÂÂÂÂ fields &= ~IOMMU_FAULT_UNRECOV_PASID_VALID;

...because this logic then breaks for C_BAD_SUBSTREAMID, which ends up
coming out of here *without* reporting the offending PASID.
Correct.

+ÂÂÂ }
+ÂÂÂ if (fields & IOMMU_FAULT_UNRECOV_PERM_VALID) {
+ÂÂÂÂÂÂÂ if (!FIELD_GET(EVTQ_1_RNW, evt[1]))
+ÂÂÂÂÂÂÂÂÂÂÂ record->perm |= IOMMU_FAULT_PERM_WRITE;
+ÂÂÂÂÂÂÂ if (FIELD_GET(EVTQ_1_PNU, evt[1]))
+ÂÂÂÂÂÂÂÂÂÂÂ record->perm |= IOMMU_FAULT_PERM_PRIV;
+ÂÂÂÂÂÂÂ if (FIELD_GET(EVTQ_1_IND, evt[1]))
+ÂÂÂÂÂÂÂÂÂÂÂ record->perm |= IOMMU_FAULT_PERM_EXEC;
+ÂÂÂ }
+ÂÂÂ if (fields & IOMMU_FAULT_UNRECOV_ADDR_VALID)
+ÂÂÂÂÂÂÂ record->addr = evt[2];
+
+ÂÂÂ if (fields & IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID)
+ÂÂÂÂÂÂÂ record->fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]);
+
+ÂÂÂ record->flags = fields;
+ÂÂÂ return true;
+}
+
+static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64
*evt)
+{
+ÂÂÂ u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]);
+ÂÂÂ u8 type = FIELD_GET(EVTQ_0_ID, evt[0]);
+ÂÂÂ struct arm_smmu_master_data *master;
+ÂÂÂ struct iommu_fault_event event = {};
+ÂÂÂ int i;
+
+ÂÂÂ master = arm_smmu_find_master(smmu, sid);
+ÂÂÂ if (WARN_ON(!master))
+ÂÂÂÂÂÂÂ return;

NAK. If I'm getting global faults like C_BAD_STE where a device almost
certainly *isn't* configured (because hey, we would have initialised its
STEs if we knew), then I sure as hell want to see the actual faults.
Spamming a constant stream of stack traces *instead* of showing them is
worse than useless.
Sure, if !master I will output the original traces.

+
+ÂÂÂ event.fault.type = IOMMU_FAULT_DMA_UNRECOV;
+
+ÂÂÂ if (arm_smmu_transcode_fault(evt, type, &event.fault.event)) {
+ÂÂÂÂÂÂÂ iommu_report_device_fault(master->dev, &event);
+ÂÂÂÂÂÂÂ return;

And again, the vast majority of the time, there won't be a fault handler
registered, so unconditionally suppressing the most common and useful
stuff like translation and permission faults is very much not OK.
Going to test whether we are in nested mode before entering that path.

I don't think this has to be exclusive to nesting - the generic
reporting mechanism feels like it might ultimately be extensible to
other things like Rob's case for generalised stalling. It's just that
for robustness, even when a fault handler is present, we still want the
driver to be able to report if it didn't actually handle a fault.


Jean-Philippe pointed out in a previous review
(https://patchwork.kernel.org/patch/10751801/#22424047) that the guest
can flood the host log with S1 related faults. At the moment we do not
check that a fault handler is registered in nested mode. Maybe we
should? Even if the fault handler is registered, as it is based on a
circular buffer, this latter can be full and lead to a log flood.

Ah, right, I didn't quite have the full picture in mind, thanks for the jog. I guess in that case we want some degree of special-casing for nested configs - anything S1-related that we expect the fault handler to simply inject back into the guest, we can indeed suppress on the host, but I do think it's worth anticipating host-related faults (and entirely host-owned fault-handlers in future) as well.

Robin.