[PATCH v3 03/11] iommu: Add reset_device_done callback for hardware fault recovery

From: Nicolin Chen

Date: Thu Apr 16 2026 - 19:32:28 EST


When IOMMU hardware detects an error caused by a faulty device (e.g. an ATS
invalidation timeout), the IOMMU driver may quarantine the device by
disabling specific hardware features or dropping translation capabilities.

To lift the quarantine, the IOMMU driver needs a reliable signal that the
underlying physical device has been cleanly reset (e.g. via PCIe AER or a
sysfs-triggered Function Level Reset).

Introduce a reset_device_done callback in struct iommu_ops and invoke it
from the existing pci_dev_reset_iommu_done() path, with group->mutex held,
to notify the underlying IOMMU driver that the device's internal state has
been sanitized.

Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
---
include/linux/iommu.h | 4 ++++
drivers/iommu/iommu.c | 12 ++++++++++++
2 files changed, 16 insertions(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d3685967e960a..3c5c5fa5cdc6a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -626,6 +626,9 @@ __iommu_copy_struct_to_user(const struct iommu_user_data *dst_data,
* @release_device: Remove device from iommu driver handling
* @probe_finalize: Do final setup work after the device is added to an IOMMU
* group and attached to the groups domain
+ * @reset_device_done: Notify the driver that a device has reset successfully.
+ * Note that the core invokes the callback function while
+ * holding the group->mutex
* @device_group: find iommu group for a particular device
* @get_resv_regions: Request list of reserved regions for a device
* @of_xlate: add OF master IDs to iommu grouping
@@ -683,6 +686,7 @@ struct iommu_ops {
struct iommu_device *(*probe_device)(struct device *dev);
void (*release_device)(struct device *dev);
void (*probe_finalize)(struct device *dev);
+ void (*reset_device_done)(struct device *dev);
struct iommu_group *(*device_group)(struct device *dev);

/* Request/Free a list of reserved regions for a device */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 28d4c1f143a08..df23ef0a26e6c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -4071,12 +4071,14 @@ EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare);
void pci_dev_reset_iommu_done(struct pci_dev *pdev, bool reset_succeeds)
{
struct iommu_group *group = pdev->dev.iommu_group;
+ const struct iommu_ops *ops;
struct group_device *gdev;
unsigned long pasid;
void *entry;

if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev))
return;
+ ops = dev_iommu_ops(&pdev->dev);

guard(mutex)(&group->mutex);

@@ -4105,6 +4107,16 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev, bool reset_succeeds)
return;
}

+ /*
+ * The device may have been in an error state, in which case the IOMMU
+ * driver may have quarantined it by disabling specific hardware
+ * features or dropping translation capability. Notify the driver that
+ * the device has been cleanly reset, so it can lift the quarantine and
+ * restore full functionality.
+ */
+ if (ops->reset_device_done)
+ ops->reset_device_done(&pdev->dev);
+
/* Re-attach RID domain back to group->domain */
if (group->domain != group->blocking_domain) {
WARN_ON(__iommu_attach_device(group->domain, &pdev->dev,
--
2.43.0