[PATCH 2/2] iommu/vt-d: Share DMAR fault IRQ to prevent vector exhaustion

From: Jacob Pan
Date: Wed Apr 03 2024 - 19:41:40 EST


The DMAR fault interrupt is used for per-IOMMU unrecoverable fault reporting.
It occurs only if there is a kernel programming error or a serious hardware
failure. In other words, it should never occur under normal circumstances.

However, we are permanently occupying IRQ vectors per DMAR unit. On a
dual-socket Sapphire Rapids system, DMAR fault interrupts can consume 16
vectors on the BSP, which can lead to vector exhaustion. The effort to spread
vectors to each socket only partially alleviates the problem.

This patch leverages the shared IRQ mechanism such that only a single IRQ
vector is consumed for all the DMAR units on a system. When any DMAR fault
occurs, all DMAR handlers are called to check their own fault records.

After this patch, /proc/interrupts shows the list of DMAR units that share
the fault interrupt, e.g.:

24 DMAR-MSI 14-edge dmar14, dmar13, dmar12, dmar11, dmar10, dmar9,
dmar8, dmar7, dmar6, dmar5, dmar4, dmar3, dmar2, dmar1, dmar0, dmar15

Link: https://lore.kernel.org/lkml/20240325115638.342716e5@jacob-builder/t/#mc08892e405456428773bcc3b0bbe8971886c5ab9

Reported-by: Dimitri Sivanich <sivanich@xxxxxxx>
Originally-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
---
drivers/iommu/intel/dmar.c | 71 +++++++++++++++++++++++++++++--------
drivers/iommu/intel/iommu.h | 1 +
2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index ab325af93f71..cf68464b3404 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1182,7 +1182,6 @@ static void free_iommu(struct intel_iommu *iommu)
iommu->pr_irq = 0;
}
free_irq(iommu->fault_irq, iommu);
- dmar_free_hwirq(iommu->fault_irq);
iommu->fault_irq = 0;
}

@@ -1956,9 +1955,8 @@ void dmar_msi_mask(struct irq_data *data)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

-void dmar_msi_write(int irq, struct msi_msg *msg)
+static void dmar_msi_write_msg(struct intel_iommu *iommu, int irq, struct msi_msg *msg)
{
- struct intel_iommu *iommu = irq_get_handler_data(irq);
int reg = dmar_msi_reg(iommu, irq);
unsigned long flag;

@@ -1969,6 +1967,13 @@ void dmar_msi_write(int irq, struct msi_msg *msg)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

+void dmar_msi_write(int irq, struct msi_msg *msg)
+{
+ struct intel_iommu *iommu = irq_get_handler_data(irq);
+
+ dmar_msi_write_msg(iommu, irq, msg);
+}
+
void dmar_msi_read(int irq, struct msi_msg *msg)
{
struct intel_iommu *iommu = irq_get_handler_data(irq);
@@ -2098,27 +2103,63 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
return IRQ_HANDLED;
}

+static inline void dmar_fault_irq_unmask(struct intel_iommu *iommu)
+{
+ unsigned long flag;
+
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
+ writel(0, iommu->reg + DMAR_FECTL_REG);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
int dmar_set_interrupt(struct intel_iommu *iommu)
{
- int irq, ret;
+ static int dmar_irq;
+ int ret;

- /*
- * Check if the fault interrupt is already initialized.
- */
+ /* Don't initialize it twice for a given iommu */
if (iommu->fault_irq)
return 0;
+ /*
+ * There is one shared interrupt for all IOMMUs to prevent vector
+ * exhaustion.
+ */
+ if (!dmar_irq) {
+ int irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);

- irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
- if (irq > 0) {
- iommu->fault_irq = irq;
+ if (irq <= 0) {
+ pr_err("No free IRQ vectors\n");
+ return -EINVAL;
+ }
+ dmar_irq = irq;
+ iommu->fault_irq = dmar_irq;
+ iommu->flags |= VTD_FLAG_FAULT_IRQ_OWNER;
} else {
- pr_err("No free IRQ vectors\n");
- return -EINVAL;
- }
+ struct msi_msg msg;

- ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
- if (ret)
+ /*
+ * Get the MSI message from the shared interrupt and write
+ * it to the iommu MSI registers. Must assign fault_irq to get
+ * the MSI register offset.
+ */
+ iommu->fault_irq = dmar_irq;
+ dmar_msi_read(dmar_irq, &msg);
+ dmar_msi_write_msg(iommu, dmar_irq, &msg);
+ }
+ ret = request_irq(dmar_irq, dmar_fault, IRQF_NO_THREAD | IRQF_SHARED | IRQF_NOBALANCING, iommu->name, iommu);
+ if (ret) {
pr_err("Can't request irq\n");
+ return ret;
+ }
+
+ /*
+ * Only the owner IOMMU of the shared IRQ has its fault event
+ * interrupt unmasked after request_irq(), the rest are explicitly
+ * unmasked.
+ */
+ if (!(iommu->flags & VTD_FLAG_FAULT_IRQ_OWNER))
+ dmar_fault_irq_unmask(iommu);
+
return ret;
}

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index deebd4817d27..128f6cdaebac 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -536,6 +536,7 @@ enum {
#define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0)
#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1)
#define VTD_FLAG_SVM_CAPABLE (1 << 2)
+#define VTD_FLAG_FAULT_IRQ_OWNER (1 << 3)

#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
#define pasid_supported(iommu) (sm_supported(iommu) && \
--
2.25.1