[Intel-IOMMU 08/10] DMAR fault handling support

From: anil . s . keshavamurthy
Date: Mon Jun 04 2007 - 18:33:44 EST


MSI interrupt handler registrations and fault handling support
for Intel-IOMMU hadrware.

Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@xxxxxxxxx>
---
Documentation/Intel-IOMMU.txt | 17 +++
arch/x86_64/kernel/io_apic.c | 59 ++++++++++++
drivers/pci/intel-iommu.c | 194 ++++++++++++++++++++++++++++++++++++++++++
include/linux/dmar.h | 12 ++
4 files changed, 281 insertions(+), 1 deletion(-)

Index: linux-2.6.22-rc3/Documentation/Intel-IOMMU.txt
===================================================================
--- linux-2.6.22-rc3.orig/Documentation/Intel-IOMMU.txt 2007-06-04 12:40:29.000000000 -0700
+++ linux-2.6.22-rc3/Documentation/Intel-IOMMU.txt 2007-06-04 12:40:58.000000000 -0700
@@ -63,6 +63,15 @@
The same is true for peer to peer transactions. Hence we reserve the
address from PCI MMIO ranges so they are not allocated for IOVA addresses.

+
+Fault reporting
+---------------
+When errors are reported, the DMA engine signals via an interrupt. The fault
+reason and device that caused it with fault reason is printed on console.
+
+See below for sample.
+
+
Boot Message Sample
-------------------

@@ -85,6 +94,14 @@

PCI-DMA: Using DMAR IOMMU

+Fault reporting
+---------------
+
+DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+DMAR:[fault reason 05] PTE Write access is not set
+DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+DMAR:[fault reason 05] PTE Write access is not set
+
TBD
----

Index: linux-2.6.22-rc3/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux-2.6.22-rc3.orig/arch/x86_64/kernel/io_apic.c 2007-06-04 12:19:13.000000000 -0700
+++ linux-2.6.22-rc3/arch/x86_64/kernel/io_apic.c 2007-06-04 12:40:58.000000000 -0700
@@ -31,6 +31,7 @@
#include <linux/sysdev.h>
#include <linux/msi.h>
#include <linux/htirq.h>
+#include <linux/dmar.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
@@ -1972,8 +1973,64 @@
destroy_irq(irq);
}

-#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ struct msi_msg msg;
+ unsigned int dest;
+ cpumask_t tmp;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ dmar_msi_read(irq, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ dmar_msi_write(irq, &msg);
+ irq_desc[irq].affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+ .name = "DMAR_MSI",
+ .unmask = dmar_msi_unmask,
+ .mask = dmar_msi_mask,
+ .ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = dmar_msi_set_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+ int ret;
+ struct msi_msg msg;
+
+ ret = msi_compose_msg(NULL, irq, &msg);
+ if (ret < 0)
+ return ret;
+ dmar_msi_write(irq, &msg);
+ set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+ "edge");
+ return 0;
+}
+#endif

+#endif /* CONFIG_PCI_MSI */
/*
* Hypertransport interrupt support
*/
Index: linux-2.6.22-rc3/drivers/pci/intel-iommu.c
===================================================================
--- linux-2.6.22-rc3.orig/drivers/pci/intel-iommu.c 2007-06-04 12:40:41.000000000 -0700
+++ linux-2.6.22-rc3/drivers/pci/intel-iommu.c 2007-06-04 12:40:58.000000000 -0700
@@ -684,6 +684,196 @@
return 0;
}

+/* iommu interrupt handling. Most stuff are MSI-like. */
+
+static char *fault_reason_strings[] =
+{
+ "Software",
+ "Present bit in root entry is clear",
+ "Present bit in context entry is clear",
+ "Invalid context entry",
+ "Access beyond MGAW",
+ "PTE Write access is not set",
+ "PTE Read access is not set",
+ "Next page table ptr is invalid",
+ "Root table address invalid",
+ "Context table ptr is invalid",
+ "non-zero reserved fields in RTP",
+ "non-zero reserved fields in CTP",
+ "non-zero reserved fields in PTE",
+ "Unknown"
+};
+#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings)
+
+char *dmar_get_fault_reason(u8 fault_reason)
+{
+ if (fault_reason > MAX_FAULT_REASON_IDX)
+ return fault_reason_strings[MAX_FAULT_REASON_IDX];
+ else
+ return fault_reason_strings[fault_reason];
+}
+
+void dmar_msi_unmask(unsigned int irq)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ /* unmask it */
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
+ /* Read a reg to force flush the post write */
+ dmar_readl(iommu->reg, DMAR_FECTL_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_mask(unsigned int irq)
+{
+ unsigned long flag;
+ struct intel_iommu *iommu = get_irq_data(irq);
+
+ /* mask it */
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
+ /* Read a reg to force flush the post write */
+ dmar_readl(iommu->reg, DMAR_FECTL_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_write(int irq, struct msi_msg *msg)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ dmar_writel(iommu->reg, DMAR_FEDATA_REG, msg->data);
+ dmar_writel(iommu->reg, DMAR_FEADDR_REG, msg->address_lo);
+ dmar_writel(iommu->reg, DMAR_FEUADDR_REG, msg->address_hi);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_read(int irq, struct msi_msg *msg)
+{
+ struct intel_iommu *iommu = get_irq_data(irq);
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ msg->data = dmar_readl(iommu->reg, DMAR_FEDATA_REG);
+ msg->address_lo = dmar_readl(iommu->reg, DMAR_FEADDR_REG);
+ msg->address_hi = dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
+ u8 fault_reason, u16 source_id, u64 addr)
+{
+ char *reason;
+
+ reason = dmar_get_fault_reason(fault_reason);
+
+ printk(KERN_ERR
+ "DMAR:[%s] Request device [%02x:%02x.%d] "
+ "fault addr %llx \n"
+ "DMAR:[fault reason %02d] %s\n",
+ (type ? "DMA Read" : "DMA Write"),
+ (source_id >> 8), PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+ return 0;
+}
+
+#define PRIMARY_FAULT_REG_LEN (16)
+static irqreturn_t iommu_page_fault(int irq, void *dev_id)
+{
+ struct intel_iommu *iommu = dev_id;
+ int reg, fault_index;
+ u32 fault_status;
+ unsigned long flag;
+
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
+
+ /* TBD: ignore advanced fault log currently */
+ if (!(fault_status & DMA_FSTS_PPF))
+ goto clear_overflow;
+
+ fault_index = dma_fsts_fault_record_index(fault_status);
+ reg = cap_fault_reg_offset(iommu->cap);
+ while (1) {
+ u8 fault_reason;
+ u16 source_id;
+ u64 guest_addr;
+ int type;
+ u32 data;
+
+ /* highest 32 bits */
+ data = dmar_readl(iommu->reg, reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 12);
+ if (!(data & DMA_FRCD_F))
+ break;
+
+ fault_reason = dma_frcd_fault_reason(data);
+ type = dma_frcd_type(data);
+
+ data = dmar_readl(iommu->reg, reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 8);
+ source_id = dma_frcd_source_id(data);
+
+ guest_addr = dmar_readq(iommu->reg, reg +
+ fault_index * PRIMARY_FAULT_REG_LEN);
+ guest_addr = dma_frcd_page_addr(guest_addr);
+ /* clear the fault */
+ dmar_writel(iommu->reg, reg +
+ fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ iommu_page_fault_do_one(iommu, type, fault_reason,
+ source_id, guest_addr);
+
+ fault_index++;
+ if (fault_index > cap_num_fault_regs(iommu->cap))
+ fault_index = 0;
+ spin_lock_irqsave(&iommu->register_lock, flag);
+ }
+clear_overflow:
+ /* clear primary fault overflow */
+ fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
+ if (fault_status & DMA_FSTS_PFO)
+ dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flag);
+ return IRQ_HANDLED;
+}
+
+int dmar_set_interrupt(struct intel_iommu *iommu)
+{
+ int irq, ret;
+
+ irq = create_irq();
+ if (!irq) {
+ printk(KERN_ERR "IOMMU: no free vectors\n");
+ return -EINVAL;
+ }
+
+ set_irq_data(irq, iommu);
+ iommu->irq = irq;
+
+ ret = arch_setup_dmar_msi(irq);
+ if (ret) {
+ set_irq_data(irq, NULL);
+ iommu->irq = 0;
+ destroy_irq(irq);
+ return 0;
+ }
+
+ /* Force fault register is cleared */
+ iommu_page_fault(irq, iommu);
+
+ ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
+ if (ret)
+ printk(KERN_ERR "IOMMU: can't request irq\n");
+ return ret;
+}
+
static int iommu_init_domains(struct intel_iommu *iommu)
{
unsigned long ndomains;
@@ -1436,6 +1626,10 @@

iommu_flush_write_buffer(iommu);

+ ret = dmar_set_interrupt(iommu);
+ if (ret)
+ goto error;
+
iommu_set_root_entry(iommu);

iommu_flush_context_global(iommu, 0);
Index: linux-2.6.22-rc3/include/linux/dmar.h
===================================================================
--- linux-2.6.22-rc3.orig/include/linux/dmar.h 2007-06-04 12:40:29.000000000 -0700
+++ linux-2.6.22-rc3/include/linux/dmar.h 2007-06-04 12:40:58.000000000 -0700
@@ -28,6 +28,18 @@

struct intel_iommu;

+extern char *dmar_get_fault_reason(u8 fault_reason);
+
+/* Can't use the common MSI interrupt functions
+ * since DMAR is not a pci device
+ */
+extern void dmar_msi_unmask(unsigned int irq);
+extern void dmar_msi_mask(unsigned int irq);
+extern void dmar_msi_read(int irq, struct msi_msg *msg);
+extern void dmar_msi_write(int irq, struct msi_msg *msg);
+extern int dmar_set_interrupt(struct intel_iommu *iommu);
+extern int arch_setup_dmar_msi(unsigned int irq);
+
/* Intel IOMMU detection and initialization functions */
extern void detect_intel_iommu(void);
extern int intel_iommu_init(void);

--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/