[patch 01/26] x64, x2apic/intr-remap: Intel vt-d, IOMMU code reorganization

From: Suresh Siddha
Date: Thu Jul 10 2008 - 14:45:13 EST


code reorganization of the generic Intel vt-d parsing related routines and linux
iommu routines specific to Intel vt-d.

drivers/pci/dmar.c now contains the generic vt-d parsing related routines
drivers/pci/intel_iommu.c contains the iommu routines specific to vt-d

Signed-off-by: Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
---

Index: tree-x86/drivers/pci/dmar.c
===================================================================
--- tree-x86.orig/drivers/pci/dmar.c 2008-07-10 09:51:45.000000000 -0700
+++ tree-x86/drivers/pci/dmar.c 2008-07-10 09:51:46.000000000 -0700
@@ -19,9 +19,11 @@
* Author: Shaohua Li <shaohua.li@xxxxxxxxx>
* Author: Anil S Keshavamurthy <anil.s.keshavamurthy@xxxxxxxxx>
*
- * This file implements early detection/parsing of DMA Remapping Devices
+ * This file implements early detection/parsing of Remapping Devices
* reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
* tables.
+ *
+ * These routines are used by both DMA-remapping and Interrupt-remapping
*/

#include <linux/pci.h>
@@ -300,6 +302,37 @@
return ret;
}

+int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
+ struct pci_dev *dev)
+{
+ int index;
+
+ while (dev) {
+ for (index = 0; index < cnt; index++)
+ if (dev == devices[index])
+ return 1;
+
+ /* Check our parent */
+ dev = dev->bus->self;
+ }
+
+ return 0;
+}
+
+struct dmar_drhd_unit *
+dmar_find_matched_drhd_unit(struct pci_dev *dev)
+{
+ struct dmar_drhd_unit *drhd = NULL;
+
+ list_for_each_entry(drhd, &dmar_drhd_units, list) {
+ if (drhd->include_all || dmar_pci_device_match(drhd->devices,
+ drhd->devices_cnt, dev))
+ return drhd;
+ }
+
+ return NULL;
+}
+

int __init dmar_table_init(void)
{
@@ -343,3 +376,58 @@

return (ACPI_SUCCESS(status) ? 1 : 0);
}
+
+struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+ struct dmar_drhd_unit *drhd)
+{
+ int map_size;
+ u32 ver;
+
+ iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
+ if (!iommu->reg) {
+ printk(KERN_ERR "IOMMU: can't map the region\n");
+ goto error;
+ }
+ iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+ iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+ /* the registers might be more than one page */
+ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+ cap_max_fault_reg_offset(iommu->cap));
+ map_size = PAGE_ALIGN_4K(map_size);
+ if (map_size > PAGE_SIZE_4K) {
+ iounmap(iommu->reg);
+ iommu->reg = ioremap(drhd->reg_base_addr, map_size);
+ if (!iommu->reg) {
+ printk(KERN_ERR "IOMMU: can't map the region\n");
+ goto error;
+ }
+ }
+
+ ver = readl(iommu->reg + DMAR_VER_REG);
+ pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
+ drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
+ iommu->cap, iommu->ecap);
+
+ spin_lock_init(&iommu->register_lock);
+
+ drhd->iommu = iommu;
+ return iommu;
+error:
+ kfree(iommu);
+ return NULL;
+}
+
+void free_iommu(struct intel_iommu *iommu)
+{
+ if (!iommu)
+ return;
+
+#ifdef CONFIG_DMAR
+ free_dmar_iommu(iommu);
+#endif
+
+ if (iommu->reg)
+ iounmap(iommu->reg);
+ kfree(iommu);
+}
Index: tree-x86/drivers/pci/intel-iommu.c
===================================================================
--- tree-x86.orig/drivers/pci/intel-iommu.c 2008-07-10 09:51:45.000000000 -0700
+++ tree-x86/drivers/pci/intel-iommu.c 2008-07-10 09:51:46.000000000 -0700
@@ -990,6 +990,8 @@
return -ENOMEM;
}

+ spin_lock_init(&iommu->lock);
+
/*
* if Caching mode is set, then invalid translations are tagged
* with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +1000,15 @@
set_bit(0, iommu->domain_ids);
return 0;
}
-static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
- struct dmar_drhd_unit *drhd)
-{
- int ret;
- int map_size;
- u32 ver;
-
- iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
- if (!iommu->reg) {
- printk(KERN_ERR "IOMMU: can't map the region\n");
- goto error;
- }
- iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
- iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
-
- /* the registers might be more than one page */
- map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
- cap_max_fault_reg_offset(iommu->cap));
- map_size = PAGE_ALIGN_4K(map_size);
- if (map_size > PAGE_SIZE_4K) {
- iounmap(iommu->reg);
- iommu->reg = ioremap(drhd->reg_base_addr, map_size);
- if (!iommu->reg) {
- printk(KERN_ERR "IOMMU: can't map the region\n");
- goto error;
- }
- }

- ver = readl(iommu->reg + DMAR_VER_REG);
- pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
- drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
- iommu->cap, iommu->ecap);
- ret = iommu_init_domains(iommu);
- if (ret)
- goto error_unmap;
- spin_lock_init(&iommu->lock);
- spin_lock_init(&iommu->register_lock);
-
- drhd->iommu = iommu;
- return iommu;
-error_unmap:
- iounmap(iommu->reg);
-error:
- kfree(iommu);
- return NULL;
-}

static void domain_exit(struct dmar_domain *domain);
-static void free_iommu(struct intel_iommu *iommu)
+
+void free_dmar_iommu(struct intel_iommu *iommu)
{
struct dmar_domain *domain;
int i;

- if (!iommu)
- return;
-
i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
for (; i < cap_ndoms(iommu->cap); ) {
domain = iommu->domains[i];
@@ -1078,10 +1033,6 @@

/* free context mapping */
free_context_table(iommu);
-
- if (iommu->reg)
- iounmap(iommu->reg);
- kfree(iommu);
}

static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1377,6 @@
return NULL;
}

-static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
- struct pci_dev *dev)
-{
- int index;
-
- while (dev) {
- for (index = 0; index < cnt; index++)
- if (dev == devices[index])
- return 1;
-
- /* Check our parent */
- dev = dev->bus->self;
- }
-
- return 0;
-}
-
-static struct dmar_drhd_unit *
-dmar_find_matched_drhd_unit(struct pci_dev *dev)
-{
- struct dmar_drhd_unit *drhd = NULL;
-
- list_for_each_entry(drhd, &dmar_drhd_units, list) {
- if (drhd->include_all || dmar_pci_device_match(drhd->devices,
- drhd->devices_cnt, dev))
- return drhd;
- }
-
- return NULL;
-}
-
/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
@@ -1764,6 +1684,10 @@
goto error;
}

+ ret = iommu_init_domains(iommu);
+ if (ret)
+ goto error;
+
/*
* TBD:
* we could share the same root & context tables
Index: tree-x86/drivers/pci/intel-iommu.h
===================================================================
--- tree-x86.orig/drivers/pci/intel-iommu.h 2008-07-10 09:51:45.000000000 -0700
+++ tree-x86/drivers/pci/intel-iommu.h 2008-07-10 09:51:46.000000000 -0700
@@ -27,19 +27,7 @@
#include <linux/sysdev.h>
#include "iova.h"
#include <linux/io.h>
-
-/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
- */
-#define PAGE_SHIFT_4K (12)
-#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
-
-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
-#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+#include "dma_remapping.h"

/*
* Intel IOMMU register specification per version 1.0 public spec.
@@ -187,158 +175,31 @@
#define dma_frcd_source_id(c) (c & 0xffff)
#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */

-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
- u64 val;
- u64 rsvd1;
-};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
- return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
- root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
- root->val |= value & PAGE_MASK_4K;
-}
-
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
- return (struct context_entry *)
- (root_present(root)?phys_to_virt(
- root->val & PAGE_MASK_4K):
- NULL);
-}
-
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
- u64 lo;
- u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
-#define context_address_width(c) ((c).hi & 7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
- do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
- do { \
- (c).lo &= (((u64)-1) << 4) | 3; \
- (c).lo |= ((val) & 3) << 2; \
- } while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
- do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
- do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
-
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
- u64 val;
-};
-#define dma_clear_pte(p) do {(p).val = 0;} while (0)
-
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
-
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
- do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
-#define dma_set_pte_addr(p, addr) do {\
- (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
-
-struct intel_iommu;
-
-struct dmar_domain {
- int id; /* domain id */
- struct intel_iommu *iommu; /* back pointer to owning iommu */
-
- struct list_head devices; /* all devices' list */
- struct iova_domain iovad; /* iova's that belong to this domain */
-
- struct dma_pte *pgd; /* virtual address */
- spinlock_t mapping_lock; /* page table lock */
- int gaw; /* max guest address width */
-
- /* adjusted guest address width, 0 is level 2 30-bit */
- int agaw;
-
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
- int flags;
-};
-
-/* PCI domain-device relationship */
-struct device_domain_info {
- struct list_head link; /* link to domain siblings */
- struct list_head global; /* link to global list */
- u8 bus; /* PCI bus numer */
- u8 devfn; /* PCI devfn number */
- struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
- struct dmar_domain *domain; /* pointer to domain */
-};
-
-extern int init_dmars(void);
-
struct intel_iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
u64 cap;
u64 ecap;
- unsigned long *domain_ids; /* bitmap of domains */
- struct dmar_domain **domains; /* ptr to domains */
int seg;
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
- spinlock_t lock; /* protect context, domain ids */
spinlock_t register_lock; /* protect register handling */
+
+#ifdef CONFIG_DMAR
+ unsigned long *domain_ids; /* bitmap of domains */
+ struct dmar_domain **domains; /* ptr to domains */
+ spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */

unsigned int irq;
unsigned char name[7]; /* Device Name */
struct msi_msg saved_msg;
struct sys_device sysdev;
+#endif
};

-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
-{
- return;
-}
-#endif /* !CONFIG_DMAR_GFX_WA */
+extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+extern struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
+ struct dmar_drhd_unit *drhd);
+extern void free_iommu(struct intel_iommu *iommu);

#endif
Index: tree-x86/drivers/pci/dma_remapping.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ tree-x86/drivers/pci/dma_remapping.h 2008-07-10 09:51:46.000000000 -0700
@@ -0,0 +1,155 @@
+#ifndef _DMA_REMAPPING_H
+#define _DMA_REMAPPING_H
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K (12)
+#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+ u64 val;
+ u64 rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+ return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+ root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+ root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+ return (struct context_entry *)
+ (root_present(root)?phys_to_virt(
+ root->val & PAGE_MASK_4K):
+ NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+ u64 lo;
+ u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi & 7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+ do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+ do { \
+ (c).lo &= (((u64)-1) << 4) | 3; \
+ (c).lo |= ((val) & 3) << 2; \
+ } while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+ do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+ do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+ u64 val;
+};
+#define dma_clear_pte(p) do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+ do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+ (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+ int id; /* domain id */
+ struct intel_iommu *iommu; /* back pointer to owning iommu */
+
+ struct list_head devices; /* all devices' list */
+ struct iova_domain iovad; /* iova's that belong to this domain */
+
+ struct dma_pte *pgd; /* virtual address */
+ spinlock_t mapping_lock; /* page table lock */
+ int gaw; /* max guest address width */
+
+ /* adjusted guest address width, 0 is level 2 30-bit */
+ int agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+ int flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+ struct list_head link; /* link to domain siblings */
+ struct list_head global; /* link to global list */
+ u8 bus; /* PCI bus numer */
+ u8 devfn; /* PCI devfn number */
+ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+ struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+extern void free_dmar_iommu(struct intel_iommu *iommu);
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+ return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+#endif

--

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/