[PATCH 8/9] iommu/amd: Decrypt interrupt remapping table for AMD IOMMU emulation in SEV guest

From: Suravee Suthikulpanit
Date: Tue Apr 30 2024 - 11:26:49 EST


The interrupt remapping table must be decrypted so that the VMM can access
the memory to emulate interrupt remapping. However, the AMD IOMMU driver
currently allocates the table with kmem_cache, mainly to enforce the
128-byte memory alignment specified in the AMD IOMMU spec.

For an SEV guest, memory encryption is managed at page granularity. The
driver must therefore be modified to allocate the table using page-aligned
memory, which still satisfies the original 128-byte alignment.

In addition, since the table is set up per device, it can be allocated from
NUMA-aware pages to help reduce IRTE access latency.

Suggested-by: Thomas Lendacky <thomas.lendacky@xxxxxxx>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/amd_iommu_types.h | 3 +-
drivers/iommu/amd/init.c | 31 +++++++-----------
drivers/iommu/amd/iommu.c | 50 ++++++++++++++++-------------
3 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 8ced34cac1db..980fbb9bae39 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -309,7 +309,6 @@
* AMD IOMMU hardware only support 512 IRTEs despite
* the architectural limitation of 2048 entries.
*/
-#define DTE_INTTAB_ALIGNMENT 128
#define DTE_INTTABLEN_VALUE 9ULL
#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1)
#define DTE_INTTABLEN_MASK (0xfULL << 1)
@@ -497,7 +496,7 @@ struct amd_iommu_mem {
struct irq_remap_table {
raw_spinlock_t lock;
unsigned min_index;
- u32 *table;
+ struct amd_iommu_mem mem;
};

/* Interrupt remapping feature used? */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 1b74a31b4337..b3ff89952c7f 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -697,6 +697,17 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se

static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
{
+ int i;
+ struct irq_remap_table *table;
+
+ for (i = 0 ; i <= pci_seg->last_bdf; ++i) {
+ table = pci_seg->irq_lookup_table[i];
+ if (table) {
+ amd_iommu_free_mem(&table->mem);
+ kfree(table);
+ }
+ }
+
kmemleak_free(pci_seg->irq_lookup_table);
iommu_free_pages(pci_seg->irq_lookup_table,
get_order(pci_seg->rlookup_table_size));
@@ -2923,9 +2934,6 @@ static struct syscore_ops amd_iommu_syscore_ops = {

static void __init free_iommu_resources(void)
{
- kmem_cache_destroy(amd_iommu_irq_cache);
- amd_iommu_irq_cache = NULL;
-
free_iommu_all();
free_pci_segments();
}
@@ -3026,7 +3034,7 @@ static void __init ivinfo_init(void *ivrs)
static int __init early_amd_iommu_init(void)
{
struct acpi_table_header *ivrs_base;
- int remap_cache_sz, ret;
+ int ret;
acpi_status status;

if (!amd_iommu_detected)
@@ -3090,21 +3098,6 @@ static int __init early_amd_iommu_init(void)

if (amd_iommu_irq_remap) {
struct amd_iommu_pci_seg *pci_seg;
- /*
- * Interrupt remapping enabled, create kmem_cache for the
- * remapping tables.
- */
- ret = -ENOMEM;
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
- else
- remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
- amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
- remap_cache_sz,
- DTE_INTTAB_ALIGNMENT,
- 0, NULL);
- if (!amd_iommu_irq_cache)
- goto out;

for_each_pci_segment(pci_seg) {
if (alloc_irq_lookup_table(pci_seg))
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 4f95c726e139..f98a10b7925b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -73,8 +73,6 @@ struct iommu_cmd {
u32 data[4];
};

-struct kmem_cache *amd_iommu_irq_cache;
-
static void detach_device(struct device *dev);

static void set_dte_entry(struct amd_iommu *iommu,
@@ -2998,7 +2996,7 @@ static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,

dte = dev_table[devid].data[2];
dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
- dte |= iommu_virt_to_phys(table->table);
+ dte |= amd_iommu_mem_to_phys(&table->mem);
dte |= DTE_IRQ_REMAP_INTCTL;
dte |= DTE_INTTABLEN;
dte |= DTE_IRQ_REMAP_ENABLE;
@@ -3024,27 +3022,35 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
return table;
}

-static struct irq_remap_table *__alloc_irq_table(void)
+static size_t get_irq_table_size(void)
+{
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ return (MAX_IRQS_PER_TABLE * sizeof(u32));
+ else
+ return (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2));
+}
+
+static struct irq_remap_table *__alloc_irq_table(struct amd_iommu *iommu)
{
+ struct amd_iommu_mem *mem;
struct irq_remap_table *table;
+ int order = get_order(get_irq_table_size());
+ int nid = (iommu && iommu->dev) ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;

table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return NULL;

- table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
- if (!table->table) {
+ mem = &table->mem;
+ mem->modes = ALLOC_MODE_GUEST_MEM_DECRYPT;
+ mem->order = order;
+ mem->buf = amd_iommu_get_zeroed_mem_node(nid, GFP_KERNEL, mem);
+ if (!mem->buf) {
kfree(table);
return NULL;
}
raw_spin_lock_init(&table->lock);

- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- memset(table->table, 0,
- MAX_IRQS_PER_TABLE * sizeof(u32));
- else
- memset(table->table, 0,
- (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
return table;
}

@@ -3101,7 +3107,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
spin_unlock_irqrestore(&iommu_table_lock, flags);

/* Nothing there yet, allocate new irq remapping table */
- new_table = __alloc_irq_table();
+ new_table = __alloc_irq_table(iommu);
if (!new_table)
return NULL;

@@ -3136,7 +3142,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
spin_unlock_irqrestore(&iommu_table_lock, flags);

if (new_table) {
- kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+ amd_iommu_free_mem(&new_table->mem);
kfree(new_table);
}
return table;
@@ -3202,7 +3208,7 @@ static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,

raw_spin_lock_irqsave(&table->lock, flags);

- entry = (struct irte_ga *)table->table;
+ entry = (struct irte_ga *)table->mem.buf;
entry = &entry[index];

/*
@@ -3244,7 +3250,7 @@ static int modify_irte(struct amd_iommu *iommu,
return -ENOMEM;

raw_spin_lock_irqsave(&table->lock, flags);
- table->table[index] = irte->val;
+ ((u32 *)table->mem.buf)[index] = irte->val;
raw_spin_unlock_irqrestore(&table->lock, flags);

iommu_flush_irt_and_complete(iommu, devid);
@@ -3358,12 +3364,12 @@ static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid
#define IRTE_ALLOCATED (~1U)
static void irte_set_allocated(struct irq_remap_table *table, int index)
{
- table->table[index] = IRTE_ALLOCATED;
+ ((u32 *)table->mem.buf)[index] = IRTE_ALLOCATED;
}

static void irte_ga_set_allocated(struct irq_remap_table *table, int index)
{
- struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *ptr = (struct irte_ga *)table->mem.buf;
struct irte_ga *irte = &ptr[index];

memset(&irte->lo.val, 0, sizeof(u64));
@@ -3373,7 +3379,7 @@ static void irte_ga_set_allocated(struct irq_remap_table *table, int index)

static bool irte_is_allocated(struct irq_remap_table *table, int index)
{
- union irte *ptr = (union irte *)table->table;
+ union irte *ptr = (union irte *)table->mem.buf;
union irte *irte = &ptr[index];

return irte->val != 0;
@@ -3381,7 +3387,7 @@ static bool irte_is_allocated(struct irq_remap_table *table, int index)

static bool irte_ga_is_allocated(struct irq_remap_table *table, int index)
{
- struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *ptr = (struct irte_ga *)table->mem.buf;
struct irte_ga *irte = &ptr[index];

return irte->hi.fields.vector != 0;
@@ -3389,12 +3395,12 @@ static bool irte_ga_is_allocated(struct irq_remap_table *table, int index)

static void irte_clear_allocated(struct irq_remap_table *table, int index)
{
- table->table[index] = 0;
+ ((u32 *)table->mem.buf)[index] = 0;
}

static void irte_ga_clear_allocated(struct irq_remap_table *table, int index)
{
- struct irte_ga *ptr = (struct irte_ga *)table->table;
+ struct irte_ga *ptr = (struct irte_ga *)table->mem.buf;
struct irte_ga *irte = &ptr[index];

memset(&irte->lo.val, 0, sizeof(u64));
--
2.34.1