[RFC PATCH 15/21] iommu/amd: Introduce vIOMMU vminit and vmdestroy ioctl

From: Suravee Suthikulpanit
Date: Wed Jun 21 2023 - 19:57:00 EST


These ioctl interfaces are called when QEMU initializes and destroys VMs.
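
For reference, here is a minimal sketch of the expected kernel-side call
flow (the ioctl plumbing is introduced elsewhere in the series; only
amd_viommu_iommu_init()/amd_viommu_iommu_destroy() and the iommu_id/gid
fields of struct amd_viommu_iommu_info come from this patch, the rest is
illustrative):

    struct amd_viommu_iommu_info info = {
        .iommu_id = iommu_devid,  /* PCI device ID of the host IOMMU (placeholder) */
    };

    /* VM init: allocate a guest ID and per-VM tables; the ID is returned in info.gid */
    ret = amd_viommu_iommu_init(&info);

    /* ... VM lifetime ... */

    /* VM destroy: tear down the per-VM state identified by info.gid */
    ret = amd_viommu_iommu_destroy(&info);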

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/amd_iommu.h | 2 +
drivers/iommu/amd/iommu.c | 4 +-
drivers/iommu/amd/viommu.c | 294 ++++++++++++++++++++++++++++++++++
3 files changed, 298 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index a65d22384ab8..fccae07e8c9f 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -85,6 +85,8 @@ extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3);
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
+extern void amd_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather);

extern void amd_iommu_build_efr(u64 *efr, u64 *efr2);

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b5c62bc8249c..f22b2a5a8bfc 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2447,8 +2447,8 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
spin_unlock_irqrestore(&dom->lock, flags);
}

-static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
- struct iommu_iotlb_gather *gather)
+void amd_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
{
struct protection_domain *dom = to_pdomain(domain);
unsigned long flags;
diff --git a/drivers/iommu/amd/viommu.c b/drivers/iommu/amd/viommu.c
index 18036d03c747..2bafa5102ffa 100644
--- a/drivers/iommu/amd/viommu.c
+++ b/drivers/iommu/amd/viommu.c
@@ -12,6 +12,7 @@

#include <linux/fs.h>
#include <linux/cdev.h>
+#include <linux/hashtable.h>
#include <linux/ioctl.h>
#include <linux/iommufd.h>
#include <linux/mem_encrypt.h>
@@ -28,8 +29,25 @@
#define SET_CTRL_BITS(reg, bit1, bit2, msk) \
((((reg) >> (bit1)) & (ULL(msk))) << (bit2))

+#define VIOMMU_MAX_GDEVID 0xFFFF
+#define VIOMMU_MAX_GDOMID 0xFFFF
+
+#define VIOMMU_GID_HASH_BITS 16
+static DEFINE_HASHTABLE(viommu_gid_hash, VIOMMU_GID_HASH_BITS);
+static DEFINE_SPINLOCK(viommu_gid_hash_lock);
+static u32 viommu_next_gid;
+static bool next_viommu_gid_wrapped;
+
LIST_HEAD(viommu_devid_map);

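+/*
+ * Per-VM vIOMMU state: guest ID, hash-table linkage and the private
+ * DeviceID/DomainID mapping tables for this guest.
+ */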
+struct amd_iommu_vminfo {
+ u16 gid;
+ bool init;
+ struct hlist_node hnode;
+ u64 *devid_table;
+ u64 *domid_table;
+};
+
struct amd_iommu *get_amd_iommu_from_devid(u16 devid)
{
struct amd_iommu *iommu;
@@ -138,6 +156,50 @@ static void *alloc_private_region(struct amd_iommu *iommu,
return NULL;
}

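+/*
+ * Allocate a zeroed, uncached buffer for one guest and map it into the
+ * vIOMMU private address space at (base + guestId * size).
+ */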
+static int alloc_private_vm_region(struct amd_iommu *iommu, u64 **entry,
+ u64 base, size_t size, u16 guestId)
+{
+ int ret;
+ u64 addr = base + (guestId * size);
+
+ *entry = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
+ if (!*entry)
+ return -ENOMEM;
+
+ ret = set_memory_uc((unsigned long)*entry, size >> PAGE_SHIFT);
+ if (ret)
+ return ret;
+
+ pr_debug("%s: entry=%#llx(%#llx), addr=%#llx\n", __func__,
+ (unsigned long long)*entry, iommu_virt_to_phys(*entry), addr);
+
+ ret = amd_iommu_v1_map_pages(&iommu->viommu_pdom->iop.iop.ops, addr,
+ iommu_virt_to_phys(*entry), PAGE_SIZE, (size / PAGE_SIZE),
+ IOMMU_PROT_IR | IOMMU_PROT_IW, GFP_KERNEL, NULL);
+
+ return ret;
+}
+
+static void free_private_vm_region(struct amd_iommu *iommu, u64 **entry,
+ u64 base, size_t size, u16 guestId)
+{
+ size_t ret;
+ struct iommu_iotlb_gather gather;
+ u64 addr = base + (guestId * size);
+
+ pr_debug("entry=%#llx(%#llx), addr=%#llx\n",
+ (unsigned long long)*entry,
+ iommu_virt_to_phys(*entry), addr);
+
+ if (!iommu || iommu->viommu_pdom)
+ return;
+ ret = amd_iommu_v1_unmap_pages(&iommu->viommu_pdom->iop.iop.ops,
+ addr, PAGE_SIZE, (size / PAGE_SIZE), &gather);
+ if (ret)
+ amd_iommu_iotlb_sync(&iommu->viommu_pdom->domain, &gather);
+
+ free_pages((unsigned long)*entry, get_order(size));
+ *entry = NULL;
+}
+
static int viommu_private_space_init(struct amd_iommu *iommu)
{
u64 pte_root = 0;
@@ -225,3 +287,235 @@ int __init iommu_init_viommu(struct amd_iommu *iommu)
amd_iommu_viommu = false;
return ret;
}
+
+static void viommu_uninit_one(struct amd_iommu *iommu, struct amd_iommu_vminfo *vminfo, u16 guestId)
+{
+ free_private_vm_region(iommu, &vminfo->devid_table,
+ VIOMMU_DEVID_MAPPING_BASE,
+ VIOMMU_DEVID_MAPPING_ENTRY_SIZE,
+ guestId);
+ free_private_vm_region(iommu, &vminfo->domid_table,
+ VIOMMU_DOMID_MAPPING_BASE,
+ VIOMMU_DOMID_MAPPING_ENTRY_SIZE,
+ guestId);
+}
+
+/*
+ * Clear the DevID via VFCTRL registers
+ * This function will be called during VM destroy via VFIO.
+ */
+static void clear_device_mapping(struct amd_iommu *iommu, u16 hDevId, u16 guestId,
+ u16 queueId, u16 gDevId)
+{
+ u64 val, tmp1, tmp2;
+ u8 __iomem *vfctrl;
+
+ /*
+ * Clear the DevID in VFCTRL registers
+ */
+ tmp1 = gDevId;
+ tmp1 = ((tmp1 & 0xFFFFULL) << 46);
+ tmp2 = hDevId;
+ tmp2 = ((tmp2 & 0xFFFFULL) << 14);
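+ /* Guest DevID in bits 61:46, host DevID in bits 29:14, plus control bits 63 and 0 */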
+ val = tmp1 | tmp2 | 0x8000000000000001ULL;
+ vfctrl = VIOMMU_VFCTRL_MMIO_BASE(iommu, guestId);
+ writeq(val, vfctrl + VIOMMU_VFCTRL_GUEST_DID_MAP_CONTROL0_OFFSET);
+}
+
+/*
+ * Clear the DomID via VFCTRL registers
+ * This function will be called during VM destroy via VFIO.
+ */
+static void clear_domain_mapping(struct amd_iommu *iommu, u16 hDomId, u16 guestId, u16 gDomId)
+{
+ u64 val, tmp1, tmp2;
+ u8 __iomem *vfctrl = VIOMMU_VFCTRL_MMIO_BASE(iommu, guestId);
+
+ tmp1 = gDomId;
+ tmp1 = ((tmp1 & 0xFFFFULL) << 46);
+ tmp2 = hDomId;
+ tmp2 = ((tmp2 & 0xFFFFULL) << 14);
+ val = tmp1 | tmp2 | 0x8000000000000001ULL;
+ writeq(val, vfctrl + VIOMMU_VFCTRL_GUEST_DID_MAP_CONTROL1_OFFSET);
+}
+
+static void viommu_clear_mapping(struct amd_iommu *iommu, u16 guestId)
+{
+ int i;
+
+ for (i = 0; i <= VIOMMU_MAX_GDEVID; i++)
+ clear_device_mapping(iommu, 0, guestId, 0, i);
+
+ for (i = 0; i <= VIOMMU_MAX_GDOMID; i++)
+ clear_domain_mapping(iommu, 0, guestId, i);
+}
+
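+/*
+ * Clear this guest's bit in the command buffer dirty-status mask.  The
+ * group/index/bit arithmetic below follows the layout of
+ * iommu->cmdbuf_dirty_mask for a 16-bit guest ID.
+ */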
+static void viommu_clear_dirty_status_mask(struct amd_iommu *iommu, unsigned int gid)
+{
+ u32 offset, index, bits;
+ u64 *group, val;
+
+ if (gid >= 256 * 256)
+ return;
+
+ group = (u64 *)(iommu->cmdbuf_dirty_mask +
+ (((gid & 0xFF) << 4) | (((gid >> 13) & 0x7) << 2)));
+ offset = (gid >> 8) & 0x1F;
+ index = offset >> 6;
+ bits = offset & 0x3F;
+
+ val = READ_ONCE(group[index]);
+ val &= ~(1ULL << bits);
+ WRITE_ONCE(group[index], val);
+}
+
+/*
+ * Allocate pages for the per-VM private regions:
+ * - Guest MMIO
+ * - DeviceID/DomainID mapping tables
+ * - Command buffer
+ * - Event/PPR (A/B) logs
+ * This function currently sets up the DeviceID and DomainID mapping
+ * tables and clears the guest's mappings and dirty-status bit.
+ */
+static int viommu_init_one(struct amd_iommu *iommu, struct amd_iommu_vminfo *vminfo)
+{
+ int ret;
+
+ ret = alloc_private_vm_region(iommu, &vminfo->devid_table,
+ VIOMMU_DEVID_MAPPING_BASE,
+ VIOMMU_DEVID_MAPPING_ENTRY_SIZE,
+ vminfo->gid);
+ if (ret)
+ goto err_out;
+
+ ret = alloc_private_vm_region(iommu, &vminfo->domid_table,
+ VIOMMU_DOMID_MAPPING_BASE,
+ VIOMMU_DOMID_MAPPING_ENTRY_SIZE,
+ vminfo->gid);
+ if (ret)
+ goto err_out;
+
+ viommu_clear_mapping(iommu, vminfo->gid);
+ viommu_clear_dirty_status_mask(iommu, vminfo->gid);
+
+ return 0;
+err_out:
+ viommu_uninit_one(iommu, vminfo, vminfo->gid);
+ return -ENOMEM;
+}
+
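+/*
+ * Allocate a 16-bit guest ID (GID).  IDs are 1-based; zero is never
+ * handed out.  Once the counter has wrapped, each candidate is checked
+ * against the hash table so IDs still in use are skipped.
+ */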
+int viommu_gid_alloc(struct amd_iommu *iommu, struct amd_iommu_vminfo *vminfo)
+{
+ u32 gid;
+ struct amd_iommu_vminfo *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&viommu_gid_hash_lock, flags);
+again:
+ gid = viommu_next_gid = (viommu_next_gid + 1) & 0xFFFF;
+
+ if (gid == 0) { /* id is 1-based, zero is not allowed */
+ next_viommu_gid_wrapped = 1;
+ goto again;
+ }
+ /* Is it still in use? Only possible if wrapped at least once */
+ if (next_viommu_gid_wrapped) {
+ hash_for_each_possible(viommu_gid_hash, tmp, hnode, gid) {
+ if (tmp->gid == gid)
+ goto again;
+ }
+ }
+
+ pr_debug("%s: gid=%u\n", __func__, gid);
+ vminfo->gid = gid;
+ hash_add(viommu_gid_hash, &vminfo->hnode, vminfo->gid);
+ spin_unlock_irqrestore(&viommu_gid_hash_lock, flags);
+ return 0;
+}
+
+static void viommu_gid_free(struct amd_iommu *iommu,
+ struct amd_iommu_vminfo *vminfo)
+{
+ unsigned long flags;
+
+ pr_debug("%s: gid=%u\n", __func__, vminfo->gid);
+ spin_lock_irqsave(&viommu_gid_hash_lock, flags);
+ hash_del(&vminfo->hnode);
+ spin_unlock_irqrestore(&viommu_gid_hash_lock, flags);
+}
+
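+/*
+ * Look up the per-VM info for a guest ID under the GID hash lock.
+ * Returns NULL if the GID is not known.
+ */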
+struct amd_iommu_vminfo *get_vminfo(struct amd_iommu *iommu, int gid)
+{
+ unsigned long flags;
+ struct amd_iommu_vminfo *tmp, *ptr = NULL;
+
+ spin_lock_irqsave(&viommu_gid_hash_lock, flags);
+ hash_for_each_possible(viommu_gid_hash, tmp, hnode, gid) {
+ if (tmp->gid == gid) {
+ ptr = tmp;
+ break;
+ }
+ }
+ if (!ptr)
+ pr_debug("%s : gid=%u not found\n", __func__, gid);
+ spin_unlock_irqrestore(&viommu_gid_hash_lock, flags);
+ return ptr;
+}
+
+int amd_viommu_iommu_init(struct amd_viommu_iommu_info *data)
+{
+ int ret;
+ struct amd_iommu_vminfo *vminfo;
+ unsigned int iommu_id = data->iommu_id;
+ struct amd_iommu *iommu = get_amd_iommu_from_devid(iommu_id);
+
+ if (!iommu)
+ return -ENODEV;
+
+ vminfo = kzalloc(sizeof(*vminfo), GFP_KERNEL);
+ if (!vminfo)
+ return -ENOMEM;
+
+ ret = viommu_gid_alloc(iommu, vminfo);
+ if (ret)
+ goto err_free;
+
+ ret = viommu_init_one(iommu, vminfo);
+ if (ret)
+ goto err_gid_free;
+
+ vminfo->init = true;
+ data->gid = vminfo->gid;
+ pr_debug("%s: iommu_id=%#x, gid=%#x\n", __func__,
+ pci_dev_id(iommu->dev), vminfo->gid);
+
+ return ret;
+
+err_gid_free:
+ viommu_gid_free(iommu, vminfo);
+err_free:
+ kfree(vminfo);
+ return ret;
+}
+EXPORT_SYMBOL(amd_viommu_iommu_init);
+
+int amd_viommu_iommu_destroy(struct amd_viommu_iommu_info *data)
+{
+ unsigned int gid = data->gid;
+ struct amd_iommu_vminfo *vminfo;
+ unsigned int iommu_id = data->iommu_id;
+ struct amd_iommu *iommu = get_amd_iommu_from_devid(iommu_id);
+
+ if (!iommu)
+ return -ENODEV;
+
+ vminfo = get_vminfo(iommu, gid);
+ if (!vminfo)
+ return -EINVAL;
+
+ viommu_uninit_one(iommu, vminfo, gid);
+ viommu_gid_free(iommu, vminfo);
+ kfree(vminfo);
+
+ return 0;
+}
+EXPORT_SYMBOL(amd_viommu_iommu_destroy);
--
2.34.1