[PATCH 13/14] iommu/vt-d: Add qi_batch for dmar_domain
From: Lu Baolu
Date: Sun Sep 01 2024 - 22:34:20 EST
Introduces a qi_batch structure to hold batched cache invalidation
descriptors on a per-dmar_domain basis. A fixed-size descriptor
array is used for simplicity. The qi_batch is allocated when the
first cache tag is added to the domain and freed during
iommu_free_domain().
Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
Signed-off-by: Tina Zhang <tina.zhang@xxxxxxxxx>
Link: https://lore.kernel.org/r/20240815065221.50328-4-tina.zhang@xxxxxxxxx
---
drivers/iommu/intel/iommu.h | 14 ++++++++++++++
drivers/iommu/intel/cache.c | 7 +++++++
drivers/iommu/intel/iommu.c | 1 +
drivers/iommu/intel/nested.c | 1 +
drivers/iommu/intel/svm.c | 5 ++++-
5 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index ba40db4f8399..428d253f1348 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -584,6 +584,19 @@ struct iommu_domain_info {
* to VT-d spec, section 9.3 */
};
+/*
+ * We start simply by using a fixed size for the batched descriptors. This
+ * size is currently sufficient for our needs. Future improvements could
+ * involve dynamically allocating the batch buffer based on actual demand,
+ * allowing us to adjust the batch size for optimal performance in different
+ * scenarios.
+ */
+#define QI_MAX_BATCHED_DESC_COUNT 16
+struct qi_batch {
+ struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT];
+ unsigned int index;
+};
+
struct dmar_domain {
int nid; /* node id */
struct xarray iommu_array; /* Attached IOMMU array */
@@ -608,6 +621,7 @@ struct dmar_domain {
spinlock_t cache_lock; /* Protect the cache tag list */
struct list_head cache_tags; /* Cache tag list */
+ struct qi_batch *qi_batch; /* Batched QI descriptors */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 08f7ce2c16c3..2e997d782beb 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -190,6 +190,13 @@ int cache_tag_assign_domain(struct dmar_domain *domain,
u16 did = domain_get_id_for_dev(domain, dev);
int ret;
+ /* domain->qi_bach will be freed in iommu_free_domain() path. */
+ if (!domain->qi_batch) {
+ domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL);
+ if (!domain->qi_batch)
+ return -ENOMEM;
+ }
+
ret = __cache_tag_assign_domain(domain, did, dev, pasid);
if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
return ret;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 779e258188cd..0500022e789e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1572,6 +1572,7 @@ static void domain_exit(struct dmar_domain *domain)
if (WARN_ON(!list_empty(&domain->devices)))
return;
+ kfree(domain->qi_batch);
kfree(domain);
}
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index 36a91b1b52be..433c58944401 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -83,6 +83,7 @@ static void intel_nested_domain_free(struct iommu_domain *domain)
spin_lock(&s2_domain->s1_lock);
list_del(&dmar_domain->s2_link);
spin_unlock(&s2_domain->s1_lock);
+ kfree(dmar_domain->qi_batch);
kfree(dmar_domain);
}
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index ef12e95e400a..078d1e32a24e 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -184,7 +184,10 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
static void intel_mm_free_notifier(struct mmu_notifier *mn)
{
- kfree(container_of(mn, struct dmar_domain, notifier));
+ struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
+
+ kfree(domain->qi_batch);
+ kfree(domain);
}
static const struct mmu_notifier_ops intel_mmuops = {
--
2.34.1