[PATCH v2 3/5] iommu/vt-d: Introduce interfaces for QI batching operations

From: Tina Zhang
Date: Thu Aug 08 2024 - 22:57:11 EST


Introduce qi_batch_xxx() interfaces to the VT-d driver to improve the
efficiency of IOTLB and Dev-IOTLB invalidation command processing.
Batching these commands before submission reduces the overhead that is
otherwise incurred when each invalidation is submitted individually.

The qi_batch_add_xxx() helpers accumulate invalidation descriptors into
a batch, while qi_batch_flush_descs() submits all accumulated
descriptors in one go.
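
For example, a caller could accumulate several invalidations and submit
them together (the snippet below is illustrative only; iommu, did, addr,
size_order, sid, pfsid, qdep and mask are assumed to come from the
caller's context, and the real call sites follow later in this series):

    struct qi_batch batch = {};

    /* Queue an IOTLB and a Dev-IOTLB invalidation into the batch. */
    qi_batch_add_iotlb_desc(iommu, did, addr, size_order,
                            DMA_TLB_PSI_FLUSH, &batch);
    qi_batch_add_dev_iotlb_desc(iommu, sid, pfsid, qdep, addr, mask, &batch);

    /*
     * A full batch (QI_MAX_BATCHED_DESC_COUNT descriptors) is submitted
     * automatically; the final flush pushes whatever remains with a
     * single qi_submit_sync() call.
     */
    qi_batch_flush_descs(iommu, &batch);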

Signed-off-by: Tina Zhang <tina.zhang@xxxxxxxxx>
---
drivers/iommu/intel/dmar.c | 78 +++++++++++++++++++++++++++++++++++++
drivers/iommu/intel/iommu.h | 39 +++++++++++++++++++
2 files changed, 117 insertions(+)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 64724af1a618..8d55c49382fc 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1636,6 +1636,84 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
qi_submit_sync(iommu, &desc, 1, 0);
}

+static void qi_batch_increment_index(struct intel_iommu *iommu,
+ struct qi_batch *batch)
+{
+ if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
+ qi_batch_flush_descs(iommu, batch);
+}
+
+void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+ if (!batch->index)
+ return;
+
+ qi_submit_sync(iommu, batch->descs, batch->index, 0);
+
+ /* Reset the index value and clean the whole batch buffer */
+ memset(batch, 0, sizeof(struct qi_batch));
+}
+
+void qi_batch_add_iotlb_desc(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type,
+ struct qi_batch *batch)
+{
+ qi_desc_iotlb(iommu, did, addr, size_order, type, &(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
+
+void qi_batch_add_dev_iotlb_desc(struct intel_iommu *iommu, u16 sid,
+ u16 pfsid, u16 qdep, u64 addr,
+ unsigned int mask,
+ struct qi_batch *batch)
+{
+ /*
+ * According to VT-d spec, software is recommended to not submit any Device-TLB
+ * invalidation requests while address remapping hardware is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+
+ qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
+
+void qi_batch_add_piotlb_desc(struct intel_iommu *iommu, u16 did,
+ u32 pasid, u64 addr,
+ unsigned long npages, bool ih,
+ struct qi_batch *batch)
+{
+ /*
+ * npages == -1 means a PASID-selective invalidation; a positive
+ * value means a Page-selective-within-PASID invalidation.
+ * 0 is not a valid input.
+ */
+ if (!npages)
+ return;
+
+ qi_desc_piotlb(did, pasid, addr, npages, ih, &(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
+
+void qi_batch_add_dev_iotlb_pasid_desc(struct intel_iommu *iommu,
+ u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep,
+ u64 addr, unsigned int size_order,
+ struct qi_batch *batch)
+{
+ /*
+ * According to VT-d spec, software is recommended to not submit any Device-TLB
+ * invalidation requests while address remapping hardware is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+
+ qi_desc_dev_iotlb_pasid(sid, pfsid, pasid,
+ qdep, addr, size_order,
+ &(batch->descs[batch->index]));
+ qi_batch_increment_index(iommu, batch);
+}
+
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u16 qdep, u64 addr, unsigned mask)
{
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index b67c14da1240..cd7c1d0a01c6 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -584,6 +584,22 @@ struct iommu_domain_info {
* to VT-d spec, section 9.3 */
};

+/*
+ * The QI_MAX_BATCHED_DESC_COUNT value is determined by two considerations:
+ * 1) Maximizing the batching of IOTLB and Dev-IOTLB invalidation commands, which is
+ * especially advantageous in virtualization environments where multiple devices may be
+ * associated with a single virtual IOMMU.
+ * 2) Minimizing unnecessary memory allocation for domains lacking ATS support.
+ *
+ * Future enhancements could include dynamically allocating the batch buffer based on actual
+ * demand, allowing for adjustments to the batch size to better accommodate various use cases.
+ */
+#define QI_MAX_BATCHED_DESC_COUNT 16
+struct qi_batch {
+ struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT];
+ unsigned int index;
+};
+
struct dmar_domain {
int nid; /* node id */
struct xarray iommu_array; /* Attached IOMMU array */
@@ -1098,6 +1114,29 @@ void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,

int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
unsigned int count, unsigned long options);
+
+void qi_batch_flush_descs(struct intel_iommu *iommu,
+ struct qi_batch *batch);
+
+void qi_batch_add_iotlb_desc(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type,
+ struct qi_batch *batch);
+
+void qi_batch_add_dev_iotlb_desc(struct intel_iommu *iommu, u16 sid,
+ u16 pfsid, u16 qdep, u64 addr,
+ unsigned int mask,
+ struct qi_batch *batch);
+
+void qi_batch_add_piotlb_desc(struct intel_iommu *iommu, u16 did,
+ u32 pasid, u64 addr,
+ unsigned long npages, bool ih,
+ struct qi_batch *batch);
+
+void qi_batch_add_dev_iotlb_pasid_desc(struct intel_iommu *iommu,
+ u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep,
+ u64 addr, unsigned int size_order,
+ struct qi_batch *batch);
/*
* Options used in qi_submit_sync:
* QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8.
--
2.43.0