From: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
Sent: Wednesday, April 15, 2020 1:26 PM
When a PASID is stopped or terminated, there can be pending
PRQs (requests that haven't received responses) in the remapping
hardware. Add an interface to drain page requests and call it
when a PASID is terminated.
Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
Signed-off-by: Liu Yi L <yi.l.liu@xxxxxxxxx>
Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
---
drivers/iommu/intel-svm.c | 90 ++++++++++++++++++++++++++++++++++---
include/linux/intel-iommu.h | 1 +
2 files changed, 86 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 05aeb8ea51c4..736dd39fb52b 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -23,6 +23,7 @@
#include "intel-pasid.h"
static irqreturn_t prq_event_thread(int irq, void *d);
+static void intel_svm_drain_prq(struct device *dev, int pasid);
#define PRQ_ORDER 0
@@ -210,6 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list) {
		intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
+		intel_svm_drain_prq(sdev->dev, svm->pasid);
I feel there is a problem here. If you clear the PASID entry before
draining, in-flight requests will hit an unrecoverable fault instead,
because the PASID entry is no longer valid.
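Something along these lines (illustrative only, not tested) would keep
the entry live while draining:

	/*
	 * Sketch of the ordering I have in mind: drain while the PASID
	 * entry is still valid so in-flight requests remain recoverable
	 * faults, then tear the entry down.  This assumes the caller
	 * has already stopped the device from issuing new page requests
	 * for this PASID.
	 */
	intel_svm_drain_prq(sdev->dev, svm->pasid);
	intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);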
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
}
rcu_read_unlock();
@@ -403,12 +405,8 @@ int intel_svm_unbind_gpasid(struct device *dev, int
pasid)
if (!sdev->users) {
list_del_rcu(&sdev->list);
			intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
+			intel_svm_drain_prq(dev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
- /* TODO: Drain in flight PRQ for the PASID since it
- * may get reused soon, we don't want to
- * confuse with its previous life.
- * intel_svm_drain_prq(dev, pasid);
- */
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
@@ -646,6 +644,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
* large and has to be physically contiguous. So it's
* hard to be as defensive as we might like. */
			intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
+			intel_svm_drain_prq(dev, svm->pasid);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
kfree_rcu(sdev, rcu);
@@ -703,6 +702,7 @@ struct page_req_dsc {
struct page_req {
struct list_head list;
struct page_req_dsc desc;
+ struct completion complete;
unsigned int processing:1;
unsigned int drained:1;
unsigned int completed:1;
@@ -732,9 +732,83 @@ static bool is_canonical_address(u64 addr)
return (((saddr << shift) >> shift) == saddr);
}
+/**
+ * intel_svm_drain_prq:
+ *
+ * Drain all pending page requests related to a specific pasid in both
+ * software and hardware. The caller must guarantee that no more page
+ * requests for this pasid will arrive.
+ */
+static void intel_svm_drain_prq(struct device *dev, int pasid)
+{
+ struct device_domain_info *info;
+ struct dmar_domain *domain;
+ struct intel_iommu *iommu;
+ struct qi_desc desc[3];
+ struct pci_dev *pdev;
+ struct page_req *req;
+ unsigned long flags;
+ u16 sid, did;
+ int qdep;
+
+ info = get_domain_info(dev);
+ if (WARN_ON(!info || !dev_is_pci(dev)))
+ return;
+
+ iommu = info->iommu;
+ domain = info->domain;
+ pdev = to_pci_dev(dev);
+
+ /* Mark all related pending requests drained. */
+ spin_lock_irqsave(&iommu->prq_lock, flags);
+ list_for_each_entry(req, &iommu->prq_list, list)
+ if (req->desc.pasid_present && req->desc.pasid == pasid)
+ req->drained = true;
+ spin_unlock_irqrestore(&iommu->prq_lock, flags);
+
+ /* Wait until all related pending requests complete. */
+retry:
+ spin_lock_irqsave(&iommu->prq_lock, flags);
+ list_for_each_entry(req, &iommu->prq_list, list) {
+ if (req->desc.pasid_present &&
+ req->desc.pasid == pasid &&
+ !req->completed) {
+ spin_unlock_irqrestore(&iommu->prq_lock, flags);
+			wait_for_completion_timeout(&req->complete, 5 * HZ);
+ goto retry;
+ }
+ }
+ spin_unlock_irqrestore(&iommu->prq_lock, flags);
+
+ /*
+ * Perform steps described in VT-d spec CH7.10 to drain page
+ * request and responses in hardware.
+ */
+ sid = PCI_DEVID(info->bus, info->devfn);
+ did = domain->iommu_did[iommu->seq_id];
+ qdep = pci_ats_queue_depth(pdev);
+
+ memset(desc, 0, sizeof(desc));
+ desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
+ QI_IWD_FENCE |
+ QI_IWD_TYPE;
+ desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
+ QI_DEV_EIOTLB_SID(sid) |
+ QI_DEV_EIOTLB_QDEP(qdep) |
+ QI_DEIOTLB_TYPE |
+ QI_DEV_IOTLB_PFSID(info->pfsid);
+
+ qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
The completion of the above sequence ensures that previously queued
page group responses have been sent out and received by the endpoint,
and conversely that all in-flight page requests from the endpoint have
been queued in the iommu page request queue. Then comes a problem: you
don't wait for completion of those newly queued requests and their
responses.
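Something like the following (untested sketch; prq_wait_pasid() is a
hypothetical helper standing in for the "retry:" scan-and-wait loop
above) would close that window:

	do {
		/* The 3-descriptor hardware drain sequence above. */
		qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
		/*
		 * Wait again in software: the drain itself may have
		 * pushed new requests for this pasid into the queue.
		 */
	} while (prq_wait_pasid(iommu, pasid));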
According to VT-d spec 7.10, step (d), when a queue overflow happens,
software needs to repeat the above draining sequence to drain the
auto-responses as well.
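For example (untested sketch; DMA_PRS_PRO stands for a page request
overflow status bit in DMAR_PRS_REG, which this patch doesn't define):

qi_retry:
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		/*
		 * Overflow happened and hardware auto-responded to some
		 * requests; repeat the drain per spec 7.10 step (d).
		 */
		goto qi_retry;
	}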
According to VT-d spec 7.11, the device driver must be notified to
revoke the PASID before this draining sequence happens. When does that
happen? It would be worth a comment here explaining that background.
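For instance, something like this above intel_svm_drain_prq() (wording
is only a suggestion):

	/*
	 * Per VT-d spec 7.11, the device driver must have been told to
	 * stop using (revoke) the PASID before this drain runs, so that
	 * the device issues no new page requests for it.  The caller is
	 * responsible for that hand-shake.
	 */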