RE: [PATCH v4 4/5] iommu/vt-d: Add page request draining support
From: Tian, Kevin
Date: Thu May 07 2020 - 02:36:05 EST
> From: Lu Baolu
> Sent: Thursday, May 7, 2020 8:56 AM
>
> When a PASID is stopped or terminated, there can be pending PRQs
> (requests that haven't received responses) in remapping hardware.
> This adds the interface to drain page requests and call it when a
> PASID is terminated.
>
> Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> Signed-off-by: Liu Yi L <yi.l.liu@xxxxxxxxx>
> Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> ---
> drivers/iommu/intel-svm.c | 102 ++++++++++++++++++++++++++++++++++-
> -
> include/linux/intel-iommu.h | 4 ++
> 2 files changed, 101 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index 9561ba59a170..7256eb965cf6 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -23,6 +23,7 @@
> #include "intel-pasid.h"
>
> static irqreturn_t prq_event_thread(int irq, void *d);
> +static void intel_svm_drain_prq(struct device *dev, int pasid);
>
> #define PRQ_ORDER 0
>
> @@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
> dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
> dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu-
> >prq) | PRQ_ORDER);
>
> + init_completion(&iommu->prq_complete);
> +
> return 0;
> }
>
> @@ -403,12 +406,8 @@ int intel_svm_unbind_gpasid(struct device *dev, int
> pasid)
> list_del_rcu(&sdev->list);
> intel_pasid_tear_down_entry(iommu, dev,
> svm->pasid, false);
> + intel_svm_drain_prq(dev, svm->pasid);
> intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
> - /* TODO: Drain in flight PRQ for the PASID since it
> - * may get reused soon, we don't want to
> - * confuse with its previous life.
> - * intel_svm_drain_prq(dev, pasid);
> - */
> kfree_rcu(sdev, rcu);
>
> if (list_empty(&svm->devs)) {
> @@ -647,6 +646,7 @@ int intel_svm_unbind_mm(struct device *dev, int
> pasid)
> * hard to be as defensive as we might like. */
> intel_pasid_tear_down_entry(iommu, dev,
> svm->pasid, false);
> + intel_svm_drain_prq(dev, svm->pasid);
> intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
> kfree_rcu(sdev, rcu);
>
> @@ -725,6 +725,92 @@ static bool is_canonical_address(u64 addr)
> return (((saddr << shift) >> shift) == saddr);
> }
>
> +/**
> + * intel_svm_drain_prq:
> + *
> + * Drain all pending page requests and responses related to a specific
> + * pasid in both software and hardware.
> + */
> +static void intel_svm_drain_prq(struct device *dev, int pasid)
> +{
> + struct device_domain_info *info;
> + struct dmar_domain *domain;
> + struct intel_iommu *iommu;
> + struct qi_desc desc[3];
> + struct pci_dev *pdev;
> + int head, tail;
> + u16 sid, did;
> + int qdep;
> +
> + info = get_domain_info(dev);
> + if (WARN_ON(!info || !dev_is_pci(dev)))
> + return;
> +
> + if (!info->ats_enabled)
> + return;
This check should test pri_enabled rather than ats_enabled.
> +
> + iommu = info->iommu;
> + domain = info->domain;
> + pdev = to_pci_dev(dev);
> + sid = PCI_DEVID(info->bus, info->devfn);
> + did = domain->iommu_did[iommu->seq_id];
> + qdep = pci_ats_queue_depth(pdev);
> +
> + memset(desc, 0, sizeof(desc));
> + desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
> + QI_IWD_FENCE |
> + QI_IWD_TYPE;
> + desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
> + QI_EIOTLB_DID(did) |
> + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
> + QI_EIOTLB_TYPE;
> + desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
> + QI_DEV_EIOTLB_SID(sid) |
> + QI_DEV_EIOTLB_QDEP(qdep) |
> + QI_DEIOTLB_TYPE |
> + QI_DEV_IOTLB_PFSID(info->pfsid);
> +
> + /*
> + * Submit an invalidation wait descriptor with fence and page request
> + * drain flags set to invalidation queue. This ensures that all requests
> + * submitted to the invalidation queue ahead of this wait descriptor
> are
> + * processed and completed, and all already issued page requests
> from
> + * the device are put in the page request queue.
> + */
I feel this comment would be better placed earlier, since it explains the
overall flow covering all 3 descriptors. Also, it is not a single wait
descriptor that has both the fence and drain flags set: there are two wait
descriptors, one setting fence and the other setting drain.
> + qi_submit_sync(iommu, desc, 1, QI_OPT_WAIT_DRAIN);
The count should be '3' instead of '1'.
> +
> + /*
> + * Check and wait until all pending page requests in the queue are
> + * handled by the intr thread.
> + */
> +prq_retry:
> + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) &
> PRQ_RING_MASK;
> + head = dmar_readq(iommu->reg + DMAR_PQH_REG) &
> PRQ_RING_MASK;
> + while (head != tail) {
> + struct page_req_dsc *req;
> +
> + req = &iommu->prq[head / sizeof(*req)];
> + if (!req->pasid_present || req->pasid != pasid) {
> + head = (head + sizeof(*req)) & PRQ_RING_MASK;
> + continue;
> + }
> +
> + wait_for_completion_timeout(&iommu->prq_complete, HZ);
> + goto prq_retry;
> + }
> +
> + /*
> + * Perform steps described in VT-d spec CH7.10 to drain page
> + * requests and responses in hardware.
> + */
> +qi_retry:
> + qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
> + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
> + wait_for_completion_timeout(&iommu->prq_complete, HZ);
> + goto qi_retry;
> + }
> +}
> +
> static irqreturn_t prq_event_thread(int irq, void *d)
> {
> struct intel_iommu *iommu = d;
> @@ -859,6 +945,12 @@ static irqreturn_t prq_event_thread(int irq, void *d)
> }
>
> dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
> + /*
> + * Clear the page request overflow bit and wake up all threads that
> + * are waiting for the completion of this handling.
> + */
> + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
> + complete(&iommu->prq_complete);
>
> return IRQ_RETVAL(handled);
> }
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index cca1e5f9aeaa..a0512b401a59 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -292,6 +292,8 @@
>
> /* PRS_REG */
> #define DMA_PRS_PPR ((u32)1)
> +#define DMA_PRS_PRO ((u32)2)
> +
> #define DMA_VCS_PAS ((u64)1)
>
> #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts)
> \
> @@ -333,6 +335,7 @@ enum {
>
> #define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
> #define QI_IWD_STATUS_WRITE (((u64)1) << 5)
> +#define QI_IWD_FENCE (((u64)1) << 6)
> #define QI_IWD_PRQ_DRAIN (((u64)1) << 7)
>
> #define QI_IOTLB_DID(did) (((u64)did) << 16)
> @@ -590,6 +593,7 @@ struct intel_iommu {
> #ifdef CONFIG_INTEL_IOMMU_SVM
> struct page_req_dsc *prq;
> unsigned char prq_name[16]; /* Name for PRQ interrupt */
> + struct completion prq_complete;
> struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for
> PASIDs */
> #endif
> struct q_inval *qi; /* Queued invalidation info */
> --
> 2.17.1
>
> _______________________________________________
> iommu mailing list
> iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx
> https://lists.linuxfoundation.org/mailman/listinfo/iommu