RE: [PATCH v7 3/4] iommu/vt-d: Add set_dev_pasid callback for dma domain

From: Tian, Kevin
Date: Thu May 25 2023 - 02:57:33 EST


> From: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> Sent: Wednesday, May 24, 2023 1:35 AM
>
> @@ -1472,6 +1482,37 @@ static void iommu_flush_dev_iotlb(struct
> dmar_domain *domain,
> spin_lock_irqsave(&domain->lock, flags);
> list_for_each_entry(info, &domain->devices, link)
> __iommu_flush_dev_iotlb(info, addr, mask);
> +
> + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
> {
> + info = dev_iommu_priv_get(dev_pasid->dev);
> + qi_flush_dev_iotlb_pasid(info->iommu,
> + PCI_DEVID(info->bus, info->devfn),
> + info->pfsid, dev_pasid->pasid,
> + info->ats_qdep, addr,
> + mask);
> + }

Check info->ats_enabled instead of doing it blindly.

> +static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
> + struct dmar_domain *domain, u64 addr,
> + unsigned long npages, bool ih)
> +{
> + u16 did = domain_id_iommu(domain, iommu);
> + struct dev_pasid_info *dev_pasid;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&domain->lock, flags);
> + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
> + qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages,
> ih);
> +
> + if (!list_empty(&domain->devices))
> + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr,
> npages, ih);

Old code doesn't have this empty list check. I'm not sure whether any
corner case might exist but if you do plan to add it it's better to put it
in a separate patch to allow bisect.

> spin_unlock_irqrestore(&domain->lock, flags);
> }
>
> @@ -1492,7 +1533,7 @@ static void iommu_flush_iotlb_psi(struct
> intel_iommu *iommu,
> ih = 1 << 6;
>
> if (domain->use_first_level) {
> - qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, pages,
> ih);
> + domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
> } else {
> unsigned long bitmask = aligned_pages - 1;
>

Why cannot this pasid be used with a second level config?

> @@ -4720,25 +4762,99 @@ static void intel_iommu_iotlb_sync_map(struct
> iommu_domain *domain,
> static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t
> pasid)
> {
> struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
> + struct dev_pasid_info *curr, *dev_pasid = NULL;
> + struct dmar_domain *dmar_domain;
> struct iommu_domain *domain;
> + unsigned long flags;
>
> - /* Domain type specific cleanup: */
> domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
> - if (domain) {
> - switch (domain->type) {
> - case IOMMU_DOMAIN_SVA:
> - intel_svm_remove_dev_pasid(dev, pasid);
> - break;
> - default:
> - /* should never reach here */
> - WARN_ON(1);
> + if (!domain)
> + goto out_tear_down;
> +
> + /*
> + * The SVA implementation needs to stop mm notification, drain the
> + * pending page fault requests before tearing down the pasid entry.
> + * The VT-d spec (section 6.2.3.1) also recommends that software
> + * could use a reserved domain id for all first-only and pass-through
> + * translations. Hence there's no need to call
> domain_detach_iommu()
> + * in the sva domain case.
> + */

It's probably clearer to say:

/*
* SVA domain requires special treatment before tearing down the pasid
* entry:
* 1) pasid is stored in mm instead of in dev_pasid;
* 2) all SVA domains share a reserved domain id per recommendation
* from VT-d spec (section 6.2.3.1) so domain_detach_iommu() is
* not required;
* 3) additional cleanup is required e.g. stopping mm notification,
* draining the pending page fault requests, etc.
* Better handle it in a separate helper.
*/

>
> +static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
> + struct device *dev, ioasid_t pasid)
> +{
> + struct device_domain_info *info = dev_iommu_priv_get(dev);
> + struct dmar_domain *dmar_domain = to_dmar_domain(domain);
> + struct intel_iommu *iommu = info->iommu;
> + struct dev_pasid_info *dev_pasid;
> + unsigned long flags;
> + int ret;
> +
> + if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
> + return -EOPNOTSUPP;
> +
> + if (context_copied(iommu, info->bus, info->devfn))
> + return -EBUSY;
> +
> + ret = prepare_domain_attach_device(domain, dev);
> + if (ret)
> + return ret;
> +
> + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
> + if (!dev_pasid)
> + return -ENOMEM;

should it check whether this pasid has been attached?

> +
> + ret = domain_attach_iommu(dmar_domain, iommu);
> + if (ret)
> + goto out_free;
> +
> + if (domain_type_is_si(dmar_domain))
> + ret = intel_pasid_setup_pass_through(iommu, dmar_domain,
> + dev, pasid);
> + else if (dmar_domain->use_first_level)
> + ret = domain_setup_first_level(iommu, dmar_domain,
> + dev, pasid);
> + else
> + ret = intel_pasid_setup_second_level(iommu, dmar_domain,
> + dev, pasid);

Here you allow attaching pasid to a domain using second-level but all
prior changes are only for first-level.