Re: [PATCH v8 08/10] iommu/vt-d: Add custom allocator for IOASID

From: Jacob Pan
Date: Thu Jan 09 2020 - 17:01:28 EST


On Wed, 18 Dec 2019 12:10:55 +0800
Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx> wrote:

> Hi,
>
> On 12/17/19 3:24 AM, Jacob Pan wrote:
> > When the VT-d driver runs in the guest, PASID allocation must be
> > performed via the virtual command interface. This patch registers a
> > custom IOASID allocator which takes precedence over the default
> > XArray based allocator. The resulting IOASID allocation will always
> > come from the host. This ensures that the PASID namespace is
> > system-wide.
> >
> > Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> > Signed-off-by: Liu, Yi L <yi.l.liu@xxxxxxxxx>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> > ---
> >  drivers/iommu/intel-iommu.c | 75 +++++++++++++++++++++++++++++++++++++++++++++
> >  include/linux/intel-iommu.h |  2 ++
> >  2 files changed, 77 insertions(+)
> >
> > diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> > index e90102c7540d..b0c0bb6f740e 100644
> > --- a/drivers/iommu/intel-iommu.c
> > +++ b/drivers/iommu/intel-iommu.c
> > @@ -1700,6 +1700,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
> >  		if (ecap_prs(iommu->ecap))
> >  			intel_svm_finish_prq(iommu);
> >  	}
> > +	if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap))
> > +		ioasid_unregister_allocator(&iommu->pasid_allocator);
> > +
> >  #endif
> >  }
> >
> > @@ -3181,6 +3184,75 @@ static int copy_translation_tables(struct intel_iommu *iommu)
> >  	return ret;
> >  }
> >
> > +#ifdef CONFIG_INTEL_IOMMU_SVM
> > +static ioasid_t intel_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
> > +{
> > +	struct intel_iommu *iommu = data;
> > +	ioasid_t ioasid;
> > +
>
> Check !iommu just like the free api?
>
Sounds good, will return INVALID_IOASID if iommu is NULL.
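
Roughly what I have in mind for the next version (untested sketch; the
NULL check is the only change from the hunk quoted below):

static ioasid_t intel_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
{
	struct intel_iommu *iommu = data;
	ioasid_t ioasid;

	/* Same NULL check as in intel_ioasid_free() */
	if (!iommu)
		return INVALID_IOASID;

	/* Range check and vcmd call unchanged from the hunk below */
	if (min < PASID_MIN || max > intel_pasid_max_id)
		return INVALID_IOASID;

	if (vcmd_alloc_pasid(iommu, &ioasid))
		return INVALID_IOASID;

	return ioasid;
}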

> > +	/*
> > +	 * VT-d virtual command interface always uses the full 20 bit
> > +	 * PASID range. Host can partition guest PASID range based on
> > +	 * policies but it is out of guest's control.
> > +	 */
> > +	if (min < PASID_MIN || max > intel_pasid_max_id)
> > +		return INVALID_IOASID;
> > +
> > +	if (vcmd_alloc_pasid(iommu, &ioasid))
> > +		return INVALID_IOASID;
> > +
> > +	return ioasid;
> > +}
> > +
> > +static void intel_ioasid_free(ioasid_t ioasid, void *data)
> > +{
> > +	struct intel_iommu *iommu = data;
> > +
> > +	if (!iommu)
> > +		return;
> > +	/*
> > +	 * Sanity check of the ioasid owner is done at the upper layer,
> > +	 * e.g. VFIO. We can only free the PASID when all the devices
> > +	 * are unbound.
> > +	 */
> > +	if (ioasid_find(NULL, ioasid, NULL)) {
> > +		pr_alert("Cannot free active IOASID %d\n", ioasid);
> > +		return;
> > +	}
> > +	vcmd_free_pasid(iommu, ioasid);
> > +}
> > +
> > +static void register_pasid_allocator(struct intel_iommu *iommu)
> > +{
> > +	if (!intel_iommu_sm) {
>
> Use sm_supported(iommu) instead.
>
Sounds good. It seems we could separate the SM code more cleanly in the
future to avoid all these checks.
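
i.e. the entry check would become something like (untested sketch,
using the suggested helper):

	if (!sm_supported(iommu)) {
		pr_warn("VT-d scalable mode not enabled\n");
		return;
	}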

> > + pr_warn("VT-d scalable mode not enabled\n");
> > + return;
> > + }
> > +
> > +	/*
> > +	 * Register a custom PASID allocator if we are running in a guest;
> > +	 * guest PASIDs must be obtained via the virtual command interface.
> > +	 * There can be multiple vIOMMUs in each guest but only one
> > +	 * allocator is active. All vIOMMU allocators will eventually be
> > +	 * calling the same host allocator.
> > +	 */
> > +	if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap)) {
> > +		pr_info("Register custom PASID allocator\n");
> > +		iommu->pasid_allocator.alloc = intel_ioasid_alloc;
> > +		iommu->pasid_allocator.free = intel_ioasid_free;
> > +		iommu->pasid_allocator.pdata = (void *)iommu;
> > +		if (ioasid_register_allocator(&iommu->pasid_allocator)) {
> > +			pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
> > +			/*
> > +			 * Disable scalable mode on this IOMMU if there
> > +			 * is no custom allocator. Mixing SM capable vIOMMU
> > +			 * and non-SM vIOMMU is not supported.
> > +			 */
> > +			intel_iommu_sm = 0;
> > +		}
> > +	}
> > +}
> > +#endif
> > +
> >  static int __init init_dmars(void)
> >  {
> >  	struct dmar_drhd_unit *drhd;
> > @@ -3298,6 +3370,9 @@ static int __init init_dmars(void)
> >  	 */
> >  	for_each_active_iommu(iommu, drhd) {
> >  		iommu_flush_write_buffer(iommu);
> > +#ifdef CONFIG_INTEL_IOMMU_SVM
> > +		register_pasid_allocator(iommu);
> > +#endif
> >  		iommu_set_root_entry(iommu);
> >  		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
> >  		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
> > diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> > index 1e11560b0e59..8c30b23bd838 100644
> > --- a/include/linux/intel-iommu.h
> > +++ b/include/linux/intel-iommu.h
> > @@ -19,6 +19,7 @@
> >  #include <linux/iommu.h>
> >  #include <linux/io-64-nonatomic-lo-hi.h>
> >  #include <linux/dmar.h>
> > +#include <linux/ioasid.h>
> >
> >  #include <asm/cacheflush.h>
> >  #include <asm/iommu.h>
> > @@ -557,6 +558,7 @@ struct intel_iommu {
> >  #ifdef CONFIG_INTEL_IOMMU_SVM
> >  	struct page_req_dsc *prq;
> >  	unsigned char prq_name[16];	/* Name for PRQ interrupt */
> > +	struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
> >  #endif
> >  	struct q_inval *qi;		/* Queued invalidation info */
> >  	u32 *iommu_state;	/* Store iommu states between suspend and resume.*/
>
> Best regards,
> baolu

[Jacob Pan]