RE: [PATCH 10/10] iommu/vt-d: Register PASID notifier for status change

From: Tian, Kevin
Date: Fri Mar 27 2020 - 06:23:03 EST


> From: Jacob Pan
> Sent: Thursday, March 26, 2020 1:56 AM
>
> In bare metal SVA, IOMMU driver ensures that IOASID free call always comes
> after IOASID unbind operation.
>
> However, for guest SVA the unbind and free call come from user space
> via VFIO, which could be out of order. This patch registers a notifier
> block in case IOASID free() comes before unbind such that VT-d driver
> can take action to clean up PASID context and data.

clearly the patch includes more than above usage. It also handles the
bind usage to notify KVM for setting PASID translation table.

>
> Signed-off-by: Liu Yi L <yi.l.liu@xxxxxxxxx>
> Signed-off-by: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx>
> ---
> drivers/iommu/intel-svm.c | 68
> ++++++++++++++++++++++++++++++++++++++++++++-
> include/linux/intel-iommu.h | 1 +
> 2 files changed, 68 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index f511855d187b..779dd2c6f9e1 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -23,6 +23,7 @@
> #include "intel-pasid.h"
>
> static irqreturn_t prq_event_thread(int irq, void *d);
> +static DEFINE_MUTEX(pasid_mutex);
>
> #define PRQ_ORDER 0
>
> @@ -92,6 +93,65 @@ static inline bool intel_svm_capable(struct
> intel_iommu *iommu)
> return iommu->flags & VTD_FLAG_SVM_CAPABLE;
> }
>
> +#define pasid_lock_held() lock_is_held(&pasid_mutex.dep_map)
> +
> +static int pasid_status_change(struct notifier_block *nb,
> + unsigned long code, void *data)
> +{
> + struct ioasid_nb_args *args = (struct ioasid_nb_args *)data;
> + struct intel_svm_dev *sdev;
> + struct intel_svm *svm;
> + int ret = NOTIFY_DONE;
> +
> + if (code == IOASID_FREE) {
> + /*
> + * Unbind all devices associated with this PASID which is
> + * being freed by other users such as VFIO.
> + */
> + mutex_lock(&pasid_mutex);
> + svm = ioasid_find(INVALID_IOASID_SET, args->id, NULL);
> + if (!svm || !svm->iommu)
> + goto done_unlock;

should we treat !svm->iommu as an error condition? if not, do you have
an example when it may occur in normal situation?

> +
> + if (IS_ERR(svm)) {
> + ret = NOTIFY_BAD;
> + goto done_unlock;
> + }

svm->iommu should be referenced after IS_ERR check

> +
> + list_for_each_entry_rcu(sdev, &svm->devs, list,
> pasid_lock_held()) {
> + /* Does not poison forward pointer */
> + list_del_rcu(&sdev->list);
> + intel_pasid_tear_down_entry(svm->iommu, sdev-
> >dev,
> + svm->pasid);
> + kfree_rcu(sdev, rcu);
> +
> + /*
> + * Free before unbind only happens with guest
> usaged
> + * host PASIDs. IOASID free will detach private data
> + * and free the IOASID entry.

"guest usaged host PASIDs"?

> + */
> + if (list_empty(&svm->devs))
> + kfree(svm);
> + }
> + mutex_unlock(&pasid_mutex);
> +
> + return NOTIFY_OK;
> + }
> +
> +done_unlock:
> + mutex_unlock(&pasid_mutex);
> + return ret;
> +}
> +
> +static struct notifier_block pasid_nb = {
> + .notifier_call = pasid_status_change,
> +};
> +
> +void intel_svm_add_pasid_notifier(void)
> +{
> + ioasid_add_notifier(&pasid_nb);
> +}
> +
> void intel_svm_check(struct intel_iommu *iommu)
> {
> if (!pasid_supported(iommu))
> @@ -219,7 +279,6 @@ static const struct mmu_notifier_ops intel_mmuops =
> {
> .invalidate_range = intel_invalidate_range,
> };
>
> -static DEFINE_MUTEX(pasid_mutex);
> static LIST_HEAD(global_svm_list);
>
> #define for_each_svm_dev(sdev, svm, d) \
> @@ -319,6 +378,7 @@ int intel_svm_bind_gpasid(struct iommu_domain
> *domain,
> svm->gpasid = data->gpasid;
> svm->flags |= SVM_FLAG_GUEST_PASID;
> }
> + svm->iommu = iommu;

ah, it's interesting to see we have a field defined before but never used. ð

>
> ioasid_attach_data(data->hpasid, svm);
> INIT_LIST_HEAD_RCU(&svm->devs);
> @@ -383,6 +443,11 @@ int intel_svm_bind_gpasid(struct iommu_domain
> *domain,
> }
> svm->flags |= SVM_FLAG_GUEST_MODE;
>
> + /*
> + * Notify KVM new host-guest PASID bind is ready. KVM will set up
> + * PASID translation table to support guest ENQCMD.
> + */

should take it as an example instead of the only possible usage.

> + ioasid_notify(data->hpasid, IOASID_BIND);
> init_rcu_head(&sdev->rcu);
> list_add_rcu(&sdev->list, &svm->devs);
> out:
> @@ -440,6 +505,7 @@ int intel_svm_unbind_gpasid(struct device *dev, int
> pasid)
> * used by another.
> */
> ioasid_attach_data(pasid, NULL);
> + ioasid_notify(pasid, IOASID_UNBIND);
> kfree(svm);
> }
> }
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index f8504a980981..64799067ea58 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -708,6 +708,7 @@ extern struct iommu_sva *
> intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata);
> extern void intel_svm_unbind(struct iommu_sva *handle);
> extern int intel_svm_get_pasid(struct iommu_sva *handle);
> +extern void intel_svm_add_pasid_notifier(void);
>
> struct svm_dev_ops;
>
> --
> 2.7.4
>
> _______________________________________________
> iommu mailing list
> iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx
> https://lists.linuxfoundation.org/mailman/listinfo/iommu