Re: [PATCH v3 09/11] iommu: Make iommu_queue_iopf() more generic

From: Jason Gunthorpe
Date: Mon Aug 21 2023 - 13:11:26 EST


On Fri, Aug 18, 2023 at 07:40:45AM +0800, Lu Baolu wrote:
> This completely separates the IO page fault handling framework from the
> SVA implementation. Previously, the SVA implementation was tightly coupled
> with the IO page fault handling framework. This makes SVA a "customer" of
> the IO page fault handling framework by converting domain's page fault
> handler to handle a group of faults and calling it directly from
> iommu_queue_iopf().
>
> Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> ---
> include/linux/iommu.h | 5 +++--
> drivers/iommu/iommu-sva.h | 8 --------
> drivers/iommu/io-pgfault.c | 16 +++++++++++++---
> drivers/iommu/iommu-sva.c | 14 ++++----------
> drivers/iommu/iommu.c | 4 ++--
> 5 files changed, 22 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index ff292eea9d31..cf1cb0bb46af 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -41,6 +41,7 @@ struct iommu_sva;
> struct iommu_fault_event;
> struct iommu_dma_cookie;
> struct iopf_queue;
> +struct iopf_group;
>
> #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
> #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
> @@ -175,8 +176,7 @@ struct iommu_domain {
> unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
> struct iommu_domain_geometry geometry;
> struct iommu_dma_cookie *iova_cookie;
> - enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
> - void *data);
> + int (*iopf_handler)(struct iopf_group *group);
> void *fault_data;
> union {
> struct {
> @@ -526,6 +526,7 @@ struct iopf_group {
> struct list_head faults;
> struct work_struct work;
> struct device *dev;
> + void *data;
> };
>
> int iommu_device_register(struct iommu_device *iommu,
> diff --git a/drivers/iommu/iommu-sva.h b/drivers/iommu/iommu-sva.h
> index 510a7df23fba..cf41e88fac17 100644
> --- a/drivers/iommu/iommu-sva.h
> +++ b/drivers/iommu/iommu-sva.h
> @@ -22,8 +22,6 @@ int iopf_queue_flush_dev(struct device *dev);
> struct iopf_queue *iopf_queue_alloc(const char *name);
> void iopf_queue_free(struct iopf_queue *queue);
> int iopf_queue_discard_partial(struct iopf_queue *queue);
> -enum iommu_page_response_code
> -iommu_sva_handle_iopf(struct iommu_fault *fault, void *data);
> void iopf_free_group(struct iopf_group *group);
> int iopf_queue_work(struct iopf_group *group, work_func_t func);
> int iommu_sva_handle_iopf_group(struct iopf_group *group);
> @@ -65,12 +63,6 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
> return -ENODEV;
> }
>
> -static inline enum iommu_page_response_code
> -iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
> -{
> - return IOMMU_PAGE_RESP_INVALID;
> -}
> -
> static inline void iopf_free_group(struct iopf_group *group)
> {
> }
> diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
> index 00c2e447b740..a61c2aabd1b8 100644
> --- a/drivers/iommu/io-pgfault.c
> +++ b/drivers/iommu/io-pgfault.c
> @@ -11,8 +11,6 @@
> #include <linux/slab.h>
> #include <linux/workqueue.h>
>
> -#include "iommu-sva.h"
> -
> /**
> * struct iopf_queue - IO Page Fault queue
> * @wq: the fault workqueue
> @@ -93,6 +91,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
> {
> int ret;
> struct iopf_group *group;
> + struct iommu_domain *domain;
> struct iopf_fault *iopf, *next;
> struct iommu_fault_param *iopf_param;
> struct dev_iommu *param = dev->iommu;
> @@ -124,6 +123,16 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
> return 0;
> }
>
> + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)
> + domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
> + else
> + domain = iommu_get_domain_for_dev(dev);
> +
> + if (!domain || !domain->iopf_handler) {
> + ret = -ENODEV;
> + goto cleanup_partial;
> + }
> +
> group = kzalloc(sizeof(*group), GFP_KERNEL);
> if (!group) {
> /*
> @@ -137,6 +146,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
>
> group->dev = dev;
> group->last_fault.fault = *fault;
> + group->data = domain->fault_data;
> INIT_LIST_HEAD(&group->faults);
> list_add(&group->last_fault.list, &group->faults);
>
> @@ -147,7 +157,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, struct device *dev)
> list_move(&iopf->list, &group->faults);
> }
>
> - ret = iommu_sva_handle_iopf_group(group);
> + ret = domain->iopf_handler(group);
> if (ret)
> iopf_free_group(group);
>
> diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
> index df8734b6ec00..2811f34947ab 100644
> --- a/drivers/iommu/iommu-sva.c
> +++ b/drivers/iommu/iommu-sva.c
> @@ -148,13 +148,14 @@ EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
> /*
> * I/O page fault handler for SVA
> */
> -enum iommu_page_response_code
> +static enum iommu_page_response_code
> iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
> {
> vm_fault_t ret;
> struct vm_area_struct *vma;
> - struct mm_struct *mm = data;
> unsigned int access_flags = 0;
> + struct iommu_domain *domain = data;
> + struct mm_struct *mm = domain->mm;
> unsigned int fault_flags = FAULT_FLAG_REMOTE;
> struct iommu_fault_page_request *prm = &fault->prm;
> enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
> @@ -231,23 +232,16 @@ static void iommu_sva_iopf_handler(struct work_struct *work)
> {
> struct iopf_fault *iopf;
> struct iopf_group *group;
> - struct iommu_domain *domain;
> enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
>
> group = container_of(work, struct iopf_group, work);
> - domain = iommu_get_domain_for_dev_pasid(group->dev,
> - group->last_fault.fault.prm.pasid, 0);
> - if (!domain || !domain->iopf_handler)
> - status = IOMMU_PAGE_RESP_INVALID;
> -
> list_for_each_entry(iopf, &group->faults, list) {
> /*
> * For the moment, errors are sticky: don't handle subsequent
> * faults in the group if there is an error.
> */
> if (status == IOMMU_PAGE_RESP_SUCCESS)
> - status = domain->iopf_handler(&iopf->fault,
> - domain->fault_data);
> + status = iommu_sva_handle_iopf(&iopf->fault, group->data);
> }
>
> iommu_sva_complete_iopf(group->dev, &group->last_fault, status);
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index b280b9f4d8b4..9b622088c741 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -3395,8 +3395,8 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
> domain->type = IOMMU_DOMAIN_SVA;
> mmgrab(mm);
> domain->mm = mm;
> - domain->iopf_handler = iommu_sva_handle_iopf;
> - domain->fault_data = mm;
> + domain->iopf_handler = iommu_sva_handle_iopf_group;
> + domain->fault_data = domain;

Why fault_data? The domain handling the fault should be passed
through naturally without relying on fault_data.

eg make

iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)

into
iommu_sva_handle_iopf(struct iommu_fault *fault, struct iommu_domain *domain)

And delete domain->fault_data until we have some use for it.

The core code should be keeping track of the iommu_domain lifetime.

Jason