Re: [PATCH RFC 10/11] iommu: Make IOPF handling framework generic

From: Lu Baolu
Date: Tue Mar 22 2022 - 01:28:49 EST


On 2022/3/21 19:39, Jean-Philippe Brucker wrote:
On Sun, Mar 20, 2022 at 02:40:29PM +0800, Lu Baolu wrote:
The existing IOPF handling framework only handles the I/O page faults for
SVA. Ginven that we are able to link iommu domain with each I/O page fault,
we can now make the I/O page fault handling framework more general for
more types of page faults.

Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
---
include/linux/iommu.h | 4 +++
drivers/iommu/io-pgfault.c | 67 ++++++-----------------------------
drivers/iommu/iommu-sva-lib.c | 59 ++++++++++++++++++++++++++++++
3 files changed, 73 insertions(+), 57 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 803e7b07605e..11c65a7bed88 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -50,6 +50,8 @@ struct iommu_dma_cookie;
typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *);
+typedef enum iommu_page_response_code (*iommu_domain_iopf_handler_t)
+ (struct iommu_fault *, void *);
struct iommu_domain_geometry {
dma_addr_t aperture_start; /* First address that can be mapped */
@@ -101,6 +103,8 @@ struct iommu_domain {
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
struct mm_struct *sva_cookie;
+ iommu_domain_iopf_handler_t fault_handler;
+ void *fault_data;
};
static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 1df8c1dcae77..dad0e40cd8d2 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -69,62 +69,6 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
return iommu_page_response(dev, &resp);
}
-static enum iommu_page_response_code
-iopf_handle_single(struct iopf_fault *iopf)
-{
- vm_fault_t ret;
- struct mm_struct *mm;
- struct vm_area_struct *vma;
- unsigned int access_flags = 0;
- unsigned int fault_flags = FAULT_FLAG_REMOTE;
- struct iommu_fault_page_request *prm = &iopf->fault.prm;
- enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
-
- if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
- return status;
-
- mm = iommu_sva_find(prm->pasid);
- if (IS_ERR_OR_NULL(mm))
- return status;
-
- mmap_read_lock(mm);
-
- vma = find_extend_vma(mm, prm->addr);
- if (!vma)
- /* Unmapped area */
- goto out_put_mm;
-
- if (prm->perm & IOMMU_FAULT_PERM_READ)
- access_flags |= VM_READ;
-
- if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
- access_flags |= VM_WRITE;
- fault_flags |= FAULT_FLAG_WRITE;
- }
-
- if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
- access_flags |= VM_EXEC;
- fault_flags |= FAULT_FLAG_INSTRUCTION;
- }
-
- if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
- fault_flags |= FAULT_FLAG_USER;
-
- if (access_flags & ~vma->vm_flags)
- /* Access fault */
- goto out_put_mm;
-
- ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
- status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
- IOMMU_PAGE_RESP_SUCCESS;
-
-out_put_mm:
- mmap_read_unlock(mm);
- mmput(mm);
-
- return status;
-}
-
static void iopf_handle_group(struct work_struct *work)
{
struct iopf_group *group;
@@ -134,12 +78,21 @@ static void iopf_handle_group(struct work_struct *work)
group = container_of(work, struct iopf_group, work);
list_for_each_entry_safe(iopf, next, &group->faults, list) {
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev_pasid(group->dev,
+ iopf->fault.prm.pasid);

Do we have a guarantee that the domain is not freed while we handle the
fault? We could prevent unbind() while there are pending faults on this
bond. But a refcount on SVA domains could defer freeing, and would also
help with keeping the semantics where bind() returns a single refcounted
bond for any {dev, mm}.

Given that this path is full of circular locking pitfalls, and to keep the
fault handler efficient (well, at least not make it worse), we should
probably keep a getter like iommu_sva_find() that does not require
locking.

Agreed. We need a mechanism to ensure concurrency. I will look into it.


+
+ if (!domain || !domain->fault_handler)
+ status = IOMMU_PAGE_RESP_INVALID;
+
/*
* For the moment, errors are sticky: don't handle subsequent
* faults in the group if there is an error.
*/
if (status == IOMMU_PAGE_RESP_SUCCESS)
- status = iopf_handle_single(iopf);
+ status = domain->fault_handler(&iopf->fault,
+ domain->fault_data);

If we make this a direct call and only use a light getter for the
PASID->mm lookup we don't need to look at the domain at all. Or are you
planning to add external fault handlers?

Yes. I'd like the I/O page fault handling framework to work for
external domains as well, for example, the I/O page faults for user
space page table should be routed to user space.


if (!(iopf->fault.prm.flags &
IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index 47cf98e661ff..01fa8096bd02 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -87,6 +87,63 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev)
return domain;
}
+static enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+{
+ vm_fault_t ret;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned int access_flags = 0;
+ struct iommu_domain *domain = data;
+ unsigned int fault_flags = FAULT_FLAG_REMOTE;
+ struct iommu_fault_page_request *prm = &fault->prm;
+ enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
+
+ if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+ return status;
+
+ mm = domain->sva_cookie;
+ if (IS_ERR_OR_NULL(mm))
+ return status;
+
+ mmap_read_lock(mm);
+
+ vma = find_extend_vma(mm, prm->addr);
+ if (!vma)
+ /* Unmapped area */
+ goto out_put_mm;
+
+ if (prm->perm & IOMMU_FAULT_PERM_READ)
+ access_flags |= VM_READ;
+
+ if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
+ access_flags |= VM_WRITE;
+ fault_flags |= FAULT_FLAG_WRITE;
+ }
+
+ if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
+ access_flags |= VM_EXEC;
+ fault_flags |= FAULT_FLAG_INSTRUCTION;
+ }
+
+ if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
+ fault_flags |= FAULT_FLAG_USER;
+
+ if (access_flags & ~vma->vm_flags)
+ /* Access fault */
+ goto out_put_mm;
+
+ ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
+ status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
+ IOMMU_PAGE_RESP_SUCCESS;
+
+out_put_mm:
+ mmap_read_unlock(mm);
+ mmput(mm);

mmget_not_zero() is missing since iommu_sva_find() is gone. I'm guessing
we still need it in case the process dies

Agreed.


Thanks,
Jean

Best regards,
baolu