[RFC 21/24] drm/msm: Add PRR support

From: Rob Clark
Date: Sat Dec 07 2024 - 11:21:53 EST


From: Rob Clark <robdclark@xxxxxxxxxxxx>

Add PRR (Partial Resident Region) is a bypass address which make GPU
writes go to /dev/null and reads return zero. This is used to implement
vulkan sparse residency.

To support PRR/NULL mappings, we allocate a page to reserve a physical
address which we know will not be used as part of a GEM object, and
configure the SMMU to use this address for PRR/NULL mappings.

Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx>
---
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++
drivers/gpu/drm/msm/msm_iommu.c | 62 ++++++++++++++++++++++++-
include/uapi/drm/msm_drm.h | 2 +
3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 90848852ee50..140b4e54bc96 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -308,6 +308,13 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags,
return 0;
}

+static bool
+adreno_smmu_has_prr(struct msm_gpu *gpu)
+{
+ struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&gpu->pdev->dev);
+ return adreno_smmu && adreno_smmu->set_prr_addr;
+}
+
int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx,
uint32_t param, uint64_t *value, uint32_t *len)
{
@@ -392,6 +399,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx,
case MSM_PARAM_UCHE_TRAP_BASE:
*value = adreno_gpu->uche_trap_base;
return 0;
+ case MSM_PARAM_HAS_PRR:
+ *value = adreno_smmu_has_prr(gpu);
+ return 0;
default:
return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param);
}
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 41cb629e25f3..bb65be95f7db 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -13,6 +13,7 @@ struct msm_iommu {
struct msm_mmu base;
struct iommu_domain *domain;
atomic_t pagetables;
+ struct page *prr_page;
};

#define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
@@ -112,6 +113,36 @@ static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
return (size == 0) ? 0 : -EINVAL;
}

+static int msm_iommu_pagetable_map_prr(struct msm_mmu *mmu, u64 iova, size_t len, int prot)
+{
+ struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
+ struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
+ struct msm_iommu *iommu = to_msm_iommu(pagetable->parent);
+ phys_addr_t phys = page_to_phys(iommu->prr_page);
+ u64 addr = iova;
+
+ while (len) {
+ size_t mapped = 0;
+ size_t size = PAGE_SIZE;
+ int ret;
+
+ ret = ops->map_pages(ops, addr, phys, size, 1, prot, GFP_KERNEL, &mapped);
+
+ /* map_pages could fail after mapping some of the pages,
+ * so update the counters before error handling.
+ */
+ addr += mapped;
+ len -= mapped;
+
+ if (ret) {
+ msm_iommu_pagetable_unmap(mmu, iova, addr - iova);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
struct sg_table *sgt, size_t off, size_t len,
int prot)
@@ -122,6 +153,9 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
u64 addr = iova;
unsigned int i;

+ if (!sgt)
+ return msm_iommu_pagetable_map_prr(mmu, iova, len, prot);
+
for_each_sgtable_sg(sgt, sg, i) {
size_t size = sg->length;
phys_addr_t phys = sg_phys(sg);
@@ -177,9 +211,16 @@ static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu)
* If this is the last attached pagetable for the parent,
* disable TTBR0 in the arm-smmu driver
*/
- if (atomic_dec_return(&iommu->pagetables) == 0)
+ if (atomic_dec_return(&iommu->pagetables) == 0) {
adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, NULL);

+ if (adreno_smmu->set_prr_bit) {
+ adreno_smmu->set_prr_bit(adreno_smmu->cookie, false);
+ __free_page(iommu->prr_page);
+ iommu->prr_page = NULL;
+ }
+ }
+
free_io_pgtable_ops(pagetable->pgtbl_ops);
kfree(pagetable);
}
@@ -314,6 +355,25 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
kfree(pagetable);
return ERR_PTR(ret);
}
+
+ BUG_ON(iommu->prr_page);
+ if (adreno_smmu->set_prr_bit) {
+ /*
+ * We need a zero'd page for two reasons:
+ *
+ * 1) Reserve a known physical address to use when
+ * mapping NULL / sparsely resident regions
+ * 2) Read back zero
+ *
+ * It appears the hw drops writes to the PRR region
+ * on the floor, but reads actually return whatever
+ * is in the PRR page.
+ */
+ iommu->prr_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ adreno_smmu->set_prr_addr(adreno_smmu->cookie,
+ page_to_phys(iommu->prr_page));
+ adreno_smmu->set_prr_bit(adreno_smmu->cookie, true);
+ }
}

/* Needed later for TLB flush */
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index a7e48ee1dd95..48bc0374e2ae 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -115,6 +115,8 @@ struct drm_msm_timespec {
* ioctl will throw -EPIPE.
*/
#define MSM_PARAM_EN_VM_BIND 0x15 /* WO, once */
+/* PRR (Partially Resident Region) is required for sparse residency: */
+#define MSM_PARAM_HAS_PRR 0x16 /* RO */

/* For backwards compat. The original support for preemption was based on
* a single ring per priority level so # of priority levels equals the #
--
2.47.1