Re: [PATCH 1/2] drm/etnaviv: Preallocate STLB according to CPU PAGE_SIZE
From: Lucas Stach
Date: Tue Dec 03 2024 - 13:04:31 EST
Am Freitag, dem 08.11.2024 um 22:36 +0800 schrieb Sui Jingfeng:
> dma_direct_alloc() allocates at least one page, whose size is the CPU
> PAGE_SIZE, while etnaviv_iommuv2_ensure_stlb() only asks for 4KiB. The
> remaining memory beyond 4KiB is wasted on systems with a larger page size.
> For example, on systems with a 16KiB CPU page size, the remaining 12KiB is
> wasted. On systems with a 64KiB CPU page size, the remaining 60KiB is wasted.
>
> Since addresses within one page are always contiguous, the remaining memory
> can be used to store the adjacent slave TLB entries. Then, when a neighbouring
> slave TLB is needed later, we don't have to request another page from the
> system. This saves both memory and allocation time.
>
While this isn't adding a lot of code to etnaviv, I wonder if this
couldn't be handled by using a dma_pool for the pagetable allocations.
Regards,
Lucas
> Signed-off-by: Sui Jingfeng <sui.jingfeng@xxxxxxxxx>
> ---
> drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c | 64 +++++++++++++++++++---
> 1 file changed, 56 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> index d664ae29ae20..fa6eed1ae1be 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> @@ -44,19 +44,66 @@ to_v2_context(struct etnaviv_iommu_context *context)
> return container_of(context, struct etnaviv_iommuv2_context, base);
> }
>
> +static int etnaviv_iommuv2_stlb_free(struct etnaviv_iommuv2_context *context)
> +{
> + struct device *dev = context->base.global->dev;
> + unsigned int i;
> +
> + for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; ++i) {
> + u32 *vaddr = context->stlb_cpu[i];
> +
> + if (!vaddr)
> + continue;
> +
> + context->stlb_cpu[i] = NULL;
> +
> + if (i % (PAGE_SIZE / SZ_4K))
> + continue;
> +
> + dma_free_wc(dev, PAGE_SIZE, vaddr, context->stlb_dma[i]);
> + }
> +
> + return 0;
> +}
> +
> +static int
> +etnaviv_iommuv2_ensure_stlb_new(struct etnaviv_iommuv2_context *context,
> + unsigned int stlb)
> +{
> + struct device *dev = context->base.global->dev;
> + void *vaddr;
> + dma_addr_t daddr;
> + unsigned int i;
> +
> + if (context->stlb_cpu[stlb])
> + return 0;
> +
> + vaddr = dma_alloc_wc(dev, PAGE_SIZE, &daddr, GFP_KERNEL);
> + if (!vaddr)
> + return -ENOMEM;
> +
> + memset32(vaddr, MMUv2_PTE_EXCEPTION, PAGE_SIZE / sizeof(u32));
> +
> + stlb &= ~(PAGE_SIZE / SZ_4K - 1);
> +
> + for (i = 0; i < PAGE_SIZE / SZ_4K; ++i) {
> + context->stlb_cpu[stlb + i] = vaddr;
> + context->stlb_dma[stlb + i] = daddr;
> + context->mtlb_cpu[stlb + i] = daddr | MMUv2_PTE_PRESENT;
> + vaddr += SZ_4K;
> + daddr += SZ_4K;
> + }
> +
> + return 0;
> +}
> +
> static void etnaviv_iommuv2_free(struct etnaviv_iommu_context *context)
> {
> struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
> - int i;
>
> drm_mm_takedown(&context->mm);
>
> - for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
> - if (v2_context->stlb_cpu[i])
> - dma_free_wc(context->global->dev, SZ_4K,
> - v2_context->stlb_cpu[i],
> - v2_context->stlb_dma[i]);
> - }
> + etnaviv_iommuv2_stlb_free(v2_context);
>
> dma_free_wc(context->global->dev, SZ_4K, v2_context->mtlb_cpu,
> v2_context->mtlb_dma);
> @@ -65,6 +112,7 @@ static void etnaviv_iommuv2_free(struct etnaviv_iommu_context *context)
>
> vfree(v2_context);
> }
> +
> static int
> etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_context *v2_context,
> int stlb)
> @@ -109,7 +157,7 @@ static int etnaviv_iommuv2_map(struct etnaviv_iommu_context *context,
> mtlb_entry = (iova & MMUv2_MTLB_MASK) >> MMUv2_MTLB_SHIFT;
> stlb_entry = (iova & MMUv2_STLB_MASK) >> MMUv2_STLB_SHIFT;
>
> - ret = etnaviv_iommuv2_ensure_stlb(v2_context, mtlb_entry);
> + ret = etnaviv_iommuv2_ensure_stlb_new(v2_context, mtlb_entry);
> if (ret)
> return ret;
>