Re: [RFC PATCH v1 15/15] drm/msm/a5xx: Support per-instance pagetables
From: Rob Clark
Date: Fri Mar 01 2019 - 15:24:50 EST
On Fri, Mar 1, 2019 at 2:39 PM Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> wrote:
>
> Add support for per-instance pagetables for 5XX targets. Create a support
> buffer for preemption to hold the SMMU pagetable information for a
> preempted ring, enable TTBR1 to support split pagetables and add the
> necessary PM4 commands to trigger a pagetable switch at the beginning
> of a user command.
>
> Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx>
> ---
>
> drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 120 +++++++++++++++++++++++++++++-
> drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 19 +++++
> drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 70 +++++++++++++----
> 3 files changed, 192 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index 3d6f414..9294fd5 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -202,6 +202,59 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit
> msm_gpu_retire(gpu);
> }
>
> +static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
> + struct msm_file_private *ctx)
> +{
> + u64 ttbr;
> + u32 asid;
> +
> + if (!msm_iommu_get_ptinfo(ctx->aspace->mmu, &ttbr, &asid))
> + return;
> +
> + ttbr = ttbr | ((u64) asid) << 48;
> +
> + /* Turn off protected mode */
> + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
> + OUT_RING(ring, 0);
> +
> + /* Turn on APIV mode to access critical regions */
> + OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
> + OUT_RING(ring, 1);
> +
> + /* Make sure the ME is synchronized before staring the update */
> + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
> +
> + /* Execute the table update */
> + OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 3);
> + OUT_RING(ring, lower_32_bits(ttbr));
> + OUT_RING(ring, upper_32_bits(ttbr));
> + OUT_RING(ring, 0);
> +
> + /*
> + * Write the new TTBR0 to the preemption records - this will be used to
> + * reload the pagetable if the current ring gets preempted out.
> + */
> + OUT_PKT7(ring, CP_MEM_WRITE, 4);
> + OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0)));
> + OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0)));
> + OUT_RING(ring, lower_32_bits(ttbr));
> + OUT_RING(ring, upper_32_bits(ttbr));
> +
> + /* Invalidate the draw state so we start off fresh */
> + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
> + OUT_RING(ring, 0x40000);
CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS?
> + OUT_RING(ring, 1);
was that really meant to be 1, not 0, for the low 32b of the state
object address?
BR,
-R
> + OUT_RING(ring, 0);
> +
> + /* Turn off APRIV */
> + OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
> + OUT_RING(ring, 0);
> +
> + /* Turn off protected mode */
> + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
> + OUT_RING(ring, 1);
> +}
> +
> static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> struct msm_file_private *ctx)
> {
> @@ -217,6 +270,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> return;
> }
>
> + a5xx_set_pagetable(gpu, ring, ctx);
> +
> OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
> OUT_RING(ring, 0x02);
>
> @@ -1456,21 +1511,77 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
> return (unsigned long)busy_time;
> }
>
> +static struct msm_gem_address_space *a5xx_new_address_space(struct msm_gpu *gpu)
> +{
> + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
> + struct msm_gem_address_space *aspace;
> + int ret;
> +
> + /* Return the default pagetable if per instance tables don't work */
> + if (!a5xx_gpu->per_instance_tables)
> + return gpu->aspace;
> +
> + aspace = msm_gem_address_space_create_instance(&gpu->pdev->dev,
> + "gpu", 0x100000000ULL, 0x1ffffffffULL);
> + if (IS_ERR(aspace))
> + return aspace;
> +
> + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
> + if (ret) {
> + /* -ENODEV means that aux domains aren't supported */
> + if (ret == -ENODEV)
> + return gpu->aspace;
> +
> + return ERR_PTR(ret);
> + }
> +
> + return aspace;
> +}
> +
> static struct msm_gem_address_space *
> a5xx_create_address_space(struct msm_gpu *gpu)
> {
> + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
> + struct device *dev = &gpu->pdev->dev;
> struct msm_gem_address_space *aspace;
> struct iommu_domain *iommu;
> - int ret;
> + int ret, val = 1;
> +
> + a5xx_gpu->per_instance_tables = false;
>
> iommu = iommu_domain_alloc(&platform_bus_type);
> if (!iommu)
> return NULL;
>
> - iommu->geometry.aperture_start = 0x100000000ULL;
> - iommu->geometry.aperture_end = 0x1ffffffffULL;
> + /* Try to enable split pagetables */
> + if (iommu_domain_set_attr(iommu, DOMAIN_ATTR_SPLIT_TABLES, &val)) {
> + /*
> + * If split pagetables aren't available we won't be able to do
> + * per-instance pagetables so set up the global va space at our
> + * susual location
> + */
> + iommu->geometry.aperture_start = 0x100000000ULL;
> + iommu->geometry.aperture_end = 0x1ffffffffULL;
> + } else {
> + /*
> + * If split pagetables are available then we might be able to do
> + * per-instance pagetables. Put the default va-space in TTBR1 to
> + * prepare
> + */
> + iommu->geometry.aperture_start = 0xfffffff100000000ULL;
> + iommu->geometry.aperture_end = 0xfffffff1ffffffffULL;
> +
> + /*
> + * If both split pagetables and aux domains are supported we can
> + * do per_instance pagetables
> + */
> + a5xx_gpu->per_instance_tables =
> + iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX);
> + }
>
> - aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu");
> + aspace = msm_gem_address_space_create(dev, iommu, "gpu");
> if (IS_ERR(aspace)) {
> iommu_domain_free(iommu);
> DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n",
> @@ -1510,6 +1621,7 @@ static const struct adreno_gpu_funcs funcs = {
> .gpu_state_get = a5xx_gpu_state_get,
> .gpu_state_put = a5xx_gpu_state_put,
> .create_address_space = a5xx_create_address_space,
> + .new_address_space = a5xx_new_address_space,
> },
> .get_timestamp = a5xx_get_timestamp,
> };
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
> index 7d71860..82ceb9b 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
> @@ -45,6 +45,11 @@ struct a5xx_gpu {
>
> atomic_t preempt_state;
> struct timer_list preempt_timer;
> + struct a5xx_smmu_info *smmu_info;
> + struct drm_gem_object *smmu_info_bo;
> + uint64_t smmu_info_iova;
> +
> + bool per_instance_tables;
> };
>
> #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
> @@ -132,6 +137,20 @@ struct a5xx_preempt_record {
> */
> #define A5XX_PREEMPT_COUNTER_SIZE (16 * 4)
>
> +/*
> + * This is a global structure that the preemption code uses to switch in the
> + * pagetable for the preempted process - the code switches in whatever we
> + * after preempting in a new ring.
> + */
> +struct a5xx_smmu_info {
> + uint32_t magic;
> + uint32_t _pad4;
> + uint64_t ttbr0;
> + uint32_t asid;
> + uint32_t contextidr;
> +};
> +
> +#define A5XX_SMMU_INFO_MAGIC 0x3618CDA3UL
>
> int a5xx_power_init(struct msm_gpu *gpu);
> void a5xx_gpmu_ucode_init(struct msm_gpu *gpu);
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
> index 3d62310..1050409 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
> @@ -12,6 +12,7 @@
> */
>
> #include "msm_gem.h"
> +#include "msm_mmu.h"
> #include "a5xx_gpu.h"
>
> /*
> @@ -145,6 +146,15 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
> a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring);
> spin_unlock_irqrestore(&ring->lock, flags);
>
> + /* Do read barrier to make sure we have updated pagetable info */
> + rmb();
> +
> + /* Set the SMMU info for the preemption */
> + if (a5xx_gpu->smmu_info) {
> + a5xx_gpu->smmu_info->ttbr0 = ring->memptrs->ttbr0;
> + a5xx_gpu->smmu_info->contextidr = 0;
> + }
> +
> /* Set the address of the incoming preemption record */
> gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
> REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
> @@ -221,9 +231,10 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
> a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova;
> }
>
> - /* Write a 0 to signal that we aren't switching pagetables */
> + /* Tell the CP where to find the smmu_info buffer*/
> gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
> - REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
> + REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
> + a5xx_gpu->smmu_info_iova);
>
> /* Reset the preemption state */
> set_preempt_state(a5xx_gpu, PREEMPT_NONE);
> @@ -271,6 +282,34 @@ void a5xx_preempt_fini(struct msm_gpu *gpu)
>
> for (i = 0; i < gpu->nr_rings; i++)
> msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace, true);
> +
> + msm_gem_kernel_put(a5xx_gpu->smmu_info_bo, gpu->aspace, true);
> +}
> +
> +static int a5xx_smmu_info_init(struct msm_gpu *gpu)
> +{
> + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
> + struct a5xx_smmu_info *ptr;
> + struct drm_gem_object *bo;
> + u64 iova;
> +
> + if (!a5xx_gpu->per_instance_tables)
> + return 0;
> +
> + ptr = msm_gem_kernel_new(gpu->dev, sizeof(struct a5xx_smmu_info),
> + MSM_BO_UNCACHED, gpu->aspace, &bo, &iova);
> +
> + if (IS_ERR(ptr))
> + return PTR_ERR(ptr);
> +
> + ptr->magic = A5XX_SMMU_INFO_MAGIC;
> +
> + a5xx_gpu->smmu_info_bo = bo;
> + a5xx_gpu->smmu_info_iova = iova;
> + a5xx_gpu->smmu_info = ptr;
> +
> + return 0;
> }
>
> void a5xx_preempt_init(struct msm_gpu *gpu)
> @@ -284,17 +323,22 @@ void a5xx_preempt_init(struct msm_gpu *gpu)
> return;
>
> for (i = 0; i < gpu->nr_rings; i++) {
> - if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) {
> - /*
> - * On any failure our adventure is over. Clean up and
> - * set nr_rings to 1 to force preemption off
> - */
> - a5xx_preempt_fini(gpu);
> - gpu->nr_rings = 1;
> -
> - return;
> - }
> + if (preempt_init_ring(a5xx_gpu, gpu->rb[i]))
> + goto fail;
> }
>
> - timer_setup(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, 0);
> + if (a5xx_smmu_info_init(gpu))
> + goto fail;
> +
> + timer_setup(&a5xx_gpu->preempt_timer, a5xx_preempt_timer,
> + (unsigned long) a5xx_gpu);
> +
> + return;
> +fail:
> + /*
> + * On any failure our adventure is over. Clean up and
> + * set nr_rings to 1 to force preemption off
> + */
> + a5xx_preempt_fini(gpu);
> + gpu->nr_rings = 1;
> }
> --
> 2.7.4
>