On Thu, Sep 05, 2024 at 04:51:22PM +0200, Antonino Maniscalco wrote:...
This patch implements the preemption feature for A6xx targets. This allows
the GPU to switch to a higher priority ringbuffer if one is ready. A6XX
hardware as such supports multiple levels of preemption granularity,
ranging from coarse grained (ringbuffer level) to more fine grained,
such as draw-call level or bin boundary level preemption. This patch
enables the basic preemption level, with more fine grained preemption
support to follow.
Signed-off-by: Sharat Masetty <smasetty@xxxxxxxxxxxxxx>
Signed-off-by: Antonino Maniscalco <antomani103@xxxxxxxxx>
Tested-by: Neil Armstrong <neil.armstrong@xxxxxxxxxx> # on SM8650-QRD
---
drivers/gpu/drm/msm/Makefile | 1 +
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 293 +++++++++++++++++++++-
drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 161 ++++++++++++
we can use the lighter smp variant here.
+
+ if (a6xx_gpu->cur_ring == ring)
+ gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+ else
+ ring->skip_inline_wptr = true;
+ } else {
+ ring->skip_inline_wptr = true;
+ }
+
+ spin_unlock_irqrestore(&ring->preempt_lock, flags);
}
static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
@@ -138,12 +231,14 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
set_pagetable checks "cur_ctx_seqno" to see whether a pagetable switch is
needed or not. This is currently not tracked separately for each ring. Can
you please check that?
I wonder why that didn't cause any gpu errors in testing. Not sure if I
am missing something.
.../*
* Write the new TTBR0 to the memstore. This is good for debugging.
+ * Needed for preemption
*/
- OUT_PKT7(ring, CP_MEM_WRITE, 4);
+ OUT_PKT7(ring, CP_MEM_WRITE, 5);
OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
OUT_RING(ring, lower_32_bits(ttbr));
- OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
+ OUT_RING(ring, upper_32_bits(ttbr));
+ OUT_RING(ring, ctx->seqno);
/*
* Sync both threads after switching pagetables and enable BR only
@@ -268,6 +363,43 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
a6xx_flush(gpu, ring);
}
+ struct a6xx_preempt_record *record_ptr =
+ a6xx_gpu->preempt[ring->id] + PREEMPT_OFFSET_PRIV_NON_SECURE;
+ u64 ttbr0 = ring->memptrs->ttbr0;
+ u32 context_idr = ring->memptrs->context_idr;
+
+ smmu_info_ptr->ttbr0 = ttbr0;
+ smmu_info_ptr->context_idr = context_idr;
+ record_ptr->wptr = get_wptr(ring);
+
+ /*
+ * The GPU will write the wptr we set above when we preempt. Reset
+ * skip_inline_wptr to make sure that we don't write WPTR to the same
+ * thing twice. It's still possible subsequent submissions will update
+ * wptr again, in which case they will set the flag to true. This has
+ * to be protected by the lock for setting the flag and updating wptr
+ * to be atomic.
+ */
+ ring->skip_inline_wptr = false;
+
+ spin_unlock_irqrestore(&ring->preempt_lock, flags);
+
+ gpu_write64(gpu,
+ REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
+ a6xx_gpu->preempt_iova[ring->id] + PREEMPT_OFFSET_SMMU_INFO);
+
+ gpu_write64(gpu,
+ REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
+ a6xx_gpu->preempt_iova[ring->id] + PREEMPT_OFFSET_PRIV_NON_SECURE);
+
+ preempt_offset_priv_secure =
+ PREEMPT_OFFSET_PRIV_SECURE(adreno_gpu->info->preempt_record_size);
+ gpu_write64(gpu,
+ REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR,
+ a6xx_gpu->preempt_iova[ring->id] + preempt_offset_priv_secure);
Secure buffers are not supported currently, so we can skip this and the
context record allocation. Anyway, this has to be a separate buffer
mapped in the secure pagetable, which we don't currently have. We can skip
the same in the pseudo register packet too.
...+
+ a6xx_gpu->next_ring = ring;
+
struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
--
2.46.0