[PATCH 5.6 03/73] drm/amdgpu: invalidate L2 before SDMA IBs (v2)

From: Greg Kroah-Hartman
Date: Mon May 04 2020 - 14:06:11 EST


From: Marek OlÅÃk <marek.olsak@xxxxxxx>

commit fdf83646c0542ecfb9adc4db8f741a1f43dca058 upstream.

This fixes GPU hangs due to cache coherency issues.

v2: Split the version bump to a separate patch

Signed-off-by: Marek OlÅÃk <marek.olsak@xxxxxxx>
Reviewed-by: Christian KÃnig <christian.koenig@xxxxxxx>
Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h | 16 ++++++++++++++++
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++++++-
2 files changed, 29 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -73,6 +73,22 @@
#define SDMA_OP_AQL_COPY 0
#define SDMA_OP_AQL_BARRIER_OR 0

+#define SDMA_GCR_RANGE_IS_PA (1 << 18)
+#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16)
+#define SDMA_GCR_GL2_WB (1 << 15)
+#define SDMA_GCR_GL2_INV (1 << 14)
+#define SDMA_GCR_GL2_DISCARD (1 << 13)
+#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11)
+#define SDMA_GCR_GL2_US (1 << 10)
+#define SDMA_GCR_GL1_INV (1 << 9)
+#define SDMA_GCR_GLV_INV (1 << 8)
+#define SDMA_GCR_GLK_INV (1 << 7)
+#define SDMA_GCR_GLK_WB (1 << 6)
+#define SDMA_GCR_GLM_INV (1 << 5)
+#define SDMA_GCR_GLM_WB (1 << 4)
+#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2)
+#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0)
+
/*define for op field*/
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask 0x000000FF
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -382,6 +382,18 @@ static void sdma_v5_0_ring_emit_ib(struc
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

+ /* Invalidate L2, because if we don't do it, we might get stale cache
+ * lines from previous IBs.
+ */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
+ SDMA_GCR_GL2_WB |
+ SDMA_GCR_GLM_INV |
+ SDMA_GCR_GLM_WB) << 16);
+ amdgpu_ring_write(ring, 0xffffff80);
+ amdgpu_ring_write(ring, 0xffff);
+
/* An IB packet must end on a 8 DW boundary--the next dword
* must be on a 8-dword boundary. Our IB packet below is 6
* dwords long, thus add x number of NOPs, such that, in
@@ -1597,7 +1609,7 @@ static const struct amdgpu_ring_funcs sd
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
- .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
+ .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
.emit_fence = sdma_v5_0_ring_emit_fence,
.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,