[PATCH] drm/amdgpu:fix IH overflow on Iceland

From: Defang Bo
Date: Tue Jan 05 2021 - 04:17:19 EST


Similar to commit <b82175750131>("drm/amdgpu: fix IH overflow on Vega10 v2")
When an ring buffer overflow happens the appropriate bit is set in the WPTR
register which is also written back to memory. But clearing the bit in the
WPTR doesn't trigger another memory writeback.

So what can happen is that we end up processing the buffer overflow over and
over again because the bit is never cleared. Resulting in a random system
lockup because of an infinite loop in an interrupt handler.

Signed-off-by: Defang Bo <bodefang@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 37 +++++++++++++++++++++------------
1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index a13dd9a51149..d90f9000a445 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -193,19 +193,30 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,

wptr = le32_to_cpu(*ih->wptr_cpu);

- if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
- wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
- /* When a ring buffer overflow happen start parsing interrupt
- * from the last not overwritten vector (wptr + 16). Hopefully
- * this should allow us to catchup.
- */
- dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
- ih->rptr = (wptr + 16) & ih->ptr_mask;
- tmp = RREG32(mmIH_RB_CNTL);
- tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
- WREG32(mmIH_RB_CNTL, tmp);
- }
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = RREG32(mmIH_RB_CNTL);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+
+
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 16). Hopefully
+ * this should allow us to catchup.
+ */
+ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+ ih->rptr = (wptr + 16) & ih->ptr_mask;
+ tmp = RREG32(mmIH_RB_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32(mmIH_RB_CNTL, tmp);
+
+
+out:
return (wptr & ih->ptr_mask);
}

--
2.7.4