[PATCH AUTOSEL 6.6 19/23] drm/amdgpu: grab an additional reference on the gang fence v2

From: Sasha Levin
Date: Thu Apr 03 2025 - 16:42:27 EST


From: Christian König <christian.koenig@xxxxxxx>

[ Upstream commit 0d9a95099dcb05b5f4719c830d15bf4fdcad0dc2 ]

We keep the gang submission fence around in adev, make sure that it
stays alive.

v2: fix memory leak on retry

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
Acked-by: Srinivasan Shanmugam <srinivasan.shanmugam@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 45dd6cbad81e7..10f5a3d0f5916 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -6015,6 +6015,7 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
{
struct dma_fence *old = NULL;

+ dma_fence_get(gang);
do {
dma_fence_put(old);
rcu_read_lock();
@@ -6024,12 +6025,19 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
if (old == gang)
break;

- if (!dma_fence_is_signaled(old))
+ if (!dma_fence_is_signaled(old)) {
+ dma_fence_put(gang);
return old;
+ }

} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
old, gang) != old);

+ /*
+ * Drop it once for the exchanged reference in adev and once for the
+ * thread local reference acquired in amdgpu_device_get_gang().
+ */
+ dma_fence_put(old);
dma_fence_put(old);
return NULL;
}
--
2.39.5