[PATCH 2/2] drm/amdgpu: Create an option to disable soft recovery
From: André Almeida
Date: Thu Aug 24 2023 - 12:26:15 EST
Create a module option to disable soft recoveries on amdgpu, making
every recovery go through the device reset path. This option makes
easier to force device resets for testing and debugging purposes.
Signed-off-by: André Almeida <andrealmeid@xxxxxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 6 +++++-
drivers/gpu/drm/amd/include/amd_shared.h | 1 +
4 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4de074243c4d..8f4a93890345 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -189,6 +189,7 @@ extern uint amdgpu_force_long_training;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
+extern bool amdgpu_soft_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern int amdgpu_smu_pptable_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d53e4097acc0..7d6c39b547cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -163,6 +163,7 @@ uint amdgpu_force_long_training;
int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
+bool amdgpu_soft_recovery = true;
int amdgpu_emu_mode;
uint amdgpu_smu_memory_pool_size;
int amdgpu_smu_pptable_id = -1;
@@ -2891,6 +2892,11 @@ static void amdgpu_init_debug_options(void)
pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
debug_largebar = true;
}
+
+ if (amdgpu_debug_mask & DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
+ pr_info("debug: soft reset for GPU recovery disabled\n");
+ amdgpu_soft_recovery = false;
+ }
}
static int __init amdgpu_init(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 80d6e132e409..40678d9fb17e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -434,8 +434,12 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
unsigned long flags;
+ ktime_t deadline;
- ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+ if (!amdgpu_soft_recovery)
+ return false;
+
+ deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 6fa644c249a5..afcbacce0a13 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -264,6 +264,7 @@ enum AMDGPU_DEBUG_MASK {
DEBUG_VERBOSE_EVICTIONS = (1 << 0), // 0x1
DEBUG_VM = (1 << 1), // 0x2
DEBUG_LARGEBAR = (1 << 2), // 0x4
+ DEBUG_DISABLE_GPU_SOFT_RECOVERY = (1 << 3), // 0x8
};
/**
--
2.41.0