[PATCH 3/3] drm/panthor: Reset device and load FW after failed PM suspend

From: Adrián Larumbe
Date: Fri Oct 11 2024 - 19:00:11 EST


On rk3588 SoCs, during a runtime PM suspend, the transition to the
lowest voltage/frequency pair might sometimes fail for reasons not yet
understood. In that case, even a slow FW reset will fail, leaving the
device's PM runtime status unusable.

When that happens, successive attempts to resume the device upon running
a job will always fail.

Fix it by forcing a synchronous device reset, which will lead to a
successful FW reload, and also reset the device's PM runtime error
status before resuming it.

Signed-off-by: Adrián Larumbe <adrian.larumbe@xxxxxxxxxxxxx>
---
drivers/gpu/drm/panthor/panthor_device.c | 10 ++++++++++
drivers/gpu/drm/panthor/panthor_device.h | 2 ++
drivers/gpu/drm/panthor/panthor_sched.c | 7 +++++++
3 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index 5430557bd0b8..ec6fed5e996b 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -105,6 +105,16 @@ static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data)
destroy_workqueue(ptdev->reset.wq);
}

+int panthor_device_reset_sync(struct panthor_device *ptdev)
+{
+ panthor_fw_pre_reset(ptdev, false);
+ panthor_mmu_pre_reset(ptdev);
+ panthor_gpu_soft_reset(ptdev);
+ panthor_gpu_l2_power_on(ptdev);
+ panthor_mmu_post_reset(ptdev);
+ return panthor_fw_post_reset(ptdev);
+}
+
static void panthor_device_reset_work(struct work_struct *work)
{
struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index 0e68f5a70d20..05a5a7233378 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -217,6 +217,8 @@ struct panthor_file {
int panthor_device_init(struct panthor_device *ptdev);
void panthor_device_unplug(struct panthor_device *ptdev);

+int panthor_device_reset_sync(struct panthor_device *ptdev);
+
/**
* panthor_device_schedule_reset() - Schedules a reset operation
*/
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index c7b350fc3eba..9a854c8c5718 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -3101,6 +3101,13 @@ queue_run_job(struct drm_sched_job *sched_job)
return dma_fence_get(job->done_fence);
}

+ if (ptdev->base.dev->power.runtime_error) {
+ ret = panthor_device_reset_sync(ptdev);
+ if (drm_WARN_ON(&ptdev->base, ret))
+ return ERR_PTR(ret);
+ drm_WARN_ON(&ptdev->base, pm_runtime_set_active(ptdev->base.dev));
+ }
+
ret = pm_runtime_resume_and_get(ptdev->base.dev);
if (drm_WARN_ON(&ptdev->base, ret))
return ERR_PTR(ret);
--
2.46.2