[PATCH 5.12 090/242] drm/amdkfd: fix circular locking on get_wave_state

From: Greg Kroah-Hartman
Date: Thu Jul 15 2021 - 15:08:03 EST


From: Jonathan Kim <jonathan.kim@xxxxxxx>

[ Upstream commit 63f6e01237257e7226efc5087f3f0b525d320f54 ]

get_wave_state acquires the mmap_lock in copy_to_user, but so do
mmu_notifiers. Since mmu_notifiers are allowed to take the dqm lock,
call get_wave_state outside the dqm_lock to prevent circular locking.

v2: squash in unused variable removal.

Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx>
Reviewed-by: Felix Kuehling <felix.kuehling@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 28 +++++++++----------
1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a4266c4bca13..df05eca73275 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1677,29 +1677,27 @@ static int get_wave_state(struct device_queue_manager *dqm,
u32 *save_area_used_size)
{
struct mqd_manager *mqd_mgr;
- int r;

dqm_lock(dqm);

- if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
- q->properties.is_active || !q->device->cwsr_enabled) {
- r = -EINVAL;
- goto dqm_unlock;
- }
-
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

- if (!mqd_mgr->get_wave_state) {
- r = -EINVAL;
- goto dqm_unlock;
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.is_active || !q->device->cwsr_enabled ||
+ !mqd_mgr->get_wave_state) {
+ dqm_unlock(dqm);
+ return -EINVAL;
}

- r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
- ctl_stack_used_size, save_area_used_size);
-
-dqm_unlock:
dqm_unlock(dqm);
- return r;
+
+ /*
+ * get_wave_state is outside the dqm lock to prevent circular locking
+ * and the queue should be protected against destruction by the process
+ * lock.
+ */
+ return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
+ ctl_stack_used_size, save_area_used_size);
}

static int process_termination_cpsch(struct device_queue_manager *dqm,
--
2.30.2