Re: [PATCH v1] drm/amdgpu: fix root reservation in amdgpu_vm_handle_fault
From: Christian König
Date: Tue Apr 14 2026 - 09:31:05 EST
On 4/14/26 14:25, Pierre-Eric Pelloux-Prayer wrote:
> svm_range_restore_pages might reserve the root bo so it must
> be called after unreserving it.
>
> The code checking that the VM still exists can be moved in the
> "if" block, since the VM can only be removed when the root bo
> is not reserved.
That won't work like this. Dropping and reacquiring the root BO lock is a pretty big nono.
I think we need to fix svm_range_restore_pages() instead.
Regards,
Christian.
>
> Fixes: 32b486e8541c ("drm/amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault")
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 35 +++++++++++---------------
> 1 file changed, 15 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 63156289ae7f..d86be0108913 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2975,25 +2975,12 @@ struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
> return NULL;
>
> r = amdgpu_bo_reserve(*root, true);
> - if (r)
> - goto error_unref;
> -
> - /* Double check that the VM still exists */
> - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> - vm = xa_load(&adev->vm_manager.pasids, pasid);
> - if (vm && vm->root.bo != *root)
> - vm = NULL;
> - xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> - if (!vm)
> - goto error_unlock;
> + if (r) {
> + amdgpu_bo_unref(root);
> + return NULL;
> + }
>
> return vm;
> -error_unlock:
> - amdgpu_bo_unreserve(*root);
> -
> -error_unref:
> - amdgpu_bo_unref(root);
> - return NULL;
> }
>
> /**
> @@ -3026,11 +3013,19 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>
> is_compute_context = vm->is_compute_context;
>
> - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
> - node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
> + if (is_compute_context) {
> + /* Unreserve root since svm_range_restore_pages might try to reserve it. */
> amdgpu_bo_unreserve(root);
> amdgpu_bo_unref(&root);
> - return true;
> +
> + if (!svm_range_restore_pages(adev, pasid, vmid,
> + node_id, addr >> PAGE_SHIFT, ts, write_fault))
> + return true;
> +
> + /* Double check that the VM still exists. */
> + vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
> + if (!vm)
> + return false;
> }
>
> addr /= AMDGPU_GPU_PAGE_SIZE;