Re: [PATCH v1 4/6] drm/amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault

From: Alex Deucher

Date: Tue Feb 17 2026 - 11:22:43 EST


On Wed, Feb 11, 2026 at 5:37 AM Pierre-Eric Pelloux-Prayer
<pierre-eric.pelloux-prayer@xxxxxxx> wrote:
>
> Looking up a VM by its PASID and reserving its root BO is tricky to
> implement right, and we're going to need the same logic from the
> devcoredump code, so extract it into a helper.
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>

Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>

> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 84 +++++++++++++++++---------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +
> 2 files changed, 57 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 807f8bcc7de5..6a5b3e148554 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2930,6 +2930,50 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> return 0;
> }
>
> +/**
> + * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible.
> + * @adev: amdgpu device pointer
> + * @root: out parameter; on success holds the referenced and reserved root BO
> + * @pasid: PASID of the VM
> + * Returns NULL on failure; on success the caller must unreserve and unref *root.
> + */
> +struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
> + struct amdgpu_bo **root, u32 pasid)
> +{
> + unsigned long irqflags;
> + struct amdgpu_vm *vm;
> + int r;
> +
> + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> + vm = xa_load(&adev->vm_manager.pasids, pasid);
> + *root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
> + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> +
> + if (!*root)
> + return NULL;
> +
> + r = amdgpu_bo_reserve(*root, true);
> + if (r)
> + goto error_unref;
> +
> + /* Double check that the VM still exists */
> + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> + vm = xa_load(&adev->vm_manager.pasids, pasid);
> + if (vm && vm->root.bo != *root)
> + vm = NULL;
> + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> + if (!vm)
> + goto error_unlock;
> +
> + return vm;
> +error_unlock:
> + amdgpu_bo_unreserve(*root);
> +
> +error_unref:
> + amdgpu_bo_unref(root);
> + return NULL;
> +}
> +
> /**
> * amdgpu_vm_handle_fault - graceful handling of VM faults.
> * @adev: amdgpu device pointer
> @@ -2945,50 +2989,31 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> * shouldn't be reported any more.
> */
> bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
> - u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
> - bool write_fault)
> + u32 vmid, u32 node_id, uint64_t addr,
> + uint64_t ts, bool write_fault)
> {
> bool is_compute_context = false;
> struct amdgpu_bo *root;
> - unsigned long irqflags;
> uint64_t value, flags;
> struct amdgpu_vm *vm;
> int r;
>
> - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> - vm = xa_load(&adev->vm_manager.pasids, pasid);
> - if (vm) {
> - root = amdgpu_bo_ref(vm->root.bo);
> - is_compute_context = vm->is_compute_context;
> - } else {
> - root = NULL;
> - }
> - xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> -
> - if (!root)
> + vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
> + if (!vm)
> return false;
>
> + is_compute_context = vm->is_compute_context;
> +
> addr /= AMDGPU_GPU_PAGE_SIZE;
>
> - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
> - node_id, addr, ts, write_fault)) {
> + if (is_compute_context &&
> + !svm_range_restore_pages(adev, pasid, vmid, node_id, addr,
> + ts, write_fault)) {
> + amdgpu_bo_unreserve(root);
> amdgpu_bo_unref(&root);
> return true;
> }
>
> - r = amdgpu_bo_reserve(root, true);
> - if (r)
> - goto error_unref;
> -
> - /* Double check that the VM still exists */
> - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> - vm = xa_load(&adev->vm_manager.pasids, pasid);
> - if (vm && vm->root.bo != root)
> - vm = NULL;
> - xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> - if (!vm)
> - goto error_unlock;
> -
> flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
> AMDGPU_PTE_SYSTEM;
>
> @@ -3027,7 +3052,6 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
> if (r < 0)
> dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
>
> -error_unref:
> amdgpu_bo_unref(&root);
>
> return false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 139642eacdd0..2051eda55c99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -589,6 +589,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
> u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
> bool write_fault);
>
> +struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
> + struct amdgpu_bo **root, u32 pasid);
> +
> void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>
> void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> --
> 2.43.0
>