[PATCH v5 1/2] drm/ttm: Drop tt->restore after successful restore

From: Matthew Brost

Date: Tue May 05 2026 - 16:04:57 EST


ttm_pool_restore_and_alloc() can successfully complete the restore
process via ttm_pool_restore_commit(), but tt->restore is not dropped
afterward. As a result, subsequent backup/restore flows observe what
appears to be a completed restore, while in reality shmem handles are
still installed in tt->pages, leading to the stack trace below.

Fix this by freeing and dropping tt->restore in
ttm_pool_restore_and_alloc() upon successful completion of the restore.

20545 [  309.784531] RIP: 0010:sg_alloc_append_table_from_pages+0x38c/0x490
20547 [  309.809570] RSP: 0018:ffffc9000623b838 EFLAGS: 00010206
20548 [  309.814827] RAX: 0000000000001000 RBX: ffff88816e42a160 RCX: 0000000000000000
20549 [  309.821986] RDX: 0000000000002000 RSI: 0000000000000003 RDI: 0000000000001000
20550 [  309.829147] RBP: ffff88816e42a168 R08: 0000000000000002 R09: 000000007ffff000
20551 [  309.836310] R10: ffffc9000623b928 R11: 0000000000000000 R12: 000000007ffff000
20552 [  309.843471] R13: ffff88815ba5a100 R14: 0000000000000000 R15: 0000000000000001
20553 [  309.850634] FS:  00007f9ff305e700(0000) GS:ffff888276c94000(0000) knlGS:0000000000000000
20554 [  309.858749] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
20555 [  309.864519] CR2: 00007f9fca701000 CR3: 00000001565e2005 CR4: 0000000008f70ef0
20556 [  309.871678] PKRU: 55555558
20557 [  309.874403] Call Trace:
20558 [  309.876866]  <TASK>
20559 [  309.878988]  sg_alloc_table_from_pages_segment+0x60/0x100
20560 [  309.884415]  ? ttm_resource_manager_usage+0x36/0x60 [ttm]
20561 [  309.889845]  ? xe_tt_map_sg+0x7d/0xd0 [xe]
20562 [  309.894045]  xe_tt_map_sg+0x7d/0xd0 [xe]
20563 [  309.898037]  xe_bo_move+0x927/0xaa0 [xe]
20564 [  309.902029]  ttm_bo_handle_move_mem+0xba/0x170 [ttm]
20565 [  309.907022]  ttm_bo_validate+0xbe/0x190 [ttm]
20566 [  309.911405]  xe_bo_validate+0x9a/0x120 [xe]
20567 [  309.915663]  xe_gpuvm_validate+0xd9/0x140 [xe]
20568 [  309.920206]  drm_gpuvm_validate+0x2f0/0x5b0 [drm_gpuvm]
20569 [  309.925459]  ? drm_exec_lock_obj+0x63/0x210 [drm_exec]
20570 [  309.930627]  xe_vm_validate_rebind+0x46/0xb0 [xe]
20571 [  309.935428]  xe_exec_fn+0x20/0x40 [xe]
20572 [  309.939249]  drm_gpuvm_exec_lock+0x78/0xc0 [drm_gpuvm]
20573 [  309.944410]  xe_validation_exec_lock+0x5a/0xa0 [xe]
20574 [  309.949385]  xe_exec_ioctl+0x806/0xc30 [xe]
20575 [  309.953639]  ? ttwu_queue_wakelist+0xd9/0xf0
20576 [  309.957935]  ? __pfx_xe_exec_fn+0x10/0x10 [xe]
20577 [  309.962449]  ? __wake_up_common+0x73/0xa0
20578 [  309.966482]  ? __pfx_xe_exec_ioctl+0x10/0x10 [xe]
20579 [  309.971263]  drm_ioctl_kernel+0xa3/0x100
20580 [  309.975209]  drm_ioctl+0x213/0x440
20581 [  309.978637]  ? __pfx_xe_exec_ioctl+0x10/0x10 [xe]
20582 [  309.983415]  xe_drm_ioctl+0x67/0xd0 [xe]
20583 [  309.987408]  __x64_sys_ioctl+0x7f/0xd0

Cc: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: Huang Rui <ray.huang@xxxxxxx>
Cc: Matthew Auld <matthew.auld@xxxxxxxxx>
Cc: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxxxx>
Cc: Maxime Ripard <mripard@xxxxxxxxxx>
Cc: Thomas Zimmermann <tzimmermann@xxxxxxx>
Cc: David Airlie <airlied@xxxxxxxxx>
Cc: Simona Vetter <simona@xxxxxxxx>
Cc: dri-devel@xxxxxxxxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Cc: stable@xxxxxxxxxxxxxxx
Fixes: b63d715b8090 ("drm/ttm/pool, drm/ttm/tt: Provide a helper to shrink pages")
Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
Reviewed-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>

---

v3:
- Call ttm_pool_apply_caching after freeing local restore (sashiko)
- Save alloc in snapshot on restore failure (sashiko)
v4:
- Actually implement 'Save alloc in snapshot on restore failure (sashiko)'
v5:
- kfree restore after ttm_pool_apply_caching (sashiko)
---
drivers/gpu/drm/ttm/ttm_pool.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 278bbe7a11ad..d380a3c7fe40 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -902,6 +902,7 @@ int ttm_pool_restore_and_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
{
struct ttm_pool_tt_restore *restore = tt->restore;
struct ttm_pool_alloc_state alloc;
+ int ret;

if (WARN_ON(!ttm_tt_is_backed_up(tt)))
return -EINVAL;
@@ -925,14 +926,22 @@ int ttm_pool_restore_and_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
} else {
alloc = restore->snapshot_alloc;
if (ttm_pool_restore_valid(restore)) {
- int ret = ttm_pool_restore_commit(restore, tt->backup,
- ctx, &alloc);
+ ret = ttm_pool_restore_commit(restore, tt->backup,
+ ctx, &alloc);

if (ret)
return ret;
}
- if (!alloc.remaining_pages)
+ if (!alloc.remaining_pages) {
+ ret = ttm_pool_apply_caching(&alloc);
+ if (ret)
+ return ret;
+
+ kfree(tt->restore);
+ tt->restore = NULL;
+
return 0;
+ }
}

return __ttm_pool_alloc(pool, tt, ctx, &alloc, restore);
--
2.34.1