[PATCH AUTOSEL 5.14 005/252] drm/ttm: Fix multihop assert on eviction.

From: Sasha Levin
Date: Thu Sep 09 2021 - 07:41:29 EST


From: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>

[ Upstream commit 403797925768d9fa870f5b1ebcd20016b397083b ]

Problem:
Under memory pressure when GTT domain is almost full multihop assert
will come up when trying to evict LRU BO from VRAM to SYSTEM.

Fix:
Don't assert on multihop error in evict code but rather do a retry
as we do in ttm_bo_move_buffer

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
Reviewed-by: Christian König <christian.koenig@xxxxxxx>
Link: https://patchwork.freedesktop.org/patch/msgid/20210622162339.761651-6-andrey.grodzovsky@xxxxxxx
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
drivers/gpu/drm/ttm/ttm_bo.c | 63 +++++++++++++++++++-----------------
1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8d7fd65ccced..32202385073a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -488,6 +488,31 @@ void ttm_bo_unlock_delayed_workqueue(struct ttm_device *bdev, int resched)
}
EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue);

+static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
+ struct ttm_resource **mem,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_place *hop)
+{
+ struct ttm_placement hop_placement;
+ struct ttm_resource *hop_mem;
+ int ret;
+
+ hop_placement.num_placement = hop_placement.num_busy_placement = 1;
+ hop_placement.placement = hop_placement.busy_placement = hop;
+
+ /* find space in the bounce domain */
+ ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx);
+ if (ret)
+ return ret;
+ /* move to the bounce domain */
+ ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL);
+ if (ret) {
+ ttm_resource_free(bo, &hop_mem);
+ return ret;
+ }
+ return 0;
+}
+
static int ttm_bo_evict(struct ttm_buffer_object *bo,
struct ttm_operation_ctx *ctx)
{
@@ -527,12 +552,17 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
goto out;
}

+bounce:
ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
- if (unlikely(ret)) {
- WARN(ret == -EMULTIHOP, "Unexpected multihop in eviction - likely driver bug\n");
- if (ret != -ERESTARTSYS)
+ if (ret == -EMULTIHOP) {
+ ret = ttm_bo_bounce_temp_buffer(bo, &evict_mem, ctx, &hop);
+ if (ret) {
pr_err("Buffer eviction failed\n");
- ttm_resource_free(bo, &evict_mem);
+ ttm_resource_free(bo, &evict_mem);
+ goto out;
+ }
+ /* try and move to final place now. */
+ goto bounce;
}
out:
return ret;
@@ -847,31 +877,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_mem_space);

-static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
- struct ttm_resource **mem,
- struct ttm_operation_ctx *ctx,
- struct ttm_place *hop)
-{
- struct ttm_placement hop_placement;
- struct ttm_resource *hop_mem;
- int ret;
-
- hop_placement.num_placement = hop_placement.num_busy_placement = 1;
- hop_placement.placement = hop_placement.busy_placement = hop;
-
- /* find space in the bounce domain */
- ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx);
- if (ret)
- return ret;
- /* move to the bounce domain */
- ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL);
- if (ret) {
- ttm_resource_free(bo, &hop_mem);
- return ret;
- }
- return 0;
-}
-
static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
struct ttm_placement *placement,
struct ttm_operation_ctx *ctx)
--
2.30.2