[PATCH drm-next v3 11/15] drm/nouveau: fence: fail to emit when fence context is killed

From: Danilo Krummrich
Date: Mon Apr 03 2023 - 21:30:29 EST


The new VM_BIND UAPI implementation introduced in subsequent commits
will allow asynchronous jobs processing push buffers and emitting
fences.

If a fence context is killed, e.g. due to a channel fault, jobs which
are already queued for execution might still emit new fences. In such a
case a job would hang forever.

To fix that, fail to emit a new fence on a killed fence context with
-ENODEV to unblock the job.

Signed-off-by: Danilo Krummrich <dakr@xxxxxxxxxx>
---
drivers/gpu/drm/nouveau/nouveau_fence.c | 7 +++++++
drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +-
2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index e946408f945b..77c739a55b19 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -96,6 +96,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
if (nouveau_fence_signal(fence))
nvif_event_block(&fctx->event);
}
+ fctx->killed = 1;
spin_unlock_irqrestore(&fctx->lock, flags);
}

@@ -229,6 +230,12 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
dma_fence_get(&fence->base);
spin_lock_irq(&fctx->lock);

+ if (unlikely(fctx->killed)) {
+ spin_unlock_irq(&fctx->lock);
+ dma_fence_put(&fence->base);
+ return -ENODEV;
+ }
+
if (nouveau_fence_update(chan, fctx))
nvif_event_block(&fctx->event);

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 7c73c7c9834a..2c72d96ef17d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -44,7 +44,7 @@ struct nouveau_fence_chan {
char name[32];

struct nvif_event event;
- int notify_ref, dead;
+ int notify_ref, dead, killed;
};

struct nouveau_fence_priv {
--
2.39.2