Re: [PATCH drm-misc-next v9 06/11] drm/nouveau: fence: separate fence alloc and emit

From: Danilo Krummrich
Date: Mon Aug 07 2023 - 14:55:25 EST


Hi Christian,

On 8/7/23 20:07, Christian König wrote:
Am 03.08.23 um 18:52 schrieb Danilo Krummrich:
The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
in order to emit fences within DMA fence signalling critical sections
(e.g. as typically done in the DRM GPU schedulers run_job() callback) we
need to separate fence allocation and fence emitting.

At least from the description that sounds like it might be illegal. Daniel can you take a look as well.

What exactly are you doing here?

I'm basically doing exactly the same as amdgpu_fence_emit() does in amdgpu_ib_schedule() called by amdgpu_job_run().

The difference - and this is what this patch is for - is that I separate the fence allocation from emitting the fence, such that the fence structure is allocated before the job is submitted to the GPU scheduler. amdgpu solves this with GFP_ATOMIC within amdgpu_fence_emit() to allocate the fence structure in this case.

- Danilo


Regards,
Christian.


Signed-off-by: Danilo Krummrich <dakr@xxxxxxxxxx>
---
  drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 ++++-
  drivers/gpu/drm/nouveau/nouveau_bo.c    | 52 +++++++++++++++----------
  drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
  drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
  drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-----
  drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
  drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
  7 files changed, 59 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index a6f2e681bde9..a34924523133 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
      PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
      PUSH_KICK(push);
-    ret = nouveau_fence_new(chan, false, pfence);
+    ret = nouveau_fence_new(pfence);
      if (ret)
          goto fail;
+    ret = nouveau_fence_emit(*pfence, chan);
+    if (ret)
+        goto fail_fence_unref;
+
      return 0;
+
+fail_fence_unref:
+    nouveau_fence_unref(pfence);
  fail:
      spin_lock_irqsave(&dev->event_lock, flags);
      list_del(&s->head);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 057bc995f19b..e9cbbf594e6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
          mutex_lock(&cli->mutex);
      else
          mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
+
      ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, ctx->interruptible);
-    if (ret == 0) {
-        ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
-        if (ret == 0) {
-            ret = nouveau_fence_new(chan, false, &fence);
-            if (ret == 0) {
-                /* TODO: figure out a better solution here
-                 *
-                 * wait on the fence here explicitly as going through
-                 * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
-                 *
-                 * Without this the operation can timeout and we'll fallback to a
-                 * software copy, which might take several minutes to finish.
-                 */
-                nouveau_fence_wait(fence, false, false);
-                ret = ttm_bo_move_accel_cleanup(bo,
-                                &fence->base,
-                                evict, false,
-                                new_reg);
-                nouveau_fence_unref(&fence);
-            }
-        }
+    if (ret)
+        goto out_unlock;
+
+    ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
+    if (ret)
+        goto out_unlock;
+
+    ret = nouveau_fence_new(&fence);
+    if (ret)
+        goto out_unlock;
+
+    ret = nouveau_fence_emit(fence, chan);
+    if (ret) {
+        nouveau_fence_unref(&fence);
+        goto out_unlock;
      }
+
+    /* TODO: figure out a better solution here
+     *
+     * wait on the fence here explicitly as going through
+     * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+     *
+     * Without this the operation can timeout and we'll fallback to a
+     * software copy, which might take several minutes to finish.
+     */
+    nouveau_fence_wait(fence, false, false);
+    ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
+                    new_reg);
+    nouveau_fence_unref(&fence);
+
+out_unlock:
      mutex_unlock(&cli->mutex);
      return ret;
  }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 6d639314250a..f69be4c8f9f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
          struct nouveau_fence *fence = NULL;
          int ret;
-        ret = nouveau_fence_new(chan, false, &fence);
+        ret = nouveau_fence_new(&fence);
          if (!ret) {
-            ret = nouveau_fence_wait(fence, false, false);
+            ret = nouveau_fence_emit(fence, chan);
+            if (!ret)
+                ret = nouveau_fence_wait(fence, false, false);
              nouveau_fence_unref(&fence);
          }
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 789857faa048..4ad40e42cae1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -209,7 +209,8 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
          goto done;
      }
-    nouveau_fence_new(dmem->migrate.chan, false, &fence);
+    if (!nouveau_fence_new(&fence))
+        nouveau_fence_emit(fence, dmem->migrate.chan);
      migrate_vma_pages(&args);
      nouveau_dmem_fence_done(&fence);
      dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
          }
      }
-    nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
+    if (!nouveau_fence_new(&fence))
+        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
      migrate_device_pages(src_pfns, dst_pfns, npages);
      nouveau_dmem_fence_done(&fence);
      migrate_device_finalize(src_pfns, dst_pfns, npages);
@@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
          addr += PAGE_SIZE;
      }
-    nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
+    if (!nouveau_fence_new(&fence))
+        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
      migrate_vma_pages(args);
      nouveau_dmem_fence_done(&fence);
      nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ee5e9d40c166..e946408f945b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -210,6 +210,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
      struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
      int ret;
+    if (unlikely(!chan->fence))
+        return -ENODEV;
+
      fence->channel  = chan;
      fence->timeout  = jiffies + (15 * HZ);
@@ -396,25 +399,16 @@ nouveau_fence_unref(struct nouveau_fence **pfence)
  }
  int
-nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
-          struct nouveau_fence **pfence)
+nouveau_fence_new(struct nouveau_fence **pfence)
  {
      struct nouveau_fence *fence;
-    int ret = 0;
-
-    if (unlikely(!chan->fence))
-        return -ENODEV;
      fence = kzalloc(sizeof(*fence), GFP_KERNEL);
      if (!fence)
          return -ENOMEM;
-    ret = nouveau_fence_emit(fence, chan);
-    if (ret)
-        nouveau_fence_unref(&fence);
-
      *pfence = fence;
-    return ret;
+    return 0;
  }
  static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 0ca2bc85adf6..7c73c7c9834a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -17,8 +17,7 @@ struct nouveau_fence {
      unsigned long timeout;
  };
-int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
-               struct nouveau_fence **);
+int  nouveau_fence_new(struct nouveau_fence **);
  void nouveau_fence_unref(struct nouveau_fence **);
  int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index a48f42aaeab9..9c8d1b911a01 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -873,8 +873,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
          }
      }
-    ret = nouveau_fence_new(chan, false, &fence);
+    ret = nouveau_fence_new(&fence);
+    if (!ret)
+        ret = nouveau_fence_emit(fence, chan);
      if (ret) {
+        nouveau_fence_unref(&fence);
          NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
          WIND_RING(chan);
          goto out;