Re: [PATCH 4/4] drm/msm: Remove vma use tracking
From: Rob Clark
Date: Thu Aug 03 2023 - 12:12:18 EST
On Thu, Aug 3, 2023 at 1:38 AM Daniel Vetter <daniel@xxxxxxxx> wrote:
>
> On Wed, Aug 02, 2023 at 03:21:52PM -0700, Rob Clark wrote:
> > From: Rob Clark <robdclark@xxxxxxxxxxxx>
> >
> > This was not strictly necessary, as page unpinning (ie. shrinker) only
> > cares about the resv. It did give us some extra sanity checking for
> > userspace controlled iova, and was useful to catch issues on kernel and
> > userspace side when enabling userspace iova. But if userspace screws
> > this up, it just corrupts it's own gpu buffers and/or gets iova faults.
> > So we can just let userspace shoot it's own foot and drop the extra per-
> > buffer SUBMIT overhead.
> >
> > Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx>
>
> I did check a few things (like that the gem lru helpers have all the
> needed lockdep_assert_held) and I think aside from the optimization this
> is a nice semantic cleanup. Since iirc we've had a locking inversion
> discussion and the vma tracking here came up as a culprit. On the series:
yup, I try to make sure there are sufficient locking asserts when it
comes to gem stuff ;-)
There shouldn't have been any locking inversion with the vma lock (no
other locks acquired under it) but it was hurting cpu overhead. The
big remaining locking headache for me is run-pm / pm-qos
BR,
-R
> Acked-by: Daniel Vetter <daniel.vetter@xxxxxxxx>
>
> > ---
> > drivers/gpu/drm/msm/msm_gem.c | 9 +---
> > drivers/gpu/drm/msm/msm_gem.h | 12 +----
> > drivers/gpu/drm/msm/msm_gem_submit.c | 14 ++----
> > drivers/gpu/drm/msm/msm_gem_vma.c | 67 +---------------------------
> > drivers/gpu/drm/msm/msm_ringbuffer.c | 3 +-
> > 5 files changed, 9 insertions(+), 96 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> > index 1c81ff6115ac..ce1ed0f9ad2d 100644
> > --- a/drivers/gpu/drm/msm/msm_gem.c
> > +++ b/drivers/gpu/drm/msm/msm_gem.c
> > @@ -607,9 +607,6 @@ static int clear_iova(struct drm_gem_object *obj,
> > if (!vma)
> > return 0;
> >
> > - if (msm_gem_vma_inuse(vma))
> > - return -EBUSY;
> > -
> > msm_gem_vma_purge(vma);
> > msm_gem_vma_close(vma);
> > del_vma(vma);
> > @@ -660,7 +657,6 @@ void msm_gem_unpin_iova(struct drm_gem_object *obj,
> > msm_gem_lock(obj);
> > vma = lookup_vma(obj, aspace);
> > if (!GEM_WARN_ON(!vma)) {
> > - msm_gem_vma_unpin(vma);
> > msm_gem_unpin_locked(obj);
> > }
> > msm_gem_unlock(obj);
> > @@ -991,11 +987,10 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m,
> > } else {
> > name = comm = NULL;
> > }
> > - seq_printf(m, " [%s%s%s: aspace=%p, %08llx,%s,inuse=%d]",
> > + seq_printf(m, " [%s%s%s: aspace=%p, %08llx,%s]",
> > name, comm ? ":" : "", comm ? comm : "",
> > vma->aspace, vma->iova,
> > - vma->mapped ? "mapped" : "unmapped",
> > - msm_gem_vma_inuse(vma));
> > + vma->mapped ? "mapped" : "unmapped");
> > kfree(comm);
> > }
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
> > index 2ddd896aac68..8ddef5443140 100644
> > --- a/drivers/gpu/drm/msm/msm_gem.h
> > +++ b/drivers/gpu/drm/msm/msm_gem.h
> > @@ -59,24 +59,16 @@ struct msm_fence_context;
> >
> > struct msm_gem_vma {
> > struct drm_mm_node node;
> > - spinlock_t lock;
> > uint64_t iova;
> > struct msm_gem_address_space *aspace;
> > struct list_head list; /* node in msm_gem_object::vmas */
> > bool mapped;
> > - int inuse;
> > - uint32_t fence_mask;
> > - uint32_t fence[MSM_GPU_MAX_RINGS];
> > - struct msm_fence_context *fctx[MSM_GPU_MAX_RINGS];
> > };
> >
> > struct msm_gem_vma *msm_gem_vma_new(struct msm_gem_address_space *aspace);
> > int msm_gem_vma_init(struct msm_gem_vma *vma, int size,
> > u64 range_start, u64 range_end);
> > -bool msm_gem_vma_inuse(struct msm_gem_vma *vma);
> > void msm_gem_vma_purge(struct msm_gem_vma *vma);
> > -void msm_gem_vma_unpin(struct msm_gem_vma *vma);
> > -void msm_gem_vma_unpin_fenced(struct msm_gem_vma *vma, struct msm_fence_context *fctx);
> > int msm_gem_vma_map(struct msm_gem_vma *vma, int prot, struct sg_table *sgt, int size);
> > void msm_gem_vma_close(struct msm_gem_vma *vma);
> >
> > @@ -298,15 +290,13 @@ struct msm_gem_submit {
> > /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
> > #define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */
> > #define BO_LOCKED 0x4000 /* obj lock is held */
> > -#define BO_OBJ_PINNED 0x2000 /* obj (pages) is pinned and on active list */
> > -#define BO_VMA_PINNED 0x1000 /* vma (virtual address) is pinned */
> > +#define BO_PINNED 0x2000 /* obj (pages) is pinned and on active list */
> > uint32_t flags;
> > union {
> > struct drm_gem_object *obj;
> > uint32_t handle;
> > };
> > uint64_t iova;
> > - struct msm_gem_vma *vma;
> > } bos[];
> > };
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> > index b17561ebd518..5f90cc8e7b7f 100644
> > --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> > @@ -261,10 +261,7 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
> > */
> > submit->bos[i].flags &= ~cleanup_flags;
> >
> > - if (flags & BO_VMA_PINNED)
> > - msm_gem_vma_unpin(submit->bos[i].vma);
> > -
> > - if (flags & BO_OBJ_PINNED)
> > + if (flags & BO_PINNED)
> > msm_gem_unpin_locked(obj);
> >
> > if (flags & BO_LOCKED)
> > @@ -273,7 +270,7 @@ static void submit_cleanup_bo(struct msm_gem_submit *submit, int i,
> >
> > static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
> > {
> > - unsigned cleanup_flags = BO_VMA_PINNED | BO_OBJ_PINNED | BO_LOCKED;
> > + unsigned cleanup_flags = BO_PINNED | BO_LOCKED;
> > submit_cleanup_bo(submit, i, cleanup_flags);
> >
> > if (!(submit->bos[i].flags & BO_VALID))
> > @@ -404,9 +401,6 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
> > if (ret)
> > break;
> >
> > - submit->bos[i].flags |= BO_VMA_PINNED;
> > - submit->bos[i].vma = vma;
> > -
> > if (vma->iova == submit->bos[i].iova) {
> > submit->bos[i].flags |= BO_VALID;
> > } else {
> > @@ -420,7 +414,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
> > mutex_lock(&priv->lru.lock);
> > for (i = 0; i < submit->nr_bos; i++) {
> > msm_gem_pin_obj_locked(submit->bos[i].obj);
> > - submit->bos[i].flags |= BO_OBJ_PINNED;
> > + submit->bos[i].flags |= BO_PINNED;
> > }
> > mutex_unlock(&priv->lru.lock);
> >
> > @@ -547,7 +541,7 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool error)
> > unsigned i;
> >
> > if (error)
> > - cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED;
> > + cleanup_flags |= BO_PINNED;
> >
> > for (i = 0; i < submit->nr_bos; i++) {
> > struct drm_gem_object *obj = submit->bos[i].obj;
> > diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
> > index 98287ed99960..11e842dda73c 100644
> > --- a/drivers/gpu/drm/msm/msm_gem_vma.c
> > +++ b/drivers/gpu/drm/msm/msm_gem_vma.c
> > @@ -38,41 +38,12 @@ msm_gem_address_space_get(struct msm_gem_address_space *aspace)
> > return aspace;
> > }
> >
> > -bool msm_gem_vma_inuse(struct msm_gem_vma *vma)
> > -{
> > - bool ret = true;
> > -
> > - spin_lock(&vma->lock);
> > -
> > - if (vma->inuse > 0)
> > - goto out;
> > -
> > - while (vma->fence_mask) {
> > - unsigned idx = ffs(vma->fence_mask) - 1;
> > -
> > - if (!msm_fence_completed(vma->fctx[idx], vma->fence[idx]))
> > - goto out;
> > -
> > - vma->fence_mask &= ~BIT(idx);
> > - }
> > -
> > - ret = false;
> > -
> > -out:
> > - spin_unlock(&vma->lock);
> > -
> > - return ret;
> > -}
> > -
> > /* Actually unmap memory for the vma */
> > void msm_gem_vma_purge(struct msm_gem_vma *vma)
> > {
> > struct msm_gem_address_space *aspace = vma->aspace;
> > unsigned size = vma->node.size;
> >
> > - /* Print a message if we try to purge a vma in use */
> > - GEM_WARN_ON(msm_gem_vma_inuse(vma));
> > -
> > /* Don't do anything if the memory isn't mapped */
> > if (!vma->mapped)
> > return;
> > @@ -82,33 +53,6 @@ void msm_gem_vma_purge(struct msm_gem_vma *vma)
> > vma->mapped = false;
> > }
> >
> > -static void vma_unpin_locked(struct msm_gem_vma *vma)
> > -{
> > - if (GEM_WARN_ON(!vma->inuse))
> > - return;
> > - if (!GEM_WARN_ON(!vma->iova))
> > - vma->inuse--;
> > -}
> > -
> > -/* Remove reference counts for the mapping */
> > -void msm_gem_vma_unpin(struct msm_gem_vma *vma)
> > -{
> > - spin_lock(&vma->lock);
> > - vma_unpin_locked(vma);
> > - spin_unlock(&vma->lock);
> > -}
> > -
> > -/* Replace pin reference with fence: */
> > -void msm_gem_vma_unpin_fenced(struct msm_gem_vma *vma, struct msm_fence_context *fctx)
> > -{
> > - spin_lock(&vma->lock);
> > - vma->fctx[fctx->index] = fctx;
> > - vma->fence[fctx->index] = fctx->last_fence;
> > - vma->fence_mask |= BIT(fctx->index);
> > - vma_unpin_locked(vma);
> > - spin_unlock(&vma->lock);
> > -}
> > -
> > /* Map and pin vma: */
> > int
> > msm_gem_vma_map(struct msm_gem_vma *vma, int prot,
> > @@ -120,11 +64,6 @@ msm_gem_vma_map(struct msm_gem_vma *vma, int prot,
> > if (GEM_WARN_ON(!vma->iova))
> > return -EINVAL;
> >
> > - /* Increase the usage counter */
> > - spin_lock(&vma->lock);
> > - vma->inuse++;
> > - spin_unlock(&vma->lock);
> > -
> > if (vma->mapped)
> > return 0;
> >
> > @@ -146,9 +85,6 @@ msm_gem_vma_map(struct msm_gem_vma *vma, int prot,
> >
> > if (ret) {
> > vma->mapped = false;
> > - spin_lock(&vma->lock);
> > - vma->inuse--;
> > - spin_unlock(&vma->lock);
> > }
> >
> > return ret;
> > @@ -159,7 +95,7 @@ void msm_gem_vma_close(struct msm_gem_vma *vma)
> > {
> > struct msm_gem_address_space *aspace = vma->aspace;
> >
> > - GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped);
> > + GEM_WARN_ON(vma->mapped);
> >
> > spin_lock(&aspace->lock);
> > if (vma->iova)
> > @@ -179,7 +115,6 @@ struct msm_gem_vma *msm_gem_vma_new(struct msm_gem_address_space *aspace)
> > if (!vma)
> > return NULL;
> >
> > - spin_lock_init(&vma->lock);
> > vma->aspace = aspace;
> >
> > return vma;
> > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> > index 6fa427d2992e..7f5e0a961bba 100644
> > --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> > @@ -26,9 +26,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
> > for (i = 0; i < submit->nr_bos; i++) {
> > struct drm_gem_object *obj = submit->bos[i].obj;
> >
> > - msm_gem_vma_unpin_fenced(submit->bos[i].vma, fctx);
> > msm_gem_unpin_active(obj);
> > - submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED);
> > + submit->bos[i].flags &= ~BO_PINNED;
> > }
> >
> > mutex_unlock(&priv->lru.lock);
> > --
> > 2.41.0
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch