Re: [PATCH v6 4/4] drm/vc4: Allocate binner bo when starting to use the V3D
From: Eric Anholt
Date: Tue Apr 23 2019 - 13:28:39 EST
Paul Kocialkowski <paul.kocialkowski@xxxxxxxxxxx> writes:
> The binner BO is not required until the V3D is in use, so avoid
> allocating it at probe and do it on the first non-dumb BO allocation.
>
> Keep track of which clients are using the V3D and liberate the buffer
> when there is none left, using a kref. Protect the logic with a
> mutex to avoid race conditions.
>
> The binner BO is created at the time of the first render ioctl and is
> destroyed when there is no client and no exec job using it left.
>
> The Out-Of-Memory (OOM) interrupt also gets some tweaking, to avoid
> enabling it before having allocated a binner BO.
>
> We also want to keep the BO alive during runtime suspend/resume to avoid
> failing to allocate it at resume. This happens when the CMA pool is
> full at that point and results in a hard crash.
>
> Signed-off-by: Paul Kocialkowski <paul.kocialkowski@xxxxxxxxxxx>
> ---
> drivers/gpu/drm/vc4/vc4_bo.c | 45 +++++++++++++++++++++++++--
> drivers/gpu/drm/vc4/vc4_drv.c | 6 ++++
> drivers/gpu/drm/vc4/vc4_drv.h | 14 +++++++++
> drivers/gpu/drm/vc4/vc4_gem.c | 13 ++++++++
> drivers/gpu/drm/vc4/vc4_irq.c | 21 +++++++++----
> drivers/gpu/drm/vc4/vc4_v3d.c | 58 +++++++++++++++++++++++++++--------
> 6 files changed, 135 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
> index 88ebd681d7eb..03a26de620de 100644
> --- a/drivers/gpu/drm/vc4/vc4_bo.c
> +++ b/drivers/gpu/drm/vc4/vc4_bo.c
> @@ -799,20 +799,47 @@ vc4_prime_import_sg_table(struct drm_device *dev,
> return obj;
> }
>
> +static int vc4_grab_bin_bo(struct vc4_dev *vc4, struct vc4_file *vc4file)
> +{
> + int ret;
> +
> + if (!vc4->v3d)
> + return -ENODEV;
> +
> + if (vc4file->bin_bo_used)
> + return 0;
> +
> + ret = vc4_v3d_bin_bo_get(vc4);
> + if (ret)
> + return ret;
> +
> + vc4file->bin_bo_used = true;
> +
> + return 0;
> +}
> +
> int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv)
> {
> struct drm_vc4_create_bo *args = data;
> + struct vc4_file *vc4file = file_priv->driver_priv;
> + struct vc4_dev *vc4 = to_vc4_dev(dev);
> struct vc4_bo *bo = NULL;
> int ret;
>
> + ret = vc4_grab_bin_bo(vc4, vc4file);
> + if (ret)
> + return ret;
Interesting note -- we'll now throw -ENODEV from this ioctl when v3d
isn't present. I think that's actually totally fine and maybe I should
have done that for the !v3d patches originally.
> /*
> * We can't allocate from the BO cache, because the BOs don't
> * get zeroed, and that might leak data between users.
> */
> bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_V3D);
> - if (IS_ERR(bo))
> + if (IS_ERR(bo)) {
> + vc4_v3d_bin_bo_put(vc4);
> return PTR_ERR(bo);
> + }
I actually don't think you want the bin_bo_put()s in the error paths
here -- vc4_grab_bin_bo has put a flag in vc4file that we need a
bin_bo_put() when the file is closed, and if we take these error paths
there will be a refcount underflow once the file gets closed.
>
> bo->madv = VC4_MADV_WILLNEED;
>
> @@ -846,6 +873,8 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv)
> {
> struct drm_vc4_create_shader_bo *args = data;
> + struct vc4_file *vc4file = file_priv->driver_priv;
> + struct vc4_dev *vc4 = to_vc4_dev(dev);
> struct vc4_bo *bo = NULL;
> int ret;
>
> @@ -865,9 +894,15 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
> return -EINVAL;
> }
>
> + ret = vc4_grab_bin_bo(vc4, vc4file);
> + if (ret)
> + return ret;
> +
> bo = vc4_bo_create(dev, args->size, true, VC4_BO_TYPE_V3D_SHADER);
> - if (IS_ERR(bo))
> + if (IS_ERR(bo)) {
> + vc4_v3d_bin_bo_put(vc4);
> return PTR_ERR(bo);
> + }
>
> bo->madv = VC4_MADV_WILLNEED;
>
> @@ -893,8 +928,12 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
> * races for users to do doing things like mmap the shader BO.
> */
> ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
> + goto complete;
> +
> +fail:
> + vc4_v3d_bin_bo_put(vc4);
>
> - fail:
> +complete:
> drm_gem_object_put_unlocked(&bo->base.base);
>
> return ret;
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
> index 6d9be20a32be..0f99ad03614e 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.c
> +++ b/drivers/gpu/drm/vc4/vc4_drv.c
> @@ -128,8 +128,12 @@ static int vc4_open(struct drm_device *dev, struct drm_file *file)
>
> static void vc4_close(struct drm_device *dev, struct drm_file *file)
> {
> + struct vc4_dev *vc4 = to_vc4_dev(dev);
> struct vc4_file *vc4file = file->driver_priv;
>
> + if (vc4file->bin_bo_used)
> + vc4_v3d_bin_bo_put(vc4);
> +
> vc4_perfmon_close_file(vc4file);
> kfree(vc4file);
> }
> @@ -274,6 +278,8 @@ static int vc4_drm_bind(struct device *dev)
> drm->dev_private = vc4;
> INIT_LIST_HEAD(&vc4->debugfs_list);
>
> + mutex_init(&vc4->bin_bo_lock);
> +
> ret = vc4_bo_cache_init(drm);
> if (ret)
> goto dev_put;
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
> index 4f13f6262491..5bfca83deb8e 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.h
> +++ b/drivers/gpu/drm/vc4/vc4_drv.h
> @@ -216,6 +216,11 @@ struct vc4_dev {
> * the minor is available (after drm_dev_register()).
> */
> struct list_head debugfs_list;
> +
> + /* Mutex for binner bo allocation. */
> + struct mutex bin_bo_lock;
> + /* Reference count for our binner bo. */
> + struct kref bin_bo_kref;
> };
>
> static inline struct vc4_dev *
> @@ -584,6 +589,11 @@ struct vc4_exec_info {
> * NULL otherwise.
> */
> struct vc4_perfmon *perfmon;
> +
> + /* Whether the exec has taken a reference to the binner BO, which should
> + * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet.
> + */
> + bool bin_bo_used;
> };
>
> /* Per-open file private data. Any driver-specific resource that has to be
> @@ -594,6 +604,8 @@ struct vc4_file {
> struct idr idr;
> struct mutex lock;
> } perfmon;
> +
> + bool bin_bo_used;
> };
> diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
> index 723dc86b4511..e226c24e543f 100644
> --- a/drivers/gpu/drm/vc4/vc4_irq.c
> +++ b/drivers/gpu/drm/vc4/vc4_irq.c
> @@ -59,18 +59,22 @@ vc4_overflow_mem_work(struct work_struct *work)
> {
> struct vc4_dev *vc4 =
> container_of(work, struct vc4_dev, overflow_mem_work);
> - struct vc4_bo *bo = vc4->bin_bo;
> + struct vc4_bo *bo;
> int bin_bo_slot;
> struct vc4_exec_info *exec;
> unsigned long irqflags;
>
> - if (!bo)
> - return;
> + mutex_lock(&vc4->bin_bo_lock);
> +
> + if (!vc4->bin_bo)
> + goto complete;
> +
> + bo = vc4->bin_bo;
>
> bin_bo_slot = vc4_v3d_get_bin_slot(vc4);
> if (bin_bo_slot < 0) {
> DRM_ERROR("Couldn't allocate binner overflow mem\n");
> - return;
> + goto complete;
> }
>
> spin_lock_irqsave(&vc4->job_lock, irqflags);
> @@ -101,6 +105,9 @@ vc4_overflow_mem_work(struct work_struct *work)
> V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
> V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
> spin_unlock_irqrestore(&vc4->job_lock, irqflags);
> +
> +complete:
> + mutex_unlock(&vc4->bin_bo_lock);
> }
>
> static void
> @@ -252,8 +259,10 @@ vc4_irq_postinstall(struct drm_device *dev)
> if (!vc4->v3d)
> return 0;
>
> - /* Enable both the render done and out of memory interrupts. */
> - V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
> + /* Enable the render done interrupts. The out-of-memory interrupt is
> + * enabled as soon as we have a binner BO allocated.
> + */
> + V3D_WRITE(V3D_INTENA, V3D_INT_FLDONE | V3D_INT_FRDONE);
>
> return 0;
> }
> diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
> index c16db4665af6..55abaae71856 100644
> --- a/drivers/gpu/drm/vc4/vc4_v3d.c
> +++ b/drivers/gpu/drm/vc4/vc4_v3d.c
> @@ -294,6 +294,14 @@ static int bin_bo_alloc(struct vc4_dev *vc4)
> WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
> bo->base.base.size / vc4->bin_alloc_size);
>
> + kref_init(&vc4->bin_bo_kref);
> +
> + /* Enable the out-of-memory interrupt to set our
> + * newly-allocated binner BO, potentially from an
> + * already-pending-but-masked interrupt.
> + */
> + V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
> +
> break;
> }
>
> @@ -313,6 +321,43 @@ static int bin_bo_alloc(struct vc4_dev *vc4)
> return ret;
> }
>
> +int vc4_v3d_bin_bo_get(struct vc4_dev *vc4)
> +{
> + int ret = 0;
> +
> + mutex_lock(&vc4->bin_bo_lock);
> +
> + if (vc4->bin_bo) {
> + kref_get(&vc4->bin_bo_kref);
> + goto complete;
> + }
> +
> + ret = bin_bo_alloc(vc4);
> +
> +complete:
> + mutex_unlock(&vc4->bin_bo_lock);
> +
> + return ret;
> +}
> +
> +static void bin_bo_release(struct kref *ref)
> +{
> + struct vc4_dev *vc4 = container_of(ref, struct vc4_dev, bin_bo_kref);
> +
> + if (!vc4->bin_bo)
> + return;
Could we WARN_ON_ONCE instead of returning silently? If we're going from
1->0 refcount without a bin_bo allocated, something has gone terribly
wrong and we want to know.
> + drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
> + vc4->bin_bo = NULL;
> +}
Attachment:
signature.asc
Description: PGP signature