Re: [PATCH 5/6] media: chips-media: wave5: refine SRAM usage

From: Brandon Brnich
Date: Thu Mar 21 2024 - 12:14:40 EST


Hi Ivan,

On 13:52-20240321, Ivan Bornyakov wrote:
> Hi!
>
> On Thu, Mar 21, 2024 at 09:29:04AM +0000, Nas Chung wrote:
> > Hi, Ivan and Brandon.
> >
> > >-----Original Message-----
> > >On 14:24-20240319, Ivan Bornyakov wrote:
> > >> Hello, Nas
> > >>
> > >> On Tue, Mar 19, 2024 at 10:56:22AM +0000, Nas Chung wrote:
> > >> > Hi, Ivan.
> > >> >
> > >> > >
> > >> > >Allocate SRAM memory on module probe, free on remove. There is no
> > >need
> > >> > >to allocate on device open, free on close, the memory is the same
> > >every
> > >> > >time.
> > >> >
> > >> > If there is no decoder/encoder instance, driver don't need to
> > >allocate SRAM memory.
> > >> > The main reason of allocating the memory in open() is to allow other
> > >modules to
> > >> > use more SRAM memory, if wave5 is not working.
> > >
> > >I have to agree with this statement. Moving allocation to probe results
> > >in wasting SRAM when VPU is not in use. VPU should only be allocating
> > >SRAM
> > >when a stream instance is running and free that back once all instances
> > >close.
> > >
> > >> > >
> > >> > >Also use gen_pool_size() to determine SRAM memory size to be
> > >allocated
> > >> > >instead of separate "sram-size" DT property to reduce duplication.
> > >> > >
> > >> > >Signed-off-by: Ivan Bornyakov <brnkv.i1@xxxxxxxxx>
> > >> > >---
> > >> > > .../platform/chips-media/wave5/wave5-helper.c | 3 ---
> > >> > > .../platform/chips-media/wave5/wave5-vdi.c | 21 ++++++++++-------
> > >--
> > >> > > .../chips-media/wave5/wave5-vpu-dec.c | 2 --
> > >> > > .../chips-media/wave5/wave5-vpu-enc.c | 2 --
> > >> > > .../platform/chips-media/wave5/wave5-vpu.c | 12 +++++------
> > >> > > .../platform/chips-media/wave5/wave5-vpuapi.h | 1 -
> > >> > > 6 files changed, 16 insertions(+), 25 deletions(-)
> > >> > >
> > >> > >diff --git a/drivers/media/platform/chips-media/wave5/wave5-helper.c
> > >> > >b/drivers/media/platform/chips-media/wave5/wave5-helper.c
> > >> > >index 8433ecab230c..ec710b838dfe 100644
> > >> > >--- a/drivers/media/platform/chips-media/wave5/wave5-helper.c
> > >> > >+++ b/drivers/media/platform/chips-media/wave5/wave5-helper.c
> > >> > >@@ -29,9 +29,6 @@ void wave5_cleanup_instance(struct vpu_instance
> > >*inst)
> > >> > > {
> > >> > > int i;
> > >> > >
> > >> > >- if (list_is_singular(&inst->list))
> > >> > >- wave5_vdi_free_sram(inst->dev);
> > >> > >-
> > >> > > for (i = 0; i < inst->fbc_buf_count; i++)
> > >> > > wave5_vpu_dec_reset_framebuffer(inst, i);
> > >> > >
> > >> > >diff --git a/drivers/media/platform/chips-media/wave5/wave5-vdi.c
> > >> > >b/drivers/media/platform/chips-media/wave5/wave5-vdi.c
> > >> > >index 3809f70bc0b4..ee671f5a2f37 100644
> > >> > >--- a/drivers/media/platform/chips-media/wave5/wave5-vdi.c
> > >> > >+++ b/drivers/media/platform/chips-media/wave5/wave5-vdi.c
> > >> > >@@ -174,16 +174,19 @@ int wave5_vdi_allocate_array(struct vpu_device
> > >> > >*vpu_dev, struct vpu_buf *array,
> > >> > > void wave5_vdi_allocate_sram(struct vpu_device *vpu_dev)
> > >> > > {
> > >> > > struct vpu_buf *vb = &vpu_dev->sram_buf;
> > >> > >+ dma_addr_t daddr;
> > >> > >+ void *vaddr;
> > >> > >+ size_t size;
> > >> > >
> > >> > >- if (!vpu_dev->sram_pool || !vpu_dev->sram_size)
> > >> > >+ if (!vpu_dev->sram_pool || vb->vaddr)
> > >> > > return;
> > >> > >
> > >> > >- if (!vb->vaddr) {
> > >> > >- vb->size = vpu_dev->sram_size;
> > >> > >- vb->vaddr = gen_pool_dma_alloc(vpu_dev->sram_pool, vb->size,
> > >> > >- &vb->daddr);
> > >> > >- if (!vb->vaddr)
> > >> > >- vb->size = 0;
> > >> > >+ size = gen_pool_size(vpu_dev->sram_pool);
> > >> > >+ vaddr = gen_pool_dma_alloc(vpu_dev->sram_pool, size, &daddr);
> > >> > >+ if (vaddr) {
> > >> > >+ vb->vaddr = vaddr;
> > >> > >+ vb->daddr = daddr;
> > >> > >+ vb->size = size;
> > >> > > }
> > >> > >
> > >> > > dev_dbg(vpu_dev->dev, "%s: sram daddr: %pad, size: %zu, vaddr:
> > >> > >0x%p\n",
> > >> > >@@ -197,9 +200,7 @@ void wave5_vdi_free_sram(struct vpu_device
> > >*vpu_dev)
> > >> > > if (!vb->size || !vb->vaddr)
> > >> > > return;
> > >> > >
> > >> > >- if (vb->vaddr)
> > >> > >- gen_pool_free(vpu_dev->sram_pool, (unsigned long)vb->vaddr,
> > >> > >- vb->size);
> > >> > >+ gen_pool_free(vpu_dev->sram_pool, (unsigned long)vb->vaddr, vb-
> > >> > >>size);
> > >> > >
> > >> > > memset(vb, 0, sizeof(*vb));
> > >> > > }
> > >> > >diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-
> > >dec.c
> > >> > >b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
> > >> > >index aa0401f35d32..84dbe56216ad 100644
> > >> > >--- a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
> > >> > >+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
> > >> > >@@ -1854,8 +1854,6 @@ static int wave5_vpu_open_dec(struct file
> > >*filp)
> > >> > > goto cleanup_inst;
> > >> > > }
> > >> > >
> > >> > >- wave5_vdi_allocate_sram(inst->dev);
> > >> > >-
> > >> > > return 0;
> > >> > >
> > >> > > cleanup_inst:
> > >> > >diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-
> > >enc.c
> > >> > >b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
> > >> > >index 8bbf9d10b467..86ddcb82443b 100644
> > >> > >--- a/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
> > >> > >+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
> > >> > >@@ -1727,8 +1727,6 @@ static int wave5_vpu_open_enc(struct file
> > >*filp)
> > >> > > goto cleanup_inst;
> > >> > > }
> > >> > >
> > >> > >- wave5_vdi_allocate_sram(inst->dev);
> > >> > >-
> > >> > > return 0;
> > >> > >
> > >> > > cleanup_inst:
> > >> > >diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu.c
> > >> > >b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
> > >> > >index f3ecadefd37a..2a0a70dd7062 100644
> > >> > >--- a/drivers/media/platform/chips-media/wave5/wave5-vpu.c
> > >> > >+++ b/drivers/media/platform/chips-media/wave5/wave5-vpu.c
> > >> > >@@ -178,16 +178,11 @@ static int wave5_vpu_probe(struct
> > >platform_device
> > >> > >*pdev)
> > >> > > return ret;
> > >> > > }
> > >> > >
> > >> > >- ret = of_property_read_u32(pdev->dev.of_node, "sram-size",
> > >> > >- &dev->sram_size);
> > >> > >- if (ret) {
> > >> > >- dev_warn(&pdev->dev, "sram-size not found\n");
> > >> > >- dev->sram_size = 0;
> > >> > >- }
> > >> > >-
> > >> >
> > >> > Required SRAM size is different from each wave5 product.
> > >> > And, SoC vendor also can configure the different SRAM size
> > >> > depend on target SoC specification even they use the same wave5
> > >product.
> > >> >
> > >>
> > >> One can limit iomem address range in SRAM node. Here is the example of
> > >> how I setup Wave515 with SRAM:
> > >>
> > >> sram@2000000 {
> > >> compatible = "mmio-sram";
> > >> reg = <0x0 0x2000000 0x0 0x80000>;
> > >> #address-cells = <1>;
> > >> #size-cells = <1>;
> > >> ranges = <0x0 0x0 0x2000000 0x80000>;
> > >>
> > >> wave515_vpu_sram: wave515-vpu-sram@0 {
> > >> reg = <0x0 0x80000>;
> > >> pool;
> > >> };
> > >> };
> > >>
> > >> wave515@410000 {
> > >> compatible = "cnm,wave515";
> > >> reg = <0x0 0x410000 0x0 0x10000>;
> > >> clocks = <&clk_ref1>;
> > >> clock-names = "videc";
> > >> interrupt-parent = <&wave515_intc>;
> > >> interrupts = <16 IRQ_TYPE_LEVEL_HIGH>;
> > >> resets = <&wave515_reset 0>,
> > >> <&wave515_reset 4>,
> > >> <&wave515_reset 8>,
> > >> <&wave515_reset 12>;
> > >> sram = <&wave515_vpu_sram>;
> > >> };
> > >>
> > >> gen_pool_size() returns size of wave515_vpu_sram, no need for extra
> > >> "sram-size" property.
> >
> > Thanks for sharing the example.
> > I agree that the "sram-size" property is not needed.
> >
> > >
> > >"sram-size" property does need to be removed, as this was the consensus
> > >gathered from my patch[0]. However, I think your method is still taking
> >
> > I missed the previous consensus for the sram-size property.
> > Thanks for letting me know.
> >
> > >a more static approach. One of the recommendations in my thread[1] was
> > >making a list of known SRAM sizes given typical resolutions and
> > >iterating through until a valid allocation is done. I don't think this
> > >is the correct approach either based on Nas's comment that each Wave5
> > >has different SRAM size requirement. It would clutter up the file too
> > >much if each wave5 product had its own SRAM size mapping.
> > >
> > >Could another approach be to change Wave5 dts node to have property set
> > >as "sram = <&sram>;" in your example, then driver calls
> > >gen_pool_availble to get size remaining? From there, a check could be
> > >put in place to make sure an unnecessary amount is not being allocated.
> >
> > Ivan's approach looks good to me.
> > It is similar to your first patch, which adds the sram-size property
> > to configure different SRAM sizes for each device.
> > And, Driver won't know unnecessary amount is allocated before parsing
> > bitstream header.

I am aware of this, I should have been more specific. By unnecessary
amount, I meant something greater than the max use case for device.
Could we populate some macros that have max SRAM required for 4K stream?
There's never a need to allocate more SRAM than that for a particular
instance. If the amount available is less than that, then fine. But it
should never be greater.

> >
>
> To sum up, there is 2 favourable approaches:
>
> 1) to have dedicated SRAM partition for Wave5 VPU as suggested in this
> patchset. In this approach SoC vendor can setup address range of said
> partition to their needs, but other devices won't be able to use SRAM
> memory reserved for Wave5 VPU, unless other device's SRAM memory needs
> don't exceed the size of reserved partition.
>
> Therefore it is sensible to substitute alloc/free on open/close with
> alloc/free on open/close.

Not sure what you mean here. Were you trying to refer to your
substitution of alloc/free from open/close to probe/remove?

If that is what you mean, and the decision is a specific carveout for
SRAM, then I don't see a point in having allocation in open and close
either since Wave5 would be the only IP that could use the pool.

>
> Advantages: driver code is simpler, no need for platform-specific defines
> or DT properties. Wave5 is guaranteed to get SRAM memory.
>
> Disadvantage: waste of SRAM memory while VPU is not in use
>
> 2) allocate all available SRAM memory on open (free on close) from the
> common SRAM pool, but limit maximum amount with SoC-specific define.
>

Why does it have to be on SoC specific define? Max size required for
SRAM in a 4K case is known. A call can be made to get the size of the
pool and from there the driver can take a portion. Just make sure that
portion is less than known value for 4K.

> Advantage: less memory waste
>
> Disadvantages: still need SoC-specific define or DT property, not much
> differ from current state. Wave5 is not guaranteed to get SRAM memory.
>

Wave5 does not need SRAM to function properly so it doesn't have to be
guaranteed.

> Which of these approaches would be preferable?
>
> > >
> > >
> > >[0]:
> > >https://lore.kernel.org/lkml/99bf4d6d988d426492fffc8de9015751c323bd8a.cam
> > >el@xxxxxxxxxxxx/
> > >[1]: https://lore.kernel.org/lkml/9c5b7b2c-8a66-4173-dfe9-
> > >5724ec5f733d@xxxxxx/
> > >
> > >Thanks,
> > >Brandon
> > >>
> > >> > Thanks.
> > >> > Nas.
> > >> >
> > >> > > dev->sram_pool = of_gen_pool_get(pdev->dev.of_node, "sram", 0);
> > >> > > if (!dev->sram_pool)
> > >> > > dev_warn(&pdev->dev, "sram node not found\n");
> > >> > >+ else
> > >> > >+ wave5_vdi_allocate_sram(dev);
> > >> > >
> > >> > > dev->product_code = wave5_vdi_read_register(dev,
> > >> > >VPU_PRODUCT_CODE_REGISTER);
> > >> > > ret = wave5_vdi_init(&pdev->dev);
> > >> > >@@ -259,6 +254,8 @@ static int wave5_vpu_probe(struct
> > >platform_device
> > >> > >*pdev)
> > >> > > err_clk_dis:
> > >> > > clk_bulk_disable_unprepare(dev->num_clks, dev->clks);
> > >> > >
> > >> > >+ wave5_vdi_free_sram(dev);
> > >> > >+
> > >> > > return ret;
> > >> > > }
> > >> > >
> > >> > >@@ -275,6 +272,7 @@ static void wave5_vpu_remove(struct
> > >platform_device
> > >> > >*pdev)
> > >> > > v4l2_device_unregister(&dev->v4l2_dev);
> > >> > > wave5_vdi_release(&pdev->dev);
> > >> > > ida_destroy(&dev->inst_ida);
> > >> > >+ wave5_vdi_free_sram(dev);
> > >> > > }
> > >> > >
> > >> > > static const struct wave5_match_data ti_wave521c_data = {
> > >> > >diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
> > >> > >b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
> > >> > >index fa62a85080b5..8d88381ac55e 100644
> > >> > >--- a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
> > >> > >+++ b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
> > >> > >@@ -749,7 +749,6 @@ struct vpu_device {
> > >> > > struct vpu_attr attr;
> > >> > > struct vpu_buf common_mem;
> > >> > > u32 last_performance_cycles;
> > >> > >- u32 sram_size;
> > >> > > struct gen_pool *sram_pool;
> > >> > > struct vpu_buf sram_buf;
> > >> > > void __iomem *vdb_register;
> > >> > >--
> > >> > >2.44.0
> > >> >
> > >>