RE: [PATCH v2 6/7] media: chips-media: wave5: Reduce high CPU load
From: jackson . lee
Date: Tue May 27 2025 - 01:06:05 EST
Hi Nicolas
> -----Original Message-----
> From: Nicolas Dufresne <nicolas.dufresne@xxxxxxxxxxxxx>
> Sent: Saturday, May 24, 2025 2:43 AM
> To: jackson.lee <jackson.lee@xxxxxxxxxxxxxxx>; mchehab@xxxxxxxxxx;
> hverkuil-cisco@xxxxxxxxx; sebastian.fricke@xxxxxxxxxxxxx;
> bob.beckett@xxxxxxxxxxxxx; dafna.hirschfeld@xxxxxxxxxxxxx
> Cc: linux-media@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; lafley.kim
> <lafley.kim@xxxxxxxxxxxxxxx>; b-brnich@xxxxxx; hverkuil@xxxxxxxxx; Nas
> Chung <nas.chung@xxxxxxxxxxxxxxx>
> Subject: Re: [PATCH v2 6/7] media: chips-media: wave5: Reduce high CPU
> load
>
> Hi,
>
> Le jeudi 22 mai 2025 à 16:26 +0900, Jackson.lee a écrit :
> > From: Jackson Lee <jackson.lee@xxxxxxxxxxxxxxx>
> >
> > After the decoder performance improvements were applied, a high CPU
> > load problem appeared: CPU load was more than 4 times higher than
> > before those changes.
> > The root cause was that device_run was called many times even when
> > there was no bitstream to be queued.
>
> You should squash this.
>
> Nicolas
>
I will also squash this into the performance patch.
Thanks
Jackson
> >
> > Signed-off-by: Jackson Lee <jackson.lee@xxxxxxxxxxxxxxx>
> > Signed-off-by: Nas Chung <nas.chung@xxxxxxxxxxxxxxx>
> > ---
> > .../media/platform/chips-media/wave5/wave5-vpu-dec.c | 12
> > +++++++++---
> > .../media/platform/chips-media/wave5/wave5-vpuapi.h | 1 +
> > 2 files changed, 10 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
> > b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c index
> > 719c5527eb7f..421a9e1a6f15 100644
> > --- a/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
> > +++ b/drivers/media/platform/chips-media/wave5/wave5-vpu-dec.c
> > @@ -1280,10 +1280,13 @@ static void wave5_vpu_dec_buf_queue(struct
> vb2_buffer *vb)
> > __func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf,
> 0),
> > vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf-
> >vb2_buf,
> > 2));
> >
> > - if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > + if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
> > + if (inst->empty_queue)
> > + inst->empty_queue = false;
> > wave5_vpu_dec_buf_queue_src(vb);
> > - else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
> > + } else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
> > wave5_vpu_dec_buf_queue_dst(vb);
> > + }
> > }
> >
> > static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance
> > *inst) @@ -1474,6 +1477,7 @@ static void
> > wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
> >
> > dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type);
> > pm_runtime_resume_and_get(inst->dev->dev);
> > + inst->empty_queue = false;
> >
> > while (check_cmd) {
> > struct queue_status_info q_status;
> > @@ -1592,6 +1596,7 @@ static void wave5_vpu_dec_device_run(void *priv)
> > inst->queuing_num == 0 &&
> > inst->state == VPU_INST_STATE_PIC_RUN) {
> > dev_dbg(inst->dev->dev, "%s: no bitstream for feeding,
> so skip ",
> > __func__);
> > + inst->empty_queue = true;
> > goto finish_job_and_return;
> > }
> > }
> > @@ -1737,7 +1742,8 @@ static int wave5_vpu_dec_job_ready(void *priv)
> > "No capture buffer ready to decode!\n");
> > break;
> > } else if (!wave5_is_draining_or_eos(inst) &&
> > - !v4l2_m2m_num_src_bufs_ready(m2m_ctx)) {
> > + (!v4l2_m2m_num_src_bufs_ready(m2m_ctx) ||
> > + inst->empty_queue)) {
> > dev_dbg(inst->dev->dev,
> > "No bitstream data to decode!\n");
> > break;
> > diff --git a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
> > b/drivers/media/platform/chips-media/wave5/wave5-
> > vpuapi.h
> > index fd0aef0bac4e..f2596af08cdf 100644
> > --- a/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
> > +++ b/drivers/media/platform/chips-media/wave5/wave5-vpuapi.h
> > @@ -821,6 +821,7 @@ struct vpu_instance {
> > bool retry; /* retry to feed bitstream if failure reason is
> WAVE5_SYSERR_QUEUEING_FAIL*/
> > int queuing_num; /* check if there is input buffer or not */
> > struct mutex feed_lock; /* lock for feeding bitstream buffers */
> > + bool empty_queue;
> > struct vpu_buf bitstream_vbuf;
> > dma_addr_t last_rd_ptr;
> > size_t remaining_consumed_bytes;