Re: [PATCH, v2] media: mediatek: vcodec: Add to support VP9 inner racing mode
From: yunfei.dong@xxxxxxxxxxxx
Date: Wed Jul 27 2022 - 04:31:49 EST
Hi Mingjia,
Thanks for your patch.
Reviewed-by: Yunfei Dong <yunfei.dong@xxxxxxxxxxxx>
On Wed, 2022-07-27 at 14:13 +0800, Mingjia Zhang wrote:
> In order to reduce decoder latency, enable VP9 inner racing mode.
> Send the LAT trans buffer information to the core as soon as LAT is
> triggered to work, without waiting until LAT decoding is done.
>
> Signed-off-by: mingjia zhang <mingjia.zhang@xxxxxxxxxxxx>
> ---
> 1. CTS/GTS test pass
> 2. Fluster result: Ran 240/303 tests successfully
> ---
> .../vcodec/vdec/vdec_vp9_req_lat_if.c | 64 ++++++++++++-----
> --
> 1 file changed, 40 insertions(+), 24 deletions(-)
>
> diff --git
> a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> index fb1c36a3592d..92b47f0fdf40 100644
> ---
> a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> +++
> b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> @@ -436,6 +436,7 @@ struct vdec_vp9_slice_ref {
> * @frame_ctx: 4 frame context according to VP9 Spec
> * @frame_ctx_helper: 4 frame context according to newest
> kernel spec
> * @dirty: state of each frame context
> + * @local_vsi: local instance vsi information
> * @init_vsi: vsi used for initialized VP9 instance
> * @vsi: vsi used for decoding/flush ...
> * @core_vsi: vsi used for Core stage
> @@ -482,6 +483,8 @@ struct vdec_vp9_slice_instance {
> struct v4l2_vp9_frame_context frame_ctx_helper;
> unsigned char dirty[4];
>
> + struct vdec_vp9_slice_vsi local_vsi;
> +
> /* MicroP vsi */
> union {
> struct vdec_vp9_slice_init_vsi *init_vsi;
> @@ -1616,16 +1619,10 @@ static int
> vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance
> }
>
> static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance
> *instance,
> - struct vdec_lat_buf *lat_buf,
> - struct vdec_vp9_slice_pfc *pfc)
> + struct vdec_vp9_slice_vsi *vsi)
> {
> - struct vdec_vp9_slice_vsi *vsi;
> -
> - vsi = &pfc->vsi;
> - memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
> -
> mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
> - pfc->seq, vsi->state.crc[0],
> + (instance->seq - 1), vsi->state.crc[0],
> (unsigned long)vsi->trans.dma_addr,
> (unsigned long)vsi->trans.dma_addr_end);
>
> @@ -2090,6 +2087,13 @@ static int vdec_vp9_slice_lat_decode(void
> *h_vdec, struct mtk_vcodec_mem *bs,
> return ret;
> }
>
> + if (IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability)) {
> + vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> + memcpy(&instance->local_vsi, vsi, sizeof(*vsi));
> + vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx,
> lat_buf);
> + vsi = &instance->local_vsi;
> + }
> +
> if (instance->irq) {
> ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_
> RECEIVED,
> WAIT_INTR_TIMEOUT_MS
> , MTK_VDEC_LAT0);
> @@ -2102,22 +2106,25 @@ static int vdec_vp9_slice_lat_decode(void
> *h_vdec, struct mtk_vcodec_mem *bs,
> }
>
> vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> - ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
> + ret = vdec_vp9_slice_update_lat(instance, vsi);
>
> - /* LAT trans full, no more UBE or decode timeout */
> - if (ret) {
> - mtk_vcodec_err(instance, "VP9 decode error: %d\n",
> ret);
> - return ret;
> - }
> + if (!IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability))
> + /* LAT trans full, no more UBE or decode timeout */
> + if (ret) {
> + mtk_vcodec_err(instance, "frame[%d] decode
> error: %d\n",
> + ret, (instance->seq - 1));
> + return ret;
> + }
>
> - mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
> - (unsigned long)pfc->vsi.trans.dma_addr,
> - (unsigned long)pfc->vsi.trans.dma_addr_end);
>
> - vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
> - vsi->trans.dma_addr_end +
> - ctx-
> >msg_queue.wdma_addr.dma_addr);
> - vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> + vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
> + vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi-
> >trans.dma_addr_end);
> + if (!IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability))
> + vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx,
> lat_buf);
> +
> + mtk_vcodec_debug(instance, "lat trans end addr(0x%lx), ube
> start addr(0x%lx)\n",
> + (unsigned long)vsi->trans.dma_addr_end,
> + (unsigned long)ctx-
> >msg_queue.wdma_addr.dma_addr);
>
> return 0;
> }
> @@ -2193,10 +2200,14 @@ static int vdec_vp9_slice_core_decode(struct
> vdec_lat_buf *lat_buf)
> goto err;
> }
>
> - pfc->vsi.trans.dma_addr_end += ctx-
> >msg_queue.wdma_addr.dma_addr;
> mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
> (unsigned long)pfc->vsi.trans.dma_addr_end);
> - vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc-
> >vsi.trans.dma_addr_end);
> +
> + if (IS_VDEC_INNER_RACING(instance->ctx->dev->dec_capability))
> + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc-
> >vsi.trans.dma_addr);
> + else
> + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc-
> >vsi.trans.dma_addr_end);
> +
> ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf-
> >src_buf_req);
>
> return 0;
> @@ -2204,7 +2215,12 @@ static int vdec_vp9_slice_core_decode(struct
> vdec_lat_buf *lat_buf)
> err:
> if (ctx && pfc) {
> /* always update read pointer */
> - vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc-
> >vsi.trans.dma_addr_end);
> + if (IS_VDEC_INNER_RACING(instance->ctx->dev-
> >dec_capability))
> + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
> + pfc-
> >vsi.trans.dma_addr);
> + else
> + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
> + pfc-
> >vsi.trans.dma_addr_end);
>
> if (fb)
> ctx->dev->vdec_pdata->cap_to_disp(ctx, 1,
> lat_buf->src_buf_req);