[PATCH 4/8] media: iris: Introduce buffer size calculations for vpu4
From: Vikash Garodia
Date: Wed Sep 24 2025 - 19:17:20 EST
Introduce vpu4 buffer size calculations for both encoder and decoder.
Reuse the buffer size calculations that are common, while adding the
vpu4-specific ones separately.
Co-developed-by: Vishnu Reddy <quic_bvisredd@xxxxxxxxxxx>
Signed-off-by: Vishnu Reddy <quic_bvisredd@xxxxxxxxxxx>
Signed-off-by: Vikash Garodia <vikash.garodia@xxxxxxxxxxxxxxxx>
---
drivers/media/platform/qcom/iris/iris_vpu_buffer.c | 289 +++++++++++++++++++++
drivers/media/platform/qcom/iris/iris_vpu_buffer.h | 5 +-
2 files changed, 293 insertions(+), 1 deletion(-)
diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
index 4463be05ce165adef6b152eb0c155d2e6a7b3c36..a08925e941b34d6df86b19ca52691327c020c811 100644
--- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
+++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
@@ -1408,6 +1408,251 @@ static u32 iris_vpu_enc_vpss_size(struct iris_inst *inst)
return hfi_buffer_vpss_enc(width, height, ds_enable, 0, 0);
}
+static u32 hfi_vpu4x_vp9d_lb_size(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
+{ /* Sum the VP9 decoder line-buffer components for the given frame size and pipe count. */
+ u32 max_value = max(frame_width, frame_height); /* larger dimension; sizes every component except size_vp9d_qp */
+ u32 size_vp9d_qp = DIV_ROUND_UP(frame_width, 64) * DIV_ROUND_UP(frame_height, 64) * 128; /* 128 per 64x64 block, block counts rounded up */
+ u32 size_dpb_obp = (ALIGN(max_value, 64) * 192) + (256 * 6);
+ u32 size_vp9d_fe_left_lb = ALIGN(max_value, 64) * 492;
+ u32 size_vp9d_top_lb = (ALIGN(max_value, 64) * 190) + 256;
+ u32 size_vp9d_se_left_lb = ALIGN(max_value, 64);
+
+ return size_vp9d_qp + (size_dpb_obp * num_vpp_pipes) + size_vp9d_fe_left_lb + /* only dpb_obp and se_left_lb are multiplied by the pipe count */
+ size_vp9d_top_lb + (size_vp9d_se_left_lb * num_vpp_pipes);
+}
+
+static u32 hfi_vpu4x_buffer_line_vp9d(u32 frame_width, u32 frame_height, u32 _yuv_bufcount_min,
+ bool is_opb, u32 num_vpp_pipes)
+{ /* Return hfi_vpu4x_vp9d_lb_size() plus an extra per-pipe dpb_obp term when is_opb is set; _yuv_bufcount_min is not read here. */
+ u32 lb_size = hfi_vpu4x_vp9d_lb_size(frame_width, frame_height, num_vpp_pipes);
+ u32 dpb_obp_size = 0; /* stays 0 unless is_opb */
+
+ if (is_opb)
+ dpb_obp_size = ((ALIGN(max(frame_width, frame_height), 64) * 192) + (256 * 6)) * /* same expression as size_dpb_obp in hfi_vpu4x_vp9d_lb_size() */
+ num_vpp_pipes;
+
+ return lb_size + dpb_obp_size;
+}
+
+static u32 iris_vpu4x_dec_line_size(struct iris_inst *inst)
+{ /* Decoder BUF_LINE size handler: dispatch on inst->codec to the matching line-buffer helper. */
+ u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
+ u32 out_min_count = inst->buffers[BUF_OUTPUT].min_count; /* only forwarded to the VP9 helper */
+ struct v4l2_format *f = inst->fmt_src; /* dimensions are taken from the source format */
+ u32 height = f->fmt.pix_mp.height;
+ u32 width = f->fmt.pix_mp.width;
+ bool is_opb = false;
+
+ if (iris_split_mode_enabled(inst)) /* is_opb mirrors the split-mode check */
+ is_opb = true;
+
+ if (inst->codec == V4L2_PIX_FMT_H264)
+ return hfi_buffer_line_h264d(width, height, is_opb, num_vpp_pipes);
+ else if (inst->codec == V4L2_PIX_FMT_HEVC)
+ return hfi_buffer_line_h265d(width, height, is_opb, num_vpp_pipes);
+ else if (inst->codec == V4L2_PIX_FMT_VP9)
+ return hfi_vpu4x_buffer_line_vp9d(width, height, out_min_count, is_opb, /* VP9 is the only codec with a vpu4x-specific helper here */
+ num_vpp_pipes);
+
+ return 0; /* any other codec value yields size 0 */
+}
+
+static u32 hfi_buffer4x_persist_h265d(u32 rpu_enabled)
+{ /* Size used for BUF_PERSIST when the codec is HEVC (see iris_vpu4x_dec_persist_size()); result is aligned to DMA_ALIGNMENT. */
+ return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 + H265_NUM_FRM_INFO *
+ H265_DISPLAY_BUF_SIZE + (H265_NUM_TILE * sizeof(u32)) + (NUM_HW_PIC_BUF *
+ (SIZE_SEI_USERDATA + SIZE_H265D_ARP + SIZE_THREE_DIMENSION_USERDATA)) + /* per-picture-buffer terms, multiplied by NUM_HW_PIC_BUF */
+ rpu_enabled * NUM_HW_PIC_BUF * SIZE_DOLBY_RPU_METADATA), DMA_ALIGNMENT); /* RPU metadata term is 0 when rpu_enabled is 0 */
+}
+
+static u32 iris_vpu4x_dec_persist_size(struct iris_inst *inst)
+{ /* Decoder BUF_PERSIST size handler: dispatch on inst->codec. */
+ if (inst->codec == V4L2_PIX_FMT_H264)
+ return hfi_buffer_persist_h264d();
+ else if (inst->codec == V4L2_PIX_FMT_HEVC)
+ return hfi_buffer4x_persist_h265d(0); /* rpu_enabled is always passed as 0 from here */
+ else if (inst->codec == V4L2_PIX_FMT_VP9)
+ return hfi_buffer_persist_vp9d();
+
+ return 0; /* any other codec value yields size 0 */
+}
+
+static u32 size_se_lb(u32 standard, u32 num_vpp_pipes_enc,
+ u32 frame_width_coded, u32 frame_height_coded)
+{ /* Return the "se" line-buffer term of the encoder line buffer: a width-based part plus a height-based part, aligned to DMA_ALIGNMENT. */
+ u32 se_tlb_size = ALIGN(frame_width_coded, DMA_ALIGNMENT);
+ u32 se_llb_size = (standard == HFI_CODEC_ENCODE_HEVC) ?
+ ((frame_height_coded + 32 - 1) / 32) * 4 * 16 : /* HEVC: height in 32-row units (rounded up) times 4 * 16 */
+ ((frame_height_coded + 16 - 1) / 16) * 5 * 16; /* otherwise: height in 16-row units (rounded up) times 5 * 16 */
+
+ se_llb_size = ALIGN(se_llb_size, 32);
+
+ if (num_vpp_pipes_enc > 1) /* the extra 512 and per-pipe scaling apply only with more than one pipe */
+ se_llb_size = ALIGN(se_llb_size + 512, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+ return ALIGN(se_tlb_size + se_llb_size, DMA_ALIGNMENT);
+}
+
+static u32 size_te_lb(bool is_ten_bit, u32 num_vpp_pipes_enc, u32 width_in_lcus,
+ u32 frame_height_coded, u32 frame_width_coded)
+{ /* Return the "te" line-buffer term of the encoder line buffer; divides by num_vpp_pipes_enc, so that value must be non-zero. */
+ u32 te_llb_col_rc_size = ALIGN(32 * width_in_lcus / num_vpp_pipes_enc, /* per-pipe share (truncating division), aligned, then times pipe count */
+ DMA_ALIGNMENT) * num_vpp_pipes_enc;
+ u32 te_tlb_recon_data_size = ALIGN((is_ten_bit ? 3 : 2) * frame_width_coded, /* width multiplier is 3 for ten-bit, else 2 */
+ DMA_ALIGNMENT);
+ u32 te_llb_recon_data_size = ((1 + is_ten_bit) * 3 * frame_height_coded + /* (1 + is_ten_bit) evaluates to 1 or 2; division below rounds up */
+ num_vpp_pipes_enc - 1) / num_vpp_pipes_enc;
+ te_llb_recon_data_size = ALIGN(te_llb_recon_data_size, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+ return ALIGN(te_llb_recon_data_size + te_llb_col_rc_size + te_tlb_recon_data_size,
+ DMA_ALIGNMENT);
+}
+
+static u32 size_fe_lb(bool is_ten_bit, u32 standard, u32 num_vpp_pipes_enc,
+ u32 frame_height_coded, u32 frame_width_coded)
+{ /* Return the "fe" line-buffer term: the sum of the fb_* deblock (db), SAO, top_sdc and se_ctrl components computed below. */
+ u32 log2_lcu_size, num_cu_in_height_pipe, num_cu_in_width,
+ fb_llb_db_ctrl_size, fb_llb_db_luma_size, fb_llb_db_chroma_size,
+ fb_tlb_db_ctrl_size, fb_tlb_db_luma_size, fb_tlb_db_chroma_size,
+ fb_llb_sao_ctrl_size, fb_llb_sao_luma_size,
+ fb_llb_sao_chroma_size, fb_tlb_sao_ctrl_size,
+ fb_tlb_sao_luma_size, fb_tlb_sao_chroma_size,
+ fb_lb_top_sdc_size, fb_lb_se_ctrl_size, fe_tlb_size,
+ size_per_lcu; /* size_per_lcu and fe_tlb_size are scratch values reused for each component */
+
+ log2_lcu_size = (standard == HFI_CODEC_ENCODE_HEVC) ? 5 : 4; /* 32-pixel units for HEVC, 16-pixel units otherwise */
+ num_cu_in_height_pipe = ((frame_height_coded >> log2_lcu_size) + num_vpp_pipes_enc - 1) / /* units per pipe, rounded up; num_vpp_pipes_enc must be non-zero */
+ num_vpp_pipes_enc;
+ num_cu_in_width = frame_width_coded >> log2_lcu_size;
+
+ size_per_lcu = 2;
+ fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+ fb_llb_db_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe; /* llb components scale with height per pipe ... */
+ fb_llb_db_ctrl_size = ALIGN(fb_llb_db_ctrl_size, DMA_ALIGNMENT) * num_vpp_pipes_enc; /* ... then with the pipe count */
+
+ size_per_lcu = (1 << (log2_lcu_size - 3));
+ fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+ fb_llb_db_luma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe;
+ fb_llb_db_luma_size = ALIGN(fb_llb_db_luma_size, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+ size_per_lcu = ((1 << (log2_lcu_size - 4)) * 2);
+ fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+ fb_llb_db_chroma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe;
+ fb_llb_db_chroma_size = ALIGN(fb_llb_db_chroma_size, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+ size_per_lcu = 1;
+ fe_tlb_size = 1 ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64); /* condition is the constant 1: the 128 * (n + 1) form is always used, independent of is_ten_bit */
+ fb_tlb_db_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width; /* tlb components scale with width and are not multiplied by the pipe count */
+
+ size_per_lcu = ((1 << (log2_lcu_size - 3)) + 1);
+ fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+ fb_tlb_db_luma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width;
+
+ size_per_lcu = (2 * ((1 << (log2_lcu_size - 4)) + 1));
+ fe_tlb_size = is_ten_bit ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64);
+ fb_tlb_db_chroma_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width;
+
+ size_per_lcu = 1;
+ fe_tlb_size = 1 ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64); /* constant condition again: always the 128 * (n + 1) form */
+ fb_llb_sao_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_height_pipe;
+ fb_llb_sao_ctrl_size = fb_llb_sao_ctrl_size * num_vpp_pipes_enc; /* unlike the db llb terms, no second ALIGN before scaling by pipes */
+
+ fb_llb_sao_luma_size = 256 * num_vpp_pipes_enc; /* fixed 256 per pipe */
+ fb_llb_sao_chroma_size = 256 * num_vpp_pipes_enc;
+
+ size_per_lcu = 1;
+ fe_tlb_size = 1 ? (128 * (size_per_lcu + 1)) : (size_per_lcu * 64); /* constant condition again: always the 128 * (n + 1) form */
+ fb_tlb_sao_ctrl_size = ALIGN(fe_tlb_size, DMA_ALIGNMENT) * num_cu_in_width;
+
+ fb_tlb_sao_luma_size = 256; /* fixed sizes, independent of frame dimensions */
+ fb_tlb_sao_chroma_size = 256;
+ fb_lb_top_sdc_size = ALIGN((16 * (frame_width_coded >> 5)), DMA_ALIGNMENT); /* uses width >> 5 regardless of standard */
+
+ fb_lb_se_ctrl_size = ALIGN((2020 * (frame_width_coded >> 5)), DMA_ALIGNMENT); /* uses width >> 5 regardless of standard */
+
+ return fb_llb_db_ctrl_size + fb_llb_db_luma_size + fb_llb_db_chroma_size +
+ fb_tlb_db_ctrl_size + fb_tlb_db_luma_size + fb_tlb_db_chroma_size +
+ fb_llb_sao_ctrl_size + fb_llb_sao_luma_size + fb_llb_sao_chroma_size +
+ fb_tlb_sao_ctrl_size + fb_tlb_sao_luma_size + fb_tlb_sao_chroma_size +
+ fb_lb_top_sdc_size + fb_lb_se_ctrl_size; /* the sum itself is returned without a final ALIGN */
+}
+
+static u32 size_md_lb(u32 standard, u32 frame_width_coded,
+ u32 frame_height_coded, u32 num_vpp_pipes_enc)
+{ /* Return the "md" line-buffer term; the formula matches size_se_lb() with one additional ALIGN of the height-based part. */
+ u32 md_tlb_size = ALIGN(frame_width_coded, DMA_ALIGNMENT);
+ u32 md_llb_size = (standard == HFI_CODEC_ENCODE_HEVC) ?
+ ((frame_height_coded + 32 - 1) / 32) * 4 * 16 : /* HEVC: height in 32-row units (rounded up) times 4 * 16 */
+ ((frame_height_coded + 16 - 1) / 16) * 5 * 16; /* otherwise: height in 16-row units (rounded up) times 5 * 16 */
+
+ md_llb_size = ALIGN(md_llb_size, 32);
+
+ if (num_vpp_pipes_enc > 1) /* the extra 512 and per-pipe scaling apply only with more than one pipe */
+ md_llb_size = ALIGN(md_llb_size + 512, DMA_ALIGNMENT) * num_vpp_pipes_enc;
+
+ md_llb_size = ALIGN(md_llb_size, DMA_ALIGNMENT); /* applied on both the single-pipe and multi-pipe paths */
+
+ return ALIGN(md_tlb_size + md_llb_size, DMA_ALIGNMENT);
+}
+
+static u32 size_dma_opb_lb(u32 num_vpp_pipes_enc, u32 frame_width_coded,
+ u32 frame_height_coded)
+{ /* Return the "dma_opb" line-buffer term: two width-based (wr) and two height-based (wr2) parts for y and uv, aligned to DMA_ALIGNMENT. */
+ u32 dma_opb_wr_tlb_y_size = ((frame_width_coded + 15) >> 4) << 7; /* width in 16-pixel units (rounded up) times 128 */
+ u32 dma_opb_wr_tlb_uv_size = ((frame_width_coded + 15) >> 4) << 7; /* same expression as the y part */
+ u32 dma_opb_wr2_tlb_y_size = ALIGN((2 * 6 * 64 * frame_height_coded / 8), DMA_ALIGNMENT) *
+ num_vpp_pipes_enc;
+ u32 dma_opb_wr2_tlb_uv_size = ALIGN((2 * 6 * 64 * frame_height_coded / 8), DMA_ALIGNMENT) * /* same expression as the y part */
+ num_vpp_pipes_enc;
+
+ dma_opb_wr2_tlb_y_size = max(dma_opb_wr2_tlb_y_size, dma_opb_wr_tlb_y_size << 1); /* wr2 is at least twice the corresponding wr size */
+ dma_opb_wr2_tlb_uv_size = max(dma_opb_wr2_tlb_uv_size, dma_opb_wr_tlb_uv_size << 1);
+
+ return ALIGN(dma_opb_wr_tlb_y_size + dma_opb_wr_tlb_uv_size + dma_opb_wr2_tlb_y_size +
+ dma_opb_wr2_tlb_uv_size, DMA_ALIGNMENT);
+}
+
+static u32 hfi_vpu4x_buffer_line_enc(u32 frame_width, u32 frame_height,
+ bool is_ten_bit, u32 num_vpp_pipes_enc,
+ u32 lcu_size, u32 standard)
+{ /* Return the encoder line-buffer size: the sum of the se/te/fe/md/dma_opb/dse/vpss terms, doubled. lcu_size is a divisor and must be non-zero. */
+ u32 width_in_lcus = (frame_width + lcu_size - 1) / lcu_size; /* rounded up */
+ u32 height_in_lcus = (frame_height + lcu_size - 1) / lcu_size; /* rounded up */
+ u32 frame_width_coded = width_in_lcus * lcu_size; /* width rounded up to a multiple of lcu_size */
+ u32 frame_height_coded = height_in_lcus * lcu_size; /* height rounded up to a multiple of lcu_size */
+
+ u32 se_lb_size = size_se_lb(standard, num_vpp_pipes_enc, frame_width_coded,
+ frame_height_coded);
+ u32 te_lb_size = size_te_lb(is_ten_bit, num_vpp_pipes_enc, width_in_lcus,
+ frame_height_coded, frame_width_coded);
+ u32 fe_lb_size = size_fe_lb(is_ten_bit, standard, num_vpp_pipes_enc, frame_height_coded,
+ frame_width_coded);
+ u32 md_lb_size = size_md_lb(standard, frame_width_coded, frame_height_coded,
+ num_vpp_pipes_enc);
+ u32 dma_opb_lb_size = size_dma_opb_lb(num_vpp_pipes_enc, frame_width_coded,
+ frame_height_coded);
+ u32 dse_lb_size = ALIGN((256 + (16 * (frame_width_coded >> 4))), DMA_ALIGNMENT);
+ u32 size_vpss_lb_enc = size_vpss_line_buf_vpu33(num_vpp_pipes_enc, frame_width_coded, /* vpss term reuses the vpu33 helper */
+ frame_height_coded);
+ u32 size = se_lb_size + te_lb_size + fe_lb_size + md_lb_size + dma_opb_lb_size +
+ dse_lb_size + size_vpss_lb_enc;
+ size = size << 1; /* the total is doubled before being returned */
+
+ return size;
+}
+
+static u32 iris_vpu4x_enc_line_size(struct iris_inst *inst)
+{ /* Encoder BUF_LINE size handler: derive LCU size and HFI codec id from inst->codec, dimensions from the destination format. */
+ u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
+ bool is_hevc = inst->codec == V4L2_PIX_FMT_HEVC; /* inst->codec is compared against V4L2_PIX_FMT_* values, not HFI codec ids */
+ struct v4l2_format *f = inst->fmt_dst;
+ u32 height = f->fmt.pix_mp.height;
+ u32 width = f->fmt.pix_mp.width;
+
+ return hfi_vpu4x_buffer_line_enc(width, height, 0, num_vpp_pipes, is_hevc ? 32 : 16, /* 0 is is_ten_bit; LCU size is 32 for HEVC, else 16 */
+ is_hevc ? HFI_CODEC_ENCODE_HEVC : HFI_CODEC_ENCODE_AVC); /* size helpers test standard == HFI_CODEC_ENCODE_HEVC, so pass an HFI id */
+}
+
static int output_min_count(struct iris_inst *inst)
{
int output_min_count = 4;
@@ -1503,6 +1748,50 @@ u32 iris_vpu33_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_typ
return size;
}
+u32 iris_vpu4x_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type)
+{ /* Return the size of the given internal buffer type on vpu4x by looking up a per-domain handler table; returns 0 when nothing matches. */
+ const struct iris_vpu_buf_type_handle *buf_type_handle_arr = NULL;
+ u32 size = 0, buf_type_handle_size = 0, i; /* size 0 and an empty table are the defaults when the domain is neither DECODER nor ENCODER */
+
+ static const struct iris_vpu_buf_type_handle dec_internal_buf_type_handle[] = { /* decoder: only BUF_LINE and BUF_PERSIST use vpu4x-specific handlers */
+ {BUF_BIN, iris_vpu_dec_bin_size },
+ {BUF_COMV, iris_vpu_dec_comv_size },
+ {BUF_NON_COMV, iris_vpu_dec_non_comv_size },
+ {BUF_LINE, iris_vpu4x_dec_line_size },
+ {BUF_PERSIST, iris_vpu4x_dec_persist_size },
+ {BUF_DPB, iris_vpu_dec_dpb_size },
+ {BUF_SCRATCH_1, iris_vpu_dec_scratch1_size },
+ };
+
+ static const struct iris_vpu_buf_type_handle enc_internal_buf_type_handle[] = { /* encoder: only BUF_LINE uses a vpu4x-specific handler */
+ {BUF_BIN, iris_vpu_enc_bin_size },
+ {BUF_COMV, iris_vpu_enc_comv_size },
+ {BUF_NON_COMV, iris_vpu_enc_non_comv_size },
+ {BUF_LINE, iris_vpu4x_enc_line_size },
+ {BUF_ARP, iris_vpu_enc_arp_size },
+ {BUF_VPSS, iris_vpu_enc_vpss_size },
+ {BUF_SCRATCH_1, iris_vpu_enc_scratch1_size },
+ {BUF_SCRATCH_2, iris_vpu_enc_scratch2_size },
+ };
+
+ if (inst->domain == DECODER) {
+ buf_type_handle_size = ARRAY_SIZE(dec_internal_buf_type_handle);
+ buf_type_handle_arr = dec_internal_buf_type_handle;
+ } else if (inst->domain == ENCODER) {
+ buf_type_handle_size = ARRAY_SIZE(enc_internal_buf_type_handle);
+ buf_type_handle_arr = enc_internal_buf_type_handle;
+ }
+
+ for (i = 0; i < buf_type_handle_size; i++) { /* loop body never runs while the table pointer is still NULL, because the count is 0 then */
+ if (buf_type_handle_arr[i].type == buffer_type) {
+ size = buf_type_handle_arr[i].handle(inst); /* first matching entry wins */
+ break;
+ }
+ }
+
+ return size;
+}
+
static u32 internal_buffer_count(struct iris_inst *inst,
enum iris_buffer_type buffer_type)
{
diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.h b/drivers/media/platform/qcom/iris/iris_vpu_buffer.h
index 04f0b7400a1e4e1d274d690a2761b9e57778e8b7..fb544e8b3bf6b9ce86920a18537fd0a2c21cdc31 100644
--- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.h
+++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.h
@@ -46,7 +46,7 @@ struct iris_inst;
#define VP9_NUM_FRAME_INFO_BUF 32
#define VP9_NUM_PROBABILITY_TABLE_BUF (VP9_NUM_FRAME_INFO_BUF + 4)
#define VP9_PROB_TABLE_SIZE (3840)
-#define VP9_FRAME_INFO_BUF_SIZE (6144)
+#define VP9_FRAME_INFO_BUF_SIZE (6400)
#define BUFFER_ALIGNMENT_32_BYTES 32
#define CCE_TILE_OFFSET_SIZE ALIGN(32 * 4 * 4, BUFFER_ALIGNMENT_32_BYTES)
#define MAX_SUPERFRAME_HEADER_LEN (34)
@@ -66,6 +66,8 @@ struct iris_inst;
#define H265_CABAC_HDR_RATIO_HD_TOT 2
#define H265_CABAC_RES_RATIO_HD_TOT 2
#define SIZE_H265D_VPP_CMD_PER_BUF (256)
+#define SIZE_THREE_DIMENSION_USERDATA 768
+#define SIZE_H265D_ARP 9728
#define VPX_DECODER_FRAME_CONCURENCY_LVL (2)
#define VPX_DECODER_FRAME_BIN_HDR_BUDGET 1
@@ -148,6 +150,7 @@ static inline u32 size_h264d_qp(u32 frame_width, u32 frame_height)
u32 iris_vpu_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type);
u32 iris_vpu33_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type);
+u32 iris_vpu4x_buf_size(struct iris_inst *inst, enum iris_buffer_type buffer_type);
int iris_vpu_buf_count(struct iris_inst *inst, enum iris_buffer_type buffer_type);
#endif
--
2.34.1