[PATCH v4 4/5] media: meson: vdec: add VP9 input support

From: Neil Armstrong
Date: Thu Feb 06 2020 - 03:42:19 EST


From: Maxime Jourdan <mjourdan@xxxxxxxxxxxx>

Amlogic VP9 decoder requires an additional 16-byte payload before every
frame header.

The source buffer is updated in-place, then given to the Parser FIFO DMA.

The FIFO DMA copies the blocks into the 16MiB parser ring buffer, then parses
and copies the slice into the decoder "workspace".

Signed-off-by: Maxime Jourdan <mjourdan@xxxxxxxxxxxx>
Signed-off-by: Neil Armstrong <narmstrong@xxxxxxxxxxxx>
---
drivers/staging/media/meson/vdec/esparser.c | 150 +++++++++++++++++++-
1 file changed, 146 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/meson/vdec/esparser.c b/drivers/staging/media/meson/vdec/esparser.c
index adc5c1e81a4c..4a9aad3fafeb 100644
--- a/drivers/staging/media/meson/vdec/esparser.c
+++ b/drivers/staging/media/meson/vdec/esparser.c
@@ -52,6 +52,7 @@
#define PARSER_VIDEO_HOLE 0x90

#define SEARCH_PATTERN_LEN 512
+#define VP9_HEADER_SIZE 16

static DECLARE_WAIT_QUEUE_HEAD(wq);
static int search_done;
@@ -74,14 +75,121 @@ static irqreturn_t esparser_isr(int irq, void *dev)
return IRQ_HANDLED;
}

+/**
+ * VP9 frame headers need to be appended by a 16-byte long
+ * Amlogic custom header
+ */
+static int vp9_update_header(struct amvdec_core *core, struct vb2_buffer *buf)
+{
+ u8 *dp;
+ u8 marker;
+ int dsize;
+ int num_frames, cur_frame;
+ int cur_mag, mag, mag_ptr;
+ int frame_size[8], tot_frame_size[8];
+ int total_datasize = 0;
+ int new_frame_size;
+ unsigned char *old_header = NULL;
+
+ dp = (uint8_t *)vb2_plane_vaddr(buf, 0);
+ dsize = vb2_get_plane_payload(buf, 0);
+
+ if (dsize == vb2_plane_size(buf, 0)) {
+ dev_warn(core->dev, "%s: unable to update header\n", __func__);
+ return 0;
+ }
+
+ marker = dp[dsize - 1];
+ if ((marker & 0xe0) == 0xc0) {
+ num_frames = (marker & 0x7) + 1;
+ mag = ((marker >> 3) & 0x3) + 1;
+ mag_ptr = dsize - mag * num_frames - 2;
+ if (dp[mag_ptr] != marker)
+ return 0;
+
+ mag_ptr++;
+ for (cur_frame = 0; cur_frame < num_frames; cur_frame++) {
+ frame_size[cur_frame] = 0;
+ for (cur_mag = 0; cur_mag < mag; cur_mag++) {
+ frame_size[cur_frame] |=
+ (dp[mag_ptr] << (cur_mag * 8));
+ mag_ptr++;
+ }
+ if (cur_frame == 0)
+ tot_frame_size[cur_frame] =
+ frame_size[cur_frame];
+ else
+ tot_frame_size[cur_frame] =
+ tot_frame_size[cur_frame - 1] +
+ frame_size[cur_frame];
+ total_datasize += frame_size[cur_frame];
+ }
+ } else {
+ num_frames = 1;
+ frame_size[0] = dsize;
+ tot_frame_size[0] = dsize;
+ total_datasize = dsize;
+ }
+
+ new_frame_size = total_datasize + num_frames * VP9_HEADER_SIZE;
+
+ if (new_frame_size >= vb2_plane_size(buf, 0)) {
+ dev_warn(core->dev, "%s: unable to update header\n", __func__);
+ return 0;
+ }
+
+ for (cur_frame = num_frames - 1; cur_frame >= 0; cur_frame--) {
+ int framesize = frame_size[cur_frame];
+ int framesize_header = framesize + 4;
+ int oldframeoff = tot_frame_size[cur_frame] - framesize;
+ int outheaderoff = oldframeoff + cur_frame * VP9_HEADER_SIZE;
+ u8 *fdata = dp + outheaderoff;
+ u8 *old_framedata = dp + oldframeoff;
+
+ memmove(fdata + VP9_HEADER_SIZE, old_framedata, framesize);
+
+ fdata[0] = (framesize_header >> 24) & 0xff;
+ fdata[1] = (framesize_header >> 16) & 0xff;
+ fdata[2] = (framesize_header >> 8) & 0xff;
+ fdata[3] = (framesize_header >> 0) & 0xff;
+ fdata[4] = ((framesize_header >> 24) & 0xff) ^ 0xff;
+ fdata[5] = ((framesize_header >> 16) & 0xff) ^ 0xff;
+ fdata[6] = ((framesize_header >> 8) & 0xff) ^ 0xff;
+ fdata[7] = ((framesize_header >> 0) & 0xff) ^ 0xff;
+ fdata[8] = 0;
+ fdata[9] = 0;
+ fdata[10] = 0;
+ fdata[11] = 1;
+ fdata[12] = 'A';
+ fdata[13] = 'M';
+ fdata[14] = 'L';
+ fdata[15] = 'V';
+
+ if (!old_header) {
+ /* nothing */
+ } else if (old_header > fdata + 16 + framesize) {
+ dev_dbg(core->dev, "%s: data has gaps, setting to 0\n",
+ __func__);
+ memset(fdata + 16 + framesize, 0,
+ (old_header - fdata + 16 + framesize));
+ } else if (old_header < fdata + 16 + framesize) {
+ dev_err(core->dev, "%s: data overwritten\n", __func__);
+ }
+ old_header = fdata;
+ }
+
+ return new_frame_size;
+}
+
/* Pad the packet to at least 4KiB bytes otherwise the VDEC unit won't trigger
* ISRs.
* Also append a start code 000001ff at the end to trigger
* the ESPARSER interrupt.
*/
-static u32 esparser_pad_start_code(struct amvdec_core *core, struct vb2_buffer *vb)
+static u32 esparser_pad_start_code(struct amvdec_core *core,
+ struct vb2_buffer *vb,
+ u32 payload_size)
{
- u32 payload_size = vb2_get_plane_payload(vb, 0);
u32 pad_size = 0;
u8 *vaddr = vb2_plane_vaddr(vb, 0);

@@ -186,13 +294,35 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
int ret;
struct vb2_buffer *vb = &vbuf->vb2_buf;
struct amvdec_core *core = sess->core;
+ struct amvdec_codec_ops *codec_ops = sess->fmt_out->codec_ops;
u32 payload_size = vb2_get_plane_payload(vb, 0);
dma_addr_t phy = vb2_dma_contig_plane_dma_addr(vb, 0);
+ u32 num_dst_bufs = 0;
u32 offset;
u32 pad_size;

- if (esparser_vififo_get_free_space(sess) < payload_size)
+ /*
+ * When max ref frame is held by VP9, this should be -= 3 to prevent a
+ * shortage of CAPTURE buffers on the decoder side.
+ * For the future, a good enhancement of the way this is handled could
+ * be to notify new capture buffers to the decoding modules, so that
+ * they could pause when there is no capture buffer available and
+ * resume on this notification.
+ */
+ if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9) {
+ if (codec_ops->num_pending_bufs)
+ num_dst_bufs = codec_ops->num_pending_bufs(sess);
+
+ num_dst_bufs += v4l2_m2m_num_dst_bufs_ready(sess->m2m_ctx);
+ if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9)
+ num_dst_bufs -= 3;
+
+ if (esparser_vififo_get_free_space(sess) < payload_size ||
+ atomic_read(&sess->esparser_queued_bufs) >= num_dst_bufs)
+ return -EAGAIN;
+ } else if (esparser_vififo_get_free_space(sess) < payload_size) {
return -EAGAIN;
+ }

v4l2_m2m_src_buf_remove_by_buf(sess->m2m_ctx, vbuf);

@@ -206,7 +336,19 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
vbuf->field = V4L2_FIELD_NONE;
vbuf->sequence = sess->sequence_out++;

- pad_size = esparser_pad_start_code(core, vb);
+ if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9) {
+ payload_size = vp9_update_header(core, vb);
+
+ /* If unable to alter buffer to add headers */
+ if (payload_size == 0) {
+ amvdec_remove_ts(sess, vb->timestamp);
+ v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
+
+ return 0;
+ }
+ }
+
+ pad_size = esparser_pad_start_code(core, vb, payload_size);
ret = esparser_write_data(core, phy, payload_size + pad_size);

if (ret <= 0) {
--
2.22.0