public inbox for linux-media@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/8] CODA7541 decoding support
@ 2013-06-21  7:55 Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 1/8] [media] coda: use vb2_set_plane_payload instead of setting v4l2_planes[0].bytesused directly Philipp Zabel
                   ` (7 more replies)
  0 siblings, 8 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun

The following patch series depends on the CODA patches queued in Kamil's branch
and on the "mem2mem: add support for hardware buffered queue" patch I've posted
earlier. It should allow decoding h.264 high profile 1080p streams on i.MX53
with the current CODA7541 firmware version 1.4.50.

Changes since v1:
 - Only the last patch, "coda: add CODA7541 decoding support", has changed. It
   now includes a coda_stop_streaming() locking fix by Wei Yongjun and a fixed
   coda_job_ready() for the encoder case.

regards
Philipp

---
 drivers/media/platform/coda.c | 1469 +++++++++++++++++++++++++++++++++++------
 drivers/media/platform/coda.h |  107 ++-
 2 files changed, 1389 insertions(+), 187 deletions(-)


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 1/8] [media] coda: use vb2_set_plane_payload instead of setting v4l2_planes[0].bytesused directly
  2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
@ 2013-06-21  7:55 ` Philipp Zabel
  2013-07-26 13:02   ` Mauro Carvalho Chehab
  2013-06-21  7:55 ` [PATCH v2 2/8] [media] coda: dynamic IRAM setup for encoder Philipp Zabel
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun, Philipp Zabel

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/media/platform/coda.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index c4566c4..90f3386 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -1662,12 +1662,12 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
 	wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->idx));
 	/* Calculate bytesused field */
 	if (dst_buf->v4l2_buf.sequence == 0) {
-		dst_buf->v4l2_planes[0].bytesused = (wr_ptr - start_ptr) +
-						ctx->vpu_header_size[0] +
-						ctx->vpu_header_size[1] +
-						ctx->vpu_header_size[2];
+		vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr +
+					ctx->vpu_header_size[0] +
+					ctx->vpu_header_size[1] +
+					ctx->vpu_header_size[2]);
 	} else {
-		dst_buf->v4l2_planes[0].bytesused = (wr_ptr - start_ptr);
+		vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr);
 	}
 
 	v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 2/8] [media] coda: dynamic IRAM setup for encoder
  2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 1/8] [media] coda: use vb2_set_plane_payload instead of setting v4l2_planes[0].bytesused directly Philipp Zabel
@ 2013-06-21  7:55 ` Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 3/8] [media] coda: do not allocate maximum number of framebuffers " Philipp Zabel
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun, Philipp Zabel

Set up the IRAM areas that the different hardware units use as temporary
memory. Their sizes and addresses now depend on the frame width instead of
being hard-coded.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/media/platform/coda.c | 145 +++++++++++++++++++++++++++++++++++++++---
 drivers/media/platform/coda.h |  11 +++-
 2 files changed, 146 insertions(+), 10 deletions(-)

diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index 90f3386..baf0ce8 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -160,6 +160,18 @@ struct coda_params {
 	u32			slice_max_mb;
 };
 
+struct coda_iram_info {
+	u32		axi_sram_use;
+	phys_addr_t	buf_bit_use;
+	phys_addr_t	buf_ip_ac_dc_use;
+	phys_addr_t	buf_dbk_y_use;
+	phys_addr_t	buf_dbk_c_use;
+	phys_addr_t	buf_ovl_use;
+	phys_addr_t	buf_btp_use;
+	phys_addr_t	search_ram_paddr;
+	int		search_ram_size;
+};
+
 struct coda_ctx {
 	struct coda_dev			*dev;
 	struct list_head		list;
@@ -182,6 +194,7 @@ struct coda_ctx {
 	struct coda_aux_buf		internal_frames[CODA_MAX_FRAMEBUFFERS];
 	int				num_internal_frames;
 	int				idx;
+	struct coda_iram_info		iram_info;
 };
 
 static const u8 coda_filler_nal[14] = { 0x00, 0x00, 0x00, 0x01, 0x0c, 0xff,
@@ -791,6 +804,10 @@ static void coda_device_run(void *m2m_priv)
 				CODA7_REG_BIT_AXI_SRAM_USE);
 	}
 
+	if (dev->devtype->product != CODA_DX6)
+		coda_write(dev, ctx->iram_info.axi_sram_use,
+				CODA7_REG_BIT_AXI_SRAM_USE);
+
 	/* 1 second timeout in case CODA locks up */
 	schedule_delayed_work(&dev->timeout, HZ);
 
@@ -1026,6 +1043,110 @@ static int coda_h264_padding(int size, char *p)
 	return nal_size;
 }
 
+static void coda_setup_iram(struct coda_ctx *ctx)
+{
+	struct coda_iram_info *iram_info = &ctx->iram_info;
+	struct coda_dev *dev = ctx->dev;
+	int ipacdc_size;
+	int bitram_size;
+	int dbk_size;
+	int mb_width;
+	int me_size;
+	int size;
+
+	memset(iram_info, 0, sizeof(*iram_info));
+	size = dev->iram_size;
+
+	if (dev->devtype->product == CODA_DX6)
+		return;
+
+	if (ctx->inst_type == CODA_INST_ENCODER) {
+		struct coda_q_data *q_data_src;
+
+		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+		mb_width = DIV_ROUND_UP(q_data_src->width, 16);
+
+		/* Prioritize in case IRAM is too small for everything */
+		me_size = round_up(round_up(q_data_src->width, 16) * 36 + 2048,
+				   1024);
+		iram_info->search_ram_size = me_size;
+		if (size >= iram_info->search_ram_size) {
+			if (dev->devtype->product == CODA_7541)
+				iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE;
+			iram_info->search_ram_paddr = dev->iram_paddr;
+			size -= iram_info->search_ram_size;
+		} else {
+			pr_err("IRAM is smaller than the search ram size\n");
+			goto out;
+		}
+
+		/* Only H.264BP and H.263P3 are considered */
+		dbk_size = round_up(128 * mb_width, 1024);
+		if (size >= dbk_size) {
+			iram_info->axi_sram_use |= CODA7_USE_HOST_DBK_ENABLE;
+			iram_info->buf_dbk_y_use = dev->iram_paddr +
+						   iram_info->search_ram_size;
+			iram_info->buf_dbk_c_use = iram_info->buf_dbk_y_use +
+						   dbk_size / 2;
+			size -= dbk_size;
+		} else {
+			goto out;
+		}
+
+		bitram_size = round_up(128 * mb_width, 1024);
+		if (size >= bitram_size) {
+			iram_info->axi_sram_use |= CODA7_USE_HOST_BIT_ENABLE;
+			iram_info->buf_bit_use = iram_info->buf_dbk_c_use +
+						 dbk_size / 2;
+			size -= bitram_size;
+		} else {
+			goto out;
+		}
+
+		ipacdc_size = round_up(128 * mb_width, 1024);
+		if (size >= ipacdc_size) {
+			iram_info->axi_sram_use |= CODA7_USE_HOST_IP_ENABLE;
+			iram_info->buf_ip_ac_dc_use = iram_info->buf_bit_use +
+						      bitram_size;
+			size -= ipacdc_size;
+		}
+
+		/* OVL disabled for encoder */
+	}
+
+out:
+	switch (dev->devtype->product) {
+	case CODA_DX6:
+		break;
+	case CODA_7541:
+		/* i.MX53 uses secondary AXI for IRAM access */
+		if (iram_info->axi_sram_use & CODA7_USE_HOST_BIT_ENABLE)
+			iram_info->axi_sram_use |= CODA7_USE_BIT_ENABLE;
+		if (iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE)
+			iram_info->axi_sram_use |= CODA7_USE_IP_ENABLE;
+		if (iram_info->axi_sram_use & CODA7_USE_HOST_DBK_ENABLE)
+			iram_info->axi_sram_use |= CODA7_USE_DBK_ENABLE;
+		if (iram_info->axi_sram_use & CODA7_USE_HOST_OVL_ENABLE)
+			iram_info->axi_sram_use |= CODA7_USE_OVL_ENABLE;
+		if (iram_info->axi_sram_use & CODA7_USE_HOST_ME_ENABLE)
+			iram_info->axi_sram_use |= CODA7_USE_ME_ENABLE;
+	}
+
+	if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
+		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
+			 "IRAM smaller than needed\n");
+
+	if (dev->devtype->product == CODA_7541) {
+		/* TODO - Enabling these causes picture errors on CODA7541 */
+		if (ctx->inst_type == CODA_INST_ENCODER) {
+			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
+						     CODA7_USE_HOST_DBK_ENABLE |
+						     CODA7_USE_IP_ENABLE |
+						     CODA7_USE_DBK_ENABLE);
+		}
+	}
+}
+
 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer *buf,
 			      int header_code, u8 *header, int *size)
 {
@@ -1198,6 +1319,8 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 	}
 	coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
 
+	coda_setup_iram(ctx);
+
 	if (dst_fourcc == V4L2_PIX_FMT_H264) {
 		value  = (FMO_SLICE_SAVE_BUF_SIZE << 7);
 		value |= (0 & CODA_FMOPARAM_TYPE_MASK) << CODA_FMOPARAM_TYPE_OFFSET;
@@ -1205,8 +1328,10 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 		if (dev->devtype->product == CODA_DX6) {
 			coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
 		} else {
-			coda_write(dev, dev->iram_paddr, CODA7_CMD_ENC_SEQ_SEARCH_BASE);
-			coda_write(dev, 48 * 1024, CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
+			coda_write(dev, ctx->iram_info.search_ram_paddr,
+					CODA7_CMD_ENC_SEQ_SEARCH_BASE);
+			coda_write(dev, ctx->iram_info.search_ram_size,
+					CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
 		}
 	}
 
@@ -1231,12 +1356,16 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 	coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
 	coda_write(dev, round_up(q_data_src->width, 8), CODA_CMD_SET_FRAME_BUF_STRIDE);
 	if (dev->devtype->product != CODA_DX6) {
-		coda_write(dev, round_up(q_data_src->width, 8), CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
-		coda_write(dev, dev->iram_paddr + 48 * 1024, CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
-		coda_write(dev, dev->iram_paddr + 53 * 1024, CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
-		coda_write(dev, dev->iram_paddr + 58 * 1024, CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
-		coda_write(dev, dev->iram_paddr + 68 * 1024, CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
-		coda_write(dev, 0x0, CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
+		coda_write(dev, ctx->iram_info.buf_bit_use,
+				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
+		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
+				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
+		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
+				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
+		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
+				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
+		coda_write(dev, ctx->iram_info.buf_ovl_use,
+				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
 	}
 	ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
 	if (ret < 0) {
diff --git a/drivers/media/platform/coda.h b/drivers/media/platform/coda.h
index ace0bf0..39c17c6 100644
--- a/drivers/media/platform/coda.h
+++ b/drivers/media/platform/coda.h
@@ -47,10 +47,17 @@
 #define CODA_REG_BIT_WR_PTR(x)			(0x124 + 8 * (x))
 #define CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR	0x140
 #define CODA7_REG_BIT_AXI_SRAM_USE		0x140
-#define		CODA7_USE_BIT_ENABLE		(1 << 0)
+#define		CODA7_USE_HOST_ME_ENABLE	(1 << 11)
+#define		CODA7_USE_HOST_OVL_ENABLE	(1 << 10)
+#define		CODA7_USE_HOST_DBK_ENABLE	(1 << 9)
+#define		CODA7_USE_HOST_IP_ENABLE	(1 << 8)
 #define		CODA7_USE_HOST_BIT_ENABLE	(1 << 7)
 #define		CODA7_USE_ME_ENABLE		(1 << 4)
-#define		CODA7_USE_HOST_ME_ENABLE	(1 << 11)
+#define		CODA7_USE_OVL_ENABLE		(1 << 3)
+#define		CODA7_USE_DBK_ENABLE		(1 << 2)
+#define		CODA7_USE_IP_ENABLE		(1 << 1)
+#define		CODA7_USE_BIT_ENABLE		(1 << 0)
+
 #define CODA_REG_BIT_BUSY			0x160
 #define		CODA_REG_BIT_BUSY_FLAG		1
 #define CODA_REG_BIT_RUN_COMMAND		0x164
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 3/8] [media] coda: do not allocate maximum number of framebuffers for encoder
  2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 1/8] [media] coda: use vb2_set_plane_payload instead of setting v4l2_planes[0].bytesused directly Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 2/8] [media] coda: dynamic IRAM setup for encoder Philipp Zabel
@ 2013-06-21  7:55 ` Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 4/8] [media] coda: update CODA7541 to firmware 1.4.50 Philipp Zabel
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun, Philipp Zabel

The encoder only ever needs two buffers, but we'll have to increase
CODA_MAX_FRAMEBUFFERS for the decoder.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/media/platform/coda.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index baf0ce8..6d76f1d 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -997,7 +997,6 @@ static int coda_alloc_framebuffers(struct coda_ctx *ctx, struct coda_q_data *q_d
 	ysize = round_up(q_data->width, 8) * height;
 
 	/* Allocate frame buffers */
-	ctx->num_internal_frames = CODA_MAX_FRAMEBUFFERS;
 	for (i = 0; i < ctx->num_internal_frames; i++) {
 		ctx->internal_frames[i].size = q_data->sizeimage;
 		if (fourcc == V4L2_PIX_FMT_H264 && dev->devtype->product != CODA_DX6)
@@ -1347,6 +1346,7 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 		goto out;
 	}
 
+	ctx->num_internal_frames = 2;
 	ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
 	if (ret < 0) {
 		v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 4/8] [media] coda: update CODA7541 to firmware 1.4.50
  2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
                   ` (2 preceding siblings ...)
  2013-06-21  7:55 ` [PATCH v2 3/8] [media] coda: do not allocate maximum number of framebuffers " Philipp Zabel
@ 2013-06-21  7:55 ` Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 5/8] [media] coda: add bitstream ringbuffer handling for decoder Philipp Zabel
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun, Philipp Zabel

This patch splits the global workbuf into a global tempbuf and a per-context
workbuf, adds the codec mode aux register, and restores the work buffer
pointer before each command is issued. With the new firmware, there is only
a single set of read/write pointers, which needs to be restored between
context switches.
This allows more than four active contexts at the same time.
All auxiliary buffers are now allocated through a helper function to avoid
code duplication.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/media/platform/coda.c | 235 +++++++++++++++++++++++++++++++-----------
 drivers/media/platform/coda.h |   9 ++
 2 files changed, 182 insertions(+), 62 deletions(-)

diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index 6d76f1d..28ee3f7 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -41,7 +41,8 @@
 
 #define CODA_FMO_BUF_SIZE	32
 #define CODADX6_WORK_BUF_SIZE	(288 * 1024 + CODA_FMO_BUF_SIZE * 8 * 1024)
-#define CODA7_WORK_BUF_SIZE	(512 * 1024 + CODA_FMO_BUF_SIZE * 8 * 1024)
+#define CODA7_WORK_BUF_SIZE	(128 * 1024)
+#define CODA7_TEMP_BUF_SIZE	(304 * 1024)
 #define CODA_PARA_BUF_SIZE	(10 * 1024)
 #define CODA_ISRAM_SIZE	(2048 * 2)
 #define CODADX6_IRAM_SIZE	0xb000
@@ -129,6 +130,7 @@ struct coda_dev {
 	struct clk		*clk_ahb;
 
 	struct coda_aux_buf	codebuf;
+	struct coda_aux_buf	tempbuf;
 	struct coda_aux_buf	workbuf;
 	struct gen_pool		*iram_pool;
 	long unsigned int	iram_vaddr;
@@ -153,6 +155,7 @@ struct coda_params {
 	u8			mpeg4_inter_qp;
 	u8			gop_size;
 	int			codec_mode;
+	int			codec_mode_aux;
 	enum v4l2_mpeg_video_multi_slice_mode slice_mode;
 	u32			framerate;
 	u16			bitrate;
@@ -192,8 +195,10 @@ struct coda_ctx {
 	int				vpu_header_size[3];
 	struct coda_aux_buf		parabuf;
 	struct coda_aux_buf		internal_frames[CODA_MAX_FRAMEBUFFERS];
+	struct coda_aux_buf		workbuf;
 	int				num_internal_frames;
 	int				idx;
+	int				reg_idx;
 	struct coda_iram_info		iram_info;
 };
 
@@ -241,10 +246,18 @@ static int coda_wait_timeout(struct coda_dev *dev)
 static void coda_command_async(struct coda_ctx *ctx, int cmd)
 {
 	struct coda_dev *dev = ctx->dev;
+
+	if (dev->devtype->product == CODA_7541) {
+		/* Restore context related registers to CODA */
+		coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
+	}
+
 	coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
 
 	coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
 	coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
+	coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
+
 	coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
 }
 
@@ -959,21 +972,6 @@ static void coda_wait_finish(struct vb2_queue *q)
 	coda_lock(ctx);
 }
 
-static void coda_free_framebuffers(struct coda_ctx *ctx)
-{
-	int i;
-
-	for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++) {
-		if (ctx->internal_frames[i].vaddr) {
-			dma_free_coherent(&ctx->dev->plat_dev->dev,
-				ctx->internal_frames[i].size,
-				ctx->internal_frames[i].vaddr,
-				ctx->internal_frames[i].paddr);
-			ctx->internal_frames[i].vaddr = NULL;
-		}
-	}
-}
-
 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
 {
 	struct coda_dev *dev = ctx->dev;
@@ -985,28 +983,69 @@ static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
 		p[index ^ 1] = value;
 }
 
+static int coda_alloc_aux_buf(struct coda_dev *dev,
+			      struct coda_aux_buf *buf, size_t size)
+{
+	buf->vaddr = dma_alloc_coherent(&dev->plat_dev->dev, size, &buf->paddr,
+					GFP_KERNEL);
+	if (!buf->vaddr)
+		return -ENOMEM;
+
+	buf->size = size;
+
+	return 0;
+}
+
+static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
+					 struct coda_aux_buf *buf, size_t size)
+{
+	return coda_alloc_aux_buf(ctx->dev, buf, size);
+}
+
+static void coda_free_aux_buf(struct coda_dev *dev,
+			      struct coda_aux_buf *buf)
+{
+	if (buf->vaddr) {
+		dma_free_coherent(&dev->plat_dev->dev, buf->size,
+				  buf->vaddr, buf->paddr);
+		buf->vaddr = NULL;
+		buf->size = 0;
+	}
+}
+
+static void coda_free_framebuffers(struct coda_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
+		coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
+}
+
 static int coda_alloc_framebuffers(struct coda_ctx *ctx, struct coda_q_data *q_data, u32 fourcc)
 {
 	struct coda_dev *dev = ctx->dev;
-
 	int height = q_data->height;
 	dma_addr_t paddr;
 	int ysize;
+	int ret;
 	int i;
 
+	if (ctx->codec && ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
+		height = round_up(height, 16);
 	ysize = round_up(q_data->width, 8) * height;
 
 	/* Allocate frame buffers */
 	for (i = 0; i < ctx->num_internal_frames; i++) {
-		ctx->internal_frames[i].size = q_data->sizeimage;
-		if (fourcc == V4L2_PIX_FMT_H264 && dev->devtype->product != CODA_DX6)
+		size_t size;
+
+		size = q_data->sizeimage;
+		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
+		    dev->devtype->product != CODA_DX6)
 			ctx->internal_frames[i].size += ysize/4;
-		ctx->internal_frames[i].vaddr = dma_alloc_coherent(
-				&dev->plat_dev->dev, ctx->internal_frames[i].size,
-				&ctx->internal_frames[i].paddr, GFP_KERNEL);
-		if (!ctx->internal_frames[i].vaddr) {
+		ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i], size);
+		if (ret < 0) {
 			coda_free_framebuffers(ctx);
-			return -ENOMEM;
+			return ret;
 		}
 	}
 
@@ -1017,10 +1056,20 @@ static int coda_alloc_framebuffers(struct coda_ctx *ctx, struct coda_q_data *q_d
 		coda_parabuf_write(ctx, i * 3 + 1, paddr + ysize); /* Cb */
 		coda_parabuf_write(ctx, i * 3 + 2, paddr + ysize + ysize/4); /* Cr */
 
-		if (dev->devtype->product != CODA_DX6 && fourcc == V4L2_PIX_FMT_H264)
-			coda_parabuf_write(ctx, 96 + i, ctx->internal_frames[i].paddr + ysize + ysize/4 + ysize/4);
+		/* mvcol buffer for h.264 */
+		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
+		    dev->devtype->product != CODA_DX6)
+			coda_parabuf_write(ctx, 96 + i,
+					   ctx->internal_frames[i].paddr +
+					   ysize + ysize/4 + ysize/4);
 	}
 
+	/* mvcol buffer for mpeg4 */
+	if ((dev->devtype->product != CODA_DX6) &&
+	    (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4))
+		coda_parabuf_write(ctx, 97, ctx->internal_frames[i].paddr +
+					    ysize + ysize/4 + ysize/4);
+
 	return 0;
 }
 
@@ -1146,6 +1195,49 @@ out:
 	}
 }
 
+static void coda_free_context_buffers(struct coda_ctx *ctx)
+{
+	struct coda_dev *dev = ctx->dev;
+
+	if (dev->devtype->product != CODA_DX6)
+		coda_free_aux_buf(dev, &ctx->workbuf);
+}
+
+static int coda_alloc_context_buffers(struct coda_ctx *ctx,
+				      struct coda_q_data *q_data)
+{
+	struct coda_dev *dev = ctx->dev;
+	size_t size;
+	int ret;
+
+	switch (dev->devtype->product) {
+	case CODA_7541:
+		size = CODA7_WORK_BUF_SIZE;
+		break;
+	default:
+		return 0;
+	}
+
+	if (ctx->workbuf.vaddr) {
+		v4l2_err(&dev->v4l2_dev, "context buffer still allocated\n");
+		ret = -EBUSY;
+		return -ENOMEM;
+	}
+
+	ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size);
+	if (ret < 0) {
+		v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte context buffer",
+			 ctx->workbuf.size);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	coda_free_context_buffers(ctx);
+	return ret;
+}
+
 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer *buf,
 			      int header_code, u8 *header, int *size)
 {
@@ -1161,7 +1253,7 @@ static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer *buf,
 		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
 		return ret;
 	}
-	*size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->idx)) -
+	*size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
 		coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
 	memcpy(header, vb2_plane_vaddr(buf, 0), *size);
 
@@ -1214,6 +1306,11 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 		return -EINVAL;
 	}
 
+	/* Allocate per-instance buffers */
+	ret = coda_alloc_context_buffers(ctx, q_data_src);
+	if (ret < 0)
+		return ret;
+
 	if (!coda_is_initialized(dev)) {
 		v4l2_err(v4l2_dev, "coda is not initialized.\n");
 		return -EFAULT;
@@ -1222,8 +1319,8 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 	mutex_lock(&dev->coda_mutex);
 
 	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
-	coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->idx));
-	coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->idx));
+	coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
+	coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
 	switch (dev->devtype->product) {
 	case CODA_DX6:
 		coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
@@ -1648,7 +1745,13 @@ static int coda_open(struct file *file)
 	v4l2_fh_add(&ctx->fh);
 	ctx->dev = dev;
 	ctx->idx = idx;
-
+	switch (dev->devtype->product) {
+	case CODA_7541:
+		ctx->reg_idx = 0;
+		break;
+	default:
+		ctx->reg_idx = idx;
+	}
 	set_default_params(ctx);
 	ctx->m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx,
 					 &coda_queue_init);
@@ -1667,11 +1770,9 @@ static int coda_open(struct file *file)
 
 	ctx->fh.ctrl_handler = &ctx->ctrls;
 
-	ctx->parabuf.vaddr = dma_alloc_coherent(&dev->plat_dev->dev,
-			CODA_PARA_BUF_SIZE, &ctx->parabuf.paddr, GFP_KERNEL);
-	if (!ctx->parabuf.vaddr) {
+	ret = coda_alloc_context_buf(ctx, &ctx->parabuf, CODA_PARA_BUF_SIZE);
+	if (ret < 0) {
 		v4l2_err(&dev->v4l2_dev, "failed to allocate parabuf");
-		ret = -ENOMEM;
 		goto err;
 	}
 
@@ -1706,9 +1807,11 @@ static int coda_release(struct file *file)
 	list_del(&ctx->list);
 	coda_unlock(ctx);
 
-	dma_free_coherent(&dev->plat_dev->dev, CODA_PARA_BUF_SIZE,
-		ctx->parabuf.vaddr, ctx->parabuf.paddr);
-	v4l2_m2m_ctx_release(ctx->m2m_ctx);
+	coda_free_context_buffers(ctx);
+	if (ctx->dev->devtype->product == CODA_DX6)
+		coda_free_aux_buf(dev, &ctx->workbuf);
+
+	coda_free_aux_buf(dev, &ctx->parabuf);
 	v4l2_ctrl_handler_free(&ctx->ctrls);
 	clk_disable_unprepare(dev->clk_per);
 	clk_disable_unprepare(dev->clk_ahb);
@@ -1788,7 +1891,8 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
 	/* Get results from the coda */
 	coda_read(dev, CODA_RET_ENC_PIC_TYPE);
 	start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
-	wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->idx));
+	wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
+
 	/* Calculate bytesused field */
 	if (dst_buf->v4l2_buf.sequence == 0) {
 		vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr +
@@ -1858,7 +1962,7 @@ static void coda_timeout(struct work_struct *work)
 
 static u32 coda_supported_firmwares[] = {
 	CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
-	CODA_FIRMWARE_VERNUM(CODA_7541, 13, 4, 29),
+	CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
 };
 
 static bool coda_firmware_supported(u32 vernum)
@@ -1923,8 +2027,13 @@ static int coda_hw_init(struct coda_dev *dev)
 		coda_write(dev, 0, CODA_REG_BIT_CODE_BUF_ADDR + i * 4);
 
 	/* Tell the BIT where to find everything it needs */
-	coda_write(dev, dev->workbuf.paddr,
-		      CODA_REG_BIT_WORK_BUF_ADDR);
+	if (dev->devtype->product == CODA_7541) {
+		coda_write(dev, dev->tempbuf.paddr,
+				CODA_REG_BIT_TEMP_BUF_ADDR);
+	} else {
+		coda_write(dev, dev->workbuf.paddr,
+			      CODA_REG_BIT_WORK_BUF_ADDR);
+	}
 	coda_write(dev, dev->codebuf.paddr,
 		      CODA_REG_BIT_CODE_BUF_ADDR);
 	coda_write(dev, 0, CODA_REG_BIT_CODE_RUN);
@@ -2011,11 +2120,8 @@ static void coda_fw_callback(const struct firmware *fw, void *context)
 	}
 
 	/* allocate auxiliary per-device code buffer for the BIT processor */
-	dev->codebuf.size = fw->size;
-	dev->codebuf.vaddr = dma_alloc_coherent(&pdev->dev, fw->size,
-						    &dev->codebuf.paddr,
-						    GFP_KERNEL);
-	if (!dev->codebuf.vaddr) {
+	ret = coda_alloc_aux_buf(dev, &dev->codebuf, fw->size);
+	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to allocate code buffer\n");
 		return;
 	}
@@ -2205,18 +2311,26 @@ static int coda_probe(struct platform_device *pdev)
 	/* allocate auxiliary per-device buffers for the BIT processor */
 	switch (dev->devtype->product) {
 	case CODA_DX6:
-		dev->workbuf.size = CODADX6_WORK_BUF_SIZE;
+		ret = coda_alloc_aux_buf(dev, &dev->workbuf,
+					 CODADX6_WORK_BUF_SIZE);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "failed to allocate work buffer\n");
+			v4l2_device_unregister(&dev->v4l2_dev);
+			return ret;
+		}
+		break;
+	case CODA_7541:
+		dev->tempbuf.size = CODA7_TEMP_BUF_SIZE;
 		break;
-	default:
-		dev->workbuf.size = CODA7_WORK_BUF_SIZE;
 	}
-	dev->workbuf.vaddr = dma_alloc_coherent(&pdev->dev, dev->workbuf.size,
-						    &dev->workbuf.paddr,
-						    GFP_KERNEL);
-	if (!dev->workbuf.vaddr) {
-		dev_err(&pdev->dev, "failed to allocate work buffer\n");
-		v4l2_device_unregister(&dev->v4l2_dev);
-		return -ENOMEM;
+	if (dev->tempbuf.size) {
+		ret = coda_alloc_aux_buf(dev, &dev->tempbuf,
+					 dev->tempbuf.size);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "failed to allocate temp buffer\n");
+			v4l2_device_unregister(&dev->v4l2_dev);
+			return ret;
+		}
 	}
 
 	if (dev->devtype->product == CODA_DX6)
@@ -2248,12 +2362,9 @@ static int coda_remove(struct platform_device *pdev)
 	v4l2_device_unregister(&dev->v4l2_dev);
 	if (dev->iram_vaddr)
 		gen_pool_free(dev->iram_pool, dev->iram_vaddr, dev->iram_size);
-	if (dev->codebuf.vaddr)
-		dma_free_coherent(&pdev->dev, dev->codebuf.size,
-				  &dev->codebuf.vaddr, dev->codebuf.paddr);
-	if (dev->workbuf.vaddr)
-		dma_free_coherent(&pdev->dev, dev->workbuf.size, &dev->workbuf.vaddr,
-			  dev->workbuf.paddr);
+	coda_free_aux_buf(dev, &dev->codebuf);
+	coda_free_aux_buf(dev, &dev->tempbuf);
+	coda_free_aux_buf(dev, &dev->workbuf);
 	return 0;
 }
 
diff --git a/drivers/media/platform/coda.h b/drivers/media/platform/coda.h
index 39c17c6..b2b5b1d 100644
--- a/drivers/media/platform/coda.h
+++ b/drivers/media/platform/coda.h
@@ -43,6 +43,7 @@
 #define		CODA_STREAM_ENDIAN_SELECT	(1 << 0)
 #define CODA_REG_BIT_FRAME_MEM_CTRL		0x110
 #define		CODA_IMAGE_ENDIAN_SELECT	(1 << 0)
+#define CODA_REG_BIT_TEMP_BUF_ADDR		0x118
 #define CODA_REG_BIT_RD_PTR(x)			(0x120 + 8 * (x))
 #define CODA_REG_BIT_WR_PTR(x)			(0x124 + 8 * (x))
 #define CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR	0x140
@@ -91,6 +92,14 @@
 #define 	CODA_MODE_INVALID		0xffff
 #define CODA_REG_BIT_INT_ENABLE		0x170
 #define		CODA_INT_INTERRUPT_ENABLE	(1 << 3)
+#define CODA7_REG_BIT_RUN_AUX_STD		0x178
+#define		CODA_MP4_AUX_MPEG4		0
+#define		CODA_MP4_AUX_DIVX3		1
+#define		CODA_VPX_AUX_THO		0
+#define		CODA_VPX_AUX_VP6		1
+#define		CODA_VPX_AUX_VP8		2
+#define		CODA_H264_AUX_AVC		0
+#define		CODA_H264_AUX_MVC		1
 
 /*
  * Commands' mailbox:
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 5/8] [media] coda: add bitstream ringbuffer handling for decoder
  2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
                   ` (3 preceding siblings ...)
  2013-06-21  7:55 ` [PATCH v2 4/8] [media] coda: update CODA7541 to firmware 1.4.50 Philipp Zabel
@ 2013-06-21  7:55 ` Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 6/8] [media] coda: dynamic IRAM setup " Philipp Zabel
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun, Philipp Zabel

Add a bitstream ringbuffer using kfifo. Queued source buffers are to be copied
into the bitstream ringbuffer immediately and marked as done, if possible.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/media/platform/coda.c | 134 +++++++++++++++++++++++++++++++++++++++++-
 drivers/media/platform/coda.h |   3 +
 2 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index 28ee3f7..1f3bd43 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/kfifo.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
@@ -182,6 +183,7 @@ struct coda_ctx {
 	int				streamon_out;
 	int				streamon_cap;
 	u32				isequence;
+	u32				qsequence;
 	struct coda_q_data		q_data[2];
 	enum coda_inst_type		inst_type;
 	struct coda_codec		*codec;
@@ -193,6 +195,9 @@ struct coda_ctx {
 	int				gopcounter;
 	char				vpu_header[3][64];
 	int				vpu_header_size[3];
+	struct kfifo			bitstream_fifo;
+	struct mutex			bitstream_mutex;
+	struct coda_aux_buf		bitstream;
 	struct coda_aux_buf		parabuf;
 	struct coda_aux_buf		internal_frames[CODA_MAX_FRAMEBUFFERS];
 	struct coda_aux_buf		workbuf;
@@ -200,6 +205,7 @@ struct coda_ctx {
 	int				idx;
 	int				reg_idx;
 	struct coda_iram_info		iram_info;
+	u32				bit_stream_param;
 };
 
 static const u8 coda_filler_nal[14] = { 0x00, 0x00, 0x00, 0x01, 0x0c, 0xff,
@@ -249,6 +255,8 @@ static void coda_command_async(struct coda_ctx *ctx, int cmd)
 
 	if (dev->devtype->product == CODA_7541) {
 		/* Restore context related registers to CODA */
+		coda_write(dev, ctx->bit_stream_param,
+				CODA_REG_BIT_BIT_STREAM_PARAM);
 		coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
 	}
 
@@ -683,6 +691,105 @@ static const struct v4l2_ioctl_ops coda_ioctl_ops = {
 	.vidioc_streamoff	= vidioc_streamoff,
 };
 
+static inline int coda_get_bitstream_payload(struct coda_ctx *ctx)
+{
+	return kfifo_len(&ctx->bitstream_fifo);
+}
+
+static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
+{
+	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
+	struct coda_dev *dev = ctx->dev;
+	u32 rd_ptr;
+
+	rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
+	kfifo->out = (kfifo->in & ~kfifo->mask) |
+		      (rd_ptr - ctx->bitstream.paddr);
+	if (kfifo->out > kfifo->in)
+		kfifo->out -= kfifo->mask + 1;
+}
+
+static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
+{
+	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
+	struct coda_dev *dev = ctx->dev;
+	u32 rd_ptr, wr_ptr;
+
+	rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
+	coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
+	wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
+	coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
+}
+
+static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
+{
+	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
+	struct coda_dev *dev = ctx->dev;
+	u32 wr_ptr;
+
+	wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
+	coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
+}
+
+static int coda_bitstream_queue(struct coda_ctx *ctx, struct vb2_buffer *src_buf)
+{
+	u32 src_size = vb2_get_plane_payload(src_buf, 0);
+	u32 n;
+
+	n = kfifo_in(&ctx->bitstream_fifo, vb2_plane_vaddr(src_buf, 0), src_size);
+	if (n < src_size)
+		return -ENOSPC;
+
+	dma_sync_single_for_device(&ctx->dev->plat_dev->dev, ctx->bitstream.paddr,
+				   ctx->bitstream.size, DMA_TO_DEVICE);
+
+	ctx->qsequence++;
+
+	return 0;
+}
+
+static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
+				     struct vb2_buffer *src_buf)
+{
+	int ret;
+
+	if (coda_get_bitstream_payload(ctx) +
+	    vb2_get_plane_payload(src_buf, 0) + 512 >= ctx->bitstream.size)
+		return false;
+
+	if (vb2_plane_vaddr(src_buf, 0) == NULL) {
+		v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
+		return true;
+	}
+
+	ret = coda_bitstream_queue(ctx, src_buf);
+	if (ret < 0) {
+		v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
+		return false;
+	}
+	/* Sync write pointer to device */
+	if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
+		coda_kfifo_sync_to_device_write(ctx);
+
+	return true;
+}
+
+static void coda_fill_bitstream(struct coda_ctx *ctx)
+{
+	struct vb2_buffer *src_buf;
+
+	while (v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) > 0) {
+		src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx);
+
+		if (coda_bitstream_try_queue(ctx, src_buf)) {
+			src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx);
+			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
+		} else {
+			break;
+		}
+	}
+}
+
 /*
  * Mem-to-mem operations.
  */
@@ -833,15 +940,22 @@ static int coda_job_ready(void *m2m_priv)
 
 	/*
 	 * For both 'P' and 'key' frame cases 1 picture
-	 * and 1 frame are needed.
+	 * and 1 frame are needed. In the decoder case,
+	 * the compressed frame can be in the bitstream.
 	 */
-	if (!v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) ||
-		!v4l2_m2m_num_dst_bufs_ready(ctx->m2m_ctx)) {
+	if (!v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) &&
+	    ctx->inst_type != CODA_INST_DECODER) {
 		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
 			 "not ready: not enough video buffers.\n");
 		return 0;
 	}
 
+	if (!v4l2_m2m_num_dst_bufs_ready(ctx->m2m_ctx)) {
+		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
+			 "not ready: not enough video capture buffers.\n");
+		return 0;
+	}
+
 	if (ctx->aborting) {
 		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
 			 "not ready: aborting\n");
@@ -1776,6 +1890,18 @@ static int coda_open(struct file *file)
 		goto err;
 	}
 
+	ctx->bitstream.size = CODA_MAX_FRAME_SIZE;
+	ctx->bitstream.vaddr = dma_alloc_writecombine(&dev->plat_dev->dev,
+			ctx->bitstream.size, &ctx->bitstream.paddr, GFP_KERNEL);
+	if (!ctx->bitstream.vaddr) {
+		v4l2_err(&dev->v4l2_dev, "failed to allocate bitstream ringbuffer");
+		ret = -ENOMEM;
+		goto err;
+	}
+	kfifo_init(&ctx->bitstream_fifo,
+		ctx->bitstream.vaddr, ctx->bitstream.size);
+	mutex_init(&ctx->bitstream_mutex);
+
 	coda_lock(ctx);
 	list_add(&ctx->list, &dev->instances);
 	coda_unlock(ctx);
@@ -1807,6 +1933,8 @@ static int coda_release(struct file *file)
 	list_del(&ctx->list);
 	coda_unlock(ctx);
 
+	dma_free_writecombine(&dev->plat_dev->dev, ctx->bitstream.size,
+		ctx->bitstream.vaddr, ctx->bitstream.paddr);
 	coda_free_context_buffers(ctx);
 	if (ctx->dev->devtype->product == CODA_DX6)
 		coda_free_aux_buf(dev, &ctx->workbuf);
diff --git a/drivers/media/platform/coda.h b/drivers/media/platform/coda.h
index b2b5b1d..140eea5 100644
--- a/drivers/media/platform/coda.h
+++ b/drivers/media/platform/coda.h
@@ -43,6 +43,9 @@
 #define		CODA_STREAM_ENDIAN_SELECT	(1 << 0)
 #define CODA_REG_BIT_FRAME_MEM_CTRL		0x110
 #define		CODA_IMAGE_ENDIAN_SELECT	(1 << 0)
+#define CODA_REG_BIT_BIT_STREAM_PARAM		0x114
+#define		CODA_BIT_STREAM_END_FLAG	(1 << 2)
+#define		CODA_BIT_DEC_SEQ_INIT_ESCAPE	(1 << 0)
 #define CODA_REG_BIT_TEMP_BUF_ADDR		0x118
 #define CODA_REG_BIT_RD_PTR(x)			(0x120 + 8 * (x))
 #define CODA_REG_BIT_WR_PTR(x)			(0x124 + 8 * (x))
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 6/8] [media] coda: dynamic IRAM setup for decoder
  2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
                   ` (4 preceding siblings ...)
  2013-06-21  7:55 ` [PATCH v2 5/8] [media] coda: add bitstream ringbuffer handling for decoder Philipp Zabel
@ 2013-06-21  7:55 ` Philipp Zabel
  2013-07-26 15:18   ` Mauro Carvalho Chehab
  2013-06-21  7:55 ` [PATCH v2 7/8] [media] coda: split encoder specific parts out of device_run and irq_handler Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 8/8] [media] coda: add CODA7541 decoding support Philipp Zabel
  7 siblings, 1 reply; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun, Philipp Zabel

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/media/platform/coda.c | 50 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index 1f3bd43..856a93e 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -1212,6 +1212,7 @@ static void coda_setup_iram(struct coda_ctx *ctx)
 	int ipacdc_size;
 	int bitram_size;
 	int dbk_size;
+	int ovl_size;
 	int mb_width;
 	int me_size;
 	int size;
@@ -1273,7 +1274,47 @@ static void coda_setup_iram(struct coda_ctx *ctx)
 			size -= ipacdc_size;
 		}
 
-		/* OVL disabled for encoder */
+		/* OVL and BTP disabled for encoder */
+	} else if (ctx->inst_type == CODA_INST_DECODER) {
+		struct coda_q_data *q_data_dst;
+		int mb_height;
+
+		q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+		mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
+		mb_height = DIV_ROUND_UP(q_data_dst->height, 16);
+
+		dbk_size = round_up(256 * mb_width, 1024);
+		if (size >= dbk_size) {
+			iram_info->axi_sram_use |= CODA7_USE_HOST_DBK_ENABLE;
+			iram_info->buf_dbk_y_use = dev->iram_paddr;
+			iram_info->buf_dbk_c_use = dev->iram_paddr +
+						   dbk_size / 2;
+			size -= dbk_size;
+		} else {
+			goto out;
+		}
+
+		bitram_size = round_up(128 * mb_width, 1024);
+		if (size >= bitram_size) {
+			iram_info->axi_sram_use |= CODA7_USE_HOST_BIT_ENABLE;
+			iram_info->buf_bit_use = iram_info->buf_dbk_c_use +
+						 dbk_size / 2;
+			size -= bitram_size;
+		} else {
+			goto out;
+		}
+
+		ipacdc_size = round_up(128 * mb_width, 1024);
+		if (size >= ipacdc_size) {
+			iram_info->axi_sram_use |= CODA7_USE_HOST_IP_ENABLE;
+			iram_info->buf_ip_ac_dc_use = iram_info->buf_bit_use +
+						      bitram_size;
+			size -= ipacdc_size;
+		} else {
+			goto out;
+		}
+
+		ovl_size = round_up(80 * mb_width, 1024);
 	}
 
 out:
@@ -1300,7 +1341,12 @@ out:
 
 	if (dev->devtype->product == CODA_7541) {
 		/* TODO - Enabling these causes picture errors on CODA7541 */
-		if (ctx->inst_type == CODA_INST_ENCODER) {
+		if (ctx->inst_type == CODA_INST_DECODER) {
+			/* fw 1.4.50 */
+			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
+						     CODA7_USE_IP_ENABLE);
+		} else {
+			/* fw 13.4.29 */
 			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
 						     CODA7_USE_HOST_DBK_ENABLE |
 						     CODA7_USE_IP_ENABLE |
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 7/8] [media] coda: split encoder specific parts out of device_run and irq_handler
  2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
                   ` (5 preceding siblings ...)
  2013-06-21  7:55 ` [PATCH v2 6/8] [media] coda: dynamic IRAM setup " Philipp Zabel
@ 2013-06-21  7:55 ` Philipp Zabel
  2013-06-21  7:55 ` [PATCH v2 8/8] [media] coda: add CODA7541 decoding support Philipp Zabel
  7 siblings, 0 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun, Philipp Zabel

Add coda_prepare_encode() and coda_encode_finish() functions. They are called
from coda_device_run() and coda_irq_handler(), respectively, before and after
the hardware picture run. This should make the following decoder support patch
easier to read, which will add the coda_prepare/finish_decode() equivalents.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/media/platform/coda.c | 82 +++++++++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 34 deletions(-)

diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index 856a93e..e8b3708 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -793,9 +793,8 @@ static void coda_fill_bitstream(struct coda_ctx *ctx)
 /*
  * Mem-to-mem operations.
  */
-static void coda_device_run(void *m2m_priv)
+static void coda_prepare_encode(struct coda_ctx *ctx)
 {
-	struct coda_ctx *ctx = m2m_priv;
 	struct coda_q_data *q_data_src, *q_data_dst;
 	struct vb2_buffer *src_buf, *dst_buf;
 	struct coda_dev *dev = ctx->dev;
@@ -805,8 +804,6 @@ static void coda_device_run(void *m2m_priv)
 	u32 pic_stream_buffer_addr, pic_stream_buffer_size;
 	u32 dst_fourcc;
 
-	mutex_lock(&dev->coda_mutex);
-
 	src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx);
 	dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
 	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
@@ -917,6 +914,16 @@ static void coda_device_run(void *m2m_priv)
 	coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
 	coda_write(dev, pic_stream_buffer_size / 1024,
 		   CODA_CMD_ENC_PIC_BB_SIZE);
+}
+
+static void coda_device_run(void *m2m_priv)
+{
+	struct coda_ctx *ctx = m2m_priv;
+	struct coda_dev *dev = ctx->dev;
+
+	mutex_lock(&dev->coda_mutex);
+
+	coda_prepare_encode(ctx);
 
 	if (dev->devtype->product == CODA_7541) {
 		coda_write(dev, CODA7_USE_BIT_ENABLE | CODA7_USE_HOST_BIT_ENABLE |
@@ -2025,39 +2032,11 @@ static const struct v4l2_file_operations coda_fops = {
 	.mmap		= coda_mmap,
 };
 
-static irqreturn_t coda_irq_handler(int irq, void *data)
+static void coda_encode_finish(struct coda_ctx *ctx)
 {
 	struct vb2_buffer *src_buf, *dst_buf;
-	struct coda_dev *dev = data;
+	struct coda_dev *dev = ctx->dev;
 	u32 wr_ptr, start_ptr;
-	struct coda_ctx *ctx;
-
-	cancel_delayed_work(&dev->timeout);
-
-	/* read status register to attend the IRQ */
-	coda_read(dev, CODA_REG_BIT_INT_STATUS);
-	coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
-		      CODA_REG_BIT_INT_CLEAR);
-
-	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
-	if (ctx == NULL) {
-		v4l2_err(&dev->v4l2_dev, "Instance released before the end of transaction\n");
-		mutex_unlock(&dev->coda_mutex);
-		return IRQ_HANDLED;
-	}
-
-	if (ctx->aborting) {
-		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
-			 "task has been aborted\n");
-		mutex_unlock(&dev->coda_mutex);
-		return IRQ_HANDLED;
-	}
-
-	if (coda_isbusy(ctx->dev)) {
-		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
-			 "coda is still busy!!!!\n");
-		return IRQ_NONE;
-	}
 
 	src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx);
 	dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
@@ -2106,6 +2085,41 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
 		dst_buf->v4l2_buf.sequence,
 		(dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ?
 		"KEYFRAME" : "PFRAME");
+}
+
+static irqreturn_t coda_irq_handler(int irq, void *data)
+{
+	struct coda_dev *dev = data;
+	struct coda_ctx *ctx;
+
+	cancel_delayed_work(&dev->timeout);
+
+	/* read status register to attend the IRQ */
+	coda_read(dev, CODA_REG_BIT_INT_STATUS);
+	coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
+		      CODA_REG_BIT_INT_CLEAR);
+
+	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
+	if (ctx == NULL) {
+		v4l2_err(&dev->v4l2_dev, "Instance released before the end of transaction\n");
+		mutex_unlock(&dev->coda_mutex);
+		return IRQ_HANDLED;
+	}
+
+	if (ctx->aborting) {
+		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
+			 "task has been aborted\n");
+		mutex_unlock(&dev->coda_mutex);
+		return IRQ_HANDLED;
+	}
+
+	if (coda_isbusy(ctx->dev)) {
+		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
+			 "coda is still busy!!!!\n");
+		return IRQ_NONE;
+	}
+
+	coda_encode_finish(ctx);
 
 	mutex_unlock(&dev->coda_mutex);
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 8/8] [media] coda: add CODA7541 decoding support
  2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
                   ` (6 preceding siblings ...)
  2013-06-21  7:55 ` [PATCH v2 7/8] [media] coda: split encoder specific parts out of device_run and irq_handler Philipp Zabel
@ 2013-06-21  7:55 ` Philipp Zabel
  2013-06-27 10:10   ` Kamil Debski
  7 siblings, 1 reply; 15+ messages in thread
From: Philipp Zabel @ 2013-06-21  7:55 UTC (permalink / raw)
  To: linux-media
  Cc: Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun, Philipp Zabel

This patch enables decoding of h.264 and mpeg4 streams on CODA7541.
Queued output buffers are immediately copied into the bitstream
ringbuffer. A device_run can be scheduled whenever there is either
enough compressed bitstream data, or the CODA is in stream end mode.

On each successful device_run, data is read from the bitstream ringbuffer
and a frame is decoded into a free internal framebuffer. Depending on
reordering, a possibly previously decoded frame is marked as display
frame, and at the same time the display frame from the previous run
is copied out into a capture buffer by the rotator hardware.

The dequeued capture buffers are counted to send the EOS signal to
userspace with the last frame. When userspace sends the decoder stop
command or enqueues an empty output buffer, the stream end flag is
set to allow decoding the remaining frames in the bitstream
ringbuffer.

The enum_fmt/try_fmt functions return fixed capture buffer sizes
while the output queue is streaming, to allow better autonegotiation
in userspace.

A per-context buffer mutex is used to lock the picture run against
buffer dequeueing: if a job gets queued, streamoff then dequeues the
last buffer, and device_run is called only afterwards, device_run bails
out. For that, the interrupt handler has to be threaded.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
Changes since v1:
 - Included the fix by Wei Yongjun, adding a missing unlock in the
   coda_stop_streaming() error handling case.
 - Restricted check for available bitstream data in coda_job_ready()
   to the decoder case.
---
 drivers/media/platform/coda.c | 787 ++++++++++++++++++++++++++++++++++++++----
 drivers/media/platform/coda.h |  84 +++++
 2 files changed, 813 insertions(+), 58 deletions(-)

diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index e8b3708..9cbdea6 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -29,6 +29,7 @@
 
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-device.h>
+#include <media/v4l2-event.h>
 #include <media/v4l2-ioctl.h>
 #include <media/v4l2-mem2mem.h>
 #include <media/videobuf2-core.h>
@@ -47,9 +48,11 @@
 #define CODA_PARA_BUF_SIZE	(10 * 1024)
 #define CODA_ISRAM_SIZE	(2048 * 2)
 #define CODADX6_IRAM_SIZE	0xb000
-#define CODA7_IRAM_SIZE		0x14000 /* 81920 bytes */
+#define CODA7_IRAM_SIZE		0x14000
 
-#define CODA_MAX_FRAMEBUFFERS	2
+#define CODA7_PS_BUF_SIZE	0x28000
+
+#define CODA_MAX_FRAMEBUFFERS	8
 
 #define MAX_W		8192
 #define MAX_H		8192
@@ -178,12 +181,16 @@ struct coda_iram_info {
 
 struct coda_ctx {
 	struct coda_dev			*dev;
+	struct mutex			buffer_mutex;
 	struct list_head		list;
+	struct work_struct		skip_run;
 	int				aborting;
+	int				initialized;
 	int				streamon_out;
 	int				streamon_cap;
 	u32				isequence;
 	u32				qsequence;
+	u32				osequence;
 	struct coda_q_data		q_data[2];
 	enum coda_inst_type		inst_type;
 	struct coda_codec		*codec;
@@ -193,12 +200,16 @@ struct coda_ctx {
 	struct v4l2_ctrl_handler	ctrls;
 	struct v4l2_fh			fh;
 	int				gopcounter;
+	int				runcounter;
 	char				vpu_header[3][64];
 	int				vpu_header_size[3];
 	struct kfifo			bitstream_fifo;
 	struct mutex			bitstream_mutex;
 	struct coda_aux_buf		bitstream;
+	bool				prescan_failed;
 	struct coda_aux_buf		parabuf;
+	struct coda_aux_buf		psbuf;
+	struct coda_aux_buf		slicebuf;
 	struct coda_aux_buf		internal_frames[CODA_MAX_FRAMEBUFFERS];
 	struct coda_aux_buf		workbuf;
 	int				num_internal_frames;
@@ -206,6 +217,8 @@ struct coda_ctx {
 	int				reg_idx;
 	struct coda_iram_info		iram_info;
 	u32				bit_stream_param;
+	u32				frm_dis_flg;
+	int				display_idx;
 };
 
 static const u8 coda_filler_nal[14] = { 0x00, 0x00, 0x00, 0x01, 0x0c, 0xff,
@@ -257,6 +270,8 @@ static void coda_command_async(struct coda_ctx *ctx, int cmd)
 		/* Restore context related registers to CODA */
 		coda_write(dev, ctx->bit_stream_param,
 				CODA_REG_BIT_BIT_STREAM_PARAM);
+		coda_write(dev, ctx->frm_dis_flg,
+				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
 		coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
 	}
 
@@ -331,6 +346,8 @@ static struct coda_codec codadx6_codecs[] = {
 static struct coda_codec coda7_codecs[] = {
 	CODA_CODEC(CODA7_MODE_ENCODE_H264, V4L2_PIX_FMT_YUV420, V4L2_PIX_FMT_H264,   1280, 720),
 	CODA_CODEC(CODA7_MODE_ENCODE_MP4,  V4L2_PIX_FMT_YUV420, V4L2_PIX_FMT_MPEG4,  1280, 720),
+	CODA_CODEC(CODA7_MODE_DECODE_H264, V4L2_PIX_FMT_H264,   V4L2_PIX_FMT_YUV420, 1920, 1080),
+	CODA_CODEC(CODA7_MODE_DECODE_MP4,  V4L2_PIX_FMT_MPEG4,  V4L2_PIX_FMT_YUV420, 1920, 1080),
 };
 
 static bool coda_format_is_yuv(u32 fourcc)
@@ -399,7 +416,7 @@ static int vidioc_querycap(struct file *file, void *priv,
 }
 
 static int enum_fmt(void *priv, struct v4l2_fmtdesc *f,
-			enum v4l2_buf_type type)
+			enum v4l2_buf_type type, int src_fourcc)
 {
 	struct coda_ctx *ctx = fh_to_ctx(priv);
 	struct coda_codec *codecs = ctx->dev->devtype->codecs;
@@ -411,7 +428,8 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc *f,
 
 	for (i = 0; i < num_formats; i++) {
 		/* Both uncompressed formats are always supported */
-		if (coda_format_is_yuv(formats[i].fourcc)) {
+		if (coda_format_is_yuv(formats[i].fourcc) &&
+		    !coda_format_is_yuv(src_fourcc)) {
 			if (num == f->index)
 				break;
 			++num;
@@ -419,8 +437,10 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc *f,
 		}
 		/* Compressed formats may be supported, check the codec list */
 		for (k = 0; k < num_codecs; k++) {
+			/* if src_fourcc is set, only consider matching codecs */
 			if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE &&
-			    formats[i].fourcc == codecs[k].dst_fourcc)
+			    formats[i].fourcc == codecs[k].dst_fourcc &&
+			    (!src_fourcc || src_fourcc == codecs[k].src_fourcc))
 				break;
 			if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
 			    formats[i].fourcc == codecs[k].src_fourcc)
@@ -447,13 +467,26 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc *f,
 static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
 				   struct v4l2_fmtdesc *f)
 {
-	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+	struct coda_ctx *ctx = fh_to_ctx(priv);
+	struct vb2_queue *src_vq;
+	struct coda_q_data *q_data_src;
+
+	/* If the source format is already fixed, only list matching formats */
+	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+	if (vb2_is_streaming(src_vq)) {
+		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+
+		return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE,
+				q_data_src->fourcc);
+	}
+
+	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE, 0);
 }
 
 static int vidioc_enum_fmt_vid_out(struct file *file, void *priv,
 				   struct v4l2_fmtdesc *f)
 {
-	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_OUTPUT, 0);
 }
 
 static int vidioc_g_fmt(struct file *file, void *priv, struct v4l2_format *f)
@@ -526,15 +559,45 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 				  struct v4l2_format *f)
 {
 	struct coda_ctx *ctx = fh_to_ctx(priv);
-	struct coda_codec *codec = NULL;
+	struct coda_codec *codec;
+	struct vb2_queue *src_vq;
+	int ret;
+
+	/*
+	 * If the source format is already fixed, try to find a codec that
+	 * converts to the given destination format
+	 */
+	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+	if (vb2_is_streaming(src_vq)) {
+		struct coda_q_data *q_data_src;
 
-	/* Determine codec by the encoded format */
-	codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420,
-				f->fmt.pix.pixelformat);
+		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+		codec = coda_find_codec(ctx->dev, q_data_src->fourcc,
+					f->fmt.pix.pixelformat);
+		if (!codec)
+			return -EINVAL;
+	} else {
+		/* Otherwise determine codec by encoded format, if possible */
+		codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420,
+					f->fmt.pix.pixelformat);
+	}
 
 	f->fmt.pix.colorspace = ctx->colorspace;
 
-	return vidioc_try_fmt(codec, f);
+	ret = vidioc_try_fmt(codec, f);
+	if (ret < 0)
+		return ret;
+
+	/* The h.264 decoder only returns complete 16x16 macroblocks */
+	if (codec && codec->src_fourcc == V4L2_PIX_FMT_H264) {
+		f->fmt.pix.width = round_up(f->fmt.pix.width, 16);
+		f->fmt.pix.height = round_up(f->fmt.pix.height, 16);
+		f->fmt.pix.bytesperline = f->fmt.pix.width;
+		f->fmt.pix.sizeimage = f->fmt.pix.bytesperline *
+				       f->fmt.pix.height * 3 / 2;
+	}
+
+	return 0;
 }
 
 static int vidioc_try_fmt_vid_out(struct file *file, void *priv,
@@ -644,11 +707,35 @@ static int vidioc_expbuf(struct file *file, void *priv,
 	return v4l2_m2m_expbuf(file, ctx->m2m_ctx, eb);
 }
 
+static bool coda_buf_is_end_of_stream(struct coda_ctx *ctx,
+				      struct v4l2_buffer *buf)
+{
+	struct vb2_queue *src_vq;
+
+	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+
+	return ((ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) &&
+		(buf->sequence == (ctx->qsequence - 1)));
+}
+
 static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *buf)
 {
 	struct coda_ctx *ctx = fh_to_ctx(priv);
+	int ret;
+
+	ret = v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf);
 
-	return v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf);
+	/* If this is the last capture buffer, emit an end-of-stream event */
+	if (buf->type == V4L2_BUF_TYPE_VIDEO_CAPTURE &&
+	    coda_buf_is_end_of_stream(ctx, buf)) {
+		const struct v4l2_event eos_event = {
+			.type = V4L2_EVENT_EOS
+		};
+
+		v4l2_event_queue_fh(&ctx->fh, &eos_event);
+	}
+
+	return ret;
 }
 
 static int vidioc_streamon(struct file *file, void *priv,
@@ -663,8 +750,53 @@ static int vidioc_streamoff(struct file *file, void *priv,
 			    enum v4l2_buf_type type)
 {
 	struct coda_ctx *ctx = fh_to_ctx(priv);
+	int ret;
+
+	/*
+	 * This indirectly calls __vb2_queue_cancel, which dequeues all buffers.
+	 * We therefore have to lock it against running hardware in this context,
+	 * which still needs the buffers.
+	 */
+	mutex_lock(&ctx->buffer_mutex);
+	ret = v4l2_m2m_streamoff(file, ctx->m2m_ctx, type);
+	mutex_unlock(&ctx->buffer_mutex);
 
-	return v4l2_m2m_streamoff(file, ctx->m2m_ctx, type);
+	return ret;
+}
+
+static int vidioc_decoder_cmd(struct file *file, void *fh,
+			      struct v4l2_decoder_cmd *dc)
+{
+	struct coda_ctx *ctx = fh_to_ctx(fh);
+
+	if (dc->cmd != V4L2_DEC_CMD_STOP)
+		return -EINVAL;
+
+	if ((dc->flags & V4L2_DEC_CMD_STOP_TO_BLACK) ||
+	    (dc->flags & V4L2_DEC_CMD_STOP_IMMEDIATELY))
+		return -EINVAL;
+
+	if (dc->stop.pts != 0)
+		return -EINVAL;
+
+	if (ctx->inst_type != CODA_INST_DECODER)
+		return -EINVAL;
+
+	/* Set the stream-end flag on this context */
+	ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
+
+	return 0;
+}
+
+static int vidioc_subscribe_event(struct v4l2_fh *fh,
+				  const struct v4l2_event_subscription *sub)
+{
+	switch (sub->type) {
+	case V4L2_EVENT_EOS:
+		return v4l2_event_subscribe(fh, sub, 0, NULL);
+	default:
+		return v4l2_ctrl_subscribe_event(fh, sub);
+	}
 }
 
 static const struct v4l2_ioctl_ops coda_ioctl_ops = {
@@ -689,8 +821,22 @@ static const struct v4l2_ioctl_ops coda_ioctl_ops = {
 
 	.vidioc_streamon	= vidioc_streamon,
 	.vidioc_streamoff	= vidioc_streamoff,
+
+	.vidioc_decoder_cmd	= vidioc_decoder_cmd,
+
+	.vidioc_subscribe_event = vidioc_subscribe_event,
+	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
 };
 
+static int coda_start_decoding(struct coda_ctx *ctx);
+
+static void coda_skip_run(struct work_struct *work)
+{
+	struct coda_ctx *ctx = container_of(work, struct coda_ctx, skip_run);
+
+	v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->m2m_ctx);
+}
+
 static inline int coda_get_bitstream_payload(struct coda_ctx *ctx)
 {
 	return kfifo_len(&ctx->bitstream_fifo);
@@ -771,6 +917,8 @@ static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
 	if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
 		coda_kfifo_sync_to_device_write(ctx);
 
+	ctx->prescan_failed = false;
+
 	return true;
 }
 
@@ -793,6 +941,84 @@ static void coda_fill_bitstream(struct coda_ctx *ctx)
 /*
  * Mem-to-mem operations.
  */
+static int coda_prepare_decode(struct coda_ctx *ctx)
+{
+	struct vb2_buffer *dst_buf;
+	struct coda_dev *dev = ctx->dev;
+	struct coda_q_data *q_data_dst;
+	u32 stridey, height;
+	u32 picture_y, picture_cb, picture_cr;
+
+	dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
+	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+
+	if (ctx->params.rot_mode & CODA_ROT_90) {
+		stridey = q_data_dst->height;
+		height = q_data_dst->width;
+	} else {
+		stridey = q_data_dst->width;
+		height = q_data_dst->height;
+	}
+
+	/* Try to copy source buffer contents into the bitstream ringbuffer */
+	mutex_lock(&ctx->bitstream_mutex);
+	coda_fill_bitstream(ctx);
+	mutex_unlock(&ctx->bitstream_mutex);
+
+	if (coda_get_bitstream_payload(ctx) < 512 &&
+	    (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
+		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
+			 "bitstream payload: %d, skipping\n",
+			 coda_get_bitstream_payload(ctx));
+		schedule_work(&ctx->skip_run);
+		return -EAGAIN;
+	}
+
+	/* Run coda_start_decoding (again) if not yet initialized */
+	if (!ctx->initialized) {
+		int ret = coda_start_decoding(ctx);
+		if (ret < 0) {
+			v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
+			schedule_work(&ctx->skip_run);
+			return -EAGAIN;
+		} else {
+			ctx->initialized = 1;
+		}
+	}
+
+	/* Set rotator output */
+	picture_y = vb2_dma_contig_plane_dma_addr(dst_buf, 0);
+	if (q_data_dst->fourcc == V4L2_PIX_FMT_YVU420) {
+		/* Switch Cr and Cb for YVU420 format */
+		picture_cr = picture_y + stridey * height;
+		picture_cb = picture_cr + stridey / 2 * height / 2;
+	} else {
+		picture_cb = picture_y + stridey * height;
+		picture_cr = picture_cb + stridey / 2 * height / 2;
+	}
+	coda_write(dev, picture_y, CODA_CMD_DEC_PIC_ROT_ADDR_Y);
+	coda_write(dev, picture_cb, CODA_CMD_DEC_PIC_ROT_ADDR_CB);
+	coda_write(dev, picture_cr, CODA_CMD_DEC_PIC_ROT_ADDR_CR);
+	coda_write(dev, stridey, CODA_CMD_DEC_PIC_ROT_STRIDE);
+	coda_write(dev, CODA_ROT_MIR_ENABLE | ctx->params.rot_mode,
+			CODA_CMD_DEC_PIC_ROT_MODE);
+
+	switch (dev->devtype->product) {
+	case CODA_DX6:
+		/* TBD */
+	case CODA_7541:
+		coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
+		break;
+	}
+
+	coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
+
+	coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
+	coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
+
+	return 0;
+}
+
 static void coda_prepare_encode(struct coda_ctx *ctx)
 {
 	struct coda_q_data *q_data_src, *q_data_dst;
@@ -810,9 +1036,9 @@ static void coda_prepare_encode(struct coda_ctx *ctx)
 	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
 	dst_fourcc = q_data_dst->fourcc;
 
-	src_buf->v4l2_buf.sequence = ctx->isequence;
-	dst_buf->v4l2_buf.sequence = ctx->isequence;
-	ctx->isequence++;
+	src_buf->v4l2_buf.sequence = ctx->osequence;
+	dst_buf->v4l2_buf.sequence = ctx->osequence;
+	ctx->osequence++;
 
 	/*
 	 * Workaround coda firmware BUG that only marks the first
@@ -920,15 +1146,36 @@ static void coda_device_run(void *m2m_priv)
 {
 	struct coda_ctx *ctx = m2m_priv;
 	struct coda_dev *dev = ctx->dev;
+	int ret;
 
-	mutex_lock(&dev->coda_mutex);
+	mutex_lock(&ctx->buffer_mutex);
 
-	coda_prepare_encode(ctx);
+	/*
+	 * If streamoff dequeued all buffers before we could get the lock,
+	 * just bail out immediately.
+	 */
+	if ((!v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) &&
+	    ctx->inst_type != CODA_INST_DECODER) ||
+		!v4l2_m2m_num_dst_bufs_ready(ctx->m2m_ctx)) {
+		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
+			"%d: device_run without buffers\n", ctx->idx);
+		mutex_unlock(&ctx->buffer_mutex);
+		schedule_work(&ctx->skip_run);
+		return;
+	}
 
-	if (dev->devtype->product == CODA_7541) {
-		coda_write(dev, CODA7_USE_BIT_ENABLE | CODA7_USE_HOST_BIT_ENABLE |
-				CODA7_USE_ME_ENABLE | CODA7_USE_HOST_ME_ENABLE,
-				CODA7_REG_BIT_AXI_SRAM_USE);
+	mutex_lock(&dev->coda_mutex);
+
+	if (ctx->inst_type == CODA_INST_DECODER) {
+		ret = coda_prepare_decode(ctx);
+		if (ret < 0) {
+			mutex_unlock(&dev->coda_mutex);
+			mutex_unlock(&ctx->buffer_mutex);
+			/* job_finish scheduled by prepare_decode */
+			return;
+		}
+	} else {
+		coda_prepare_encode(ctx);
 	}
 
 	if (dev->devtype->product != CODA_DX6)
@@ -938,6 +1185,8 @@ static void coda_device_run(void *m2m_priv)
 	/* 1 second timeout in case CODA locks up */
 	schedule_delayed_work(&dev->timeout, HZ);
 
+	if (ctx->inst_type == CODA_INST_DECODER)
+		coda_kfifo_sync_to_device_full(ctx);
 	coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
 }
 
@@ -963,6 +1212,16 @@ static int coda_job_ready(void *m2m_priv)
 		return 0;
 	}
 
+	if (ctx->prescan_failed ||
+	    ((ctx->inst_type == CODA_INST_DECODER) &&
+	     (coda_get_bitstream_payload(ctx) < 512) &&
+	     !(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
+		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
+			 "%d: not ready: not enough bitstream data.\n",
+			 ctx->idx);
+		return 0;
+	}
+
 	if (ctx->aborting) {
 		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
 			 "not ready: aborting\n");
@@ -1078,7 +1337,29 @@ static int coda_buf_prepare(struct vb2_buffer *vb)
 static void coda_buf_queue(struct vb2_buffer *vb)
 {
 	struct coda_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
-	v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
+	struct coda_q_data *q_data;
+
+	q_data = get_q_data(ctx, vb->vb2_queue->type);
+
+	/*
+	 * In the decoder case, immediately try to copy the buffer into the
+	 * bitstream ringbuffer and mark it as ready to be dequeued.
+	 */
+	if (q_data->fourcc == V4L2_PIX_FMT_H264 &&
+	    vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
+		/*
+		 * For backwards compatibility, queuing an empty buffer marks
+		 * the stream end
+		 */
+		if (vb2_get_plane_payload(vb, 0) == 0)
+			ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
+		mutex_lock(&ctx->bitstream_mutex);
+		v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
+		coda_fill_bitstream(ctx);
+		mutex_unlock(&ctx->bitstream_mutex);
+	} else {
+		v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
+	}
 }
 
 static void coda_wait_prepare(struct vb2_queue *q)
@@ -1366,6 +1647,8 @@ static void coda_free_context_buffers(struct coda_ctx *ctx)
 {
 	struct coda_dev *dev = ctx->dev;
 
+	coda_free_aux_buf(dev, &ctx->slicebuf);
+	coda_free_aux_buf(dev, &ctx->psbuf);
 	if (dev->devtype->product != CODA_DX6)
 		coda_free_aux_buf(dev, &ctx->workbuf);
 }
@@ -1385,12 +1668,40 @@ static int coda_alloc_context_buffers(struct coda_ctx *ctx,
 		return 0;
 	}
 
+	if (ctx->psbuf.vaddr) {
+		v4l2_err(&dev->v4l2_dev, "psmembuf still allocated\n");
+		return -EBUSY;
+	}
+	if (ctx->slicebuf.vaddr) {
+		v4l2_err(&dev->v4l2_dev, "slicebuf still allocated\n");
+		return -EBUSY;
+	}
 	if (ctx->workbuf.vaddr) {
 		v4l2_err(&dev->v4l2_dev, "context buffer still allocated\n");
 		ret = -EBUSY;
 		return -ENOMEM;
 	}
 
+	if (q_data->fourcc == V4L2_PIX_FMT_H264) {
+		/* worst case slice size */
+		size = (DIV_ROUND_UP(q_data->width, 16) *
+			DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
+		ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size);
+		if (ret < 0) {
+			v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte slice buffer",
+				 ctx->slicebuf.size);
+			return ret;
+		}
+	}
+
+	if (dev->devtype->product == CODA_7541) {
+		ret = coda_alloc_context_buf(ctx, &ctx->psbuf, CODA7_PS_BUF_SIZE);
+		if (ret < 0) {
+			v4l2_err(&dev->v4l2_dev, "failed to allocate psmem buffer");
+			goto err;
+		}
+	}
+
 	ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size);
 	if (ret < 0) {
 		v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte context buffer",
@@ -1405,6 +1716,148 @@ err:
 	return ret;
 }
 
+static int coda_start_decoding(struct coda_ctx *ctx)
+{
+	struct coda_q_data *q_data_src, *q_data_dst;
+	u32 bitstream_buf, bitstream_size;
+	struct coda_dev *dev = ctx->dev;
+	int width, height;
+	u32 src_fourcc;
+	u32 val;
+	int ret;
+
+	/* Start decoding */
+	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+	bitstream_buf = ctx->bitstream.paddr;
+	bitstream_size = ctx->bitstream.size;
+	src_fourcc = q_data_src->fourcc;
+
+	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
+
+	/* Update coda bitstream read and write pointers from kfifo */
+	coda_kfifo_sync_to_device_full(ctx);
+
+	ctx->display_idx = -1;
+	ctx->frm_dis_flg = 0;
+	coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
+
+	coda_write(dev, CODA_BIT_DEC_SEQ_INIT_ESCAPE,
+			CODA_REG_BIT_BIT_STREAM_PARAM);
+
+	coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
+	coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
+	val = 0;
+	if (dev->devtype->product == CODA_7541)
+		val |= CODA_REORDER_ENABLE;
+	coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
+
+	ctx->params.codec_mode = ctx->codec->mode;
+	ctx->params.codec_mode_aux = 0;
+	if (src_fourcc == V4L2_PIX_FMT_H264) {
+		if (dev->devtype->product == CODA_7541) {
+			coda_write(dev, ctx->psbuf.paddr,
+					CODA_CMD_DEC_SEQ_PS_BB_START);
+			coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
+					CODA_CMD_DEC_SEQ_PS_BB_SIZE);
+		}
+	}
+
+	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT)) {
+		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
+		coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
+		return -ETIMEDOUT;
+	}
+
+	/* Update kfifo out pointer from coda bitstream read pointer */
+	coda_kfifo_sync_from_device(ctx);
+
+	coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
+
+	if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
+		v4l2_err(&dev->v4l2_dev,
+			"CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
+			coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
+		return -EAGAIN;
+	}
+
+	val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
+	if (dev->devtype->product == CODA_DX6) {
+		width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
+		height = val & CODADX6_PICHEIGHT_MASK;
+	} else {
+		width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
+		height = val & CODA7_PICHEIGHT_MASK;
+	}
+
+	if (width > q_data_dst->width || height > q_data_dst->height) {
+		v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
+			 width, height, q_data_dst->width, q_data_dst->height);
+		return -EINVAL;
+	}
+
+	width = round_up(width, 16);
+	height = round_up(height, 16);
+
+	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
+		 __func__, ctx->idx, width, height);
+
+	ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED) + 1;
+	if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
+		v4l2_err(&dev->v4l2_dev,
+			 "not enough framebuffers to decode (%d < %d)\n",
+			 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
+		return -EINVAL;
+	}
+
+	ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
+	if (ret < 0)
+		return ret;
+
+	/* Tell the decoder how many frame buffers we allocated. */
+	coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
+	coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
+
+	if (dev->devtype->product != CODA_DX6) {
+		/* Set secondary AXI IRAM */
+		coda_setup_iram(ctx);
+
+		coda_write(dev, ctx->iram_info.buf_bit_use,
+				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
+		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
+				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
+		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
+				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
+		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
+				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
+		coda_write(dev, ctx->iram_info.buf_ovl_use,
+				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
+	}
+
+	if (src_fourcc == V4L2_PIX_FMT_H264) {
+		coda_write(dev, ctx->slicebuf.paddr,
+				CODA_CMD_SET_FRAME_SLICE_BB_START);
+		coda_write(dev, ctx->slicebuf.size / 1024,
+				CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
+	}
+
+	if (dev->devtype->product == CODA_7541) {
+		int max_mb_x = 1920 / 16;
+		int max_mb_y = 1088 / 16;
+		int max_mb_num = max_mb_x * max_mb_y;
+		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
+				CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
+	}
+
+	if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
+		v4l2_err(&ctx->dev->v4l2_dev,
+			 "CODA_COMMAND_SET_FRAME_BUF timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer *buf,
 			      int header_code, u8 *header, int *size)
 {
@@ -1439,26 +1892,36 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 	u32 value;
 	int ret = 0;
 
-	if (count < 1)
-		return -EINVAL;
+	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
+		if (q_data_src->fourcc == V4L2_PIX_FMT_H264) {
+			if (coda_get_bitstream_payload(ctx) < 512)
+				return -EINVAL;
+		} else {
+			if (count < 1)
+				return -EINVAL;
+		}
 
-	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
 		ctx->streamon_out = 1;
-	else
-		ctx->streamon_cap = 1;
 
-	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
-	if (ctx->streamon_out) {
 		if (coda_format_is_yuv(q_data_src->fourcc))
 			ctx->inst_type = CODA_INST_ENCODER;
 		else
 			ctx->inst_type = CODA_INST_DECODER;
+	} else {
+		if (count < 1)
+			return -EINVAL;
+
+		ctx->streamon_cap = 1;
 	}
 
 	/* Don't start the coda unless both queues are on */
 	if (!(ctx->streamon_out & ctx->streamon_cap))
 		return 0;
 
+	/* Allow device_run with no buffers queued and after streamoff */
+	v4l2_m2m_set_src_buffered(ctx->m2m_ctx, true);
+
 	ctx->gopcounter = ctx->params.gop_size - 1;
 	buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
 	bitstream_buf = vb2_dma_contig_plane_dma_addr(buf, 0);
@@ -1478,6 +1941,20 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 	if (ret < 0)
 		return ret;
 
+	if (ctx->inst_type == CODA_INST_DECODER) {
+		mutex_lock(&dev->coda_mutex);
+		ret = coda_start_decoding(ctx);
+		mutex_unlock(&dev->coda_mutex);
+		if (ret == -EAGAIN) {
+			return 0;
+		} else if (ret < 0) {
+			return ret;
+		} else {
+			ctx->initialized = 1;
+			return 0;
+		}
+	}
+
 	if (!coda_is_initialized(dev)) {
 		v4l2_err(v4l2_dev, "coda is not initialized.\n");
 		return -EFAULT;
@@ -1619,6 +2096,9 @@ static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 
 	coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
 	coda_write(dev, round_up(q_data_src->width, 8), CODA_CMD_SET_FRAME_BUF_STRIDE);
+	if (dev->devtype->product == CODA_7541)
+		coda_write(dev, round_up(q_data_src->width, 8),
+				CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
 	if (dev->devtype->product != CODA_DX6) {
 		coda_write(dev, ctx->iram_info.buf_bit_use,
 				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
@@ -1710,32 +2190,26 @@ static int coda_stop_streaming(struct vb2_queue *q)
 	struct coda_dev *dev = ctx->dev;
 
 	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
-		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
+		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
 			 "%s: output\n", __func__);
 		ctx->streamon_out = 0;
+
+		ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
+
+		ctx->isequence = 0;
 	} else {
-		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
+		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
 			 "%s: capture\n", __func__);
 		ctx->streamon_cap = 0;
-	}
-
-	/* Don't stop the coda unless both queues are off */
-	if (ctx->streamon_out || ctx->streamon_cap)
-		return 0;
 
-	cancel_delayed_work(&dev->timeout);
-
-	mutex_lock(&dev->coda_mutex);
-	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
-		 "%s: sent command 'SEQ_END' to coda\n", __func__);
-	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
-		v4l2_err(&dev->v4l2_dev,
-			 "CODA_COMMAND_SEQ_END failed\n");
-		return -ETIMEDOUT;
+		ctx->osequence = 0;
 	}
-	mutex_unlock(&dev->coda_mutex);
 
-	coda_free_framebuffers(ctx);
+	if (!ctx->streamon_out && !ctx->streamon_cap) {
+		kfifo_init(&ctx->bitstream_fifo,
+			ctx->bitstream.vaddr, ctx->bitstream.size);
+		ctx->runcounter = 0;
+	}
 
 	return 0;
 }
@@ -1895,7 +2369,7 @@ static int coda_open(struct file *file)
 {
 	struct coda_dev *dev = video_drvdata(file);
 	struct coda_ctx *ctx = NULL;
-	int ret = 0;
+	int ret;
 	int idx;
 
 	idx = coda_next_free_instance(dev);
@@ -1907,6 +2381,7 @@ static int coda_open(struct file *file)
 	if (!ctx)
 		return -ENOMEM;
 
+	INIT_WORK(&ctx->skip_run, coda_skip_run);
 	v4l2_fh_init(&ctx->fh, video_devdata(file));
 	file->private_data = &ctx->fh;
 	v4l2_fh_add(&ctx->fh);
@@ -1954,6 +2429,7 @@ static int coda_open(struct file *file)
 	kfifo_init(&ctx->bitstream_fifo,
 		ctx->bitstream.vaddr, ctx->bitstream.size);
 	mutex_init(&ctx->bitstream_mutex);
+	mutex_init(&ctx->buffer_mutex);
 
 	coda_lock(ctx);
 	list_add(&ctx->list, &dev->instances);
@@ -1982,6 +2458,23 @@ static int coda_release(struct file *file)
 	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "Releasing instance %p\n",
 		 ctx);
 
+	/* If this instance is running, call .job_abort and wait for it to end */
+	v4l2_m2m_ctx_release(ctx->m2m_ctx);
+
+	/* In case the instance was not running, we still need to call SEQ_END */
+	mutex_lock(&dev->coda_mutex);
+	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
+		 "%s: sent command 'SEQ_END' to coda\n", __func__);
+	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
+		v4l2_err(&dev->v4l2_dev,
+			 "CODA_COMMAND_SEQ_END failed\n");
+		mutex_unlock(&dev->coda_mutex);
+		return -ETIMEDOUT;
+	}
+	mutex_unlock(&dev->coda_mutex);
+
+	coda_free_framebuffers(ctx);
+
 	coda_lock(ctx);
 	list_del(&ctx->list);
 	coda_unlock(ctx);
@@ -2032,7 +2525,159 @@ static const struct v4l2_file_operations coda_fops = {
 	.mmap		= coda_mmap,
 };
 
-static void coda_encode_finish(struct coda_ctx *ctx)
+static void coda_finish_decode(struct coda_ctx *ctx)
+{
+	struct coda_dev *dev = ctx->dev;
+	struct coda_q_data *q_data_src;
+	struct coda_q_data *q_data_dst;
+	struct vb2_buffer *dst_buf;
+	int width, height;
+	int decoded_idx;
+	int display_idx;
+	u32 src_fourcc;
+	int success;
+	u32 val;
+
+	dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
+
+	/* Update kfifo out pointer from coda bitstream read pointer */
+	coda_kfifo_sync_from_device(ctx);
+
+	/*
+	 * in stream-end mode, the read pointer can overshoot the write pointer
+	 * by up to 512 bytes
+	 */
+	if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
+		if (coda_get_bitstream_payload(ctx) >= 0x100000 - 512)
+			kfifo_init(&ctx->bitstream_fifo,
+				ctx->bitstream.vaddr, ctx->bitstream.size);
+	}
+
+	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+	src_fourcc = q_data_src->fourcc;
+
+	val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
+	if (val != 1)
+		pr_err("DEC_PIC_SUCCESS = %d\n", val);
+
+	success = val & 0x1;
+	if (!success)
+		v4l2_err(&dev->v4l2_dev, "decode failed\n");
+
+	if (src_fourcc == V4L2_PIX_FMT_H264) {
+		if (val & (1 << 3))
+			v4l2_err(&dev->v4l2_dev,
+				 "insufficient PS buffer space (%d bytes)\n",
+				 ctx->psbuf.size);
+		if (val & (1 << 2))
+			v4l2_err(&dev->v4l2_dev,
+				 "insufficient slice buffer space (%d bytes)\n",
+				 ctx->slicebuf.size);
+	}
+
+	val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
+	width = (val >> 16) & 0xffff;
+	height = val & 0xffff;
+
+	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+
+	val = coda_read(dev, CODA_RET_DEC_PIC_TYPE);
+	if ((val & 0x7) == 0) {
+		dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
+		dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_PFRAME;
+	} else {
+		dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_PFRAME;
+		dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
+	}
+
+	val = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
+	if (val > 0)
+		v4l2_err(&dev->v4l2_dev,
+			 "errors in %d macroblocks\n", val);
+
+	if (dev->devtype->product == CODA_7541) {
+		val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
+		if (val == 0) {
+			/* not enough bitstream data */
+			v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
+				 "prescan failed: %d\n", val);
+			ctx->prescan_failed = true;
+			return;
+		}
+	}
+
+	ctx->frm_dis_flg = coda_read(dev, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
+
+	/*
+	 * The previous display frame was copied out by the rotator,
+	 * now it can be overwritten again
+	 */
+	if (ctx->display_idx >= 0 &&
+	    ctx->display_idx < ctx->num_internal_frames) {
+		ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
+		coda_write(dev, ctx->frm_dis_flg,
+				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
+	}
+
+	/*
+	 * The index of the last decoded frame, not necessarily in
+	 * display order, and the index of the next display frame.
+	 * The latter could have been decoded in a previous run.
+	 */
+	decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
+	display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
+
+	if (decoded_idx == -1) {
+		/* no frame was decoded, but we might have a display frame */
+		if (display_idx < 0 && ctx->display_idx < 0)
+			ctx->prescan_failed = true;
+	} else if (decoded_idx == -2) {
+		/* no frame was decoded, we still return the remaining buffers */
+	} else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
+		v4l2_err(&dev->v4l2_dev,
+			 "decoded frame index out of range: %d\n", decoded_idx);
+	}
+
+	if (display_idx == -1) {
+		/*
+		 * no more frames to be decoded, but there could still
+		 * be rotator output to dequeue
+		 */
+		ctx->prescan_failed = true;
+	} else if (display_idx == -3) {
+		/* possibly prescan failure */
+	} else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
+		v4l2_err(&dev->v4l2_dev,
+			 "presentation frame index out of range: %d\n",
+			 display_idx);
+	}
+
+	/* If a frame was copied out, return it */
+	if (ctx->display_idx >= 0 &&
+	    ctx->display_idx < ctx->num_internal_frames) {
+		dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
+		dst_buf->v4l2_buf.sequence = ctx->osequence++;
+
+		vb2_set_plane_payload(dst_buf, 0, width * height * 3 / 2);
+
+		v4l2_m2m_buf_done(dst_buf, success ? VB2_BUF_STATE_DONE :
+						     VB2_BUF_STATE_ERROR);
+
+		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
+			"job finished: decoding frame (%d) (%s)\n",
+			dst_buf->v4l2_buf.sequence,
+			(dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ?
+			"KEYFRAME" : "PFRAME");
+	} else {
+		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
+			"job finished: no frame decoded\n");
+	}
+
+	/* The rotator will copy the current display frame next time */
+	ctx->display_idx = display_idx;
+}
+
+static void coda_finish_encode(struct coda_ctx *ctx)
 {
 	struct vb2_buffer *src_buf, *dst_buf;
 	struct coda_dev *dev = ctx->dev;
@@ -2109,8 +2754,7 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
 	if (ctx->aborting) {
 		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
 			 "task has been aborted\n");
-		mutex_unlock(&dev->coda_mutex);
-		return IRQ_HANDLED;
+		goto out;
 	}
 
 	if (coda_isbusy(ctx->dev)) {
@@ -2119,9 +2763,29 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
 		return IRQ_NONE;
 	}
 
-	coda_encode_finish(ctx);
+	if (ctx->inst_type == CODA_INST_DECODER)
+		coda_finish_decode(ctx);
+	else
+		coda_finish_encode(ctx);
+
+out:
+	if (ctx->aborting || (!ctx->streamon_cap && !ctx->streamon_out)) {
+		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
+			 "%s: sent command 'SEQ_END' to coda\n", __func__);
+		if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
+			v4l2_err(&dev->v4l2_dev,
+				 "CODA_COMMAND_SEQ_END failed\n");
+		}
+
+		kfifo_init(&ctx->bitstream_fifo,
+			ctx->bitstream.vaddr, ctx->bitstream.size);
+
+		coda_free_framebuffers(ctx);
+		coda_free_context_buffers(ctx);
+	}
 
 	mutex_unlock(&dev->coda_mutex);
+	mutex_unlock(&ctx->buffer_mutex);
 
 	v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->m2m_ctx);
 
@@ -2138,6 +2802,8 @@ static void coda_timeout(struct work_struct *work)
 
 	mutex_lock(&dev->dev_mutex);
 	list_for_each_entry(ctx, &dev->instances, list) {
+		if (mutex_is_locked(&ctx->buffer_mutex))
+			mutex_unlock(&ctx->buffer_mutex);
 		v4l2_m2m_streamoff(NULL, ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
 		v4l2_m2m_streamoff(NULL, ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
 	}
@@ -2218,6 +2884,7 @@ static int coda_hw_init(struct coda_dev *dev)
 	if (dev->devtype->product == CODA_7541) {
 		coda_write(dev, dev->tempbuf.paddr,
 				CODA_REG_BIT_TEMP_BUF_ADDR);
+		coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
 	} else {
 		coda_write(dev, dev->workbuf.paddr,
 			      CODA_REG_BIT_WORK_BUF_ADDR);
@@ -2462,8 +3129,8 @@ static int coda_probe(struct platform_device *pdev)
 		return -ENOENT;
 	}
 
-	if (devm_request_irq(&pdev->dev, irq, coda_irq_handler,
-		0, CODA_NAME, dev) < 0) {
+	if (devm_request_threaded_irq(&pdev->dev, irq, NULL, coda_irq_handler,
+		IRQF_ONESHOT, CODA_NAME, dev) < 0) {
 		dev_err(&pdev->dev, "failed to request irq\n");
 		return -ENOENT;
 	}
@@ -2521,10 +3188,14 @@ static int coda_probe(struct platform_device *pdev)
 		}
 	}
 
-	if (dev->devtype->product == CODA_DX6)
+	switch (dev->devtype->product) {
+	case CODA_DX6:
 		dev->iram_size = CODADX6_IRAM_SIZE;
-	else
+		break;
+	case CODA_7541:
 		dev->iram_size = CODA7_IRAM_SIZE;
+		break;
+	}
 	dev->iram_vaddr = gen_pool_alloc(dev->iram_pool, dev->iram_size);
 	if (!dev->iram_vaddr) {
 		dev_err(&pdev->dev, "unable to alloc iram\n");
diff --git a/drivers/media/platform/coda.h b/drivers/media/platform/coda.h
index 140eea5..4e32e2e 100644
--- a/drivers/media/platform/coda.h
+++ b/drivers/media/platform/coda.h
@@ -49,6 +49,7 @@
 #define CODA_REG_BIT_TEMP_BUF_ADDR		0x118
 #define CODA_REG_BIT_RD_PTR(x)			(0x120 + 8 * (x))
 #define CODA_REG_BIT_WR_PTR(x)			(0x124 + 8 * (x))
+#define CODA_REG_BIT_FRM_DIS_FLG(x)		(0x150 + 4 * (x))
 #define CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR	0x140
 #define CODA7_REG_BIT_AXI_SRAM_USE		0x140
 #define		CODA7_USE_HOST_ME_ENABLE	(1 << 11)
@@ -95,6 +96,7 @@
 #define 	CODA_MODE_INVALID		0xffff
 #define CODA_REG_BIT_INT_ENABLE		0x170
 #define		CODA_INT_INTERRUPT_ENABLE	(1 << 3)
+#define CODA_REG_BIT_INT_REASON			0x174
 #define CODA7_REG_BIT_RUN_AUX_STD		0x178
 #define		CODA_MP4_AUX_MPEG4		0
 #define		CODA_MP4_AUX_DIVX3		1
@@ -111,15 +113,89 @@
  * issued.
  */
 
+/* Decoder Sequence Initialization */
+#define CODA_CMD_DEC_SEQ_BB_START		0x180
+#define CODA_CMD_DEC_SEQ_BB_SIZE		0x184
+#define CODA_CMD_DEC_SEQ_OPTION			0x188
+#define		CODA_REORDER_ENABLE			(1 << 1)
+#define		CODADX6_QP_REPORT			(1 << 0)
+#define		CODA7_MP4_DEBLK_ENABLE			(1 << 0)
+#define CODA_CMD_DEC_SEQ_SRC_SIZE		0x18c
+#define CODA_CMD_DEC_SEQ_START_BYTE		0x190
+#define CODA_CMD_DEC_SEQ_PS_BB_START		0x194
+#define CODA_CMD_DEC_SEQ_PS_BB_SIZE		0x198
+#define CODA_CMD_DEC_SEQ_MP4_ASP_CLASS		0x19c
+#define CODA_CMD_DEC_SEQ_X264_MV_EN		0x19c
+#define CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE		0x1a0
+
+#define CODA7_RET_DEC_SEQ_ASPECT		0x1b0
+#define CODA_RET_DEC_SEQ_SUCCESS		0x1c0
+#define CODA_RET_DEC_SEQ_SRC_FMT		0x1c4 /* SRC_SIZE on CODA7 */
+#define CODA_RET_DEC_SEQ_SRC_SIZE		0x1c4
+#define CODA_RET_DEC_SEQ_SRC_F_RATE		0x1c8
+#define CODA9_RET_DEC_SEQ_ASPECT		0x1c8
+#define CODA_RET_DEC_SEQ_FRAME_NEED		0x1cc
+#define CODA_RET_DEC_SEQ_FRAME_DELAY		0x1d0
+#define CODA_RET_DEC_SEQ_INFO			0x1d4
+#define CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT	0x1d8
+#define CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM	0x1dc
+#define CODA_RET_DEC_SEQ_NEXT_FRAME_NUM		0x1e0
+#define CODA_RET_DEC_SEQ_ERR_REASON		0x1e0
+#define CODA_RET_DEC_SEQ_FRATE_NR		0x1e4
+#define CODA_RET_DEC_SEQ_FRATE_DR		0x1e8
+#define CODA_RET_DEC_SEQ_JPG_PARA		0x1e4
+#define CODA_RET_DEC_SEQ_JPG_THUMB_IND		0x1e8
+
+/* Decoder Picture Run */
+#define CODA_CMD_DEC_PIC_ROT_MODE		0x180
+#define CODA_CMD_DEC_PIC_ROT_ADDR_Y		0x184
+#define CODA_CMD_DEC_PIC_ROT_ADDR_CB		0x188
+#define CODA_CMD_DEC_PIC_ROT_ADDR_CR		0x18c
+#define CODA_CMD_DEC_PIC_ROT_STRIDE		0x190
+
+#define CODA_CMD_DEC_PIC_OPTION			0x194
+#define		CODA_PRE_SCAN_EN			(1 << 0)
+#define		CODA_PRE_SCAN_MODE_DECODE		(0 << 1)
+#define		CODA_PRE_SCAN_MODE_RETURN		(1 << 1)
+#define		CODA_IFRAME_SEARCH_EN			(1 << 2)
+#define		CODA_SKIP_FRAME_MODE			(0x3 << 3)
+#define CODA_CMD_DEC_PIC_SKIP_NUM		0x198
+#define CODA_CMD_DEC_PIC_CHUNK_SIZE		0x19c
+#define CODA_CMD_DEC_PIC_BB_START		0x1a0
+#define CODA_CMD_DEC_PIC_START_BYTE		0x1a4
+#define CODA_RET_DEC_PIC_SIZE			0x1bc
+#define CODA_RET_DEC_PIC_FRAME_NUM		0x1c0
+#define CODA_RET_DEC_PIC_FRAME_IDX		0x1c4
+#define CODA_RET_DEC_PIC_ERR_MB			0x1c8
+#define CODA_RET_DEC_PIC_TYPE			0x1cc
+#define		CODA_PIC_TYPE_MASK			0x7
+#define		CODA_PIC_TYPE_MASK_VC1			0x3f
+#define		CODA9_PIC_TYPE_FIRST_MASK		(0x7 << 3)
+#define		CODA9_PIC_TYPE_IDR_MASK			(0x3 << 6)
+#define		CODA7_PIC_TYPE_H264_NPF_MASK		(0x3 << 16)
+#define		CODA7_PIC_TYPE_INTERLACED		(1 << 18)
+#define CODA_RET_DEC_PIC_POST			0x1d0
+#define CODA_RET_DEC_PIC_MVC_REPORT		0x1d0
+#define CODA_RET_DEC_PIC_OPTION			0x1d4
+#define CODA_RET_DEC_PIC_SUCCESS		0x1d8
+#define CODA_RET_DEC_PIC_CUR_IDX		0x1dc
+#define CODA_RET_DEC_PIC_CROP_LEFT_RIGHT	0x1e0
+#define CODA_RET_DEC_PIC_CROP_TOP_BOTTOM	0x1e4
+#define CODA_RET_DEC_PIC_FRAME_NEED		0x1ec
+
 /* Encoder Sequence Initialization */
 #define CODA_CMD_ENC_SEQ_BB_START				0x180
 #define CODA_CMD_ENC_SEQ_BB_SIZE				0x184
 #define CODA_CMD_ENC_SEQ_OPTION				0x188
+#define		CODA7_OPTION_AVCINTRA16X16ONLY_OFFSET		9
 #define		CODA7_OPTION_GAMMA_OFFSET			8
+#define		CODA7_OPTION_RCQPMAX_OFFSET			7
 #define		CODADX6_OPTION_GAMMA_OFFSET			7
+#define		CODA7_OPTION_RCQPMIN_OFFSET			6
 #define		CODA_OPTION_LIMITQP_OFFSET			6
 #define		CODA_OPTION_RCINTRAQP_OFFSET			5
 #define		CODA_OPTION_FMO_OFFSET				4
+#define		CODA_OPTION_AVC_AUD_OFFSET			2
 #define		CODA_OPTION_SLICEREPORT_OFFSET			1
 #define CODA_CMD_ENC_SEQ_COD_STD				0x18c
 #define		CODA_STD_MPEG4					0
@@ -188,8 +264,10 @@
 #define		CODA_FMOPARAM_TYPE_MASK				1
 #define		CODA_FMOPARAM_SLICENUM_OFFSET			0
 #define		CODA_FMOPARAM_SLICENUM_MASK			0x0f
+#define CODADX6_CMD_ENC_SEQ_INTRA_QP				0x1bc
 #define CODA7_CMD_ENC_SEQ_SEARCH_BASE				0x1b8
 #define CODA7_CMD_ENC_SEQ_SEARCH_SIZE				0x1bc
+#define CODA7_CMD_ENC_SEQ_INTRA_QP				0x1c4
 #define CODA_CMD_ENC_SEQ_RC_QP_MAX				0x1c8
 #define		CODA_QPMAX_OFFSET				0
 #define		CODA_QPMAX_MASK					0x3f
@@ -216,18 +294,24 @@
 #define CODA_CMD_ENC_PIC_OPTION	0x194
 #define CODA_CMD_ENC_PIC_BB_START	0x198
 #define CODA_CMD_ENC_PIC_BB_SIZE	0x19c
+#define CODA_RET_ENC_FRAME_NUM		0x1c0
 #define CODA_RET_ENC_PIC_TYPE		0x1c4
+#define CODA_RET_ENC_PIC_FRAME_IDX	0x1c8
 #define CODA_RET_ENC_PIC_SLICE_NUM	0x1cc
 #define CODA_RET_ENC_PIC_FLAG		0x1d0
+#define CODA_RET_ENC_PIC_SUCCESS	0x1d8
 
 /* Set Frame Buffer */
 #define CODA_CMD_SET_FRAME_BUF_NUM		0x180
 #define CODA_CMD_SET_FRAME_BUF_STRIDE		0x184
+#define CODA_CMD_SET_FRAME_SLICE_BB_START	0x188
+#define CODA_CMD_SET_FRAME_SLICE_BB_SIZE	0x18c
 #define CODA7_CMD_SET_FRAME_AXI_BIT_ADDR	0x190
 #define CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR	0x194
 #define CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR	0x198
 #define CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR	0x19c
 #define CODA7_CMD_SET_FRAME_AXI_OVL_ADDR	0x1a0
+#define CODA7_CMD_SET_FRAME_MAX_DEC_SIZE	0x1a4
 #define CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE	0x1a8
 
 /* Encoder Header */
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 8/8] [media] coda: add CODA7541 decoding support
  2013-06-21  7:55 ` [PATCH v2 8/8] [media] coda: add CODA7541 decoding support Philipp Zabel
@ 2013-06-27 10:10   ` Kamil Debski
  2013-06-27 14:19     ` Philipp Zabel
  0 siblings, 1 reply; 15+ messages in thread
From: Kamil Debski @ 2013-06-27 10:10 UTC (permalink / raw)
  To: 'Philipp Zabel', linux-media
  Cc: 'Javier Martin', Sylwester Nawrocki,
	'Gaëtan Carlier', 'Wei Yongjun'

Hi Philipp,

This patch did not apply cleanly on my tree, which I find quite strange.
I applied it manually, but please check here whether the result is
correct:
http://git.linuxtv.org/kdebski/media.git/shortlog/refs/heads/master

Best wishes,
-- 
Kamil Debski
Linux Kernel Developer
Samsung R&D Institute Poland


> -----Original Message-----
> From: Philipp Zabel [mailto:p.zabel@pengutronix.de]
> Sent: Friday, June 21, 2013 9:56 AM
> To: linux-media@vger.kernel.org
> Cc: Kamil Debski; Javier Martin; Sylwester Nawrocki; Gaëtan Carlier;
> Wei Yongjun; Philipp Zabel
> Subject: [PATCH v2 8/8] [media] coda: add CODA7541 decoding support
> 
> This patch enables decoding of h.264 and mpeg4 streams on CODA7541.
> Queued output buffers are immediately copied into the bitstream
> ringbuffer. A device_run can be scheduled whenever there is either
> enough compressed bitstream data, or the CODA is in stream end mode.
> 
> Each successful device_run, data is read from the bitstream ringbuffer
> and a frame is decoded into a free internal framebuffer. Depending on
> reordering, a possibly previously decoded frame is marked as display
> frame, and at the same time the display frame from the previous run is
> copied out into a capture buffer by the rotator hardware.
> 
> The dequeued capture buffers are counted to send the EOS signal to
> userspace with the last frame. When userspace sends the decoder stop
> command or enqueues an empty output buffer, the stream end flag is set
> to allow decoding the remaining frames in the bitstream ringbuffer.
> 
> The enum_fmt/try_fmt functions return fixed capture buffer sizes while
> the output queue is streaming, to allow better autonegotiation in
> userspace.
> 
> A per-context buffer mutex is used to lock the picture run against
> buffer dequeueing: if a job gets queued, then streamoff dequeues the
> last buffer, and then device_run is called, bail out. For that the
> interrupt handler has to be threaded.
> 
> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
> ---
> Changes since v1:
>  - Included the fix by Wei Yongjun, adding a missing unlock in the
>    coda_stop_streaming() error handling case.
>  - Restricted check for available bitstream data in coda_job_ready()
>    to the decoder case.
> ---
>  drivers/media/platform/coda.c | 787
> ++++++++++++++++++++++++++++++++++++++----
>  drivers/media/platform/coda.h |  84 +++++
>  2 files changed, 813 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/media/platform/coda.c
> b/drivers/media/platform/coda.c index e8b3708..9cbdea6 100644
> --- a/drivers/media/platform/coda.c
> +++ b/drivers/media/platform/coda.c
> @@ -29,6 +29,7 @@
> 
>  #include <media/v4l2-ctrls.h>
>  #include <media/v4l2-device.h>
> +#include <media/v4l2-event.h>
>  #include <media/v4l2-ioctl.h>
>  #include <media/v4l2-mem2mem.h>
>  #include <media/videobuf2-core.h>
> @@ -47,9 +48,11 @@
>  #define CODA_PARA_BUF_SIZE	(10 * 1024)
>  #define CODA_ISRAM_SIZE	(2048 * 2)
>  #define CODADX6_IRAM_SIZE	0xb000
> -#define CODA7_IRAM_SIZE		0x14000 /* 81920 bytes */
> +#define CODA7_IRAM_SIZE		0x14000
> 
> -#define CODA_MAX_FRAMEBUFFERS	2
> +#define CODA7_PS_BUF_SIZE	0x28000
> +
> +#define CODA_MAX_FRAMEBUFFERS	8
> 
>  #define MAX_W		8192
>  #define MAX_H		8192
> @@ -178,12 +181,16 @@ struct coda_iram_info {
> 
>  struct coda_ctx {
>  	struct coda_dev			*dev;
> +	struct mutex			buffer_mutex;
>  	struct list_head		list;
> +	struct work_struct		skip_run;
>  	int				aborting;
> +	int				initialized;
>  	int				streamon_out;
>  	int				streamon_cap;
>  	u32				isequence;
>  	u32				qsequence;
> +	u32				osequence;
>  	struct coda_q_data		q_data[2];
>  	enum coda_inst_type		inst_type;
>  	struct coda_codec		*codec;
> @@ -193,12 +200,16 @@ struct coda_ctx {
>  	struct v4l2_ctrl_handler	ctrls;
>  	struct v4l2_fh			fh;
>  	int				gopcounter;
> +	int				runcounter;
>  	char				vpu_header[3][64];
>  	int				vpu_header_size[3];
>  	struct kfifo			bitstream_fifo;
>  	struct mutex			bitstream_mutex;
>  	struct coda_aux_buf		bitstream;
> +	bool				prescan_failed;
>  	struct coda_aux_buf		parabuf;
> +	struct coda_aux_buf		psbuf;
> +	struct coda_aux_buf		slicebuf;
>  	struct coda_aux_buf
> 	internal_frames[CODA_MAX_FRAMEBUFFERS];
>  	struct coda_aux_buf		workbuf;
>  	int				num_internal_frames;
> @@ -206,6 +217,8 @@ struct coda_ctx {
>  	int				reg_idx;
>  	struct coda_iram_info		iram_info;
>  	u32				bit_stream_param;
> +	u32				frm_dis_flg;
> +	int				display_idx;
>  };
> 
>  static const u8 coda_filler_nal[14] = { 0x00, 0x00, 0x00, 0x01, 0x0c,
> 0xff, @@ -257,6 +270,8 @@ static void coda_command_async(struct
> coda_ctx *ctx, int cmd)
>  		/* Restore context related registers to CODA */
>  		coda_write(dev, ctx->bit_stream_param,
>  				CODA_REG_BIT_BIT_STREAM_PARAM);
> +		coda_write(dev, ctx->frm_dis_flg,
> +				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
>  		coda_write(dev, ctx->workbuf.paddr,
> CODA_REG_BIT_WORK_BUF_ADDR);
>  	}
> 
> @@ -331,6 +346,8 @@ static struct coda_codec codadx6_codecs[] =
> {  static struct coda_codec coda7_codecs[] = {
>  	CODA_CODEC(CODA7_MODE_ENCODE_H264, V4L2_PIX_FMT_YUV420,
> V4L2_PIX_FMT_H264,   1280, 720),
>  	CODA_CODEC(CODA7_MODE_ENCODE_MP4,  V4L2_PIX_FMT_YUV420,
> V4L2_PIX_FMT_MPEG4,  1280, 720),
> +	CODA_CODEC(CODA7_MODE_DECODE_H264, V4L2_PIX_FMT_H264,
> V4L2_PIX_FMT_YUV420, 1920, 1080),
> +	CODA_CODEC(CODA7_MODE_DECODE_MP4,  V4L2_PIX_FMT_MPEG4,
> +V4L2_PIX_FMT_YUV420, 1920, 1080),
>  };
> 
>  static bool coda_format_is_yuv(u32 fourcc) @@ -399,7 +416,7 @@ static
> int vidioc_querycap(struct file *file, void *priv,  }
> 
>  static int enum_fmt(void *priv, struct v4l2_fmtdesc *f,
> -			enum v4l2_buf_type type)
> +			enum v4l2_buf_type type, int src_fourcc)
>  {
>  	struct coda_ctx *ctx = fh_to_ctx(priv);
>  	struct coda_codec *codecs = ctx->dev->devtype->codecs; @@ -411,7
> +428,8 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc *f,
> 
>  	for (i = 0; i < num_formats; i++) {
>  		/* Both uncompressed formats are always supported */
> -		if (coda_format_is_yuv(formats[i].fourcc)) {
> +		if (coda_format_is_yuv(formats[i].fourcc) &&
> +		    !coda_format_is_yuv(src_fourcc)) {
>  			if (num == f->index)
>  				break;
>  			++num;
> @@ -419,8 +437,10 @@ static int enum_fmt(void *priv, struct
> v4l2_fmtdesc *f,
>  		}
>  		/* Compressed formats may be supported, check the codec
> list */
>  		for (k = 0; k < num_codecs; k++) {
> +			/* if src_fourcc is set, only consider matching
> codecs */
>  			if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE &&
> -			    formats[i].fourcc == codecs[k].dst_fourcc)
> +			    formats[i].fourcc == codecs[k].dst_fourcc &&
> +			    (!src_fourcc || src_fourcc ==
> codecs[k].src_fourcc))
>  				break;
>  			if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
>  			    formats[i].fourcc == codecs[k].src_fourcc) @@ -
> 447,13 +467,26 @@ static int enum_fmt(void *priv, struct v4l2_fmtdesc
> *f,  static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
>  				   struct v4l2_fmtdesc *f)
>  {
> -	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +	struct coda_ctx *ctx = fh_to_ctx(priv);
> +	struct vb2_queue *src_vq;
> +	struct coda_q_data *q_data_src;
> +
> +	/* If the source format is already fixed, only list matching
> formats */
> +	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx,
> V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	if (vb2_is_streaming(src_vq)) {
> +		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +
> +		return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE,
> +				q_data_src->fourcc);
> +	}
> +
> +	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_CAPTURE, 0);
>  }
> 
>  static int vidioc_enum_fmt_vid_out(struct file *file, void *priv,
>  				   struct v4l2_fmtdesc *f)
>  {
> -	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	return enum_fmt(priv, f, V4L2_BUF_TYPE_VIDEO_OUTPUT, 0);
>  }
> 
>  static int vidioc_g_fmt(struct file *file, void *priv, struct
> v4l2_format *f) @@ -526,15 +559,45 @@ static int
> vidioc_try_fmt_vid_cap(struct file *file, void *priv,
>  				  struct v4l2_format *f)
>  {
>  	struct coda_ctx *ctx = fh_to_ctx(priv);
> -	struct coda_codec *codec = NULL;
> +	struct coda_codec *codec;
> +	struct vb2_queue *src_vq;
> +	int ret;
> +
> +	/*
> +	 * If the source format is already fixed, try to find a codec
> that
> +	 * converts to the given destination format
> +	 */
> +	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx,
> V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	if (vb2_is_streaming(src_vq)) {
> +		struct coda_q_data *q_data_src;
> 
> -	/* Determine codec by the encoded format */
> -	codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420,
> -				f->fmt.pix.pixelformat);
> +		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +		codec = coda_find_codec(ctx->dev, q_data_src->fourcc,
> +					f->fmt.pix.pixelformat);
> +		if (!codec)
> +			return -EINVAL;
> +	} else {
> +		/* Otherwise determine codec by encoded format, if possible
> */
> +		codec = coda_find_codec(ctx->dev, V4L2_PIX_FMT_YUV420,
> +					f->fmt.pix.pixelformat);
> +	}
> 
>  	f->fmt.pix.colorspace = ctx->colorspace;
> 
> -	return vidioc_try_fmt(codec, f);
> +	ret = vidioc_try_fmt(codec, f);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* The h.264 decoder only returns complete 16x16 macroblocks */
> +	if (codec && codec->src_fourcc == V4L2_PIX_FMT_H264) {
> +		f->fmt.pix.width = round_up(f->fmt.pix.width, 16);
> +		f->fmt.pix.height = round_up(f->fmt.pix.height, 16);
> +		f->fmt.pix.bytesperline = f->fmt.pix.width;
> +		f->fmt.pix.sizeimage = f->fmt.pix.bytesperline *
> +				       f->fmt.pix.height * 3 / 2;
> +	}
> +
> +	return 0;
>  }
> 
>  static int vidioc_try_fmt_vid_out(struct file *file, void *priv, @@ -
> 644,11 +707,35 @@ static int vidioc_expbuf(struct file *file, void
> *priv,
>  	return v4l2_m2m_expbuf(file, ctx->m2m_ctx, eb);  }
> 
> +static bool coda_buf_is_end_of_stream(struct coda_ctx *ctx,
> +				      struct v4l2_buffer *buf)
> +{
> +	struct vb2_queue *src_vq;
> +
> +	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx,
> V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +
> +	return ((ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) &&
> +		(buf->sequence == (ctx->qsequence - 1))); }
> +
>  static int vidioc_dqbuf(struct file *file, void *priv, struct
> v4l2_buffer *buf)  {
>  	struct coda_ctx *ctx = fh_to_ctx(priv);
> +	int ret;
> +
> +	ret = v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf);
> 
> -	return v4l2_m2m_dqbuf(file, ctx->m2m_ctx, buf);
> +	/* If this is the last capture buffer, emit an end-of-stream
> event */
> +	if (buf->type == V4L2_BUF_TYPE_VIDEO_CAPTURE &&
> +	    coda_buf_is_end_of_stream(ctx, buf)) {
> +		const struct v4l2_event eos_event = {
> +			.type = V4L2_EVENT_EOS
> +		};
> +
> +		v4l2_event_queue_fh(&ctx->fh, &eos_event);
> +	}
> +
> +	return ret;
>  }
> 
>  static int vidioc_streamon(struct file *file, void *priv, @@ -663,8
> +750,53 @@ static int vidioc_streamoff(struct file *file, void *priv,
>  			    enum v4l2_buf_type type)
>  {
>  	struct coda_ctx *ctx = fh_to_ctx(priv);
> +	int ret;
> +
> +	/*
> +	 * This indirectly calls __vb2_queue_cancel, which dequeues all
> buffers.
> +	 * We therefore have to lock it against running hardware in this
> context,
> +	 * which still needs the buffers.
> +	 */
> +	mutex_lock(&ctx->buffer_mutex);
> +	ret = v4l2_m2m_streamoff(file, ctx->m2m_ctx, type);
> +	mutex_unlock(&ctx->buffer_mutex);
> 
> -	return v4l2_m2m_streamoff(file, ctx->m2m_ctx, type);
> +	return ret;
> +}
> +
> +static int vidioc_decoder_cmd(struct file *file, void *fh,
> +			      struct v4l2_decoder_cmd *dc)
> +{
> +	struct coda_ctx *ctx = fh_to_ctx(fh);
> +
> +	if (dc->cmd != V4L2_DEC_CMD_STOP)
> +		return -EINVAL;
> +
> +	if ((dc->flags & V4L2_DEC_CMD_STOP_TO_BLACK) ||
> +	    (dc->flags & V4L2_DEC_CMD_STOP_IMMEDIATELY))
> +		return -EINVAL;
> +
> +	if (dc->stop.pts != 0)
> +		return -EINVAL;
> +
> +	if (ctx->inst_type != CODA_INST_DECODER)
> +		return -EINVAL;
> +
> +	/* Set the stream-end flag on this context */
> +	ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
> +
> +	return 0;
> +}
> +
> +static int vidioc_subscribe_event(struct v4l2_fh *fh,
> +				  const struct v4l2_event_subscription *sub) {
> +	switch (sub->type) {
> +	case V4L2_EVENT_EOS:
> +		return v4l2_event_subscribe(fh, sub, 0, NULL);
> +	default:
> +		return v4l2_ctrl_subscribe_event(fh, sub);
> +	}
>  }
> 
>  static const struct v4l2_ioctl_ops coda_ioctl_ops = { @@ -689,8
> +821,22 @@ static const struct v4l2_ioctl_ops coda_ioctl_ops = {
> 
>  	.vidioc_streamon	= vidioc_streamon,
>  	.vidioc_streamoff	= vidioc_streamoff,
> +
> +	.vidioc_decoder_cmd	= vidioc_decoder_cmd,
> +
> +	.vidioc_subscribe_event = vidioc_subscribe_event,
> +	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
>  };
> 
> +static int coda_start_decoding(struct coda_ctx *ctx);
> +
> +static void coda_skip_run(struct work_struct *work) {
> +	struct coda_ctx *ctx = container_of(work, struct coda_ctx,
> skip_run);
> +
> +	v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->m2m_ctx); }
> +
>  static inline int coda_get_bitstream_payload(struct coda_ctx *ctx)  {
>  	return kfifo_len(&ctx->bitstream_fifo); @@ -771,6 +917,8 @@
> static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
>  	if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
>  		coda_kfifo_sync_to_device_write(ctx);
> 
> +	ctx->prescan_failed = false;
> +
>  	return true;
>  }
> 
> @@ -793,6 +941,84 @@ static void coda_fill_bitstream(struct coda_ctx
> *ctx)
>  /*
>   * Mem-to-mem operations.
>   */
> +static int coda_prepare_decode(struct coda_ctx *ctx) {
> +	struct vb2_buffer *dst_buf;
> +	struct coda_dev *dev = ctx->dev;
> +	struct coda_q_data *q_data_dst;
> +	u32 stridey, height;
> +	u32 picture_y, picture_cb, picture_cr;
> +
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
> +	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +
> +	if (ctx->params.rot_mode & CODA_ROT_90) {
> +		stridey = q_data_dst->height;
> +		height = q_data_dst->width;
> +	} else {
> +		stridey = q_data_dst->width;
> +		height = q_data_dst->height;
> +	}
> +
> +	/* Try to copy source buffer contents into the bitstream
> ringbuffer */
> +	mutex_lock(&ctx->bitstream_mutex);
> +	coda_fill_bitstream(ctx);
> +	mutex_unlock(&ctx->bitstream_mutex);
> +
> +	if (coda_get_bitstream_payload(ctx) < 512 &&
> +	    (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			 "bitstream payload: %d, skipping\n",
> +			 coda_get_bitstream_payload(ctx));
> +		schedule_work(&ctx->skip_run);
> +		return -EAGAIN;
> +	}
> +
> +	/* Run coda_start_decoding (again) if not yet initialized */
> +	if (!ctx->initialized) {
> +		int ret = coda_start_decoding(ctx);
> +		if (ret < 0) {
> +			v4l2_err(&dev->v4l2_dev, "failed to start
> decoding\n");
> +			schedule_work(&ctx->skip_run);
> +			return -EAGAIN;
> +		} else {
> +			ctx->initialized = 1;
> +		}
> +	}
> +
> +	/* Set rotator output */
> +	picture_y = vb2_dma_contig_plane_dma_addr(dst_buf, 0);
> +	if (q_data_dst->fourcc == V4L2_PIX_FMT_YVU420) {
> +		/* Switch Cr and Cb for YVU420 format */
> +		picture_cr = picture_y + stridey * height;
> +		picture_cb = picture_cr + stridey / 2 * height / 2;
> +	} else {
> +		picture_cb = picture_y + stridey * height;
> +		picture_cr = picture_cb + stridey / 2 * height / 2;
> +	}
> +	coda_write(dev, picture_y, CODA_CMD_DEC_PIC_ROT_ADDR_Y);
> +	coda_write(dev, picture_cb, CODA_CMD_DEC_PIC_ROT_ADDR_CB);
> +	coda_write(dev, picture_cr, CODA_CMD_DEC_PIC_ROT_ADDR_CR);
> +	coda_write(dev, stridey, CODA_CMD_DEC_PIC_ROT_STRIDE);
> +	coda_write(dev, CODA_ROT_MIR_ENABLE | ctx->params.rot_mode,
> +			CODA_CMD_DEC_PIC_ROT_MODE);
> +
> +	switch (dev->devtype->product) {
> +	case CODA_DX6:
> +		/* TBD */
> +	case CODA_7541:
> +		coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
> +		break;
> +	}
> +
> +	coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
> +
> +	coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
> +	coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
> +
> +	return 0;
> +}
> +
>  static void coda_prepare_encode(struct coda_ctx *ctx)  {
>  	struct coda_q_data *q_data_src, *q_data_dst; @@ -810,9 +1036,9 @@
> static void coda_prepare_encode(struct coda_ctx *ctx)
>  	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
>  	dst_fourcc = q_data_dst->fourcc;
> 
> -	src_buf->v4l2_buf.sequence = ctx->isequence;
> -	dst_buf->v4l2_buf.sequence = ctx->isequence;
> -	ctx->isequence++;
> +	src_buf->v4l2_buf.sequence = ctx->osequence;
> +	dst_buf->v4l2_buf.sequence = ctx->osequence;
> +	ctx->osequence++;
> 
>  	/*
>  	 * Workaround coda firmware BUG that only marks the first @@ -
> 920,15 +1146,36 @@ static void coda_device_run(void *m2m_priv)  {
>  	struct coda_ctx *ctx = m2m_priv;
>  	struct coda_dev *dev = ctx->dev;
> +	int ret;
> 
> -	mutex_lock(&dev->coda_mutex);
> +	mutex_lock(&ctx->buffer_mutex);
> 
> -	coda_prepare_encode(ctx);
> +	/*
> +	 * If streamoff dequeued all buffers before we could get the lock,
> +	 * just bail out immediately.
> +	 */
> +	if ((!v4l2_m2m_num_src_bufs_ready(ctx->m2m_ctx) &&
> +	    ctx->inst_type != CODA_INST_DECODER) ||
> +		!v4l2_m2m_num_dst_bufs_ready(ctx->m2m_ctx)) {
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			"%d: device_run without buffers\n", ctx->idx);
> +		mutex_unlock(&ctx->buffer_mutex);
> +		schedule_work(&ctx->skip_run);
> +		return;
> +	}
> 
> -	if (dev->devtype->product == CODA_7541) {
> -		coda_write(dev, CODA7_USE_BIT_ENABLE |
> CODA7_USE_HOST_BIT_ENABLE |
> -				CODA7_USE_ME_ENABLE | CODA7_USE_HOST_ME_ENABLE,
> -				CODA7_REG_BIT_AXI_SRAM_USE);
> +	mutex_lock(&dev->coda_mutex);
> +
> +	if (ctx->inst_type == CODA_INST_DECODER) {
> +		ret = coda_prepare_decode(ctx);
> +		if (ret < 0) {
> +			mutex_unlock(&dev->coda_mutex);
> +			mutex_unlock(&ctx->buffer_mutex);
> +			/* job_finish scheduled by prepare_decode */
> +			return;
> +		}
> +	} else {
> +		coda_prepare_encode(ctx);
>  	}
> 
>  	if (dev->devtype->product != CODA_DX6) @@ -938,6 +1185,8 @@
> static void coda_device_run(void *m2m_priv)
>  	/* 1 second timeout in case CODA locks up */
>  	schedule_delayed_work(&dev->timeout, HZ);
> 
> +	if (ctx->inst_type == CODA_INST_DECODER)
> +		coda_kfifo_sync_to_device_full(ctx);
>  	coda_command_async(ctx, CODA_COMMAND_PIC_RUN);  }
> 
> @@ -963,6 +1212,16 @@ static int coda_job_ready(void *m2m_priv)
>  		return 0;
>  	}
> 
> +	if (ctx->prescan_failed ||
> +	    ((ctx->inst_type == CODA_INST_DECODER) &&
> +	     (coda_get_bitstream_payload(ctx) < 512) &&
> +	     !(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
> +		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
> +			 "%d: not ready: not enough bitstream data.\n",
> +			 ctx->idx);
> +		return 0;
> +	}
> +
>  	if (ctx->aborting) {
>  		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
>  			 "not ready: aborting\n");
> @@ -1078,7 +1337,29 @@ static int coda_buf_prepare(struct vb2_buffer
> *vb)  static void coda_buf_queue(struct vb2_buffer *vb)  {
>  	struct coda_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> -	v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
> +	struct coda_q_data *q_data;
> +
> +	q_data = get_q_data(ctx, vb->vb2_queue->type);
> +
> +	/*
> +	 * In the decoder case, immediately try to copy the buffer into
> the
> +	 * bitstream ringbuffer and mark it as ready to be dequeued.
> +	 */
> +	if (q_data->fourcc == V4L2_PIX_FMT_H264 &&
> +	    vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> +		/*
> +		 * For backwards compatibility, queuing an empty buffer
> marks
> +		 * the stream end
> +		 */
> +		if (vb2_get_plane_payload(vb, 0) == 0)
> +			ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
> +		mutex_lock(&ctx->bitstream_mutex);
> +		v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
> +		coda_fill_bitstream(ctx);
> +		mutex_unlock(&ctx->bitstream_mutex);
> +	} else {
> +		v4l2_m2m_buf_queue(ctx->m2m_ctx, vb);
> +	}
>  }
> 
>  static void coda_wait_prepare(struct vb2_queue *q) @@ -1366,6 +1647,8
> @@ static void coda_free_context_buffers(struct coda_ctx *ctx)  {
>  	struct coda_dev *dev = ctx->dev;
> 
> +	coda_free_aux_buf(dev, &ctx->slicebuf);
> +	coda_free_aux_buf(dev, &ctx->psbuf);
>  	if (dev->devtype->product != CODA_DX6)
>  		coda_free_aux_buf(dev, &ctx->workbuf);  } @@ -1385,12
> +1668,40 @@ static int coda_alloc_context_buffers(struct coda_ctx *ctx,
>  		return 0;
>  	}
> 
> +	if (ctx->psbuf.vaddr) {
> +		v4l2_err(&dev->v4l2_dev, "psmembuf still allocated\n");
> +		return -EBUSY;
> +	}
> +	if (ctx->slicebuf.vaddr) {
> +		v4l2_err(&dev->v4l2_dev, "slicebuf still allocated\n");
> +		return -EBUSY;
> +	}
>  	if (ctx->workbuf.vaddr) {
>  		v4l2_err(&dev->v4l2_dev, "context buffer still
> allocated\n");
>  		ret = -EBUSY;
>  		return -ENOMEM;
>  	}
> 
> +	if (q_data->fourcc == V4L2_PIX_FMT_H264) {
> +		/* worst case slice size */
> +		size = (DIV_ROUND_UP(q_data->width, 16) *
> +			DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
> +		ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size);
> +		if (ret < 0) {
> +			v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte
> slice buffer",
> +				 ctx->slicebuf.size);
> +			return ret;
> +		}
> +	}
> +
> +	if (dev->devtype->product == CODA_7541) {
> +		ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
> CODA7_PS_BUF_SIZE);
> +		if (ret < 0) {
> +			v4l2_err(&dev->v4l2_dev, "failed to allocate psmem
> buffer");
> +			goto err;
> +		}
> +	}
> +
>  	ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size);
>  	if (ret < 0) {
>  		v4l2_err(&dev->v4l2_dev, "failed to allocate %d byte
> context buffer", @@ -1405,6 +1716,148 @@ err:
>  	return ret;
>  }
> 
> +static int coda_start_decoding(struct coda_ctx *ctx) {
> +	struct coda_q_data *q_data_src, *q_data_dst;
> +	u32 bitstream_buf, bitstream_size;
> +	struct coda_dev *dev = ctx->dev;
> +	int width, height;
> +	u32 src_fourcc;
> +	u32 val;
> +	int ret;
> +
> +	/* Start decoding */
> +	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +	bitstream_buf = ctx->bitstream.paddr;
> +	bitstream_size = ctx->bitstream.size;
> +	src_fourcc = q_data_src->fourcc;
> +
> +	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
> +
> +	/* Update coda bitstream read and write pointers from kfifo */
> +	coda_kfifo_sync_to_device_full(ctx);
> +
> +	ctx->display_idx = -1;
> +	ctx->frm_dis_flg = 0;
> +	coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
> +
> +	coda_write(dev, CODA_BIT_DEC_SEQ_INIT_ESCAPE,
> +			CODA_REG_BIT_BIT_STREAM_PARAM);
> +
> +	coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
> +	coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
> +	val = 0;
> +	if (dev->devtype->product == CODA_7541)
> +		val |= CODA_REORDER_ENABLE;
> +	coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
> +
> +	ctx->params.codec_mode = ctx->codec->mode;
> +	ctx->params.codec_mode_aux = 0;
> +	if (src_fourcc == V4L2_PIX_FMT_H264) {
> +		if (dev->devtype->product == CODA_7541) {
> +			coda_write(dev, ctx->psbuf.paddr,
> +					CODA_CMD_DEC_SEQ_PS_BB_START);
> +			coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
> +					CODA_CMD_DEC_SEQ_PS_BB_SIZE);
> +		}
> +	}
> +
> +	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT)) {
> +		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
> +		coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
> +		return -ETIMEDOUT;
> +	}
> +
> +	/* Update kfifo out pointer from coda bitstream read pointer */
> +	coda_kfifo_sync_from_device(ctx);
> +
> +	coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
> +
> +	if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
> +		v4l2_err(&dev->v4l2_dev,
> +			"CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
> +			coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
> +		return -EAGAIN;
> +	}
> +
> +	val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
> +	if (dev->devtype->product == CODA_DX6) {
> +		width = (val >> CODADX6_PICWIDTH_OFFSET) &
> CODADX6_PICWIDTH_MASK;
> +		height = val & CODADX6_PICHEIGHT_MASK;
> +	} else {
> +		width = (val >> CODA7_PICWIDTH_OFFSET) &
> CODA7_PICWIDTH_MASK;
> +		height = val & CODA7_PICHEIGHT_MASK;
> +	}
> +
> +	if (width > q_data_dst->width || height > q_data_dst->height) {
> +		v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
> +			 width, height, q_data_dst->width, q_data_dst->height);
> +		return -EINVAL;
> +	}
> +
> +	width = round_up(width, 16);
> +	height = round_up(height, 16);
> +
> +	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d
> now: %dx%d\n",
> +		 __func__, ctx->idx, width, height);
> +
> +	ctx->num_internal_frames = coda_read(dev,
> CODA_RET_DEC_SEQ_FRAME_NEED) + 1;
> +	if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
> +		v4l2_err(&dev->v4l2_dev,
> +			 "not enough framebuffers to decode (%d < %d)\n",
> +			 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
> +		return -EINVAL;
> +	}
> +
> +	ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Tell the decoder how many frame buffers we allocated. */
> +	coda_write(dev, ctx->num_internal_frames,
> CODA_CMD_SET_FRAME_BUF_NUM);
> +	coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
> +
> +	if (dev->devtype->product != CODA_DX6) {
> +		/* Set secondary AXI IRAM */
> +		coda_setup_iram(ctx);
> +
> +		coda_write(dev, ctx->iram_info.buf_bit_use,
> +				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
> +		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
> +				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
> +		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
> +				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
> +		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
> +				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
> +		coda_write(dev, ctx->iram_info.buf_ovl_use,
> +				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
> +	}
> +
> +	if (src_fourcc == V4L2_PIX_FMT_H264) {
> +		coda_write(dev, ctx->slicebuf.paddr,
> +				CODA_CMD_SET_FRAME_SLICE_BB_START);
> +		coda_write(dev, ctx->slicebuf.size / 1024,
> +				CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
> +	}
> +
> +	if (dev->devtype->product == CODA_7541) {
> +		int max_mb_x = 1920 / 16;
> +		int max_mb_y = 1088 / 16;
> +		int max_mb_num = max_mb_x * max_mb_y;
> +		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
> +				CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
> +	}
> +
> +	if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
> +		v4l2_err(&ctx->dev->v4l2_dev,
> +			 "CODA_COMMAND_SET_FRAME_BUF timeout\n");
> +		return -ETIMEDOUT;
> +	}
> +
> +	return 0;
> +}
> +
>  static int coda_encode_header(struct coda_ctx *ctx, struct vb2_buffer
> *buf,
>  			      int header_code, u8 *header, int *size)  { @@ -
> 1439,26 +1892,36 @@ static int coda_start_streaming(struct vb2_queue *q,
> unsigned int count)
>  	u32 value;
>  	int ret = 0;
> 
> -	if (count < 1)
> -		return -EINVAL;
> +	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> +		if (q_data_src->fourcc == V4L2_PIX_FMT_H264) {
> +			if (coda_get_bitstream_payload(ctx) < 512)
> +				return -EINVAL;
> +		} else {
> +			if (count < 1)
> +				return -EINVAL;
> +		}
> 
> -	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
>  		ctx->streamon_out = 1;
> -	else
> -		ctx->streamon_cap = 1;
> 
> -	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> -	if (ctx->streamon_out) {
>  		if (coda_format_is_yuv(q_data_src->fourcc))
>  			ctx->inst_type = CODA_INST_ENCODER;
>  		else
>  			ctx->inst_type = CODA_INST_DECODER;
> +	} else {
> +		if (count < 1)
> +			return -EINVAL;
> +
> +		ctx->streamon_cap = 1;
>  	}
> 
>  	/* Don't start the coda unless both queues are on */
>  	if (!(ctx->streamon_out & ctx->streamon_cap))
>  		return 0;
> 
> +	/* Allow device_run with no buffers queued and after streamoff */
> +	v4l2_m2m_set_src_buffered(ctx->m2m_ctx, true);
> +
>  	ctx->gopcounter = ctx->params.gop_size - 1;
>  	buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
>  	bitstream_buf = vb2_dma_contig_plane_dma_addr(buf, 0); @@ -1478,6
> +1941,20 @@ static int coda_start_streaming(struct vb2_queue *q,
> unsigned int count)
>  	if (ret < 0)
>  		return ret;
> 
> +	if (ctx->inst_type == CODA_INST_DECODER) {
> +		mutex_lock(&dev->coda_mutex);
> +		ret = coda_start_decoding(ctx);
> +		mutex_unlock(&dev->coda_mutex);
> +		if (ret == -EAGAIN) {
> +			return 0;
> +		} else if (ret < 0) {
> +			return ret;
> +		} else {
> +			ctx->initialized = 1;
> +			return 0;
> +		}
> +	}
> +
>  	if (!coda_is_initialized(dev)) {
>  		v4l2_err(v4l2_dev, "coda is not initialized.\n");
>  		return -EFAULT;
> @@ -1619,6 +2096,9 @@ static int coda_start_streaming(struct vb2_queue
> *q, unsigned int count)
> 
>  	coda_write(dev, ctx->num_internal_frames,
> CODA_CMD_SET_FRAME_BUF_NUM);
>  	coda_write(dev, round_up(q_data_src->width, 8),
> CODA_CMD_SET_FRAME_BUF_STRIDE);
> +	if (dev->devtype->product == CODA_7541)
> +		coda_write(dev, round_up(q_data_src->width, 8),
> +				CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
>  	if (dev->devtype->product != CODA_DX6) {
>  		coda_write(dev, ctx->iram_info.buf_bit_use,
>  				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
> @@ -1710,32 +2190,26 @@ static int coda_stop_streaming(struct vb2_queue
> *q)
>  	struct coda_dev *dev = ctx->dev;
> 
>  	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> -		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
>  			 "%s: output\n", __func__);
>  		ctx->streamon_out = 0;
> +
> +		ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
> +
> +		ctx->isequence = 0;
>  	} else {
> -		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
>  			 "%s: capture\n", __func__);
>  		ctx->streamon_cap = 0;
> -	}
> -
> -	/* Don't stop the coda unless both queues are off */
> -	if (ctx->streamon_out || ctx->streamon_cap)
> -		return 0;
> 
> -	cancel_delayed_work(&dev->timeout);
> -
> -	mutex_lock(&dev->coda_mutex);
> -	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> -		 "%s: sent command 'SEQ_END' to coda\n", __func__);
> -	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
> -		v4l2_err(&dev->v4l2_dev,
> -			 "CODA_COMMAND_SEQ_END failed\n");
> -		return -ETIMEDOUT;
> +		ctx->osequence = 0;
>  	}
> -	mutex_unlock(&dev->coda_mutex);
> 
> -	coda_free_framebuffers(ctx);
> +	if (!ctx->streamon_out && !ctx->streamon_cap) {
> +		kfifo_init(&ctx->bitstream_fifo,
> +			ctx->bitstream.vaddr, ctx->bitstream.size);
> +		ctx->runcounter = 0;
> +	}
> 
>  	return 0;
>  }
> @@ -1895,7 +2369,7 @@ static int coda_open(struct file *file)  {
>  	struct coda_dev *dev = video_drvdata(file);
>  	struct coda_ctx *ctx = NULL;
> -	int ret = 0;
> +	int ret;
>  	int idx;
> 
>  	idx = coda_next_free_instance(dev);
> @@ -1907,6 +2381,7 @@ static int coda_open(struct file *file)
>  	if (!ctx)
>  		return -ENOMEM;
> 
> +	INIT_WORK(&ctx->skip_run, coda_skip_run);
>  	v4l2_fh_init(&ctx->fh, video_devdata(file));
>  	file->private_data = &ctx->fh;
>  	v4l2_fh_add(&ctx->fh);
> @@ -1954,6 +2429,7 @@ static int coda_open(struct file *file)
>  	kfifo_init(&ctx->bitstream_fifo,
>  		ctx->bitstream.vaddr, ctx->bitstream.size);
>  	mutex_init(&ctx->bitstream_mutex);
> +	mutex_init(&ctx->buffer_mutex);
> 
>  	coda_lock(ctx);
>  	list_add(&ctx->list, &dev->instances); @@ -1982,6 +2458,23 @@
> static int coda_release(struct file *file)
>  	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "Releasing instance %p\n",
>  		 ctx);
> 
> +	/* If this instance is running, call .job_abort and wait for it
> to end */
> +	v4l2_m2m_ctx_release(ctx->m2m_ctx);
> +
> +	/* In case the instance was not running, we still need to call
> SEQ_END */
> +	mutex_lock(&dev->coda_mutex);
> +	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +		 "%s: sent command 'SEQ_END' to coda\n", __func__);
> +	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
> +		v4l2_err(&dev->v4l2_dev,
> +			 "CODA_COMMAND_SEQ_END failed\n");
> +		mutex_unlock(&dev->coda_mutex);
> +		return -ETIMEDOUT;
> +	}
> +	mutex_unlock(&dev->coda_mutex);
> +
> +	coda_free_framebuffers(ctx);
> +
>  	coda_lock(ctx);
>  	list_del(&ctx->list);
>  	coda_unlock(ctx);
> @@ -2032,7 +2525,159 @@ static const struct v4l2_file_operations
> coda_fops = {
>  	.mmap		= coda_mmap,
>  };
> 
> -static void coda_encode_finish(struct coda_ctx *ctx)
> +static void coda_finish_decode(struct coda_ctx *ctx) {
> +	struct coda_dev *dev = ctx->dev;
> +	struct coda_q_data *q_data_src;
> +	struct coda_q_data *q_data_dst;
> +	struct vb2_buffer *dst_buf;
> +	int width, height;
> +	int decoded_idx;
> +	int display_idx;
> +	u32 src_fourcc;
> +	int success;
> +	u32 val;
> +
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
> +
> +	/* Update kfifo out pointer from coda bitstream read pointer */
> +	coda_kfifo_sync_from_device(ctx);
> +
> +	/*
> +	 * in stream-end mode, the read pointer can overshoot the write
> pointer
> +	 * by up to 512 bytes
> +	 */
> +	if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
> +		if (coda_get_bitstream_payload(ctx) >= 0x100000 - 512)
> +			kfifo_init(&ctx->bitstream_fifo,
> +				ctx->bitstream.vaddr, ctx->bitstream.size);
> +	}
> +
> +	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	src_fourcc = q_data_src->fourcc;
> +
> +	val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
> +	if (val != 1)
> +		pr_err("DEC_PIC_SUCCESS = %d\n", val);
> +
> +	success = val & 0x1;
> +	if (!success)
> +		v4l2_err(&dev->v4l2_dev, "decode failed\n");
> +
> +	if (src_fourcc == V4L2_PIX_FMT_H264) {
> +		if (val & (1 << 3))
> +			v4l2_err(&dev->v4l2_dev,
> +				 "insufficient PS buffer space (%d bytes)\n",
> +				 ctx->psbuf.size);
> +		if (val & (1 << 2))
> +			v4l2_err(&dev->v4l2_dev,
> +				 "insufficient slice buffer space (%d bytes)\n",
> +				 ctx->slicebuf.size);
> +	}
> +
> +	val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
> +	width = (val >> 16) & 0xffff;
> +	height = val & 0xffff;
> +
> +	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +
> +	val = coda_read(dev, CODA_RET_DEC_PIC_TYPE);
> +	if ((val & 0x7) == 0) {
> +		dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
> +		dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_PFRAME;
> +	} else {
> +		dst_buf->v4l2_buf.flags |= V4L2_BUF_FLAG_PFRAME;
> +		dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
> +	}
> +
> +	val = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
> +	if (val > 0)
> +		v4l2_err(&dev->v4l2_dev,
> +			 "errors in %d macroblocks\n", val);
> +
> +	if (dev->devtype->product == CODA_7541) {
> +		val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
> +		if (val == 0) {
> +			/* not enough bitstream data */
> +			v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +				 "prescan failed: %d\n", val);
> +			ctx->prescan_failed = true;
> +			return;
> +		}
> +	}
> +
> +	ctx->frm_dis_flg = coda_read(dev,
> +CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
> +
> +	/*
> +	 * The previous display frame was copied out by the rotator,
> +	 * now it can be overwritten again
> +	 */
> +	if (ctx->display_idx >= 0 &&
> +	    ctx->display_idx < ctx->num_internal_frames) {
> +		ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
> +		coda_write(dev, ctx->frm_dis_flg,
> +				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
> +	}
> +
> +	/*
> +	 * The index of the last decoded frame, not necessarily in
> +	 * display order, and the index of the next display frame.
> +	 * The latter could have been decoded in a previous run.
> +	 */
> +	decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
> +	display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
> +
> +	if (decoded_idx == -1) {
> +		/* no frame was decoded, but we might have a display frame
> */
> +		if (display_idx < 0 && ctx->display_idx < 0)
> +			ctx->prescan_failed = true;
> +	} else if (decoded_idx == -2) {
> +		/* no frame was decoded, we still return the remaining
> buffers */
> +	} else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
> +		v4l2_err(&dev->v4l2_dev,
> +			 "decoded frame index out of range: %d\n",
> decoded_idx);
> +	}
> +
> +	if (display_idx == -1) {
> +		/*
> +		 * no more frames to be decoded, but there could still
> +		 * be rotator output to dequeue
> +		 */
> +		ctx->prescan_failed = true;
> +	} else if (display_idx == -3) {
> +		/* possibly prescan failure */
> +	} else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
> +		v4l2_err(&dev->v4l2_dev,
> +			 "presentation frame index out of range: %d\n",
> +			 display_idx);
> +	}
> +
> +	/* If a frame was copied out, return it */
> +	if (ctx->display_idx >= 0 &&
> +	    ctx->display_idx < ctx->num_internal_frames) {
> +		dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
> +		dst_buf->v4l2_buf.sequence = ctx->osequence++;
> +
> +		vb2_set_plane_payload(dst_buf, 0, width * height * 3 / 2);
> +
> +		v4l2_m2m_buf_done(dst_buf, success ? VB2_BUF_STATE_DONE :
> +						     VB2_BUF_STATE_ERROR);
> +
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			"job finished: decoding frame (%d) (%s)\n",
> +			dst_buf->v4l2_buf.sequence,
> +			(dst_buf->v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) ?
> +			"KEYFRAME" : "PFRAME");
> +	} else {
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			"job finished: no frame decoded\n");
> +	}
> +
> +	/* The rotator will copy the current display frame next time */
> +	ctx->display_idx = display_idx;
> +}
> +
> +static void coda_finish_encode(struct coda_ctx *ctx)
>  {
>  	struct vb2_buffer *src_buf, *dst_buf;
>  	struct coda_dev *dev = ctx->dev;
> @@ -2109,8 +2754,7 @@ static irqreturn_t coda_irq_handler(int irq, void
> *data)
>  	if (ctx->aborting) {
>  		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
>  			 "task has been aborted\n");
> -		mutex_unlock(&dev->coda_mutex);
> -		return IRQ_HANDLED;
> +		goto out;
>  	}
> 
>  	if (coda_isbusy(ctx->dev)) {
> @@ -2119,9 +2763,29 @@ static irqreturn_t coda_irq_handler(int irq,
> void *data)
>  		return IRQ_NONE;
>  	}
> 
> -	coda_encode_finish(ctx);
> +	if (ctx->inst_type == CODA_INST_DECODER)
> +		coda_finish_decode(ctx);
> +	else
> +		coda_finish_encode(ctx);
> +
> +out:
> +	if (ctx->aborting || (!ctx->streamon_cap && !ctx->streamon_out))
> {
> +		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
> +			 "%s: sent command 'SEQ_END' to coda\n", __func__);
> +		if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
> +			v4l2_err(&dev->v4l2_dev,
> +				 "CODA_COMMAND_SEQ_END failed\n");
> +		}
> +
> +		kfifo_init(&ctx->bitstream_fifo,
> +			ctx->bitstream.vaddr, ctx->bitstream.size);
> +
> +		coda_free_framebuffers(ctx);
> +		coda_free_context_buffers(ctx);
> +	}
> 
>  	mutex_unlock(&dev->coda_mutex);
> +	mutex_unlock(&ctx->buffer_mutex);
> 
>  	v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->m2m_ctx);
> 
> @@ -2138,6 +2802,8 @@ static void coda_timeout(struct work_struct *work)
> 
>  	mutex_lock(&dev->dev_mutex);
>  	list_for_each_entry(ctx, &dev->instances, list) {
> +		if (mutex_is_locked(&ctx->buffer_mutex))
> +			mutex_unlock(&ctx->buffer_mutex);
>  		v4l2_m2m_streamoff(NULL, ctx->m2m_ctx,
> V4L2_BUF_TYPE_VIDEO_OUTPUT);
>  		v4l2_m2m_streamoff(NULL, ctx->m2m_ctx,
> V4L2_BUF_TYPE_VIDEO_CAPTURE);
>  	}
> @@ -2218,6 +2884,7 @@ static int coda_hw_init(struct coda_dev *dev)
>  	if (dev->devtype->product == CODA_7541) {
>  		coda_write(dev, dev->tempbuf.paddr,
>  				CODA_REG_BIT_TEMP_BUF_ADDR);
> +		coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
>  	} else {
>  		coda_write(dev, dev->workbuf.paddr,
>  			      CODA_REG_BIT_WORK_BUF_ADDR);
> @@ -2462,8 +3129,8 @@ static int coda_probe(struct platform_device
> *pdev)
>  		return -ENOENT;
>  	}
> 
> -	if (devm_request_irq(&pdev->dev, irq, coda_irq_handler,
> -		0, CODA_NAME, dev) < 0) {
> +	if (devm_request_threaded_irq(&pdev->dev, irq, NULL, coda_irq_handler,
> +		IRQF_ONESHOT, CODA_NAME, dev) < 0) {
>  		dev_err(&pdev->dev, "failed to request irq\n");
>  		return -ENOENT;
>  	}
> @@ -2521,10 +3188,14 @@ static int coda_probe(struct platform_device
> *pdev)
>  		}
>  	}
> 
> -	if (dev->devtype->product == CODA_DX6)
> +	switch (dev->devtype->product) {
> +	case CODA_DX6:
>  		dev->iram_size = CODADX6_IRAM_SIZE;
> -	else
> +		break;
> +	case CODA_7541:
>  		dev->iram_size = CODA7_IRAM_SIZE;
> +		break;
> +	}
>  	dev->iram_vaddr = gen_pool_alloc(dev->iram_pool, dev->iram_size);
>  	if (!dev->iram_vaddr) {
>  		dev_err(&pdev->dev, "unable to alloc iram\n");
> diff --git a/drivers/media/platform/coda.h b/drivers/media/platform/coda.h
> index 140eea5..4e32e2e 100644
> --- a/drivers/media/platform/coda.h
> +++ b/drivers/media/platform/coda.h
> @@ -49,6 +49,7 @@
>  #define CODA_REG_BIT_TEMP_BUF_ADDR		0x118
>  #define CODA_REG_BIT_RD_PTR(x)			(0x120 + 8 * (x))
>  #define CODA_REG_BIT_WR_PTR(x)			(0x124 + 8 * (x))
> +#define CODA_REG_BIT_FRM_DIS_FLG(x)		(0x150 + 4 * (x))
>  #define CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR	0x140
>  #define CODA7_REG_BIT_AXI_SRAM_USE		0x140
>  #define		CODA7_USE_HOST_ME_ENABLE	(1 << 11)
> @@ -95,6 +96,7 @@
>  #define 	CODA_MODE_INVALID		0xffff
>  #define CODA_REG_BIT_INT_ENABLE		0x170
>  #define		CODA_INT_INTERRUPT_ENABLE	(1 << 3)
> +#define CODA_REG_BIT_INT_REASON			0x174
>  #define CODA7_REG_BIT_RUN_AUX_STD		0x178
>  #define		CODA_MP4_AUX_MPEG4		0
>  #define		CODA_MP4_AUX_DIVX3		1
> @@ -111,15 +113,89 @@
>   * issued.
>   */
> 
> +/* Decoder Sequence Initialization */
> +#define CODA_CMD_DEC_SEQ_BB_START		0x180
> +#define CODA_CMD_DEC_SEQ_BB_SIZE		0x184
> +#define CODA_CMD_DEC_SEQ_OPTION			0x188
> +#define		CODA_REORDER_ENABLE			(1 << 1)
> +#define		CODADX6_QP_REPORT			(1 << 0)
> +#define		CODA7_MP4_DEBLK_ENABLE			(1 << 0)
> +#define CODA_CMD_DEC_SEQ_SRC_SIZE		0x18c
> +#define CODA_CMD_DEC_SEQ_START_BYTE		0x190
> +#define CODA_CMD_DEC_SEQ_PS_BB_START		0x194
> +#define CODA_CMD_DEC_SEQ_PS_BB_SIZE		0x198
> +#define CODA_CMD_DEC_SEQ_MP4_ASP_CLASS		0x19c
> +#define CODA_CMD_DEC_SEQ_X264_MV_EN		0x19c
> +#define CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE		0x1a0
> +
> +#define CODA7_RET_DEC_SEQ_ASPECT		0x1b0
> +#define CODA_RET_DEC_SEQ_SUCCESS		0x1c0
> +#define CODA_RET_DEC_SEQ_SRC_FMT		0x1c4 /* SRC_SIZE on CODA7 */
> +#define CODA_RET_DEC_SEQ_SRC_SIZE		0x1c4
> +#define CODA_RET_DEC_SEQ_SRC_F_RATE		0x1c8
> +#define CODA9_RET_DEC_SEQ_ASPECT		0x1c8
> +#define CODA_RET_DEC_SEQ_FRAME_NEED		0x1cc
> +#define CODA_RET_DEC_SEQ_FRAME_DELAY		0x1d0
> +#define CODA_RET_DEC_SEQ_INFO			0x1d4
> +#define CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT	0x1d8
> +#define CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM	0x1dc
> +#define CODA_RET_DEC_SEQ_NEXT_FRAME_NUM		0x1e0
> +#define CODA_RET_DEC_SEQ_ERR_REASON		0x1e0
> +#define CODA_RET_DEC_SEQ_FRATE_NR		0x1e4
> +#define CODA_RET_DEC_SEQ_FRATE_DR		0x1e8
> +#define CODA_RET_DEC_SEQ_JPG_PARA		0x1e4
> +#define CODA_RET_DEC_SEQ_JPG_THUMB_IND		0x1e8
> +
> +/* Decoder Picture Run */
> +#define CODA_CMD_DEC_PIC_ROT_MODE		0x180
> +#define CODA_CMD_DEC_PIC_ROT_ADDR_Y		0x184
> +#define CODA_CMD_DEC_PIC_ROT_ADDR_CB		0x188
> +#define CODA_CMD_DEC_PIC_ROT_ADDR_CR		0x18c
> +#define CODA_CMD_DEC_PIC_ROT_STRIDE		0x190
> +
> +#define CODA_CMD_DEC_PIC_OPTION			0x194
> +#define		CODA_PRE_SCAN_EN			(1 << 0)
> +#define		CODA_PRE_SCAN_MODE_DECODE		(0 << 1)
> +#define		CODA_PRE_SCAN_MODE_RETURN		(1 << 1)
> +#define		CODA_IFRAME_SEARCH_EN			(1 << 2)
> +#define		CODA_SKIP_FRAME_MODE			(0x3 << 3)
> +#define CODA_CMD_DEC_PIC_SKIP_NUM		0x198
> +#define CODA_CMD_DEC_PIC_CHUNK_SIZE		0x19c
> +#define CODA_CMD_DEC_PIC_BB_START		0x1a0
> +#define CODA_CMD_DEC_PIC_START_BYTE		0x1a4
> +#define CODA_RET_DEC_PIC_SIZE			0x1bc
> +#define CODA_RET_DEC_PIC_FRAME_NUM		0x1c0
> +#define CODA_RET_DEC_PIC_FRAME_IDX		0x1c4
> +#define CODA_RET_DEC_PIC_ERR_MB			0x1c8
> +#define CODA_RET_DEC_PIC_TYPE			0x1cc
> +#define		CODA_PIC_TYPE_MASK			0x7
> +#define		CODA_PIC_TYPE_MASK_VC1			0x3f
> +#define		CODA9_PIC_TYPE_FIRST_MASK		(0x7 << 3)
> +#define		CODA9_PIC_TYPE_IDR_MASK			(0x3 << 6)
> +#define		CODA7_PIC_TYPE_H264_NPF_MASK		(0x3 << 16)
> +#define		CODA7_PIC_TYPE_INTERLACED		(1 << 18)
> +#define CODA_RET_DEC_PIC_POST			0x1d0
> +#define CODA_RET_DEC_PIC_MVC_REPORT		0x1d0
> +#define CODA_RET_DEC_PIC_OPTION			0x1d4
> +#define CODA_RET_DEC_PIC_SUCCESS		0x1d8
> +#define CODA_RET_DEC_PIC_CUR_IDX		0x1dc
> +#define CODA_RET_DEC_PIC_CROP_LEFT_RIGHT	0x1e0
> +#define CODA_RET_DEC_PIC_CROP_TOP_BOTTOM	0x1e4
> +#define CODA_RET_DEC_PIC_FRAME_NEED		0x1ec
> +
>  /* Encoder Sequence Initialization */
>  #define CODA_CMD_ENC_SEQ_BB_START				0x180
>  #define CODA_CMD_ENC_SEQ_BB_SIZE				0x184
>  #define CODA_CMD_ENC_SEQ_OPTION				0x188
> +#define		CODA7_OPTION_AVCINTRA16X16ONLY_OFFSET		9
>  #define		CODA7_OPTION_GAMMA_OFFSET			8
> +#define		CODA7_OPTION_RCQPMAX_OFFSET			7
>  #define		CODADX6_OPTION_GAMMA_OFFSET			7
> +#define		CODA7_OPTION_RCQPMIN_OFFSET			6
>  #define		CODA_OPTION_LIMITQP_OFFSET			6
>  #define		CODA_OPTION_RCINTRAQP_OFFSET			5
>  #define		CODA_OPTION_FMO_OFFSET				4
> +#define		CODA_OPTION_AVC_AUD_OFFSET			2
>  #define		CODA_OPTION_SLICEREPORT_OFFSET			1
>  #define CODA_CMD_ENC_SEQ_COD_STD				0x18c
>  #define		CODA_STD_MPEG4					0
> @@ -188,8 +264,10 @@
>  #define		CODA_FMOPARAM_TYPE_MASK				1
>  #define		CODA_FMOPARAM_SLICENUM_OFFSET			0
>  #define		CODA_FMOPARAM_SLICENUM_MASK			0x0f
> +#define CODADX6_CMD_ENC_SEQ_INTRA_QP				0x1bc
>  #define CODA7_CMD_ENC_SEQ_SEARCH_BASE				0x1b8
>  #define CODA7_CMD_ENC_SEQ_SEARCH_SIZE				0x1bc
> +#define CODA7_CMD_ENC_SEQ_INTRA_QP				0x1c4
>  #define CODA_CMD_ENC_SEQ_RC_QP_MAX				0x1c8
>  #define		CODA_QPMAX_OFFSET				0
>  #define		CODA_QPMAX_MASK					0x3f
> @@ -216,18 +294,24 @@
>  #define CODA_CMD_ENC_PIC_OPTION	0x194
>  #define CODA_CMD_ENC_PIC_BB_START	0x198
>  #define CODA_CMD_ENC_PIC_BB_SIZE	0x19c
> +#define CODA_RET_ENC_FRAME_NUM		0x1c0
>  #define CODA_RET_ENC_PIC_TYPE		0x1c4
> +#define CODA_RET_ENC_PIC_FRAME_IDX	0x1c8
>  #define CODA_RET_ENC_PIC_SLICE_NUM	0x1cc
>  #define CODA_RET_ENC_PIC_FLAG		0x1d0
> +#define CODA_RET_ENC_PIC_SUCCESS	0x1d8
> 
>  /* Set Frame Buffer */
>  #define CODA_CMD_SET_FRAME_BUF_NUM		0x180
>  #define CODA_CMD_SET_FRAME_BUF_STRIDE		0x184
> +#define CODA_CMD_SET_FRAME_SLICE_BB_START	0x188
> +#define CODA_CMD_SET_FRAME_SLICE_BB_SIZE	0x18c
>  #define CODA7_CMD_SET_FRAME_AXI_BIT_ADDR	0x190
>  #define CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR	0x194
>  #define CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR	0x198
>  #define CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR	0x19c
>  #define CODA7_CMD_SET_FRAME_AXI_OVL_ADDR	0x1a0
> +#define CODA7_CMD_SET_FRAME_MAX_DEC_SIZE	0x1a4
>  #define CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE	0x1a8
> 
>  /* Encoder Header */
> --
> 1.8.3.1



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 8/8] [media] coda: add CODA7541 decoding support
  2013-06-27 10:10   ` Kamil Debski
@ 2013-06-27 14:19     ` Philipp Zabel
  0 siblings, 0 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-06-27 14:19 UTC (permalink / raw)
  To: Kamil Debski
  Cc: linux-media, 'Javier Martin', Sylwester Nawrocki,
	'Gaëtan Carlier', 'Wei Yongjun'

Hi Kamil,

Am Donnerstag, den 27.06.2013, 12:10 +0200 schrieb Kamil Debski:
> Hi Philipp,
> 
> This patch did not apply well on my tree. I find this quite strange.
> I did try and applied it manually, but please check here if it is
> correct:
> http://git.linuxtv.org/kdebski/media.git/shortlog/refs/heads/master

yes, the commit contents are identical.

thanks
Philipp


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/8] [media] coda: use vb2_set_plane_payload instead of setting v4l2_planes[0].bytesused directly
  2013-06-21  7:55 ` [PATCH v2 1/8] [media] coda: use vb2_set_plane_payload instead of setting v4l2_planes[0].bytesused directly Philipp Zabel
@ 2013-07-26 13:02   ` Mauro Carvalho Chehab
  2013-07-26 13:22     ` Philipp Zabel
  0 siblings, 1 reply; 15+ messages in thread
From: Mauro Carvalho Chehab @ 2013-07-26 13:02 UTC (permalink / raw)
  To: Philipp Zabel
  Cc: linux-media, Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun

Hi Philipp,

Em Fri, 21 Jun 2013 09:55:27 +0200
Philipp Zabel <p.zabel@pengutronix.de> escreveu:

> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

Please provide a description of the patch.

Thanks!
Mauro

> ---
>  drivers/media/platform/coda.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
> index c4566c4..90f3386 100644
> --- a/drivers/media/platform/coda.c
> +++ b/drivers/media/platform/coda.c
> @@ -1662,12 +1662,12 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
>  	wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->idx));
>  	/* Calculate bytesused field */
>  	if (dst_buf->v4l2_buf.sequence == 0) {
> -		dst_buf->v4l2_planes[0].bytesused = (wr_ptr - start_ptr) +
> -						ctx->vpu_header_size[0] +
> -						ctx->vpu_header_size[1] +
> -						ctx->vpu_header_size[2];
> +		vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr +
> +					ctx->vpu_header_size[0] +
> +					ctx->vpu_header_size[1] +
> +					ctx->vpu_header_size[2]);
>  	} else {
> -		dst_buf->v4l2_planes[0].bytesused = (wr_ptr - start_ptr);
> +		vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr);
>  	}
>  
>  	v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",


-- 

Cheers,
Mauro

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/8] [media] coda: use vb2_set_plane_payload instead of setting v4l2_planes[0].bytesused directly
  2013-07-26 13:02   ` Mauro Carvalho Chehab
@ 2013-07-26 13:22     ` Philipp Zabel
  0 siblings, 0 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-07-26 13:22 UTC (permalink / raw)
  To: Mauro Carvalho Chehab
  Cc: linux-media, Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun

Hi Mauro,

Am Freitag, den 26.07.2013, 10:02 -0300 schrieb Mauro Carvalho Chehab:
> Hi Philipp,
> 
> Em Fri, 21 Jun 2013 09:55:27 +0200
> Philipp Zabel <p.zabel@pengutronix.de> escreveu:
> 
> > Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
> 
> Please provide a description of the patch.

Sorry, how about this:

"As stated in the vb2_buffer documentation, drivers should not directly fill
 in v4l2_planes[0].bytesused, but should use the vb2_set_plane_payload()
 function instead. No functional changes."

regards
Philipp

> Thanks!
> Mauro
> 
> > ---
> >  drivers/media/platform/coda.c | 10 +++++-----
> >  1 file changed, 5 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
> > index c4566c4..90f3386 100644
> > --- a/drivers/media/platform/coda.c
> > +++ b/drivers/media/platform/coda.c
> > @@ -1662,12 +1662,12 @@ static irqreturn_t coda_irq_handler(int irq, void *data)
> >  	wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->idx));
> >  	/* Calculate bytesused field */
> >  	if (dst_buf->v4l2_buf.sequence == 0) {
> > -		dst_buf->v4l2_planes[0].bytesused = (wr_ptr - start_ptr) +
> > -						ctx->vpu_header_size[0] +
> > -						ctx->vpu_header_size[1] +
> > -						ctx->vpu_header_size[2];
> > +		vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr +
> > +					ctx->vpu_header_size[0] +
> > +					ctx->vpu_header_size[1] +
> > +					ctx->vpu_header_size[2]);
> >  	} else {
> > -		dst_buf->v4l2_planes[0].bytesused = (wr_ptr - start_ptr);
> > +		vb2_set_plane_payload(dst_buf, 0, wr_ptr - start_ptr);
> >  	}
> >  
> >  	v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
> 
> 



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/8] [media] coda: dynamic IRAM setup for decoder
  2013-06-21  7:55 ` [PATCH v2 6/8] [media] coda: dynamic IRAM setup " Philipp Zabel
@ 2013-07-26 15:18   ` Mauro Carvalho Chehab
  2013-07-29 12:30     ` Philipp Zabel
  0 siblings, 1 reply; 15+ messages in thread
From: Mauro Carvalho Chehab @ 2013-07-26 15:18 UTC (permalink / raw)
  To: Philipp Zabel
  Cc: linux-media, Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun

Em Fri, 21 Jun 2013 09:55:32 +0200
Philipp Zabel <p.zabel@pengutronix.de> escreveu:

> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

Please add a description for the patch.

Thanks!
Mauro

> ---
>  drivers/media/platform/coda.c | 50 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
> index 1f3bd43..856a93e 100644
> --- a/drivers/media/platform/coda.c
> +++ b/drivers/media/platform/coda.c
> @@ -1212,6 +1212,7 @@ static void coda_setup_iram(struct coda_ctx *ctx)
>  	int ipacdc_size;
>  	int bitram_size;
>  	int dbk_size;
> +	int ovl_size;
>  	int mb_width;
>  	int me_size;
>  	int size;
> @@ -1273,7 +1274,47 @@ static void coda_setup_iram(struct coda_ctx *ctx)
>  			size -= ipacdc_size;
>  		}
>  
> -		/* OVL disabled for encoder */
> +		/* OVL and BTP disabled for encoder */
> +	} else if (ctx->inst_type == CODA_INST_DECODER) {
> +		struct coda_q_data *q_data_dst;
> +		int mb_height;
> +
> +		q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +		mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
> +		mb_height = DIV_ROUND_UP(q_data_dst->height, 16);
> +
> +		dbk_size = round_up(256 * mb_width, 1024);
> +		if (size >= dbk_size) {
> +			iram_info->axi_sram_use |= CODA7_USE_HOST_DBK_ENABLE;
> +			iram_info->buf_dbk_y_use = dev->iram_paddr;
> +			iram_info->buf_dbk_c_use = dev->iram_paddr +
> +						   dbk_size / 2;
> +			size -= dbk_size;
> +		} else {
> +			goto out;
> +		}
> +
> +		bitram_size = round_up(128 * mb_width, 1024);
> +		if (size >= bitram_size) {
> +			iram_info->axi_sram_use |= CODA7_USE_HOST_BIT_ENABLE;
> +			iram_info->buf_bit_use = iram_info->buf_dbk_c_use +
> +						 dbk_size / 2;
> +			size -= bitram_size;
> +		} else {
> +			goto out;
> +		}
> +
> +		ipacdc_size = round_up(128 * mb_width, 1024);
> +		if (size >= ipacdc_size) {
> +			iram_info->axi_sram_use |= CODA7_USE_HOST_IP_ENABLE;
> +			iram_info->buf_ip_ac_dc_use = iram_info->buf_bit_use +
> +						      bitram_size;
> +			size -= ipacdc_size;
> +		} else {
> +			goto out;
> +		}
> +
> +		ovl_size = round_up(80 * mb_width, 1024);
>  	}
>  
>  out:
> @@ -1300,7 +1341,12 @@ out:
>  
>  	if (dev->devtype->product == CODA_7541) {
>  		/* TODO - Enabling these causes picture errors on CODA7541 */
> -		if (ctx->inst_type == CODA_INST_ENCODER) {
> +		if (ctx->inst_type == CODA_INST_DECODER) {
> +			/* fw 1.4.50 */
> +			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
> +						     CODA7_USE_IP_ENABLE);
> +		} else {
> +			/* fw 13.4.29 */
>  			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
>  						     CODA7_USE_HOST_DBK_ENABLE |
>  						     CODA7_USE_IP_ENABLE |


-- 

Cheers,
Mauro

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 6/8] [media] coda: dynamic IRAM setup for decoder
  2013-07-26 15:18   ` Mauro Carvalho Chehab
@ 2013-07-29 12:30     ` Philipp Zabel
  0 siblings, 0 replies; 15+ messages in thread
From: Philipp Zabel @ 2013-07-29 12:30 UTC (permalink / raw)
  To: Mauro Carvalho Chehab
  Cc: linux-media, Kamil Debski, Javier Martin, Sylwester Nawrocki,
	Gaëtan Carlier, Wei Yongjun

Hi Mauro,

Am Freitag, den 26.07.2013, 12:18 -0300 schrieb Mauro Carvalho Chehab:
> Em Fri, 21 Jun 2013 09:55:32 +0200
> Philipp Zabel <p.zabel@pengutronix.de> escreveu:
> 
> > Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
> 
> Please add a description for the patch.

Sorry I missed this; the description is the same as for the encoder IRAM
setup:

"This sets up IRAM areas used as temporary memory for the different
 hardware units depending on the frame size."

regards
Philipp

> Thanks!
> Mauro
> 
> > ---
> >  drivers/media/platform/coda.c | 50 +++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 48 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
> > index 1f3bd43..856a93e 100644
> > --- a/drivers/media/platform/coda.c
> > +++ b/drivers/media/platform/coda.c
> > @@ -1212,6 +1212,7 @@ static void coda_setup_iram(struct coda_ctx *ctx)
> >  	int ipacdc_size;
> >  	int bitram_size;
> >  	int dbk_size;
> > +	int ovl_size;
> >  	int mb_width;
> >  	int me_size;
> >  	int size;
> > @@ -1273,7 +1274,47 @@ static void coda_setup_iram(struct coda_ctx *ctx)
> >  			size -= ipacdc_size;
> >  		}
> >  
> > -		/* OVL disabled for encoder */
> > +		/* OVL and BTP disabled for encoder */
> > +	} else if (ctx->inst_type == CODA_INST_DECODER) {
> > +		struct coda_q_data *q_data_dst;
> > +		int mb_height;
> > +
> > +		q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> > +		mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
> > +		mb_height = DIV_ROUND_UP(q_data_dst->height, 16);
> > +
> > +		dbk_size = round_up(256 * mb_width, 1024);
> > +		if (size >= dbk_size) {
> > +			iram_info->axi_sram_use |= CODA7_USE_HOST_DBK_ENABLE;
> > +			iram_info->buf_dbk_y_use = dev->iram_paddr;
> > +			iram_info->buf_dbk_c_use = dev->iram_paddr +
> > +						   dbk_size / 2;
> > +			size -= dbk_size;
> > +		} else {
> > +			goto out;
> > +		}
> > +
> > +		bitram_size = round_up(128 * mb_width, 1024);
> > +		if (size >= bitram_size) {
> > +			iram_info->axi_sram_use |= CODA7_USE_HOST_BIT_ENABLE;
> > +			iram_info->buf_bit_use = iram_info->buf_dbk_c_use +
> > +						 dbk_size / 2;
> > +			size -= bitram_size;
> > +		} else {
> > +			goto out;
> > +		}
> > +
> > +		ipacdc_size = round_up(128 * mb_width, 1024);
> > +		if (size >= ipacdc_size) {
> > +			iram_info->axi_sram_use |= CODA7_USE_HOST_IP_ENABLE;
> > +			iram_info->buf_ip_ac_dc_use = iram_info->buf_bit_use +
> > +						      bitram_size;
> > +			size -= ipacdc_size;
> > +		} else {
> > +			goto out;
> > +		}
> > +
> > +		ovl_size = round_up(80 * mb_width, 1024);
> >  	}
> >  
> >  out:
> > @@ -1300,7 +1341,12 @@ out:
> >  
> >  	if (dev->devtype->product == CODA_7541) {
> >  		/* TODO - Enabling these causes picture errors on CODA7541 */
> > -		if (ctx->inst_type == CODA_INST_ENCODER) {
> > +		if (ctx->inst_type == CODA_INST_DECODER) {
> > +			/* fw 1.4.50 */
> > +			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
> > +						     CODA7_USE_IP_ENABLE);
> > +		} else {
> > +			/* fw 13.4.29 */
> >  			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
> >  						     CODA7_USE_HOST_DBK_ENABLE |
> >  						     CODA7_USE_IP_ENABLE |
> 
> 



^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2013-07-29 12:31 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-21  7:55 [PATCH v2 0/8] CODA7541 decoding support Philipp Zabel
2013-06-21  7:55 ` [PATCH v2 1/8] [media] coda: use vb2_set_plane_payload instead of setting v4l2_planes[0].bytesused directly Philipp Zabel
2013-07-26 13:02   ` Mauro Carvalho Chehab
2013-07-26 13:22     ` Philipp Zabel
2013-06-21  7:55 ` [PATCH v2 2/8] [media] coda: dynamic IRAM setup for encoder Philipp Zabel
2013-06-21  7:55 ` [PATCH v2 3/8] [media] coda: do not allocate maximum number of framebuffers " Philipp Zabel
2013-06-21  7:55 ` [PATCH v2 4/8] [media] coda: update CODA7541 to firmware 1.4.50 Philipp Zabel
2013-06-21  7:55 ` [PATCH v2 5/8] [media] coda: add bitstream ringbuffer handling for decoder Philipp Zabel
2013-06-21  7:55 ` [PATCH v2 6/8] [media] coda: dynamic IRAM setup " Philipp Zabel
2013-07-26 15:18   ` Mauro Carvalho Chehab
2013-07-29 12:30     ` Philipp Zabel
2013-06-21  7:55 ` [PATCH v2 7/8] [media] coda: split encoder specific parts out of device_run and irq_handler Philipp Zabel
2013-06-21  7:55 ` [PATCH v2 8/8] [media] coda: add CODA7541 decoding support Philipp Zabel
2013-06-27 10:10   ` Kamil Debski
2013-06-27 14:19     ` Philipp Zabel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox