Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v2] media: verisilicon: Simplify motion vectors and rfc buffers allocation
From: Benjamin Gaignard @ 2026-04-15  8:28 UTC (permalink / raw)
  To: Nicolas Dufresne, p.zabel, mchehab, heiko
  Cc: linux-media, linux-rockchip, linux-kernel, linux-arm-kernel,
	kernel
In-Reply-To: <43b252cc6186829e021022480ebfe34274c3e572.camel@collabora.com>


Le 08/04/2026 à 22:41, Nicolas Dufresne a écrit :
> Hi,
>
> Le mercredi 25 mars 2026 à 14:17 +0100, Benjamin Gaignard a écrit :
>> Until now we reserve the space needed for motion vectors and reference
>> frame compression at the end of the frame buffer.
>> This patch disentangles mv and rfc from frame buffers by allocating
> Use imperative tone, avoid starting a story (Once upon a time ...), drop "This patch", we know it's a patch.
>
>> distinct buffers for each purpose.
>> That simplifies the code by removing a lot of offset computations.
>>
>> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
>> ---
>> version 2:
>> - rework commit message
>> - free mv and rfc buffer before signal the buffer completion.
>>
>>   drivers/media/platform/verisilicon/hantro.h   |  17 +-
>>   .../media/platform/verisilicon/hantro_av1.c   |   7 -
>>   .../media/platform/verisilicon/hantro_av1.h   |   1 -
>>   .../media/platform/verisilicon/hantro_g2.c    |  36 --
>>   .../platform/verisilicon/hantro_g2_hevc_dec.c |  24 +-
>>   .../platform/verisilicon/hantro_g2_vp9_dec.c  |  12 +-
>>   .../media/platform/verisilicon/hantro_hevc.c  |  20 +-
>>   .../media/platform/verisilicon/hantro_hw.h    |  99 +-----
>>   .../platform/verisilicon/hantro_postproc.c    |  29 +-
>>   .../media/platform/verisilicon/hantro_v4l2.c  | 314 ++++++++++++++++--
>>   .../verisilicon/rockchip_vpu981_hw_av1_dec.c  |  16 +-
>>   11 files changed, 359 insertions(+), 216 deletions(-)
>>
>> diff --git a/drivers/media/platform/verisilicon/hantro.h b/drivers/media/platform/verisilicon/hantro.h
>> index 0353de154a1e..c4ceb9c99016 100644
>> --- a/drivers/media/platform/verisilicon/hantro.h
>> +++ b/drivers/media/platform/verisilicon/hantro.h
>> @@ -31,6 +31,9 @@ struct hantro_ctx;
>>   struct hantro_codec_ops;
>>   struct hantro_postproc_ops;
>>   
>> +#define MAX_MV_BUFFERS	MAX_POSTPROC_BUFFERS
>> +#define MAX_RFC_BUFFERS	MAX_POSTPROC_BUFFERS
> Why two defines ? And why 64 ? Isn't the maximum something per codec ?

There is one define per new array, to make the code more readable when
iterating over these arrays. MAX_POSTPROC_BUFFERS is the maximum number of
buffers for the capture queue, and it isn't codec specific.

>
>> +
>>   #define HANTRO_JPEG_ENCODER	BIT(0)
>>   #define HANTRO_ENCODERS		0x0000ffff
>>   #define HANTRO_MPEG2_DECODER	BIT(16)
>> @@ -237,6 +240,9 @@ struct hantro_dev {
>>    * @need_postproc:	Set to true if the bitstream features require to
>>    *			use the post-processor.
>>    *
>> + * @dec_mv:		motion vectors buffers for the context.
>> + * @dec_rfc:		reference frame compression buffers for the context.
>> + *
>>    * @codec_ops:		Set of operations related to codec mode.
>>    * @postproc:		Post-processing context.
>>    * @h264_dec:		H.264-decoding context.
>> @@ -264,6 +270,9 @@ struct hantro_ctx {
>>   	int jpeg_quality;
>>   	int bit_depth;
>>   
>> +	struct hantro_aux_buf dec_mv[MAX_MV_BUFFERS];
>> +	struct hantro_aux_buf dec_rfc[MAX_RFC_BUFFERS];
>> +
>>   	const struct hantro_codec_ops *codec_ops;
>>   	struct hantro_postproc_ctx postproc;
>>   	bool need_postproc;
>> @@ -334,14 +343,14 @@ struct hantro_vp9_decoded_buffer_info {
>>   	unsigned short width;
>>   	unsigned short height;
>>   	size_t chroma_offset;
>> -	size_t mv_offset;
>> +	dma_addr_t mv_addr;
>>   	u32 bit_depth : 4;
>>   };
>>   
>>   struct hantro_av1_decoded_buffer_info {
>>   	/* Info needed when the decoded frame serves as a reference frame. */
>>   	size_t chroma_offset;
>> -	size_t mv_offset;
>> +	dma_addr_t mv_addr;
>>   };
>>   
>>   struct hantro_decoded_buffer {
>> @@ -507,4 +516,8 @@ void hantro_postproc_free(struct hantro_ctx *ctx);
>>   int hanto_postproc_enum_framesizes(struct hantro_ctx *ctx,
>>   				   struct v4l2_frmsizeenum *fsize);
>>   
>> +dma_addr_t hantro_mv_get_buf_addr(struct hantro_ctx *ctx, int index);
>> +dma_addr_t hantro_rfc_get_luma_buf_addr(struct hantro_ctx *ctx, int index);
>> +dma_addr_t hantro_rfc_get_chroma_buf_addr(struct hantro_ctx *ctx, int index);
>> +
>>   #endif /* HANTRO_H_ */
>> diff --git a/drivers/media/platform/verisilicon/hantro_av1.c b/drivers/media/platform/verisilicon/hantro_av1.c
>> index 5a51ac877c9c..3a80a7994f67 100644
>> --- a/drivers/media/platform/verisilicon/hantro_av1.c
>> +++ b/drivers/media/platform/verisilicon/hantro_av1.c
>> @@ -222,13 +222,6 @@ size_t hantro_av1_luma_size(struct hantro_ctx *ctx)
>>   	return ctx->ref_fmt.plane_fmt[0].bytesperline * ctx->ref_fmt.height;
>>   }
>>   
>> -size_t hantro_av1_chroma_size(struct hantro_ctx *ctx)
>> -{
>> -	size_t cr_offset = hantro_av1_luma_size(ctx);
>> -
>> -	return ALIGN((cr_offset * 3) / 2, 64);
>> -}
>> -
>>   static void hantro_av1_tiles_free(struct hantro_ctx *ctx)
>>   {
>>   	struct hantro_dev *vpu = ctx->dev;
>> diff --git a/drivers/media/platform/verisilicon/hantro_av1.h b/drivers/media/platform/verisilicon/hantro_av1.h
>> index 4e2122b95cdd..330f7938d097 100644
>> --- a/drivers/media/platform/verisilicon/hantro_av1.h
>> +++ b/drivers/media/platform/verisilicon/hantro_av1.h
>> @@ -41,7 +41,6 @@ int hantro_av1_get_order_hint(struct hantro_ctx *ctx, int ref);
>>   int hantro_av1_frame_ref(struct hantro_ctx *ctx, u64 timestamp);
>>   void hantro_av1_clean_refs(struct hantro_ctx *ctx);
>>   size_t hantro_av1_luma_size(struct hantro_ctx *ctx);
>> -size_t hantro_av1_chroma_size(struct hantro_ctx *ctx);
>>   void hantro_av1_exit(struct hantro_ctx *ctx);
>>   int hantro_av1_init(struct hantro_ctx *ctx);
>>   int hantro_av1_prepare_run(struct hantro_ctx *ctx);
>> diff --git a/drivers/media/platform/verisilicon/hantro_g2.c b/drivers/media/platform/verisilicon/hantro_g2.c
>> index 318673b66da8..4ae7df53dcb1 100644
>> --- a/drivers/media/platform/verisilicon/hantro_g2.c
>> +++ b/drivers/media/platform/verisilicon/hantro_g2.c
>> @@ -99,39 +99,3 @@ size_t hantro_g2_chroma_offset(struct hantro_ctx *ctx)
>>   {
>>   	return ctx->ref_fmt.plane_fmt[0].bytesperline *	ctx->ref_fmt.height;
>>   }
>> -
>> -size_t hantro_g2_motion_vectors_offset(struct hantro_ctx *ctx)
>> -{
>> -	size_t cr_offset = hantro_g2_chroma_offset(ctx);
>> -
>> -	return ALIGN((cr_offset * 3) / 2, G2_ALIGN);
>> -}
>> -
>> -static size_t hantro_g2_mv_size(struct hantro_ctx *ctx)
>> -{
>> -	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
>> -	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
>> -	unsigned int pic_width_in_ctbs, pic_height_in_ctbs;
>> -	unsigned int max_log2_ctb_size;
>> -
>> -	max_log2_ctb_size = sps->log2_min_luma_coding_block_size_minus3 + 3 +
>> -			    sps->log2_diff_max_min_luma_coding_block_size;
>> -	pic_width_in_ctbs = (sps->pic_width_in_luma_samples +
>> -			    (1 << max_log2_ctb_size) - 1) >> max_log2_ctb_size;
>> -	pic_height_in_ctbs = (sps->pic_height_in_luma_samples + (1 << max_log2_ctb_size) - 1)
>> -			     >> max_log2_ctb_size;
>> -
>> -	return pic_width_in_ctbs * pic_height_in_ctbs * (1 << (2 * (max_log2_ctb_size - 4))) * 16;
>> -}
>> -
>> -size_t hantro_g2_luma_compress_offset(struct hantro_ctx *ctx)
>> -{
>> -	return hantro_g2_motion_vectors_offset(ctx) +
>> -	       hantro_g2_mv_size(ctx);
>> -}
>> -
>> -size_t hantro_g2_chroma_compress_offset(struct hantro_ctx *ctx)
>> -{
>> -	return hantro_g2_luma_compress_offset(ctx) +
>> -	       hantro_hevc_luma_compressed_size(ctx->dst_fmt.width, ctx->dst_fmt.height);
>> -}
>> diff --git a/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c
>> index e8c2e83379de..d0af9fb882ba 100644
>> --- a/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c
>> +++ b/drivers/media/platform/verisilicon/hantro_g2_hevc_dec.c
>> @@ -383,9 +383,6 @@ static int set_ref(struct hantro_ctx *ctx)
>>   	struct vb2_v4l2_buffer *vb2_dst;
>>   	struct hantro_decoded_buffer *dst;
>>   	size_t cr_offset = hantro_g2_chroma_offset(ctx);
>> -	size_t mv_offset = hantro_g2_motion_vectors_offset(ctx);
>> -	size_t compress_luma_offset = hantro_g2_luma_compress_offset(ctx);
>> -	size_t compress_chroma_offset = hantro_g2_chroma_compress_offset(ctx);
>>   	u32 max_ref_frames;
>>   	u16 dpb_longterm_e;
>>   	static const struct hantro_reg cur_poc[] = {
>> @@ -453,14 +450,17 @@ static int set_ref(struct hantro_ctx *ctx)
>>   	dpb_longterm_e = 0;
>>   	for (i = 0; i < decode_params->num_active_dpb_entries &&
>>   	     i < (V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1); i++) {
>> +		int index = hantro_hevc_get_ref_buf_index(ctx, dpb[i].pic_order_cnt_val);
>>   		luma_addr = hantro_hevc_get_ref_buf(ctx, dpb[i].pic_order_cnt_val);
>>   		if (!luma_addr)
>>   			return -ENOMEM;
>>   
>>   		chroma_addr = luma_addr + cr_offset;
>> -		mv_addr = luma_addr + mv_offset;
>> -		compress_luma_addr = luma_addr + compress_luma_offset;
>> -		compress_chroma_addr = luma_addr + compress_chroma_offset;
>> +		mv_addr = hantro_mv_get_buf_addr(ctx, index);
>> +		if (ctx->hevc_dec.use_compression) {
>> +			compress_luma_addr = hantro_rfc_get_luma_buf_addr(ctx, index);
>> +			compress_chroma_addr = hantro_rfc_get_chroma_buf_addr(ctx, index);
>> +		}
>>   
>>   		if (dpb[i].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE)
>>   			dpb_longterm_e |= BIT(V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1 - i);
>> @@ -478,13 +478,17 @@ static int set_ref(struct hantro_ctx *ctx)
>>   	if (!luma_addr)
>>   		return -ENOMEM;
>>   
>> -	if (hantro_hevc_add_ref_buf(ctx, decode_params->pic_order_cnt_val, luma_addr))
>> +	if (hantro_hevc_add_ref_buf(ctx, decode_params->pic_order_cnt_val, luma_addr, vb2_dst))
>>   		return -EINVAL;
>>   
>>   	chroma_addr = luma_addr + cr_offset;
>> -	mv_addr = luma_addr + mv_offset;
>> -	compress_luma_addr = luma_addr + compress_luma_offset;
>> -	compress_chroma_addr = luma_addr + compress_chroma_offset;
>> +	mv_addr = hantro_mv_get_buf_addr(ctx, dst->base.vb.vb2_buf.index);
>> +	if (ctx->hevc_dec.use_compression) {
>> +		compress_luma_addr =
>> +			hantro_rfc_get_luma_buf_addr(ctx, dst->base.vb.vb2_buf.index);
>> +		compress_chroma_addr =
>> +			hantro_rfc_get_chroma_buf_addr(ctx, dst->base.vb.vb2_buf.index);
>> +	}
>>   
>>   	hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr);
>>   	hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr);
>> diff --git a/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c b/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c
>> index 56c79e339030..1e96d0fce72a 100644
>> --- a/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c
>> +++ b/drivers/media/platform/verisilicon/hantro_g2_vp9_dec.c
>> @@ -129,7 +129,7 @@ static void config_output(struct hantro_ctx *ctx,
>>   			  struct hantro_decoded_buffer *dst,
>>   			  const struct v4l2_ctrl_vp9_frame *dec_params)
>>   {
>> -	dma_addr_t luma_addr, chroma_addr, mv_addr;
>> +	dma_addr_t luma_addr, chroma_addr;
>>   
>>   	hantro_reg_write(ctx->dev, &g2_out_dis, 0);
>>   	if (!ctx->dev->variant->legacy_regs)
>> @@ -142,9 +142,8 @@ static void config_output(struct hantro_ctx *ctx,
>>   	hantro_write_addr(ctx->dev, G2_OUT_CHROMA_ADDR, chroma_addr);
>>   	dst->vp9.chroma_offset = hantro_g2_chroma_offset(ctx);
>>   
>> -	mv_addr = luma_addr + hantro_g2_motion_vectors_offset(ctx);
>> -	hantro_write_addr(ctx->dev, G2_OUT_MV_ADDR, mv_addr);
>> -	dst->vp9.mv_offset = hantro_g2_motion_vectors_offset(ctx);
>> +	dst->vp9.mv_addr = hantro_mv_get_buf_addr(ctx, dst->base.vb.vb2_buf.index);
>> +	hantro_write_addr(ctx->dev, G2_OUT_MV_ADDR, dst->vp9.mv_addr);
>>   }
>>   
>>   struct hantro_vp9_ref_reg {
>> @@ -215,15 +214,12 @@ static void config_ref_registers(struct hantro_ctx *ctx,
>>   			.c_base = G2_REF_CHROMA_ADDR(5),
>>   		},
>>   	};
>> -	dma_addr_t mv_addr;
>>   
>>   	config_ref(ctx, dst, &ref_regs[0], dec_params, dec_params->last_frame_ts);
>>   	config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params->golden_frame_ts);
>>   	config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params->alt_frame_ts);
>>   
>> -	mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
>> -		  mv_ref->vp9.mv_offset;
>> -	hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
>> +	hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_ref->vp9.mv_addr);
>>   
>>   	hantro_reg_write(ctx->dev, &vp9_last_sign_bias,
>>   			 dec_params->ref_frame_sign_bias & V4L2_VP9_SIGN_BIAS_LAST ? 1 : 0);
>> diff --git a/drivers/media/platform/verisilicon/hantro_hevc.c b/drivers/media/platform/verisilicon/hantro_hevc.c
>> index 83cd12b0ddd6..272ce336b1c6 100644
>> --- a/drivers/media/platform/verisilicon/hantro_hevc.c
>> +++ b/drivers/media/platform/verisilicon/hantro_hevc.c
>> @@ -54,7 +54,24 @@ dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
>>   	return 0;
>>   }
>>   
>> -int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr)
>> +int hantro_hevc_get_ref_buf_index(struct hantro_ctx *ctx, s32 poc)
>> +{
>> +	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
>> +	int i;
>> +
>> +	/* Find the reference buffer in already known ones */
>> +	for (i = 0;  i < NUM_REF_PICTURES; i++) {
>> +		if (hevc_dec->ref_bufs_poc[i] == poc)
>> +			return hevc_dec->ref_vb2[i]->vb2_buf.index;
> I'm a little worried that there is no flag indicating whether the entry was
> set or not. Notably, POC 0 is valid; do we initialize to an invalid value to
> prevent matching an unset or unused entry?

I will add a check of hevc_dec->ref_bufs_used here.

>
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx,
>> +			    int poc,
>> +			    dma_addr_t addr,
>> +			    struct vb2_v4l2_buffer *vb2)
>>   {
>>   	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
>>   	int i;
>> @@ -65,6 +82,7 @@ int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr)
>>   			hevc_dec->ref_bufs_used |= 1 << i;
>>   			hevc_dec->ref_bufs_poc[i] = poc;
>>   			hevc_dec->ref_bufs[i].dma = addr;
>> +			hevc_dec->ref_vb2[i] = vb2;
>>   			return 0;
>>   		}
>>   	}
>> diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
>> index f0e4bca4b2b2..6a1ee9899b60 100644
>> --- a/drivers/media/platform/verisilicon/hantro_hw.h
>> +++ b/drivers/media/platform/verisilicon/hantro_hw.h
>> @@ -162,6 +162,7 @@ struct hantro_hevc_dec_hw_ctx {
>>   	struct hantro_aux_buf scaling_lists;
>>   	s32 ref_bufs_poc[NUM_REF_PICTURES];
>>   	u32 ref_bufs_used;
>> +	struct vb2_v4l2_buffer *ref_vb2[NUM_REF_PICTURES];
>>   	struct hantro_hevc_dec_ctrls ctrls;
>>   	unsigned int num_tile_cols_allocated;
>>   	bool use_compression;
>> @@ -457,7 +458,10 @@ int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx);
>>   int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx);
>>   void hantro_hevc_ref_init(struct hantro_ctx *ctx);
>>   dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc);
>> -int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr);
>> +int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc,
>> +			    dma_addr_t addr,
>> +			    struct vb2_v4l2_buffer *vb2);
>> +int hantro_hevc_get_ref_buf_index(struct hantro_ctx *ctx, s32 poc);
>>   
>>   int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx);
>>   void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx);
>> @@ -469,100 +473,7 @@ static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension)
>>   	return (dimension + 63) / 64;
>>   }
>>   
>> -static inline size_t
>> -hantro_vp9_mv_size(unsigned int width, unsigned int height)
>> -{
>> -	int num_ctbs;
>> -
>> -	/*
>> -	 * There can be up to (CTBs x 64) number of blocks,
>> -	 * and the motion vector for each block needs 16 bytes.
>> -	 */
>> -	num_ctbs = hantro_vp9_num_sbs(width) * hantro_vp9_num_sbs(height);
>> -	return (num_ctbs * 64) * 16;
>> -}
>> -
>> -static inline size_t
>> -hantro_h264_mv_size(unsigned int width, unsigned int height)
>> -{
>> -	/*
>> -	 * A decoded 8-bit 4:2:0 NV12 frame may need memory for up to
>> -	 * 448 bytes per macroblock with additional 32 bytes on
>> -	 * multi-core variants.
>> -	 *
>> -	 * The H264 decoder needs extra space on the output buffers
>> -	 * to store motion vectors. This is needed for reference
>> -	 * frames and only if the format is non-post-processed NV12.
>> -	 *
>> -	 * Memory layout is as follow:
>> -	 *
>> -	 * +---------------------------+
>> -	 * | Y-plane   256 bytes x MBs |
>> -	 * +---------------------------+
>> -	 * | UV-plane  128 bytes x MBs |
>> -	 * +---------------------------+
>> -	 * | MV buffer  64 bytes x MBs |
>> -	 * +---------------------------+
>> -	 * | MC sync          32 bytes |
>> -	 * +---------------------------+
>> -	 */
>> -	return 64 * MB_WIDTH(width) * MB_WIDTH(height) + 32;
>> -}
>> -
>> -static inline size_t
>> -hantro_hevc_mv_size(unsigned int width, unsigned int height)
>> -{
>> -	/*
>> -	 * A CTB can be 64x64, 32x32 or 16x16.
>> -	 * Allocated memory for the "worse" case: 16x16
>> -	 */
>> -	return width * height / 16;
>> -}
>> -
>> -static inline size_t
>> -hantro_hevc_luma_compressed_size(unsigned int width, unsigned int height)
>> -{
>> -	u32 pic_width_in_cbsy =
>> -		round_up((width + CBS_LUMA - 1) / CBS_LUMA, CBS_SIZE);
>> -	u32 pic_height_in_cbsy = (height + CBS_LUMA - 1) / CBS_LUMA;
>> -
>> -	return round_up(pic_width_in_cbsy * pic_height_in_cbsy, CBS_SIZE);
>> -}
>> -
>> -static inline size_t
>> -hantro_hevc_chroma_compressed_size(unsigned int width, unsigned int height)
>> -{
>> -	u32 pic_width_in_cbsc =
>> -		round_up((width + CBS_CHROMA_W - 1) / CBS_CHROMA_W, CBS_SIZE);
>> -	u32 pic_height_in_cbsc = (height / 2 + CBS_CHROMA_H - 1) / CBS_CHROMA_H;
>> -
>> -	return round_up(pic_width_in_cbsc * pic_height_in_cbsc, CBS_SIZE);
>> -}
>> -
>> -static inline size_t
>> -hantro_hevc_compressed_size(unsigned int width, unsigned int height)
>> -{
>> -	return hantro_hevc_luma_compressed_size(width, height) +
>> -	       hantro_hevc_chroma_compressed_size(width, height);
>> -}
>> -
>> -static inline unsigned short hantro_av1_num_sbs(unsigned short dimension)
>> -{
>> -	return DIV_ROUND_UP(dimension, 64);
>> -}
>> -
>> -static inline size_t
>> -hantro_av1_mv_size(unsigned int width, unsigned int height)
>> -{
>> -	size_t num_sbs = hantro_av1_num_sbs(width) * hantro_av1_num_sbs(height);
>> -
>> -	return ALIGN(num_sbs * 384, 16) * 2 + 512;
>> -}
>> -
>>   size_t hantro_g2_chroma_offset(struct hantro_ctx *ctx);
>> -size_t hantro_g2_motion_vectors_offset(struct hantro_ctx *ctx);
>> -size_t hantro_g2_luma_compress_offset(struct hantro_ctx *ctx);
>> -size_t hantro_g2_chroma_compress_offset(struct hantro_ctx *ctx);
>>   
>>   int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx);
>>   int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx);
>> diff --git a/drivers/media/platform/verisilicon/hantro_postproc.c b/drivers/media/platform/verisilicon/hantro_postproc.c
>> index e94d1ba5ef10..2409353c16e4 100644
>> --- a/drivers/media/platform/verisilicon/hantro_postproc.c
>> +++ b/drivers/media/platform/verisilicon/hantro_postproc.c
>> @@ -196,36 +196,11 @@ void hantro_postproc_free(struct hantro_ctx *ctx)
>>   	}
>>   }
>>   
>> -static unsigned int hantro_postproc_buffer_size(struct hantro_ctx *ctx)
>> -{
>> -	unsigned int buf_size;
>> -
>> -	buf_size = ctx->ref_fmt.plane_fmt[0].sizeimage;
>> -	if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
>> -		buf_size += hantro_h264_mv_size(ctx->ref_fmt.width,
>> -						ctx->ref_fmt.height);
>> -	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
>> -		buf_size += hantro_vp9_mv_size(ctx->ref_fmt.width,
>> -					       ctx->ref_fmt.height);
>> -	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) {
>> -		buf_size += hantro_hevc_mv_size(ctx->ref_fmt.width,
>> -						ctx->ref_fmt.height);
>> -		if (ctx->hevc_dec.use_compression)
>> -			buf_size += hantro_hevc_compressed_size(ctx->ref_fmt.width,
>> -								ctx->ref_fmt.height);
>> -	}
>> -	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_AV1_FRAME)
>> -		buf_size += hantro_av1_mv_size(ctx->ref_fmt.width,
>> -					       ctx->ref_fmt.height);
>> -
>> -	return buf_size;
>> -}
>> -
>>   static int hantro_postproc_alloc(struct hantro_ctx *ctx, int index)
>>   {
>>   	struct hantro_dev *vpu = ctx->dev;
>>   	struct hantro_aux_buf *priv = &ctx->postproc.dec_q[index];
>> -	unsigned int buf_size = hantro_postproc_buffer_size(ctx);
>> +	unsigned int buf_size = ctx->ref_fmt.plane_fmt[0].sizeimage;
>>   
>>   	if (!buf_size)
>>   		return -EINVAL;
>> @@ -267,7 +242,7 @@ dma_addr_t
>>   hantro_postproc_get_dec_buf_addr(struct hantro_ctx *ctx, int index)
>>   {
>>   	struct hantro_aux_buf *priv = &ctx->postproc.dec_q[index];
>> -	unsigned int buf_size = hantro_postproc_buffer_size(ctx);
>> +	unsigned int buf_size = ctx->ref_fmt.plane_fmt[0].sizeimage;
>>   	struct hantro_dev *vpu = ctx->dev;
>>   	int ret;
>>   
>> diff --git a/drivers/media/platform/verisilicon/hantro_v4l2.c b/drivers/media/platform/verisilicon/hantro_v4l2.c
>> index fcf3bd9bcda2..f8d4dd518368 100644
>> --- a/drivers/media/platform/verisilicon/hantro_v4l2.c
>> +++ b/drivers/media/platform/verisilicon/hantro_v4l2.c
>> @@ -36,6 +36,9 @@ static int hantro_set_fmt_out(struct hantro_ctx *ctx,
>>   static int hantro_set_fmt_cap(struct hantro_ctx *ctx,
>>   			      struct v4l2_pix_format_mplane *pix_mp);
>>   
>> +static void hantro_mv_free(struct hantro_ctx *ctx);
>> +static void hantro_rfc_free(struct hantro_ctx *ctx);
>> +
>>   static const struct hantro_fmt *
>>   hantro_get_formats(const struct hantro_ctx *ctx, unsigned int *num_fmts, bool need_postproc)
>>   {
>> @@ -362,26 +365,6 @@ static int hantro_try_fmt(const struct hantro_ctx *ctx,
>>   		/* Fill remaining fields */
>>   		v4l2_fill_pixfmt_mp(pix_mp, fmt->fourcc, pix_mp->width,
>>   				    pix_mp->height);
>> -		if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE &&
>> -		    !hantro_needs_postproc(ctx, fmt))
>> -			pix_mp->plane_fmt[0].sizeimage +=
>> -				hantro_h264_mv_size(pix_mp->width,
>> -						    pix_mp->height);
>> -		else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME &&
>> -			 !hantro_needs_postproc(ctx, fmt))
>> -			pix_mp->plane_fmt[0].sizeimage +=
>> -				hantro_vp9_mv_size(pix_mp->width,
>> -						   pix_mp->height);
>> -		else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE &&
>> -			 !hantro_needs_postproc(ctx, fmt))
>> -			pix_mp->plane_fmt[0].sizeimage +=
>> -				hantro_hevc_mv_size(pix_mp->width,
>> -						    pix_mp->height);
>> -		else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_AV1_FRAME &&
>> -			 !hantro_needs_postproc(ctx, fmt))
>> -			pix_mp->plane_fmt[0].sizeimage +=
>> -				hantro_av1_mv_size(pix_mp->width,
>> -						   pix_mp->height);
>>   	} else if (!pix_mp->plane_fmt[0].sizeimage) {
>>   		/*
>>   		 * For coded formats the application can specify
>> @@ -984,6 +967,9 @@ static void hantro_stop_streaming(struct vb2_queue *q)
>>   			ctx->codec_ops->exit(ctx);
>>   	}
>>   
>> +	hantro_mv_free(ctx);
>> +	hantro_rfc_free(ctx);
>> +
>>   	/*
>>   	 * The mem2mem framework calls v4l2_m2m_cancel_job before
>>   	 * .stop_streaming, so there isn't any job running and
>> @@ -1025,3 +1011,291 @@ const struct vb2_ops hantro_queue_ops = {
>>   	.start_streaming = hantro_start_streaming,
>>   	.stop_streaming = hantro_stop_streaming,
>>   };
>> +
>> +static inline size_t
>> +hantro_vp9_mv_size(unsigned int width, unsigned int height)
> I don't much like that we are adding more codec-specific functions in
> hantro_v4l2.c. Can we move these into codec-specific headers (since they are
> inline), just to keep things separate?

I will do that, and maybe more cleanup, in an additional patch.

>
>> +{
>> +	int num_ctbs;
>> +
>> +	/*
>> +	 * There can be up to (CTBs x 64) number of blocks,
>> +	 * and the motion vector for each block needs 16 bytes.
>> +	 */
>> +	num_ctbs = hantro_vp9_num_sbs(width) * hantro_vp9_num_sbs(height);
>> +	return (num_ctbs * 64) * 16;
>> +}
>> +
>> +static inline size_t
>> +hantro_h264_mv_size(unsigned int width, unsigned int height)
>> +{
>> +	/*
>> +	 * A decoded 8-bit 4:2:0 NV12 frame may need memory for up to
>> +	 * 448 bytes per macroblock with additional 32 bytes on
>> +	 * multi-core variants.
>> +	 *
>> +	 * The H264 decoder needs extra space on the output buffers
>> +	 * to store motion vectors. This is needed for reference
>> +	 * frames and only if the format is non-post-processed NV12.
>> +	 *
>> +	 * Memory layout is as follow:
>> +	 *
>> +	 * +---------------------------+
>> +	 * | Y-plane   256 bytes x MBs |
>> +	 * +---------------------------+
>> +	 * | UV-plane  128 bytes x MBs |
>> +	 * +---------------------------+
>> +	 * | MV buffer  64 bytes x MBs |
>> +	 * +---------------------------+
>> +	 * | MC sync          32 bytes |
>> +	 * +---------------------------+
>> +	 */
>> +	return 64 * MB_WIDTH(width) * MB_WIDTH(height) + 32;
>> +}
>> +
>> +static inline size_t
>> +hantro_hevc_mv_size(unsigned int width, unsigned int height, int depth)
>> +{
>> +	/*
>> +	 * A CTB can be 64x64, 32x32 or 16x16.
>> +	 * Allocated memory for the "worse" case: 16x16
>> +	 */
>> +	return DIV_ROUND_UP(width * height * depth / 8, 128);
>> +}
>> +
>> +static inline unsigned short hantro_av1_num_sbs(unsigned short dimension)
>> +{
>> +	return DIV_ROUND_UP(dimension, 64);
>> +}
>> +
>> +static inline size_t
>> +hantro_av1_mv_size(unsigned int width, unsigned int height)
>> +{
>> +	size_t num_sbs = hantro_av1_num_sbs(width) * hantro_av1_num_sbs(height);
>> +
>> +	return ALIGN(num_sbs * 384, 16) * 2 + 512;
>> +}
>> +
>> +static void hantro_mv_free(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int i;
>> +
>> +	for (i = 0; i < MAX_MV_BUFFERS; i++) {
>> +		struct hantro_aux_buf *mv = &ctx->dec_mv[i];
>> +
>> +		if (!mv->cpu)
>> +			continue;
>> +
>> +		dma_free_attrs(vpu->dev, mv->size, mv->cpu,
>> +			       mv->dma, mv->attrs);
>> +		mv->cpu = NULL;
>> +	}
>> +}
>> +
>> +static unsigned int hantro_mv_buffer_size(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int fourcc = ctx->vpu_src_fmt->fourcc;
>> +	int width = ctx->ref_fmt.width;
>> +	int height = ctx->ref_fmt.height;
>> +
>> +	switch (fourcc) {
>> +	case V4L2_PIX_FMT_H264_SLICE:
>> +		return hantro_h264_mv_size(width, height);
>> +	case V4L2_PIX_FMT_VP9_FRAME:
>> +		return hantro_vp9_mv_size(width, height);
>> +	case V4L2_PIX_FMT_HEVC_SLICE:
>> +		return hantro_hevc_mv_size(width, height, ctx->bit_depth);
>> +	case V4L2_PIX_FMT_AV1_FRAME:
>> +		return hantro_av1_mv_size(width, height);
>> +	}
>> +
>> +	/* Should not happen */
>> +	dev_warn(vpu->dev, "Invalid motion vectors size\n");
>> +	return 0;
>> +}
>> +
>> +static int hantro_mv_buffer_alloc(struct hantro_ctx *ctx, int index)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_aux_buf *mv = &ctx->dec_mv[index];
>> +	unsigned int buf_size = hantro_mv_buffer_size(ctx);
>> +
>> +	if (!buf_size)
>> +		return -EINVAL;
>> +
>> +	/*
>> +	 * Motion vectors buffers are only read and write by the
>> +	 * hardware so no mapping is needed.
>> +	 */
>> +	mv->attrs = DMA_ATTR_NO_KERNEL_MAPPING;
>> +	mv->cpu = dma_alloc_attrs(vpu->dev, buf_size, &mv->dma,
>> +				  GFP_KERNEL, mv->attrs);
>> +	if (!mv->cpu)
>> +		return -ENOMEM;
>> +	mv->size = buf_size;
>> +
>> +	return 0;
>> +}
>> +
>> +dma_addr_t
>> +hantro_mv_get_buf_addr(struct hantro_ctx *ctx, int index)
>> +{
>> +	struct hantro_aux_buf *mv = &ctx->dec_mv[index];
>> +	unsigned int buf_size = hantro_mv_buffer_size(ctx);
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int ret;
>> +
>> +	if (mv->size < buf_size && mv->cpu) {
>> +		/* buffer is too small, release it */
>> +		dma_free_attrs(vpu->dev, mv->size, mv->cpu,
>> +			       mv->dma, mv->attrs);
>> +		mv->cpu = NULL;
>> +	}
>> +
>> +	if (!mv->cpu) {
>> +		/* buffer not already allocated, try getting a new one */
>> +		ret = hantro_mv_buffer_alloc(ctx, index);
>> +		if (ret)
>> +			return 0;
>> +	}
>> +
>> +	if (!mv->cpu)
>> +		return 0;
>> +
>> +	return mv->dma;
>> +}
>> +
>> +static inline size_t
>> +hantro_hevc_luma_compressed_size(unsigned int width, unsigned int height)
>> +{
>> +	u32 pic_width_in_cbsy =
>> +		round_up((width + CBS_LUMA - 1) / CBS_LUMA, CBS_SIZE);
>> +	u32 pic_height_in_cbsy = (height + CBS_LUMA - 1) / CBS_LUMA;
>> +
>> +	return round_up(pic_width_in_cbsy * pic_height_in_cbsy, CBS_SIZE);
>> +}
>> +
>> +static inline size_t
>> +hantro_hevc_chroma_compressed_size(unsigned int width, unsigned int height)
>> +{
>> +	u32 pic_width_in_cbsc =
>> +		round_up((width + CBS_CHROMA_W - 1) / CBS_CHROMA_W, CBS_SIZE);
>> +	u32 pic_height_in_cbsc = (height / 2 + CBS_CHROMA_H - 1) / CBS_CHROMA_H;
>> +
>> +	return round_up(pic_width_in_cbsc * pic_height_in_cbsc, CBS_SIZE);
>> +}
>> +
>> +static inline size_t
>> +hantro_hevc_compressed_size(unsigned int width, unsigned int height)
>> +{
>> +	return hantro_hevc_luma_compressed_size(width, height) +
>> +	       hantro_hevc_chroma_compressed_size(width, height);
>> +}
>> +
>> +static void hantro_rfc_free(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int i;
>> +
>> +	for (i = 0; i < MAX_MV_BUFFERS; i++) {
>> +		struct hantro_aux_buf *rfc = &ctx->dec_rfc[i];
>> +
>> +		if (!rfc->cpu)
>> +			continue;
>> +
>> +		dma_free_attrs(vpu->dev, rfc->size, rfc->cpu,
>> +			       rfc->dma, rfc->attrs);
>> +		rfc->cpu = NULL;
>> +	}
>> +}
>> +
>> +static unsigned int hantro_rfc_buffer_size(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int fourcc = ctx->vpu_src_fmt->fourcc;
>> +	int width = ctx->ref_fmt.width;
>> +	int height = ctx->ref_fmt.height;
>> +
>> +	switch (fourcc) {
>> +	case V4L2_PIX_FMT_HEVC_SLICE:
>> +		return hantro_hevc_compressed_size(width, height);
>> +	}
>> +
>> +	/* Should not happen */
>> +	dev_warn(vpu->dev, "Invalid rfc size\n");
>> +	return 0;
>> +}
>> +
>> +static int hantro_rfc_buffer_alloc(struct hantro_ctx *ctx, int index)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct hantro_aux_buf *rfc = &ctx->dec_rfc[index];
>> +	unsigned int buf_size = hantro_rfc_buffer_size(ctx);
>> +
>> +	if (!buf_size)
>> +		return -EINVAL;
>> +
>> +	/*
>> +	 * RFC buffers are only read and write by the
>> +	 * hardware so no mapping is needed.
>> +	 */
>> +	rfc->attrs = DMA_ATTR_NO_KERNEL_MAPPING;
>> +	rfc->cpu = dma_alloc_attrs(vpu->dev, buf_size, &rfc->dma,
>> +				   GFP_KERNEL, rfc->attrs);
>> +	if (!rfc->cpu)
>> +		return -ENOMEM;
>> +	rfc->size = buf_size;
>> +
>> +	return 0;
>> +}
>> +
>> +dma_addr_t
>> +hantro_rfc_get_luma_buf_addr(struct hantro_ctx *ctx, int index)
>> +{
>> +	struct hantro_aux_buf *rfc = &ctx->dec_rfc[index];
>> +	unsigned int buf_size = hantro_rfc_buffer_size(ctx);
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int ret;
>> +
>> +	if (rfc->size < buf_size && rfc->cpu) {
>> +		/* buffer is too small, release it */
>> +		dma_free_attrs(vpu->dev, rfc->size, rfc->cpu,
>> +			       rfc->dma, rfc->attrs);
>> +		rfc->cpu = NULL;
>> +	}
>> +
>> +	if (!rfc->cpu) {
>> +		/* buffer not already allocated, try getting a new one */
>> +		ret = hantro_rfc_buffer_alloc(ctx, index);
>> +		if (ret)
>> +			return 0;
>> +	}
>> +
>> +	if (!rfc->cpu)
>> +		return 0;
>> +
>> +	return rfc->dma;
>> +}
>> +
>> +dma_addr_t
>> +hantro_rfc_get_chroma_buf_addr(struct hantro_ctx *ctx, int index)
>> +{
>> +	dma_addr_t luma_addr = hantro_rfc_get_luma_buf_addr(ctx, index);
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	int fourcc = ctx->vpu_src_fmt->fourcc;
>> +	int width = ctx->ref_fmt.width;
>> +	int height = ctx->ref_fmt.height;
>> +
>> +	if (!luma_addr)
>> +		return -EINVAL;
>> +
>> +	switch (fourcc) {
>> +	case V4L2_PIX_FMT_HEVC_SLICE:
>> +		return luma_addr + hantro_hevc_luma_compressed_size(width, height);
>> +	}
>> +
>> +	/* Should not happen */
>> +	dev_warn(vpu->dev, "Invalid rfc chroma address\n");
>> +	return 0;
>> +}
>> diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>> index c1ada14df4c3..21da8ddfc4b3 100644
>> --- a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>> +++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
>> @@ -62,7 +62,7 @@ rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
>>   	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
>>   	struct hantro_dev *vpu = ctx->dev;
>>   	struct hantro_decoded_buffer *dst;
>> -	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
>> +	dma_addr_t luma_addr, chroma_addr = 0;
>>   	int cur_width = frame->frame_width_minus_1 + 1;
>>   	int cur_height = frame->frame_height_minus_1 + 1;
>>   	int scale_width =
>> @@ -120,11 +120,10 @@ rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
>>   	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
>>   	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
>>   	chroma_addr = luma_addr + dst->av1.chroma_offset;
>> -	mv_addr = luma_addr + dst->av1.mv_offset;
>>   
>>   	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
>>   	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
>> -	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
>> +	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), dst->av1.mv_addr);
>>   
>>   	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
>>   		(scale_height != (1 << AV1_REF_SCALE_SHIFT));
>> @@ -180,11 +179,10 @@ static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
>>   		if (idx >= 0) {
>>   			dma_addr_t luma_addr, mv_addr = 0;
>>   			struct hantro_decoded_buffer *seg;
>> -			size_t mv_offset = hantro_av1_chroma_size(ctx);
>>   
>>   			seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
>>   			luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
>> -			mv_addr = luma_addr + mv_offset;
>> +			mv_addr = hantro_mv_get_buf_addr(ctx, seg->base.vb.vb2_buf.index);
>>   
>>   			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
>>   			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
>> @@ -1350,22 +1348,20 @@ rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
>>   	struct hantro_dev *vpu = ctx->dev;
>>   	struct hantro_decoded_buffer *dst;
>>   	struct vb2_v4l2_buffer *vb2_dst;
>> -	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
>> +	dma_addr_t luma_addr, chroma_addr = 0;
>>   	size_t cr_offset = hantro_av1_luma_size(ctx);
>> -	size_t mv_offset = hantro_av1_chroma_size(ctx);
>>   
>>   	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
>>   	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
>>   	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
>>   	chroma_addr = luma_addr + cr_offset;
>> -	mv_addr = luma_addr + mv_offset;
>>   
>>   	dst->av1.chroma_offset = cr_offset;
>> -	dst->av1.mv_offset = mv_offset;
>> +	dst->av1.mv_addr = hantro_mv_get_buf_addr(ctx, dst->base.vb.vb2_buf.index);
>>   
>>   	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
>>   	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
>> -	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
>> +	hantro_write_addr(vpu, AV1_TILE_OUT_MV, dst->av1.mv_addr);
>>   }
>>   
>>   int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
> I like the direction this is going, as it removes a lot of stride/offset open
> calculation, which has been source of problem, and it also reduce the memory
> allocation overhead. My main worry is that we don't tighly manages the entries
> based on the DPB (references). So even if a reference have gone away, we don't
> explicitly reset the entry and prevent them from being used. I'd like to see
> that improved.

Sure, but I don't want to mix everything in this patch.
This needs to be solved per codec.

Regards,
Benjamin

>
> Nicolas


^ permalink raw reply

* [PATCH v2 3/3] ASoC: fsl: imx-rpmsg: Switch to core ignore-suspend-widgets support
From: Chancel Liu @ 2026-04-15  8:19 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, shengjiu.wang, Xiubo.Lee,
	festevam, nicoleotsuka, Frank.Li, s.hauer, kernel, shumingf,
	rander.wang, pierre-louis.bossart, linux-sound, linux-kernel,
	linuxppc-dev, imx, linux-arm-kernel
In-Reply-To: <20260415081942.4183108-1-chancel.liu@nxp.com>

The imx-rpmsg machine driver currently implements its own logic to
parse ignore-suspend-widgets from Device Tree and manually traverse
DAPM widgets to mark them as ignore_suspend.

It also has a potential issue: some widgets listed in the property
(e.g. "Headphone Jack") belong to the card or CPU DAI DAPM context, so
they are not found when only the codec DAPM context is searched.

Switch to use snd_soc_of_parse_ignore_suspend_widgets() with the
introduction of a generic ignore-suspend-widgets mechanism in the ASoC
core.

Signed-off-by: Chancel Liu <chancel.liu@nxp.com>
---
 sound/soc/fsl/imx-rpmsg.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/sound/soc/fsl/imx-rpmsg.c b/sound/soc/fsl/imx-rpmsg.c
index 40e0043cfe15..15b0733f1524 100644
--- a/sound/soc/fsl/imx-rpmsg.c
+++ b/sound/soc/fsl/imx-rpmsg.c
@@ -87,7 +87,6 @@ static int imx_rpmsg_late_probe(struct snd_soc_card *card)
 	int ret;
 
 	if (data->lpa) {
-		struct snd_soc_component *codec_comp;
 		struct device_node *codec_np;
 		struct device_driver *codec_drv;
 		struct device *codec_dev = NULL;
@@ -107,22 +106,6 @@ static int imx_rpmsg_late_probe(struct snd_soc_card *card)
 			}
 		}
 		if (codec_dev) {
-			codec_comp = snd_soc_lookup_component_nolocked(codec_dev, NULL);
-			if (codec_comp) {
-				int i, num_widgets;
-				const char *widgets;
-				struct snd_soc_dapm_context *dapm;
-
-				num_widgets = of_property_count_strings(data->card.dev->of_node,
-									"ignore-suspend-widgets");
-				for (i = 0; i < num_widgets; i++) {
-					of_property_read_string_index(data->card.dev->of_node,
-								      "ignore-suspend-widgets",
-								      i, &widgets);
-					dapm = snd_soc_component_to_dapm(codec_comp);
-					snd_soc_dapm_ignore_suspend(dapm, widgets);
-				}
-			}
 			codec_drv = codec_dev->driver;
 			if (codec_drv->pm) {
 				memcpy(&lpa_pm, codec_drv->pm, sizeof(lpa_pm));
@@ -274,6 +257,15 @@ static int imx_rpmsg_probe(struct platform_device *pdev)
 		}
 	}
 
+	if (data->lpa && of_property_present(np, "ignore-suspend-widgets")) {
+		ret = snd_soc_of_parse_ignore_suspend_widgets(&data->card,
+							      "ignore-suspend-widgets");
+		if (ret) {
+			dev_err(&pdev->dev, "failed to parse ignore-suspend-widgets: %d\n", ret);
+			return ret;
+		}
+	}
+
 	platform_set_drvdata(pdev, &data->card);
 	snd_soc_card_set_drvdata(&data->card, data);
 	ret = devm_snd_soc_register_card(&pdev->dev, &data->card);
-- 
2.50.1



^ permalink raw reply related

* [PATCH v2 2/3] ASoC: soc-core: Add core support for ignoring suspend on selected DAPM widgets
From: Chancel Liu @ 2026-04-15  8:19 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, shengjiu.wang, Xiubo.Lee,
	festevam, nicoleotsuka, Frank.Li, s.hauer, kernel, shumingf,
	rander.wang, pierre-louis.bossart, linux-sound, linux-kernel,
	linuxppc-dev, imx, linux-arm-kernel
In-Reply-To: <20260415081942.4183108-1-chancel.liu@nxp.com>

Some audio systems require specific DAPM widgets to remain powered
during system suspend. Introduce a generic and reusable mechanism in
the ASoC core to mark selected DAPM widgets as ignore_suspend.

The unified mechanism consists of two parts:
1. Parse and store the name list of widgets to ignore suspend in
struct snd_soc_card

The list of widgets can be provided either by the machine driver or
parsed from Device Tree. Different machines have different routing and
power requirements. Each machine can specify its own widgets to ignore
suspend through a DT property. This enables flexible policy without
hard-coding. A new helper, snd_soc_of_parse_ignore_suspend_widgets(), is added
for this purpose.

2. Apply ignore_suspend flags during snd_soc_bind_card()

After all components have been probed and all DAPM widgets have been
registered, snd_soc_bind_card() performs a unified lookup of the
configured widget names across all DAPM contexts of the card and marks
the matching widgets with ignore_suspend = 1.

Signed-off-by: Chancel Liu <chancel.liu@nxp.com>
---
 include/sound/soc-dapm.h |  1 +
 include/sound/soc.h      |  5 +++++
 sound/soc/soc-core.c     | 43 ++++++++++++++++++++++++++++++++++++++++
 sound/soc/soc-dapm.c     | 30 ++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+)

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 4f8fb7622a13..39c290e0eb7f 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -636,6 +636,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, struct snd_s
 void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w);
 int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card);
 void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card);
+int snd_soc_dapm_ignore_suspend_widgets(struct snd_soc_card *card);
 
 int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream,
 			    struct snd_pcm_hw_params *params, struct snd_soc_dai *dai);
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 5e3eb617d832..7d6fa79f48e3 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1057,10 +1057,14 @@ struct snd_soc_card {
 	int num_dapm_widgets;
 	const struct snd_soc_dapm_route *dapm_routes;
 	int num_dapm_routes;
+	const char **ignore_suspend_widgets;
+	int num_ignore_suspend_widgets;
 	const struct snd_soc_dapm_widget *of_dapm_widgets;
 	int num_of_dapm_widgets;
 	const struct snd_soc_dapm_route *of_dapm_routes;
 	int num_of_dapm_routes;
+	const char **of_ignore_suspend_widgets;
+	int num_of_ignore_suspend_widgets;
 
 	/* lists of probed devices belonging to this card */
 	struct list_head component_dev_list;
@@ -1339,6 +1343,7 @@ void snd_soc_of_parse_node_prefix(struct device_node *np,
 int snd_soc_of_parse_audio_routing(struct snd_soc_card *card,
 				   const char *propname);
 int snd_soc_of_parse_aux_devs(struct snd_soc_card *card, const char *propname);
+int snd_soc_of_parse_ignore_suspend_widgets(struct snd_soc_card *card, const char *propname);
 
 unsigned int snd_soc_daifmt_clock_provider_flipped(unsigned int dai_fmt);
 unsigned int snd_soc_daifmt_clock_provider_from_bitmap(unsigned int bit_frame);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 3fecf9fc903c..705181dae472 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2289,6 +2289,10 @@ static int snd_soc_bind_card(struct snd_soc_card *card)
 	if (ret < 0)
 		goto probe_end;
 
+	ret = snd_soc_dapm_ignore_suspend_widgets(card);
+	if (ret < 0)
+		goto probe_end;
+
 	snd_soc_dapm_new_widgets(card);
 	snd_soc_card_fixup_controls(card);
 
@@ -3294,6 +3298,45 @@ int snd_soc_of_parse_aux_devs(struct snd_soc_card *card, const char *propname)
 }
 EXPORT_SYMBOL_GPL(snd_soc_of_parse_aux_devs);
 
+int snd_soc_of_parse_ignore_suspend_widgets(struct snd_soc_card *card,
+					    const char *propname)
+{
+	struct device_node *np = card->dev->of_node;
+	int num_widgets;
+	const char **widgets;
+	int i;
+
+	num_widgets = of_property_count_strings(np, propname);
+	if (num_widgets < 0) {
+		dev_err(card->dev,
+			"ASoC: Property '%s' does not exist\n", propname);
+		return -EINVAL;
+	}
+
+	widgets = devm_kcalloc(card->dev, num_widgets, sizeof(char *), GFP_KERNEL);
+	if (!widgets)
+		return -ENOMEM;
+
+	for (i = 0; i < num_widgets; i++) {
+		const char *name;
+		int ret = of_property_read_string_index(np, propname, i, &name);
+
+		if (ret) {
+			dev_err(card->dev,
+				"ASoC: Property '%s' could not be read: %d\n",
+				propname, ret);
+			return -EINVAL;
+		}
+		widgets[i] = name;
+	}
+
+	card->num_of_ignore_suspend_widgets = num_widgets;
+	card->of_ignore_suspend_widgets = widgets;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_soc_of_parse_ignore_suspend_widgets);
+
 unsigned int snd_soc_daifmt_clock_provider_flipped(unsigned int dai_fmt)
 {
 	unsigned int inv_dai_fmt = dai_fmt & ~SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK;
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index c5b80d9ed64b..209f86b9add6 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -4595,6 +4595,36 @@ void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card)
 	}
 }
 
+int snd_soc_dapm_ignore_suspend_widgets(struct snd_soc_card *card)
+{
+	struct snd_soc_dapm_widget *w;
+	int i;
+
+	for (i = 0; i < card->num_ignore_suspend_widgets; i++) {
+		w = dapm_find_widget(snd_soc_card_to_dapm(card),
+				     card->ignore_suspend_widgets[i], true);
+		if (!w) {
+			dev_err(card->dev, "ASoC: DAPM unknown ignore suspend widget %s\n",
+				card->ignore_suspend_widgets[i]);
+			return -EINVAL;
+		}
+		w->ignore_suspend = 1;
+	}
+
+	for (i = 0; i < card->num_of_ignore_suspend_widgets; i++) {
+		w = dapm_find_widget(snd_soc_card_to_dapm(card),
+				     card->of_ignore_suspend_widgets[i], true);
+		if (!w) {
+			dev_err(card->dev, "ASoC: DAPM unknown ignore suspend widget %s\n",
+				card->of_ignore_suspend_widgets[i]);
+			return -EINVAL;
+		}
+		w->ignore_suspend = 1;
+	}
+
+	return 0;
+}
+
 static void dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream, int event)
 {
 	struct snd_soc_dai *dai;
-- 
2.50.1



^ permalink raw reply related

* [PATCH v2 1/3] ASoC: dapm: Fix widget lookup with prefixed names across DAPM contexts
From: Chancel Liu @ 2026-04-15  8:19 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, shengjiu.wang, Xiubo.Lee,
	festevam, nicoleotsuka, Frank.Li, s.hauer, kernel, shumingf,
	rander.wang, pierre-louis.bossart, linux-sound, linux-kernel,
	linuxppc-dev, imx, linux-arm-kernel
In-Reply-To: <20260415081942.4183108-1-chancel.liu@nxp.com>

Currently dapm_find_widget() manually constructs a prefixed widget name
based on the provided DAPM context and compares it using strcmp(). This
happens to work in most cases because callers usually know which DAPM
context the target widget belongs to and pass in the matching DAPM
context.

However, this assumption breaks when search_other_contexts is enabled.
In such cases, callers may intentionally pass a different DAPM context,
while searching for a widget that actually belongs to another DAPM
context.

For example, when searching for a "DAC" widget, the widget belongs to
the codec DAPM and is registered with a codec prefix, while the caller
passes card->dapm and intends to search across all DAPM contexts. The
current implementation incorrectly builds the name from the caller's
card DAPM context, causing the lookup to fail even though the widget
exists on the card.

Use snd_soc_dapm_widget_name_cmp() instead, which compares widget names
using the widget's own DAPM context and prefix. It fixes widget lookup
failures when searching across different DAPM contexts on the card.

Fixes: ae4fc532244b ("ASoC: dapm: use component prefix when checking widget names")
Signed-off-by: Chancel Liu <chancel.liu@nxp.com>
Assisted-by: Cody:Claude-3.5-Sonnet
---
 sound/soc/soc-dapm.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index d6192204e613..c5b80d9ed64b 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2906,20 +2906,9 @@ static struct snd_soc_dapm_widget *dapm_find_widget(
 {
 	struct snd_soc_dapm_widget *w;
 	struct snd_soc_dapm_widget *fallback = NULL;
-	char prefixed_pin[80];
-	const char *pin_name;
-	const char *prefix = dapm_prefix(dapm);
-
-	if (prefix) {
-		snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s",
-			 prefix, pin);
-		pin_name = prefixed_pin;
-	} else {
-		pin_name = pin;
-	}
 
 	for_each_card_widgets(dapm->card, w) {
-		if (!strcmp(w->name, pin_name)) {
+		if (!snd_soc_dapm_widget_name_cmp(w, pin)) {
 			if (w->dapm == dapm)
 				return w;
 			else
-- 
2.50.1



^ permalink raw reply related

* [PATCH v2 0/3] ASoC: soc-core: Add core support for ignoring suspend on selected DAPM widgets
From: Chancel Liu @ 2026-04-15  8:19 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, shengjiu.wang, Xiubo.Lee,
	festevam, nicoleotsuka, Frank.Li, s.hauer, kernel, shumingf,
	rander.wang, pierre-louis.bossart, linux-sound, linux-kernel,
	linuxppc-dev, imx, linux-arm-kernel

Some audio systems require specific DAPM widgets to remain powered
during system suspend. Introduce a generic and reusable mechanism in
the ASoC core to mark selected DAPM widgets as ignore_suspend.

The unified mechanism consists of two parts:
1. Parse and store the name list of widgets to ignore suspend in
struct snd_soc_card

The list of widgets can be provided either by the machine driver or
parsed from Device Tree. Different machines have different routing and
power requirements. Each machine can specify its own widgets to ignore
suspend through a DT property. This enables flexible policy without
hard-coding. A new helper, snd_soc_of_parse_ignore_suspend_widgets(), is added
for this purpose.

2. Apply ignore_suspend flags during snd_soc_bind_card()

After all components have been probed and all DAPM widgets have been
registered, snd_soc_bind_card() performs a unified lookup of the
configured widget names across all DAPM contexts of the card and marks
the matching widgets with ignore_suspend = 1.

Switch to use core ignore-suspend-widgets support for imx-rpmsg driver.

This v2 series is a rework of the previous "[PATCH] ASoC: imx-rpmsg:
Fix ignore-suspend-widgets only applied to codec DAPM".
Changes in v2:
- Rework to use a unified core mechanism instead of machine driver
specific code

Chancel Liu (3):
  ASoC: dapm: Fix widget lookup with prefixed names across DAPM contexts
  ASoC: soc-core: Add core support for ignoring suspend on selected DAPM
    widgets
  ASoC: fsl: imx-rpmsg: Switch to core ignore-suspend-widgets support

 include/sound/soc-dapm.h  |  1 +
 include/sound/soc.h       |  5 +++++
 sound/soc/fsl/imx-rpmsg.c | 26 ++++++++---------------
 sound/soc/soc-core.c      | 43 +++++++++++++++++++++++++++++++++++++++
 sound/soc/soc-dapm.c      | 43 ++++++++++++++++++++++++++++-----------
 5 files changed, 89 insertions(+), 29 deletions(-)

--
2.50.1



^ permalink raw reply

* Re: [PATCH 4/5] media: dt-bindings: add NXP i.MX95 compatible string
From: Krzysztof Kozlowski @ 2026-04-15  8:10 UTC (permalink / raw)
  To: Guoniu Zhou
  Cc: Michael Riesch, Mauro Carvalho Chehab, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Heiko Stuebner,
	Laurent Pinchart, Frank Li, linux-media, linux-kernel, devicetree,
	imx, linux-arm-kernel, linux-rockchip
In-Reply-To: <20260415-csi2_imx95-v1-4-7d63f3508719@oss.nxp.com>

On Wed, Apr 15, 2026 at 11:46:55AM +0800, Guoniu Zhou wrote:
> The i.MX95 CSI-2 controller is nearly identical to i.MX93, with the
> only difference being the use of IDI (Image Data Interface) instead
> of IPI (Image Pixel Interface). The binding constraints are otherwise
> the same.

Nearly identical with some difference really, really suggests they are
compatible. Express compatibility or explain why they are not compatible
(difference between IDI and IPI unfortunately does not help me).

Best regards,
Krzysztof



^ permalink raw reply

* Re: [PATCH 1/8] arm64/hwcap: Generate the KERNEL_HWCAP_ definitions for the hwcaps
From: Catalin Marinas @ 2026-04-15  8:09 UTC (permalink / raw)
  To: Alexander Stein
  Cc: Will Deacon, Jonathan Corbet, Shuah Khan, linux-arm-kernel,
	linux-kernel, linux-doc, linux-kselftest, Mark Brown
In-Reply-To: <8745494.GXAFRqVoOG@steina-w>

On Wed, Apr 15, 2026 at 08:24:22AM +0200, Alexander Stein wrote:
> Am Montag, 2. März 2026, 23:53:16 CEST schrieb Mark Brown:
> > Currently for each hwcap we define both the HWCAPn_NAME definition which is
> > exposed to userspace and a kernel internal KERNEL_HWCAP_NAME definition
> > which we use internally. This is tedious and repetitive, instead use a
> > script to generate the KERNEL_HWCAP_ definitions from the UAPI definitions.
> > 
> > No functional changes intended.
> 
> Somehow this change causes to delete and generate kernel-hwcap.h on each
> make call. This results in compiling essentially everything each time.

Does this fix it:

https://lore.kernel.org/r/20260413-arm64-hwcap-gen-fix-v1-1-26c56aed6908@kernel.org

It's queued, it will go in before -rc1.

-- 
Catalin


^ permalink raw reply

* Re: [PATCH 1/4] dt-bindings: clock: marvell,pxa1908: Add #reset-cells
From: Krzysztof Kozlowski @ 2026-04-15  8:08 UTC (permalink / raw)
  To: Duje Mihanović
  Cc: Michael Turquette, Stephen Boyd, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, Karel Balej, linux-arm-kernel, linux-clk,
	devicetree, linux-kernel, phone-devel, ~postmarketos/upstreaming,
	Duje Mihanović
In-Reply-To: <20260414-pxa1908-clk-reset-v1-1-94bae5f3a8cf@dujemihanovic.xyz>

On Tue, Apr 14, 2026 at 09:51:50PM +0200, Duje Mihanović wrote:
> From: Duje Mihanović <duje@dujemihanovic.xyz>
> 
> The APBC and APBCP controllers have reset lines exposed. Give them
> a #reset-cells so that they may be used as reset controllers.
> 
> Signed-off-by: Duje Mihanović <duje@dujemihanovic.xyz>
> ---
>  .../devicetree/bindings/clock/marvell,pxa1908.yaml | 34 +++++++++++++++-------
>  1 file changed, 24 insertions(+), 10 deletions(-)

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>

Best regards,
Krzysztof



^ permalink raw reply

* [soc:ti/dt-2] BUILD SUCCESS abe76f9f47d59ff80eb2fc59482aa76bbf6fd13a
From: kernel test robot @ 2026-04-15  8:08 UTC (permalink / raw)
  To: Vignesh Raghavendra; +Cc: linux-arm-kernel, arm

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git ti/dt-2
branch HEAD: abe76f9f47d59ff80eb2fc59482aa76bbf6fd13a  arm64: dts: ti: k3: Use memory-region-names for r5f

elapsed time: 765m

configs tested: 134
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alpha                             allnoconfig    gcc-15.2.0
alpha                            allyesconfig    gcc-15.2.0
arc                              allmodconfig    gcc-15.2.0
arc                               allnoconfig    gcc-15.2.0
arc                              allyesconfig    gcc-15.2.0
arc                   randconfig-001-20260415    gcc-13.4.0
arc                   randconfig-002-20260415    gcc-15.2.0
arm                               allnoconfig    clang-23
arm                              allyesconfig    gcc-15.2.0
arm                     davinci_all_defconfig    clang-19
arm                   randconfig-001-20260415    clang-23
arm                   randconfig-002-20260415    clang-23
arm                   randconfig-003-20260415    gcc-8.5.0
arm                   randconfig-004-20260415    clang-17
arm64                            allmodconfig    clang-19
arm64                             allnoconfig    gcc-15.2.0
arm64                 randconfig-001-20260415    gcc-15.2.0
arm64                 randconfig-002-20260415    gcc-14.3.0
arm64                 randconfig-003-20260415    gcc-13.4.0
arm64                 randconfig-004-20260415    clang-19
csky                             allmodconfig    gcc-15.2.0
csky                              allnoconfig    gcc-15.2.0
csky                  randconfig-001-20260415    gcc-9.5.0
csky                  randconfig-002-20260415    gcc-15.2.0
hexagon                          allmodconfig    clang-17
hexagon                           allnoconfig    clang-23
hexagon               randconfig-001-20260415    clang-23
hexagon               randconfig-002-20260415    clang-23
i386                             allmodconfig    gcc-14
i386                              allnoconfig    gcc-14
i386                             allyesconfig    gcc-14
i386        buildonly-randconfig-001-20260415    clang-20
i386        buildonly-randconfig-002-20260415    gcc-14
i386        buildonly-randconfig-003-20260415    clang-20
i386        buildonly-randconfig-004-20260415    gcc-14
i386        buildonly-randconfig-005-20260415    gcc-14
i386        buildonly-randconfig-006-20260415    clang-20
i386                  randconfig-011-20260415    gcc-14
i386                  randconfig-012-20260415    clang-20
i386                  randconfig-013-20260415    gcc-14
i386                  randconfig-014-20260415    gcc-14
i386                  randconfig-015-20260415    clang-20
i386                  randconfig-016-20260415    gcc-14
i386                  randconfig-017-20260415    gcc-14
loongarch                        allmodconfig    clang-19
loongarch                         allnoconfig    clang-23
loongarch                           defconfig    clang-19
loongarch             randconfig-001-20260415    clang-18
loongarch             randconfig-002-20260415    clang-23
m68k                             allmodconfig    gcc-15.2.0
m68k                              allnoconfig    gcc-15.2.0
m68k                             allyesconfig    gcc-15.2.0
m68k                                defconfig    gcc-15.2.0
microblaze                        allnoconfig    gcc-15.2.0
microblaze                       allyesconfig    gcc-15.2.0
microblaze                          defconfig    gcc-15.2.0
mips                             allmodconfig    gcc-15.2.0
mips                              allnoconfig    gcc-15.2.0
mips                             allyesconfig    gcc-15.2.0
nios2                            allmodconfig    gcc-11.5.0
nios2                             allnoconfig    gcc-11.5.0
nios2                               defconfig    gcc-11.5.0
nios2                 randconfig-001-20260415    gcc-10.5.0
nios2                 randconfig-002-20260415    gcc-11.5.0
openrisc                         allmodconfig    gcc-15.2.0
openrisc                          allnoconfig    gcc-15.2.0
openrisc                            defconfig    gcc-15.2.0
parisc                           allmodconfig    gcc-15.2.0
parisc                            allnoconfig    gcc-15.2.0
parisc                           allyesconfig    gcc-15.2.0
parisc                              defconfig    gcc-15.2.0
parisc                randconfig-001-20260415    gcc-8.5.0
parisc                randconfig-002-20260415    gcc-9.5.0
parisc64                            defconfig    gcc-15.2.0
powerpc                          allmodconfig    gcc-15.2.0
powerpc                           allnoconfig    gcc-15.2.0
powerpc               randconfig-001-20260415    clang-20
powerpc               randconfig-002-20260415    clang-20
powerpc64             randconfig-001-20260415    clang-23
powerpc64             randconfig-002-20260415    clang-23
riscv                            allmodconfig    clang-23
riscv                             allnoconfig    gcc-15.2.0
riscv                            allyesconfig    clang-16
riscv                               defconfig    clang-23
riscv                 randconfig-001-20260415    gcc-9.5.0
riscv                 randconfig-002-20260415    clang-23
s390                             allmodconfig    clang-18
s390                              allnoconfig    clang-23
s390                             allyesconfig    gcc-15.2.0
s390                                defconfig    clang-23
s390                  randconfig-001-20260415    gcc-15.2.0
s390                  randconfig-002-20260415    gcc-8.5.0
sh                               allmodconfig    gcc-15.2.0
sh                                allnoconfig    gcc-15.2.0
sh                               allyesconfig    gcc-15.2.0
sh                                  defconfig    gcc-15.2.0
sh                    randconfig-001-20260415    gcc-13.4.0
sh                    randconfig-002-20260415    gcc-11.5.0
sparc                             allnoconfig    gcc-15.2.0
sparc                               defconfig    gcc-15.2.0
sparc                 randconfig-001-20260415    gcc-8.5.0
sparc                 randconfig-002-20260415    gcc-11.5.0
sparc64                          allmodconfig    clang-23
sparc64                             defconfig    clang-20
sparc64               randconfig-001-20260415    clang-23
sparc64               randconfig-002-20260415    gcc-12.5.0
um                               allmodconfig    clang-19
um                                allnoconfig    clang-23
um                               allyesconfig    gcc-14
um                                  defconfig    clang-23
um                             i386_defconfig    gcc-14
um                    randconfig-001-20260415    clang-23
um                    randconfig-002-20260415    clang-23
um                           x86_64_defconfig    clang-23
x86_64                           allmodconfig    clang-20
x86_64                            allnoconfig    clang-20
x86_64                           allyesconfig    clang-20
x86_64      buildonly-randconfig-001-20260415    clang-20
x86_64      buildonly-randconfig-002-20260415    gcc-13
x86_64      buildonly-randconfig-003-20260415    gcc-14
x86_64      buildonly-randconfig-004-20260415    clang-20
x86_64      buildonly-randconfig-005-20260415    clang-20
x86_64      buildonly-randconfig-006-20260415    clang-20
x86_64                              defconfig    gcc-14
x86_64                randconfig-071-20260415    clang-20
x86_64                randconfig-072-20260415    clang-20
x86_64                randconfig-073-20260415    gcc-13
x86_64                randconfig-074-20260415    clang-20
x86_64                randconfig-075-20260415    clang-20
x86_64                randconfig-076-20260415    gcc-14
x86_64                          rhel-9.4-rust    clang-20
xtensa                            allnoconfig    gcc-15.2.0
xtensa                randconfig-001-20260415    gcc-8.5.0
xtensa                randconfig-002-20260415    gcc-8.5.0

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply

* Re: [PATCH 1/3] regulator: dt-bindings: mt6360: add buck regulator supplies
From: Krzysztof Kozlowski @ 2026-04-15  8:06 UTC (permalink / raw)
  To: Louis-Alexis Eyraud
  Cc: Rob Herring, Krzysztof Kozlowski, Conor Dooley, Matthias Brugger,
	AngeloGioacchino Del Regno, Liam Girdwood, Mark Brown, Gene Chen,
	kernel, devicetree, linux-kernel, linux-arm-kernel,
	linux-mediatek
In-Reply-To: <20260414-mtk-g1200-pmic-cleanup-v1-1-2a7193ed4e93@collabora.com>

On Tue, Apr 14, 2026 at 01:44:10PM +0200, Louis-Alexis Eyraud wrote:
> MT6360 PMIC provides 2 buck and 6 ldo regulators, that have each one a
> separate supply.
> Currently, the supplies for the ldo regulators are described in the
> dt-bindings but the ones for the buck regulators are not.
> 
> Add the descriptions for these missing supplies.
> 
> Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
> ---
>  Documentation/devicetree/bindings/regulator/mt6360-regulator.yaml | 4 ++++
>  1 file changed, 4 insertions(+)

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>

Best regards,
Krzysztof



^ permalink raw reply

* Re: [PATCH v7 0/4] Add Qualcomm extended CTI support
From: Leo Yan @ 2026-04-15  8:05 UTC (permalink / raw)
  To: Yingchao Deng (Consultant)
  Cc: Yingchao Deng, Suzuki K Poulose, Mike Leach, James Clark,
	Alexander Shishkin, coresight, linux-arm-kernel, linux-kernel,
	linux-arm-msm, Jinlong Mao, Tingwei Zhang, Jie Gan
In-Reply-To: <9772e300-06cb-4892-810c-bdcf6251bf9f@quicinc.com>

Hi Yingchao,

On Wed, Apr 15, 2026 at 11:22:49AM +0800, Yingchao Deng (Consultant) wrote:

[...]

> Gentle reminder.

This series would be on Mike's radar.

I will also look into details once I finish Levi's series review.

Thanks,
Leo


^ permalink raw reply

* Re: [PATCH v7 5/6] arm64: dts: rockchip: refactor items from Orange Pi 5/b to prep for Pro
From: Alexey Charkov @ 2026-04-15  8:02 UTC (permalink / raw)
  To: Dennis Gilmore
  Cc: Andrew Lunn, Andrzej Hajda, Chaoyi Chen, Conor Dooley,
	David Airlie, devicetree, dri-devel, FUKAUMI Naoki,
	Heiko Stuebner, Hsun Lai, Jernej Skrabec, Jimmy Hon, John Clark,
	Jonas Karlman, Krzysztof Kozlowski, Laurent Pinchart,
	linux-arm-kernel, linux-kernel, linux-rockchip, Maarten Lankhorst,
	Maxime Ripard, Michael Opdenacker, Michael Riesch, Mykola Kvach,
	Neil Armstrong, Peter Robinson, Quentin Schulz, Robert Foss,
	Rob Herring, Simona Vetter, Thomas Zimmermann
In-Reply-To: <20260414214104.1363987-6-dennis@ausil.us>

On Wed, Apr 15, 2026 at 1:41 AM Dennis Gilmore <dennis@ausil.us> wrote:
>
> The Orange Pi 5 Pro uses the same SoC and base as the Orange Pi 5 and
> Orange Pi 5B but has had sound, USB, and leds wired up differently. The
> 5 and 5B boards use gmac for ethernet where the Pro has a PCIe attached
> NIC.
>
> Move the 5/5B-specific bits (analog-sound/es8388, FUSB302 Type-C,
> gmac1, pwm-leds, i2s1_8ch routing, USB role-switch plumbing) out of
> rk3588s-orangepi-5.dtsi into a new rk3588s-orangepi-5-5b.dtsi that is
> included by both 5 and 5B.
>
> The RK806 PLDO1 and PLDO2 outputs are wired differently between the
> 5/5B and the Pro (PLDO1/PLDO2 are swapped), so label the PMIC node
> rk806_single in the base dtsi, drop pldo-reg1/pldo-reg2 from it, and
> define them via a &rk806_single regulators augmentation in
> rk3588s-orangepi-5-5b.dtsi. The Pro will supply its own mapping.
>
> Signed-off-by: Dennis Gilmore <dennis@ausil.us>
> ---
>  .../dts/rockchip/rk3588s-orangepi-5-5b.dtsi   | 222 +++++++++++++++++
>  .../boot/dts/rockchip/rk3588s-orangepi-5.dts  |   6 +-
>  .../boot/dts/rockchip/rk3588s-orangepi-5.dtsi | 225 +-----------------
>  .../boot/dts/rockchip/rk3588s-orangepi-5b.dts |   2 +-
>  4 files changed, 240 insertions(+), 215 deletions(-)
>  create mode 100644 arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5-5b.dtsi

Hi Dennis,

Sashiko makes a good point [1] about also moving the vbus_typec to the
*-5-5b.dtsi, given that it's not used on the Pro. Same with mdio1
which only makes sense together with its respective GMAC. Other than
that, this looks good to me - feel free to include:

Reviewed-by: Alexey Charkov <alchark@gmail.com>

Best regards,
Alexey

[1] https://sashiko.dev/#/patchset/20260414214104.1363987-1-dennis%40ausil.us

> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5-5b.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5-5b.dtsi
> new file mode 100644
> index 000000000000..9e987ffa6241
> --- /dev/null
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5-5b.dtsi
> @@ -0,0 +1,222 @@
> +// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
> +/*
> + * Device tree definitions shared by the Orange Pi 5 and Orange Pi 5B
> + * but not the Orange Pi 5 Pro.
> + */
> +
> +#include <dt-bindings/usb/pd.h>
> +#include "rk3588s-orangepi-5.dtsi"
> +
> +/ {
> +       aliases {
> +               ethernet0 = &gmac1;
> +       };
> +
> +       analog-sound {
> +               compatible = "simple-audio-card";
> +               pinctrl-names = "default";
> +               pinctrl-0 = <&hp_detect>;
> +               simple-audio-card,name = "rockchip,es8388";
> +               simple-audio-card,bitclock-master = <&masterdai>;
> +               simple-audio-card,format = "i2s";
> +               simple-audio-card,frame-master = <&masterdai>;
> +               simple-audio-card,hp-det-gpios = <&gpio1 RK_PD5 GPIO_ACTIVE_HIGH>;
> +               simple-audio-card,mclk-fs = <256>;
> +               simple-audio-card,routing =
> +                       "Headphones", "LOUT1",
> +                       "Headphones", "ROUT1",
> +                       "LINPUT1", "Microphone Jack",
> +                       "RINPUT1", "Microphone Jack",
> +                       "LINPUT2", "Onboard Microphone",
> +                       "RINPUT2", "Onboard Microphone";
> +               simple-audio-card,widgets =
> +                       "Microphone", "Microphone Jack",
> +                       "Microphone", "Onboard Microphone",
> +                       "Headphone", "Headphones";
> +
> +               simple-audio-card,cpu {
> +                       sound-dai = <&i2s1_8ch>;
> +               };
> +
> +               masterdai: simple-audio-card,codec {
> +                       sound-dai = <&es8388>;
> +                       system-clock-frequency = <12288000>;
> +               };
> +       };
> +
> +       pwm-leds {
> +               compatible = "pwm-leds";
> +
> +               led {
> +                       color = <LED_COLOR_ID_GREEN>;
> +                       function = LED_FUNCTION_STATUS;
> +                       linux,default-trigger = "heartbeat";
> +                       max-brightness = <255>;
> +                       pwms = <&pwm0 0 25000 0>;
> +               };
> +       };
> +};
> +
> +&gmac1 {
> +       clock_in_out = "output";
> +       phy-handle = <&rgmii_phy1>;
> +       phy-mode = "rgmii-rxid";
> +       pinctrl-0 = <&gmac1_miim
> +                       &gmac1_tx_bus2
> +                       &gmac1_rx_bus2
> +                       &gmac1_rgmii_clk
> +                       &gmac1_rgmii_bus>;
> +       pinctrl-names = "default";
> +       tx_delay = <0x42>;
> +       status = "okay";
> +};
> +
> +&i2c6 {
> +       es8388: audio-codec@10 {
> +               compatible = "everest,es8388", "everest,es8328";
> +               reg = <0x10>;
> +               clocks = <&cru I2S1_8CH_MCLKOUT>;
> +               AVDD-supply = <&vcca_3v3_s0>;
> +               DVDD-supply = <&vcca_1v8_s0>;
> +               HPVDD-supply = <&vcca_3v3_s0>;
> +               PVDD-supply = <&vcca_3v3_s0>;
> +               assigned-clocks = <&cru I2S1_8CH_MCLKOUT>;
> +               assigned-clock-rates = <12288000>;
> +               #sound-dai-cells = <0>;
> +       };
> +
> +       usbc0: usb-typec@22 {
> +               compatible = "fcs,fusb302";
> +               reg = <0x22>;
> +               interrupt-parent = <&gpio0>;
> +               interrupts = <RK_PD3 IRQ_TYPE_LEVEL_LOW>;
> +               pinctrl-names = "default";
> +               pinctrl-0 = <&usbc0_int>;
> +               vbus-supply = <&vbus_typec>;
> +               status = "okay";
> +
> +               usb_con: connector {
> +                       compatible = "usb-c-connector";
> +                       label = "USB-C";
> +                       data-role = "dual";
> +                       op-sink-microwatt = <1000000>;
> +                       power-role = "dual";
> +                       sink-pdos =
> +                               <PDO_FIXED(5000, 1000, PDO_FIXED_USB_COMM)>;
> +                       source-pdos =
> +                               <PDO_FIXED(5000, 3000, PDO_FIXED_USB_COMM)>;
> +                       try-power-role = "source";
> +
> +                       ports {
> +                               #address-cells = <1>;
> +                               #size-cells = <0>;
> +
> +                               port@0 {
> +                                       reg = <0>;
> +                                       usbc0_hs: endpoint {
> +                                               remote-endpoint = <&usb_host0_xhci_drd_sw>;
> +                                       };
> +                               };
> +
> +                               port@1 {
> +                                       reg = <1>;
> +                                       usbc0_ss: endpoint {
> +                                               remote-endpoint = <&usbdp_phy0_typec_ss>;
> +                                       };
> +                               };
> +
> +                               port@2 {
> +                                       reg = <2>;
> +                                       usbc0_sbu: endpoint {
> +                                               remote-endpoint = <&usbdp_phy0_typec_sbu>;
> +                                       };
> +                               };
> +                       };
> +               };
> +       };
> +};
> +
> +&i2s1_8ch {
> +       rockchip,i2s-tx-route = <3 2 1 0>;
> +       rockchip,i2s-rx-route = <1 3 2 0>;
> +       pinctrl-names = "default";
> +       pinctrl-0 = <&i2s1m0_sclk
> +                          &i2s1m0_mclk
> +                          &i2s1m0_lrck
> +                          &i2s1m0_sdi1
> +                          &i2s1m0_sdo3>;
> +       status = "okay";
> +};
> +
> +&pwm0 {
> +       pinctrl-0 = <&pwm0m2_pins>;
> +       pinctrl-names = "default";
> +       status = "okay";
> +};
> +
> +&rk806_single {
> +       regulators {
> +               vcc_1v8_s0: pldo-reg1 {
> +                       regulator-name = "vcc_1v8_s0";
> +                       regulator-always-on;
> +                       regulator-boot-on;
> +                       regulator-min-microvolt = <1800000>;
> +                       regulator-max-microvolt = <1800000>;
> +
> +                       regulator-state-mem {
> +                               regulator-off-in-suspend;
> +                       };
> +               };
> +
> +               vcca_1v8_s0: pldo-reg2 {
> +                       regulator-name = "vcca_1v8_s0";
> +                       regulator-always-on;
> +                       regulator-boot-on;
> +                       regulator-min-microvolt = <1800000>;
> +                       regulator-max-microvolt = <1800000>;
> +
> +                       regulator-state-mem {
> +                               regulator-off-in-suspend;
> +                               regulator-suspend-microvolt = <1800000>;
> +                       };
> +               };
> +       };
> +};
> +
> +
> +&usb_host0_xhci {
> +       dr_mode = "otg";
> +       usb-role-switch;
> +
> +       port {
> +               usb_host0_xhci_drd_sw: endpoint {
> +                       remote-endpoint = <&usbc0_hs>;
> +               };
> +       };
> +};
> +
> +&usb_host2_xhci {
> +       status = "okay";
> +};
> +
> +&usbdp_phy0 {
> +       mode-switch;
> +       orientation-switch;
> +       sbu1-dc-gpios = <&gpio4 RK_PA5 GPIO_ACTIVE_HIGH>;
> +       sbu2-dc-gpios = <&gpio4 RK_PA7 GPIO_ACTIVE_HIGH>;
> +
> +       port {
> +               #address-cells = <1>;
> +               #size-cells = <0>;
> +
> +               usbdp_phy0_typec_ss: endpoint@0 {
> +                       reg = <0>;
> +                       remote-endpoint = <&usbc0_ss>;
> +               };
> +
> +               usbdp_phy0_typec_sbu: endpoint@1 {
> +                       reg = <1>;
> +                       remote-endpoint = <&usbc0_sbu>;
> +               };
> +       };
> +};
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
> index 83b9b6645a1e..d76bdf1b5e90 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts
> @@ -2,12 +2,16 @@
>
>  /dts-v1/;
>
> -#include "rk3588s-orangepi-5.dtsi"
> +#include "rk3588s-orangepi-5-5b.dtsi"
>
>  / {
>         model = "Xunlong Orange Pi 5";
>         compatible = "xunlong,orangepi-5", "rockchip,rk3588s";
>
> +       aliases {
> +               mmc0 = &sdmmc;
> +       };
> +
>         vcc3v3_pcie20: regulator-vcc3v3-pcie20 {
>                 compatible = "regulator-fixed";
>                 enable-active-high;
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dtsi
> index fd5c6a025cd1..2b605e5fc35a 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dtsi
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dtsi
> @@ -3,19 +3,13 @@
>  /dts-v1/;
>
>  #include <dt-bindings/gpio/gpio.h>
> -#include <dt-bindings/leds/common.h>
>  #include <dt-bindings/input/input.h>
> +#include <dt-bindings/leds/common.h>
>  #include <dt-bindings/pinctrl/rockchip.h>
>  #include <dt-bindings/soc/rockchip,vop2.h>
> -#include <dt-bindings/usb/pd.h>
>  #include "rk3588s.dtsi"
>
>  / {
> -       aliases {
> -               ethernet0 = &gmac1;
> -               mmc0 = &sdmmc;
> -       };
> -
>         chosen {
>                 stdout-path = "serial2:1500000n8";
>         };
> @@ -34,38 +28,6 @@ button-recovery {
>                 };
>         };
>
> -       analog-sound {
> -               compatible = "simple-audio-card";
> -               pinctrl-names = "default";
> -               pinctrl-0 = <&hp_detect>;
> -               simple-audio-card,name = "rockchip,es8388";
> -               simple-audio-card,bitclock-master = <&masterdai>;
> -               simple-audio-card,format = "i2s";
> -               simple-audio-card,frame-master = <&masterdai>;
> -               simple-audio-card,hp-det-gpios = <&gpio1 RK_PD5 GPIO_ACTIVE_HIGH>;
> -               simple-audio-card,mclk-fs = <256>;
> -               simple-audio-card,routing =
> -                       "Headphones", "LOUT1",
> -                       "Headphones", "ROUT1",
> -                       "LINPUT1", "Microphone Jack",
> -                       "RINPUT1", "Microphone Jack",
> -                       "LINPUT2", "Onboard Microphone",
> -                       "RINPUT2", "Onboard Microphone";
> -               simple-audio-card,widgets =
> -                       "Microphone", "Microphone Jack",
> -                       "Microphone", "Onboard Microphone",
> -                       "Headphone", "Headphones";
> -
> -               simple-audio-card,cpu {
> -                       sound-dai = <&i2s1_8ch>;
> -               };
> -
> -               masterdai: simple-audio-card,codec {
> -                       sound-dai = <&es8388>;
> -                       system-clock-frequency = <12288000>;
> -               };
> -       };
> -
>         hdmi0-con {
>                 compatible = "hdmi-connector";
>                 type = "a";
> @@ -77,18 +39,6 @@ hdmi0_con_in: endpoint {
>                 };
>         };
>
> -       pwm-leds {
> -               compatible = "pwm-leds";
> -
> -               led {
> -                       color = <LED_COLOR_ID_GREEN>;
> -                       function = LED_FUNCTION_STATUS;
> -                       linux,default-trigger = "heartbeat";
> -                       max-brightness = <255>;
> -                       pwms = <&pwm0 0 25000 0>;
> -               };
> -       };
> -
>         vbus_typec: regulator-vbus-typec {
>                 compatible = "regulator-fixed";
>                 enable-active-high;
> @@ -101,15 +51,6 @@ vbus_typec: regulator-vbus-typec {
>                 vin-supply = <&vcc5v0_sys>;
>         };
>
> -       vcc5v0_sys: regulator-vcc5v0-sys {
> -               compatible = "regulator-fixed";
> -               regulator-name = "vcc5v0_sys";
> -               regulator-always-on;
> -               regulator-boot-on;
> -               regulator-min-microvolt = <5000000>;
> -               regulator-max-microvolt = <5000000>;
> -       };
> -
>         vcc_3v3_sd_s0: regulator-vcc-3v3-sd-s0 {
>                 compatible = "regulator-fixed";
>                 gpios = <&gpio4 RK_PB5 GPIO_ACTIVE_LOW>;
> @@ -119,6 +60,15 @@ vcc_3v3_sd_s0: regulator-vcc-3v3-sd-s0 {
>                 regulator-max-microvolt = <3300000>;
>                 vin-supply = <&vcc_3v3_s3>;
>         };
> +
> +       vcc5v0_sys: regulator-vcc5v0-sys {
> +               compatible = "regulator-fixed";
> +               regulator-name = "vcc5v0_sys";
> +               regulator-always-on;
> +               regulator-boot-on;
> +               regulator-min-microvolt = <5000000>;
> +               regulator-max-microvolt = <5000000>;
> +       };
>  };
>
>  &combphy0_ps {
> @@ -161,20 +111,6 @@ &cpu_l3 {
>         cpu-supply = <&vdd_cpu_lit_s0>;
>  };
>
> -&gmac1 {
> -       clock_in_out = "output";
> -       phy-handle = <&rgmii_phy1>;
> -       phy-mode = "rgmii-rxid";
> -       pinctrl-0 = <&gmac1_miim
> -                    &gmac1_tx_bus2
> -                    &gmac1_rx_bus2
> -                    &gmac1_rgmii_clk
> -                    &gmac1_rgmii_bus>;
> -       pinctrl-names = "default";
> -       tx_delay = <0x42>;
> -       status = "okay";
> -};
> -
>  &gpu {
>         mali-supply = <&vdd_gpu_s0>;
>         status = "okay";
> @@ -270,69 +206,6 @@ &i2c6 {
>         pinctrl-0 = <&i2c6m3_xfer>;
>         status = "okay";
>
> -       es8388: audio-codec@10 {
> -               compatible = "everest,es8388", "everest,es8328";
> -               reg = <0x10>;
> -               clocks = <&cru I2S1_8CH_MCLKOUT>;
> -               AVDD-supply = <&vcca_3v3_s0>;
> -               DVDD-supply = <&vcca_1v8_s0>;
> -               HPVDD-supply = <&vcca_3v3_s0>;
> -               PVDD-supply = <&vcca_3v3_s0>;
> -               assigned-clocks = <&cru I2S1_8CH_MCLKOUT>;
> -               assigned-clock-rates = <12288000>;
> -               #sound-dai-cells = <0>;
> -       };
> -
> -       usbc0: usb-typec@22 {
> -               compatible = "fcs,fusb302";
> -               reg = <0x22>;
> -               interrupt-parent = <&gpio0>;
> -               interrupts = <RK_PD3 IRQ_TYPE_LEVEL_LOW>;
> -               pinctrl-names = "default";
> -               pinctrl-0 = <&usbc0_int>;
> -               vbus-supply = <&vbus_typec>;
> -               status = "okay";
> -
> -               usb_con: connector {
> -                       compatible = "usb-c-connector";
> -                       label = "USB-C";
> -                       data-role = "dual";
> -                       op-sink-microwatt = <1000000>;
> -                       power-role = "dual";
> -                       sink-pdos =
> -                               <PDO_FIXED(5000, 1000, PDO_FIXED_USB_COMM)>;
> -                       source-pdos =
> -                               <PDO_FIXED(5000, 3000, PDO_FIXED_USB_COMM)>;
> -                       try-power-role = "source";
> -
> -                       ports {
> -                               #address-cells = <1>;
> -                               #size-cells = <0>;
> -
> -                               port@0 {
> -                                       reg = <0>;
> -                                       usbc0_hs: endpoint {
> -                                               remote-endpoint = <&usb_host0_xhci_drd_sw>;
> -                                       };
> -                               };
> -
> -                               port@1 {
> -                                       reg = <1>;
> -                                       usbc0_ss: endpoint {
> -                                               remote-endpoint = <&usbdp_phy0_typec_ss>;
> -                                       };
> -                               };
> -
> -                               port@2 {
> -                                       reg = <2>;
> -                                       usbc0_sbu: endpoint {
> -                                               remote-endpoint = <&usbdp_phy0_typec_sbu>;
> -                                       };
> -                               };
> -                       };
> -               };
> -       };
> -
>         hym8563: rtc@51 {
>                 compatible = "haoyu,hym8563";
>                 reg = <0x51>;
> @@ -346,18 +219,6 @@ hym8563: rtc@51 {
>         };
>  };
>
> -&i2s1_8ch {
> -       rockchip,i2s-tx-route = <3 2 1 0>;
> -       rockchip,i2s-rx-route = <1 3 2 0>;
> -       pinctrl-names = "default";
> -       pinctrl-0 = <&i2s1m0_sclk
> -                    &i2s1m0_mclk
> -                    &i2s1m0_lrck
> -                    &i2s1m0_sdi1
> -                    &i2s1m0_sdo3>;
> -       status = "okay";
> -};
> -
>  &i2s5_8ch {
>         status = "okay";
>  };
> @@ -404,12 +265,6 @@ typec5v_pwren: typec5v-pwren {
>         };
>  };
>
> -&pwm0 {
> -       pinctrl-0 = <&pwm0m2_pins>;
> -       pinctrl-names = "default";
> -       status = "okay";
> -};
> -
>  &rknn_core_0 {
>         npu-supply = <&vdd_npu_s0>;
>         sram-supply = <&vdd_npu_s0>;
> @@ -491,7 +346,7 @@ &spi2 {
>         pinctrl-names = "default";
>         pinctrl-0 = <&spi2m2_cs0 &spi2m2_pins>;
>
> -       pmic@0 {
> +       rk806_single: pmic@0 {
>                 compatible = "rockchip,rk806";
>                 reg = <0x0>;
>                 interrupt-parent = <&gpio0>;
> @@ -666,31 +521,6 @@ regulator-state-mem {
>                                 };
>                         };
>
> -                       vcc_1v8_s0: pldo-reg1 {
> -                               regulator-name = "vcc_1v8_s0";
> -                               regulator-always-on;
> -                               regulator-boot-on;
> -                               regulator-min-microvolt = <1800000>;
> -                               regulator-max-microvolt = <1800000>;
> -
> -                               regulator-state-mem {
> -                                       regulator-off-in-suspend;
> -                               };
> -                       };
> -
> -                       vcca_1v8_s0: pldo-reg2 {
> -                               regulator-name = "vcca_1v8_s0";
> -                               regulator-always-on;
> -                               regulator-boot-on;
> -                               regulator-min-microvolt = <1800000>;
> -                               regulator-max-microvolt = <1800000>;
> -
> -                               regulator-state-mem {
> -                                       regulator-off-in-suspend;
> -                                       regulator-suspend-microvolt = <1800000>;
> -                               };
> -                       };
> -
>                         vdda_1v2_s0: pldo-reg3 {
>                                 regulator-name = "vdda_1v2_s0";
>                                 regulator-always-on;
> @@ -841,26 +671,7 @@ &uart2 {
>  };
>
>  &usbdp_phy0 {
> -       mode-switch;
> -       orientation-switch;
> -       sbu1-dc-gpios = <&gpio4 RK_PA5 GPIO_ACTIVE_HIGH>;
> -       sbu2-dc-gpios = <&gpio4 RK_PA7 GPIO_ACTIVE_HIGH>;
>         status = "okay";
> -
> -       port {
> -               #address-cells = <1>;
> -               #size-cells = <0>;
> -
> -               usbdp_phy0_typec_ss: endpoint@0 {
> -                       reg = <0>;
> -                       remote-endpoint = <&usbc0_ss>;
> -               };
> -
> -               usbdp_phy0_typec_sbu: endpoint@1 {
> -                       reg = <1>;
> -                       remote-endpoint = <&usbc0_sbu>;
> -               };
> -       };
>  };
>
>  &usb_host0_ehci {
> @@ -872,15 +683,7 @@ &usb_host0_ohci {
>  };
>
>  &usb_host0_xhci {
> -       dr_mode = "otg";
> -       usb-role-switch;
>         status = "okay";
> -
> -       port {
> -               usb_host0_xhci_drd_sw: endpoint {
> -                       remote-endpoint = <&usbc0_hs>;
> -               };
> -       };
>  };
>
>  &usb_host1_ehci {
> @@ -891,7 +694,7 @@ &usb_host1_ohci {
>         status = "okay";
>  };
>
> -&usb_host2_xhci {
> +&vop {
>         status = "okay";
>  };
>
> @@ -899,10 +702,6 @@ &vop_mmu {
>         status = "okay";
>  };
>
> -&vop {
> -       status = "okay";
> -};
> -
>  &vp0 {
>         vp0_out_hdmi0: endpoint@ROCKCHIP_VOP2_EP_HDMI0 {
>                 reg = <ROCKCHIP_VOP2_EP_HDMI0>;
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5b.dts
> index d21ec320d295..8af174777809 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5b.dts
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5b.dts
> @@ -2,7 +2,7 @@
>
>  /dts-v1/;
>
> -#include "rk3588s-orangepi-5.dtsi"
> +#include "rk3588s-orangepi-5-5b.dtsi"
>
>  / {
>         model = "Xunlong Orange Pi 5B";
> --
> 2.53.0
>


^ permalink raw reply

* Re: [PATCH v4 3/9] coresight: etm4x: fix leaked trace id
From: Yeoreum Yun @ 2026-04-15  8:01 UTC (permalink / raw)
  To: Leo Yan
  Cc: Jie Gan, coresight, linux-arm-kernel, linux-kernel,
	suzuki.poulose, mike.leach, james.clark, alexander.shishkin
In-Reply-To: <20260415072933.GH356832@e132581.arm.com>

Hi Leo,
> On Wed, Apr 15, 2026 at 09:21:21AM +0800, Jie Gan wrote:
>
> [...]
>
> > > > > @@ -918,8 +918,10 @@ static int etm4_enable_sysfs(struct coresight_device *csdev, struct coresight_pa
> > > > >   	cscfg_config_sysfs_get_active_cfg(&cfg_hash, &preset);
> > > > >   	if (cfg_hash) {
> > > > >   		ret = cscfg_csdev_enable_active_config(csdev, cfg_hash, preset);
> > > > > -		if (ret)
> > > > > +		if (ret) {
> > > > > +			etm4_release_trace_id(drvdata);
> > > >
> > > > If so, even an ID is reserved for failures, and the ID map is big enough
> > > > for each CPU, we don't need to worry memory leak or ID used out issue ?
> > >
> > > However, in theory, this could lead to an ID leak,
> > > so it would be better to release it in error cases.
> >
> > What I am thinking is as SoCs continue to grow more complex with an
> > increasing number of subsystems, trace IDs may be exhausted in the near
> > future. (that's why we have dynamic trace ID allocation/release).
>
> Thanks for the input.
>
> I am wandering if we can use "dev->devt" as the trace ID.  A device's
> major/minor number is unique in kernel and dev_t is defined as u32:
>
>   typedef u32 __kernel_dev_t;
>
> And we can consolidate this for both SYSFS and PERF modes.
>

When I look at CORESIGHT_TRACE_IDS_MAX:

 /* architecturally we have 128 IDs some of which are reserved */
  #define CORESIGHT_TRACE_IDS_MAX 128

I think this comes from the hardware restriction on the number of trace IDs.
In this case, clamping the device ID to a trace ID seems more complex and
would likely cost some performance.


--
Sincerely,
Yeoreum Yun


^ permalink raw reply

* [PATCH v3 3/3] remoteproc: imx_rproc: Add support for i.MX94
From: Peng Fan (OSS) @ 2026-04-15  7:50 UTC (permalink / raw)
  To: Bjorn Andersson, Mathieu Poirier, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Frank Li, Sascha Hauer,
	Pengutronix Kernel Team, Fabio Estevam, Daniel Baluta
  Cc: linux-remoteproc, devicetree, imx, linux-arm-kernel, linux-kernel,
	Peng Fan
In-Reply-To: <20260415-imx943-rproc-v3-0-9fa7528db8ca@nxp.com>

From: Peng Fan <peng.fan@nxp.com>

Add basic remoteproc support for the i.MX94 M-core processors, including
address translation tables (dev addr is from the view of the remote
processor, sys addr is from the view of the main processor) and device
configuration data for the CM70, CM71, and CM33S cores.

Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
---
 drivers/remoteproc/imx_rproc.c | 71 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index c21782be4bb69d830479f538a091bda48b740ca4..5c4a1f2f877f5afd82192a13281f57f094a0e478 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -145,6 +145,47 @@ static const struct imx_rproc_att imx_rproc_att_imx95_m7[] = {
 	{ 0x80000000, 0x80000000, 0x50000000, 0 },
 };
 
+static const struct imx_rproc_att imx_rproc_att_imx94_m70[] = {
+	/* dev addr , sys addr  , size	    , flags */
+	/* TCM CODE NON-SECURE */
+	{ 0x00000000, 0x203C0000, 0x00040000, ATT_OWN | ATT_IOMEM },
+	/* TCM SYS NON-SECURE*/
+	{ 0x20000000, 0x20400000, 0x00040000, ATT_OWN | ATT_IOMEM },
+
+	/* DDR */
+	{ 0x80000000, 0x80000000, 0x50000000, 0 },
+};
+
+static const struct imx_rproc_att imx_rproc_att_imx94_m71[] = {
+	/* dev addr , sys addr  , size	    , flags */
+	/* TCM CODE NON-SECURE */
+	{ 0x00000000, 0x202C0000, 0x00040000, ATT_OWN | ATT_IOMEM },
+	/* TCM SYS NON-SECURE*/
+	{ 0x20000000, 0x20300000, 0x00040000, ATT_OWN | ATT_IOMEM },
+
+	/* DDR */
+	{ 0x80000000, 0x80000000, 0x50000000, 0 },
+};
+
+static const struct imx_rproc_att imx_rproc_att_imx94_m33s[] = {
+	/* dev addr , sys addr  , size	    , flags */
+	/* TCM CODE NON-SECURE */
+	{ 0x0FFC0000, 0x209C0000, 0x00040000, ATT_OWN | ATT_IOMEM },
+	/* TCM CODE SECURE */
+	{ 0x1FFC0000, 0x209C0000, 0x00040000, ATT_OWN | ATT_IOMEM },
+
+	/* TCM SYS NON-SECURE */
+	{ 0x20000000, 0x20A00000, 0x00040000, ATT_OWN | ATT_IOMEM },
+	/* TCM SYS SECURE */
+	{ 0x30000000, 0x20A00000, 0x00040000, ATT_OWN | ATT_IOMEM },
+
+	/* M33S OCRAM */
+	{ 0x20800000, 0x20800000, 0x180000, ATT_OWN | ATT_IOMEM },
+
+	/* DDR */
+	{ 0x80000000, 0x80000000, 0x50000000, 0 },
+};
+
 static const struct imx_rproc_att imx_rproc_att_imx93[] = {
 	/* dev addr , sys addr  , size	    , flags */
 	/* TCM CODE NON-SECURE */
@@ -1477,6 +1518,33 @@ static const struct imx_rproc_dcfg imx_rproc_cfg_imx93 = {
 	.flags		= IMX_RPROC_NEED_CLKS,
 };
 
+static const struct imx_rproc_dcfg imx_rproc_cfg_imx94_m70 = {
+	.att		= imx_rproc_att_imx94_m70,
+	.att_size	= ARRAY_SIZE(imx_rproc_att_imx94_m70),
+	.ops		= &imx_rproc_ops_sm_lmm,
+	.cpuid		= 1,
+	.lmid		= 2,
+	.reset_vector_mask = GENMASK_U32(31, 16),
+};
+
+static const struct imx_rproc_dcfg imx_rproc_cfg_imx94_m71 = {
+	.att		= imx_rproc_att_imx94_m71,
+	.att_size	= ARRAY_SIZE(imx_rproc_att_imx94_m71),
+	.ops		= &imx_rproc_ops_sm_lmm,
+	.cpuid		= 7,
+	.lmid		= 3,
+	.reset_vector_mask = GENMASK_U32(31, 16),
+};
+
+static const struct imx_rproc_dcfg imx_rproc_cfg_imx94_m33s = {
+	.att		= imx_rproc_att_imx94_m33s,
+	.att_size	= ARRAY_SIZE(imx_rproc_att_imx94_m33s),
+	.ops		= &imx_rproc_ops_sm_lmm,
+	.cpuid		= 8,
+	.lmid		= 1,
+	.reset_vector_mask = GENMASK_U32(31, 16),
+};
+
 static const struct imx_rproc_dcfg imx_rproc_cfg_imx95_m7 = {
 	.att		= imx_rproc_att_imx95_m7,
 	.att_size	= ARRAY_SIZE(imx_rproc_att_imx95_m7),
@@ -1501,6 +1569,9 @@ static const struct of_device_id imx_rproc_of_match[] = {
 	{ .compatible = "fsl,imx8qm-cm4", .data = &imx_rproc_cfg_imx8qm },
 	{ .compatible = "fsl,imx8ulp-cm33", .data = &imx_rproc_cfg_imx8ulp },
 	{ .compatible = "fsl,imx93-cm33", .data = &imx_rproc_cfg_imx93 },
+	{ .compatible = "fsl,imx94-cm70", .data = &imx_rproc_cfg_imx94_m70 },
+	{ .compatible = "fsl,imx94-cm71", .data = &imx_rproc_cfg_imx94_m71 },
+	{ .compatible = "fsl,imx94-cm33s", .data = &imx_rproc_cfg_imx94_m33s },
 	{ .compatible = "fsl,imx95-cm7", .data = &imx_rproc_cfg_imx95_m7 },
 	{},
 };

-- 
2.37.1



^ permalink raw reply related

* [PATCH v3 2/3] remoteproc: imx_rproc: Program non-zero SM CPU/LMM reset vector
From: Peng Fan (OSS) @ 2026-04-15  7:50 UTC (permalink / raw)
  To: Bjorn Andersson, Mathieu Poirier, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Frank Li, Sascha Hauer,
	Pengutronix Kernel Team, Fabio Estevam, Daniel Baluta
  Cc: linux-remoteproc, devicetree, imx, linux-arm-kernel, linux-kernel,
	Peng Fan
In-Reply-To: <20260415-imx943-rproc-v3-0-9fa7528db8ca@nxp.com>

From: Peng Fan <peng.fan@nxp.com>

Cortex-M[7,33] processors use a fixed reset vector table format:

  0x00  Initial SP value
  0x04  Reset vector
  0x08  NMI
  0x0C  ...
  ...
  IRQ[n]

In ELF images, the corresponding layout is:

reset_vectors:  --> hardware reset address
        .word __stack_end__
        .word Reset_Handler
        .word NMI_Handler
        .word HardFault_Handler
        ...
        .word UART_IRQHandler
        .word SPI_IRQHandler
        ...

Reset_Handler:  --> ELF entry point address
        ...

The hardware fetches the first two words from reset_vectors and populates
SP with __stack_end__ and PC with Reset_Handler. Execution proceeds from
Reset_Handler.

However, the ELF entry point does not always match the hardware reset
address. For example, on i.MX94 CM33S:

  ELF entry point:     0x0ffc211d
  hardware reset base: 0x0ffc0000 (default reset value, sw programmable)

The current driver always programs the reset vector as 0. However, the
i.MX94 CM33S default reset base is 0x0ffc0000, so the correct reset vector
must be passed to the SM API; otherwise the M33 Sync core cannot boot
successfully.

rproc_elf_get_boot_addr() returns the ELF entry point, which is not the
hardware reset vector address. Fix the issue by deriving the hardware reset
vector locally using a SoC-specific mask:

  reset_vector = rproc->bootaddr & reset_vector_mask

The ELF entry point semantics remain unchanged. The masking is applied only
at the point where the SM reset vector is programmed.

Add reset_vector_mask = GENMASK_U32(31, 16) to the i.MX95 M7 configuration
so the hardware reset vector is derived correctly. Without this mask, the
SM reset vector would be programmed with an unaligned ELF entry point and
the M7 core would fail to boot.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
---
 drivers/remoteproc/imx_rproc.c | 27 +++++++++++++++++++++++++--
 drivers/remoteproc/imx_rproc.h |  2 ++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index 0dd80e688b0ea3df4c66e5726884dc86c8a5a881..c21782be4bb69d830479f538a091bda48b740ca4 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -339,13 +339,32 @@ static int imx_rproc_scu_api_start(struct rproc *rproc)
 	return imx_sc_pm_cpu_start(priv->ipc_handle, priv->rsrc_id, true, priv->entry);
 }
 
+/*
+ * Derive the value to program as the SM reset vector from the ELF entry
+ * point held in rproc->bootaddr.
+ *
+ * The hardware fetches the first two words from reset_vectors (hardware
+ * reset address) and populates SP and PC using the first two words.
+ * Execution proceeds from PC. The ELF entry point does not always match
+ * the hardware reset address, so the lower address bits are masked off
+ * with the per-SoC reset_vector_mask before the reset vector is
+ * programmed. A zero mask in the device configuration keeps all 32 low
+ * address bits.
+ */
+static u64 imx_rproc_sm_get_reset_vector(struct rproc *rproc)
+{
+	struct imx_rproc *priv = rproc->priv;
+	u32 mask = priv->dcfg->reset_vector_mask;
+
+	if (!mask)
+		mask = GENMASK(31, 0);
+
+	return rproc->bootaddr & mask;
+}
+
 static int imx_rproc_sm_cpu_start(struct rproc *rproc)
 {
 	struct imx_rproc *priv = rproc->priv;
 	const struct imx_rproc_dcfg *dcfg = priv->dcfg;
+	u64 reset_vector;
 	int ret;
 
-	ret = scmi_imx_cpu_reset_vector_set(dcfg->cpuid, 0, true, false, false);
+	reset_vector = imx_rproc_sm_get_reset_vector(rproc);
+
+	ret = scmi_imx_cpu_reset_vector_set(dcfg->cpuid, reset_vector, true, false, false);
 	if (ret) {
 		dev_err(priv->dev, "Failed to set reset vector cpuid(%u): %d\n", dcfg->cpuid, ret);
 		return ret;
@@ -359,13 +378,16 @@ static int imx_rproc_sm_lmm_start(struct rproc *rproc)
 	struct imx_rproc *priv = rproc->priv;
 	const struct imx_rproc_dcfg *dcfg = priv->dcfg;
 	struct device *dev = priv->dev;
+	u64 reset_vector;
 	int ret;
 
+	reset_vector = imx_rproc_sm_get_reset_vector(rproc);
+
 	/*
 	 * If the remoteproc core can't start the M7, it will already be
 	 * handled in imx_rproc_sm_lmm_prepare().
 	 */
-	ret = scmi_imx_lmm_reset_vector_set(dcfg->lmid, dcfg->cpuid, 0, 0);
+	ret = scmi_imx_lmm_reset_vector_set(dcfg->lmid, dcfg->cpuid, 0, reset_vector);
 	if (ret) {
 		dev_err(dev, "Failed to set reset vector lmid(%u), cpuid(%u): %d\n",
 			dcfg->lmid, dcfg->cpuid, ret);
@@ -1462,6 +1484,7 @@ static const struct imx_rproc_dcfg imx_rproc_cfg_imx95_m7 = {
 	/* Must align with System Manager Firmware */
 	.cpuid		= 1, /* Use 1 as cpu id for M7 core */
 	.lmid		= 1, /* Use 1 as Logical Machine ID where M7 resides */
+	.reset_vector_mask = GENMASK_U32(31, 16),
 };
 
 static const struct of_device_id imx_rproc_of_match[] = {
diff --git a/drivers/remoteproc/imx_rproc.h b/drivers/remoteproc/imx_rproc.h
index d37e6f90548cec727b4aeb874680b42af85bdbb4..0d7d48352a1091ad24e8e083172ce6da6d26ae10 100644
--- a/drivers/remoteproc/imx_rproc.h
+++ b/drivers/remoteproc/imx_rproc.h
@@ -41,6 +41,8 @@ struct imx_rproc_dcfg {
 	/* For System Manager(SM) based SoCs */
 	u32				cpuid; /* ID of the remote core */
 	u32				lmid;  /* ID of the Logcial Machine */
+	/* reset_vector = elf_entry_addr & reset_vector_mask */
+	u32				reset_vector_mask;
 };
 
 #endif /* _IMX_RPROC_H */

-- 
2.37.1



^ permalink raw reply related

* [PATCH v3 1/3] dt-bindings: remoteproc: imx-rproc: Support i.MX94
From: Peng Fan (OSS) @ 2026-04-15  7:50 UTC (permalink / raw)
  To: Bjorn Andersson, Mathieu Poirier, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Frank Li, Sascha Hauer,
	Pengutronix Kernel Team, Fabio Estevam, Daniel Baluta
  Cc: linux-remoteproc, devicetree, imx, linux-arm-kernel, linux-kernel,
	Peng Fan
In-Reply-To: <20260415-imx943-rproc-v3-0-9fa7528db8ca@nxp.com>

From: Peng Fan <peng.fan@nxp.com>

Add compatible string for:
 Cortex-M7 core[0,1] in i.MX94
 Cortex-M33 Sync core in i.MX94

On i.MX94, Cortex-M7 core0 and core1 have different memory views from the
Cortex-A55 core, so different compatible strings are used.

Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Peng Fan <peng.fan@nxp.com>
---
 Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml
index ce8ec0119469c8fc0979a192b6e3d3a03108d7d2..c18f71b648890da9c25a2f3309d8dbec5bb8d226 100644
--- a/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml
@@ -28,6 +28,9 @@ properties:
       - fsl,imx8qxp-cm4
       - fsl,imx8ulp-cm33
       - fsl,imx93-cm33
+      - fsl,imx94-cm33s
+      - fsl,imx94-cm70
+      - fsl,imx94-cm71
       - fsl,imx95-cm7
 
   clocks:

-- 
2.37.1



^ permalink raw reply related

* [PATCH v3 0/3] Add i.MX94 remoteproc support and reset vector handling improvements
From: Peng Fan (OSS) @ 2026-04-15  7:50 UTC (permalink / raw)
  To: Bjorn Andersson, Mathieu Poirier, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Frank Li, Sascha Hauer,
	Pengutronix Kernel Team, Fabio Estevam, Daniel Baluta
  Cc: linux-remoteproc, devicetree, imx, linux-arm-kernel, linux-kernel,
	Peng Fan

This series adds remoteproc support for the i.MX94 family, including the
CM70, CM71, and CM33S cores, and derives the hardware reset vector for
Cortex‑M processors whose ELF entry point does not directly correspond to
the actual reset address.

Background:
Cortex‑M processors fetch their initial SP and PC from a fixed reset vector
table. While ELF images embed the entry point (e_entry), this value is
not always aligned to the hardware reset address. On platforms such as
i.MX94 CM33S, masking is required to compute the correct reset vector
address before programming the SoC reset registers.

Similarly, on i.MX95, the existing implementation always programs a reset
vector of 0x0, which only works when executing entirely from TCM. When
firmware is loaded into DDR, the driver must pass the correct reset vector
to the SM CPU/LMM interfaces.

Summary of patches:
[1]dt-bindings: remoteproc: imx-rproc: Support i.MX94
Adds compatible strings for the Cortex-M7 core0/core1 and the Cortex-M33
Sync core in i.MX94.

[2]remoteproc: imx_rproc: Program non-zero SM CPU/LMM reset vector
Ensures the correct reset vector is passed to SM APIs by introducing a
helper (imx_rproc_sm_get_reset_vector()) that applies the reset‑vector
mask.

[3]remoteproc: imx_rproc: Add support for i.MX94 remoteproc
Adds address translation tables and configuration data for CM70, CM71,
and CM33S, enabling full remoteproc operation on i.MX94.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
---
Changes in v3:
- Patch 2: 
  Drop R-b because of changes in V3

  Following Mathieu's suggestion to apply the reset vector in
  scmi_imx_[cpu,lmm]_reset_vector_set() without changing the meaning of
  rproc->bootaddr, add the helper imx_rproc_sm_get_reset_vector() to get the
  reset vector and use the helper in scmi_imx_[cpu,lmm]_reset_vector_set().

  Add reset-vector-mask for i.MX95 CM7 to avoid breaking i.MX95 CM7
  boot.

- Link to v2: https://lore.kernel.org/r/20260327-imx943-rproc-v2-0-a547a3588730@nxp.com

Changes in v2:
- Drop fsl,reset-vector-mask by using fixed value in driver for per device
- Add R-b for i.MX94 dt-binding
- Update commit log to include dev addr and sys addr
- Link to v1: https://lore.kernel.org/r/20260312-imx943-rproc-v1-0-3e66596592a8@nxp.com

---
Peng Fan (3):
      dt-bindings: remoteproc: imx-rproc: Support i.MX94
      remoteproc: imx_rproc: Program non-zero SM CPU/LMM reset vector
      remoteproc: imx_rproc: Add support for i.MX94

 .../bindings/remoteproc/fsl,imx-rproc.yaml         |  3 +
 drivers/remoteproc/imx_rproc.c                     | 98 +++++++++++++++++++++-
 drivers/remoteproc/imx_rproc.h                     |  2 +
 3 files changed, 101 insertions(+), 2 deletions(-)
---
base-commit: 724699d8d0523909da51fda8d1e10c1ff867b280
change-id: 20260311-imx943-rproc-2050e00b65f7

Best regards,
-- 
Peng Fan <peng.fan@nxp.com>



^ permalink raw reply

* [PATCH v3] media: verisilicon: Create AV1 helper library
From: Benjamin Gaignard @ 2026-04-15  7:38 UTC (permalink / raw)
  To: nicolas.dufresne, p.zabel, mchehab, heiko
  Cc: linux-kernel, linux-media, linux-rockchip, linux-arm-kernel,
	kernel, Benjamin Gaignard

Regroup all non-hardware-related AV1 functions into a helper library.
The goal is to avoid code duplication for future AV1 codecs.

Tested on a Rock 5B board; the Fluster score remains the same: 204/241.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
---
changes in version 3:
 - Remove useless wrapper functions.

 drivers/media/platform/verisilicon/Makefile   |   7 +-
 .../media/platform/verisilicon/hantro_av1.c   | 780 +++++++++++++++
 .../media/platform/verisilicon/hantro_av1.h   |  62 ++
 ...entropymode.c => hantro_av1_entropymode.c} |  18 +-
 ...entropymode.h => hantro_av1_entropymode.h} |  18 +-
 ...av1_filmgrain.c => hantro_av1_filmgrain.c} |  82 +-
 .../verisilicon/hantro_av1_filmgrain.h        |  44 +
 .../media/platform/verisilicon/hantro_hw.h    |   7 +-
 .../verisilicon/rockchip_av1_filmgrain.h      |  36 -
 .../verisilicon/rockchip_vpu981_hw_av1_dec.c  | 935 ++----------------
 .../platform/verisilicon/rockchip_vpu_hw.c    |   7 +-
 11 files changed, 1041 insertions(+), 955 deletions(-)
 create mode 100644 drivers/media/platform/verisilicon/hantro_av1.c
 create mode 100644 drivers/media/platform/verisilicon/hantro_av1.h
 rename drivers/media/platform/verisilicon/{rockchip_av1_entropymode.c => hantro_av1_entropymode.c} (99%)
 rename drivers/media/platform/verisilicon/{rockchip_av1_entropymode.h => hantro_av1_entropymode.h} (95%)
 rename drivers/media/platform/verisilicon/{rockchip_av1_filmgrain.c => hantro_av1_filmgrain.c} (92%)
 create mode 100644 drivers/media/platform/verisilicon/hantro_av1_filmgrain.h
 delete mode 100644 drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h

diff --git a/drivers/media/platform/verisilicon/Makefile b/drivers/media/platform/verisilicon/Makefile
index f6f019d04ff0..a1dd6c2d29be 100644
--- a/drivers/media/platform/verisilicon/Makefile
+++ b/drivers/media/platform/verisilicon/Makefile
@@ -19,7 +19,10 @@ hantro-vpu-y += \
 		hantro_hevc.o \
 		hantro_mpeg2.o \
 		hantro_vp8.o \
-		hantro_vp9.o
+		hantro_vp9.o \
+		hantro_av1.o \
+		hantro_av1_filmgrain.o \
+		hantro_av1_entropymode.o
 
 hantro-vpu-$(CONFIG_VIDEO_HANTRO_IMX8M) += \
 		imx8m_vpu_hw.o
@@ -33,8 +36,6 @@ hantro-vpu-$(CONFIG_VIDEO_HANTRO_ROCKCHIP) += \
 		rockchip_vpu2_hw_mpeg2_dec.o \
 		rockchip_vpu2_hw_vp8_dec.o \
 		rockchip_vpu981_hw_av1_dec.o \
-		rockchip_av1_filmgrain.o \
-		rockchip_av1_entropymode.o \
 		rockchip_vpu_hw.o
 
 hantro-vpu-$(CONFIG_VIDEO_HANTRO_SUNXI) += \
diff --git a/drivers/media/platform/verisilicon/hantro_av1.c b/drivers/media/platform/verisilicon/hantro_av1.c
new file mode 100644
index 000000000000..5a51ac877c9c
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_av1.c
@@ -0,0 +1,780 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2026, Collabora
+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+ */
+
+#include <linux/types.h>
+#include <media/v4l2-h264.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro.h"
+#include "hantro_av1.h"
+#include "hantro_hw.h"
+
+#define GM_GLOBAL_MODELS_PER_FRAME	7
+#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
+#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
+#define AV1_MAX_TILES		128
+#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
+#define AV1_INVALID_IDX		-1
+#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
+
+#define SUPERRES_SCALE_BITS 3
+
+#define DIV_LUT_PREC_BITS 14
+#define DIV_LUT_BITS 8
+#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
+#define WARP_PARAM_REDUCE_BITS 6
+#define WARPEDMODEL_PREC_BITS 16
+
+/*
+ * Shift @value right by @n bits after adding half of the divisor
+ * (BIT(n) >> 1), i.e. divide by 2^n rounding to nearest rather than
+ * strictly up. Both arguments are copied into locals first.
+ */
+#define AV1_DIV_ROUND_UP_POW2(value, n)			\
+({							\
+	typeof(n) _n  = n;				\
+	typeof(value) _value = value;			\
+	(_value + (BIT(_n) >> 1)) >> _n;		\
+})
+
+/*
+ * Signed variant of AV1_DIV_ROUND_UP_POW2(): a negative @value is negated,
+ * rounded, and negated again. The locals are named _n_/_value_ so they do
+ * not clash with the _n/_value locals of the nested macro expansion.
+ */
+#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
+({									\
+	typeof(n) _n_  = n;						\
+	typeof(value) _value_ = value;					\
+	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
+		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
+})
+
+/*
+ * Reciprocal lookup table used by hantro_av1_resolve_divisor_32().
+ * Entry i matches 2^(DIV_LUT_PREC_BITS + DIV_LUT_BITS) / (DIV_LUT_NUM + i)
+ * rounded to nearest, for i in [0, DIV_LUT_NUM] (16384 at i = 0 down to
+ * 8192 at i = DIV_LUT_NUM).
+ */
+static const short div_lut[DIV_LUT_NUM + 1] = {
+	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
+	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
+	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
+	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
+	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
+	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
+	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
+	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
+	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
+	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
+	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
+	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
+	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
+	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
+	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
+	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
+	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
+	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
+	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
+	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
+	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
+	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
+	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
+	8240,  8224,  8208,  8192,
+};
+
+/*
+ * Transform-mode values returned by hantro_av1_get_hardware_tx_mode().
+ * NOTE(review): presumably the encoding the decoder hardware expects;
+ * confirm against the register description.
+ */
+enum hantro_av1_tx_mode {
+	HANTRO_AV1_TX_MODE_ONLY_4X4	= 0,
+	HANTRO_AV1_TX_MODE_8X8		= 1,
+	HANTRO_AV1_TX_MODE_16x16	= 2,
+	HANTRO_AV1_TX_MODE_32x32	= 3,
+	HANTRO_AV1_TX_MODE_SELECT	= 4,
+};
+
+/*
+ * Interpolation-filter values returned by
+ * hantro_av1_get_hardware_mcomp_filt_type(). Note that the numbering
+ * differs from the V4L2 one: EIGHT_TAP_SMOOTH is 0 and EIGHT_TAP is 1.
+ */
+enum hantro_av1_inter_prediction_filter_type {
+	HANTRO_AV1_EIGHT_TAP_SMOOTH	= 0,
+	HANTRO_AV1_EIGHT_TAP		= 1,
+	HANTRO_AV1_EIGHT_TAP_SHARP	= 2,
+	HANTRO_AV1_BILINEAR		= 3,
+	HANTRO_AV1_SWITCHABLE		= 4,
+};
+
+/*
+ * Translate a V4L2 AV1 transform mode into the matching
+ * HANTRO_AV1_TX_MODE_* value. V4L2_AV1_TX_MODE_LARGEST and any
+ * unrecognised mode both map to HANTRO_AV1_TX_MODE_32x32.
+ */
+int hantro_av1_get_hardware_tx_mode(enum v4l2_av1_tx_mode tx_mode)
+{
+	if (tx_mode == V4L2_AV1_TX_MODE_ONLY_4X4)
+		return HANTRO_AV1_TX_MODE_ONLY_4X4;
+
+	if (tx_mode == V4L2_AV1_TX_MODE_SELECT)
+		return HANTRO_AV1_TX_MODE_SELECT;
+
+	return HANTRO_AV1_TX_MODE_32x32;
+}
+
+/*
+ * Translate a V4L2 AV1 interpolation filter into the matching
+ * hantro_av1_inter_prediction_filter_type value. Unknown filters fall
+ * back to HANTRO_AV1_EIGHT_TAP_SMOOTH.
+ */
+int hantro_av1_get_hardware_mcomp_filt_type(int interpolation_filter)
+{
+	int hw_filter;
+
+	switch (interpolation_filter) {
+	case V4L2_AV1_INTERPOLATION_FILTER_EIGHTTAP:
+		hw_filter = HANTRO_AV1_EIGHT_TAP;
+		break;
+	case V4L2_AV1_INTERPOLATION_FILTER_EIGHTTAP_SHARP:
+		hw_filter = HANTRO_AV1_EIGHT_TAP_SHARP;
+		break;
+	case V4L2_AV1_INTERPOLATION_FILTER_BILINEAR:
+		hw_filter = HANTRO_AV1_BILINEAR;
+		break;
+	case V4L2_AV1_INTERPOLATION_FILTER_SWITCHABLE:
+		hw_filter = HANTRO_AV1_SWITCHABLE;
+		break;
+	case V4L2_AV1_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH:
+	default:
+		hw_filter = HANTRO_AV1_EIGHT_TAP_SMOOTH;
+		break;
+	}
+
+	return hw_filter;
+}
+
+/*
+ * Find the frame_refs[] slot holding reference @ref of the current frame.
+ *
+ * @ref indexes frame->ref_frame_idx[]; the timestamp stored for that
+ * reference is then searched among the slots currently in use.
+ *
+ * Returns the slot index, or AV1_INVALID_IDX when the reference index is
+ * out of range or no used slot carries that timestamp.
+ */
+int hantro_av1_get_frame_index(struct hantro_ctx *ctx, int ref)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	int ref_idx = frame->ref_frame_idx[ref];
+	u64 wanted;
+	int slot;
+
+	if (ref_idx < 0 || ref_idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME)
+		return AV1_INVALID_IDX;
+
+	wanted = frame->reference_frame_ts[ref_idx];
+
+	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
+		if (av1_dec->frame_refs[slot].used &&
+		    av1_dec->frame_refs[slot].timestamp == wanted)
+			return slot;
+	}
+
+	return AV1_INVALID_IDX;
+}
+
+/*
+ * Return the order hint recorded for reference @ref, or 0 when the
+ * reference cannot be resolved to a frame_refs[] slot.
+ */
+int hantro_av1_get_order_hint(struct hantro_ctx *ctx, int ref)
+{
+	int slot = hantro_av1_get_frame_index(ctx, ref);
+
+	if (slot == AV1_INVALID_IDX)
+		return 0;
+
+	return ctx->av1_dec.frame_refs[slot].order_hint;
+}
+
+/*
+ * Record the frame being decoded in the first unused frame_refs[] slot.
+ *
+ * The slot stores the frame geometry, type, order hints, @timestamp and the
+ * current destination buffer, is marked used, and becomes
+ * current_frame_index.
+ *
+ * Returns the slot index, or AV1_INVALID_IDX when every slot is in use.
+ */
+int hantro_av1_frame_ref(struct hantro_ctx *ctx, u64 timestamp)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	int slot, n;
+
+	/* Locate the first slot that is not in use. */
+	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
+		if (!av1_dec->frame_refs[slot].used)
+			break;
+	}
+
+	if (slot == AV1_MAX_FRAME_BUF_COUNT)
+		return AV1_INVALID_IDX;
+
+	av1_dec->frame_refs[slot].width = frame->frame_width_minus_1 + 1;
+	av1_dec->frame_refs[slot].height = frame->frame_height_minus_1 + 1;
+	av1_dec->frame_refs[slot].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
+	av1_dec->frame_refs[slot].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
+	av1_dec->frame_refs[slot].timestamp = timestamp;
+	av1_dec->frame_refs[slot].frame_type = frame->frame_type;
+	av1_dec->frame_refs[slot].order_hint = frame->order_hint;
+	av1_dec->frame_refs[slot].vb2_ref = hantro_get_dst_buf(ctx);
+
+	for (n = 0; n < V4L2_AV1_TOTAL_REFS_PER_FRAME; n++)
+		av1_dec->frame_refs[slot].order_hints[n] = frame->order_hints[n];
+
+	av1_dec->frame_refs[slot].used = true;
+	av1_dec->current_frame_index = slot;
+
+	return slot;
+}
+
+/* Mark frame_refs[@idx] as free; negative indices are ignored. */
+static void hantro_av1_frame_unref(struct hantro_ctx *ctx, int idx)
+{
+	if (idx < 0)
+		return;
+
+	ctx->av1_dec.frame_refs[idx].used = false;
+}
+
+/*
+ * Release every used frame_refs[] slot whose timestamp no longer appears
+ * in the current frame's reference_frame_ts[] list.
+ */
+void hantro_av1_clean_refs(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	int slot, ref;
+
+	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
+		if (!av1_dec->frame_refs[slot].used)
+			continue;
+
+		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
+			if (frame->reference_frame_ts[ref] ==
+			    av1_dec->frame_refs[slot].timestamp)
+				break;
+		}
+
+		/* The inner loop ran to completion: nothing references it. */
+		if (ref == V4L2_AV1_TOTAL_REFS_PER_FRAME)
+			hantro_av1_frame_unref(ctx, slot);
+	}
+}
+
+/*
+ * Size of the first plane of the reference format: bytes per line of
+ * plane 0 multiplied by the format height.
+ */
+size_t hantro_av1_luma_size(struct hantro_ctx *ctx)
+{
+	u32 stride = ctx->ref_fmt.plane_fmt[0].bytesperline;
+	u32 rows = ctx->ref_fmt.height;
+
+	return stride * rows;
+}
+
+/*
+ * Return 3/2 of the luma size, rounded up to a multiple of 64.
+ * NOTE(review): despite the name this is luma plus half of luma, i.e. it
+ * looks like the offset of the end of the chroma data; confirm with the
+ * callers.
+ */
+size_t hantro_av1_chroma_size(struct hantro_ctx *ctx)
+{
+	size_t luma = hantro_av1_luma_size(ctx);
+	size_t luma_and_chroma = (luma * 3) / 2;
+
+	return ALIGN(luma_and_chroma, 64);
+}
+
+/*
+ * Free the five per-tile-column DMA buffers (db_data_col, db_ctrl_col,
+ * cdef_col, sr_col, lr_col) and reset their bookkeeping.
+ *
+ * Safe to call with some or all buffers unallocated. The recorded size is
+ * cleared together with the CPU pointer: hantro_av1_tiles_reallocate()
+ * compares db_data_col.size against the required size to decide whether an
+ * allocation is needed, so leaving a stale size behind (for instance after
+ * a reallocation that failed half-way) would make it report success while
+ * every buffer pointer is NULL.
+ */
+static void hantro_av1_tiles_free(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_aux_buf *bufs[] = {
+		&av1_dec->db_data_col,
+		&av1_dec->db_ctrl_col,
+		&av1_dec->cdef_col,
+		&av1_dec->sr_col,
+		&av1_dec->lr_col,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(bufs); i++) {
+		struct hantro_aux_buf *buf = bufs[i];
+
+		if (buf->cpu)
+			dma_free_coherent(vpu->dev, buf->size,
+					  buf->cpu, buf->dma);
+		buf->cpu = NULL;
+		buf->size = 0;
+	}
+}
+
+/*
+ * Make sure the per-tile-column DMA buffers are large enough for the
+ * current frame.
+ *
+ * Buffer sizes are derived from the 64-aligned frame height, the bit depth
+ * and the number of tile columns. If the existing db_data_col buffer is
+ * already big enough nothing is done; otherwise all five buffers are freed
+ * and allocated again.
+ *
+ * Returns 0 on success or -ENOMEM, in which case all five buffers have
+ * been freed.
+ */
+static int hantro_av1_tiles_reallocate(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	unsigned int num_tile_cols = frame->tile_info.tile_cols;
+	unsigned int height = ALIGN(frame->frame_height_minus_1 + 1, 64);
+	unsigned int height_in_sb = height / 64;
+	unsigned int stripe_num = ((height + 8) + 63) / 64;
+	size_t size;
+
+	/* db_data_col acts as the "is everything big enough" indicator. */
+	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
+	if (av1_dec->db_data_col.size >= size)
+		return 0;
+
+	hantro_av1_tiles_free(ctx);
+
+	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						      &av1_dec->db_data_col.dma,
+						      GFP_KERNEL);
+	if (!av1_dec->db_data_col.cpu)
+		goto err_free_tiles;
+	av1_dec->db_data_col.size = size;
+
+	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
+	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						      &av1_dec->db_ctrl_col.dma,
+						      GFP_KERNEL);
+	if (!av1_dec->db_ctrl_col.cpu)
+		goto err_free_tiles;
+	av1_dec->db_ctrl_col.size = size;
+
+	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
+	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						   &av1_dec->cdef_col.dma,
+						   GFP_KERNEL);
+	if (!av1_dec->cdef_col.cpu)
+		goto err_free_tiles;
+	av1_dec->cdef_col.size = size;
+
+	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
+	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						 &av1_dec->sr_col.dma,
+						 GFP_KERNEL);
+	if (!av1_dec->sr_col.cpu)
+		goto err_free_tiles;
+	av1_dec->sr_col.size = size;
+
+	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
+	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
+						 &av1_dec->lr_col.dma,
+						 GFP_KERNEL);
+	if (!av1_dec->lr_col.cpu)
+		goto err_free_tiles;
+	av1_dec->lr_col.size = size;
+
+	av1_dec->num_tile_cols_allocated = num_tile_cols;
+
+	return 0;
+
+err_free_tiles:
+	hantro_av1_tiles_free(ctx);
+
+	return -ENOMEM;
+}
+
+/*
+ * Free every DMA buffer owned by the AV1 decoder context: the fixed-size
+ * buffers allocated by hantro_av1_init() first, then the per-tile-column
+ * buffers. Buffers that were never allocated are skipped.
+ */
+void hantro_av1_exit(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct device *dev = ctx->dev->dev;
+	struct hantro_aux_buf *bufs[] = {
+		&av1_dec->global_model,
+		&av1_dec->tile_info,
+		&av1_dec->film_grain,
+		&av1_dec->prob_tbl,
+		&av1_dec->prob_tbl_out,
+		&av1_dec->tile_buf,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(bufs); i++) {
+		if (bufs[i]->cpu)
+			dma_free_coherent(dev, bufs[i]->size,
+					  bufs[i]->cpu, bufs[i]->dma);
+		bufs[i]->cpu = NULL;
+	}
+
+	hantro_av1_tiles_free(ctx);
+}
+
+/*
+ * Reset the AV1 decoder context and allocate its fixed-size DMA buffers
+ * (global model, tile info, film grain, input/output probability tables
+ * and tile buffer). The default CDFs are installed as the current ones.
+ *
+ * Returns 0 on success or -ENOMEM. On failure every buffer allocated so
+ * far is released through hantro_av1_exit(), so the caller does not have
+ * to clean up after a failed init; the context is zeroed first, which
+ * makes hantro_av1_exit() safe on a partially initialised context.
+ */
+int hantro_av1_init(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const size_t film_grain_size = ALIGN(sizeof(struct hantro_av1_film_grain), 2048);
+	const size_t cdfs_size = ALIGN(sizeof(struct av1cdfs), 2048);
+
+	memset(av1_dec, 0, sizeof(*av1_dec));
+
+	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
+						       &av1_dec->global_model.dma,
+						       GFP_KERNEL);
+	if (!av1_dec->global_model.cpu)
+		goto err_free;
+	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
+
+	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_TILE_INFO_SIZE,
+						    &av1_dec->tile_info.dma,
+						    GFP_KERNEL);
+	if (!av1_dec->tile_info.cpu)
+		goto err_free;
+	av1_dec->tile_info.size = AV1_TILE_INFO_SIZE;
+
+	av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev, film_grain_size,
+						     &av1_dec->film_grain.dma,
+						     GFP_KERNEL);
+	if (!av1_dec->film_grain.cpu)
+		goto err_free;
+	av1_dec->film_grain.size = film_grain_size;
+
+	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev, cdfs_size,
+						   &av1_dec->prob_tbl.dma,
+						   GFP_KERNEL);
+	if (!av1_dec->prob_tbl.cpu)
+		goto err_free;
+	av1_dec->prob_tbl.size = cdfs_size;
+
+	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev, cdfs_size,
+						       &av1_dec->prob_tbl_out.dma,
+						       GFP_KERNEL);
+	if (!av1_dec->prob_tbl_out.cpu)
+		goto err_free;
+	av1_dec->prob_tbl_out.size = cdfs_size;
+
+	av1_dec->cdfs = &av1_dec->default_cdfs;
+	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
+
+	hantro_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
+
+	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
+						   AV1_TILE_SIZE,
+						   &av1_dec->tile_buf.dma,
+						   GFP_KERNEL);
+	if (!av1_dec->tile_buf.cpu)
+		goto err_free;
+	av1_dec->tile_buf.size = AV1_TILE_SIZE;
+
+	return 0;
+
+err_free:
+	/* Do not leak the buffers that were successfully allocated. */
+	hantro_av1_exit(ctx);
+
+	return -ENOMEM;
+}
+
+/*
+ * Fetch the AV1 stateless controls needed for the next decode run and make
+ * sure the tile buffers fit the frame.
+ *
+ * The sequence, tile group entry and frame controls are mandatory: a
+ * missing one triggers a WARN and -EINVAL. The film grain control is stored
+ * without being checked. Returns the result of
+ * hantro_av1_tiles_reallocate() otherwise.
+ */
+int hantro_av1_prepare_run(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_ctrls *ctrls = &ctx->av1_dec.ctrls;
+
+	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
+	if (WARN_ON(!ctrls->sequence))
+		return -EINVAL;
+
+	ctrls->tile_group_entry = hantro_get_ctrl(ctx,
+						  V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
+	if (WARN_ON(!ctrls->tile_group_entry))
+		return -EINVAL;
+
+	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
+	if (WARN_ON(!ctrls->frame))
+		return -EINVAL;
+
+	ctrls->film_grain = hantro_get_ctrl(ctx,
+					    V4L2_CID_STATELESS_AV1_FILM_GRAIN);
+
+	return hantro_av1_tiles_reallocate(ctx);
+}
+
+/*
+ * Return the bit position (0..31) of the most significant set bit of @n.
+ * Zero is reported as 0 so __builtin_clz() is never called with 0.
+ */
+static int hantro_av1_get_msb(u32 n)
+{
+	/* clz is in [0, 31] here, so 31 - clz equals 31 ^ clz. */
+	return n ? 31 - __builtin_clz(n) : 0;
+}
+
+/*
+ * Look up a fixed-point reciprocal of @d in div_lut[].
+ *
+ * On return *@shift holds the position of the most significant set bit of
+ * @d plus DIV_LUT_PREC_BITS. Returns the div_lut[] multiplier selected by
+ * the DIV_LUT_BITS bits that follow the leading one of @d, or -1 if the
+ * computed table index exceeds DIV_LUT_NUM (in which case *@shift is left
+ * at the bare MSB position).
+ *
+ * NOTE(review): the visible caller only passes @d > 0; for @d == 0 the
+ * subtraction below wraps, so confirm that no other caller can pass 0.
+ */
+static short hantro_av1_resolve_divisor_32(u32 d, short *shift)
+{
+	int f;
+	u64 e;
+
+	*shift = hantro_av1_get_msb(d);
+	/* e is obtained from D after resetting the most significant 1 bit. */
+	e = d - ((u32)1 << *shift);
+	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
+	if (*shift > DIV_LUT_BITS)
+		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
+	else
+		f = e << (DIV_LUT_BITS - *shift);
+	if (f > DIV_LUT_NUM)
+		return -1;
+	*shift += DIV_LUT_PREC_BITS;
+	/* Use f as lookup into the precomputed table of multipliers */
+	return div_lut[f];
+}
+
+/*
+ * Derive the four shear parameters from a 6-entry warp matrix.
+ *
+ * @params is read as signed values (mat[2]..mat[5] are used). When
+ * mat[2] <= 0 the function returns early and leaves *@alpha, *@beta,
+ * *@gamma and *@delta untouched, so callers must pre-initialise them.
+ * Otherwise each output is clamped to the s16 range and then rounded to a
+ * multiple of (1 << WARP_PARAM_REDUCE_BITS).
+ *
+ * NOTE(review): this looks like the AV1 warp "shear" setup process;
+ * confirm against the specification before changing any arithmetic.
+ */
+static void hantro_av1_get_shear_params(const u32 *params, s64 *alpha,
+					s64 *beta, s64 *gamma, s64 *delta)
+{
+	const int *mat = params;
+	short shift;
+	short y;
+	long long gv, dv;
+
+	if (mat[2] <= 0)
+		return;
+
+	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
+	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
+
+	/*
+	 * y is the div_lut[] reciprocal of mat[2]; the sign factor is always
+	 * 1 here because mat[2] > 0 was checked above.
+	 */
+	y = hantro_av1_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
+
+	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
+
+	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
+
+	dv = ((long long)mat[3] * mat[4]) * y;
+	*delta = clamp_val(mat[5] -
+		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
+		S16_MIN, S16_MAX);
+
+	/* Reduce precision: round each value to a multiple of 1 << 6. */
+	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
+		 * (1 << WARP_PARAM_REDUCE_BITS);
+	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
+		* (1 << WARP_PARAM_REDUCE_BITS);
+	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
+		 * (1 << WARP_PARAM_REDUCE_BITS);
+	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
+		* (1 << WARP_PARAM_REDUCE_BITS);
+}
+
+/*
+ * Fill the global_model DMA buffer from the frame's global motion data.
+ *
+ * For each of the V4L2_AV1_REFS_PER_FRAME references, starting at
+ * V4L2_AV1_REF_LAST_FRAME, the buffer receives the six 32-bit warp
+ * parameters (with entries 2 and 3 swapped) followed by the four 16-bit
+ * shear parameters. The shear parameters stay 0 when the motion type is
+ * above V4L2_AV1_WARP_MODEL_AFFINE or when they cannot be derived.
+ */
+void hantro_av1_set_global_model(struct hantro_ctx *ctx)
+{
+	/* Order in which params[] entries are written: 2 and 3 are swapped. */
+	static const u8 param_order[6] = { 0, 1, 3, 2, 4, 5 };
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_av1_global_motion *gm =
+		&av1_dec->ctrls.frame->global_motion;
+	u8 *dst = av1_dec->global_model.cpu;
+	int ref_frame, i;
+
+	memset(dst, 0, GLOBAL_MODEL_SIZE);
+
+	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
+		const int ref = V4L2_AV1_REF_LAST_FRAME + ref_frame;
+		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;
+
+		for (i = 0; i < 6; ++i) {
+			*(s32 *)dst = gm->params[ref][param_order[i]];
+			dst += 4;
+		}
+
+		if (gm->type[ref] <= V4L2_AV1_WARP_MODEL_AFFINE)
+			hantro_av1_get_shear_params(&gm->params[ref][0],
+						    &alpha, &beta, &gamma, &delta);
+
+		*(s16 *)dst = alpha;
+		dst += 2;
+		*(s16 *)dst = beta;
+		dst += 2;
+		*(s16 *)dst = gamma;
+		dst += 2;
+		*(s16 *)dst = delta;
+		dst += 2;
+	}
+}
+
+/*
+ * Return the smallest k such that (1 << k) is greater than or equal to
+ * @target. Targets of 1 or less yield 0.
+ */
+int hantro_av1_tile_log2(int target)
+{
+	int k = 0;
+
+	while ((1 << k) < target)
+		k++;
+
+	return k;
+}
+
+/*
+ * Signed distance between order hints @a and @b.
+ *
+ * The difference is wrapped to order_hint_bits bits and sign-extended from
+ * the top bit of that width. Returns 0 when the sequence header has
+ * order_hint_bits set to 0.
+ */
+int hantro_av1_get_dist(struct hantro_ctx *ctx, int a, int b)
+{
+	const struct v4l2_ctrl_av1_sequence *sequence = ctx->av1_dec.ctrls.sequence;
+	int delta, sign_bit;
+
+	if (!sequence->order_hint_bits)
+		return 0;
+
+	sign_bit = 1 << (sequence->order_hint_bits - 1);
+	delta = a - b;
+
+	return (delta & (sign_bit - 1)) - (delta & sign_bit);
+}
+
+/*
+ * Compute ref_frame_sign_bias[] for the current frame.
+ *
+ * Without order hints, or for intra frames, every entry is cleared.
+ * Otherwise entry i + 1 is set to 1 when reference i has an order hint
+ * after the current frame's and to 0 when it does not; entries whose
+ * reference cannot be resolved are left unchanged.
+ */
+void hantro_av1_set_frame_sign_bias(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	const struct v4l2_ctrl_av1_sequence *sequence = av1_dec->ctrls.sequence;
+	int ref;
+
+	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
+		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++)
+			av1_dec->ref_frame_sign_bias[ref] = 0;
+
+		return;
+	}
+
+	/* Classify each resolvable reference as forward or backward. */
+	for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ref++) {
+		int rel_off;
+
+		if (hantro_av1_get_frame_index(ctx, ref) < 0)
+			continue;
+
+		rel_off = hantro_av1_get_dist(ctx,
+					      hantro_av1_get_order_hint(ctx, ref),
+					      frame->order_hint);
+		av1_dec->ref_frame_sign_bias[ref + 1] = rel_off > 0;
+	}
+}
+
+/*
+ * Build a 256-entry scaling lookup table from a piecewise-linear function.
+ *
+ * @values:      x coordinates of the control points
+ * @scaling:     y coordinates of the control points
+ * @num_points:  number of control points; 0 yields an all-zero table
+ * @scaling_lut: output table, 256 bytes
+ *
+ * Entries between two consecutive points are linearly interpolated in
+ * 16.16 fixed point; entries from the last point's x up to 255 take the
+ * last point's y. Entries below the first point's x are not written.
+ */
+void hantro_av1_init_scaling_function(const u8 *values, const u8 *scaling,
+				      u8 num_points, u8 *scaling_lut)
+{
+	int seg, pos;
+
+	if (!num_points) {
+		memset(scaling_lut, 0, 256);
+		return;
+	}
+
+	for (seg = 0; seg + 1 < num_points; seg++) {
+		s32 delta_y = scaling[seg + 1] - scaling[seg];
+		s32 delta_x = values[seg + 1] - values[seg];
+		s64 delta = 0;
+		int x;
+
+		if (delta_x)
+			delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
+
+		for (x = 0; x < delta_x; x++)
+			scaling_lut[values[seg] + x] =
+				scaling[seg] + (s32)((x * delta + 32768) >> 16);
+	}
+
+	for (pos = values[num_points - 1]; pos < 256; pos++)
+		scaling_lut[pos] = scaling[num_points - 1];
+}
+
+/*
+ * Fill the tile_info DMA buffer with one 16-byte record per tile, iterating
+ * column by column.
+ *
+ * Each record holds, as little-endian 32-bit values: the tile width and
+ * height in superblocks, the tile's start offset relative to the first
+ * tile group entry, and its end offset (start plus tile size).
+ *
+ * The buffer only has room for AV1_MAX_TILES records while tile_cols and
+ * tile_rows come from a userspace-provided control, so writing stops once
+ * the buffer is full instead of running past its end.
+ */
+void hantro_av1_set_tile_info(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
+	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
+	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
+	    ctrls->tile_group_entry;
+	u8 *dst = av1_dec->tile_info.cpu;
+	const u8 *dst_end = dst + av1_dec->tile_info.size;
+	int tile0, tile1;
+
+	memset(dst, 0, av1_dec->tile_info.size);
+
+	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
+		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
+			int tile_id;
+			u32 start, end;
+			u32 x0, y0;
+
+			/* No room left for another 16-byte record. */
+			if (dst_end - dst < AV1_TILE_INFO_SIZE / AV1_MAX_TILES)
+				return;
+
+			tile_id = tile1 * tile_info->tile_cols + tile0;
+			y0 = tile_info->height_in_sbs_minus_1[tile1] + 1;
+			x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
+
+			/* tile size in SB units (width,height) */
+			*dst++ = x0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = y0;
+			*dst++ = 0;
+			*dst++ = 0;
+			*dst++ = 0;
+
+			/* tile start position */
+			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
+			*dst++ = start & 255;
+			*dst++ = (start >> 8) & 255;
+			*dst++ = (start >> 16) & 255;
+			*dst++ = (start >> 24) & 255;
+
+			/* tile end position (start + number of bytes in tile data) */
+			end = start + group_entry[tile_id].tile_size;
+			*dst++ = end & 255;
+			*dst++ = (end >> 8) & 255;
+			*dst++ = (end >> 16) & 255;
+			*dst++ = (end >> 24) & 255;
+		}
+	}
+}
+
+/*
+ * Tell whether the current frame is coded losslessly.
+ *
+ * Returns true only when every quantizer delta is zero and the effective
+ * quantizer index (base_q_idx plus the per-segment ALT_Q adjustment when
+ * that feature is enabled, clamped to [0, 255]) is zero for all segments.
+ */
+bool hantro_av1_is_lossless(struct hantro_ctx *ctx)
+{
+	const struct v4l2_ctrl_av1_frame *frame = ctx->av1_dec.ctrls.frame;
+	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
+	const struct v4l2_av1_quantization *quant = &frame->quantization;
+	int i;
+
+	/* The deltas are segment independent: check them once up front. */
+	if (quant->delta_q_y_dc ||
+	    quant->delta_q_u_dc || quant->delta_q_u_ac ||
+	    quant->delta_q_v_dc || quant->delta_q_v_ac)
+		return false;
+
+	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
+		int qindex = quant->base_q_idx;
+
+		if (seg->feature_enabled[i] &
+		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q))
+			qindex += seg->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
+
+		if (clamp(qindex, 0, 255))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Pull the probability tables from prob_tbl_out back into the driver's
+ * CDF state.
+ *
+ * Nothing happens when the frame disables the end-of-frame CDF update.
+ * Otherwise the CDFs of the first slot flagged in refresh_frame_flags are
+ * loaded, overwritten with the output table and stored for all refreshed
+ * slots. For intra frames the previous MV CDFs are kept and the output MV
+ * CDFs go to cdfs_ndvc instead.
+ */
+void hantro_av1_update_prob(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	struct av1cdfs *out_cdfs = av1_dec->prob_tbl_out.cpu;
+	bool frame_is_intra = IS_INTRA(frame->frame_type);
+	int slot;
+
+	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
+		return;
+
+	for (slot = 0; slot < NUM_REF_FRAMES; slot++) {
+		struct mvcdfs stored_mv_cdf;
+
+		if (!(frame->refresh_frame_flags & BIT(slot)))
+			continue;
+
+		hantro_av1_get_cdfs(ctx, slot);
+		stored_mv_cdf = av1_dec->cdfs->mv_cdf;
+		*av1_dec->cdfs = *out_cdfs;
+		if (frame_is_intra) {
+			av1_dec->cdfs->mv_cdf = stored_mv_cdf;
+			*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
+		}
+		hantro_av1_store_cdfs(ctx, frame->refresh_frame_flags);
+
+		/* Only the first refreshed slot is processed. */
+		break;
+	}
+}
+
+/*
+ * Select the CDFs for the current frame and copy them into the prob_tbl
+ * DMA buffer.
+ *
+ * The default CDFs (with coefficient probabilities chosen from base_q_idx)
+ * are used for error-resilient frames, intra frames and frames without a
+ * primary reference; otherwise the CDFs saved for the primary reference
+ * frame are loaded. For intra frames the MV area of the buffer is then
+ * overwritten with the intrabc MV context.
+ */
+void hantro_av1_set_prob(struct hantro_ctx *ctx)
+{
+	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
+	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
+	bool frame_is_intra = IS_INTRA(frame->frame_type);
+	bool use_defaults;
+
+	use_defaults = !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE) ||
+		       frame_is_intra ||
+		       frame->primary_ref_frame == AV1_PRIMARY_REF_NONE;
+
+	if (!use_defaults) {
+		hantro_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
+	} else {
+		av1_dec->cdfs = &av1_dec->default_cdfs;
+		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
+		hantro_av1_default_coeff_probs(frame->quantization.base_q_idx,
+					       av1_dec->cdfs);
+	}
+	hantro_av1_store_cdfs(ctx, frame->refresh_frame_flags);
+
+	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
+
+	if (!frame_is_intra)
+		return;
+
+	/* Overwrite MV context area with intrabc MV context */
+	memcpy(av1_dec->prob_tbl.cpu + offsetof(struct av1cdfs, mv_cdf),
+	       av1_dec->cdfs_ndvc, sizeof(struct mvcdfs));
+}
diff --git a/drivers/media/platform/verisilicon/hantro_av1.h b/drivers/media/platform/verisilicon/hantro_av1.h
new file mode 100644
index 000000000000..4e2122b95cdd
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_av1.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _HANTRO_AV1_H_
+#define _HANTRO_AV1_H_
+
+#define AV1_PRIMARY_REF_NONE	7
+#define AV1_REF_SCALE_SHIFT	14
+#define MAX_FRAME_DISTANCE	31
+#define AV1DEC_MAX_PIC_BUFFERS	24
+
+#define SCALE_NUMERATOR 8
+#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
+
+#define RS_SCALE_SUBPEL_BITS 14
+#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
+#define RS_SUBPEL_BITS 6
+#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
+#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
+#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
+
+/*
+ * These 3 values aren't defined in enum v4l2_av1_segment_feature because
+ * they are not part of the specification
+ */
+#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
+#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
+#define V4L2_AV1_SEG_LVL_ALT_LF_V	4
+
+#define IS_INTRA(type) (((type) == V4L2_AV1_KEY_FRAME) || ((type) == V4L2_AV1_INTRA_ONLY_FRAME))
+
+#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
+#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
+
+int hantro_av1_get_frame_index(struct hantro_ctx *ctx, int ref);
+int hantro_av1_get_order_hint(struct hantro_ctx *ctx, int ref);
+int hantro_av1_frame_ref(struct hantro_ctx *ctx, u64 timestamp);
+void hantro_av1_clean_refs(struct hantro_ctx *ctx);
+size_t hantro_av1_luma_size(struct hantro_ctx *ctx);
+size_t hantro_av1_chroma_size(struct hantro_ctx *ctx);
+void hantro_av1_exit(struct hantro_ctx *ctx);
+int hantro_av1_init(struct hantro_ctx *ctx);
+int hantro_av1_prepare_run(struct hantro_ctx *ctx);
+void hantro_av1_set_global_model(struct hantro_ctx *ctx);
+int hantro_av1_tile_log2(int target);
+int hantro_av1_get_dist(struct hantro_ctx *ctx, int a, int b);
+void hantro_av1_set_frame_sign_bias(struct hantro_ctx *ctx);
+void hantro_av1_init_scaling_function(const u8 *values, const u8 *scaling,
+				      u8 num_points, u8 *scaling_lut);
+void hantro_av1_set_tile_info(struct hantro_ctx *ctx);
+bool hantro_av1_is_lossless(struct hantro_ctx *ctx);
+void hantro_av1_update_prob(struct hantro_ctx *ctx);
+void hantro_av1_set_prob(struct hantro_ctx *ctx);
+
+int hantro_av1_get_hardware_mcomp_filt_type(int interpolation_filter);
+int hantro_av1_get_hardware_tx_mode(enum v4l2_av1_tx_mode tx_mode);
+
+#endif
diff --git a/drivers/media/platform/verisilicon/rockchip_av1_entropymode.c b/drivers/media/platform/verisilicon/hantro_av1_entropymode.c
similarity index 99%
rename from drivers/media/platform/verisilicon/rockchip_av1_entropymode.c
rename to drivers/media/platform/verisilicon/hantro_av1_entropymode.c
index b1ae72ad675e..4f7bfec73668 100644
--- a/drivers/media/platform/verisilicon/rockchip_av1_entropymode.c
+++ b/drivers/media/platform/verisilicon/hantro_av1_entropymode.c
@@ -11,7 +11,7 @@
  */
 
 #include "hantro.h"
-#include "rockchip_av1_entropymode.h"
+#include "hantro_av1_entropymode.h"
 
 #define AOM_ICDF ICDF
 #define AOM_CDF2(a0) AOM_ICDF(a0)
@@ -4195,7 +4195,7 @@ static const u16 default_bits_cdf[][10] = {
 	}
 };
 
-static int rockchip_av1_get_q_ctx(int q)
+static int hantro_av1_get_q_ctx(int q)
 {
 	if (q <= 20)
 		return 0;
@@ -4206,10 +4206,10 @@ static int rockchip_av1_get_q_ctx(int q)
 	return 3;
 }
 
-void rockchip_av1_default_coeff_probs(u32 base_qindex, void *ptr)
+void hantro_av1_default_coeff_probs(u32 base_qindex, void *ptr)
 {
 	struct av1cdfs *cdfs = (struct av1cdfs *)ptr;
-	const int index = rockchip_av1_get_q_ctx(base_qindex);
+	const int index = hantro_av1_get_q_ctx(base_qindex);
 
 	memcpy(cdfs->txb_skip_cdf, av1_default_txb_skip_cdfs[index],
 	       sizeof(av1_default_txb_skip_cdfs[0]));
@@ -4240,8 +4240,8 @@ void rockchip_av1_default_coeff_probs(u32 base_qindex, void *ptr)
 	       sizeof(av1_default_eob_multi1024_cdfs[0]));
 }
 
-void rockchip_av1_set_default_cdfs(struct av1cdfs *cdfs,
-				   struct mvcdfs *cdfs_ndvc)
+void hantro_av1_set_default_cdfs(struct av1cdfs *cdfs,
+				 struct mvcdfs *cdfs_ndvc)
 {
 	memcpy(cdfs->partition_cdf, default_partition_cdf,
 	       sizeof(cdfs->partition_cdf));
@@ -4398,7 +4398,7 @@ void rockchip_av1_set_default_cdfs(struct av1cdfs *cdfs,
 	       sizeof(cdfs->compound_idx_cdf));
 }
 
-void rockchip_av1_get_cdfs(struct hantro_ctx *ctx, u32 ref_idx)
+void hantro_av1_get_cdfs(struct hantro_ctx *ctx, u32 ref_idx)
 {
 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
 
@@ -4406,8 +4406,8 @@ void rockchip_av1_get_cdfs(struct hantro_ctx *ctx, u32 ref_idx)
 	av1_dec->cdfs_ndvc = &av1_dec->cdfs_last_ndvc[ref_idx];
 }
 
-void rockchip_av1_store_cdfs(struct hantro_ctx *ctx,
-			     u32 refresh_frame_flags)
+void hantro_av1_store_cdfs(struct hantro_ctx *ctx,
+			   u32 refresh_frame_flags)
 {
 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
 	int i;
diff --git a/drivers/media/platform/verisilicon/rockchip_av1_entropymode.h b/drivers/media/platform/verisilicon/hantro_av1_entropymode.h
similarity index 95%
rename from drivers/media/platform/verisilicon/rockchip_av1_entropymode.h
rename to drivers/media/platform/verisilicon/hantro_av1_entropymode.h
index bbf8424c7d2c..abbc660ecce3 100644
--- a/drivers/media/platform/verisilicon/rockchip_av1_entropymode.h
+++ b/drivers/media/platform/verisilicon/hantro_av1_entropymode.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 
-#ifndef _ROCKCHIP_AV1_ENTROPYMODE_H_
-#define _ROCKCHIP_AV1_ENTROPYMODE_H_
+#ifndef _HANTRO_AV1_ENTROPYMODE_H_
+#define _HANTRO_AV1_ENTROPYMODE_H_
 
 #include <linux/types.h>
 
@@ -262,11 +262,11 @@ struct av1cdfs {
 	u16 dummy3[16];
 };
 
-void rockchip_av1_store_cdfs(struct hantro_ctx *ctx,
-			     u32 refresh_frame_flags);
-void rockchip_av1_get_cdfs(struct hantro_ctx *ctx, u32 ref_idx);
-void rockchip_av1_set_default_cdfs(struct av1cdfs *cdfs,
-				   struct mvcdfs *cdfs_ndvc);
-void rockchip_av1_default_coeff_probs(u32 base_qindex, void *ptr);
+void hantro_av1_store_cdfs(struct hantro_ctx *ctx,
+			   u32 refresh_frame_flags);
+void hantro_av1_get_cdfs(struct hantro_ctx *ctx, u32 ref_idx);
+void hantro_av1_set_default_cdfs(struct av1cdfs *cdfs,
+				 struct mvcdfs *cdfs_ndvc);
+void hantro_av1_default_coeff_probs(u32 base_qindex, void *ptr);
 
-#endif /* _ROCKCHIP_AV1_ENTROPYMODE_H_ */
+#endif /* _HANTRO_AV1_ENTROPYMODE_H_ */
diff --git a/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c b/drivers/media/platform/verisilicon/hantro_av1_filmgrain.c
similarity index 92%
rename from drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c
rename to drivers/media/platform/verisilicon/hantro_av1_filmgrain.c
index f64dea797eff..06a21974e24e 100644
--- a/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.c
+++ b/drivers/media/platform/verisilicon/hantro_av1_filmgrain.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0
 
-#include "rockchip_av1_filmgrain.h"
+#include "hantro_av1_filmgrain.h"
 
 static const s32 gaussian_sequence[2048] = {
 	56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820,
@@ -204,8 +204,8 @@ static inline s32 round_power_of_two(const s32 val, s32 n)
 	return (val + a) >> n;
 }
 
-static void rockchip_av1_init_random_generator(u8 luma_num, u16 seed,
-					       u16 *random_register)
+static void hantro_av1_init_random_generator(u8 luma_num, u16 seed,
+					     u16 *random_register)
 {
 	u16 random_reg = seed;
 
@@ -214,7 +214,7 @@ static void rockchip_av1_init_random_generator(u8 luma_num, u16 seed,
 	*random_register = random_reg;
 }
 
-static inline void rockchip_av1_update_random_register(u16 *random_register)
+static inline void hantro_av1_update_random_register(u16 *random_register)
 {
 	u16 bit;
 	u16 random_reg = *random_register;
@@ -224,21 +224,21 @@ static inline void rockchip_av1_update_random_register(u16 *random_register)
 	*random_register = (random_reg >> 1) | (bit << 15);
 }
 
-static inline s32 rockchip_av1_get_random_number(u16 random_register)
+static inline s32 hantro_av1_get_random_number(u16 random_register)
 {
 	return (random_register >> 5) & ((1 << 11) - 1);
 }
 
-void rockchip_av1_generate_luma_grain_block(s32 (*luma_grain_block)[73][82],
-					    s32 bitdepth,
-					    u8 num_y_points,
-					    s32 grain_scale_shift,
-					    s32 ar_coeff_lag,
-					    s32 (*ar_coeffs_y)[24],
-					    s32 ar_coeff_shift,
-					    s32 grain_min,
-					    s32 grain_max,
-					    u16 random_seed)
+void hantro_av1_generate_luma_grain_block(s32 (*luma_grain_block)[73][82],
+					  s32 bitdepth,
+					  u8 num_y_points,
+					  s32 grain_scale_shift,
+					  s32 ar_coeff_lag,
+					  s32 (*ar_coeffs_y)[24],
+					  s32 ar_coeff_shift,
+					  s32 grain_min,
+					  s32 grain_max,
+					  u16 random_seed)
 {
 	s32 gauss_sec_shift = 12 - bitdepth + grain_scale_shift;
 	u16 grain_random_register = random_seed;
@@ -247,11 +247,11 @@ void rockchip_av1_generate_luma_grain_block(s32 (*luma_grain_block)[73][82],
 	for (i = 0; i < 73; i++) {
 		for (j = 0; j < 82; j++) {
 			if (num_y_points > 0) {
-				rockchip_av1_update_random_register
+				hantro_av1_update_random_register
 				    (&grain_random_register);
 				(*luma_grain_block)[i][j] =
 				    round_power_of_two(gaussian_sequence
-					     [rockchip_av1_get_random_number
+					     [hantro_av1_get_random_number
 					      (grain_random_register)],
 					     gauss_sec_shift);
 			} else {
@@ -285,37 +285,37 @@ void rockchip_av1_generate_luma_grain_block(s32 (*luma_grain_block)[73][82],
 }
 
 // Calculate chroma grain noise once per frame
-void rockchip_av1_generate_chroma_grain_block(s32 (*luma_grain_block)[73][82],
-					      s32 (*cb_grain_block)[38][44],
-					      s32 (*cr_grain_block)[38][44],
-					      s32 bitdepth,
-					      u8 num_y_points,
-					      u8 num_cb_points,
-					      u8 num_cr_points,
-					      s32 grain_scale_shift,
-					      s32 ar_coeff_lag,
-					      s32 (*ar_coeffs_cb)[25],
-					      s32 (*ar_coeffs_cr)[25],
-					      s32 ar_coeff_shift,
-					      s32 grain_min,
-					      s32 grain_max,
-					      u8 chroma_scaling_from_luma,
-					      u16 random_seed)
+void hantro_av1_generate_chroma_grain_block(s32 (*luma_grain_block)[73][82],
+					    s32 (*cb_grain_block)[38][44],
+					    s32 (*cr_grain_block)[38][44],
+					    s32 bitdepth,
+					    u8 num_y_points,
+					    u8 num_cb_points,
+					    u8 num_cr_points,
+					    s32 grain_scale_shift,
+					    s32 ar_coeff_lag,
+					    s32 (*ar_coeffs_cb)[25],
+					    s32 (*ar_coeffs_cr)[25],
+					    s32 ar_coeff_shift,
+					    s32 grain_min,
+					    s32 grain_max,
+					    u8 chroma_scaling_from_luma,
+					    u16 random_seed)
 {
 	s32 gauss_sec_shift = 12 - bitdepth + grain_scale_shift;
 	u16 grain_random_register = 0;
 	s32 i, j;
 
-	rockchip_av1_init_random_generator(7, random_seed,
-					   &grain_random_register);
+	hantro_av1_init_random_generator(7, random_seed,
+					 &grain_random_register);
 	for (i = 0; i < 38; i++) {
 		for (j = 0; j < 44; j++) {
 			if (num_cb_points || chroma_scaling_from_luma) {
-				rockchip_av1_update_random_register
+				hantro_av1_update_random_register
 				    (&grain_random_register);
 				(*cb_grain_block)[i][j] =
 				    round_power_of_two(gaussian_sequence
-					     [rockchip_av1_get_random_number
+					     [hantro_av1_get_random_number
 					      (grain_random_register)],
 					     gauss_sec_shift);
 			} else {
@@ -324,16 +324,16 @@ void rockchip_av1_generate_chroma_grain_block(s32 (*luma_grain_block)[73][82],
 		}
 	}
 
-	rockchip_av1_init_random_generator(11, random_seed,
-					   &grain_random_register);
+	hantro_av1_init_random_generator(11, random_seed,
+					 &grain_random_register);
 	for (i = 0; i < 38; i++) {
 		for (j = 0; j < 44; j++) {
 			if (num_cr_points || chroma_scaling_from_luma) {
-				rockchip_av1_update_random_register
+				hantro_av1_update_random_register
 				    (&grain_random_register);
 				(*cr_grain_block)[i][j] =
 				    round_power_of_two(gaussian_sequence
-					     [rockchip_av1_get_random_number
+					     [hantro_av1_get_random_number
 					      (grain_random_register)],
 					     gauss_sec_shift);
 			} else {
diff --git a/drivers/media/platform/verisilicon/hantro_av1_filmgrain.h b/drivers/media/platform/verisilicon/hantro_av1_filmgrain.h
new file mode 100644
index 000000000000..5593e84114d0
--- /dev/null
+++ b/drivers/media/platform/verisilicon/hantro_av1_filmgrain.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _HANTRO_AV1_FILMGRAIN_H_
+#define _HANTRO_AV1_FILMGRAIN_H_
+
+#include <linux/types.h>
+
+struct hantro_av1_film_grain {
+	u8 scaling_lut_y[256];
+	u8 scaling_lut_cb[256];
+	u8 scaling_lut_cr[256];
+	s16 cropped_luma_grain_block[4096];
+	s16 cropped_chroma_grain_block[1024 * 2];
+};
+
+void hantro_av1_generate_luma_grain_block(s32 (*luma_grain_block)[73][82],
+					  s32 bitdepth,
+					  u8 num_y_points,
+					  s32 grain_scale_shift,
+					  s32 ar_coeff_lag,
+					  s32 (*ar_coeffs_y)[24],
+					  s32 ar_coeff_shift,
+					  s32 grain_min,
+					  s32 grain_max,
+					  u16 random_seed);
+
+void hantro_av1_generate_chroma_grain_block(s32 (*luma_grain_block)[73][82],
+					    s32 (*cb_grain_block)[38][44],
+					    s32 (*cr_grain_block)[38][44],
+					    s32 bitdepth,
+					    u8 num_y_points,
+					    u8 num_cb_points,
+					    u8 num_cr_points,
+					    s32 grain_scale_shift,
+					    s32 ar_coeff_lag,
+					    s32 (*ar_coeffs_cb)[25],
+					    s32 (*ar_coeffs_cr)[25],
+					    s32 ar_coeff_shift,
+					    s32 grain_min,
+					    s32 grain_max,
+					    u8 chroma_scaling_from_luma,
+					    u16 random_seed);
+
+#endif
diff --git a/drivers/media/platform/verisilicon/hantro_hw.h b/drivers/media/platform/verisilicon/hantro_hw.h
index 5f2011529f02..d1d39d1df5d2 100644
--- a/drivers/media/platform/verisilicon/hantro_hw.h
+++ b/drivers/media/platform/verisilicon/hantro_hw.h
@@ -15,8 +15,8 @@
 #include <media/v4l2-vp9.h>
 #include <media/videobuf2-core.h>
 
-#include "rockchip_av1_entropymode.h"
-#include "rockchip_av1_filmgrain.h"
+#include "hantro_av1_entropymode.h"
+#include "hantro_av1_filmgrain.h"
 
 #define DEC_8190_ALIGN_MASK	0x07U
 
@@ -459,10 +459,7 @@ void hantro_hevc_ref_init(struct hantro_ctx *ctx);
 dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc);
 int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr);
 
-int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx);
-void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx);
 int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx);
-void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx);
 
 static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension)
 {
diff --git a/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h b/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h
deleted file mode 100644
index 31a8b7920c31..000000000000
--- a/drivers/media/platform/verisilicon/rockchip_av1_filmgrain.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-#ifndef _ROCKCHIP_AV1_FILMGRAIN_H_
-#define _ROCKCHIP_AV1_FILMGRAIN_H_
-
-#include <linux/types.h>
-
-void rockchip_av1_generate_luma_grain_block(s32 (*luma_grain_block)[73][82],
-					    s32 bitdepth,
-					    u8 num_y_points,
-					    s32 grain_scale_shift,
-					    s32 ar_coeff_lag,
-					    s32 (*ar_coeffs_y)[24],
-					    s32 ar_coeff_shift,
-					    s32 grain_min,
-					    s32 grain_max,
-					    u16 random_seed);
-
-void rockchip_av1_generate_chroma_grain_block(s32 (*luma_grain_block)[73][82],
-					      s32 (*cb_grain_block)[38][44],
-					      s32 (*cr_grain_block)[38][44],
-					      s32 bitdepth,
-					      u8 num_y_points,
-					      u8 num_cb_points,
-					      u8 num_cr_points,
-					      s32 grain_scale_shift,
-					      s32 ar_coeff_lag,
-					      s32 (*ar_coeffs_cb)[25],
-					      s32 (*ar_coeffs_cr)[25],
-					      s32 ar_coeff_shift,
-					      s32 grain_min,
-					      s32 grain_max,
-					      u8 chroma_scaling_from_luma,
-					      u16 random_seed);
-
-#endif
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
index e4e21ad37323..990a5e6b5531 100644
--- a/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
+++ b/drivers/media/platform/verisilicon/rockchip_vpu981_hw_av1_dec.c
@@ -7,622 +7,35 @@
 
 #include <media/v4l2-mem2mem.h>
 #include "hantro.h"
+#include "hantro_av1.h"
 #include "hantro_v4l2.h"
 #include "rockchip_vpu981_regs.h"
 
 #define AV1_DEC_MODE		17
-#define GM_GLOBAL_MODELS_PER_FRAME	7
-#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
-#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
-#define AV1_MAX_TILES		128
-#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
-#define AV1DEC_MAX_PIC_BUFFERS	24
-#define AV1_REF_SCALE_SHIFT	14
-#define AV1_INVALID_IDX		-1
-#define MAX_FRAME_DISTANCE	31
-#define AV1_PRIMARY_REF_NONE	7
-#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
-/*
- * These 3 values aren't defined enum v4l2_av1_segment_feature because
- * they are not part of the specification
- */
-#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
-#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
-#define V4L2_AV1_SEG_LVL_ALT_LF_V	4
-
-#define SUPERRES_SCALE_BITS 3
-#define SCALE_NUMERATOR 8
-#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
-
-#define RS_SUBPEL_BITS 6
-#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
-#define RS_SCALE_SUBPEL_BITS 14
-#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
-#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
-#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
-
-#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))
-
-#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
-#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
-#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
-#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
-#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
-#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
-#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
-
-#define DIV_LUT_PREC_BITS 14
-#define DIV_LUT_BITS 8
-#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
-#define WARP_PARAM_REDUCE_BITS 6
-#define WARPEDMODEL_PREC_BITS 16
-
-#define AV1_DIV_ROUND_UP_POW2(value, n)			\
-({							\
-	typeof(n) _n  = n;				\
-	typeof(value) _value = value;			\
-	(_value + (BIT(_n) >> 1)) >> _n;		\
-})
-
-#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
-({									\
-	typeof(n) _n_  = n;						\
-	typeof(value) _value_ = value;					\
-	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
-		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
-})
-
-enum rockchip_av1_tx_mode {
-	ROCKCHIP_AV1_TX_MODE_ONLY_4X4	= 0,
-	ROCKCHIP_AV1_TX_MODE_8X8	= 1,
-	ROCKCHIP_AV1_TX_MODE_16x16	= 2,
-	ROCKCHIP_AV1_TX_MODE_32x32	= 3,
-	ROCKCHIP_AV1_TX_MODE_SELECT	= 4,
-};
-
-struct rockchip_av1_film_grain {
-	u8 scaling_lut_y[256];
-	u8 scaling_lut_cb[256];
-	u8 scaling_lut_cr[256];
-	s16 cropped_luma_grain_block[4096];
-	s16 cropped_chroma_grain_block[1024 * 2];
-};
-
-static const short div_lut[DIV_LUT_NUM + 1] = {
-	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
-	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
-	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
-	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
-	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
-	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
-	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
-	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
-	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
-	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
-	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
-	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
-	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
-	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
-	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
-	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
-	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
-	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
-	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
-	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
-	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
-	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
-	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
-	8240,  8224,  8208,  8192,
-};
-
-static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
-	u64 timestamp;
-	int i, idx = frame->ref_frame_idx[ref];
-
-	if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
-		return AV1_INVALID_IDX;
-
-	timestamp = frame->reference_frame_ts[idx];
-	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
-		if (!av1_dec->frame_refs[i].used)
-			continue;
-		if (av1_dec->frame_refs[i].timestamp == timestamp)
-			return i;
-	}
-
-	return AV1_INVALID_IDX;
-}
-
-static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
-
-	if (idx != AV1_INVALID_IDX)
-		return av1_dec->frame_refs[idx].order_hint;
-
-	return 0;
-}
-
-static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
-					     u64 timestamp)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
-	int i;
-
-	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
-		int j;
-
-		if (av1_dec->frame_refs[i].used)
-			continue;
-
-		av1_dec->frame_refs[i].width = frame->frame_width_minus_1 + 1;
-		av1_dec->frame_refs[i].height = frame->frame_height_minus_1 + 1;
-		av1_dec->frame_refs[i].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
-		av1_dec->frame_refs[i].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
-		av1_dec->frame_refs[i].timestamp = timestamp;
-		av1_dec->frame_refs[i].frame_type = frame->frame_type;
-		av1_dec->frame_refs[i].order_hint = frame->order_hint;
-		av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
-
-		for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
-			av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
-		av1_dec->frame_refs[i].used = true;
-		av1_dec->current_frame_index = i;
-
-		return i;
-	}
-
-	return AV1_INVALID_IDX;
-}
-
-static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-
-	if (idx >= 0)
-		av1_dec->frame_refs[idx].used = false;
-}
-
-static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-
-	int ref, idx;
-
-	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
-		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
-		bool used = false;
-
-		if (!av1_dec->frame_refs[idx].used)
-			continue;
-
-		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
-			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
-				used = true;
-		}
-
-		if (!used)
-			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
-	}
-}
-
-static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
-{
-	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
-}
-
-static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
-{
-	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
-
-	return ALIGN((cr_offset * 3) / 2, 64);
-}
-
-static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
-{
-	struct hantro_dev *vpu = ctx->dev;
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-
-	if (av1_dec->db_data_col.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
-				  av1_dec->db_data_col.cpu,
-				  av1_dec->db_data_col.dma);
-	av1_dec->db_data_col.cpu = NULL;
-
-	if (av1_dec->db_ctrl_col.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
-				  av1_dec->db_ctrl_col.cpu,
-				  av1_dec->db_ctrl_col.dma);
-	av1_dec->db_ctrl_col.cpu = NULL;
-
-	if (av1_dec->cdef_col.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
-				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
-	av1_dec->cdef_col.cpu = NULL;
-
-	if (av1_dec->sr_col.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
-				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
-	av1_dec->sr_col.cpu = NULL;
-
-	if (av1_dec->lr_col.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
-				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
-	av1_dec->lr_col.cpu = NULL;
-}
-
-static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
-{
-	struct hantro_dev *vpu = ctx->dev;
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
-	unsigned int num_tile_cols = tile_info->tile_cols;
-	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
-	unsigned int height_in_sb = height / 64;
-	unsigned int stripe_num = ((height + 8) + 63) / 64;
-	size_t size;
-
-	if (av1_dec->db_data_col.size >=
-	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
-		return 0;
-
-	rockchip_vpu981_av1_dec_tiles_free(ctx);
-
-	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
-	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
-						      &av1_dec->db_data_col.dma,
-						      GFP_KERNEL);
-	if (!av1_dec->db_data_col.cpu)
-		goto buffer_allocation_error;
-	av1_dec->db_data_col.size = size;
-
-	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
-	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
-						      &av1_dec->db_ctrl_col.dma,
-						      GFP_KERNEL);
-	if (!av1_dec->db_ctrl_col.cpu)
-		goto buffer_allocation_error;
-	av1_dec->db_ctrl_col.size = size;
-
-	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
-	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
-						   &av1_dec->cdef_col.dma,
-						   GFP_KERNEL);
-	if (!av1_dec->cdef_col.cpu)
-		goto buffer_allocation_error;
-	av1_dec->cdef_col.size = size;
-
-	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
-	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
-						 &av1_dec->sr_col.dma,
-						 GFP_KERNEL);
-	if (!av1_dec->sr_col.cpu)
-		goto buffer_allocation_error;
-	av1_dec->sr_col.size = size;
-
-	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
-	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
-						 &av1_dec->lr_col.dma,
-						 GFP_KERNEL);
-	if (!av1_dec->lr_col.cpu)
-		goto buffer_allocation_error;
-	av1_dec->lr_col.size = size;
-
-	av1_dec->num_tile_cols_allocated = num_tile_cols;
-	return 0;
-
-buffer_allocation_error:
-	rockchip_vpu981_av1_dec_tiles_free(ctx);
-	return -ENOMEM;
-}
-
-void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
-{
-	struct hantro_dev *vpu = ctx->dev;
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-
-	if (av1_dec->global_model.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
-				  av1_dec->global_model.cpu,
-				  av1_dec->global_model.dma);
-	av1_dec->global_model.cpu = NULL;
-
-	if (av1_dec->tile_info.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
-				  av1_dec->tile_info.cpu,
-				  av1_dec->tile_info.dma);
-	av1_dec->tile_info.cpu = NULL;
-
-	if (av1_dec->film_grain.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
-				  av1_dec->film_grain.cpu,
-				  av1_dec->film_grain.dma);
-	av1_dec->film_grain.cpu = NULL;
-
-	if (av1_dec->prob_tbl.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
-				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
-	av1_dec->prob_tbl.cpu = NULL;
-
-	if (av1_dec->prob_tbl_out.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
-				  av1_dec->prob_tbl_out.cpu,
-				  av1_dec->prob_tbl_out.dma);
-	av1_dec->prob_tbl_out.cpu = NULL;
-
-	if (av1_dec->tile_buf.cpu)
-		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
-				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
-	av1_dec->tile_buf.cpu = NULL;
-
-	rockchip_vpu981_av1_dec_tiles_free(ctx);
-}
-
-int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
-{
-	struct hantro_dev *vpu = ctx->dev;
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-
-	memset(av1_dec, 0, sizeof(*av1_dec));
-
-	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
-						       &av1_dec->global_model.dma,
-						       GFP_KERNEL);
-	if (!av1_dec->global_model.cpu)
-		return -ENOMEM;
-	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
-
-	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_TILE_INFO_SIZE,
-						    &av1_dec->tile_info.dma,
-						    GFP_KERNEL);
-	if (!av1_dec->tile_info.cpu)
-		return -ENOMEM;
-	av1_dec->tile_info.size = AV1_TILE_INFO_SIZE;
-
-	av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
-						     ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
-						     &av1_dec->film_grain.dma,
-						     GFP_KERNEL);
-	if (!av1_dec->film_grain.cpu)
-		return -ENOMEM;
-	av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
-
-	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
-						   ALIGN(sizeof(struct av1cdfs), 2048),
-						   &av1_dec->prob_tbl.dma,
-						   GFP_KERNEL);
-	if (!av1_dec->prob_tbl.cpu)
-		return -ENOMEM;
-	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
-
-	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
-						       ALIGN(sizeof(struct av1cdfs), 2048),
-						       &av1_dec->prob_tbl_out.dma,
-						       GFP_KERNEL);
-	if (!av1_dec->prob_tbl_out.cpu)
-		return -ENOMEM;
-	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
-	av1_dec->cdfs = &av1_dec->default_cdfs;
-	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
-
-	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
-
-	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
-						   AV1_TILE_SIZE,
-						   &av1_dec->tile_buf.dma,
-						   GFP_KERNEL);
-	if (!av1_dec->tile_buf.cpu)
-		return -ENOMEM;
-	av1_dec->tile_buf.size = AV1_TILE_SIZE;
-
-	return 0;
-}
-
-static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-
-	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
-	if (WARN_ON(!ctrls->sequence))
-		return -EINVAL;
-
-	ctrls->tile_group_entry =
-	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
-	if (WARN_ON(!ctrls->tile_group_entry))
-		return -EINVAL;
-
-	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
-	if (WARN_ON(!ctrls->frame))
-		return -EINVAL;
-
-	ctrls->film_grain =
-	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
-
-	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
-}
-
-static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
-{
-	if (n == 0)
-		return 0;
-	return 31 ^ __builtin_clz(n);
-}
-
-static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
-{
-	int f;
-	u64 e;
-
-	*shift = rockchip_vpu981_av1_dec_get_msb(d);
-	/* e is obtained from D after resetting the most significant 1 bit. */
-	e = d - ((u32)1 << *shift);
-	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
-	if (*shift > DIV_LUT_BITS)
-		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
-	else
-		f = e << (DIV_LUT_BITS - *shift);
-	if (f > DIV_LUT_NUM)
-		return -1;
-	*shift += DIV_LUT_PREC_BITS;
-	/* Use f as lookup into the precomputed table of multipliers */
-	return div_lut[f];
-}
-
-static void
-rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
-					 s64 *beta, s64 *gamma, s64 *delta)
-{
-	const int *mat = params;
-	short shift;
-	short y;
-	long long gv, dv;
-
-	if (mat[2] <= 0)
-		return;
-
-	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
-	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
-
-	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
-
-	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
-
-	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
-
-	dv = ((long long)mat[3] * mat[4]) * y;
-	*delta = clamp_val(mat[5] -
-		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
-		S16_MIN, S16_MAX);
-
-	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
-		 * (1 << WARP_PARAM_REDUCE_BITS);
-	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
-		* (1 << WARP_PARAM_REDUCE_BITS);
-	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
-		 * (1 << WARP_PARAM_REDUCE_BITS);
-	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
-		* (1 << WARP_PARAM_REDUCE_BITS);
-}
 
 static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
 {
 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
-	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
-	u8 *dst = av1_dec->global_model.cpu;
 	struct hantro_dev *vpu = ctx->dev;
-	int ref_frame, i;
-
-	memset(dst, 0, GLOBAL_MODEL_SIZE);
-	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
-		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;
-
-		for (i = 0; i < 6; ++i) {
-			if (i == 2)
-				*(s32 *)dst =
-					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
-			else if (i == 3)
-				*(s32 *)dst =
-					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
-			else
-				*(s32 *)dst =
-					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
-			dst += 4;
-		}
-
-		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
-			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
-								 &alpha, &beta, &gamma, &delta);
-
-		*(s16 *)dst = alpha;
-		dst += 2;
-		*(s16 *)dst = beta;
-		dst += 2;
-		*(s16 *)dst = gamma;
-		dst += 2;
-		*(s16 *)dst = delta;
-		dst += 2;
-	}
 
+	hantro_av1_set_global_model(ctx);
 	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
 }
 
-static int rockchip_vpu981_av1_tile_log2(int target)
-{
-	int k;
-
-	/*
-	 * returns the smallest value for k such that 1 << k is greater
-	 * than or equal to target
-	 */
-	for (k = 0; (1 << k) < target; k++);
-
-	return k;
-}
-
 static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
 {
 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
 	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
-	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
-	    ctrls->tile_group_entry;
 	int context_update_y =
 	    tile_info->context_update_tile_id / tile_info->tile_cols;
 	int context_update_x =
 	    tile_info->context_update_tile_id % tile_info->tile_cols;
 	int context_update_tile_id =
 	    context_update_x * tile_info->tile_rows + context_update_y;
-	u8 *dst = av1_dec->tile_info.cpu;
 	struct hantro_dev *vpu = ctx->dev;
-	int tile0, tile1;
-
-	memset(dst, 0, av1_dec->tile_info.size);
-
-	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
-		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
-			int tile_id = tile1 * tile_info->tile_cols + tile0;
-			u32 start, end;
-			u32 y0 =
-			    tile_info->height_in_sbs_minus_1[tile1] + 1;
-			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
-
-			/* tile size in SB units (width,height) */
-			*dst++ = x0;
-			*dst++ = 0;
-			*dst++ = 0;
-			*dst++ = 0;
-			*dst++ = y0;
-			*dst++ = 0;
-			*dst++ = 0;
-			*dst++ = 0;
-
-			/* tile start position */
-			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
-			*dst++ = start & 255;
-			*dst++ = (start >> 8) & 255;
-			*dst++ = (start >> 16) & 255;
-			*dst++ = (start >> 24) & 255;
-
-			/* number of bytes in tile data */
-			end = start + group_entry[tile_id].tile_size;
-			*dst++ = end & 255;
-			*dst++ = (end >> 8) & 255;
-			*dst++ = (end >> 16) & 255;
-			*dst++ = (end >> 24) & 255;
-		}
-	}
+
+	hantro_av1_set_tile_info(ctx);
 
 	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
 	hantro_reg_write(vpu, &av1_tile_enable,
@@ -631,8 +44,8 @@ static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
 	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
 	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
 	hantro_reg_write(vpu, &av1_tile_transpose, 1);
-	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
-	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
+	if (hantro_av1_tile_log2(tile_info->tile_cols) ||
+	    hantro_av1_tile_log2(tile_info->tile_rows))
 		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
 	else
 		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
@@ -640,50 +53,6 @@ static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
 	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
 }
 
-static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
-					    int a, int b)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	int bits = ctrls->sequence->order_hint_bits - 1;
-	int diff, m;
-
-	if (!ctrls->sequence->order_hint_bits)
-		return 0;
-
-	diff = a - b;
-	m = 1 << bits;
-	diff = (diff & (m - 1)) - (diff & m);
-
-	return diff;
-}
-
-static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
-	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
-	int i;
-
-	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
-		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
-			av1_dec->ref_frame_sign_bias[i] = 0;
-
-		return;
-	}
-	// Identify the nearest forward and backward references.
-	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
-		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
-			int rel_off =
-			    rockchip_vpu981_av1_dec_get_dist(ctx,
-							     rockchip_vpu981_get_order_hint(ctx, i),
-							     frame->order_hint);
-			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
-		}
-	}
-}
-
 static bool
 rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
 				int width, int height)
@@ -806,12 +175,12 @@ static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
 
 	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
 	    frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
-		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
+		int idx = hantro_av1_get_frame_index(ctx, frame->primary_ref_frame);
 
 		if (idx >= 0) {
 			dma_addr_t luma_addr, mv_addr = 0;
 			struct hantro_decoded_buffer *seg;
-			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
+			size_t mv_offset = hantro_av1_chroma_size(ctx);
 
 			seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
 			luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
@@ -1041,35 +410,6 @@ static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
 			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
 }
 
-static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
-	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
-	const struct v4l2_av1_quantization *quantization = &frame->quantization;
-	int i;
-
-	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
-		int qindex = quantization->base_q_idx;
-
-		if (segmentation->feature_enabled[i] &
-		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
-			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
-		}
-		qindex = clamp(qindex, 0, 255);
-
-		if (qindex ||
-		    quantization->delta_q_y_dc ||
-		    quantization->delta_q_u_dc ||
-		    quantization->delta_q_u_ac ||
-		    quantization->delta_q_v_dc ||
-		    quantization->delta_q_v_ac)
-			return false;
-	}
-	return true;
-}
-
 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
 {
 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
@@ -1089,7 +429,7 @@ static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
 	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
 
 	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
-	    !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
+	    !hantro_av1_is_lossless(ctx) &&
 	    !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
 		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
 				 loop_filter->ref_deltas[0]);
@@ -1128,112 +468,23 @@ static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
 	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
 }
 
-static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
-{
-	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
-	bool frame_is_intra = IS_INTRA(frame->frame_type);
-	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
-	int i;
-
-	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
-		return;
-
-	for (i = 0; i < NUM_REF_FRAMES; i++) {
-		if (frame->refresh_frame_flags & BIT(i)) {
-			struct mvcdfs stored_mv_cdf;
-
-			rockchip_av1_get_cdfs(ctx, i);
-			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
-			*av1_dec->cdfs = *out_cdfs;
-			if (frame_is_intra) {
-				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
-				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
-			}
-			rockchip_av1_store_cdfs(ctx,
-						frame->refresh_frame_flags);
-			break;
-		}
-	}
-}
-
-void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
-{
-	rockchip_vpu981_av1_dec_update_prob(ctx);
-}
-
 static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
 {
 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
-	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
-	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
-	const struct v4l2_av1_quantization *quantization = &frame->quantization;
 	struct hantro_dev *vpu = ctx->dev;
-	bool error_resilient_mode =
-	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
-	bool frame_is_intra = IS_INTRA(frame->frame_type);
-
-	if (error_resilient_mode || frame_is_intra ||
-	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
-		av1_dec->cdfs = &av1_dec->default_cdfs;
-		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
-		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
-						 av1_dec->cdfs);
-	} else {
-		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
-	}
-	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
-
-	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
-
-	if (frame_is_intra) {
-		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
-		/* Overwrite MV context area with intrabc MV context */
-		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
-		       sizeof(struct mvcdfs));
-	}
 
+	hantro_av1_set_prob(ctx);
 	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
 	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
 }
 
-static void
-rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
-					      u8 num_points, u8 *scaling_lut)
-{
-	int i, point;
-
-	if (num_points == 0) {
-		memset(scaling_lut, 0, 256);
-		return;
-	}
-
-	for (point = 0; point < num_points - 1; point++) {
-		int x;
-		s32 delta_y = scaling[point + 1] - scaling[point];
-		s32 delta_x = values[point + 1] - values[point];
-		s64 delta =
-		    delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
-					 delta_x) : 0;
-
-		for (x = 0; x < delta_x; x++) {
-			scaling_lut[values[point] + x] =
-			    scaling[point] +
-			    (s32)((x * delta + 32768) >> 16);
-		}
-	}
-
-	for (i = values[num_points - 1]; i < 256; i++)
-		scaling_lut[i] = scaling[num_points - 1];
-}
 
 static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
 {
 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
 	const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
-	struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
+	struct hantro_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
 	struct hantro_dev *vpu = ctx->dev;
 	bool scaling_from_luma =
 		!!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
@@ -1316,10 +567,10 @@ static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
 	hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
 	hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
 
-	rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
-						      film_grain->point_y_scaling,
-						      film_grain->num_y_points,
-						      fgmem->scaling_lut_y);
+	hantro_av1_init_scaling_function(film_grain->point_y_value,
+					 film_grain->point_y_scaling,
+					 film_grain->num_y_points,
+					 fgmem->scaling_lut_y);
 
 	if (film_grain->flags &
 	    V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
@@ -1328,10 +579,10 @@ static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
 		memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
 		       sizeof(*fgmem->scaling_lut_y) * 256);
 	} else {
-		rockchip_vpu981_av1_dec_init_scaling_function
+		hantro_av1_init_scaling_function
 		    (film_grain->point_cb_value, film_grain->point_cb_scaling,
 		     film_grain->num_cb_points, fgmem->scaling_lut_cb);
-		rockchip_vpu981_av1_dec_init_scaling_function
+		hantro_av1_init_scaling_function
 		    (film_grain->point_cr_value, film_grain->point_cr_scaling,
 		     film_grain->num_cr_points, fgmem->scaling_lut_cr);
 	}
@@ -1351,21 +602,21 @@ static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
 	grain_min = 0 - grain_center;
 	grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
 
-	rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
-					       film_grain->num_y_points, grain_scale_shift,
-					       ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
-					       grain_min, grain_max, film_grain->grain_seed);
-
-	rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
-						 cr_grain_block, bitdepth,
-						 film_grain->num_y_points,
-						 film_grain->num_cb_points,
-						 film_grain->num_cr_points,
-						 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
-						 ar_coeffs_cr, ar_coeff_shift, grain_min,
-						 grain_max,
-						 scaling_from_luma,
-						 film_grain->grain_seed);
+	hantro_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
+					     film_grain->num_y_points, grain_scale_shift,
+					     ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
+					     grain_min, grain_max, film_grain->grain_seed);
+
+	hantro_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
+					       cr_grain_block, bitdepth,
+					       film_grain->num_y_points,
+					       film_grain->num_cb_points,
+					       film_grain->num_cr_points,
+					       grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
+					       ar_coeffs_cr, ar_coeff_shift, grain_min,
+					       grain_max,
+					       scaling_from_luma,
+					       film_grain->grain_seed);
 
 	for (i = 0; i < 64; i++) {
 		for (j = 0; j < 64; j++)
@@ -1617,12 +868,12 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 	int ref_ind = 0;
 	int rf, idx;
 
-	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
-	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
-	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
-	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
+	alt_frame_offset = hantro_av1_get_order_hint(ctx, ALT_BUF_IDX);
+	gld_frame_offset = hantro_av1_get_order_hint(ctx, GLD_BUF_IDX);
+	bwd_frame_offset = hantro_av1_get_order_hint(ctx, BWD_BUF_IDX);
+	alt2_frame_offset = hantro_av1_get_order_hint(ctx, ALT2_BUF_IDX);
 
-	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
+	idx = hantro_av1_get_frame_index(ctx, LST_BUF_IDX);
 	if (idx >= 0) {
 		int alt_frame_offset_in_lst =
 			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
@@ -1644,8 +895,8 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 		ref_stamp--;
 	}
 
-	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
-	if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
+	idx = hantro_av1_get_frame_index(ctx, BWD_BUF_IDX);
+	if (hantro_av1_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
 		int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
 		int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
 		bool bwd_intra_only =
@@ -1659,8 +910,8 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 		}
 	}
 
-	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
-	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
+	idx = hantro_av1_get_frame_index(ctx, ALT2_BUF_IDX);
+	if (hantro_av1_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
 		int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
 		int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
 		bool alt2_intra_only =
@@ -1674,8 +925,8 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 		}
 	}
 
-	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
-	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
+	idx = hantro_av1_get_frame_index(ctx, ALT_BUF_IDX);
+	if (hantro_av1_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
 	    ref_stamp >= 0) {
 		int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
 		int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
@@ -1690,7 +941,7 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 		}
 	}
 
-	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
+	idx = hantro_av1_get_frame_index(ctx, LST2_BUF_IDX);
 	if (idx >= 0 && ref_stamp >= 0) {
 		int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
 		int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
@@ -1706,14 +957,14 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 	}
 
 	for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
-		idx = rockchip_vpu981_get_frame_index(ctx, rf);
+		idx = hantro_av1_get_frame_index(ctx, rf);
 		if (idx >= 0) {
-			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
+			int rf_order_hint = hantro_av1_get_order_hint(ctx, rf);
 
 			cur_offset[rf] =
-			    rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
+			    hantro_av1_get_dist(ctx, cur_frame_offset, rf_order_hint);
 			cur_roffset[rf] =
-			    rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
+			    hantro_av1_get_dist(ctx, rf_order_hint, cur_frame_offset);
 		} else {
 			cur_offset[rf] = 0;
 			cur_roffset[rf] = 0;
@@ -1736,32 +987,32 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 	if (use_ref_frame_mvs && ref_ind > 0 &&
 	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
 	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
-		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
-		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
+		int rf = hantro_av1_get_order_hint(ctx, refs_selected[0]);
+		int idx = hantro_av1_get_frame_index(ctx, refs_selected[0]);
 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
 		int val;
 
 		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
 		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
 		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
 		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
 		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
 		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
 		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
 		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
 	}
 
@@ -1776,32 +1027,32 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 	if (use_ref_frame_mvs && ref_ind > 1 &&
 	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
 	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
-		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
-		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
+		int rf = hantro_av1_get_order_hint(ctx, refs_selected[1]);
+		int idx = hantro_av1_get_frame_index(ctx, refs_selected[1]);
 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
 		int val;
 
 		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
 		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
 		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
 		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
 		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
 		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
 		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
 		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
 	}
 
@@ -1816,32 +1067,32 @@ static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
 	if (use_ref_frame_mvs && ref_ind > 2 &&
 	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
 	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
-		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
-		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
+		int rf = hantro_av1_get_order_hint(ctx, refs_selected[2]);
+		int idx = hantro_av1_get_frame_index(ctx, refs_selected[2]);
 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
 		int val;
 
 		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
 		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
 		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
 		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
 		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
 		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
 		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
 
-		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
+		val = hantro_av1_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
 		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
 	}
 
@@ -1883,7 +1134,7 @@ static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
 
 	if (!allow_intrabc) {
 		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
-			int idx = rockchip_vpu981_get_frame_index(ctx, i);
+			int idx = hantro_av1_get_frame_index(ctx, i);
 
 			if (idx >= 0)
 				ref_count[idx]++;
@@ -1898,7 +1149,7 @@ static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
 	}
 	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
 
-	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
+	hantro_av1_set_frame_sign_bias(ctx);
 
 	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
 		u32 ref = i - 1;
@@ -1910,8 +1161,8 @@ static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
 			width = frame->frame_width_minus_1 + 1;
 			height = frame->frame_height_minus_1 + 1;
 		} else {
-			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
-				idx = rockchip_vpu981_get_frame_index(ctx, ref);
+			if (hantro_av1_get_frame_index(ctx, ref) > 0)
+				idx = hantro_av1_get_frame_index(ctx, ref);
 			width = av1_dec->frame_refs[idx].width;
 			height = av1_dec->frame_refs[idx].height;
 		}
@@ -1943,20 +1194,6 @@ static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
 	rockchip_vpu981_av1_dec_set_other_frames(ctx);
 }
 
-static int rockchip_vpu981_av1_get_hardware_tx_mode(enum v4l2_av1_tx_mode tx_mode)
-{
-	switch (tx_mode) {
-	case V4L2_AV1_TX_MODE_ONLY_4X4:
-		return ROCKCHIP_AV1_TX_MODE_ONLY_4X4;
-	case V4L2_AV1_TX_MODE_LARGEST:
-		return ROCKCHIP_AV1_TX_MODE_32x32;
-	case V4L2_AV1_TX_MODE_SELECT:
-		return ROCKCHIP_AV1_TX_MODE_SELECT;
-	}
-
-	return ROCKCHIP_AV1_TX_MODE_32x32;
-}
-
 static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
@@ -2029,7 +1266,7 @@ static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
 	hantro_reg_write(vpu, &av1_comp_pred_mode,
 			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
 
-	tx_mode = rockchip_vpu981_av1_get_hardware_tx_mode(ctrls->frame->tx_mode);
+	tx_mode = hantro_av1_get_hardware_tx_mode(ctrls->frame->tx_mode);
 	hantro_reg_write(vpu, &av1_transform_mode, tx_mode);
 	hantro_reg_write(vpu, &av1_max_cb_size,
 			 (ctrls->sequence->flags
@@ -2061,7 +1298,7 @@ static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
 		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
 	}
 
-	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
+	hantro_reg_write(vpu, &av1_lossless_e, hantro_av1_is_lossless(ctx));
 	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
 	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
 
@@ -2109,8 +1346,8 @@ rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
 	struct hantro_decoded_buffer *dst;
 	struct vb2_v4l2_buffer *vb2_dst;
 	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
-	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
-	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
+	size_t cr_offset = hantro_av1_luma_size(ctx);
+	size_t mv_offset = hantro_av1_chroma_size(ctx);
 
 	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
 	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
@@ -2134,7 +1371,7 @@ int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
 
 	hantro_start_prepare_run(ctx);
 
-	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
+	ret = hantro_av1_prepare_run(ctx);
 	if (ret)
 		goto prepare_error;
 
@@ -2144,8 +1381,8 @@ int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
 		goto prepare_error;
 	}
 
-	rockchip_vpu981_av1_dec_clean_refs(ctx);
-	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
+	hantro_av1_clean_refs(ctx);
+	hantro_av1_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
 
 	rockchip_vpu981_av1_dec_set_parameters(ctx);
 	rockchip_vpu981_av1_dec_set_global_model(ctx);
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
index 02673be9878e..f50a3e38097e 100644
--- a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
+++ b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
@@ -9,6 +9,7 @@
 #include <linux/clk.h>
 
 #include "hantro.h"
+#include "hantro_av1.h"
 #include "hantro_jpeg.h"
 #include "hantro_g1_regs.h"
 #include "hantro_h1_regs.h"
@@ -608,9 +609,9 @@ static const struct hantro_codec_ops rk3568_vepu_codec_ops[] = {
 static const struct hantro_codec_ops rk3588_vpu981_codec_ops[] = {
 	[HANTRO_MODE_AV1_DEC] = {
 		.run = rockchip_vpu981_av1_dec_run,
-		.init = rockchip_vpu981_av1_dec_init,
-		.exit = rockchip_vpu981_av1_dec_exit,
-		.done = rockchip_vpu981_av1_dec_done,
+		.init = hantro_av1_init,
+		.exit = hantro_av1_exit,
+		.done = hantro_av1_update_prob,
 	},
 };
 /*
-- 
2.43.0



^ permalink raw reply related

* [soc:soc/dt] BUILD SUCCESS 41d7004ab4e521ccbd98793d7da55022796c463f
From: kernel test robot @ 2026-04-15  7:31 UTC (permalink / raw)
  To: Krzysztof Kozlowski; +Cc: linux-arm-kernel, arm

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git soc/dt
branch HEAD: 41d7004ab4e521ccbd98793d7da55022796c463f  Merge tag 'ti-k3-dt-for-v7.1-part2' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/ti/linux into soc/dt

elapsed time: 728m

configs tested: 75
configs skipped: 1

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alpha                             allnoconfig    gcc-15.2.0
alpha                            allyesconfig    gcc-15.2.0
arc                              allmodconfig    gcc-15.2.0
arc                               allnoconfig    gcc-15.2.0
arc                              allyesconfig    gcc-15.2.0
arm                               allnoconfig    clang-23
arm                              allyesconfig    gcc-15.2.0
arm64                            allmodconfig    clang-19
arm64                             allnoconfig    gcc-15.2.0
csky                             allmodconfig    gcc-15.2.0
csky                              allnoconfig    gcc-15.2.0
hexagon                          allmodconfig    clang-17
hexagon                           allnoconfig    clang-23
i386                             allmodconfig    gcc-14
i386                              allnoconfig    gcc-14
i386                             allyesconfig    gcc-14
loongarch                        allmodconfig    clang-19
loongarch                         allnoconfig    clang-23
m68k                             allmodconfig    gcc-15.2.0
m68k                              allnoconfig    gcc-15.2.0
m68k                             allyesconfig    gcc-15.2.0
m68k                                defconfig    gcc-15.2.0
microblaze                        allnoconfig    gcc-15.2.0
microblaze                       allyesconfig    gcc-15.2.0
microblaze                          defconfig    gcc-15.2.0
mips                             allmodconfig    gcc-15.2.0
mips                              allnoconfig    gcc-15.2.0
mips                             allyesconfig    gcc-15.2.0
nios2                            allmodconfig    gcc-11.5.0
nios2                             allnoconfig    gcc-11.5.0
nios2                               defconfig    gcc-11.5.0
openrisc                         allmodconfig    gcc-15.2.0
openrisc                          allnoconfig    gcc-15.2.0
parisc                           allmodconfig    gcc-15.2.0
parisc                            allnoconfig    gcc-15.2.0
parisc                           allyesconfig    gcc-15.2.0
parisc64                            defconfig    gcc-15.2.0
powerpc                          allmodconfig    gcc-15.2.0
powerpc                           allnoconfig    gcc-15.2.0
riscv                            allmodconfig    clang-23
riscv                             allnoconfig    gcc-15.2.0
riscv                            allyesconfig    clang-16
s390                             allmodconfig    clang-18
s390                              allnoconfig    clang-23
s390                             allyesconfig    gcc-15.2.0
sh                               allmodconfig    gcc-15.2.0
sh                                allnoconfig    gcc-15.2.0
sh                               allyesconfig    gcc-15.2.0
sh                                  defconfig    gcc-15.2.0
sh                     sh7710voipgw_defconfig    gcc-15.2.0
sparc                             allnoconfig    gcc-15.2.0
sparc                 randconfig-001-20260415    gcc-8.5.0
sparc                 randconfig-002-20260415    gcc-11.5.0
sparc64                          allmodconfig    clang-23
sparc64                             defconfig    clang-20
sparc64               randconfig-001-20260415    clang-23
sparc64               randconfig-002-20260415    gcc-12.5.0
um                               allmodconfig    clang-19
um                                allnoconfig    clang-23
um                               allyesconfig    gcc-14
um                                  defconfig    clang-23
um                    randconfig-001-20260415    clang-23
x86_64                           allmodconfig    clang-20
x86_64                            allnoconfig    clang-20
x86_64                           allyesconfig    clang-20
x86_64      buildonly-randconfig-001-20260415    clang-20
x86_64      buildonly-randconfig-002-20260415    gcc-13
x86_64      buildonly-randconfig-003-20260415    gcc-14
x86_64      buildonly-randconfig-004-20260415    clang-20
x86_64      buildonly-randconfig-005-20260415    clang-20
x86_64      buildonly-randconfig-006-20260415    clang-20
x86_64                              defconfig    gcc-14
x86_64                          rhel-9.4-rust    clang-20
xtensa                            allnoconfig    gcc-15.2.0
xtensa                           allyesconfig    gcc-15.2.0

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply

* Re: [PATCH v4 3/9] coresight: etm4x: fix leaked trace id
From: Leo Yan @ 2026-04-15  7:29 UTC (permalink / raw)
  To: Jie Gan
  Cc: Yeoreum Yun, coresight, linux-arm-kernel, linux-kernel,
	suzuki.poulose, mike.leach, james.clark, alexander.shishkin
In-Reply-To: <81fdef8a-a60e-4d29-948d-c4a07e23dad9@oss.qualcomm.com>

On Wed, Apr 15, 2026 at 09:21:21AM +0800, Jie Gan wrote:

[...]

> > > > @@ -918,8 +918,10 @@ static int etm4_enable_sysfs(struct coresight_device *csdev, struct coresight_pa
> > > >   	cscfg_config_sysfs_get_active_cfg(&cfg_hash, &preset);
> > > >   	if (cfg_hash) {
> > > >   		ret = cscfg_csdev_enable_active_config(csdev, cfg_hash, preset);
> > > > -		if (ret)
> > > > +		if (ret) {
> > > > +			etm4_release_trace_id(drvdata);
> > > 
> > > If so, even an ID is reserved for failures, and the ID map is big enough
> > > for each CPU, we don't need to worry memory leak or ID used out issue ?
> > 
> > However, in theory, this could lead to an ID leak,
> > so it would be better to release it in error cases.
> 
> What I am thinking is as SoCs continue to grow more complex with an
> increasing number of subsystems, trace IDs may be exhausted in the near
> future. (that's why we have dynamic trace ID allocation/release).

Thanks for the input.

I am wondering if we can use "dev->devt" as the trace ID.  A device's
major/minor number is unique in kernel and dev_t is defined as u32:

  typedef u32 __kernel_dev_t;

And we can consolidate this for both SYSFS and PERF modes.

Thanks,
Leo


^ permalink raw reply

* [PATCH v14 2/5] dt-bindings: iommu: verisilicon: Add binding for VSI IOMMU
From: Benjamin Gaignard @ 2026-04-15  7:23 UTC (permalink / raw)
  To: joro, will, robin.murphy, krzk+dt, conor+dt, heiko
  Cc: iommu, devicetree, linux-kernel, linux-arm-kernel, linux-rockchip,
	kernel, Benjamin Gaignard, Conor Dooley
In-Reply-To: <20260415072349.44237-1-benjamin.gaignard@collabora.com>

Add a device tree binding for the Verisilicon (VSI) IOMMU.
This IOMMU sits in front of hardware encoder and decoder
blocks on SoCs using Verisilicon IP, such as the Rockchip RK3588.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
---
 .../bindings/iommu/verisilicon,iommu.yaml     | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml

diff --git a/Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml b/Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml
new file mode 100644
index 000000000000..d3ce9e603b61
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/verisilicon,iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Verisilicon IOMMU
+
+maintainers:
+  - Benjamin Gaignard <benjamin.gaignard@collabora.com>
+
+description: |+
+  A Verisilicon IOMMU translates I/O virtual addresses to physical addresses for
+  its associated video decoder.
+
+properties:
+  compatible:
+    items:
+      - const: rockchip,rk3588-av1-iommu
+      - const: verisilicon,iommu-1.2
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Core clock
+      - description: Interface clock
+
+  clock-names:
+    items:
+      - const: core
+      - const: iface
+
+  "#iommu-cells":
+    const: 0
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/rockchip,rk3588-cru.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    bus {
+      #address-cells = <2>;
+      #size-cells = <2>;
+
+      iommu@fdca0000 {
+        compatible = "rockchip,rk3588-av1-iommu","verisilicon,iommu-1.2";
+        reg = <0x0 0xfdca0000 0x0 0x600>;
+        interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH 0>;
+        clocks = <&cru ACLK_AV1>, <&cru PCLK_AV1>;
+        clock-names = "core", "iface";
+        #iommu-cells = <0>;
+      };
+    };
-- 
2.43.0



^ permalink raw reply related

* [PATCH v14 3/5] iommu: Add verisilicon IOMMU driver
From: Benjamin Gaignard @ 2026-04-15  7:23 UTC (permalink / raw)
  To: joro, will, robin.murphy, krzk+dt, conor+dt, heiko
  Cc: iommu, devicetree, linux-kernel, linux-arm-kernel, linux-rockchip,
	kernel, Benjamin Gaignard
In-Reply-To: <20260415072349.44237-1-benjamin.gaignard@collabora.com>

The Verisilicon IOMMU hardware block can be found in combination
with Verisilicon hardware video codecs (encoders or decoders) on
different SoCs.
Enabling it allows non-contiguous memory allocators to be used
for Verisilicon video codecs.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
---
changes in version 14:
 - Flush TLB after each map/unmap operation.
 - Remove vsi_iommu_restore_ctx() and do not touch Verisilicon stateless
   video decoder.
 - Allow to build the driver as a module.

 MAINTAINERS               |   8 +
 drivers/iommu/Kconfig     |  11 +
 drivers/iommu/Makefile    |   1 +
 drivers/iommu/vsi-iommu.c | 796 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 816 insertions(+)
 create mode 100644 drivers/iommu/vsi-iommu.c

diff --git a/MAINTAINERS b/MAINTAINERS
index c3fe46d7c4bc..fa56dd0e3f25 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27667,6 +27667,14 @@ F:	drivers/media/v4l2-core/v4l2-isp.c
 F:	include/media/v4l2-isp.h
 F:	include/uapi/linux/media/v4l2-isp.h
 
+VERISILICON IOMMU DRIVER
+M:	Benjamin Gaignard <benjamin.gaignard@collabora.com>
+L:	iommu@lists.linux.dev
+S:	Maintained
+F:	Documentation/devicetree/bindings/iommu/verisilicon,iommu.yaml
+F:	drivers/iommu/vsi-iommu.c
+F:	include/linux/vsi-iommu.h
+
 VF610 NAND DRIVER
 M:	Stefan Agner <stefan@agner.ch>
 L:	linux-mtd@lists.infradead.org
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f86262b11416..18d3d68af7cd 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -384,6 +384,17 @@ config SPRD_IOMMU
 
 	  Say Y here if you want to use the multimedia devices listed above.
 
+config VSI_IOMMU
+	tristate "Verisilicon IOMMU Support"
+	depends on (ARCH_ROCKCHIP && ARM64) || COMPILE_TEST
+	select IOMMU_API
+	help
+	  Support for IOMMUs used by Verisilicon sub-systems like video
+	  decoders or encoder hardware blocks.
+
+	  Say Y here if you want to use this IOMMU in front of these
+	  hardware blocks.
+
 config IOMMU_DEBUG_PAGEALLOC
 	bool "Debug IOMMU mappings against page allocations"
 	depends on DEBUG_PAGEALLOC && IOMMU_API && PAGE_EXTENSION
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 0275821f4ef9..887af357a7c9 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -36,4 +36,5 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
 obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
 obj-$(CONFIG_APPLE_DART) += apple-dart.o
+obj-$(CONFIG_VSI_IOMMU) += vsi-iommu.o
 obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug-pagealloc.o
diff --git a/drivers/iommu/vsi-iommu.c b/drivers/iommu/vsi-iommu.c
new file mode 100644
index 000000000000..5d0721bd2c7a
--- /dev/null
+++ b/drivers/iommu/vsi-iommu.c
@@ -0,0 +1,796 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2025 Collabora Ltd.
+ *
+ * IOMMU API for Verisilicon
+ *
+ * Module Authors:	Yandong Lin <yandong.lin@rock-chips.com>
+ *			Simon Xue <xxm@rock-chips.com>
+ *			Benjamin Gaignard <benjamin.gaignard@collabora.com>
+ *
+ * This hardware block uses a two-level page table structure.
+ * That makes it very similar to the Rockchip IOMMU hardware blocks, but it
+ * has its own driver because the register offsets and configuration bits
+ * are completely different. An additional reason is that this hardware
+ * has been developed by Verisilicon to be used by their hardware video
+ * decoders and not for general-purpose use like the Rockchip IOMMUs.
+ */
+
+#include <linux/clk.h>
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_iommu.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "iommu-pages.h"
+
+struct vsi_iommu {
+	struct device *dev;
+	void __iomem *regs;
+	struct clk_bulk_data *clocks;
+	int num_clocks;
+	struct iommu_device iommu;
+	struct list_head node; /* entry in vsi_iommu_domain.iommus */
+	struct iommu_domain *domain; /* domain to which iommu is attached */
+	spinlock_t lock; /* lock to protect vsi_iommu fields */
+	int irq;
+	bool enable;
+};
+
+struct vsi_iommu_domain {
+	struct list_head iommus;
+	struct device *dev;
+	u32 *dt;
+	dma_addr_t dt_dma;
+	struct iommu_domain domain;
+	u64 *pta;
+	dma_addr_t pta_dma;
+	spinlock_t lock; /* lock to protect vsi_iommu_domain fields */
+};
+
+static struct iommu_domain vsi_identity_domain;
+
+#define NUM_DT_ENTRIES	1024
+#define NUM_PT_ENTRIES	1024
+
+#define SPAGE_SIZE	BIT(12)
+
+/* vsi iommu regs address */
+#define VSI_MMU_CONFIG1_BASE			0x1ac
+#define VSI_MMU_AHB_EXCEPTION_BASE		0x380
+#define VSI_MMU_AHB_CONTROL_BASE		0x388
+#define VSI_MMU_AHB_TLB_ARRAY_BASE_L_BASE	0x38C
+
+/* MMU register offsets */
+#define VSI_MMU_FLUSH_BASE		0x184
+#define VSI_MMU_BIT_FLUSH		BIT(4)
+
+#define VSI_MMU_PAGE_FAULT_ADDR		0x380
+#define VSI_MMU_STATUS_BASE		0x384	/* IRQ status */
+
+#define VSI_MMU_BIT_ENABLE		BIT(0)
+
+#define VSI_MMU_OUT_OF_BOUND		BIT(28)
+/* Irq mask */
+#define VSI_MMU_IRQ_MASK		0x7
+
+#define VSI_DTE_PT_ADDRESS_MASK		0xffffffc0
+#define VSI_DTE_PT_VALID		BIT(0)
+
+#define VSI_PAGE_DESC_LO_MASK		0xfffff000
+#define VSI_PAGE_DESC_HI_MASK		GENMASK_ULL(39, 32)
+#define VSI_PAGE_DESC_HI_SHIFT		(32 - 4)
+
+static inline phys_addr_t vsi_dte_pt_address(u32 dte)
+{
+	return (phys_addr_t)dte & VSI_DTE_PT_ADDRESS_MASK;
+}
+
+static inline u32 vsi_mk_dte(u32 dte)
+{
+	return (phys_addr_t)dte | VSI_DTE_PT_VALID;
+}
+
+#define VSI_PTE_PAGE_WRITABLE		BIT(2)
+#define VSI_PTE_PAGE_VALID		BIT(0)
+
+static inline phys_addr_t vsi_pte_page_address(u64 pte)
+{
+	return ((pte << VSI_PAGE_DESC_HI_SHIFT) & VSI_PAGE_DESC_HI_MASK) |
+	       (pte & VSI_PAGE_DESC_LO_MASK);
+}
+
+static u32 vsi_mk_pte(phys_addr_t page, int prot)
+{
+	u32 flags = 0;
+
+	flags |= (prot & IOMMU_WRITE) ? VSI_PTE_PAGE_WRITABLE : 0;
+
+	page = (page & VSI_PAGE_DESC_LO_MASK) |
+	       ((page & VSI_PAGE_DESC_HI_MASK) >> VSI_PAGE_DESC_HI_SHIFT);
+
+	return page | flags | VSI_PTE_PAGE_VALID;
+}
+
+#define VSI_DTE_PT_VALID	BIT(0)
+
+static inline bool vsi_dte_is_pt_valid(u32 dte)
+{
+	return dte & VSI_DTE_PT_VALID;
+}
+
+static inline bool vsi_pte_is_page_valid(u32 pte)
+{
+	return pte & VSI_PTE_PAGE_VALID;
+}
+
+static u32 vsi_mk_pte_invalid(u32 pte)
+{
+	return pte & ~VSI_PTE_PAGE_VALID;
+}
+
+#define VSI_MASTER_TLB_MASK	GENMASK_ULL(31, 10)
+/* mode 0 : 4k */
+#define VSI_PTA_4K_MODE	0
+
+static u64 vsi_mk_pta(dma_addr_t dt_dma)
+{
+	u64 val = (dt_dma & VSI_MASTER_TLB_MASK) | VSI_PTA_4K_MODE;
+
+	return val;
+}
+
+static struct vsi_iommu_domain *to_vsi_domain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct vsi_iommu_domain, domain);
+}
+
+static inline void vsi_table_flush(struct vsi_iommu_domain *vsi_domain, dma_addr_t dma,
+				   unsigned int count)
+{
+	size_t size = count * sizeof(u32); /* count of u32 entry */
+
+	dma_sync_single_for_device(vsi_domain->dev, dma, size, DMA_TO_DEVICE);
+}
+
+#define VSI_IOVA_DTE_MASK	0xffc00000
+#define VSI_IOVA_DTE_SHIFT	22
+#define VSI_IOVA_PTE_MASK	0x003ff000
+#define VSI_IOVA_PTE_SHIFT	12
+#define VSI_IOVA_PAGE_MASK	0x00000fff
+#define VSI_IOVA_PAGE_SHIFT	0
+
+static u32 vsi_iova_dte_index(u32 iova)
+{
+	return (iova & VSI_IOVA_DTE_MASK) >> VSI_IOVA_DTE_SHIFT;
+}
+
+static u32 vsi_iova_pte_index(u32 iova)
+{
+	return (iova & VSI_IOVA_PTE_MASK) >> VSI_IOVA_PTE_SHIFT;
+}
+
+static u32 vsi_iova_page_offset(u32 iova)
+{
+	return (iova & VSI_IOVA_PAGE_MASK) >> VSI_IOVA_PAGE_SHIFT;
+}
+
+static irqreturn_t vsi_iommu_irq(int irq, void *dev_id)
+{
+	struct vsi_iommu *iommu = dev_id;
+	unsigned long flags;
+	dma_addr_t iova;
+	u32 status;
+
+	if (pm_runtime_resume_and_get(iommu->dev) < 0)
+		return IRQ_NONE;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	status = readl(iommu->regs + VSI_MMU_STATUS_BASE);
+	if (status & VSI_MMU_IRQ_MASK) {
+		dev_err(iommu->dev, "unexpected int_status=%08x\n", status);
+		iova = readl(iommu->regs + VSI_MMU_PAGE_FAULT_ADDR);
+		report_iommu_fault(iommu->domain, iommu->dev, iova, status);
+	}
+	writel(0, iommu->regs + VSI_MMU_STATUS_BASE);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+	pm_runtime_put_autosuspend(iommu->dev);
+
+	return IRQ_HANDLED;
+}
+
+static struct vsi_iommu *vsi_iommu_get_from_dev(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+	struct device *iommu_dev = bus_find_device_by_fwnode(&platform_bus_type,
+							     fwspec->iommu_fwnode);
+
+	put_device(iommu_dev);
+
+	return iommu_dev ? dev_get_drvdata(iommu_dev) : NULL;
+}
+
+static struct iommu_domain *vsi_iommu_domain_alloc_paging(struct device *dev)
+{
+	struct vsi_iommu *iommu = dev_iommu_priv_get(dev);
+	struct vsi_iommu_domain *vsi_domain;
+
+	vsi_domain = kzalloc(sizeof(*vsi_domain), GFP_KERNEL);
+	if (!vsi_domain)
+		return NULL;
+
+	vsi_domain->dev = iommu->dev;
+	spin_lock_init(&vsi_domain->lock);
+
+	/*
+	 * iommu use a 2 level pagetable.
+	 * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
+	 * Allocate one 4 KiB page for each table.
+	 */
+	vsi_domain->dt = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
+					      SPAGE_SIZE);
+	if (!vsi_domain->dt)
+		goto err_free_domain;
+
+	vsi_domain->dt_dma = dma_map_single(vsi_domain->dev, vsi_domain->dt,
+					    SPAGE_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(vsi_domain->dev, vsi_domain->dt_dma)) {
+		dev_err(dev, "DMA map error for DT\n");
+		goto err_free_dt;
+	}
+
+	vsi_domain->pta = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
+					       SPAGE_SIZE);
+	if (!vsi_domain->pta)
+		goto err_unmap_dt;
+
+	vsi_domain->pta[0] = vsi_mk_pta(vsi_domain->dt_dma);
+	vsi_domain->pta_dma = dma_map_single(vsi_domain->dev, vsi_domain->pta,
+					     SPAGE_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(vsi_domain->dev, vsi_domain->pta_dma)) {
+		dev_err(dev, "DMA map error for PTA\n");
+		goto err_free_pta;
+	}
+
+	INIT_LIST_HEAD(&vsi_domain->iommus);
+
+	vsi_domain->domain.geometry.aperture_start = 0;
+	vsi_domain->domain.geometry.aperture_end   = DMA_BIT_MASK(32);
+	vsi_domain->domain.geometry.force_aperture = true;
+	vsi_domain->domain.pgsize_bitmap	   = SZ_4K;
+
+	return &vsi_domain->domain;
+
+err_free_pta:
+	iommu_free_pages(vsi_domain->pta);
+err_unmap_dt:
+	dma_unmap_single(vsi_domain->dev, vsi_domain->dt_dma,
+			 SPAGE_SIZE, DMA_TO_DEVICE);
+err_free_dt:
+	iommu_free_pages(vsi_domain->dt);
+err_free_domain:
+	kfree(vsi_domain);
+
+	return NULL;
+}
+
+static phys_addr_t vsi_iommu_iova_to_phys(struct iommu_domain *domain,
+					  dma_addr_t iova)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	phys_addr_t pt_phys, phys = 0;
+	unsigned long flags;
+	u32 dte, pte;
+	u32 *page_table;
+
+	spin_lock_irqsave(&vsi_domain->lock, flags);
+	dte = vsi_domain->dt[vsi_iova_dte_index(iova)];
+	if (!vsi_dte_is_pt_valid(dte))
+		goto unlock;
+
+	pt_phys = vsi_dte_pt_address(dte);
+	page_table = (u32 *)phys_to_virt(pt_phys);
+	pte = page_table[vsi_iova_pte_index(iova)];
+	if (!vsi_pte_is_page_valid(pte))
+		goto unlock;
+
+	phys = vsi_pte_page_address(pte) + vsi_iova_page_offset(iova);
+
+unlock:
+	spin_unlock_irqrestore(&vsi_domain->lock, flags);
+	return phys;
+}
+
+static size_t vsi_iommu_unmap_iova(struct vsi_iommu_domain *vsi_domain,
+				   u32 *pte_addr, dma_addr_t pte_dma,
+				   size_t size)
+{
+	unsigned int pte_count;
+	unsigned int pte_total = size / SPAGE_SIZE;
+
+	for (pte_count = 0;
+	     pte_count < pte_total && pte_count < NUM_PT_ENTRIES; pte_count++) {
+		u32 pte = pte_addr[pte_count];
+
+		if (!vsi_pte_is_page_valid(pte))
+			break;
+
+		pte_addr[pte_count] = vsi_mk_pte_invalid(pte);
+	}
+
+	vsi_table_flush(vsi_domain, pte_dma, pte_total);
+
+	return pte_count * SPAGE_SIZE;
+}
+
+static int vsi_iommu_map_iova(struct vsi_iommu_domain *vsi_domain, u32 *pte_addr,
+			      dma_addr_t pte_dma, dma_addr_t iova,
+			      phys_addr_t paddr, size_t size, int prot)
+{
+	unsigned int pte_count, pte_total = size / SPAGE_SIZE;
+
+	for (pte_count = 0;
+	     pte_count < pte_total && pte_count < NUM_PT_ENTRIES; pte_count++) {
+		if (vsi_pte_is_page_valid(pte_addr[pte_count]))
+			goto err_in_use; /* never overwrite a live mapping */
+
+		pte_addr[pte_count] = vsi_mk_pte(paddr, prot);
+		paddr += SPAGE_SIZE;
+	}
+
+	vsi_table_flush(vsi_domain, pte_dma, pte_total);
+	return 0;
+err_in_use:
+	/* Invalidate what this call wrote so a failed map leaves no stale PTEs. */
+	while (pte_count--)
+		pte_addr[pte_count] = vsi_mk_pte_invalid(pte_addr[pte_count]);
+	return -EADDRINUSE;
+}
+
+static void vsi_iommu_flush_tlb(struct iommu_domain *domain)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	struct vsi_iommu *iommu;
+
+	list_for_each_entry(iommu, &vsi_domain->iommus, node) {
+		/*
+		 * pm_runtime_get() takes a usage-count reference even when it
+		 * fails, so drop it before skipping this instance.
+		 */
+		if (pm_runtime_get(iommu->dev) < 0) {
+			pm_runtime_put_noidle(iommu->dev);
+			continue;
+		}
+
+		spin_lock(&iommu->lock);
+
+		if (iommu->enable) {
+			writel(VSI_MMU_BIT_FLUSH, iommu->regs + VSI_MMU_FLUSH_BASE);
+			writel(0, iommu->regs + VSI_MMU_FLUSH_BASE);
+		}
+
+		spin_unlock(&iommu->lock);
+
+		pm_runtime_put_autosuspend(iommu->dev);
+	}
+}
+
+static size_t vsi_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
+			      size_t size, size_t count, struct iommu_iotlb_gather *gather)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	dma_addr_t pte_dma, iova = (dma_addr_t)_iova;
+	unsigned long flags;
+	phys_addr_t pt_phys;
+	u32 dte;
+	u32 *pte_addr;
+	size_t unmap_size = 0;
+
+	spin_lock_irqsave(&vsi_domain->lock, flags);
+
+	dte = vsi_domain->dt[vsi_iova_dte_index(iova)];
+	/* Just return 0 if iova is unmapped */
+	if (!vsi_dte_is_pt_valid(dte))
+		goto unlock;
+
+	pt_phys = vsi_dte_pt_address(dte);
+	pte_addr = (u32 *)phys_to_virt(pt_phys) + vsi_iova_pte_index(iova);
+	pte_dma = pt_phys + vsi_iova_pte_index(iova) * sizeof(u32);
+	unmap_size = vsi_iommu_unmap_iova(vsi_domain, pte_addr, pte_dma, size);
+	if (!unmap_size)
+		goto unlock;
+
+	vsi_iommu_flush_tlb(domain);
+unlock:
+	spin_unlock_irqrestore(&vsi_domain->lock, flags);
+
+	return unmap_size;
+}
+
+static u32 *vsi_dte_get_page_table(struct vsi_iommu_domain *vsi_domain,
+				   dma_addr_t iova, gfp_t gfp)
+{
+	u32 *page_table, *dte_addr;
+	u32 dte_index, dte;
+	phys_addr_t pt_phys;
+	dma_addr_t pt_dma;
+	gfp_t flags;
+
+	dte_index = vsi_iova_dte_index(iova);
+	dte_addr = &vsi_domain->dt[dte_index];
+	dte = *dte_addr;
+	if (vsi_dte_is_pt_valid(dte))
+		goto done;
+
+	/* Do not allow to sleep while allocating the buffer */
+	flags = (gfp & ~GFP_KERNEL) | GFP_ATOMIC | GFP_DMA32;
+	page_table = iommu_alloc_pages_sz(flags, PAGE_SIZE);
+	if (!page_table)
+		return ERR_PTR(-ENOMEM);
+
+	pt_dma = dma_map_single(vsi_domain->dev, page_table, PAGE_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(vsi_domain->dev, pt_dma)) {
+		dev_err(vsi_domain->dev, "DMA mapping error while allocating page table\n");
+		iommu_free_pages(page_table);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dte = vsi_mk_dte(pt_dma);
+	*dte_addr = dte;
+
+	vsi_table_flush(vsi_domain,
+			vsi_domain->dt_dma + dte_index * sizeof(u32), 1);
+done:
+	pt_phys = vsi_dte_pt_address(dte);
+	return (u32 *)phys_to_virt(pt_phys);
+}
+
+static int vsi_iommu_map(struct iommu_domain *domain, unsigned long _iova,
+			 phys_addr_t paddr, size_t size, size_t count,
+			 int prot, gfp_t gfp, size_t *mapped)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	dma_addr_t pte_dma, iova = (dma_addr_t)_iova;
+	u32 *page_table, *pte_addr;
+	u32 dte, pte_index;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&vsi_domain->lock, flags);
+
+	page_table = vsi_dte_get_page_table(vsi_domain, iova, gfp);
+	if (IS_ERR(page_table)) {
+		spin_unlock_irqrestore(&vsi_domain->lock, flags);
+		return PTR_ERR(page_table);
+	}
+
+	dte = vsi_domain->dt[vsi_iova_dte_index(iova)];
+	pte_index = vsi_iova_pte_index(iova);
+	pte_addr = &page_table[pte_index];
+	pte_dma = vsi_dte_pt_address(dte) + pte_index * sizeof(u32);
+	ret = vsi_iommu_map_iova(vsi_domain, pte_addr, pte_dma, iova,
+				 paddr, size, prot);
+	if (!ret)
+		*mapped = size;
+
+	vsi_iommu_flush_tlb(domain);
+
+	spin_unlock_irqrestore(&vsi_domain->lock, flags);
+
+	return ret;
+}
+
+static void vsi_iommu_disable(struct vsi_iommu *iommu)
+{
+	writel(0, iommu->regs + VSI_MMU_AHB_CONTROL_BASE);
+	iommu->enable = false;
+}
+
+static int vsi_iommu_identity_attach(struct iommu_domain *domain,
+				     struct device *dev, struct iommu_domain *old)
+{
+	struct vsi_iommu *iommu = dev_iommu_priv_get(dev);
+	unsigned long flags;
+	int ret;
+
+	if (iommu->domain == domain)
+		return 0;
+
+	ret = pm_runtime_resume_and_get(iommu->dev);
+	if (ret < 0)
+		return ret;
+
+	if (iommu->domain) { /* paging domain: the only kind with a lock and list */
+		struct vsi_iommu_domain *prev = to_vsi_domain(iommu->domain);
+
+		spin_lock_irqsave(&prev->lock, flags);
+		list_del_init(&iommu->node);
+		spin_unlock_irqrestore(&prev->lock, flags);
+	}
+	spin_lock_irqsave(&iommu->lock, flags);
+	vsi_iommu_disable(iommu);
+	iommu->domain = domain;
+	spin_unlock_irqrestore(&iommu->lock, flags);
+	pm_runtime_put_autosuspend(iommu->dev);
+	return 0;
+}
+
+static const struct iommu_domain_ops vsi_identity_ops = {
+	.attach_dev = vsi_iommu_identity_attach,
+};
+
+static struct iommu_domain vsi_identity_domain = {
+	.type = IOMMU_DOMAIN_IDENTITY,
+	.ops = &vsi_identity_ops,
+};
+
+static void vsi_iommu_enable(struct vsi_iommu *iommu, struct iommu_domain *domain)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+
+	if (domain == &vsi_identity_domain)
+		return;
+
+	writel(vsi_domain->pta_dma, iommu->regs + VSI_MMU_AHB_TLB_ARRAY_BASE_L_BASE);
+	writel(VSI_MMU_OUT_OF_BOUND, iommu->regs + VSI_MMU_CONFIG1_BASE);
+	writel(VSI_MMU_BIT_ENABLE, iommu->regs + VSI_MMU_AHB_EXCEPTION_BASE);
+	writel(VSI_MMU_BIT_ENABLE, iommu->regs + VSI_MMU_AHB_CONTROL_BASE);
+	iommu->enable = true;
+}
+
+static int vsi_iommu_attach_device(struct iommu_domain *domain,
+				   struct device *dev, struct iommu_domain *old)
+{
+	struct vsi_iommu *iommu = dev_iommu_priv_get(dev);
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	unsigned long flags;
+	int ret = 0;
+
+	ret = pm_runtime_resume_and_get(iommu->dev);
+	if (ret < 0)
+		return ret;
+
+	spin_lock_irqsave(&vsi_domain->lock, flags);
+	spin_lock(&iommu->lock);
+
+	vsi_iommu_enable(iommu, domain);
+	writel(VSI_MMU_BIT_FLUSH, iommu->regs + VSI_MMU_FLUSH_BASE);
+	writel(0, iommu->regs + VSI_MMU_FLUSH_BASE);
+
+	list_del_init(&iommu->node);
+	list_add_tail(&iommu->node, &vsi_domain->iommus);
+
+	iommu->domain = domain;
+
+	spin_unlock(&iommu->lock);
+	spin_unlock_irqrestore(&vsi_domain->lock, flags);
+	pm_runtime_put_autosuspend(iommu->dev);
+	return ret;
+}
+
+static void vsi_iommu_domain_free(struct iommu_domain *domain)
+{
+	struct vsi_iommu_domain *vsi_domain = to_vsi_domain(domain);
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&vsi_domain->lock, flags);
+
+	WARN_ON(!list_empty(&vsi_domain->iommus));
+
+	for (i = 0; i < NUM_DT_ENTRIES; i++) {
+		u32 dte = vsi_domain->dt[i];
+
+		if (vsi_dte_is_pt_valid(dte)) {
+			phys_addr_t pt_phys = vsi_dte_pt_address(dte);
+			u32 *page_table = phys_to_virt(pt_phys);
+
+			dma_unmap_single(vsi_domain->dev, pt_phys,
+					 SPAGE_SIZE, DMA_TO_DEVICE);
+			iommu_free_pages(page_table);
+		}
+	}
+
+	dma_unmap_single(vsi_domain->dev, vsi_domain->dt_dma,
+			 SPAGE_SIZE, DMA_TO_DEVICE);
+	iommu_free_pages(vsi_domain->dt);
+
+	dma_unmap_single(vsi_domain->dev, vsi_domain->pta_dma,
+			 SPAGE_SIZE, DMA_TO_DEVICE);
+	iommu_free_pages(vsi_domain->pta);
+
+	spin_unlock_irqrestore(&vsi_domain->lock, flags);
+
+	kfree(vsi_domain);
+}
+
+static struct iommu_device *vsi_iommu_probe_device(struct device *dev)
+{
+	struct vsi_iommu *iommu = vsi_iommu_get_from_dev(dev);
+
+	if (!iommu) /* no IOMMU driver data behind this device's fwspec */
+		return ERR_PTR(-ENODEV);
+
+	if (!device_link_add(dev, iommu->dev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME))
+		dev_err(dev, "Unable to link %s\n", dev_name(iommu->dev));
+
+	dev_iommu_priv_set(dev, iommu);
+	return &iommu->iommu;
+}
+
+static void vsi_iommu_release_device(struct device *dev)
+{
+	struct vsi_iommu *iommu = dev_iommu_priv_get(dev);
+
+	device_link_remove(dev, iommu->dev);
+}
+
+static int vsi_iommu_of_xlate(struct device *dev, const struct of_phandle_args *args)
+{
+	return iommu_fwspec_add_ids(dev, args->args, 1);
+}
+
+static const struct iommu_ops vsi_iommu_ops = {
+	.identity_domain = &vsi_identity_domain,
+	.release_domain = &vsi_identity_domain,
+	.domain_alloc_paging = vsi_iommu_domain_alloc_paging,
+	.of_xlate = vsi_iommu_of_xlate,
+	.probe_device = vsi_iommu_probe_device,
+	.release_device = vsi_iommu_release_device,
+	.device_group = generic_single_device_group,
+	.owner = THIS_MODULE,
+	.default_domain_ops = &(const struct iommu_domain_ops) {
+		.attach_dev		= vsi_iommu_attach_device,
+		.map_pages		= vsi_iommu_map,
+		.unmap_pages		= vsi_iommu_unmap,
+		.iova_to_phys		= vsi_iommu_iova_to_phys,
+		.free			= vsi_iommu_domain_free,
+	}
+};
+
+static const struct of_device_id vsi_iommu_dt_ids[] = {
+	{
+		.compatible = "verisilicon,iommu-1.2",
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, vsi_iommu_dt_ids);
+
+static int vsi_iommu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct vsi_iommu *iommu;
+	int err;
+
+	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return -ENOMEM;
+
+	iommu->dev = dev;
+	spin_lock_init(&iommu->lock);
+	INIT_LIST_HEAD(&iommu->node);
+
+	iommu->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(iommu->regs))
+		return PTR_ERR(iommu->regs); /* propagate the real error */
+
+	iommu->num_clocks = devm_clk_bulk_get_all(dev, &iommu->clocks);
+	if (iommu->num_clocks < 0)
+		return iommu->num_clocks;
+
+	iommu->irq = platform_get_irq(pdev, 0); /* before clk prepare: no unwind */
+	if (iommu->irq < 0)
+		return iommu->irq;
+
+	err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
+	if (err)
+		return err;
+
+	err = devm_request_irq(iommu->dev, iommu->irq, vsi_iommu_irq,
+			       IRQF_SHARED, dev_name(dev), iommu);
+	if (err)
+		goto err_unprepare_clocks;
+
+	dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	platform_set_drvdata(pdev, iommu);
+
+	pm_runtime_set_autosuspend_delay(dev, 100);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_enable(dev);
+
+	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+	if (err)
+		goto err_runtime_disable;
+
+	err = iommu_device_register(&iommu->iommu, &vsi_iommu_ops, dev);
+	if (err)
+		goto err_remove_sysfs;
+
+	return 0;
+
+err_remove_sysfs:
+	iommu_device_sysfs_remove(&iommu->iommu);
+err_runtime_disable:
+	pm_runtime_disable(dev);
+err_unprepare_clocks:
+	clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
+	return err;
+}
+
+static void vsi_iommu_shutdown(struct platform_device *pdev)
+{
+	struct vsi_iommu *iommu = platform_get_drvdata(pdev);
+
+	disable_irq(iommu->irq);
+	pm_runtime_force_suspend(&pdev->dev);
+}
+
+static int __maybe_unused vsi_iommu_suspend(struct device *dev)
+{
+	struct vsi_iommu *iommu = dev_get_drvdata(dev);
+
+	vsi_iommu_disable(iommu);
+
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+
+	return 0;
+}
+
+static int __maybe_unused vsi_iommu_resume(struct device *dev)
+{
+	struct vsi_iommu *iommu = dev_get_drvdata(dev);
+	unsigned long flags;
+	int ret;
+
+	ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks);
+	if (ret)
+		return ret;
+
+	/* The identity domain is a bare iommu_domain: no lock, nothing to program. */
+	if (iommu->domain && iommu->domain != &vsi_identity_domain) {
+		struct vsi_iommu_domain *vsi_domain = to_vsi_domain(iommu->domain);
+
+		spin_lock_irqsave(&vsi_domain->lock, flags);
+		spin_lock(&iommu->lock);
+		vsi_iommu_enable(iommu, iommu->domain);
+		spin_unlock(&iommu->lock);
+		spin_unlock_irqrestore(&vsi_domain->lock, flags);
+	}
+	return 0;
+}
+
+static DEFINE_RUNTIME_DEV_PM_OPS(vsi_iommu_pm_ops,
+				 vsi_iommu_suspend, vsi_iommu_resume,
+				 NULL);
+
+static struct platform_driver rockchip_vsi_iommu_driver = {
+	.probe = vsi_iommu_probe,
+	.shutdown = vsi_iommu_shutdown,
+	.driver = {
+		   .name = "vsi_iommu",
+		   .of_match_table = vsi_iommu_dt_ids,
+		   .pm = pm_sleep_ptr(&vsi_iommu_pm_ops),
+		   .suppress_bind_attrs = true,
+	},
+};
+module_platform_driver(rockchip_vsi_iommu_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Benjamin Gaignard <benjamin.gaignard@collabora.com>");
+MODULE_DESCRIPTION("Verisilicon IOMMU driver");
-- 
2.43.0



^ permalink raw reply related

* [PATCH v14 4/5] arm64: dts: rockchip: Add verisilicon IOMMU node on RK3588
From: Benjamin Gaignard @ 2026-04-15  7:23 UTC (permalink / raw)
  To: joro, will, robin.murphy, krzk+dt, conor+dt, heiko
  Cc: iommu, devicetree, linux-kernel, linux-arm-kernel, linux-rockchip,
	kernel, Benjamin Gaignard
In-Reply-To: <20260415072349.44237-1-benjamin.gaignard@collabora.com>

Add the device tree node for the Verisilicon IOMMU present
in the RK3588 SoC.
This IOMMU handles address translation for the VPU hardware blocks.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-base.dtsi | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
index 7fe9593d8c19..7fde18feeaf8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
@@ -1428,6 +1428,17 @@ av1d: video-codec@fdc70000 {
 		clock-names = "aclk", "hclk";
 		power-domains = <&power RK3588_PD_AV1>;
 		resets = <&cru SRST_A_AV1>, <&cru SRST_P_AV1>, <&cru SRST_A_AV1_BIU>, <&cru SRST_P_AV1_BIU>;
+		iommus = <&av1d_mmu>;
+	};
+
+	av1d_mmu: iommu@fdca0000 {
+		compatible = "rockchip,rk3588-av1-iommu", "verisilicon,iommu-1.2";
+		reg = <0x0 0xfdca0000 0x0 0x600>;
+		interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH 0>;
+		clocks = <&cru ACLK_AV1>, <&cru PCLK_AV1>;
+		clock-names = "core", "iface";
+		#iommu-cells = <0>;
+		power-domains = <&power RK3588_PD_AV1>;
 	};
 
 	vop: vop@fdd90000 {
-- 
2.43.0



^ permalink raw reply related

* [PATCH v14 5/5] arm64: defconfig: enable Verisilicon IOMMU for Rockchip RK3588
From: Benjamin Gaignard @ 2026-04-15  7:23 UTC (permalink / raw)
  To: joro, will, robin.murphy, krzk+dt, conor+dt, heiko
  Cc: iommu, devicetree, linux-kernel, linux-arm-kernel, linux-rockchip,
	kernel, Benjamin Gaignard, Krzysztof Kozlowski
In-Reply-To: <20260415072349.44237-1-benjamin.gaignard@collabora.com>

Enable Verisilicon IOMMU used by Rockchip RK3588 AV1 hardware codec.
This hardware block can be found on the Radxa ROCK 5B board.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index b67d5b1fc45b..b97f5008f6be 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1592,6 +1592,7 @@ CONFIG_ARM_SMMU_V3=y
 CONFIG_MTK_IOMMU=y
 CONFIG_QCOM_IOMMU=y
 CONFIG_APPLE_DART=m
+CONFIG_VSI_IOMMU=m
 CONFIG_REMOTEPROC=y
 CONFIG_IMX_REMOTEPROC=y
 CONFIG_MTK_SCP=m
-- 
2.43.0



^ permalink raw reply related

* [PATCH v14 1/5] dt-bindings: vendor-prefixes: Add Verisilicon
From: Benjamin Gaignard @ 2026-04-15  7:23 UTC (permalink / raw)
  To: joro, will, robin.murphy, krzk+dt, conor+dt, heiko
  Cc: iommu, devicetree, linux-kernel, linux-arm-kernel, linux-rockchip,
	kernel, Benjamin Gaignard, Conor Dooley
In-Reply-To: <20260415072349.44237-1-benjamin.gaignard@collabora.com>

Verisilicon Microelectronics is a company based in Shanghai, China,
developing hardware blocks for SoC.

https://verisilicon.com/

Add their name to the list of vendors.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index ee7fd3cfe203..ebd9072300a8 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -1761,6 +1761,8 @@ patternProperties:
     description: Variscite Ltd.
   "^vdl,.*":
     description: Van der Laan b.v.
+  "^verisilicon,.*":
+    description: VeriSilicon Microelectronics
   "^vertexcom,.*":
     description: Vertexcom Technologies, Inc.
   "^via,.*":
-- 
2.43.0



^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox