The Linux Kernel Mailing List

The Linux Kernel Mailing List
 help / color / mirror / Atom feed

* Re: [PATCH v5 13/16] media: iris: Introduce buffer size calculations for AR50LT
From: Vikash Garodia @ 2026-06-24 15:10 UTC (permalink / raw)
  To: Dmitry Baryshkov, Abhinav Kumar, Bryan O'Donoghue,
	Mauro Carvalho Chehab, Bjorn Andersson, Konrad Dybcio,
	Rob Herring, Krzysztof Kozlowski, Conor Dooley, Vishnu Reddy
  Cc: linux-media, linux-arm-msm, linux-kernel, devicetree,
	Dikshita Agarwal
In-Reply-To: <20260616-iris-ar50lt-v5-13-583b42770b6a@oss.qualcomm.com>



On 6/16/2026 5:34 AM, Dmitry Baryshkov wrote:
> From: Dikshita Agarwal <dikshita.agarwal@oss.qualcomm.com>
> 
> Introduces AR50LT  buffer size calculation for both encoder and
> decoder. Reuse the buffer size calculation which are common, while
> adding the AR50LT specific ones separately.
> 
> Signed-off-by: Dikshita Agarwal <dikshita.agarwal@oss.qualcomm.com>
> Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@oss.qualcomm.com>
> ---
>   drivers/media/platform/qcom/iris/iris_vpu_buffer.c | 401 +++++++++++++++++++++
>   drivers/media/platform/qcom/iris/iris_vpu_buffer.h |  37 ++
>   2 files changed, 438 insertions(+)
> 
> diff --git a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
> index 4a39b8fef52b..ca03d6570513 100644
> --- a/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
> +++ b/drivers/media/platform/qcom/iris/iris_vpu_buffer.c
> @@ -50,6 +50,32 @@ static u32 hfi_buffer_bin_h264d(u32 frame_width, u32 frame_height, u32 num_vpp_p
>   	return size_h264d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
>   }
>   
> +static u32 size_h264d_hw_bin_buffer_ar50lt(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> +{
> +	u32 size_yuv, size_bin_hdr, size_bin_res;
> +
> +	size_yuv = ((frame_width * frame_height * 3) >> 1);
> +	if (size_yuv <= 1920 * 1088 * 3 / 2) {
> +		size_bin_hdr = size_yuv * H264_CABAC_HDR_RATIO_SM_TOT;
> +		size_bin_res = size_yuv * H264_CABAC_RES_RATIO_SM_TOT;
> +	} else {
> +		size_bin_hdr = (size_yuv * 3) / 5;
> +		size_bin_res = (size_yuv * 3) / 2;
> +	}
> +	size_bin_hdr = ALIGN(size_bin_hdr, DMA_ALIGNMENT);
> +	size_bin_res = ALIGN(size_bin_res, DMA_ALIGNMENT);
> +
> +	return size_bin_hdr + size_bin_res;
> +}
> +
> +static u32 hfi_buffer_bin_h264d_ar50lt(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> +{
> +	u32 n_aligned_h = ALIGN(frame_height, 16);
> +	u32 n_aligned_w = ALIGN(frame_width, 16);
> +
> +	return size_h264d_hw_bin_buffer_ar50lt(n_aligned_w, n_aligned_h, num_vpp_pipes);
> +}
> +
>   static u32 size_av1d_hw_bin_buffer(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
>   {
>   	u32 size_yuv, size_bin_hdr, size_bin_res;
> @@ -103,6 +129,21 @@ static u32 hfi_buffer_bin_vp9d(u32 frame_width, u32 frame_height, u32 num_vpp_pi
>   	return _size * num_vpp_pipes;
>   }
>   
> +static u32 hfi_buffer_bin_vp9d_ar50lt(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> +{
> +	u32 size_yuv, size;
> +
> +	size_yuv = ALIGN(frame_width, 16) * ALIGN(frame_height, 16) * 3 / 2;
> +	size_yuv = ALIGN(size_yuv, DMA_ALIGNMENT);
> +
> +	size = ALIGN(((((MAX(size_yuv, VPX_DECODER_FRAME_BIN_BUFFER_SIZE)) * 6) / 5) /

MAX() or max() ?

> +		      num_vpp_pipes), DMA_ALIGNMENT) +
> +		ALIGN((((MAX(size_yuv, VPX_DECODER_FRAME_BIN_BUFFER_SIZE)) * 4) / num_vpp_pipes),
> +		      DMA_ALIGNMENT);
> +
> +	return size * num_vpp_pipes;
> +}
> +
>   static u32 hfi_buffer_bin_h265d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
>   {
>   	u32 n_aligned_w = ALIGN(frame_width, 16);
> @@ -111,6 +152,32 @@ static u32 hfi_buffer_bin_h265d(u32 frame_width, u32 frame_height, u32 num_vpp_p
>   	return size_h265d_hw_bin_buffer(n_aligned_w, n_aligned_h, num_vpp_pipes);
>   }
>   
> +static u32 size_h265d_hw_bin_buffer_ar50lt(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> +{
> +	u32 size_yuv, size_bin_hdr, size_bin_res;
> +
> +	size_yuv = ((frame_width * frame_height * 3) >> 1);
> +	if (size_yuv <= ((BIN_BUFFER_THRESHOLD * 3) >> 1)) {
> +		size_bin_hdr = size_yuv * H265_CABAC_HDR_RATIO_SM_TOT;
> +		size_bin_res = size_yuv * H265_CABAC_RES_RATIO_SM_TOT;
> +	} else {
> +		size_bin_hdr = (size_yuv * 41) / 50;
> +		size_bin_res = (size_yuv * 59) / 50;
> +	}
> +	size_bin_hdr = ALIGN(size_bin_hdr, DMA_ALIGNMENT);
> +	size_bin_res = ALIGN(size_bin_res, DMA_ALIGNMENT);
> +
> +	return size_bin_hdr + size_bin_res;
> +}
> +
> +static u32 hfi_buffer_bin_h265d_ar50lt(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> +{
> +	u32 n_aligned_w = ALIGN(frame_width, 16);
> +	u32 n_aligned_h = ALIGN(frame_height, 16);
> +
> +	return size_h265d_hw_bin_buffer_ar50lt(n_aligned_w, n_aligned_h, num_vpp_pipes);
> +}
> +
>   static u32 hfi_buffer_comv_h264d(u32 frame_width, u32 frame_height, u32 _comv_bufcount)
>   {
>   	u32 frame_height_in_mbs = DIV_ROUND_UP(frame_height, 16);
> @@ -174,6 +241,14 @@ static u32 size_h264d_bse_cmd_buf(u32 frame_height)
>   		SIZE_H264D_BSE_CMD_PER_BUF;
>   }
>   
> +static u32 size_h264d_bse_cmd_buf_ar50lt(u32 frame_height)
> +{
> +	u32 height = ALIGN(frame_height, 32);
> +
> +	return min_t(u32, (DIV_ROUND_UP(height, 16) * 12), H264D_MAX_SLICE) *
> +		SIZE_H264D_BSE_CMD_PER_BUF;
> +}
> +
>   static u32 size_h265d_bse_cmd_buf(u32 frame_width, u32 frame_height)
>   {
>   	u32 _size = ALIGN(((ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
> @@ -185,6 +260,18 @@ static u32 size_h265d_bse_cmd_buf(u32 frame_width, u32 frame_height)
>   	return _size;
>   }
>   
> +static u32 size_h265d_bse_cmd_buf_ar50lt(u32 frame_width, u32 frame_height)
> +{
> +	u32 _size = ALIGN(((ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
> +			   (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) *
> +			    NUM_HW_PIC_BUF, DMA_ALIGNMENT);
> +
> +	_size = min_t(u32, _size, H265D_MAX_SLICE_AR50LT + 1);
> +	_size = 2 * _size * SIZE_H265D_BSE_CMD_PER_BUF;
> +
> +	return _size;
> +}
> +
>   static u32 hfi_buffer_persist_h265d(u32 rpu_enabled)
>   {
>   	return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 +
> @@ -195,6 +282,13 @@ static u32 hfi_buffer_persist_h265d(u32 rpu_enabled)
>   		     DMA_ALIGNMENT);
>   }
>   
> +static u32 hfi_buffer_persist_h265d_ar50lt(void)
> +{
> +	return ALIGN((SIZE_SLIST_BUF_H265 * NUM_SLIST_BUF_H265 +
> +		      H265_NUM_TILE * sizeof(u32) + NUM_HW_PIC_BUF * SIZE_SEI_USERDATA),
> +		     DMA_ALIGNMENT);
> +}
> +
>   static inline
>   u32 hfi_iris3_vp9d_comv_size(void)
>   {
> @@ -212,6 +306,13 @@ static u32 hfi_buffer_persist_vp9d(void)
>   		HDR10_HIST_EXTRADATA_SIZE;
>   }
>   
> +static u32 hfi_buffer_persist_vp9d_ar50lt(void)
> +{
> +	return ALIGN(VP9_NUM_PROBABILITY_TABLE_BUF * VP9_PROB_TABLE_SIZE, DMA_ALIGNMENT) +
> +		ALIGN(hfi_iris3_vp9d_comv_size(), DMA_ALIGNMENT) +
> +		ALIGN(MAX_SUPERFRAME_HEADER_LEN, DMA_ALIGNMENT);
> +}
> +
>   static u32 size_h264d_vpp_cmd_buf(u32 frame_height)
>   {
>   	u32 size, height = ALIGN(frame_height, 32);
> @@ -222,6 +323,16 @@ static u32 size_h264d_vpp_cmd_buf(u32 frame_height)
>   	return size > VPP_CMD_MAX_SIZE ? VPP_CMD_MAX_SIZE : size;
>   }
>   
> +static u32 size_h264d_vpp_cmd_buf_ar50lt(u32 frame_height)
> +{
> +	u32 size, height = ALIGN(frame_height, 32);
> +
> +	size = min_t(u32, (DIV_ROUND_UP(height, 16) * 12), H264D_MAX_SLICE) *
> +		SIZE_H264D_VPP_CMD_PER_BUF;
> +
> +	return size > VPP_CMD_MAX_SIZE ? VPP_CMD_MAX_SIZE : size;
> +}
> +
>   static u32 hfi_buffer_persist_h264d(void)
>   {
>   	return ALIGN(SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264 +
> @@ -230,6 +341,11 @@ static u32 hfi_buffer_persist_h264d(void)
>   		    DMA_ALIGNMENT);
>   }
>   
> +static u32 hfi_buffer_persist_h264d_ar50lt(void)
> +{
> +	return ALIGN((SIZE_SLIST_BUF_H264 * NUM_SLIST_BUF_H264), DMA_ALIGNMENT);
> +}
> +
>   static u32 hfi_buffer_persist_av1d(u32 max_width, u32 max_height, u32 total_ref_count)
>   {
>   	u32 comv_size, size;
> @@ -255,6 +371,17 @@ static u32 hfi_buffer_non_comv_h264d(u32 frame_width, u32 frame_height, u32 num_
>   	return ALIGN(size, DMA_ALIGNMENT);
>   }
>   
> +static u32 hfi_buffer_non_comv_h264d_ar50lt(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> +{
> +	u32 size_bse = size_h264d_bse_cmd_buf_ar50lt(frame_height);
> +	u32 size_vpp = size_h264d_vpp_cmd_buf_ar50lt(frame_height);
> +	u32 size = ALIGN(size_bse, DMA_ALIGNMENT) +
> +		ALIGN(size_vpp, DMA_ALIGNMENT) +
> +		ALIGN(SIZE_HW_PIC(SIZE_H264D_HW_PIC_T), DMA_ALIGNMENT);
> +
> +	return ALIGN(size, DMA_ALIGNMENT);
> +}
> +
>   static u32 size_h265d_vpp_cmd_buf(u32 frame_width, u32 frame_height)
>   {
>   	u32 _size = ALIGN(((ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
> @@ -269,6 +396,20 @@ static u32 size_h265d_vpp_cmd_buf(u32 frame_width, u32 frame_height)
>   	return _size;
>   }
>   
> +static u32 size_h265d_vpp_cmd_buf_ar50lt(u32 frame_width, u32 frame_height)
> +{
> +	u32 _size = ALIGN(((ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
> +			   (ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS)) *
> +			  NUM_HW_PIC_BUF, DMA_ALIGNMENT);
> +	_size = min_t(u32, _size, H265D_MAX_SLICE_AR50LT + 1);
> +	_size = ALIGN(_size, 4);
> +	_size = 2 * _size * SIZE_H265D_VPP_CMD_PER_BUF_AR50LT;
> +	if (_size > VPP_CMD_MAX_SIZE)
> +		_size = VPP_CMD_MAX_SIZE;
> +
> +	return _size;
> +}
> +
>   static u32 hfi_buffer_non_comv_h265d(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
>   {
>   	u32 _size_bse = size_h265d_bse_cmd_buf(frame_width, frame_height);
> @@ -285,6 +426,20 @@ static u32 hfi_buffer_non_comv_h265d(u32 frame_width, u32 frame_height, u32 num_
>   	return ALIGN(_size, DMA_ALIGNMENT);
>   }
>   
> +static u32 hfi_buffer_non_comv_h265d_ar50lt(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> +{
> +	u32 _size_bse = size_h265d_bse_cmd_buf_ar50lt(frame_width, frame_height);
> +	u32 _size_vpp = size_h265d_vpp_cmd_buf_ar50lt(frame_width, frame_height);
> +	u32 _size = ALIGN(_size_bse, DMA_ALIGNMENT) +
> +		ALIGN(_size_vpp, DMA_ALIGNMENT) +
> +		ALIGN(2 * sizeof(u16) *
> +		(ALIGN(frame_width, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS) *
> +		(ALIGN(frame_height, LCU_MAX_SIZE_PELS) / LCU_MIN_SIZE_PELS), DMA_ALIGNMENT) +
> +		ALIGN(SIZE_HW_PIC(SIZE_H265D_HW_PIC_T), DMA_ALIGNMENT);
> +
> +	return ALIGN(_size, DMA_ALIGNMENT);
> +}
> +
>   static u32 size_vpss_lb(u32 frame_width, u32 frame_height)
>   {
>   	u32 opb_lb_wr_llb_y_buffer_size, opb_lb_wr_llb_uv_buffer_size;
> @@ -317,6 +472,13 @@ u32 size_h265d_lb_fe_top_data(u32 frame_width, u32 frame_height)
>   		(ALIGN(frame_width, 64) + 8) * 2;
>   }
>   
> +static inline
> +u32 size_h265d_lb_fe_top_data_ar50lt(u32 frame_width, u32 frame_height)
> +{
> +	return ALIGN(MAX_FE_NBR_DATA_LUMA_LINE_BUFFER_SIZE *
> +		(ALIGN(frame_width, 64) + 8), DMA_ALIGNMENT) * 2;
> +}
> +
>   static inline
>   u32 size_h265d_lb_fe_top_ctrl(u32 frame_width, u32 frame_height)
>   {
> @@ -348,6 +510,17 @@ u32 size_h265d_lb_se_left_ctrl(u32 frame_width, u32 frame_height)
>   		MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE));
>   }
>   
> +static inline
> +u32 size_h265d_lb_se_left_ctrl_ar50lt(u32 frame_width, u32 frame_height)
> +{
> +	return max_t(u32, ((frame_height + 16 - 1) / 8) *
> +		MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE_AR50LT,
> +		max_t(u32, ((frame_height + 32 - 1) / 8) *
> +		MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE_AR50LT,
> +		((frame_height + 64 - 1) / 8) *
> +		MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE_AR50LT));
> +}
> +
>   static inline
>   u32 size_h265d_lb_pe_top_data(u32 frame_width, u32 frame_height)
>   {
> @@ -355,6 +528,13 @@ u32 size_h265d_lb_pe_top_data(u32 frame_width, u32 frame_height)
>   		(ALIGN(frame_width, LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS);
>   }
>   
> +static inline
> +u32 size_h265d_lb_pe_top_data_ar50lt(u32 frame_width, u32 frame_height)
> +{
> +	return MAX_PE_NBR_DATA_LCU64_LINE_BUFFER_SIZE_AR50LT *
> +		(ALIGN(frame_width, LCU_MIN_SIZE_PELS) / LCU_MIN_SIZE_PELS);
> +}
> +
>   static inline
>   u32 size_h265d_lb_vsp_top(u32 frame_width, u32 frame_height)
>   {
> @@ -404,6 +584,29 @@ u32 hfi_buffer_line_h265d(u32 frame_width, u32 frame_height, bool is_opb, u32 nu
>   	return ALIGN((_size + vpss_lb_size), DMA_ALIGNMENT);
>   }
>   
> +static inline
> +u32 hfi_buffer_line_h265d_ar50lt(u32 frame_width, u32 frame_height, bool is_opb, u32 num_vpp_pipes)
> +{
> +	u32 size;
> +
> +	size = ALIGN(size_h265d_lb_fe_top_data_ar50lt(frame_width, frame_height), DMA_ALIGNMENT) +
> +		ALIGN(size_h265d_lb_fe_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) +
> +		ALIGN(size_h265d_lb_fe_left_ctrl(frame_width, frame_height),
> +		      DMA_ALIGNMENT) * num_vpp_pipes +
> +		ALIGN(size_h265d_lb_se_left_ctrl_ar50lt(frame_width, frame_height),
> +		      DMA_ALIGNMENT) * num_vpp_pipes +
> +		ALIGN(size_h265d_lb_se_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) +
> +		ALIGN(size_h265d_lb_pe_top_data_ar50lt(frame_width, frame_height), DMA_ALIGNMENT) +
> +		ALIGN(size_h265d_lb_vsp_top(frame_width, frame_height), DMA_ALIGNMENT) +
> +		ALIGN(size_h265d_lb_vsp_left(frame_width, frame_height),
> +		      DMA_ALIGNMENT) * num_vpp_pipes +
> +		ALIGN(size_h265d_lb_recon_dma_metadata_wr(frame_width, frame_height),
> +		      DMA_ALIGNMENT) * 4 +
> +		ALIGN(size_h265d_qp(frame_width, frame_height), DMA_ALIGNMENT);
> +
> +	return ALIGN(size, DMA_ALIGNMENT);
> +}
> +
>   static inline
>   u32 size_vpxd_lb_fe_left_ctrl(u32 frame_width, u32 frame_height)
>   {
> @@ -438,6 +641,17 @@ u32 size_vpxd_lb_se_left_ctrl(u32 frame_width, u32 frame_height)
>   			   MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE));
>   }
>   
> +static inline
> +u32 size_vpxd_lb_se_left_ctrl_ar50lt(u32 frame_width, u32 frame_height)
> +{
> +	return max_t(u32, ((frame_height + 15) >> 4) *
> +		     MAX_SE_NBR_CTRL_LCU16_LINE_BUFFER_SIZE_AR50LT,
> +		     max_t(u32, ((frame_height + 31) >> 5) *
> +			   MAX_SE_NBR_CTRL_LCU32_LINE_BUFFER_SIZE_AR50LT,
> +			   ((frame_height + 63) >> 6) *
> +			   MAX_SE_NBR_CTRL_LCU64_LINE_BUFFER_SIZE_AR50LT));
> +}
> +
>   static inline
>   u32 size_vpxd_lb_recon_dma_metadata_wr(u32 frame_width, u32 frame_height)
>   {
> @@ -492,6 +706,19 @@ u32 hfi_iris3_vp9d_lb_size(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
>   		ALIGN(size_vp9d_qp(frame_width, frame_height), DMA_ALIGNMENT);
>   }
>   
> +static inline
> +u32 hfi_ar50lt_vp9d_lb_size(u32 frame_width, u32 frame_height, u32 num_vpp_pipes)
> +{
> +	return ALIGN(size_vpxd_lb_fe_left_ctrl(frame_width, frame_height), DMA_ALIGNMENT) *
> +		num_vpp_pipes +
> +		ALIGN(size_vpxd_lb_se_left_ctrl_ar50lt(frame_width, frame_height), DMA_ALIGNMENT) *
> +		num_vpp_pipes +
> +		ALIGN(size_vp9d_lb_vsp_top(frame_width, frame_height), DMA_ALIGNMENT) +
> +		ALIGN(size_vpxd_lb_se_top_ctrl(frame_width, frame_height), DMA_ALIGNMENT) +
> +		ALIGN(size_vp9d_lb_pe_top_data(frame_width, frame_height), DMA_ALIGNMENT) +
> +		ALIGN(size_vp9d_lb_fe_top_data(frame_width, frame_height), DMA_ALIGNMENT);
> +}
> +
>   static inline
>   u32 hfi_buffer_line_vp9d(u32 frame_width, u32 frame_height, u32 _yuv_bufcount_min, bool is_opb,
>   			 u32 num_vpp_pipes)
> @@ -507,6 +734,13 @@ u32 hfi_buffer_line_vp9d(u32 frame_width, u32 frame_height, u32 _yuv_bufcount_mi
>   	return _lb_size + vpss_lb_size + 4096;
>   }
>   
> +static inline
> +u32 hfi_buffer_line_vp9d_ar50lt(u32 frame_width, u32 frame_height, u32 _yuv_bufcount_min,
> +				bool is_opb, u32 num_vpp_pipes)
> +{
> +	return hfi_ar50lt_vp9d_lb_size(frame_width, frame_height, num_vpp_pipes);

Please keep the naming consistent between "hfi_buffer_line_vp9d_ar50lt" and
"hfi_ar50lt_vp9d_lb_size", or combine these two APIs, since one just calls
the other.

> +}
> +
>   static u32 hfi_buffer_line_h264d(u32 frame_width, u32 frame_height,
>   				 bool is_opb, u32 num_vpp_pipes)
>   {
> @@ -529,6 +763,25 @@ static u32 hfi_buffer_line_h264d(u32 frame_width, u32 frame_height,
>   	return ALIGN((size + vpss_lb_size), DMA_ALIGNMENT);
>   }
>   
> +static u32 hfi_buffer_line_h264d_ar50lt(u32 frame_width, u32 frame_height,
> +					bool is_opb, u32 num_vpp_pipes)
> +{
> +	u32 size;
> +
> +	size = ALIGN(size_h264d_lb_fe_top_data_ar50lt(frame_width), DMA_ALIGNMENT) +
> +		ALIGN(size_h264d_lb_fe_top_ctrl_ar50lt(frame_width), DMA_ALIGNMENT) +
> +		ALIGN(size_h264d_lb_fe_left_ctrl(frame_height), DMA_ALIGNMENT) * num_vpp_pipes +
> +		ALIGN(size_h264d_lb_se_top_ctrl_ar50lt(frame_width), DMA_ALIGNMENT) +
> +		ALIGN(size_h264d_lb_se_left_ctrl_ar50lt(frame_height), DMA_ALIGNMENT) *
> +		num_vpp_pipes +
> +		ALIGN(size_h264d_lb_pe_top_data_ar50lt(frame_width), DMA_ALIGNMENT) +
> +		ALIGN(size_h264d_lb_vsp_top(frame_width), DMA_ALIGNMENT) +
> +		ALIGN(size_h264d_lb_recon_dma_metadata_wr(frame_height), DMA_ALIGNMENT) * 2 +
> +		ALIGN(size_h264d_qp(frame_width, frame_height), DMA_ALIGNMENT);
> +
> +	return ALIGN(size, DMA_ALIGNMENT);
> +}
> +
>   static u32 size_av1d_lb_opb_wr1_nv12_ubwc(u32 frame_width, u32 frame_height)
>   {
>   	u32 size, y_width, y_width_a = 128;
> @@ -724,6 +977,23 @@ static u32 iris_vpu_dec_bin_size(struct iris_inst *inst)
>   	return 0;
>   }
>   
> +static u32 iris_vpu_ar50lt_dec_bin_size(struct iris_inst *inst)
> +{
> +	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
> +	struct v4l2_format *f = inst->fmt_src;
> +	u32 height = f->fmt.pix_mp.height;
> +	u32 width = f->fmt.pix_mp.width;
> +
> +	if (inst->codec == V4L2_PIX_FMT_H264)
> +		return hfi_buffer_bin_h264d_ar50lt(width, height, num_vpp_pipes);
> +	else if (inst->codec == V4L2_PIX_FMT_HEVC)
> +		return hfi_buffer_bin_h265d_ar50lt(width, height, num_vpp_pipes);
> +	else if (inst->codec == V4L2_PIX_FMT_VP9)
> +		return hfi_buffer_bin_vp9d_ar50lt(width, height, num_vpp_pipes);
> +
> +	return 0;
> +}
> +
>   static u32 iris_vpu_dec_comv_size(struct iris_inst *inst)
>   {
>   	u32 num_comv = VIDEO_MAX_FRAME;
> @@ -785,6 +1055,18 @@ static u32 iris_vpu_dec_persist_size(struct iris_inst *inst)
>   	return 0;
>   }
>   
> +static u32 iris_vpu_ar50lt_dec_persist_size(struct iris_inst *inst)
> +{
> +	if (inst->codec == V4L2_PIX_FMT_H264)
> +		return hfi_buffer_persist_h264d_ar50lt();
> +	else if (inst->codec == V4L2_PIX_FMT_HEVC)
> +		return hfi_buffer_persist_h265d_ar50lt();
> +	else if (inst->codec == V4L2_PIX_FMT_VP9)
> +		return hfi_buffer_persist_vp9d_ar50lt();
> +
> +	return 0;
> +}
> +
>   static u32 iris_vpu_dec_dpb_size(struct iris_inst *inst)
>   {
>   	if (iris_split_mode_enabled(inst))
> @@ -808,6 +1090,21 @@ static u32 iris_vpu_dec_non_comv_size(struct iris_inst *inst)
>   	return 0;
>   }
>   
> +static u32 iris_vpu_ar50lt_dec_non_comv_size(struct iris_inst *inst)
> +{
> +	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;

Neither of the APIs called below uses this variable?

> +	struct v4l2_format *f = inst->fmt_src;
> +	u32 height = f->fmt.pix_mp.height;
> +	u32 width = f->fmt.pix_mp.width;
> +
> +	if (inst->codec == V4L2_PIX_FMT_H264)
> +		return hfi_buffer_non_comv_h264d_ar50lt(width, height, num_vpp_pipes);
> +	else if (inst->codec == V4L2_PIX_FMT_HEVC)
> +		return hfi_buffer_non_comv_h265d_ar50lt(width, height, num_vpp_pipes);
> +
> +	return 0;
> +}
> +
>   static u32 iris_vpu_dec_line_size(struct iris_inst *inst)
>   {
>   	u32 num_vpp_pipes = inst->core->iris_platform_data->num_vpp_pipe;
> @@ -833,6 +1130,29 @@ static u32 iris_vpu_dec_line_size(struct iris_inst *inst)
>   	return 0;
>   }
>   
Regards,
Vikash

^ permalink raw reply

* Re: [PATCH 26/37] drm: event-notifier: add mechanism to notify about hotplug events
From: Maxime Ripard @ 2026-06-24 15:09 UTC (permalink / raw)
  To: Luca Ceresoli
  Cc: Maarten Lankhorst, Thomas Zimmermann, David Airlie, Simona Vetter,
	Andrzej Hajda, Neil Armstrong, Robert Foss, Laurent Pinchart,
	Jonas Karlman, Jernej Skrabec, Inki Dae, Jagan Teki,
	Marek Szyprowski, Marek Vasut, Stefan Agner, Frank Li,
	Sascha Hauer, Pengutronix Kernel Team, Fabio Estevam, Hui Pu,
	Ian Ray, Thomas Petazzoni, dri-devel, linux-kernel, imx,
	linux-arm-kernel
In-Reply-To: <DJ4EWMIX84PN.3VTENLJT2FWDH@bootlin.com>

[-- Attachment #1: Type: text/plain, Size: 1809 bytes --]

On Tue, Jun 09, 2026 at 11:30:52AM +0200, Luca Ceresoli wrote:
> On Mon Jun 8, 2026 at 2:13 PM CEST, Maxime Ripard wrote:
> > On Tue, May 19, 2026 at 12:37:43PM +0200, Luca Ceresoli wrote:
> >> In preparation for supporting DRM bridge hotplug, add an event notifier to
> >> allow interested parties to be notified about events they need to react to.
> >>
> >> For the initial implementation of bridge hotplug, two events are needed:
> >> bridge detach (happening in drm_bridge.c) and MIPI device attach to MIPI
> >> host (happening in drm_mipi_dsi.c).
> >>
> >> For this reason implement the event notifier in a new common file that
> >> event producers can easily use to send events.
> >>
> >> Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
> >
> > So, you claim in the commit message that it's about a hotplug event, but
> > the only events are the bridge being attached and detached, so not a
> > hotplug event?
> >
> > And why a bridge would want to be notified that itself (or another?)
> > bridge is being attached or detached?
> >
> > You need documentation, and a more descriptive commit message.
> 
> Yes.
> 
> But before that I need a decision about the scope we want to give to this
> notifier. It is a very generic module, with basically no dependencies, so
> people will try to add more unrelated events and we'd need to set a
> boundary.
> 
> Options:
> 
>  * Should it be for hotplug-related events only?
>  * Should it be for any DRM event that needs notifications?
>  * Something else?
> 
> I'm happy to write that in the docs and commit message once it's agreed, so
> any comments would be welcome right now.

At the end of the day, you'd be the first user of it, so you get to make
the rules, but they should be written down :)

Maxime

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 273 bytes --]

^ permalink raw reply

* Re: [PATCH v3 7/7] rust: io: mem: return DevresLt from IoMem/ExclusiveIoMem::into_devres()
From: Alexandre Courbot @ 2026-06-24 15:09 UTC (permalink / raw)
  To: Danilo Krummrich
  Cc: gregkh, rafael, ojeda, boqun, gary, bjorn3_gh, lossin, a.hindborg,
	aliceryhl, tmgross, ecourtney, m.wilczynski, david.m.ertman,
	ira.weiny, leon, daniel.almeida, bhelgaas, kwilczynski,
	driver-core, linux-kernel, nova-gpu, dri-devel, linux-pwm,
	rust-for-linux
In-Reply-To: <20260618230834.812007-8-dakr@kernel.org>

On Fri Jun 19, 2026 at 8:08 AM JST, Danilo Krummrich wrote:
> Implement ForLt and CovariantForLt for IoMem<'static, SIZE> and
> ExclusiveIoMem<'static, SIZE> so that DevresLt can shorten the stored
> 'static lifetime back to the caller's borrow lifetime.
>
> CovariantForLt is sound because both types only hold &'a Device<Bound>,
> which is covariant over 'a.
>
> Since DevresLt::new() handles the lifetime transmutation internally,
> into_devres() no longer needs an explicit transmute to 'static.
>
> Add DevresIoMem<SIZE> and DevresExclusiveIoMem<SIZE> type aliases.
>
> Signed-off-by: Danilo Krummrich <dakr@kernel.org>

Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>

^ permalink raw reply

* Re: [PATCH v2 3/3] iio: magnetometer: st_magn: honour st,fullscale-milligauss DT property
From: Jonathan Cameron @ 2026-06-24 15:09 UTC (permalink / raw)
  To: me
  Cc: github.com, Andy Shevchenko, David Lechner, Nuno Sá,
	Andy Shevchenko, Nathan Chancellor, Nick Desaulniers,
	Bill Wendling, Justin Stitt, Denis Ciocca, Lars-Peter Clausen,
	Rob Herring, Krzysztof Kozlowski, Conor Dooley, Denis Ciocca,
	Linus Walleij, linux-iio, linux-kernel, llvm, devicetree
In-Reply-To: <edcf262b530a88ee602fa07edc8382ac@herrie.org>

On Wed, 24 Jun 2026 06:18:45 +0200
me@herrie.org wrote:

> On 2026-06-23 21:49, Andy Shevchenko wrote:
> > On Tue, Jun 23, 2026 at 08:29:16PM +0100, Jonathan Cameron wrote:  
> >> On Tue, 16 Jun 2026 15:02:06 +0200
> >> Herman van Hazendonk <github.com@herrie.org> wrote:
> >>   
> >> > The ST magnetometer core's common probe hardcodes fs_avl[0] -- the
> >> > highest-sensitivity full-scale supported by the chip -- as the
> >> > starting range. For the LSM303DLH that is +/-1.3 G; for the
> >> > LSM303DLHC and LSM303DLM it is +/-2 G; for the LIS3MDL it is +/-4 G.
> >> >
> >> > That is the right default for "minimal noise floor at a desk", but
> >> > it leaves no margin for boards that pick up appreciable DC bias from
> >> > nearby PCB structures. On the HP TouchPad (apq8060 / tenderloin) the
> >> > LSM303DLH magnetometer is mounted close enough to the surrounding
> >> > power planes that X reads back as the chip's 0xF000 overflow
> >> > sentinel (== -4096 raw, the value the chip publishes when the ADC
> >> > saturates) on every sample at the chip-default range, while Y and Z
> >> > fall well within the +/-1.3 G window.
> >> >
> >> > Parse the st,fullscale-milligauss device-tree property (documented
> >> > separately in dt-bindings/iio/st,st-sensors.yaml) in the
> >> > magnetometer common probe to select the initial fs_avl entry by its
> >> > mg value. The DT binding pins the accepted value set per compatible
> >> > via allOf/if-then enum clauses, so a malformed mg value fails
> >> > dt_binding_check rather than reaching the driver. Sensors with a
> >> > fixed full-scale (fs.addr == 0: LSM303AGR, LIS2MDL, IIS2MDC) have no
> >> > register to switch and the property is rejected outright for them
> >> > in the binding; the parse block is additionally gated on fs.addr as
> >> > defence in depth against stale DTBs.
> >> >
> >> > Per-sensor mg ranges are listed in st_magn_sensors_settings[]. For
> >> > LSM303DLH and LSM303DLHC/DLM the valid values are 1300, 1900, 2500,
> >> > 4000, 4700, 5600 and 8100; for LIS3MDL, LSM9DS1-magn and LSM303C-magn
> >> > they are 4000, 8000, 12000, 16000.
> >> >
> >> > Empirical scale sweep on the HP TouchPad confirmed that on this
> >> > board any fs_avl >= 1 produces non-saturated X readings:
> >> >
> >> >     scale (0.001 G/LSB)  | X raw    Y raw    Z raw
> >> >     --------------------+-------------------------------
> >> >             1.100        | -4096    44       46    (X saturated)
> >> >             0.855        |  -547    37       37    (clean)
> >> >             0.670        |  -433    94      103    (clean)
> >> >             0.450        |  -266    44       71    (clean)
> >> >             0.400        |  -235    34       65    (clean)
> >> >             0.330        |  -196    27       56    (clean)
> >> >             0.230        |  -145    15       40    (clean)
> >> >
> >> > 2500 mg is the natural choice for tenderloin: comfortably outside
> >> > the saturation regime while keeping useful precision for compass
> >> > applications.
> >> >
> >> > Assisted-by: Claude:claude-opus-4-7 sparse smatch clang-analyzer coccinelle checkpatch
> >> > Assisted-by: Sashiko:claude-opus-4-7  
> >> Hmm. First time I remember seeing Sashiko credited like this. Seems 
> >> like pretty much
> >> every patch series of any complexity would end up crediting sashiko.
> >> Out of curiosity were you just looking at reports, or were you running 
> >> it locally to
> >> help with development?  
> > 
> > I believe it's the second one, because LKML version uses Gemini (as far 
> > as I
> > understand the case). At least that's why I haven't commented on this 
> > tag.  
> I have the whole toolchain running locally to avoid too many submissions 
> and
> feedback from Sashiko with Gemini after submitting. For small drivers I 
> can run
> Gemini locally as well, usually stick with Claude Opus 4.7 since that's 
> what I
> have a subscription for. For very complicated and large drivers Claude 
> Opus
> tends to time out even with 1 or 2 hour, so I fall back to Claude Haiku 
> 4.5
> (which catches quite a few things, but is not as thorough as Opus or 
> Gemini).
> 
> Seeing Sashiko tends to provide different feedback on different rounds, 
> I try
> to only submit when Sashiko and all others are clean.
> 
> Hope this clarifies!
Thanks!  I lose track of all these models :)  So the summary is very
useful. I've been meaning to get a similar flow in place myself for
checking local stuff (not that I'm doing any development at the moment)

Jonathan

> 
> Herman


^ permalink raw reply

* Re: [PATCH v2 2/2] arm64: dts: qcom: kaanapali: fix traceNoC probe issue
From: Jie Gan @ 2026-06-24 15:08 UTC (permalink / raw)
  To: Suzuki K Poulose, Konrad Dybcio, Bjorn Andersson, Konrad Dybcio,
	Rob Herring, Krzysztof Kozlowski, Conor Dooley, Tingwei Zhang,
	Jingyi Wang, Abel Vesa, Mike Leach, James Clark, Leo Yan,
	Yuanfang Zhang
  Cc: linux-arm-msm, devicetree, linux-kernel, coresight,
	linux-arm-kernel
In-Reply-To: <471d7a92-3629-4274-a303-8906d3626037@arm.com>



On 6/24/2026 9:51 PM, Suzuki K Poulose wrote:
> On 24/06/2026 14:48, Jie Gan wrote:
>>
>>
>> On 6/24/2026 9:27 PM, Konrad Dybcio wrote:
>>> On 6/24/26 11:49 AM, Jie Gan wrote:
>>>> The AMBA bus attempts to read the CID/PID of a device before invoking
>>>> its probe function if the arm,primecell-periphid property is absent.
>>>> This causes a deferred probe issue for the TraceNoC device, as the
>>>> CID/PID cannot be read from the periphid register.
>>>
>>> Why does it probe defer?
>>>
>>
>> For an AMBA device, the periphid is mandatory for probing. In the 
>> amba_match function, AMBA attempts to read the periphid from the CID/ 
>> PID registers if the arm,primecell-periphid property is absent in the 
>> device tree. If this read fails, it returns -EPROBE_DEFER, and the 
>> probe ultimately fails.
> 
> Why does it fail ? power management ? hw broken ? Is it really AMBA or 
> do you pretend that to be an AMBA device by faking the CID/PID?

The CID reads as 0 from the register, which I suspect is a hardware 
design issue. I have not yet confirmed this with the hardware team. As a 
workaround, I provided a fake periphid via a DT property to bypass 
amba_read_periphid.


Leo commented in another thread:
 >>tnoc.c registers both an AMBA driver and a platform driver. Shouldn't 
 >>it
 >>be registered as a platform device instead?

The platform driver is intended for the interconnect TraceNoC device and 
is not designed to allocate an ATID. The issue is that the TPDM device 
borrows the ATID from the TraceNoC device, resulting in the ATID always 
being 0 when associated with an interconnect NoC device.

However, I believe it is acceptable to allocate an ATID for the itNoC 
device, and the issue can be fixed this way.

Thanks,
Jie

> 
> Suzuki
> 
> 
>> Most AMBA devices expose valid CID/PID registers, so specifying 
>> arm,primecell-periphid in the device tree is usually unnecessary. 
>> However, for the TraceNoC device in this case, AMBA cannot reliably 
>> read the periphid from the corresponding registers.
>>
>>> And is this required for all TNOC devices?
>>
>> So far, the TNOC device has been added to sm8750, Glymur, and 
>> Kaanapali platforms, and all exhibit probe failures due to the same 
>> root cause.
>>
>> I prefer to fix it on Kaanapali first.
>>
>> Thanks,
>> Jie
>>
>>>
>>> Konrad
>>
> 


^ permalink raw reply

* [PATCH v3 3/3] rust: sync: Use safe synchronize_rcu() abstraction in poll
From: Philipp Stanner @ 2026-06-24 15:07 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Daniel Almeida, Tamir Duberstein,
	Alexandre Courbot, Onur Özkan, Alexander Viro,
	Christian Brauner, Jan Kara, Lyude Paul, Paul E. McKenney,
	Frederic Weisbecker, Neeraj Upadhyay, Joel Fernandes,
	Josh Triplett, Uladzislau Rezki, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Christian Schrefl,
	Philipp Stanner
  Cc: rust-for-linux, linux-kernel, linux-fsdevel, rcu
In-Reply-To: <20260624150704.1504001-2-phasta@kernel.org>

We now have a safe wrapper for the foreign function synchronize_rcu().

Use it in poll.rs.

Signed-off-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Onur Özkan <work@onurozkan.dev>
Reviewed-by: Danilo Krummrich <dakr@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
---
 rust/kernel/sync/poll.rs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs
index 0ec985d560c8..30ebeea1702f 100644
--- a/rust/kernel/sync/poll.rs
+++ b/rust/kernel/sync/poll.rs
@@ -8,7 +8,11 @@
     bindings,
     fs::File,
     prelude::*,
-    sync::{CondVar, LockClassKey},
+    sync::{
+        CondVar,
+        LockClassKey,
+        rcu::synchronize_rcu, //
+    }, //
 };
 use core::{marker::PhantomData, ops::Deref};
 
@@ -99,8 +103,6 @@ fn drop(self: Pin<&mut Self>) {
         unsafe { bindings::__wake_up_pollfree(self.inner.wait_queue_head.get()) };
 
         // Wait for epoll items to be properly removed.
-        //
-        // SAFETY: Just an FFI call.
-        unsafe { bindings::synchronize_rcu() };
+        synchronize_rcu();
     }
 }
-- 
2.54.0


^ permalink raw reply related

* [PATCH v3 2/3] rust: revocable: Use safe synchronize_rcu() abstraction
From: Philipp Stanner @ 2026-06-24 15:07 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Daniel Almeida, Tamir Duberstein,
	Alexandre Courbot, Onur Özkan, Alexander Viro,
	Christian Brauner, Jan Kara, Lyude Paul, Paul E. McKenney,
	Frederic Weisbecker, Neeraj Upadhyay, Joel Fernandes,
	Josh Triplett, Uladzislau Rezki, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Christian Schrefl,
	Philipp Stanner
  Cc: rust-for-linux, linux-kernel, linux-fsdevel, rcu
In-Reply-To: <20260624150704.1504001-2-phasta@kernel.org>

We now have a safe wrapper for the foreign function synchronize_rcu().

Use it in revocable.rs.

Signed-off-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Onur Özkan <work@onurozkan.dev>
Reviewed-by: Danilo Krummrich <dakr@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
---
 rust/kernel/revocable.rs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs
index 0f4ae673256d..f539603349f1 100644
--- a/rust/kernel/revocable.rs
+++ b/rust/kernel/revocable.rs
@@ -7,7 +7,11 @@
 
 use pin_init::Wrapper;
 
-use crate::{bindings, prelude::*, sync::rcu, types::Opaque};
+use crate::{
+    prelude::*,
+    sync::rcu,
+    types::Opaque, //
+};
 use core::{
     marker::PhantomData,
     ops::Deref,
@@ -161,8 +165,7 @@ unsafe fn revoke_internal<const SYNC: bool>(&self) -> bool {
 
         if revoke {
             if SYNC {
-                // SAFETY: Just an FFI call, there are no further requirements.
-                unsafe { bindings::synchronize_rcu() };
+                rcu::synchronize_rcu();
             }
 
             // SAFETY: We know `self.data` is valid because only one CPU can succeed the
-- 
2.54.0


^ permalink raw reply related

* [PATCH v3 1/3] rust: sync: Add abstraction for synchronize_rcu()
From: Philipp Stanner @ 2026-06-24 15:07 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Daniel Almeida, Tamir Duberstein,
	Alexandre Courbot, Onur Özkan, Alexander Viro,
	Christian Brauner, Jan Kara, Lyude Paul, Paul E. McKenney,
	Frederic Weisbecker, Neeraj Upadhyay, Joel Fernandes,
	Josh Triplett, Uladzislau Rezki, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Christian Schrefl,
	Philipp Stanner
  Cc: rust-for-linux, linux-kernel, linux-fsdevel, rcu
In-Reply-To: <20260624150704.1504001-2-phasta@kernel.org>

synchronize_rcu() is a frequently used C function which is always safe
to be called.

Add a safe abstraction for synchronize_rcu().

Signed-off-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Onur Özkan <work@onurozkan.dev>
Reviewed-by: Danilo Krummrich <dakr@kernel.org>
Reviewed-by: Gary Guo <gary@garyguo.net>
---
 rust/kernel/sync/rcu.rs | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs
index a32bef6e490b..2bae76d229f0 100644
--- a/rust/kernel/sync/rcu.rs
+++ b/rust/kernel/sync/rcu.rs
@@ -50,3 +50,19 @@ fn drop(&mut self) {
 pub fn read_lock() -> Guard {
     Guard::new()
 }
+
+/// Wait for one RCU grace period.
+///
+/// Waits for all RCU read-side critical sections (such as those established by
+/// a [`rcu::Guard`]) at the moment of the function call to finish.
+///
+/// Does not prevent new read-side critical sections from starting, which may
+/// begin and run while this call is blocking.
+///
+/// Note that this is one of the RCU primitives which must not be called in
+/// atomic context.
+#[inline]
+pub fn synchronize_rcu() {
+    // SAFETY: `synchronize_rcu()` is always safe to be called from process context.
+    unsafe { bindings::synchronize_rcu() };
+}
-- 
2.54.0


^ permalink raw reply related

* [PATCH v3 0/3] Add and use abstraction for synchronize_rcu()
From: Philipp Stanner @ 2026-06-24 15:07 UTC (permalink / raw)
  To: Miguel Ojeda, Boqun Feng, Gary Guo, Björn Roy Baron,
	Benno Lossin, Andreas Hindborg, Alice Ryhl, Trevor Gross,
	Danilo Krummrich, Daniel Almeida, Tamir Duberstein,
	Alexandre Courbot, Onur Özkan, Alexander Viro,
	Christian Brauner, Jan Kara, Lyude Paul, Paul E. McKenney,
	Frederic Weisbecker, Neeraj Upadhyay, Joel Fernandes,
	Josh Triplett, Uladzislau Rezki, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, Zqiang, Christian Schrefl,
	Philipp Stanner
  Cc: rust-for-linux, linux-kernel, linux-fsdevel, rcu

Changes since v2:
  - Compromise docu for "RCU versus atomic context" a bit better
  - Add respective R-b's

Changes since v1:
  - Vertically format imports. (Onur)
  - Make the new function #[inline]. (Alice)
  - Add Alice's R-b.

Philipp Stanner (3):
  rust: sync: Add abstraction for synchronize_rcu()
  rust: revocable: Use safe synchronize_rcu() abstraction
  rust: sync: Use safe synchronize_rcu() abstraction in poll

 rust/kernel/revocable.rs |  9 ++++++---
 rust/kernel/sync/poll.rs | 10 ++++++----
 rust/kernel/sync/rcu.rs  | 16 ++++++++++++++++
 3 files changed, 28 insertions(+), 7 deletions(-)


base-commit: 43a393185e33e573a374c1d4f7ddf6481484ef8d
-- 
2.54.0


^ permalink raw reply

* Re: [PATCH v2 2/4] mm: drop stale folio_ref_count()==1 check in do_swap_page reuse logic
From: David Hildenbrand (Arm) @ 2026-06-24 15:07 UTC (permalink / raw)
  To: Barry Song (Xiaomi), akpm, linux-mm
  Cc: baoquan.he, chrisl, jp.kobryn, kasong, liam, linux-kernel, ljs,
	mhocko, nphamcs, rppt, shakeel.butt, shikemeng, surenb,
	usama.arif, vbabka, youngjun.park
In-Reply-To: <20260623231635.43086-3-baohua@kernel.org>

On 6/24/26 01:16, Barry Song (Xiaomi) wrote:
> The "we just allocated them without exposing them to the swapcache"
> case no longer exists, as Kairui has routed synchronous I/O through
> the swapcache as well in his series "unify swapin use swap cache and
> cleanup flags"[1]. As a result, folio_ref_count() should never be 1
> in this path, since at least two references are held (base ref plus
> swapcache). Remove the folio_ref_count()==1 check and update the
> comment accordingly.
> 
> [1] https://lore.kernel.org/all/20251220-swap-table-p2-v5-0-8862a265a033@tencent.com/
> 
> Acked-by: Usama Arif <usama.arif@linux.dev>
> Reviewed-by: Kairui Song <kasong@tencent.com>
> Reviewed-by: Baoquan He <baoquan.he@linux.dev>
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
> ---
>  mm/memory.c | 7 ++-----
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index f6848f4234a6..abd0adcf65f0 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -5049,12 +5049,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  
>  	/*
>  	 * Same logic as in do_wp_page(); however, optimize for pages that are

s/Same/Similar/ ?

> -	 * certainly not shared either because we just allocated them without
> -	 * exposing them to the swapcache or because the swap entry indicates
> -	 * exclusivity.
> +	 * certainly not shared because the swap entry indicates exclusivity.
>  	 */
> -	if (!folio_test_ksm(folio) &&
> -	    (exclusive || folio_ref_count(folio) == 1)) {
> +	if (!folio_test_ksm(folio) && exclusive) {

Hmm, but KSM folios should never have "exclusive" set. So I think you can drop
that as well (was only relevant with folio_ref_count==1 check IIRC).

-- 
Cheers,

David

^ permalink raw reply

* Re: [PATCH] nvdimm/btt: add endian conversion in dev_err in btt_log_read
From: Ben Dooks @ 2026-06-24 15:07 UTC (permalink / raw)
  To: Alison Schofield
  Cc: Dan Williams, Vishal Verma, Dave Jiang, Ira Weiny, nvdimm,
	linux-kernel
In-Reply-To: <ajscAZsK9ulXov8w@aschofie-mobl2.lan>

On 24/06/2026 00:51, Alison Schofield wrote:
> On Mon, Jun 22, 2026 at 03:20:11PM +0100, Ben Dooks wrote:
>> The dev_err() call in btt_log_read() is passing a seq value
>> into dev_err() which is a __le32 without any conversion.
>>
>> Fix the following (prototype) sparse warnings:
>> drivers/nvdimm/btt.c:342:17: warning: incorrect type in argument 5 (different base types)
>> drivers/nvdimm/btt.c:342:17:    expected int
>> drivers/nvdimm/btt.c:342:17:    got restricted __le32 [usertype] seq
>> drivers/nvdimm/btt.c:342:17: warning: incorrect type in argument 6 (different base types)
>> drivers/nvdimm/btt.c:342:17:    expected int
>> drivers/nvdimm/btt.c:342:17:    got restricted __le32 [usertype] seq
> 
> Hi Ben,
> 
> Please revise the commit log.
> 
> The commit log is a message to all future readers, not a place to
> paste static analysis warnings and leave the user visible impact
> assumed, or as an exercise for the reader.
> 
> Prefer something like this:
> 
> 	When BTT log corruption is detected, btt_log_read() reports the
> 	sequence numbers of the two log entries. Those values are stored
> 	little-endian, so printing them without conversion can report
> 	byte-swapped sequence numbers on big-endian systems.
> 
> 	Convert the sequence numbers to CPU endianness before passing
> 	them to dev_err().
> 
> 	Issue reported by sparse.
> 
> 
> (There is no need for the sparse pastings.)

Thanks, I've posted a v2, with a reworded patch commit log.

The only comment is that I like the sparse warnings as it makes it
easier to search if there is a patch in flight for this.

-- 
Ben Dooks				http://www.codethink.co.uk/
Senior Engineer				Codethink - Providing Genius

https://www.codethink.co.uk/privacy.html

^ permalink raw reply

* [PATCH net-next] openvswitch: conntrack: annotate ct limit hlist traversal
From: Runyu Xiao @ 2026-06-24 15:01 UTC (permalink / raw)
  To: aconole, echaudro, i.maximets
  Cc: davem, edumazet, kuba, pabeni, horms, netdev, dev, linux-kernel,
	runyu.xiao, jianhao.xu

ct_limit_set() is documented as being called with ovs_mutex held. It
walks the ct limit hlist with hlist_for_each_entry_rcu(), but the
iterator does not currently pass the OVS lockdep condition used
elsewhere for RCU-protected OVS objects.

Pass lockdep_ovsl_is_held() to the iterator. This matches the function's
existing caller contract and lets CONFIG_PROVE_RCU_LIST distinguish the
ovs_mutex-protected update path from the RCU read-side ct_limit_get()
path.

This was found by our static analysis tool and then manually reviewed
against the current tree. In the reviewed CONFIG_PROVE_RCU_LIST triage
run, the writer-side ct limit update produced the expected "RCU-list
traversed in non-reader section!!" warning while ovs_mutex was held,
with the stack matching ct_limit_set() and ovs_ct_limit_set_zone_limit().
The change is limited to documenting the existing protection contract.

This is a lockdep annotation cleanup. It does not change the conntrack
limit list update or release behavior.

Signed-off-by: Runyu Xiao <runyu.xiao@seu.edu.cn>
---
 net/openvswitch/conntrack.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c6fd9c424e8f..95697d4e16e6 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -883,7 +883,8 @@ static void ct_limit_set(const struct ovs_ct_limit_info *info,
 	struct hlist_head *head;

 	head = ct_limit_hash_bucket(info, new_ct_limit->zone);
-	hlist_for_each_entry_rcu(ct_limit, head, hlist_node) {
+	hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
+				 lockdep_ovsl_is_held()) {
 		if (ct_limit->zone == new_ct_limit->zone) {
 			hlist_replace_rcu(&ct_limit->hlist_node,
 					  &new_ct_limit->hlist_node);
-- 
2.34.1

^ permalink raw reply related

* [PATCH] tools/mm: add thp_swap_allocator_test binary to .gitignore
From: Zenghui Yu @ 2026-06-24 15:06 UTC (permalink / raw)
  To: linux-mm, linux-kernel; +Cc: akpm, Zenghui Yu

Tell git to ignore the generated binary for thp_swap_allocator_test.c.

Signed-off-by: Zenghui Yu <zenghui.yu@linux.dev>
---
 tools/mm/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/mm/.gitignore b/tools/mm/.gitignore
index 922879f93fc8..1446a659e540 100644
--- a/tools/mm/.gitignore
+++ b/tools/mm/.gitignore
@@ -2,3 +2,4 @@
 slabinfo
 page-types
 page_owner_sort
+thp_swap_allocator_test
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH net v2] net: sungem: fix probe error cleanup
From: Simon Horman @ 2026-06-24 15:06 UTC (permalink / raw)
  To: Ruoyu Wang
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, netdev, linux-kernel
In-Reply-To: <20260623025759.3468566-1-ruoyuw560@gmail.com>

On Tue, Jun 23, 2026 at 10:57:59AM +0800, Ruoyu Wang wrote:
> gem_init_one() calls gem_remove_one() when register_netdev() fails.
> gem_remove_one() unregisters and frees resources owned by the net_device,
> including the DMA block, MMIO mapping, PCI regions, and the net_device
> itself. gem_init_one() then falls through to its own cleanup labels and
> frees the same resources again.
> 
> Keep the register_netdev() error path in gem_init_one(): clear drvdata so
> PM/remove paths do not see a half-registered device, remove the NAPI
> instance added during probe, and let the existing cleanup labels release
> the resources once.
> 
> The issue was found by a local static-analysis checker for probe error
> paths. The reported path was manually inspected before sending this fix.
> 
> Compile-tested with CONFIG_SUNGEM=y. Runtime testing was not performed
> because no sungem hardware is available.
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Ruoyu Wang <ruoyuw560@gmail.com>
> ---
> v2:
> - Add a Fixes tag.
> - Describe how the issue was found.
> - Add testing information.
> 
> v1: https://lore.kernel.org/netdev/20260620155326.80582-1-ruoyuw560@gmail.com/

Thanks for the update.

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply

* [PATCH v2] nvdimm/btt: fix sequence endian in btt_log_read error print
From: Ben Dooks @ 2026-06-24 15:06 UTC (permalink / raw)
  To: Vishal Verma, Dan Williams, Dave Jiang, Alison Schofield,
	Ira Weiny, nvdimm, linux-kernel
  Cc: Ben Dooks

The error reporting in btt_log_read() prints sequence numbers out
from the log which are stored in little endian without any endian
conversion. Make sure these are passed through endian conversion
before going to the kernel console so the user sees the correct
sequence number and to avoid any warnings from sparse due to
endian conversion.

Fix the following (prototype) sparse warnings:
drivers/nvdimm/btt.c:342:17: warning: incorrect type in argument 5 (different base types)
drivers/nvdimm/btt.c:342:17:    expected int
drivers/nvdimm/btt.c:342:17:    got restricted __le32 [usertype] seq
drivers/nvdimm/btt.c:342:17: warning: incorrect type in argument 6 (different base types)
drivers/nvdimm/btt.c:342:17:    expected int
drivers/nvdimm/btt.c:342:17:    got restricted __le32 [usertype] seq

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
---
v2: reworded commit message
---
 drivers/nvdimm/btt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 7e1112960d7f..e9d548442884 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -341,8 +341,9 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
 	if (old_ent < 0 || old_ent > 1) {
 		dev_err(to_dev(arena),
 				"log corruption (%d): lane %d seq [%d, %d]\n",
-				old_ent, lane, log.ent[arena->log_index[0]].seq,
-				log.ent[arena->log_index[1]].seq);
+				old_ent, lane,
+				le32_to_cpu(log.ent[arena->log_index[0]].seq),
+				le32_to_cpu(log.ent[arena->log_index[1]].seq));
 		/* TODO set error state? */
 		return -EIO;
 	}
-- 
2.37.2.352.g3c44437643


^ permalink raw reply related

* Re: [PATCH v6 1/3] regulator: dt-bindings: Add Unisoc SC2730 PMIC
From: Rob Herring @ 2026-06-24 15:05 UTC (permalink / raw)
  To: Mark Brown
  Cc: Otto Pflüger, Liam Girdwood, Krzysztof Kozlowski,
	Conor Dooley, Orson Zhai, Baolin Wang, Chunyan Zhang, Lee Jones,
	linux-kernel, devicetree, Krzysztof Kozlowski
In-Reply-To: <878f8f38-2294-4097-9d08-2aa04bfe773b@sirena.org.uk>

On Wed, Jun 24, 2026 at 9:13 AM Mark Brown <broonie@kernel.org> wrote:
>
> On Wed, Jun 24, 2026 at 08:06:13AM -0500, Rob Herring wrote:
> > On Sat, Jun 20, 2026 at 10:54:00AM +0200, Otto Pflüger wrote:
> > > Add bindings for the regulators found in the Spreadtrum/Unisoc SC2730
> > > PMIC, used e.g. with the UMS512 and UMS9230 SoCs.
> > >
> > > Signed-off-by: Otto Pflüger <otto.pflueger@abscue.de>
> > > Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
> > > ---
> > >  .../bindings/regulator/sprd,sc2730-regulator.yaml  | 44 ++++++++++++++++++++++
> > >  1 file changed, 44 insertions(+)
>
> > Applied for rc1 to fix the warnings.
>
> Warnings?

Well, one warning:

Documentation/devicetree/bindings/mfd/sprd,sc2731.yaml: Unresolvable
reference: /schemas/regulator/sprd,sc2730-regulator.yaml#

^ permalink raw reply

* Re: [PATCH v3 6/7] rust: pci: return DevresLt from Bar::into_devres()
From: Alexandre Courbot @ 2026-06-24 15:05 UTC (permalink / raw)
  To: Danilo Krummrich
  Cc: gregkh, rafael, ojeda, boqun, gary, bjorn3_gh, lossin, a.hindborg,
	aliceryhl, tmgross, ecourtney, m.wilczynski, david.m.ertman,
	ira.weiny, leon, daniel.almeida, bhelgaas, kwilczynski,
	driver-core, linux-kernel, nova-gpu, dri-devel, linux-pwm,
	rust-for-linux
In-Reply-To: <20260618230834.812007-7-dakr@kernel.org>

On Fri Jun 19, 2026 at 8:08 AM JST, Danilo Krummrich wrote:
> Implement ForLt and CovariantForLt for Bar<'static, SIZE> so that
> DevresLt can shorten the stored 'static lifetime back to the caller's
> borrow lifetime.
>
> CovariantForLt is sound because Bar<'a, SIZE> only holds &'a
> Device<Bound>, which is covariant over 'a.
>
> Since DevresLt::new() handles the lifetime transmutation internally,
> into_devres() no longer needs an explicit transmute to Bar<'static>.
>
> Add a DevresBar<SIZE> type alias for convenience.
>
> Signed-off-by: Danilo Krummrich <dakr@kernel.org>

Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>

^ permalink raw reply

* Re: [PATCH 6.1 337/522] arm64/mm: Enable batched TLB flush in unmap_hotplug_range()
From: Ryan Roberts @ 2026-06-24 15:05 UTC (permalink / raw)
  To: Will Deacon, Ben Hutchings
  Cc: Anshuman Khandual, Catalin Marinas, David Hildenbrand (Arm),
	patches, linux-arm-kernel, linux-kernel, Sasha Levin,
	Greg Kroah-Hartman, stable, mark.rutland
In-Reply-To: <ajqXWqiAol6Shdd6@willie-the-truck>

On 23/06/2026 15:25, Will Deacon wrote:
> On Sun, Jun 21, 2026 at 05:02:27PM +0200, Ben Hutchings wrote:
>> On Tue, 2026-06-16 at 20:28 +0530, Greg Kroah-Hartman wrote:
>>> 6.1-stable review patch.  If anyone has any objections, please let me know.
>>>
>>> ------------------
>>>
>>> From: Anshuman Khandual <anshuman.khandual@arm.com>
>>>
>>> [ Upstream commit 48478b9f791376b4b89018d7afdfd06865498f65 ]
>> [...]
>>> @@ -949,15 +953,14 @@ static void unmap_hotplug_pmd_range(pud_
>>>  		WARN_ON(!pmd_present(pmd));
>>>  		if (pmd_sect(pmd)) {
>>>  			pmd_clear(pmdp);
>>> -
>>> -			/*
>>> -			 * One TLBI should be sufficient here as the PMD_SIZE
>>> -			 * range is mapped with a single block entry.
>>> -			 */
>>> -			flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>>> -			if (free_mapped)
>>> +			if (free_mapped) {
>>> +				/* CONT blocks are not supported in the vmemmap */
>>> +				WARN_ON(pmd_cont(pmd));
>>> +				flush_tlb_kernel_range(addr, addr + PMD_SIZE);
>>
>> It wasn't clear to me from the commit message why this now adds PMD_SIZE
>> rather than PAGE_SIZE.  It seems like this change is fine for Linux
>> 6.13+ with a CPU that supports TLB range flushing, but otherwise results
>> in unnecessarily executing multiple TLB invalidations at intervals of
>> the base page size.
> 
> Hmm, the commit message also makes very little sense to me and so I don't
> understand why this patch has us doing multiple TLB invalidations when
> we run into a !cont, block mapping at the PMD level. The old comment
> (which this patch removes) should still apply afaict.
> 
> Anshuman, Ryan, any ideas what's going on here?

I think this change was probably my fault; Given the API is called
flush_tlb_kernel_range() it seemed like an abuse/hack to pretend we are only
flushing the first PAGE_SIZE of the range. But as I understand it, even if the
HW shatters a block mapping into multiple TLB entries, all of the entries
relating to the block mapping will be invalidated if just one of them intersects
the TLBI range/address. So it should be safe to reapply this hack.

Although ideally I think it would be better if this API took a stride argument;
then the intent is clear.

What's the best way to handle this? Submit a patch for mainline that reverts
this part, then get it backported to stable (implying this current patch will
have been applied to stable)?

Thanks,
Ryan


> 
> Will


^ permalink raw reply

* [PATCH net] dt-bindings: net: renesas,ether: Drop example "ethernet-phy-ieee802.3-c22" fallback
From: Rob Herring (Arm) @ 2026-06-24 15:02 UTC (permalink / raw)
  To: Niklas Söderlund, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Krzysztof Kozlowski, Conor Dooley,
	Geert Uytterhoeven, Magnus Damm, Sergei Shtylyov
  Cc: netdev, linux-renesas-soc, devicetree, linux-kernel

Fix the Micrel PHY in the example which shouldn't have the
fallback "ethernet-phy-ieee802.3-c22" compatible:

Documentation/devicetree/bindings/net/renesas,ether.example.dtb: ethernet-phy@1 \
  (ethernet-phy-id0022.1537): compatible: ['ethernet-phy-id0022.1537', 'ethernet-phy-ieee802.3-c22'] is too long
        from schema $id: http://devicetree.org/schemas/net/micrel.yaml

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 Documentation/devicetree/bindings/net/renesas,ether.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml
index f0a52f47f95a..dd7187f12a67 100644
--- a/Documentation/devicetree/bindings/net/renesas,ether.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml
@@ -121,8 +121,7 @@ examples:
         #size-cells = <0>;
 
         phy1: ethernet-phy@1 {
-            compatible = "ethernet-phy-id0022.1537",
-                         "ethernet-phy-ieee802.3-c22";
+            compatible = "ethernet-phy-id0022.1537";
             reg = <1>;
             interrupt-parent = <&irqc0>;
             interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH v2 02/13] mm/page_alloc: some renames to clarify alloc_flags scopes
From: Suren Baghdasaryan @ 2026-06-24 15:03 UTC (permalink / raw)
  To: Brendan Jackman
  Cc: Andrew Morton, Vlastimil Babka, Michal Hocko, Johannes Weiner,
	Zi Yan, Muchun Song, Oscar Salvador, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Mike Rapoport, Matthew Brost,
	Joshua Hahn, Rakie Kim, Byungchul Park, Ying Huang,
	Alistair Popple, Hao Li, Christoph Lameter, David Rientjes,
	Roman Gushchin, Sebastian Andrzej Siewior, Clark Williams,
	Steven Rostedt, Harry Yoo (Oracle), Gregory Price,
	Alexei Starovoitov, Matthew Wilcox, linux-mm, linux-kernel,
	linux-rt-devel
In-Reply-To: <20260622-alloc-trylock-v2-2-31f31367d420@google.com>

On Mon, Jun 22, 2026 at 3:01 AM Brendan Jackman <jackmanb@google.com> wrote:
>
> It's pretty confusing that:
>
> - The slowpath and fastpath have a totally distinct set of alloc_flags.
>
> - gfp_to_alloc_flags() sounds generic but it only influences the
>   slowpath.
>
> - prepare_alloc_pages() is generic in that it sets up the
>   alloc_context, but the alloc_flags it generates are only used for the
>   fastpath.

I understand you want to clarify the usage but this particular point
seems to be an implementation detail. IOW, if tomorrow
__alloc_frozen_pages_noprof() is changed to use alloc_flags when
calling __alloc_pages_slowpath(), would we be renaming it back? So, I
would suggest keeping alloc_flags as is in prepare_alloc_pages() and
its callers. The rest LGTM.

>
> Rename some variables to highlight which alloc_flags are
> fastpath-specific. Rename gfp_to_alloc_flags() to highlight that it's
> slowpath-specific.
>
> gfp_to_alloc_flags_cma()'s current name is actually fine, but rename it
> anyway, just for consistency.
>
> Signed-off-by: Brendan Jackman <jackmanb@google.com>
> ---
>  mm/page_alloc.c | 28 ++++++++++++++--------------
>  1 file changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6c4eb6908bd95..bc05d75a41627 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3771,8 +3771,8 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
>  }
>
>  /* Must be called after current_gfp_context() which can change gfp_mask */
> -static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
> -                                                 unsigned int alloc_flags)
> +static inline unsigned int cma_alloc_flags(gfp_t gfp_mask,
> +                                          unsigned int alloc_flags)
>  {
>  #ifdef CONFIG_CMA
>         if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
> @@ -4471,7 +4471,7 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
>  }
>
>  static inline unsigned int
> -gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
> +slowpath_alloc_flags(gfp_t gfp_mask, unsigned int order)
>  {
>         unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
>
> @@ -4508,7 +4508,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
>         } else if (unlikely(rt_or_dl_task(current)) && in_task())
>                 alloc_flags |= ALLOC_MIN_RESERVE;
>
> -       alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
> +       alloc_flags = cma_alloc_flags(gfp_mask, alloc_flags);
>
>         if (defrag_mode)
>                 alloc_flags |= ALLOC_NOFRAGMENT;
> @@ -4774,7 +4774,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>          * kswapd needs to be woken up, and to avoid the cost of setting up
>          * alloc_flags precisely. So we do that now.
>          */
> -       alloc_flags = gfp_to_alloc_flags(gfp_mask, order);
> +       alloc_flags = slowpath_alloc_flags(gfp_mask, order);
>
>         /*
>          * We need to recalculate the starting point for the zonelist iterator
> @@ -4815,7 +4815,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>
>         reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
>         if (reserve_flags)
> -               alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags) |
> +               alloc_flags = cma_alloc_flags(gfp_mask, reserve_flags) |
>                                           (alloc_flags & ALLOC_KSWAPD);
>
>         /*
> @@ -5017,7 +5017,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
>                 int preferred_nid, nodemask_t *nodemask,
>                 struct alloc_context *ac, gfp_t *alloc_gfp,
> -               unsigned int *alloc_flags)
> +               unsigned int *fastpath_alloc_flags)
>  {
>         ac->highest_zoneidx = gfp_zone(gfp_mask);
>         ac->zonelist = node_zonelist(preferred_nid, gfp_mask);
> @@ -5033,7 +5033,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
>                 if (in_task() && !ac->nodemask)
>                         ac->nodemask = &cpuset_current_mems_allowed;
>                 else
> -                       *alloc_flags |= ALLOC_CPUSET;
> +                       *fastpath_alloc_flags |= ALLOC_CPUSET;
>         }
>
>         might_alloc(gfp_mask);
> @@ -5042,11 +5042,11 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
>          * Don't invoke should_fail logic, since it may call
>          * get_random_u32() and printk() which need to spin_lock.
>          */
> -       if (!(*alloc_flags & ALLOC_NOLOCK) &&
> +       if (!(*fastpath_alloc_flags & ALLOC_NOLOCK) &&
>             should_fail_alloc_page(gfp_mask, order))
>                 return false;
>
> -       *alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, *alloc_flags);
> +       *fastpath_alloc_flags = cma_alloc_flags(gfp_mask, *fastpath_alloc_flags);
>
>         /* Dirty zone balancing only done in the fast path */
>         ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
> @@ -5260,7 +5260,7 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
>                 int preferred_nid, nodemask_t *nodemask)
>  {
>         struct page *page;
> -       unsigned int alloc_flags = ALLOC_WMARK_LOW;
> +       unsigned int fastpath_alloc_flags = ALLOC_WMARK_LOW;
>         gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
>         struct alloc_context ac = { };
>
> @@ -5282,17 +5282,17 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
>         gfp = current_gfp_context(gfp);
>         alloc_gfp = gfp;
>         if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac,
> -                       &alloc_gfp, &alloc_flags))
> +                       &alloc_gfp, &fastpath_alloc_flags))
>                 return NULL;
>
>         /*
>          * Forbid the first pass from falling back to types that fragment
>          * memory until all local zones are considered.
>          */
> -       alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp);
> +       fastpath_alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp);
>
>         /* First allocation attempt */
> -       page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
> +       page = get_page_from_freelist(alloc_gfp, order, fastpath_alloc_flags, &ac);
>         if (likely(page))
>                 goto out;
>
>
> --
> 2.54.0
>

^ permalink raw reply

* [ndctl PATCH RESEND] test/cxl-mbox: Regression test for huge CXL_MEM_SEND_COMMAND out.size
From: Richard Cheng @ 2026-06-24 15:01 UTC (permalink / raw)
  To: dave, jic23, dave.jiang, alison.schofield, vishal.l.verma, djbw,
	danwilliams, nvdimm
  Cc: iweiny, ming.li, kobak, kaihengf, kees, newtonl, kristinc, mochs,
	linux-cxl, linux-kernel, Richard Cheng

Implement a regression test for unbounded kvzalloc() in the kernel's
cxl_mbox_cmd_ctor(), which a CXL_MEM_SEND_COMMAND with an out.size
greater than INT_MAX could drive into a size > INT_MAX kvmalloc() WARN.

libcxl's cxl_cmd_set_output_payload() rejects an out.size larger than
the mailbox payload_max, so the test crafts a raw struct
cxl_send_command and issues the CXL_MEM_SEND_COMMAND ioctl directly
against the cxl_test mock memdev.

The test is for a kernel bug fix [1].

[1]: https://lore.kernel.org/all/20260624144147.53997-1-icheng@nvidia.com/
Signed-off-by: Richard Cheng <icheng@nvidia.com>
---
 test/cxl-mbox.c  | 129 +++++++++++++++++++++++++++++++++++++++++++++++
 test/cxl-mbox.sh |  48 ++++++++++++++++++
 2 files changed, 177 insertions(+)
 create mode 100644 test/cxl-mbox.c
 create mode 100755 test/cxl-mbox.sh

diff --git a/test/cxl-mbox.c b/test/cxl-mbox.c
new file mode 100644
index 0000000..d81327b
--- /dev/null
+++ b/test/cxl-mbox.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2026 Nvidia Corporation. All rights reserved.
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <cxl/libcxl.h>
+#include <cxl/cxl_mem.h>
+
+static const char provider[] = "cxl_test";
+
+/*
+ * The cxl_test mock advertises a 4 KiB (SZ_4K) mailbox payload_size and
+ * IDENTIFY returns a full struct cxl_mbox_identify. Post-fix the kernel
+ * clamps the output allocation to payload_size and copies that many bytes
+ * back into out.payload, so the buffer must be >= payload_size. 64 KiB is
+ * comfortably above the mock's 4 KiB payload.
+ */
+#define OUT_BUF_SIZE	(64 * 1024)
+
+/*
+ * Regression for the unbounded kvzalloc() in cxl_mbox_cmd_ctor() driven by a
+ * huge CXL_MEM_SEND_COMMAND out.size. The kernel fix CLAMPS the output
+ * allocation to the mailbox payload_size; it does not reject the request.
+ * Assert the ioctl SUCCEEDS (no -ENOMEM) -- do NOT assert -EINVAL.
+ */
+static int test_cxl_mbox_huge_out_size(struct cxl_memdev *memdev)
+{
+	struct cxl_send_command c = { 0 };
+	const char *devname;
+	char path[256];
+	void *buf;
+	int fd, rc;
+
+	devname = cxl_memdev_get_devname(memdev);
+	if (!devname)
+		return -ENODEV;
+
+	snprintf(path, sizeof(path), "/dev/cxl/%s", devname);
+
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		if (errno == ENOENT || errno == ENODEV)
+			return -ENODEV;
+		fprintf(stderr, "failed to open %s: %s\n", path,
+			strerror(errno));
+		return -errno;
+	}
+
+	buf = calloc(1, OUT_BUF_SIZE);
+	if (!buf) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	c.id = CXL_MEM_COMMAND_ID_IDENTIFY;
+	/*
+	 * 0x80000000 (2^31, > INT_MAX) is the proven reproducer that trips the
+	 * size > INT_MAX kvmalloc() WARN. out.size is __s32 in this vendored
+	 * UAPI; cast to avoid -Woverflow, the kernel reads the same 4 bytes
+	 * (kernel UAPI declares it __u32).
+	 */
+	c.out.size = (typeof(c.out.size))0x80000000U;
+	c.out.payload = (__u64)(uintptr_t)buf;
+
+	rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &c);
+
+	/* Pass iff the kernel clamped (success), not rejected. */
+	if (rc == 0 && c.retval == 0) {
+		rc = 0;
+		goto out;
+	}
+
+	fprintf(stderr,
+		"CXL_MEM_SEND_COMMAND huge out.size mishandled: rc=%d errno=%d retval=%u\n",
+		rc, errno, c.retval);
+	rc = -ENXIO;
+
+out:
+	free(buf);
+	close(fd);
+	return rc;
+}
+
+static int test_cxl_mbox(struct cxl_ctx *ctx, struct cxl_bus *bus)
+{
+	struct cxl_memdev *memdev;
+
+	cxl_memdev_foreach(ctx, memdev) {
+		if (cxl_memdev_get_bus(memdev) != bus)
+			continue;
+		return test_cxl_mbox_huge_out_size(memdev);
+	}
+
+	return -ENODEV;
+}
+
+int main(int argc, char *argv[])
+{
+	struct cxl_ctx *ctx;
+	struct cxl_bus *bus;
+	int rc;
+
+	rc = cxl_new(&ctx);
+	if (rc < 0)
+		return rc;
+
+	cxl_set_log_priority(ctx, LOG_DEBUG);
+
+	bus = cxl_bus_get_by_provider(ctx, provider);
+	if (!bus) {
+		fprintf(stderr, "%s: unable to find bus (%s)\n",
+			argv[0], provider);
+		rc = -ENODEV;
+		goto out;
+	}
+
+	rc = test_cxl_mbox(ctx, bus);
+
+out:
+	cxl_unref(ctx);
+	return rc;
+}
diff --git a/test/cxl-mbox.sh b/test/cxl-mbox.sh
new file mode 100755
index 0000000..67fecf5
--- /dev/null
+++ b/test/cxl-mbox.sh
@@ -0,0 +1,48 @@
+#!/bin/bash -Ex
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2026 Nvidia Corporation. All rights reserved.
+
+. $(dirname "$0")/common
+
+BIN="$TEST_PATH"/cxl-mbox
+rc=77
+# 237 is -ENODEV
+ERR_NODEV=237
+# TAINT_WARN is bit 9
+TAINT_WARN=512
+
+trap 'err $LINENO' ERR
+
+modprobe -r cxl_test 2>/dev/null
+modprobe cxl_test
+# cxl_test alone does not autoload the mock memdev module on this box
+modprobe cxl_mock_mem
+
+main()
+{
+	test -x "$BIN" || do_skip "no CXL mailbox test"
+
+	t0=$(cat /proc/sys/kernel/tainted)
+
+	rc=0
+	"$BIN" || rc=$?
+
+	t1=$(cat /proc/sys/kernel/tainted)
+
+	echo "status: $rc"
+	if [ "$rc" -eq "$ERR_NODEV" ]; then
+		do_skip "no cxl_test memdev"
+	elif [ "$rc" -ne 0 ]; then
+		echo "fail: $LINENO" && exit 1
+	fi
+
+	if (( (t1 & TAINT_WARN) && !(t0 & TAINT_WARN) )); then
+		echo "fail: $LINENO kernel WARN taint (bit 9) set" && exit 1
+	fi
+
+	_cxl_cleanup
+}
+
+{
+	main "$@"; exit "$?"
+}

base-commit: 8ad90e54f0ff4f7291e7f21d44d769d10f24e2b6
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH v2 1/4] mm: avoid unnecessary lru drain for wp_can_reuse_anon_folio()
From: David Hildenbrand (Arm) @ 2026-06-24 15:02 UTC (permalink / raw)
  To: Barry Song (Xiaomi), akpm, linux-mm
  Cc: baoquan.he, chrisl, jp.kobryn, kasong, liam, linux-kernel, ljs,
	mhocko, nphamcs, rppt, shakeel.butt, shikemeng, surenb,
	usama.arif, vbabka, youngjun.park
In-Reply-To: <20260623231635.43086-2-baohua@kernel.org>

On 6/24/26 01:16, Barry Song (Xiaomi) wrote:
> We always unconditionally drain the LRU before retrying anon folio
> reuse in wp_can_reuse_anon_folio(). Instead, assume !LRU anon folios
> are in lru_cache, and use the refcount to avoid many unnecessary LRU
> drains.
> 
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Reviewed-by: Baoquan He <baoquan.he@linux.dev>
> Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
> ---
>  mm/memory.c | 8 +++++++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index ff338c2abe92..f6848f4234a6 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4193,12 +4193,18 @@ static bool wp_can_reuse_anon_folio(struct folio *folio,
>  	 */
>  	if (folio_test_ksm(folio) || folio_ref_count(folio) > 3)
>  		return false;
> -	if (!folio_test_lru(folio))
> +	if (!folio_test_lru(folio)) {
> +		/*
> +		 * Assume folio is on lru_cache and holds a cache reference.
> +		 */
> +		if (folio_ref_count(folio) > 2 + folio_test_swapcache(folio))
> +			return false;

I'm not keen on making this function even uglier, so no, not like that.

We have the earlier "folio_ref_count(folio) > 3" check.

In which scenarios can you trigger this such that we would care?

If the answer is "I don't know" there is no reason for a change.

-- 
Cheers,

David

^ permalink raw reply

* Re: [PATCH] test/cxl-mbox: Regression test for huge CXL_MEM_SEND_COMMAND out.size
From: Richard Cheng @ 2026-06-24 15:01 UTC (permalink / raw)
  To: dave, jic23, dave.jiang, alison.schofield, vishal.l.verma, djbw,
	danwilliams, nvdimm
  Cc: iweiny, ming.li, kobak, kaihengf, kees, newtonl, kristinc, mochs,
	linux-cxl, linux-kernel
In-Reply-To: <20260624145843.55116-1-icheng@nvidia.com>

On Wed, Jun 24, 2026 at 10:58:43PM +0800, Richard Cheng wrote:
> Implement a regression test for unbounded kvzalloc() in the kernel's
> cxl_mbox_cmd_ctor(), which a CXL_MEM_SEND_COMMAND with an out.size
> greater than INT_MAX could drive into a size > INT_MAX kvmalloc() WARN.
> 
> libcxl's cxl_cmd_set_output_payload() rejects an out.size larger than
> the mailbox payload_max, so the test crafts a raw struct
> cxl_send_command and issues the CXL_MEM_SEND_COMMAND ioctl directly
> against the cxl_test mock memdev.
> 
> The test is for a kernel bug fix [1].
> 
> [1]: https://lore.kernel.org/all/20260624144147.53997-1-icheng@nvidia.com/
> Signed-off-by: Richard Cheng <icheng@nvidia.com>

Sorry, forgot to add the ndctl prefix, please ignore this one.
I'll resend it.

Best regards,
Richard Cheng.

> ---
>  test/cxl-mbox.c  | 129 +++++++++++++++++++++++++++++++++++++++++++++++
>  test/cxl-mbox.sh |  48 ++++++++++++++++++
>  2 files changed, 177 insertions(+)
>  create mode 100644 test/cxl-mbox.c
>  create mode 100755 test/cxl-mbox.sh
> 
> diff --git a/test/cxl-mbox.c b/test/cxl-mbox.c
> new file mode 100644
> index 0000000..d81327b
> --- /dev/null
> +++ b/test/cxl-mbox.c
> @@ -0,0 +1,129 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (C) 2026 Nvidia Corporation. All rights reserved.
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stddef.h>
> +#include <stdlib.h>
> +#include <syslog.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/ioctl.h>
> +#include <cxl/libcxl.h>
> +#include <cxl/cxl_mem.h>
> +
> +static const char provider[] = "cxl_test";
> +
> +/*
> + * The cxl_test mock advertises a 4 KiB (SZ_4K) mailbox payload_size and
> + * IDENTIFY returns a full struct cxl_mbox_identify. Post-fix the kernel
> + * clamps the output allocation to payload_size and copies that many bytes
> + * back into out.payload, so the buffer must be >= payload_size. 64 KiB is
> + * comfortably above the mock's 4 KiB payload.
> + */
> +#define OUT_BUF_SIZE	(64 * 1024)
> +
> +/*
> + * Regression for the unbounded kvzalloc() in cxl_mbox_cmd_ctor() driven by a
> + * huge CXL_MEM_SEND_COMMAND out.size. The kernel fix CLAMPS the output
> + * allocation to the mailbox payload_size; it does not reject the request.
> + * Assert the ioctl SUCCEEDS (no -ENOMEM) -- do NOT assert -EINVAL.
> + */
> +static int test_cxl_mbox_huge_out_size(struct cxl_memdev *memdev)
> +{
> +	struct cxl_send_command c = { 0 };
> +	const char *devname;
> +	char path[256];
> +	void *buf;
> +	int fd, rc;
> +
> +	devname = cxl_memdev_get_devname(memdev);
> +	if (!devname)
> +		return -ENODEV;
> +
> +	snprintf(path, sizeof(path), "/dev/cxl/%s", devname);
> +
> +	fd = open(path, O_RDWR);
> +	if (fd < 0) {
> +		if (errno == ENOENT || errno == ENODEV)
> +			return -ENODEV;
> +		fprintf(stderr, "failed to open %s: %s\n", path,
> +			strerror(errno));
> +		return -errno;
> +	}
> +
> +	buf = calloc(1, OUT_BUF_SIZE);
> +	if (!buf) {
> +		rc = -ENOMEM;
> +		goto out;
> +	}
> +
> +	c.id = CXL_MEM_COMMAND_ID_IDENTIFY;
> +	/*
> +	 * 0x80000000 (2^31, > INT_MAX) is the proven reproducer that trips the
> +	 * size > INT_MAX kvmalloc() WARN. out.size is __s32 in this vendored
> +	 * UAPI; cast to avoid -Woverflow, the kernel reads the same 4 bytes
> +	 * (kernel UAPI declares it __u32).
> +	 */
> +	c.out.size = (typeof(c.out.size))0x80000000U;
> +	c.out.payload = (__u64)(uintptr_t)buf;
> +
> +	rc = ioctl(fd, CXL_MEM_SEND_COMMAND, &c);
> +
> +	/* Pass iff the kernel clamped (success), not rejected. */
> +	if (rc == 0 && c.retval == 0) {
> +		rc = 0;
> +		goto out;
> +	}
> +
> +	fprintf(stderr,
> +		"CXL_MEM_SEND_COMMAND huge out.size mishandled: rc=%d errno=%d retval=%u\n",
> +		rc, errno, c.retval);
> +	rc = -ENXIO;
> +
> +out:
> +	free(buf);
> +	close(fd);
> +	return rc;
> +}
> +
> +static int test_cxl_mbox(struct cxl_ctx *ctx, struct cxl_bus *bus)
> +{
> +	struct cxl_memdev *memdev;
> +
> +	cxl_memdev_foreach(ctx, memdev) {
> +		if (cxl_memdev_get_bus(memdev) != bus)
> +			continue;
> +		return test_cxl_mbox_huge_out_size(memdev);
> +	}
> +
> +	return -ENODEV;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	struct cxl_ctx *ctx;
> +	struct cxl_bus *bus;
> +	int rc;
> +
> +	rc = cxl_new(&ctx);
> +	if (rc < 0)
> +		return rc;
> +
> +	cxl_set_log_priority(ctx, LOG_DEBUG);
> +
> +	bus = cxl_bus_get_by_provider(ctx, provider);
> +	if (!bus) {
> +		fprintf(stderr, "%s: unable to find bus (%s)\n",
> +			argv[0], provider);
> +		rc = -ENODEV;
> +		goto out;
> +	}
> +
> +	rc = test_cxl_mbox(ctx, bus);
> +
> +out:
> +	cxl_unref(ctx);
> +	return rc;
> +}
> diff --git a/test/cxl-mbox.sh b/test/cxl-mbox.sh
> new file mode 100755
> index 0000000..67fecf5
> --- /dev/null
> +++ b/test/cxl-mbox.sh
> @@ -0,0 +1,48 @@
> +#!/bin/bash -Ex
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (C) 2026 Nvidia Corporation. All rights reserved.
> +
> +. $(dirname "$0")/common
> +
> +BIN="$TEST_PATH"/cxl-mbox
> +rc=77
> +# 237 is -ENODEV
> +ERR_NODEV=237
> +# TAINT_WARN is bit 9
> +TAINT_WARN=512
> +
> +trap 'err $LINENO' ERR
> +
> +modprobe -r cxl_test 2>/dev/null
> +modprobe cxl_test
> +# cxl_test alone does not autoload the mock memdev module on this box
> +modprobe cxl_mock_mem
> +
> +main()
> +{
> +	test -x "$BIN" || do_skip "no CXL mailbox test"
> +
> +	t0=$(cat /proc/sys/kernel/tainted)
> +
> +	rc=0
> +	"$BIN" || rc=$?
> +
> +	t1=$(cat /proc/sys/kernel/tainted)
> +
> +	echo "status: $rc"
> +	if [ "$rc" -eq "$ERR_NODEV" ]; then
> +		do_skip "no cxl_test memdev"
> +	elif [ "$rc" -ne 0 ]; then
> +		echo "fail: $LINENO" && exit 1
> +	fi
> +
> +	if (( (t1 & TAINT_WARN) && !(t0 & TAINT_WARN) )); then
> +		echo "fail: $LINENO kernel WARN taint (bit 9) set" && exit 1
> +	fi
> +
> +	_cxl_cleanup
> +}
> +
> +{
> +	main "$@"; exit "$?"
> +}
> 
> base-commit: 8ad90e54f0ff4f7291e7f21d44d769d10f24e2b6
> -- 
> 2.43.0
> 

^ permalink raw reply

* Re: [PATCH v3 1/2] dt-bindings: iio: dac: Add AD5529R
From: Janani Sunil @ 2026-06-24 15:01 UTC (permalink / raw)
  To: Jonathan Cameron, Rodrigo Alencar
  Cc: Nuno Sá, Conor Dooley, Janani Sunil, Lars-Peter Clausen,
	Michael Hennerich, David Lechner, Nuno Sá, Andy Shevchenko,
	Rob Herring, Krzysztof Kozlowski, Conor Dooley, Philipp Zabel,
	Jonathan Corbet, Shuah Khan, linux-iio, devicetree, linux-kernel,
	linux-doc, Mark Brown
In-Reply-To: <20260623155732.318f34f2@jic23-huawei>


On 6/23/26 16:57, Jonathan Cameron wrote:
> On Tue, 23 Jun 2026 09:09:14 +0100
> Rodrigo Alencar <455.rodrigo.alencar@gmail.com> wrote:
>
>> On 22/06/26 13:20, Nuno Sá wrote:
>>> On Mon, Jun 22, 2026 at 12:51:20PM +0100, Rodrigo Alencar wrote:
>>>> On 22/06/26 11:29, Nuno Sá wrote:
>>>>> On Mon, Jun 22, 2026 at 10:24:05AM +0100, Rodrigo Alencar wrote:
>>>>>> On 21/06/26 15:33, Jonathan Cameron wrote:
>>>>>>> On Fri, 19 Jun 2026 16:54:11 +0100
>>>>>>> Nuno Sá <noname.nuno@gmail.com> wrote:
>>>>>>>    
>>>>>>>> On Fri, Jun 19, 2026 at 03:12:07PM +0100, Conor Dooley wrote:
>>>>>>>>> On Fri, Jun 19, 2026 at 02:01:08PM +0100, Nuno Sá wrote:
>>>>>>>>>> On Fri, Jun 19, 2026 at 12:40:54PM +0100, Conor Dooley wrote:
>>>>>>>>>>> On Fri, Jun 19, 2026 at 12:36:55PM +0100, Conor Dooley wrote:
>>>>>>>>>>>> On Fri, Jun 19, 2026 at 12:33:11PM +0200, Janani Sunil wrote:
>>>>>>>>>>>>> On 6/14/26 21:44, Jonathan Cameron wrote:
>>>>>>>>>>>>>> On Tue, 9 Jun 2026 16:47:23 +0200
>>>>>>>>>>>>>> Janani Sunil <jan.sun97@gmail.com> wrote:
>>>>>>>>>>>>>>      
>>>>>>>>>>>>>>> On 5/26/26 15:11, Rodrigo Alencar wrote:
>>>>>>>>>>>>>>>> On 26/05/19 05:42PM, Janani Sunil wrote:
>>>>>>>>>>>>>>>>> Devicetree bindings for AD5529R 16 channel 12/16 bit high voltage,
>>>>>>>>>>>>>>>>> buffered voltage output digital-to-analog converter (DAC) with an
>>>>>>>>>>>>>>>>> integrated precision reference.
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>> Probably others may comment on that, but...
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> This parent node may support device addressing for multi-device support through
>>>>>>>>>>>>>>>> those ID pins. I suppose that each device may have its own power supplies or
>>>>>>>>>>>>>>>> other resources like the toggle pins or reset and enable.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> That way I suppose that an example would look like...
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +patternProperties:
>>>>>>>>>>>>>>>>> +  "^channel@([0-9]|1[0-5])$":
>>>>>>>>>>>>>>>>> +    type: object
>>>>>>>>>>>>>>>>> +    description: Child nodes for individual channel configuration
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +    properties:
>>>>>>>>>>>>>>>>> +      reg:
>>>>>>>>>>>>>>>>> +        description: Channel number.
>>>>>>>>>>>>>>>>> +        minimum: 0
>>>>>>>>>>>>>>>>> +        maximum: 15
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +      adi,output-range-microvolt:
>>>>>>>>>>>>>>>>> +        description: |
>>>>>>>>>>>>>>>>> +          Output voltage range for this channel as [min, max] in microvolts.
>>>>>>>>>>>>>>>>> +          If not specified, defaults to 0V to 5V range.
>>>>>>>>>>>>>>>>> +        oneOf:
>>>>>>>>>>>>>>>>> +          - items:
>>>>>>>>>>>>>>>>> +              - const: 0
>>>>>>>>>>>>>>>>> +              - enum: [5000000, 10000000, 20000000, 40000000]
>>>>>>>>>>>>>>>>> +          - items:
>>>>>>>>>>>>>>>>> +              - const: -5000000
>>>>>>>>>>>>>>>>> +              - const: 5000000
>>>>>>>>>>>>>>>>> +          - items:
>>>>>>>>>>>>>>>>> +              - const: -10000000
>>>>>>>>>>>>>>>>> +              - const: 10000000
>>>>>>>>>>>>>>>>> +          - items:
>>>>>>>>>>>>>>>>> +              - const: -15000000
>>>>>>>>>>>>>>>>> +              - const: 15000000
>>>>>>>>>>>>>>>>> +          - items:
>>>>>>>>>>>>>>>>> +              - const: -20000000
>>>>>>>>>>>>>>>>> +              - const: 20000000
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +    required:
>>>>>>>>>>>>>>>>> +      - reg
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +    additionalProperties: false
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +required:
>>>>>>>>>>>>>>>>> +  - compatible
>>>>>>>>>>>>>>>>> +  - reg
>>>>>>>>>>>>>>>>> +  - vdd-supply
>>>>>>>>>>>>>>>>> +  - avdd-supply
>>>>>>>>>>>>>>>>> +  - hvdd-supply
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +dependencies:
>>>>>>>>>>>>>>>>> +  spi-cpha: [ spi-cpol ]
>>>>>>>>>>>>>>>>> +  spi-cpol: [ spi-cpha ]
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +allOf:
>>>>>>>>>>>>>>>>> +  - $ref: /schemas/spi/spi-peripheral-props.yaml#
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +unevaluatedProperties: false
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +examples:
>>>>>>>>>>>>>>>>> +  - |
>>>>>>>>>>>>>>>>> +    #include <dt-bindings/gpio/gpio.h>
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +    spi {
>>>>>>>>>>>>>>>>> +        #address-cells = <1>;
>>>>>>>>>>>>>>>>> +        #size-cells = <0>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +        dac@0 {
>>>>>>>>>>>>>>>>> +            compatible = "adi,ad5529r-16";
>>>>>>>>>>>>>>>>> +            reg = <0>;
>>>>>>>>>>>>>>>>> +            spi-max-frequency = <25000000>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +            vdd-supply = <&vdd_regulator>;
>>>>>>>>>>>>>>>>> +            avdd-supply = <&avdd_regulator>;
>>>>>>>>>>>>>>>>> +            hvdd-supply = <&hvdd_regulator>;
>>>>>>>>>>>>>>>>> +            hvss-supply = <&hvss_regulator>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +            reset-gpios = <&gpio0 87 GPIO_ACTIVE_LOW>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +            #address-cells = <1>;
>>>>>>>>>>>>>>>>> +            #size-cells = <0>;
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +            channel@0 {
>>>>>>>>>>>>>>>>> +                reg = <0>;
>>>>>>>>>>>>>>>>> +                adi,output-range-microvolt = <0 5000000>;
>>>>>>>>>>>>>>>>> +            };
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +            channel@1 {
>>>>>>>>>>>>>>>>> +                reg = <1>;
>>>>>>>>>>>>>>>>> +                adi,output-range-microvolt = <(-10000000) 10000000>;
>>>>>>>>>>>>>>>>> +            };
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +            channel@2 {
>>>>>>>>>>>>>>>>> +                reg = <2>;
>>>>>>>>>>>>>>>>> +                adi,output-range-microvolt = <0 40000000>;
>>>>>>>>>>>>>>>>> +            };
>>>>>>>>>>>>>>>>> +        };
>>>>>>>>>>>>>>>>> +    };
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 	spi {
>>>>>>>>>>>>>>>> 		#address-cells = <1>;
>>>>>>>>>>>>>>>> 		#size-cells = <0>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 		multi-dac@0 {
>>>>>>>>>>>>>>>> 			compatible = "adi,ad5529r-16";
>>>>>>>>>>>>>>>> 			reg = <0>;
>>>>>>>>>>>>>>>> 			spi-max-frequency = <25000000>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 			#address-cells = <1>;
>>>>>>>>>>>>>>>> 			#size-cells = <0>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 			dac@0 {
>>>>>>>>>>>>>>>> 				reg = <0>;
>>>>>>>>>>>>>>>> 				vdd-supply = <&vdd_regulator>;
>>>>>>>>>>>>>>>> 				avdd-supply = <&avdd_regulator>;
>>>>>>>>>>>>>>>> 				hvdd-supply = <&hvdd_regulator>;
>>>>>>>>>>>>>>>> 				hvss-supply = <&hvss_regulator>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				reset-gpios = <&gpio0 87 GPIO_ACTIVE_LOW>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				#address-cells = <1>;
>>>>>>>>>>>>>>>> 				#size-cells = <0>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				channel@0 {
>>>>>>>>>>>>>>>> 					reg = <0>;
>>>>>>>>>>>>>>>> 					adi,output-range-microvolt = <0 5000000>;
>>>>>>>>>>>>>>>> 				};
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				channel@1 {
>>>>>>>>>>>>>>>> 					reg = <1>;
>>>>>>>>>>>>>>>> 					adi,output-range-microvolt = <(-10000000) 10000000>;
>>>>>>>>>>>>>>>> 				};
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				channel@2 {
>>>>>>>>>>>>>>>> 					reg = <2>;
>>>>>>>>>>>>>>>> 					adi,output-range-microvolt = <0 40000000>;
>>>>>>>>>>>>>>>> 				};
>>>>>>>>>>>>>>>> 			}
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 			dac@1 {
>>>>>>>>>>>>>>>> 				reg = <1>;
>>>>>>>>>>>>>>>> 				vdd-supply = <&vdd_regulator>;
>>>>>>>>>>>>>>>> 				avdd-supply = <&avdd_regulator>;
>>>>>>>>>>>>>>>> 				hvdd-supply = <&hvdd_regulator>;
>>>>>>>>>>>>>>>> 				hvss-supply = <&hvss_regulator>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				reset-gpios = <&gpio0 88 GPIO_ACTIVE_LOW>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				#address-cells = <1>;
>>>>>>>>>>>>>>>> 				#size-cells = <0>;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				channel@0 {
>>>>>>>>>>>>>>>> 					reg = <0>;
>>>>>>>>>>>>>>>> 					adi,output-range-microvolt = <0 5000000>;
>>>>>>>>>>>>>>>> 				};
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 				channel@1 {
>>>>>>>>>>>>>>>> 					reg = <1>;
>>>>>>>>>>>>>>>> 					adi,output-range-microvolt = <(-10000000) 10000000>;
>>>>>>>>>>>>>>>> 				};
>>>>>>>>>>>>>>>> 			}
>>>>>>>>>>>>>>>> 		};
>>>>>>>>>>>>>>>> 	};
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> then you might need something like:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 	patternProperties:
>>>>>>>>>>>>>>>> 		"^dac@[0-3]$":
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> and put most of the things under this node pattern.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> So the main driver that you're putting together might need to handle up to four instances.
>>>>>>>>>>>>>>>> Even if your current driver cannot handle this, the dt-bindings might need cover that.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Need to double check if each dac node needs a separate compatible, so you would maybe populate
>>>>>>>>>>>>>>>> a platform data to be shared with the child nodes, which would be a separate driver.
>>>>>>>>>>>>>>>> (not sure if it would make sense to mix and match ad5529r-16 and ad5529r-12).
>>>>>>>>>>>>>>> Hi Rodrigo,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thank you for looking at this.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> For now, I would prefer to keep the binding scoped to a single AD5529R device instance. The current
>>>>>>>>>>>>>>> hardware/use case we have only needs one device node and the driver is written around that model as well.
>>>>>>>>>>>>>>> While the device addressing pins could allow multi-device topology, we do not have an actual platform using
>>>>>>>>>>>>>>> that configuration at the moment, so I would prefer not to introduce an extra parent/child binding structure
>>>>>>>>>>>>>>> speculatively without a validating use case.
>>>>>>>>>>>>>> Interesting feature - kind of similar to address control on a typical i2c bus device, or
>>>>>>>>>>>>>> looking at it another way a kind of distributed SPI mux.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Challenge of a binding is we need to anticipate the future.  So I think we do need something
>>>>>>>>>>>>>> like Rodrigo is suggesting even if we only (for now) support a single instance in the driver.
>>>>>>>>>>>>>> That would leave the path open to supporting the addressing at a later date.
>>>>>>>>>>>>>> An alternative might be to look at it like a chained device setup. In those we pretend there
>>>>>>>>>>>>>> is just one device with a lot of channels etc.  The snag is that here things are more loosely
>>>>>>>>>>>>>> coupled whereas for those devices it tends to be you have to read / write the same register
>>>>>>>>>>>>>> in all devices in the chain as one big SPI message.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> +CC Mark Brown as he may know of some precedent for this feature. For his reference..
>>>>>>>>>>>>>> - Each of these devices has 2 ID pins.  The SPI transfers have to contain the 2-bit
>>>>>>>>>>>>>> value that matches that or they are ignored.  Thus a single bus + 1 chip select can
>>>>>>>>>>>>>> be used to talk to 4 devices.  Question is what that looks like in device tree + I guess
>>>>>>>>>>>>>> longer term how to support it cleanly in SPI.
>>>>>>>>>>>> I'd swear I have seen this before, from some Microchip devices. Let me
>>>>>>>>>>>> see if I can find what I am thinking of...
>>>>>>>>>>>
>>>>>>>>>>> microchip,mcp3911 and microchip,mcp3564 both seem to do this with
>>>>>>>>>>> slightly different properties.
>>>>>>>>>>>
>>>>>>>>>>>    microchip,device-addr:
>>>>>>>>>>>      description: Device address when multiple MCP3911 chips are present on the same SPI bus.
>>>>>>>>>>>      $ref: /schemas/types.yaml#/definitions/uint32
>>>>>>>>>>>      enum: [0, 1, 2, 3]
>>>>>>>>>>>      default: 0
>>>>>>>>>>>
>>>>>>>>>>> and
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>    microchip,hw-device-address:
>>>>>>>>>>>      $ref: /schemas/types.yaml#/definitions/uint32
>>>>>>>>>>>      minimum: 0
>>>>>>>>>>>      maximum: 3
>>>>>>>>>>>      description:
>>>>>>>>>>>        The address is set on a per-device basis by fuses in the factory,
>>>>>>>>>>>        configured on request. If not requested, the fuses are set for 0x1.
>>>>>>>>>>>        The device address is part of the device markings to avoid
>>>>>>>>>>>        potential confusion. This address is coded on two bits, so four possible
>>>>>>>>>>>        addresses are available when multiple devices are present on the same
>>>>>>>>>>>        SPI bus with only one Chip Select line for all devices.
>>>>>>>>>>>        Each device communication starts by a CS falling edge, followed by the
>>>>>>>>>>>        clocking of the device address (BITS[7:6] - top two bits of COMMAND BYTE
>>>>>>>>>>>        which is first one on the wire).
>>>>>>>>>>>
>>>>>>>>>>> This sounds exactly like the sort of feature that you're dealing with
>>>>>>>>>>> here?
>>>>>>>>>>>      
>>>>>>>>>> The core idea yes but for this chip, things are a bit more annoying (but
>>>>>>>>>> Janani can correct me if I'm wrong). Here, each device can, in theory,
>>>>>>>>>> have it's own supplies, pins and at the very least, channels with maybe
>>>>>>>>>> different scales. That is why Janani is proposing dac nodes. Given I
>>>>>>>>>> honestly don't like much of that "adi,ad5529r-bus" compatible I wondered
>>>>>>>>>> about solving this at the spi level.
>>>>>>>>>>
>>>>>>>>>> Ah and to make it more annoying, we can also mix 12 and 16 bits variants
>>>>>>>>>> together in the same bus.
>>>>>>>>> I'm definitely missing something, because that property for the
>>>>>>>>> microchip devices is not impacted by what else is on the bus. AFAICT, you
>>>>>>>>> could have an mcp3911 and an mcp3564 on the same bus even though both
>>>>>>>>> are completely different devices with different drivers. They have
>>>>>>>>> individual device nodes and their own supplies etc etc. These aren't
>>>>>>>>> per-channel properties on an adc or dac, they're per child device on a
>>>>>>>>> spi bus.
>>>>>>>> Maybe I'm the one missing something :). IIRC, spi would not allow two
>>>>>>>> devices on the same CS right? Because for this chip we would need
>>>>>>>> something like:
>>>>>>>>
>>>>>>>> spi {
>>>>>>>> 	dac@0 {
>>>>>>>> 		reg = <0>;
>>>>>>>> 		adi,pin-id = <0>;
>>>>>>>> 	};
>>>>>>>>
>>>>>>>> 	dac@1 {
>>>>>>>> 		reg = <0>; // which seems already problematic?
>>>>>>>> 		adi,pin-id = <1>;
>>>>>>>> 	};
>>>>>>>>
>>>>>>>> 	...
>>>>>>>>
>>>>>>>> 	//up to 4
>>>>>>>> };
>>>>>>> Yeah. It's not clear to me how that works for the microchip devices
>>>>>>> (I suspect it doesn't!)
>>>>>>>
>>>>>>> Just thinking as I type, but could we do something a bit nasty with
>>>>>>> a gpio mux that doesn't actually switch but represents the GPIO being
>>>>>>> shared?  Given this is all tied to the spi bus that should all happen
>>>>>>> under serializing locks.
>>>>>>>
>>>>>>> Agreed though that this would be nicer as an SPI thing that lets
>>>>>>> us specify that a single CS is shared by multiple devices and there
>>>>>>> is some other signal acting to select which one we are talking to.
>>>>>>>    
>>>>>> If the device-addressing on the same chip-select is to be handled
>>>>>> by the spi framework, wouldn't we lose device-specific features?
>>>>>>
>>>>>> I understand that this multi-device feature is there mostly to extend the
>>>>>> channel count from 16 to 32, 48 or 64. I suppose the command:
>>>>>>
>>>>>> 	"MULTI DEVICE SW LDAC MODE"
>>>>>>
>>>>>> exists so that software can update channel values across multiple devices.
>>>>> Right! You do have a point! I agree the main driver for a feature like
>>>>> this is likely to extend the channel count and effectively "aggregate"
>>>>> devices.
>>>>>
>>>>> But I would say that even with the spi solution the MULTI DEVICE stuff
>>>>> should be doable (as we still need a sort of adi,pin-id property).
>>>> I don't think we can have something like an IIO buffer shared by multiple
>>>> devices. Synchronizing separate devices would be doable with proper hardware
>>>> support for this (probably involving an FPGA).
>>> True!
>>>    
>>>>     
>>>>> But yes, I do feel that the whole feature is for aggregation so seeing
>>>>> one device with 32 channels is the expectation here? Rather than seeing
>>>>> two devices with 16 channels.
>>>> Yes, I think aggregation is the whole point there... so that the IIO driver
>>>> is multi-device-aware.
>>> Which makes me feel that different pins per device might be possible
>>> from an HW point of view but does not make much sense. For example, for
>>> the buffer example I would expect LDAC to be shared between all the
>>> devices.
>> That is why I would still suggest the multi-dac node in the middle...
>> the parent node can hold shared resources, while the dac children can
>> have their own, overriding or inheriting stuff.
>>
> Before going down that path I'd want confirmation this is something we
> actually think anyone will build.
>
> Jonathan

To directly answer your question: we currently do not have a platform that supports a multi-device topology with independent supplies or reset lines.
Given that, I agree to start with the parallel wiring assumption and defer per-chip resource variation until there is a solid use case. I will also drop the "adi,resolution" proposal and proceed with "adi,device-addrs" in the AD5529R binding.
With all of the above, the proposed binding for the multi-device follow-up series would look like:


     dac@0 {
         compatible = "adi,ad5529r-16";
         reg = <0>;
         adi,device-addrs = <0 1>;
         reset-gpios = <&gpio0 87 GPIO_ACTIVE_LOW>;
         vdd-supply  = <&vdd_reg>;
         hvdd-supply = <&hvdd_reg>;

         channel@0  { reg = <0>;  adi,output-range-microvolt = <0 5000000>; };
         channel@16 { reg = <16>; adi,output-range-microvolt = <0 40000000>; };
     };

Does this look reasonable to everyone?

Regards,
Janani Sunil


^ permalink raw reply

* Re: [BUG] KFENCE: use-after-free read in udp_tunnel_nic_device_sync_work
From: Eric Dumazet @ 2026-06-24 15:00 UTC (permalink / raw)
  To: Sam Sun
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, netdev,
	linux-kernel, syzkaller
In-Reply-To: <CAEkJfYMXsNuJjKWJ5nvvw0afSP77F0WWT0gfj2-sQM3VyZ0brQ@mail.gmail.com>

On Wed, Jun 24, 2026 at 7:46 AM Sam Sun <samsun1006219@gmail.com> wrote:
>

> So we are still freeing struct udp_tunnel_nic while its embedded work_struct
> is active. debugobjects catches this at kfree() before the active work gets a
> chance to run later and dereference the freed utn.
>
> My read is that the conversion from bitfields to atomic bitops removes the
> plain bitfield data race, but UDP_TUNNEL_NIC_WORK_PENDING is still only one
> boolean state. It can represent "some work is pending", but it cannot
> distinguish between:
>   idle
>   queued
>   running
>   running and queued again
>
> In particular, the workqueue core clears WORK_STRUCT_PENDING before invoking
> the worker. At that point the same work item can be queued again by
> udp_tunnel_nic_device_sync(). If an already running instance later executes:
>
>   clear_bit(UDP_TUNNEL_NIC_WORK_PENDING, &utn->flags);
>
> it can still clear the bit that was set for the requeued instance. Then
> udp_tunnel_nic_unregister() may observe UDP_TUNNEL_NIC_WORK_PENDING clear and
> free utn, even though debugobjects still sees utn->work as active.
>

-ETOOMANYBUGS

Ok, we could try to convert pending bit to a refcount.

diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 9944ed923ddfd10f9adf6ad788c0740daeaf2adb..2e14686f35896cb0caba3f8f587ef8b369090fbf
100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -3,6 +3,7 @@

 #include <linux/ethtool_netlink.h>
 #include <linux/netdevice.h>
+#include <linux/refcount.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
@@ -30,9 +31,8 @@ struct udp_tunnel_nic_table_entry {
  * @work:      async work for talking to hardware from process context
  * @dev:       netdev pointer
  * @lock:      protects all fields
- * @need_sync: at least one port start changed
- * @need_replay: space was freed, we need a replay of all ports
- * @work_pending: @work is currently scheduled
+ * @flags:     sync, replay flags
+ * @refcnt:    reference count
  * @n_tables:  number of tables under @entries
  * @missed:    bitmap of tables which overflown
  * @entries:   table of tables of ports currently offloaded
@@ -44,9 +44,11 @@ struct udp_tunnel_nic {

        struct mutex lock;

-       u8 need_sync:1;
-       u8 need_replay:1;
-       u8 work_pending:1;
+       unsigned long flags;
+#define UDP_TUNNEL_NIC_NEED_SYNC       0
+#define UDP_TUNNEL_NIC_NEED_REPLAY     1
+
+       refcount_t refcnt;

        unsigned int n_tables;
        unsigned long missed;
@@ -116,7 +118,7 @@ udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
                           unsigned int flag)
 {
        entry->flags |= flag;
-       utn->need_sync = 1;
+       set_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
 }

 static void
@@ -283,7 +285,7 @@ udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
 static void
 __udp_tunnel_nic_device_sync(struct net_device *dev, struct
udp_tunnel_nic *utn)
 {
-       if (!utn->need_sync)
+       if (!test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags))
                return;

        if (dev->udp_tunnel_nic_info->sync_table)
@@ -291,21 +293,24 @@ __udp_tunnel_nic_device_sync(struct net_device
*dev, struct udp_tunnel_nic *utn)
        else
                udp_tunnel_nic_device_sync_by_port(dev, utn);

-       utn->need_sync = 0;
+       clear_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
        /* Can't replay directly here, in case we come from the tunnel driver's
         * notification - trying to replay may deadlock inside tunnel driver.
         */
-       utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
+       if (udp_tunnel_nic_should_replay(dev, utn))
+               set_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
+       else
+               clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
 }

 static void
 udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
-       if (!utn->need_sync)
+       if (!test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags))
                return;

-       queue_work(udp_tunnel_nic_workqueue, &utn->work);
-       utn->work_pending = 1;
+       if (queue_work(udp_tunnel_nic_workqueue, &utn->work))
+               refcount_inc(&utn->refcnt);
 }

 static bool
@@ -348,7 +353,7 @@ udp_tunnel_nic_has_collision(struct net_device
*dev, struct udp_tunnel_nic *utn,
                        if (!udp_tunnel_nic_entry_is_free(entry) &&
                            entry->port == ti->port &&
                            entry->type != ti->type) {
-                               __set_bit(i, &utn->missed);
+                               set_bit(i, &utn->missed);
                                return true;
                        }
                }
@@ -483,7 +488,7 @@ udp_tunnel_nic_add_new(struct net_device *dev,
struct udp_tunnel_nic *utn,
                 * are no devices currently which have multiple tables accepting
                 * the same tunnel type, and false positives are okay.
                 */
-               __set_bit(i, &utn->missed);
+               set_bit(i, &utn->missed);
        }

        return false;
@@ -552,7 +557,7 @@ static void __udp_tunnel_nic_reset_ntf(struct
net_device *dev)

        mutex_lock(&utn->lock);

-       utn->need_sync = false;
+       clear_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++) {
                        struct udp_tunnel_nic_table_entry *entry;
@@ -696,8 +701,8 @@ udp_tunnel_nic_flush(struct net_device *dev,
struct udp_tunnel_nic *utn)
        for (i = 0; i < utn->n_tables; i++)
                memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
                                                      sizeof(**utn->entries)));
-       WARN_ON(utn->need_sync);
-       utn->need_replay = 0;
+       WARN_ON(test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags));
+       clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
 }

 static void
@@ -713,8 +718,8 @@ udp_tunnel_nic_replay(struct net_device *dev,
struct udp_tunnel_nic *utn)
        for (i = 0; i < utn->n_tables; i++)
                for (j = 0; j < info->tables[i].n_entries; j++)
                        udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
-       utn->missed = 0;
-       utn->need_replay = 0;
+       bitmap_zero(&utn->missed, UDP_TUNNEL_NIC_MAX_TABLES);
+       clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);

        if (!info->shared) {
                udp_tunnel_get_rx_info(dev);
@@ -728,6 +733,25 @@ udp_tunnel_nic_replay(struct net_device *dev,
struct udp_tunnel_nic *utn)
                        udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
 }

+static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
+{
+       unsigned int i;
+
+       for (i = 0; i < utn->n_tables; i++)
+               kfree(utn->entries[i]);
+
+       if (utn->dev)
+               dev_put(utn->dev);
+
+       kfree(utn);
+}
+
+static void udp_tunnel_nic_put(struct udp_tunnel_nic *utn)
+{
+       if (refcount_dec_and_test(&utn->refcnt))
+               udp_tunnel_nic_free(utn);
+}
+
 static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
 {
        struct udp_tunnel_nic *utn =
@@ -736,14 +760,15 @@ static void
udp_tunnel_nic_device_sync_work(struct work_struct *work)
        rtnl_lock();
        mutex_lock(&utn->lock);

-       utn->work_pending = 0;
        __udp_tunnel_nic_device_sync(utn->dev, utn);

-       if (utn->need_replay)
+       if (test_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags))
                udp_tunnel_nic_replay(utn->dev, utn);

        mutex_unlock(&utn->lock);
        rtnl_unlock();
+
+       udp_tunnel_nic_put(utn);
 }

 static struct udp_tunnel_nic *
@@ -759,6 +784,7 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
        utn->n_tables = n_tables;
        INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
        mutex_init(&utn->lock);
+       refcount_set(&utn->refcnt, 1);

        for (i = 0; i < n_tables; i++) {
                utn->entries[i] = kzalloc_objs(*utn->entries[i],
@@ -776,15 +802,6 @@ udp_tunnel_nic_alloc(const struct
udp_tunnel_nic_info *info,
        return NULL;
 }

-static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
-{
-       unsigned int i;
-
-       for (i = 0; i < utn->n_tables; i++)
-               kfree(utn->entries[i]);
-       kfree(utn);
-}
-
 static int udp_tunnel_nic_register(struct net_device *dev)
 {
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
@@ -863,6 +880,7 @@ static void
 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+       bool last = true;

        udp_tunnel_nic_lock(dev);

@@ -889,6 +907,7 @@ udp_tunnel_nic_unregister(struct net_device *dev,
struct udp_tunnel_nic *utn)
                        udp_tunnel_drop_rx_info(dev);
                        utn->dev = first->dev;
                        udp_tunnel_nic_unlock(dev);
+                       last = false;
                        goto release_dev;
                }

@@ -901,16 +920,11 @@ udp_tunnel_nic_unregister(struct net_device
*dev, struct udp_tunnel_nic *utn)
        udp_tunnel_nic_flush(dev, utn);
        udp_tunnel_nic_unlock(dev);

-       /* Wait for the work to be done using the state, netdev core will
-        * retry unregister until we give up our reference on this device.
-        */
-       if (utn->work_pending)
-               return;
-
-       udp_tunnel_nic_free(utn);
+       udp_tunnel_nic_put(utn);
 release_dev:
        dev->udp_tunnel_nic = NULL;
-       dev_put(dev);
+       if (!last)
+               dev_put(dev);
 }

 static int

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox