* Re: [PATCH v2 02/10] dmaengine: imx-sdma: fix spba-bus handling for i.MX8M
From: Shengjiu Wang @ 2026-04-02 3:33 UTC (permalink / raw)
To: Marco Felsch
Cc: Vinod Koul, Shawn Guo, Sascha Hauer, Pengutronix Kernel Team,
Fabio Estevam, Jiada Wang, dmaengine, imx, linux-arm-kernel,
linux-kernel
In-Reply-To: <20250911-v6-16-topic-sdma-v2-2-d315f56343b5@pengutronix.de>
On Fri, Sep 12, 2025 at 6:05 AM Marco Felsch <m.felsch@pengutronix.de> wrote:
>
> Starting with i.MX8M* devices there are multiple spba-busses so we can't
> just search the whole DT for the first spba-bus match and take it.
> Instead we need to check for each device to which bus it belongs and
> setup the spba_{start,end}_addr accordingly per sdma_channel.
>
> While on it, don't ignore errors from of_address_to_resource() if they
> are valid.
>
> Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
> ---
> drivers/dma/imx-sdma.c | 58 ++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 40 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
> index 3ecb917214b1268b148a29df697b780bc462afa4..56daaeb7df03986850c9c74273d0816700581dc0 100644
> --- a/drivers/dma/imx-sdma.c
> +++ b/drivers/dma/imx-sdma.c
> @@ -429,6 +429,8 @@ struct sdma_desc {
> * @event_mask: event mask used in p_2_p script
> * @watermark_level: value for gReg[7], some script will extend it from
> * basic watermark such as p_2_p
> + * @spba_start_addr: SDMA controller SPBA bus start address
> + * @spba_end_addr: SDMA controller SPBA bus end address
> * @shp_addr: value for gReg[6]
> * @per_addr: value for gReg[2]
> * @status: status of dma channel
> @@ -461,6 +463,8 @@ struct sdma_channel {
> dma_addr_t per_address, per_address2;
> unsigned long event_mask[2];
> unsigned long watermark_level;
> + u32 spba_start_addr;
> + u32 spba_end_addr;
> u32 shp_addr, per_addr;
> enum dma_status status;
> struct imx_dma_data data;
> @@ -534,8 +538,6 @@ struct sdma_engine {
> u32 script_number;
> struct sdma_script_start_addrs *script_addrs;
> const struct sdma_driver_data *drvdata;
> - u32 spba_start_addr;
> - u32 spba_end_addr;
> unsigned int irq;
> dma_addr_t bd0_phys;
> struct sdma_buffer_descriptor *bd0;
> @@ -1236,8 +1238,6 @@ static void sdma_channel_synchronize(struct dma_chan *chan)
>
> static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac)
> {
> - struct sdma_engine *sdma = sdmac->sdma;
> -
> int lwml = sdmac->watermark_level & SDMA_WATERMARK_LEVEL_LWML;
> int hwml = (sdmac->watermark_level & SDMA_WATERMARK_LEVEL_HWML) >> 16;
>
> @@ -1263,12 +1263,12 @@ static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac)
> swap(sdmac->event_mask[0], sdmac->event_mask[1]);
> }
>
> - if (sdmac->per_address2 >= sdma->spba_start_addr &&
> - sdmac->per_address2 <= sdma->spba_end_addr)
> + if (sdmac->per_address2 >= sdmac->spba_start_addr &&
> + sdmac->per_address2 <= sdmac->spba_end_addr)
> sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SP;
>
> - if (sdmac->per_address >= sdma->spba_start_addr &&
> - sdmac->per_address <= sdma->spba_end_addr)
> + if (sdmac->per_address >= sdmac->spba_start_addr &&
> + sdmac->per_address <= sdmac->spba_end_addr)
> sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_DP;
>
> sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT;
> @@ -1447,6 +1447,31 @@ static void sdma_desc_free(struct virt_dma_desc *vd)
> kfree(desc);
> }
>
> +static int sdma_config_spba_slave(struct dma_chan *chan)
> +{
> + struct sdma_channel *sdmac = to_sdma_chan(chan);
> + struct device_node *spba_bus;
> + struct resource spba_res;
> + int ret;
> +
> + spba_bus = of_get_parent(chan->slave->of_node);
if the chan is requested by __dma_request_channel(), the chan->slave = NULL
Then there will be an issue here.
Best regards
Shengjiu Wang
> + /* Device doesn't belong to the spba-bus */
> + if (!of_device_is_compatible(spba_bus, "fsl,spba-bus"))
> + return 0;
> +
> + ret = of_address_to_resource(spba_bus, 0, &spba_res);
> + of_node_put(spba_bus);
> + if (ret) {
> + dev_err(sdmac->sdma->dev, "Failed to get spba-bus resources\n");
> + return -EINVAL;
> + }
> +
> + sdmac->spba_start_addr = spba_res.start;
> + sdmac->spba_end_addr = spba_res.end;
> +
> + return 0;
> +}
> +
> static int sdma_alloc_chan_resources(struct dma_chan *chan)
> {
> struct sdma_channel *sdmac = to_sdma_chan(chan);
> @@ -1527,6 +1552,8 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
>
> sdmac->event_id0 = 0;
> sdmac->event_id1 = 0;
> + sdmac->spba_start_addr = 0;
> + sdmac->spba_end_addr = 0;
>
> sdma_set_channel_priority(sdmac, 0);
>
> @@ -1837,6 +1864,7 @@ static int sdma_config(struct dma_chan *chan,
> {
> struct sdma_channel *sdmac = to_sdma_chan(chan);
> struct sdma_engine *sdma = sdmac->sdma;
> + int ret;
>
> memcpy(&sdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg));
>
> @@ -1867,6 +1895,10 @@ static int sdma_config(struct dma_chan *chan,
> sdma_event_enable(sdmac, sdmac->event_id1);
> }
>
> + ret = sdma_config_spba_slave(chan);
> + if (ret)
> + return ret;
> +
> return 0;
> }
>
> @@ -2235,11 +2267,9 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec,
> static int sdma_probe(struct platform_device *pdev)
> {
> struct device_node *np = pdev->dev.of_node;
> - struct device_node *spba_bus;
> const char *fw_name;
> int ret;
> int irq;
> - struct resource spba_res;
> int i;
> struct sdma_engine *sdma;
> s32 *saddr_arr;
> @@ -2375,14 +2405,6 @@ static int sdma_probe(struct platform_device *pdev)
> dev_err(&pdev->dev, "failed to register controller\n");
> goto err_register;
> }
> -
> - spba_bus = of_find_compatible_node(NULL, NULL, "fsl,spba-bus");
> - ret = of_address_to_resource(spba_bus, 0, &spba_res);
> - if (!ret) {
> - sdma->spba_start_addr = spba_res.start;
> - sdma->spba_end_addr = spba_res.end;
> - }
> - of_node_put(spba_bus);
> }
>
> /*
>
> --
> 2.47.3
>
>
^ permalink raw reply
* Re: [PATCH] dmaengine: idxd: fix double free in idxd_setup_groups() error path
From: Shuai Xue @ 2026-04-02 1:44 UTC (permalink / raw)
To: Guangshuo Li, Vinicius Costa Gomes, Dave Jiang, Vinod Koul,
Fenghua Yu, dmaengine, linux-kernel
Cc: stable
In-Reply-To: <20260401033622.1446904-1-lgs201920130244@gmail.com>
On 4/1/26 11:36 AM, Guangshuo Li wrote:
> When an error happens after device_initialize(), idxd_setup_groups()
> calls put_device(conf_dev).
>
> The device release callback idxd_conf_group_release() frees group, but
> the current error paths then call kfree(group) again, causing a double
> free.
>
> Keep the cleanup in idxd_conf_group_release() after put_device() and
> avoid freeing group again in idxd_setup_groups().
>
> Fixes: aa6f4f945b10 ("dmaengine: idxd: fix memory leak in error handling path of idxd_setup_groups")
> Cc: stable@vger.kernel.org
> Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
> ---
> drivers/dma/idxd/init.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index b782eb3c191d..d9a9d56dd277 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -374,7 +374,7 @@ static int idxd_setup_groups(struct idxd_device *idxd)
> rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
> if (rc < 0) {
> put_device(conf_dev);
> - kfree(group);
> +
> goto err;
> }
>
> @@ -399,7 +399,7 @@ static int idxd_setup_groups(struct idxd_device *idxd)
> while (--i >= 0) {
> group = idxd->groups[i];
> put_device(group_confdev(group));
> - kfree(group);
> +
> }
> kfree(idxd->groups);
>
Note that idxd_clean_groups() and idxd_clean_engines() have the same pattern.
It should be fixed in the same patch as well.
Thanks.
Shuai
^ permalink raw reply
* Re: [PATCH] dmaengine: idxd: fix double free in idxd_setup_engines() error path
From: Shuai Xue @ 2026-04-02 1:37 UTC (permalink / raw)
To: Guangshuo Li, Vinicius Costa Gomes, Dave Jiang, Vinod Koul,
Fenghua Yu, dmaengine, linux-kernel
Cc: stable
In-Reply-To: <20260401034029.1457489-1-lgs201920130244@gmail.com>
On 4/1/26 11:40 AM, Guangshuo Li wrote:
> When an error happens after device_initialize(), idxd_setup_engines()
> calls put_device(conf_dev).
>
> The device release callback idxd_conf_engine_release() frees engine,
> but the current error paths then call kfree(engine) again, causing a
> double free.
>
> Keep the cleanup in idxd_conf_engine_release() after put_device() and
> avoid freeing engine again in idxd_setup_engines().
>
> Fixes: 817bced19d1d ("dmaengine: idxd: fix memory leak in error handling path of idxd_setup_engines")
> Cc: stable@vger.kernel.org
> Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
> ---
> drivers/dma/idxd/init.c | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index d9a9d56dd277..4eff74182225 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -310,7 +310,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
> rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id);
> if (rc < 0) {
> put_device(conf_dev);
> - kfree(engine);
> +
> goto err;
> }
>
> @@ -324,7 +324,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
> engine = idxd->engines[i];
> conf_dev = engine_confdev(engine);
> put_device(conf_dev);
> - kfree(engine);
> +
> }
> kfree(idxd->engines);
>
> @@ -374,7 +374,6 @@ static int idxd_setup_groups(struct idxd_device *idxd)
> rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
> if (rc < 0) {
> put_device(conf_dev);
> -
> goto err;
> }
>
> @@ -399,7 +398,6 @@ static int idxd_setup_groups(struct idxd_device *idxd)
> while (--i >= 0) {
> group = idxd->groups[i];
> put_device(group_confdev(group));
> -
> }
> kfree(idxd->groups);
>
Nit: please remove the blank lines left behind by both deletions.
With those addressed:Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>
Thanks.
Shuai
^ permalink raw reply
* Re: [PATCH] dmaengine: idxd: fix double free in idxd_setup_groups() error path
From: Shuai Xue @ 2026-04-02 1:32 UTC (permalink / raw)
To: Guangshuo Li, Vinicius Costa Gomes, Dave Jiang, Vinod Koul,
Fenghua Yu, dmaengine, linux-kernel
Cc: stable
In-Reply-To: <20260401033622.1446904-1-lgs201920130244@gmail.com>
On 4/1/26 11:36 AM, Guangshuo Li wrote:
> When an error happens after device_initialize(), idxd_setup_groups()
> calls put_device(conf_dev).
>
> The device release callback idxd_conf_group_release() frees group, but
> the current error paths then call kfree(group) again, causing a double
> free.
>
> Keep the cleanup in idxd_conf_group_release() after put_device() and
> avoid freeing group again in idxd_setup_groups().
Yes, I think it is a double free.
Sorry, I missed the idxd_conf_group_release path.
>
> Fixes: aa6f4f945b10 ("dmaengine: idxd: fix memory leak in error handling path of idxd_setup_groups")
> Cc: stable@vger.kernel.org
> Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
> ---
> drivers/dma/idxd/init.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index b782eb3c191d..d9a9d56dd277 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -374,7 +374,7 @@ static int idxd_setup_groups(struct idxd_device *idxd)
> rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
> if (rc < 0) {
> put_device(conf_dev);
> - kfree(group);
> +
> goto err;
> }
>
> @@ -399,7 +399,7 @@ static int idxd_setup_groups(struct idxd_device *idxd)
> while (--i >= 0) {
> group = idxd->groups[i];
> put_device(group_confdev(group));
> - kfree(group);
> +
Nit: please remove the blank lines left behind by both deletions.
With those addressed:
Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>
Thanks.
Shuai
^ permalink raw reply
* Re: [PATCH] dmaengine: idxd: fix double free in idxd_alloc() error path
From: Vinicius Costa Gomes @ 2026-04-01 23:18 UTC (permalink / raw)
To: Guangshuo Li, Dave Jiang, Vinod Koul, Shuai Xue, Fenghua Yu,
dmaengine, linux-kernel
Cc: Guangshuo Li, stable
In-Reply-To: <20260401094003.1482794-1-lgs201920130244@gmail.com>
Hi,
Guangshuo Li <lgs201920130244@gmail.com> writes:
> When dev_set_name() fails after device_initialize(), idxd_alloc()
> calls put_device(conf_dev).
>
> For these devices, conf_dev->type is set from idxd->data->dev_type,
> which resolves to dsa_device_type or iax_device_type, and both use
> idxd_conf_device_release() as their release callback.
>
> That release callback frees idxd, idxd->opcap_bmap, and releases
> idxd->id, but the current error path then frees those resources again
> directly, causing a double free.
>
> Keep the cleanup in idxd_conf_device_release() after put_device() and
> avoid freeing idxd-managed resources again in idxd_alloc().
>
> Fixes: 46a5cca76c76 ("dmaengine: idxd: fix memory leak in error handling path of idxd_alloc")
> Cc: stable@vger.kernel.org
> Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
My preference is for the maintainer making the pull request to decide if
something should be sent to stable or not.
I was trying some AI review bot, I hope you don't mind, and got these
comments, went through them and they seemed good (including that these
patches should be sent as a series, that there are some more work to do
while you are cleaning the error paths), including it verbatim here:
This patch removes bitmap_free(idxd->opcap_bmap) after put_device()
in idxd_alloc()'s err_name path and adds a return NULL to prevent
falling through to the err_opcap and err_ida labels, avoiding
double-frees of opcap_bmap, ida, and idxd itself.
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index 4eff74182225..94ce52565e7a 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -635,7 +635,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
>
> err_name:
> put_device(conf_dev);
> - bitmap_free(idxd->opcap_bmap);
> + return NULL;
> err_opcap:
> ida_free(&idxd_ida, idxd->id);
> err_ida:
The double-free analysis is correct, but does the put_device() above
actually work here?
put_device(conf_dev) drops the refcount from 1 to 0 (no device_add()
was called, so nobody else holds a reference) and triggers the release
callback idxd_conf_device_release(), which does:
idxd_conf_device_release() {
destroy_workqueue(idxd->wq);
...
}
At this point in idxd_alloc(), idxd->wq is still NULL -- the
workqueue is created much later in idxd_setup_internals():
idxd_setup_internals() {
...
idxd->wq = create_workqueue(dev_name(dev));
...
}
destroy_workqueue() does not handle a NULL argument -- it immediately
dereferences the pointer:
destroy_workqueue(wq) {
workqueue_sysfs_unregister(wq);
mutex_lock(&wq->mutex); <-- NULL dereference
...
}
So put_device() here will oops before the double-free is even
reached. This is a pre-existing issue (the old code has the same
put_device call), but relying on idxd_conf_device_release() as the
cleanup path for a partially-initialized idxd_device doesn't work.
Would it make sense to skip put_device() and instead free only
what was allocated, similar to the err_opcap and err_ida labels?
Two more things worth noting about this series:
Patch 3 (idxd_setup_engines) includes hunks that remove blank lines
from idxd_setup_groups() -- lines that only exist after Patch 2 is
applied. These four patches should probably be sent as a numbered
series with an explicit ordering rather than as independent patches.
The same put_device()-then-kfree() pattern also exists in
idxd_clean_wqs(), idxd_clean_engines(), idxd_clean_groups(), and
idxd_free(), which are not addressed by this series. It might be
worth fixing all of them together.
Cheers,
--
Vinicius
^ permalink raw reply
* Re: [PATCH v3 1/2] dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
From: Frank Li @ 2026-04-01 22:45 UTC (permalink / raw)
To: Alex Bereza
Cc: Vinod Koul, Frank Li, Michal Simek, Geert Uytterhoeven,
Ulf Hansson, Arnd Bergmann, Tony Lindgren, Kedareswara rao Appana,
dmaengine, linux-arm-kernel, linux-kernel
In-Reply-To: <20260401-fix-atomic-poll-timeout-regression-v3-1-85508f0aedde@bereza.email>
On Wed, Apr 01, 2026 at 12:56:32PM +0200, Alex Bereza wrote:
> Currently when calling xilinx_dma_poll_timeout with delay_us=0 and a
> condition that is never fulfilled, the CPU busy-waits for prolonged time
> and the timeout triggers only with a massive delay causing a CPU stall.
>
> This happens due to a huge underestimation of wall clock time in
> poll_timeout_us_atomic. Commit 7349a69cf312 ("iopoll: Do not use
> timekeeping in read_poll_timeout_atomic()") changed the behavior to no
> longer use ktime_get at the expense of underestimation of wall clock
> time which appears to be very large for delay_us=0. Instead of timing
> out after approximately XILINX_DMA_LOOP_COUNT microseconds, the timeout
> takes XILINX_DMA_LOOP_COUNT * 1000 * (time that the overhead of the for
> loop in poll_timeout_us_atomic takes) which is in the range of several
> minutes for XILINX_DMA_LOOP_COUNT=1000000. Fix this by using a non-zero
> value for delay_us. Use delay_us=10 to keep the delay in the hot path of
> starting DMA transfers minimal but still avoid CPU stalls in case of
> unexpected hardware failures.
>
> One-off measurement with delay_us=0 causes the cpu to busy wait around 7
> minutes in the timeout case. After applying this patch with delay_us=10
> the measured timeout was 1053428 microseconds which is roughly
> equivalent to the expected 1000000 microseconds specified in
> XILINX_DMA_LOOP_COUNT.
>
> Add a constant XILINX_DMA_POLL_DELAY_US for delay_us value.
>
> Fixes: 9495f2648287 ("dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's")
> Fixes: 7349a69cf312 ("iopoll: Do not use timekeeping in read_poll_timeout_atomic()")
> Signed-off-by: Alex Bereza <alex@bereza.email>
> ---
Reviewed-by: Frank Li <Frank.Li@nxp.com>
> drivers/dma/xilinx/xilinx_dma.c | 14 ++++++++++----
> 1 file changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
> index 02a05f215614..345a738bab2c 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -167,6 +167,8 @@
>
> /* Delay loop counter to prevent hardware failure */
> #define XILINX_DMA_LOOP_COUNT 1000000
> +/* Delay between polls (avoid a delay of 0 to prevent CPU stalls) */
> +#define XILINX_DMA_POLL_DELAY_US 10
>
> /* AXI DMA Specific Registers/Offsets */
> #define XILINX_DMA_REG_SRCDSTADDR 0x18
> @@ -1332,7 +1334,8 @@ static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
>
> /* Wait for the hardware to halt */
> return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
> - val & XILINX_DMA_DMASR_HALTED, 0,
> + val & XILINX_DMA_DMASR_HALTED,
> + XILINX_DMA_POLL_DELAY_US,
> XILINX_DMA_LOOP_COUNT);
> }
>
> @@ -1347,7 +1350,8 @@ static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
> u32 val;
>
> return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
> - val & XILINX_DMA_DMASR_IDLE, 0,
> + val & XILINX_DMA_DMASR_IDLE,
> + XILINX_DMA_POLL_DELAY_US,
> XILINX_DMA_LOOP_COUNT);
> }
>
> @@ -1364,7 +1368,8 @@ static void xilinx_dma_start(struct xilinx_dma_chan *chan)
>
> /* Wait for the hardware to start */
> err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
> - !(val & XILINX_DMA_DMASR_HALTED), 0,
> + !(val & XILINX_DMA_DMASR_HALTED),
> + XILINX_DMA_POLL_DELAY_US,
> XILINX_DMA_LOOP_COUNT);
>
> if (err) {
> @@ -1780,7 +1785,8 @@ static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
>
> /* Wait for the hardware to finish reset */
> err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
> - !(tmp & XILINX_DMA_DMACR_RESET), 0,
> + !(tmp & XILINX_DMA_DMACR_RESET),
> + XILINX_DMA_POLL_DELAY_US,
> XILINX_DMA_LOOP_COUNT);
>
> if (err) {
>
> --
> 2.53.0
>
^ permalink raw reply
* Re: [PATCH v3 2/2] dmaengine: xilinx_dma: Rename XILINX_DMA_LOOP_COUNT
From: Frank Li @ 2026-04-01 22:40 UTC (permalink / raw)
To: Alex Bereza
Cc: Vinod Koul, Frank Li, Michal Simek, Geert Uytterhoeven,
Ulf Hansson, Arnd Bergmann, Tony Lindgren, Kedareswara rao Appana,
dmaengine, linux-arm-kernel, linux-kernel
In-Reply-To: <20260401-fix-atomic-poll-timeout-regression-v3-2-85508f0aedde@bereza.email>
On Wed, Apr 01, 2026 at 12:56:33PM +0200, Alex Bereza wrote:
> Rename XILINX_DMA_LOOP_COUNT to XILINX_DMA_POLL_TIMEOUT_US because the
> former is incorrect. It is a timeout value for polling various register
> bits in microseconds. It is not a loop count.
Rename XILINX_DMA_LOOP_COUNT to XILINX_DMA_POLL_TIMEOUT_US because it is a
timeout value, not a loop count for polling register in microseconds.
No functional changes.
Frank
>
> Signed-off-by: Alex Bereza <alex@bereza.email>
> ---
> drivers/dma/xilinx/xilinx_dma.c | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
> index 345a738bab2c..253c27fd1a0e 100644
> --- a/drivers/dma/xilinx/xilinx_dma.c
> +++ b/drivers/dma/xilinx/xilinx_dma.c
> @@ -165,8 +165,8 @@
> #define XILINX_DMA_FLUSH_MM2S 2
> #define XILINX_DMA_FLUSH_BOTH 1
>
> -/* Delay loop counter to prevent hardware failure */
> -#define XILINX_DMA_LOOP_COUNT 1000000
> +/* Timeout for polling various registers */
> +#define XILINX_DMA_POLL_TIMEOUT_US 1000000
> /* Delay between polls (avoid a delay of 0 to prevent CPU stalls) */
> #define XILINX_DMA_POLL_DELAY_US 10
>
> @@ -1336,7 +1336,7 @@ static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
> return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
> val & XILINX_DMA_DMASR_HALTED,
> XILINX_DMA_POLL_DELAY_US,
> - XILINX_DMA_LOOP_COUNT);
> + XILINX_DMA_POLL_TIMEOUT_US);
> }
>
> /**
> @@ -1352,7 +1352,7 @@ static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
> return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
> val & XILINX_DMA_DMASR_IDLE,
> XILINX_DMA_POLL_DELAY_US,
> - XILINX_DMA_LOOP_COUNT);
> + XILINX_DMA_POLL_TIMEOUT_US);
> }
>
> /**
> @@ -1370,7 +1370,7 @@ static void xilinx_dma_start(struct xilinx_dma_chan *chan)
> err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
> !(val & XILINX_DMA_DMASR_HALTED),
> XILINX_DMA_POLL_DELAY_US,
> - XILINX_DMA_LOOP_COUNT);
> + XILINX_DMA_POLL_TIMEOUT_US);
>
> if (err) {
> dev_err(chan->dev, "Cannot start channel %p: %x\n",
> @@ -1787,7 +1787,7 @@ static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
> err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
> !(tmp & XILINX_DMA_DMACR_RESET),
> XILINX_DMA_POLL_DELAY_US,
> - XILINX_DMA_LOOP_COUNT);
> + XILINX_DMA_POLL_TIMEOUT_US);
>
> if (err) {
> dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
>
> --
> 2.53.0
>
^ permalink raw reply
* Re: [PATCH v2 4/4] dmaengine: dma-axi-dmac: Defer freeing DMA descriptors
From: Frank Li @ 2026-04-01 22:27 UTC (permalink / raw)
To: Nuno Sá
Cc: Nuno Sá, dmaengine, linux-kernel, Lars-Peter Clausen,
Vinod Koul, Frank Li, Eliza Balas
In-Reply-To: <ac1C8VdYXe3pMu0B@nsa>
On Wed, Apr 01, 2026 at 05:14:16PM +0100, Nuno Sá wrote:
> On Tue, Mar 31, 2026 at 04:21:06PM +0100, Nuno Sá wrote:
> > On Tue, Mar 31, 2026 at 10:16:09AM -0400, Frank Li wrote:
> > > On Tue, Mar 31, 2026 at 09:53:45AM +0100, Nuno Sá wrote:
> > > > On Mon, Mar 30, 2026 at 11:24:34AM -0400, Frank Li wrote:
> > > > > On Fri, Mar 27, 2026 at 04:58:41PM +0000, Nuno Sá wrote:
> > > > > > From: Eliza Balas <eliza.balas@analog.com>
> > > > > >
> > > > > > This IP core can be used in architectures (like Microblaze) where DMA
> > > > > > descriptors are allocated with vmalloc().
> > > > >
> > > > > strage, why use vmalloc()?
> > > >
> > > > It's just one of the paths in dma_alloc_coherent(). It should be
> > > > architecture dependant.
> > >
> > > Which architectures, this may common problem, dma_alloc/free_coherent() is
> > > quite common at other dma-engine driver.
> >
> > I'll double check this but I believe this was triggered on microblaze
> > where we also use this IP. Will come back with confirmation!
> >
>
> Hi Frank,
>
> I now went to the bottom of the issue! The problem is that for archs
> like microblaze and arm64 we have DMA_DIRECT_REMAP which means that when
> calling dma_alloc_coherent() in [1] we will get into the code path in
> [2]. Now I did some research and we might have other solution for this
> that does not involve this refcount craziness plus async work. But I
> need to test it. FYI, what I have in mind is similar to the what
> loongson2-apb-dma.c does. This means, using the dma pool API. IIUC, with
> the pool we only actually free the memory (dma_free_coherent()) in the
> .terminate_all() callback (when destroying the pool) which should not
> happen in interrupt context right?
I think so. If your dma engineer descriptor is link-list, suggest use dma
pool. If it is cicylic buffer, suggest pre-alloc enough descriptors when
apply channel.
Frank
>
> [1]: https://elixir.bootlin.com/linux/v7.0-rc6/source/drivers/dma/dma-axi-dmac.c#L549
> [2]: https://elixir.bootlin.com/linux/v7.0-rc6/source/kernel/dma/direct.c#L278
>
> - Nuno Sá
>
> > - Nuno Sá
> > >
> > > Frank
> > >
> > > >
> > > > - Nuno Sá
> > > >
> > > > >
> > > > > Frank
> > > > >
> > > > > > Hence, given that freeing the
> > > > > > descriptors happen in softirq context, vunmpap() will BUG().
> > > > > >
> > > > > > To solve the above, we setup a work item during allocation of the
> > > > > > descriptors and schedule in softirq context. Hence, the actual freeing
> > > > > > happens in threaded context.
> > > > > >
> > > > > > Also note that to account for the possible race where the struct axi_dmac
> > > > > > object is gone between scheduling the work and actually running it, we
> > > > > > now save and get a reference of struct device when allocating the
> > > > > > descriptor (given that's all we need in axi_dmac_free_desc()) and
> > > > > > release it in axi_dmac_free_desc().
> > > > > >
> > > > > > Signed-off-by: Eliza Balas <eliza.balas@analog.com>
> > > > > > Co-developed-by: Nuno Sá <nuno.sa@analog.com>
> > > > > > Signed-off-by: Nuno Sá <nuno.sa@analog.com>
> > > > > > ---
> > > > > > drivers/dma/dma-axi-dmac.c | 50 ++++++++++++++++++++++++++++++++++------------
> > > > > > 1 file changed, 37 insertions(+), 13 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
> > > > > > index 70d3ad7e7d37..46f1ead0c7d7 100644
> > > > > > --- a/drivers/dma/dma-axi-dmac.c
> > > > > > +++ b/drivers/dma/dma-axi-dmac.c
> > > > > > @@ -25,6 +25,7 @@
> > > > > > #include <linux/regmap.h>
> > > > > > #include <linux/slab.h>
> > > > > > #include <linux/spinlock.h>
> > > > > > +#include <linux/workqueue.h>
> > > > > >
> > > > > > #include <dt-bindings/dma/axi-dmac.h>
> > > > > >
> > > > > > @@ -133,6 +134,9 @@ struct axi_dmac_sg {
> > > > > > struct axi_dmac_desc {
> > > > > > struct virt_dma_desc vdesc;
> > > > > > struct axi_dmac_chan *chan;
> > > > > > + struct device *dev;
> > > > > > +
> > > > > > + struct work_struct sched_work;
> > > > > >
> > > > > > bool cyclic;
> > > > > > bool cyclic_eot;
> > > > > > @@ -666,6 +670,25 @@ static void axi_dmac_issue_pending(struct dma_chan *c)
> > > > > > spin_unlock_irqrestore(&chan->vchan.lock, flags);
> > > > > > }
> > > > > >
> > > > > > +static void axi_dmac_free_desc(struct axi_dmac_desc *desc)
> > > > > > +{
> > > > > > + struct axi_dmac_hw_desc *hw = desc->sg[0].hw;
> > > > > > + dma_addr_t hw_phys = desc->sg[0].hw_phys;
> > > > > > +
> > > > > > + dma_free_coherent(desc->dev, PAGE_ALIGN(desc->num_sgs * sizeof(*hw)),
> > > > > > + hw, hw_phys);
> > > > > > + put_device(desc->dev);
> > > > > > + kfree(desc);
> > > > > > +}
> > > > > > +
> > > > > > +static void axi_dmac_free_desc_schedule_work(struct work_struct *work)
> > > > > > +{
> > > > > > + struct axi_dmac_desc *desc = container_of(work, struct axi_dmac_desc,
> > > > > > + sched_work);
> > > > > > +
> > > > > > + axi_dmac_free_desc(desc);
> > > > > > +}
> > > > > > +
> > > > > > static struct axi_dmac_desc *
> > > > > > axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)
> > > > > > {
> > > > > > @@ -681,6 +704,7 @@ axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)
> > > > > > return NULL;
> > > > > > desc->num_sgs = num_sgs;
> > > > > > desc->chan = chan;
> > > > > > + desc->dev = get_device(dmac->dma_dev.dev);
> > > > > >
> > > > > > hws = dma_alloc_coherent(dev, PAGE_ALIGN(num_sgs * sizeof(*hws)),
> > > > > > &hw_phys, GFP_ATOMIC);
> > > > > > @@ -703,21 +727,18 @@ axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)
> > > > > > /* The last hardware descriptor will trigger an interrupt */
> > > > > > desc->sg[num_sgs - 1].hw->flags = AXI_DMAC_HW_FLAG_LAST | AXI_DMAC_HW_FLAG_IRQ;
> > > > > >
> > > > > > + /*
> > > > > > + * We need to setup a work item because this IP can be used on archs
> > > > > > + * that rely on vmalloced memory for descriptors. And given that freeing
> > > > > > + * the descriptors happens in softirq context, vunmpap() will BUG().
> > > > > > + * Hence, setup the worker so that we can queue it and free the
> > > > > > + * descriptor in threaded context.
> > > > > > + */
> > > > > > + INIT_WORK(&desc->sched_work, axi_dmac_free_desc_schedule_work);
> > > > > > +
> > > > > > return desc;
> > > > > > }
> > > > > >
> > > > > > -static void axi_dmac_free_desc(struct axi_dmac_desc *desc)
> > > > > > -{
> > > > > > - struct axi_dmac *dmac = chan_to_axi_dmac(desc->chan);
> > > > > > - struct device *dev = dmac->dma_dev.dev;
> > > > > > - struct axi_dmac_hw_desc *hw = desc->sg[0].hw;
> > > > > > - dma_addr_t hw_phys = desc->sg[0].hw_phys;
> > > > > > -
> > > > > > - dma_free_coherent(dev, PAGE_ALIGN(desc->num_sgs * sizeof(*hw)),
> > > > > > - hw, hw_phys);
> > > > > > - kfree(desc);
> > > > > > -}
> > > > > > -
> > > > > > static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
> > > > > > enum dma_transfer_direction direction, dma_addr_t addr,
> > > > > > unsigned int num_periods, unsigned int period_len,
> > > > > > @@ -958,7 +979,10 @@ static void axi_dmac_free_chan_resources(struct dma_chan *c)
> > > > > >
> > > > > > static void axi_dmac_desc_free(struct virt_dma_desc *vdesc)
> > > > > > {
> > > > > > - axi_dmac_free_desc(to_axi_dmac_desc(vdesc));
> > > > > > + struct axi_dmac_desc *desc = to_axi_dmac_desc(vdesc);
> > > > > > +
> > > > > > + /* See the comment in axi_dmac_alloc_desc() for the why! */
> > > > > > + schedule_work(&desc->sched_work);
> > > > > > }
> > > > > >
> > > > > > static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg)
> > > > > >
> > > > > > --
> > > > > > 2.53.0
> > > > > >
^ permalink raw reply
* RE: [PATCH v3 0/2] Fix CPU stall in xilinx_dma_poll_timeout caused by passing delay_us=0
From: Gupta, Suraj @ 2026-04-01 18:37 UTC (permalink / raw)
To: Alex Bereza, Vinod Koul, Frank Li, Simek, Michal,
Geert Uytterhoeven, Ulf Hansson, Arnd Bergmann, Tony Lindgren,
Rao, Appana Durga Kedareswara
Cc: dmaengine@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20260401-fix-atomic-poll-timeout-regression-v3-0-85508f0aedde@bereza.email>
[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Alex Bereza <alex@bereza.email>
> Sent: Wednesday, April 1, 2026 4:27 PM
> To: Vinod Koul <vkoul@kernel.org>; Frank Li <Frank.Li@kernel.org>; Simek,
> Michal <michal.simek@amd.com>; Geert Uytterhoeven
> <geert+renesas@glider.be>; Ulf Hansson <ulf.hansson@linaro.org>; Arnd
> Bergmann <arnd@arndb.de>; Tony Lindgren <tony@atomide.com>; Rao,
> Appana Durga Kedareswara <appana.durga.kedareswara.rao@amd.com>
> Cc: dmaengine@vger.kernel.org; linux-arm-kernel@lists.infradead.org; linux-
> kernel@vger.kernel.org; Alex Bereza <alex@bereza.email>
> Subject: [PATCH v3 0/2] Fix CPU stall in xilinx_dma_poll_timeout caused by
> passing delay_us=0
>
> [You don't often get email from alex@bereza.email. Learn why this is important
> at https://aka.ms/LearnAboutSenderIdentification ]
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> Signed-off-by: Alex Bereza <alex@bereza.email>
> ---
For the series, Reviewed-by: Suraj Gupta <suraj.gupta2@amd.com>
Thanks!
Suraj
> Changes in v3:
> - patch 1/2:
> - Fix commit message: remove blank line between tags
> - patch 2/2: nothing
> - Link to v2: https://patch.msgid.link/20260401-fix-atomic-poll-timeout-
> regression-v2-0-68a265e3770f@bereza.email
>
> Changes in v2:
> - Fixed the Fixes: tags as suggested by Geert Uytterhoeven
> <geert+renesas@glider.be> - thanks!
> - Split the renaming of XILINX_DMA_LOOP_COUNT into separate commit
> - Link to v1: https://patch.msgid.link/20260331-fix-atomic-poll-timeout-
> regression-v1-1-5b7bd96eaca0@bereza.email
>
> ---
> Alex Bereza (2):
> dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
> dmaengine: xilinx_dma: Rename XILINX_DMA_LOOP_COUNT
>
> drivers/dma/xilinx/xilinx_dma.c | 26 ++++++++++++++++----------
> 1 file changed, 16 insertions(+), 10 deletions(-)
> ---
> base-commit: b7560798466a07d9c3fb011698e92c335ab28baf
> change-id: 20260330-fix-atomic-poll-timeout-regression-4f4e3baf3fd7
>
> Best regards,
> --
> Alex Bereza <alex@bereza.email>
>
^ permalink raw reply
* Re: [PATCH v2 4/4] dmaengine: dma-axi-dmac: Defer freeing DMA descriptors
From: Nuno Sá @ 2026-04-01 16:14 UTC (permalink / raw)
To: Frank Li
Cc: Nuno Sá, dmaengine, linux-kernel, Lars-Peter Clausen,
Vinod Koul, Frank Li, Eliza Balas
In-Reply-To: <acvmNkDwLsdJCvWa@nsa>
On Tue, Mar 31, 2026 at 04:21:06PM +0100, Nuno Sá wrote:
> On Tue, Mar 31, 2026 at 10:16:09AM -0400, Frank Li wrote:
> > On Tue, Mar 31, 2026 at 09:53:45AM +0100, Nuno Sá wrote:
> > > On Mon, Mar 30, 2026 at 11:24:34AM -0400, Frank Li wrote:
> > > > On Fri, Mar 27, 2026 at 04:58:41PM +0000, Nuno Sá wrote:
> > > > > From: Eliza Balas <eliza.balas@analog.com>
> > > > >
> > > > > This IP core can be used in architectures (like Microblaze) where DMA
> > > > > descriptors are allocated with vmalloc().
> > > >
> > > > strage, why use vmalloc()?
> > >
> > > It's just one of the paths in dma_alloc_coherent(). It should be
> > > architecture dependant.
> >
> > Which architectures, this may common problem, dma_alloc/free_coherent() is
> > quite common at other dma-engine driver.
>
> I'll double check this but I believe this was triggered on microblaze
> where we also use this IP. Will come back with confirmation!
>
Hi Frank,
I now went to the bottom of the issue! The problem is that for archs
like microblaze and arm64 we have DMA_DIRECT_REMAP which means that when
calling dma_alloc_coherent() in [1] we will get into the code path in
[2]. Now I did some research and we might have other solution for this
that does not involve this refcount craziness plus async work. But I
need to test it. FYI, what I have in mind is similar to the what
loongson2-apb-dma.c does. This means, using the dma pool API. IIUC, with
the pool we only actually free the memory (dma_free_coherent()) in the
.terminate_all() callback (when destroying the pool) which should not
happen in interrupt context right?
[1]: https://elixir.bootlin.com/linux/v7.0-rc6/source/drivers/dma/dma-axi-dmac.c#L549
[2]: https://elixir.bootlin.com/linux/v7.0-rc6/source/kernel/dma/direct.c#L278
- Nuno Sá
> - Nuno Sá
> >
> > Frank
> >
> > >
> > > - Nuno Sá
> > >
> > > >
> > > > Frank
> > > >
> > > > > Hence, given that freeing the
> > > > > descriptors happen in softirq context, vunmpap() will BUG().
> > > > >
> > > > > To solve the above, we setup a work item during allocation of the
> > > > > descriptors and schedule in softirq context. Hence, the actual freeing
> > > > > happens in threaded context.
> > > > >
> > > > > Also note that to account for the possible race where the struct axi_dmac
> > > > > object is gone between scheduling the work and actually running it, we
> > > > > now save and get a reference of struct device when allocating the
> > > > > descriptor (given that's all we need in axi_dmac_free_desc()) and
> > > > > release it in axi_dmac_free_desc().
> > > > >
> > > > > Signed-off-by: Eliza Balas <eliza.balas@analog.com>
> > > > > Co-developed-by: Nuno Sá <nuno.sa@analog.com>
> > > > > Signed-off-by: Nuno Sá <nuno.sa@analog.com>
> > > > > ---
> > > > > drivers/dma/dma-axi-dmac.c | 50 ++++++++++++++++++++++++++++++++++------------
> > > > > 1 file changed, 37 insertions(+), 13 deletions(-)
> > > > >
> > > > > diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
> > > > > index 70d3ad7e7d37..46f1ead0c7d7 100644
> > > > > --- a/drivers/dma/dma-axi-dmac.c
> > > > > +++ b/drivers/dma/dma-axi-dmac.c
> > > > > @@ -25,6 +25,7 @@
> > > > > #include <linux/regmap.h>
> > > > > #include <linux/slab.h>
> > > > > #include <linux/spinlock.h>
> > > > > +#include <linux/workqueue.h>
> > > > >
> > > > > #include <dt-bindings/dma/axi-dmac.h>
> > > > >
> > > > > @@ -133,6 +134,9 @@ struct axi_dmac_sg {
> > > > > struct axi_dmac_desc {
> > > > > struct virt_dma_desc vdesc;
> > > > > struct axi_dmac_chan *chan;
> > > > > + struct device *dev;
> > > > > +
> > > > > + struct work_struct sched_work;
> > > > >
> > > > > bool cyclic;
> > > > > bool cyclic_eot;
> > > > > @@ -666,6 +670,25 @@ static void axi_dmac_issue_pending(struct dma_chan *c)
> > > > > spin_unlock_irqrestore(&chan->vchan.lock, flags);
> > > > > }
> > > > >
> > > > > +static void axi_dmac_free_desc(struct axi_dmac_desc *desc)
> > > > > +{
> > > > > + struct axi_dmac_hw_desc *hw = desc->sg[0].hw;
> > > > > + dma_addr_t hw_phys = desc->sg[0].hw_phys;
> > > > > +
> > > > > + dma_free_coherent(desc->dev, PAGE_ALIGN(desc->num_sgs * sizeof(*hw)),
> > > > > + hw, hw_phys);
> > > > > + put_device(desc->dev);
> > > > > + kfree(desc);
> > > > > +}
> > > > > +
> > > > > +static void axi_dmac_free_desc_schedule_work(struct work_struct *work)
> > > > > +{
> > > > > + struct axi_dmac_desc *desc = container_of(work, struct axi_dmac_desc,
> > > > > + sched_work);
> > > > > +
> > > > > + axi_dmac_free_desc(desc);
> > > > > +}
> > > > > +
> > > > > static struct axi_dmac_desc *
> > > > > axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)
> > > > > {
> > > > > @@ -681,6 +704,7 @@ axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)
> > > > > return NULL;
> > > > > desc->num_sgs = num_sgs;
> > > > > desc->chan = chan;
> > > > > + desc->dev = get_device(dmac->dma_dev.dev);
> > > > >
> > > > > hws = dma_alloc_coherent(dev, PAGE_ALIGN(num_sgs * sizeof(*hws)),
> > > > > &hw_phys, GFP_ATOMIC);
> > > > > @@ -703,21 +727,18 @@ axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)
> > > > > /* The last hardware descriptor will trigger an interrupt */
> > > > > desc->sg[num_sgs - 1].hw->flags = AXI_DMAC_HW_FLAG_LAST | AXI_DMAC_HW_FLAG_IRQ;
> > > > >
> > > > > + /*
> > > > > + * We need to setup a work item because this IP can be used on archs
> > > > > + * that rely on vmalloced memory for descriptors. And given that freeing
> > > > > + * the descriptors happens in softirq context, vunmpap() will BUG().
> > > > > + * Hence, setup the worker so that we can queue it and free the
> > > > > + * descriptor in threaded context.
> > > > > + */
> > > > > + INIT_WORK(&desc->sched_work, axi_dmac_free_desc_schedule_work);
> > > > > +
> > > > > return desc;
> > > > > }
> > > > >
> > > > > -static void axi_dmac_free_desc(struct axi_dmac_desc *desc)
> > > > > -{
> > > > > - struct axi_dmac *dmac = chan_to_axi_dmac(desc->chan);
> > > > > - struct device *dev = dmac->dma_dev.dev;
> > > > > - struct axi_dmac_hw_desc *hw = desc->sg[0].hw;
> > > > > - dma_addr_t hw_phys = desc->sg[0].hw_phys;
> > > > > -
> > > > > - dma_free_coherent(dev, PAGE_ALIGN(desc->num_sgs * sizeof(*hw)),
> > > > > - hw, hw_phys);
> > > > > - kfree(desc);
> > > > > -}
> > > > > -
> > > > > static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
> > > > > enum dma_transfer_direction direction, dma_addr_t addr,
> > > > > unsigned int num_periods, unsigned int period_len,
> > > > > @@ -958,7 +979,10 @@ static void axi_dmac_free_chan_resources(struct dma_chan *c)
> > > > >
> > > > > static void axi_dmac_desc_free(struct virt_dma_desc *vdesc)
> > > > > {
> > > > > - axi_dmac_free_desc(to_axi_dmac_desc(vdesc));
> > > > > + struct axi_dmac_desc *desc = to_axi_dmac_desc(vdesc);
> > > > > +
> > > > > + /* See the comment in axi_dmac_alloc_desc() for the why! */
> > > > > + schedule_work(&desc->sched_work);
> > > > > }
> > > > >
> > > > > static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg)
> > > > >
> > > > > --
> > > > > 2.53.0
> > > > >
^ permalink raw reply
* Re: [PATCHv2] dmaengine: hsu: use kzalloc_flex()
From: Andy Shevchenko @ 2026-04-01 13:32 UTC (permalink / raw)
To: Rosen Penev
Cc: dmaengine, Andy Shevchenko, Vinod Koul, Frank Li, Kees Cook,
Gustavo A. R. Silva,
open list:INTEL MID (Mobile Internet Device) PLATFORM,
open list:KERNEL HARDENING (not covered by other areas):Keyword:b__counted_by(_le|_be)?b
In-Reply-To: <CAKxU2N-QT6KAKzAYDUp_d9ug=1VxHMvegEQDbxS4GumH+8QBWg@mail.gmail.com>
On Wed, Apr 1, 2026 at 12:31 AM Rosen Penev <rosenp@gmail.com> wrote:
> On Mon, Mar 30, 2026 at 9:29 PM Andy Shevchenko
> <andy.shevchenko@gmail.com> wrote:
> > On Mon, Mar 30, 2026 at 11:41 PM Rosen Penev <rosenp@gmail.com> wrote:
> > > On Mon, Mar 30, 2026 at 1:46 AM Andy Shevchenko
> > > <andy.shevchenko@gmail.com> wrote:
> > > > On Sat, Mar 28, 2026 at 9:17 PM Rosen Penev <rosenp@gmail.com> wrote:
...
> > > > > - hsu = devm_kzalloc(chip->dev, sizeof(*hsu), GFP_KERNEL);
> > > > > + /* Calculate nr_channels from the IO space length */
> > > > > + nr_channels = (chip->length - chip->offset) / HSU_DMA_CHAN_LENGTH;
> > > > > + hsu = devm_kzalloc(chip->dev, struct_size(hsu, chan, nr_channels), GFP_KERNEL);
> > > > > if (!hsu)
> > > > > return -ENOMEM;
> > > > >
> > > > > - chip->hsu = hsu;
> > > > > -
> > > > > - /* Calculate nr_channels from the IO space length */
> > > > > - hsu->nr_channels = (chip->length - chip->offset) / HSU_DMA_CHAN_LENGTH;
> > > > > + hsu->nr_channels = nr_channels;
> > > > >
> > > > > - hsu->chan = devm_kcalloc(chip->dev, hsu->nr_channels,
> > > > > - sizeof(*hsu->chan), GFP_KERNEL);
> > > > > - if (!hsu->chan)
> > > > > - return -ENOMEM;
> > > > > + chip->hsu = hsu;
> > > >
> > > > Don't know these _flex() APIs enough, but can we leave the chip->hsu =
> > > > hsu; in the same place as it's now?
> > > __counted_by requires the first assignment after allocation to be the
> > > counting variable. The _flex macros do this automatically for GCC15
> > > and above.
> >
> > Why? The hsu member has nothing to do with VLA, where is this
> > requirement coming from? My understanding is that the check should
> > imply the minimum sizeof of the data structure and the compiler should
> > know that way before doing any allocations.
> Not sure I follow. This patch changes hsu's chan member to a FAM.
> Where is VLA coming from?
VLA: variable-length array
FAM: flexible array member
The second one is VLA member + size member.
What your patch is doing is changing a pointer to VLA member.
> The current code is devm_kzalloc(x, struct_size()). When it gets
> introduced, I'm sure there will be a treewide conversion to
> devm_kzalloc_flex, which would automatically set the counting variable
> for >=GCC15.
>
> It's best practice to assign right after since kzalloc_flex does it anyways.
Still, I'm not convinced we should blindly follow this rule. The
length needs to be known before accessing the VLA, but it's okay to
access other members. Leaving hsu member assignment where it's now is
fine, no need to move it around.
> > My understanding seems in align with what Gustavo blogged:
> > https://people.kernel.org/gustavoars/how-to-use-the-new-counted_by-attribute-in-c-and-linux
> >
> > The same is written in the GCC patch description
> > https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653123.html
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* [PATCH] dt-bindings: dma: qcom,gpi: Document GPI DMA engine for Hawi SoC
From: Mukesh Ojha @ 2026-04-01 12:40 UTC (permalink / raw)
To: Vinod Koul, Rob Herring, Krzysztof Kozlowski, Conor Dooley
Cc: Frank Li, linux-arm-msm, dmaengine, devicetree, linux-kernel,
Xueyao An, Konrad Dybcio, Mukesh Ojha
From: Xueyao An <xueyao.an@oss.qualcomm.com>
The Hawi GPI DMA engine follows the same programming model and
register interface as previous generation of Qualcomm SoCs like
kaanapali, glymur, and is fully compatible with earlier GPI DMA
implementations.
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Signed-off-by: Xueyao An <xueyao.an@oss.qualcomm.com>
Signed-off-by: Mukesh Ojha <mukesh.ojha@oss.qualcomm.com>
---
Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 +
1 file changed, 1 insertion(+)
diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index fde1df035ad1..caa2ef90d8f2 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -25,6 +25,7 @@ properties:
- items:
- enum:
- qcom,glymur-gpi-dma
+ - qcom,hawi-gpi-dma
- qcom,kaanapali-gpi-dma
- qcom,milos-gpi-dma
- qcom,qcm2290-gpi-dma
--
2.53.0
^ permalink raw reply related
* Re: [PATCH] dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
From: Alex Bereza @ 2026-04-01 11:41 UTC (permalink / raw)
To: Gupta, Suraj, Alex Bereza, Vinod Koul, Frank Li, Michal Simek,
Geert Uytterhoeven, Ulf Hansson, Arnd Bergmann, Tony Lindgren
Cc: dmaengine, linux-arm-kernel, linux-kernel
In-Reply-To: <f5a8ac32-3c65-4703-87fc-d0990839887d@amd.com>
Hi Suraj,
On Wed Apr 1, 2026 at 1:15 PM CEST, Suraj Gupta wrote:
>>>> Hi, in addition to this patch I also have a question: what is the point
>>>> of atomically polling for the HALTED or IDLE bit in the stop_transfer
>>>> functions? Does device_terminate_all really need to be callable from
>>>> atomic context? If not, one could switch to polling non-atomically and
>>>> avoid burning CPU cycles.
>>>>
>>>
>>> dmaengine_terminate_async(), which directly calls device_terminate_all
>>> can be called from atomic context.
>>
>> Right, thanks! Just for my understanding: I still think there is potential for improvement, because
>> from my understanding it would be beneficial to do the waiting for the bits in the status register
>> and the freeing of descriptors in xilinx_dma_synchronize. Do I understand correctly that this is
>> currently not possible due to how the DMA engine API is structured? To make this possible I think
>> the deprecated dmaengine_terminate_all would have to be removed and all users of this API would have
>> to be adapted accordingly, correct? So this would be a patch of much larger scope than xilinx_dma
>> driver alone.
>
> Yes, your understanding of xilinx_dma_synchronize()and proposed changes
> looks correct.
Thank you for the feedback on this. It is really helpful since I'm quite new to
writing patches for the kernel. I was thinking about whether I can improve the
xilinx_dma driver in this regard, but given the large scope of changing the
whole DMA engine API and all its users unfortunately this task is too big for
me.
BR
Alex
^ permalink raw reply
* Re: [PATCH] dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
From: Gupta, Suraj @ 2026-04-01 11:15 UTC (permalink / raw)
To: Alex Bereza, Vinod Koul, Frank Li, Michal Simek,
Geert Uytterhoeven, Ulf Hansson, Arnd Bergmann, Tony Lindgren
Cc: dmaengine, linux-arm-kernel, linux-kernel
In-Reply-To: <DHHOCNHDN27K.RIE745OFAACD@bereza.email>
On 4/1/2026 1:57 PM, Alex Bereza wrote:
> [You don't often get email from alex@bereza.email. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]
>
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> On Wed Apr 1, 2026 at 7:23 AM CEST, Suraj Gupta wrote:
>
>>> Rename XILINX_DMA_LOOP_COUNT to XILINX_DMA_POLL_TIMEOUT_US because the
>>> former is incorrect. It is a timeout value for polling various register
>>> bits in microseconds. It is not a loop count. Add a constant
>>> XILINX_DMA_POLL_DELAY_US for delay_us value.
>>
>> Please split this change in a new patch.
>
> Ok, will send a v2.
>
>>> Fixes: 7349a69cf312 ("iopoll: Do not use timekeeping in read_poll_timeout_atomic()")
>>
>> This patch doesn't fixes anything in iopoll, please use correct fixes tag.
>
> Ok, but I'm not sure what would be the correct fixes tag then? I though I need to reference
> 7349a69cf312 in fixes tag because this is the actual change that surfaced the CPU stall issue that I
> want to fix in this driver. I'm fixing the call sites of xilinx_dma_poll_timeout but they were added
> in different commits. Should I add all of them? That would be the following then:
>
> Fixes: 9495f2648287 ("dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's")
> Fixes: 676f9c26c330 ("dmaengine: xilinx: fix device_terminate_all() callback for AXI CDMA")
>
> Three call sites with delay_us=0 were first introduced by 9495f2648287, then 676f9c26c330 added the
> fourth call site when introducing xilinx_cdma_stop_transfer (probably copy paste from
> xilinx_dma_stop_transfer). Would adding these two fixes tags be correct?
>
>>> Hi, in addition to this patch I also have a question: what is the point
>>> of atomically polling for the HALTED or IDLE bit in the stop_transfer
>>> functions? Does device_terminate_all really need to be callable from
>>> atomic context? If not, one could switch to polling non-atomically and
>>> avoid burning CPU cycles.
>>>
>>
>> dmaengine_terminate_async(), which directly calls device_terminate_all
>> can be called from atomic context.
>
> Right, thanks! Just for my understanding: I still think there is potential for improvement, because
> from my understanding it would be beneficial to do the waiting for the bits in the status register
> and the freeing of descriptors in xilinx_dma_synchronize. Do I understand correctly that this is
> currently not possible due to how the DMA engine API is structured? To make this possible I think
> the deprecated dmaengine_terminate_all would have to be removed and all users of this API would have
> to be adapted accordingly, correct? So this would be a patch of much larger scope than xilinx_dma
> driver alone.
Yes, your understanding of xilinx_dma_synchronize()and proposed changes
looks correct.
Regards,
Suraj
^ permalink raw reply
* Re: [PATCH v2 1/2] dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
From: Alex Bereza @ 2026-04-01 10:59 UTC (permalink / raw)
To: Geert Uytterhoeven, Alex Bereza
Cc: Vinod Koul, Frank Li, Michal Simek, Ulf Hansson, Arnd Bergmann,
Tony Lindgren, Kedareswara rao Appana, dmaengine,
linux-arm-kernel, linux-kernel
In-Reply-To: <CAMuHMdWGHzt8nB3EGAToxZibf-O6C5xb9bcWhQQApzL3-6pcCA@mail.gmail.com>
Hi Geert,
thank you for the quick and helpful replies.
>> Fixes: 9495f2648287 ("dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's")
>> Fixes: 7349a69cf312 ("iopoll: Do not use timekeeping in read_poll_timeout_atomic()")
>>
>
> Please no blank line between tags.
Fixed. Sorry, was not aware of this formatting requirement.
BR
Alex
^ permalink raw reply
* [PATCH v3 2/2] dmaengine: xilinx_dma: Rename XILINX_DMA_LOOP_COUNT
From: Alex Bereza @ 2026-04-01 10:56 UTC (permalink / raw)
To: Vinod Koul, Frank Li, Michal Simek, Geert Uytterhoeven,
Ulf Hansson, Arnd Bergmann, Tony Lindgren, Kedareswara rao Appana
Cc: dmaengine, linux-arm-kernel, linux-kernel, Alex Bereza
In-Reply-To: <20260401-fix-atomic-poll-timeout-regression-v3-0-85508f0aedde@bereza.email>
Rename XILINX_DMA_LOOP_COUNT to XILINX_DMA_POLL_TIMEOUT_US because the
former is incorrect. It is a timeout value for polling various register
bits in microseconds. It is not a loop count.
Signed-off-by: Alex Bereza <alex@bereza.email>
---
drivers/dma/xilinx/xilinx_dma.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 345a738bab2c..253c27fd1a0e 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -165,8 +165,8 @@
#define XILINX_DMA_FLUSH_MM2S 2
#define XILINX_DMA_FLUSH_BOTH 1
-/* Delay loop counter to prevent hardware failure */
-#define XILINX_DMA_LOOP_COUNT 1000000
+/* Timeout for polling various registers */
+#define XILINX_DMA_POLL_TIMEOUT_US 1000000
/* Delay between polls (avoid a delay of 0 to prevent CPU stalls) */
#define XILINX_DMA_POLL_DELAY_US 10
@@ -1336,7 +1336,7 @@ static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
val & XILINX_DMA_DMASR_HALTED,
XILINX_DMA_POLL_DELAY_US,
- XILINX_DMA_LOOP_COUNT);
+ XILINX_DMA_POLL_TIMEOUT_US);
}
/**
@@ -1352,7 +1352,7 @@ static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
val & XILINX_DMA_DMASR_IDLE,
XILINX_DMA_POLL_DELAY_US,
- XILINX_DMA_LOOP_COUNT);
+ XILINX_DMA_POLL_TIMEOUT_US);
}
/**
@@ -1370,7 +1370,7 @@ static void xilinx_dma_start(struct xilinx_dma_chan *chan)
err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
!(val & XILINX_DMA_DMASR_HALTED),
XILINX_DMA_POLL_DELAY_US,
- XILINX_DMA_LOOP_COUNT);
+ XILINX_DMA_POLL_TIMEOUT_US);
if (err) {
dev_err(chan->dev, "Cannot start channel %p: %x\n",
@@ -1787,7 +1787,7 @@ static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
!(tmp & XILINX_DMA_DMACR_RESET),
XILINX_DMA_POLL_DELAY_US,
- XILINX_DMA_LOOP_COUNT);
+ XILINX_DMA_POLL_TIMEOUT_US);
if (err) {
dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
--
2.53.0
^ permalink raw reply related
* [PATCH v3 1/2] dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
From: Alex Bereza @ 2026-04-01 10:56 UTC (permalink / raw)
To: Vinod Koul, Frank Li, Michal Simek, Geert Uytterhoeven,
Ulf Hansson, Arnd Bergmann, Tony Lindgren, Kedareswara rao Appana
Cc: dmaengine, linux-arm-kernel, linux-kernel, Alex Bereza
In-Reply-To: <20260401-fix-atomic-poll-timeout-regression-v3-0-85508f0aedde@bereza.email>
Currently when calling xilinx_dma_poll_timeout with delay_us=0 and a
condition that is never fulfilled, the CPU busy-waits for prolonged time
and the timeout triggers only with a massive delay causing a CPU stall.
This happens due to a huge underestimation of wall clock time in
poll_timeout_us_atomic. Commit 7349a69cf312 ("iopoll: Do not use
timekeeping in read_poll_timeout_atomic()") changed the behavior to no
longer use ktime_get at the expense of underestimation of wall clock
time which appears to be very large for delay_us=0. Instead of timing
out after approximately XILINX_DMA_LOOP_COUNT microseconds, the timeout
takes XILINX_DMA_LOOP_COUNT * 1000 * (time that the overhead of the for
loop in poll_timeout_us_atomic takes) which is in the range of several
minutes for XILINX_DMA_LOOP_COUNT=1000000. Fix this by using a non-zero
value for delay_us. Use delay_us=10 to keep the delay in the hot path of
starting DMA transfers minimal but still avoid CPU stalls in case of
unexpected hardware failures.
One-off measurement with delay_us=0 causes the cpu to busy wait around 7
minutes in the timeout case. After applying this patch with delay_us=10
the measured timeout was 1053428 microseconds which is roughly
equivalent to the expected 1000000 microseconds specified in
XILINX_DMA_LOOP_COUNT.
Add a constant XILINX_DMA_POLL_DELAY_US for delay_us value.
Fixes: 9495f2648287 ("dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's")
Fixes: 7349a69cf312 ("iopoll: Do not use timekeeping in read_poll_timeout_atomic()")
Signed-off-by: Alex Bereza <alex@bereza.email>
---
drivers/dma/xilinx/xilinx_dma.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 02a05f215614..345a738bab2c 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -167,6 +167,8 @@
/* Delay loop counter to prevent hardware failure */
#define XILINX_DMA_LOOP_COUNT 1000000
+/* Delay between polls (avoid a delay of 0 to prevent CPU stalls) */
+#define XILINX_DMA_POLL_DELAY_US 10
/* AXI DMA Specific Registers/Offsets */
#define XILINX_DMA_REG_SRCDSTADDR 0x18
@@ -1332,7 +1334,8 @@ static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
/* Wait for the hardware to halt */
return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
- val & XILINX_DMA_DMASR_HALTED, 0,
+ val & XILINX_DMA_DMASR_HALTED,
+ XILINX_DMA_POLL_DELAY_US,
XILINX_DMA_LOOP_COUNT);
}
@@ -1347,7 +1350,8 @@ static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
u32 val;
return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
- val & XILINX_DMA_DMASR_IDLE, 0,
+ val & XILINX_DMA_DMASR_IDLE,
+ XILINX_DMA_POLL_DELAY_US,
XILINX_DMA_LOOP_COUNT);
}
@@ -1364,7 +1368,8 @@ static void xilinx_dma_start(struct xilinx_dma_chan *chan)
/* Wait for the hardware to start */
err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
- !(val & XILINX_DMA_DMASR_HALTED), 0,
+ !(val & XILINX_DMA_DMASR_HALTED),
+ XILINX_DMA_POLL_DELAY_US,
XILINX_DMA_LOOP_COUNT);
if (err) {
@@ -1780,7 +1785,8 @@ static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
/* Wait for the hardware to finish reset */
err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
- !(tmp & XILINX_DMA_DMACR_RESET), 0,
+ !(tmp & XILINX_DMA_DMACR_RESET),
+ XILINX_DMA_POLL_DELAY_US,
XILINX_DMA_LOOP_COUNT);
if (err) {
--
2.53.0
^ permalink raw reply related
* [PATCH v3 0/2] Fix CPU stall in xilinx_dma_poll_timeout caused by passing delay_us=0
From: Alex Bereza @ 2026-04-01 10:56 UTC (permalink / raw)
To: Vinod Koul, Frank Li, Michal Simek, Geert Uytterhoeven,
Ulf Hansson, Arnd Bergmann, Tony Lindgren, Kedareswara rao Appana
Cc: dmaengine, linux-arm-kernel, linux-kernel, Alex Bereza
Signed-off-by: Alex Bereza <alex@bereza.email>
---
Changes in v3:
- patch 1/2:
- Fix commit message: remove blank line between tags
- patch 2/2: nothing
- Link to v2: https://patch.msgid.link/20260401-fix-atomic-poll-timeout-regression-v2-0-68a265e3770f@bereza.email
Changes in v2:
- Fixed the Fixes: tags as suggested by Geert Uytterhoeven
<geert+renesas@glider.be> - thanks!
- Split the renaming of XILINX_DMA_LOOP_COUNT into separate commit
- Link to v1: https://patch.msgid.link/20260331-fix-atomic-poll-timeout-regression-v1-1-5b7bd96eaca0@bereza.email
---
Alex Bereza (2):
dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
dmaengine: xilinx_dma: Rename XILINX_DMA_LOOP_COUNT
drivers/dma/xilinx/xilinx_dma.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
---
base-commit: b7560798466a07d9c3fb011698e92c335ab28baf
change-id: 20260330-fix-atomic-poll-timeout-regression-4f4e3baf3fd7
Best regards,
--
Alex Bereza <alex@bereza.email>
^ permalink raw reply
* Re: [PATCH v2 1/2] dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
From: Geert Uytterhoeven @ 2026-04-01 10:28 UTC (permalink / raw)
To: Alex Bereza
Cc: Vinod Koul, Frank Li, Michal Simek, Ulf Hansson, Arnd Bergmann,
Tony Lindgren, Kedareswara rao Appana, dmaengine,
linux-arm-kernel, linux-kernel
In-Reply-To: <20260401-fix-atomic-poll-timeout-regression-v2-1-68a265e3770f@bereza.email>
Hi Alex,
Thanks for your patch!
On Wed, 1 Apr 2026 at 11:58, Alex Bereza <alex@bereza.email> wrote:
> Currently when calling xilinx_dma_poll_timeout with delay_us=0 and a
> condition that is never fulfilled, the CPU busy-waits for prolonged time
> and the timeout triggers only with a massive delay causing a CPU stall.
>
> This happens due to a huge underestimation of wall clock time in
> poll_timeout_us_atomic. Commit 7349a69cf312 ("iopoll: Do not use
> timekeeping in read_poll_timeout_atomic()") changed the behavior to no
> longer use ktime_get at the expense of underestimation of wall clock
> time which appears to be very large for delay_us=0. Instead of timing
> out after approximately XILINX_DMA_LOOP_COUNT microseconds, the timeout
> takes XILINX_DMA_LOOP_COUNT * 1000 * (time that the overhead of the for
> loop in poll_timeout_us_atomic takes) which is in the range of several
> minutes for XILINX_DMA_LOOP_COUNT=1000000. Fix this by using a non-zero
> value for delay_us. Use delay_us=10 to keep the delay in the hot path of
> starting DMA transfers minimal but still avoid CPU stalls in case of
> unexpected hardware failures.
>
> One-off measurement with delay_us=0 causes the cpu to busy wait around 7
> minutes in the timeout case. After applying this patch with delay_us=10
> the measured timeout was 1053428 microseconds which is roughly
> equivalent to the expected 1000000 microseconds specified in
> XILINX_DMA_LOOP_COUNT.
>
> Add a constant XILINX_DMA_POLL_DELAY_US for delay_us value.
>
> Fixes: 9495f2648287 ("dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's")
> Fixes: 7349a69cf312 ("iopoll: Do not use timekeeping in read_poll_timeout_atomic()")
>
Please no blank line between tags.
> Signed-off-by: Alex Bereza <alex@bereza.email>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH v6 4/4] i2c: qcom-geni: Support multi-owner controllers in GPI mode
From: Konrad Dybcio @ 2026-04-01 10:21 UTC (permalink / raw)
To: Mukesh Kumar Savaliya, viken.dadhaniya, andi.shyti, robh, krzk+dt,
conor+dt, vkoul, Frank.Li, andersson, konradybcio,
dmitry.baryshkov, linmq006, quic_jseerapu, agross, linux-arm-msm,
linux-i2c, devicetree, linux-kernel, dmaengine
Cc: krzysztof.kozlowski, bartosz.golaszewski, bjorn.andersson
In-Reply-To: <20260331114742.2896317-5-mukesh.savaliya@oss.qualcomm.com>
On 3/31/26 1:47 PM, Mukesh Kumar Savaliya wrote:
> Some platforms use a QUP-based I2C controller in a configuration where the
> controller is shared with another system processor. In this setup the
> operating system must not assume exclusive ownership of the controller or
> its associated pins.
>
> Add support for enabling multi-owner operation when DeviceTree specifies
> qcom,qup-multi-owner. When enabled, mark the underlying serial engine as
> shared so the common GENI resource handling avoids selecting the "sleep"
> pinctrl state, which could disrupt transfers initiated by the other
> processor.
>
> For GPI mode transfers, request lock/unlock TRE sequencing from the GPI
> driver by setting a single lock_action selector per message, emitting lock
> before the first message and unlock after the last message (handling the
> single-message case as well). This serializes access to the shared
> controller without requiring message-position flags to be passed into the
> DMA engine layer.
>
> Signed-off-by: Mukesh Kumar Savaliya <mukesh.savaliya@oss.qualcomm.com>
> ---
[...]
> + if (of_property_read_bool(pdev->dev.of_node, "qcom,qup-multi-owner")) {
> + /*
> + * Multi-owner controller configuration: the controller may be
> + * used by another system processor. Mark the SE as shared so
> + * common GENI resource handling can avoid pin state changes
> + * that would disrupt the other user.
> + */
I don't find this comment very useful given we have kerneldoc for that
property and the behavior you described impacts another file
[...]
> + if (gi2c->se.multi_owner)
> + dev_err_probe(dev, -EINVAL, "I2C sharing not supported in non GSI mode\n");
return dev_err_probe()
Konrad
^ permalink raw reply
* Re: [PATCH v6 3/4] soc: qcom: geni-se: Keep pinctrl active for multi-owner controllers
From: Konrad Dybcio @ 2026-04-01 10:19 UTC (permalink / raw)
To: Mukesh Kumar Savaliya, viken.dadhaniya, andi.shyti, robh, krzk+dt,
conor+dt, vkoul, Frank.Li, andersson, konradybcio,
dmitry.baryshkov, linmq006, quic_jseerapu, agross, linux-arm-msm,
linux-i2c, devicetree, linux-kernel, dmaengine
Cc: krzysztof.kozlowski, bartosz.golaszewski, bjorn.andersson
In-Reply-To: <20260331114742.2896317-4-mukesh.savaliya@oss.qualcomm.com>
On 3/31/26 1:47 PM, Mukesh Kumar Savaliya wrote:
> On platforms where a GENI Serial Engine is shared with another system
> processor, selecting the "sleep" pinctrl state can disrupt ongoing
> transfers initiated by the other processor.
>
> Teach geni_se_resources_off() to skip selecting the pinctrl sleep state
> when the Serial Engine is marked as shared, while still allowing the
> rest of the resource shutdown sequence to proceed.
>
> This is required for multi-owner configurations (described via DeviceTree
> with qcom,qup-multi-owner on the protocol controller node).
>
> Signed-off-by: Mukesh Kumar Savaliya <mukesh.savaliya@oss.qualcomm.com>
> ---
[...]
> + * @multi_owner: True if SE is shared between multiprocessors.
'between multiple owners'?
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Konrad
^ permalink raw reply
* [PATCH v2 2/2] dmaengine: xilinx_dma: Rename XILINX_DMA_LOOP_COUNT
From: Alex Bereza @ 2026-04-01 9:57 UTC (permalink / raw)
To: Vinod Koul, Frank Li, Michal Simek, Geert Uytterhoeven,
Ulf Hansson, Arnd Bergmann, Tony Lindgren, Kedareswara rao Appana
Cc: dmaengine, linux-arm-kernel, linux-kernel, Alex Bereza
In-Reply-To: <20260401-fix-atomic-poll-timeout-regression-v2-0-68a265e3770f@bereza.email>
Rename XILINX_DMA_LOOP_COUNT to XILINX_DMA_POLL_TIMEOUT_US because the
former is incorrect. It is a timeout value for polling various register
bits in microseconds. It is not a loop count.
Signed-off-by: Alex Bereza <alex@bereza.email>
---
drivers/dma/xilinx/xilinx_dma.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 345a738bab2c..253c27fd1a0e 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -165,8 +165,8 @@
#define XILINX_DMA_FLUSH_MM2S 2
#define XILINX_DMA_FLUSH_BOTH 1
-/* Delay loop counter to prevent hardware failure */
-#define XILINX_DMA_LOOP_COUNT 1000000
+/* Timeout for polling various registers */
+#define XILINX_DMA_POLL_TIMEOUT_US 1000000
/* Delay between polls (avoid a delay of 0 to prevent CPU stalls) */
#define XILINX_DMA_POLL_DELAY_US 10
@@ -1336,7 +1336,7 @@ static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
val & XILINX_DMA_DMASR_HALTED,
XILINX_DMA_POLL_DELAY_US,
- XILINX_DMA_LOOP_COUNT);
+ XILINX_DMA_POLL_TIMEOUT_US);
}
/**
@@ -1352,7 +1352,7 @@ static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
val & XILINX_DMA_DMASR_IDLE,
XILINX_DMA_POLL_DELAY_US,
- XILINX_DMA_LOOP_COUNT);
+ XILINX_DMA_POLL_TIMEOUT_US);
}
/**
@@ -1370,7 +1370,7 @@ static void xilinx_dma_start(struct xilinx_dma_chan *chan)
err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
!(val & XILINX_DMA_DMASR_HALTED),
XILINX_DMA_POLL_DELAY_US,
- XILINX_DMA_LOOP_COUNT);
+ XILINX_DMA_POLL_TIMEOUT_US);
if (err) {
dev_err(chan->dev, "Cannot start channel %p: %x\n",
@@ -1787,7 +1787,7 @@ static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
!(tmp & XILINX_DMA_DMACR_RESET),
XILINX_DMA_POLL_DELAY_US,
- XILINX_DMA_LOOP_COUNT);
+ XILINX_DMA_POLL_TIMEOUT_US);
if (err) {
dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
--
2.53.0
^ permalink raw reply related
* [PATCH v2 0/2] Fix CPU stall in xilinx_dma_poll_timeout caused by passing delay_us=0
From: Alex Bereza @ 2026-04-01 9:57 UTC (permalink / raw)
To: Vinod Koul, Frank Li, Michal Simek, Geert Uytterhoeven,
Ulf Hansson, Arnd Bergmann, Tony Lindgren, Kedareswara rao Appana
Cc: dmaengine, linux-arm-kernel, linux-kernel, Alex Bereza
Signed-off-by: Alex Bereza <alex@bereza.email>
---
Changes in v2:
- Fixed the Fixes: tags as suggested by Geert Uytterhoeven
<geert+renesas@glider.be> - thanks!
- Split the renaming of XILINX_DMA_LOOP_COUNT into separate commit
- Link to v1: https://patch.msgid.link/20260331-fix-atomic-poll-timeout-regression-v1-1-5b7bd96eaca0@bereza.email
---
Alex Bereza (2):
dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
dmaengine: xilinx_dma: Rename XILINX_DMA_LOOP_COUNT
drivers/dma/xilinx/xilinx_dma.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
---
base-commit: b7560798466a07d9c3fb011698e92c335ab28baf
change-id: 20260330-fix-atomic-poll-timeout-regression-4f4e3baf3fd7
Best regards,
--
Alex Bereza <alex@bereza.email>
^ permalink raw reply
* [PATCH v2 1/2] dmaengine: xilinx_dma: Fix CPU stall in xilinx_dma_poll_timeout
From: Alex Bereza @ 2026-04-01 9:57 UTC (permalink / raw)
To: Vinod Koul, Frank Li, Michal Simek, Geert Uytterhoeven,
Ulf Hansson, Arnd Bergmann, Tony Lindgren, Kedareswara rao Appana
Cc: dmaengine, linux-arm-kernel, linux-kernel, Alex Bereza
In-Reply-To: <20260401-fix-atomic-poll-timeout-regression-v2-0-68a265e3770f@bereza.email>
Currently when calling xilinx_dma_poll_timeout with delay_us=0 and a
condition that is never fulfilled, the CPU busy-waits for prolonged time
and the timeout triggers only with a massive delay causing a CPU stall.
This happens due to a huge underestimation of wall clock time in
poll_timeout_us_atomic. Commit 7349a69cf312 ("iopoll: Do not use
timekeeping in read_poll_timeout_atomic()") changed the behavior to no
longer use ktime_get at the expense of underestimation of wall clock
time which appears to be very large for delay_us=0. Instead of timing
out after approximately XILINX_DMA_LOOP_COUNT microseconds, the timeout
takes XILINX_DMA_LOOP_COUNT * 1000 * (time that the overhead of the for
loop in poll_timeout_us_atomic takes) which is in the range of several
minutes for XILINX_DMA_LOOP_COUNT=1000000. Fix this by using a non-zero
value for delay_us. Use delay_us=10 to keep the delay in the hot path of
starting DMA transfers minimal but still avoid CPU stalls in case of
unexpected hardware failures.
One-off measurement with delay_us=0 causes the cpu to busy wait around 7
minutes in the timeout case. After applying this patch with delay_us=10
the measured timeout was 1053428 microseconds which is roughly
equivalent to the expected 1000000 microseconds specified in
XILINX_DMA_LOOP_COUNT.
Add a constant XILINX_DMA_POLL_DELAY_US for delay_us value.
Fixes: 9495f2648287 ("dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's")
Fixes: 7349a69cf312 ("iopoll: Do not use timekeeping in read_poll_timeout_atomic()")
Signed-off-by: Alex Bereza <alex@bereza.email>
---
drivers/dma/xilinx/xilinx_dma.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 02a05f215614..345a738bab2c 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -167,6 +167,8 @@
/* Delay loop counter to prevent hardware failure */
#define XILINX_DMA_LOOP_COUNT 1000000
+/* Delay between polls (avoid a delay of 0 to prevent CPU stalls) */
+#define XILINX_DMA_POLL_DELAY_US 10
/* AXI DMA Specific Registers/Offsets */
#define XILINX_DMA_REG_SRCDSTADDR 0x18
@@ -1332,7 +1334,8 @@ static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
/* Wait for the hardware to halt */
return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
- val & XILINX_DMA_DMASR_HALTED, 0,
+ val & XILINX_DMA_DMASR_HALTED,
+ XILINX_DMA_POLL_DELAY_US,
XILINX_DMA_LOOP_COUNT);
}
@@ -1347,7 +1350,8 @@ static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
u32 val;
return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
- val & XILINX_DMA_DMASR_IDLE, 0,
+ val & XILINX_DMA_DMASR_IDLE,
+ XILINX_DMA_POLL_DELAY_US,
XILINX_DMA_LOOP_COUNT);
}
@@ -1364,7 +1368,8 @@ static void xilinx_dma_start(struct xilinx_dma_chan *chan)
/* Wait for the hardware to start */
err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
- !(val & XILINX_DMA_DMASR_HALTED), 0,
+ !(val & XILINX_DMA_DMASR_HALTED),
+ XILINX_DMA_POLL_DELAY_US,
XILINX_DMA_LOOP_COUNT);
if (err) {
@@ -1780,7 +1785,8 @@ static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
/* Wait for the hardware to finish reset */
err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
- !(tmp & XILINX_DMA_DMACR_RESET), 0,
+ !(tmp & XILINX_DMA_DMACR_RESET),
+ XILINX_DMA_POLL_DELAY_US,
XILINX_DMA_LOOP_COUNT);
if (err) {
--
2.53.0
^ permalink raw reply related
* [PATCH] dmaengine: idxd: fix double free in idxd_alloc() error path
From: Guangshuo Li @ 2026-04-01 9:40 UTC (permalink / raw)
To: Vinicius Costa Gomes, Dave Jiang, Vinod Koul, Shuai Xue,
Fenghua Yu, dmaengine, linux-kernel
Cc: Guangshuo Li, stable
When dev_set_name() fails after device_initialize(), idxd_alloc()
calls put_device(conf_dev).
For these devices, conf_dev->type is set from idxd->data->dev_type,
which resolves to dsa_device_type or iax_device_type, and both use
idxd_conf_device_release() as their release callback.
That release callback frees idxd, idxd->opcap_bmap, and releases
idxd->id, but the current error path then frees those resources again
directly, causing a double free.
Keep the cleanup in idxd_conf_device_release() after put_device() and
avoid freeing idxd-managed resources again in idxd_alloc().
Fixes: 46a5cca76c76 ("dmaengine: idxd: fix memory leak in error handling path of idxd_alloc")
Cc: stable@vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244@gmail.com>
---
drivers/dma/idxd/init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 4eff74182225..94ce52565e7a 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -635,7 +635,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
err_name:
put_device(conf_dev);
- bitmap_free(idxd->opcap_bmap);
+ return NULL;
err_opcap:
ida_free(&idxd_ida, idxd->id);
err_ida:
--
2.43.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox