From: Vishwaroop A <va@nvidia.com>
To: Mark Brown <broonie@kernel.org>,
Thierry Reding <thierry.reding@gmail.com>,
Jonathan Hunter <jonathanh@nvidia.com>,
"Sowjanya Komatineni" <skomatineni@nvidia.com>,
Laxman Dewangan <ldewangan@nvidia.com>, <smangipudi@nvidia.com>,
<kyarlagadda@nvidia.com>
Cc: Vishwaroop A <va@nvidia.com>, <linux-spi@vger.kernel.org>,
<linux-tegra@vger.kernel.org>, <linux-kernel@vger.kernel.org>
Subject: [PATCH v1 2/2] spi: tegra210-quad: Check hardware status on timeout
Date: Fri, 10 Oct 2025 15:20:01 +0000 [thread overview]
Message-ID: <20251010152001.2399799-3-va@nvidia.com> (raw)
In-Reply-To: <20251010152001.2399799-1-va@nvidia.com>
Under high system load, QSPI interrupts can be delayed or blocked on the
target CPU, causing wait_for_completion_timeout() to report failure even
though the hardware successfully completed the transfer. This has been
observed in production during error injection, RAS firmware activity, and
CPU saturation scenarios.
When a timeout occurs, check the QSPI_RDY bit in QSPI_TRANS_STATUS to
determine if the hardware actually completed the transfer. If so, manually
invoke the completion handler to process the transfer successfully instead
of failing it.
This distinguishes lost/delayed interrupts from real hardware timeouts,
preventing unnecessary failures of transfers that completed successfully.
Signed-off-by: Vishwaroop A <va@nvidia.com>
---
drivers/spi/spi-tegra210-quad.c | 164 ++++++++++++++++++++++----------
1 file changed, 114 insertions(+), 50 deletions(-)
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index c2f880d08109..757e9fe23e0e 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -1019,17 +1019,22 @@ static void tegra_qspi_dump_regs(struct tegra_qspi *tqspi)
tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS));
}
-static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
+static void tegra_qspi_reset(struct tegra_qspi *tqspi)
{
- dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
- tegra_qspi_dump_regs(tqspi);
- tegra_qspi_flush_fifos(tqspi, true);
if (device_reset(tqspi->dev) < 0) {
dev_warn_once(tqspi->dev, "device reset failed\n");
tegra_qspi_mask_clear_irq(tqspi);
}
}
+static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
+{
+ dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
+ tegra_qspi_dump_regs(tqspi);
+ tegra_qspi_flush_fifos(tqspi, true);
+ tegra_qspi_reset(tqspi);
+}
+
static void tegra_qspi_transfer_end(struct spi_device *spi)
{
struct tegra_qspi *tqspi = spi_controller_get_devdata(spi->controller);
@@ -1043,6 +1048,49 @@ static void tegra_qspi_transfer_end(struct spi_device *spi)
tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
}
+static irqreturn_t handle_cpu_based_xfer(struct tegra_qspi *tqspi);
+static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi);
+
+/**
+ * tegra_qspi_handle_timeout - Handle transfer timeout with hardware check
+ * @tqspi: QSPI controller instance
+ *
+ * When a timeout occurs but hardware has completed the transfer (interrupt
+ * was lost or delayed), manually trigger transfer completion processing.
+ * This avoids failing transfers that actually succeeded.
+ *
+ * Returns: 0 if transfer was completed, -ETIMEDOUT if real timeout
+ */
+static int tegra_qspi_handle_timeout(struct tegra_qspi *tqspi)
+{
+ irqreturn_t ret;
+ u32 status;
+
+ /* Check if hardware actually completed the transfer */
+ status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
+ if (!(status & QSPI_RDY))
+ return -ETIMEDOUT;
+
+ /*
+ * Hardware completed but interrupt was lost/delayed. Manually
+ * process the completion by calling the appropriate handler.
+ */
+ dev_warn_ratelimited(tqspi->dev,
+ "QSPI interrupt timeout, but transfer complete\n");
+
+ /* Clear the transfer status */
+ status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
+ tegra_qspi_writel(tqspi, status, QSPI_TRANS_STATUS);
+
+ /* Manually trigger completion handler */
+ if (!tqspi->is_curr_dma_xfer)
+ ret = handle_cpu_based_xfer(tqspi);
+ else
+ ret = handle_dma_based_xfer(tqspi);
+
+ return (ret == IRQ_HANDLED) ? 0 : -EIO;
+}
+
static u32 tegra_qspi_cmd_config(bool is_ddr, u8 bus_width, u8 len)
{
u32 cmd_config = 0;
@@ -1074,6 +1122,30 @@ static u32 tegra_qspi_addr_config(bool is_ddr, u8 bus_width, u8 len)
return addr_config;
}
+static void tegra_qspi_dma_stop(struct tegra_qspi *tqspi)
+{
+ u32 value;
+
+ if ((tqspi->cur_direction & DATA_DIR_TX) && tqspi->tx_dma_chan)
+ dmaengine_terminate_all(tqspi->tx_dma_chan);
+
+ if ((tqspi->cur_direction & DATA_DIR_RX) && tqspi->rx_dma_chan)
+ dmaengine_terminate_all(tqspi->rx_dma_chan);
+
+ value = tegra_qspi_readl(tqspi, QSPI_DMA_CTL);
+ value &= ~QSPI_DMA_EN;
+ tegra_qspi_writel(tqspi, value, QSPI_DMA_CTL);
+}
+
+static void tegra_qspi_pio_stop(struct tegra_qspi *tqspi)
+{
+ u32 value;
+
+ value = tegra_qspi_readl(tqspi, QSPI_COMMAND1);
+ value &= ~QSPI_PIO;
+ tegra_qspi_writel(tqspi, value, QSPI_COMMAND1);
+}
+
static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
struct spi_message *msg)
{
@@ -1081,7 +1153,7 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
struct spi_transfer *xfer;
struct spi_device *spi = msg->spi;
u8 transfer_phase = 0;
- u32 cmd1 = 0, dma_ctl = 0;
+ u32 cmd1 = 0;
int ret = 0;
u32 address_value = 0;
u32 cmd_config = 0, addr_config = 0;
@@ -1148,43 +1220,28 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
QSPI_DMA_TIMEOUT);
if (WARN_ON_ONCE(ret == 0)) {
- dev_err_ratelimited(tqspi->dev,
- "QSPI Transfer failed with timeout\n");
- if (tqspi->is_curr_dma_xfer) {
- if ((tqspi->cur_direction & DATA_DIR_TX) &&
- tqspi->tx_dma_chan)
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- if ((tqspi->cur_direction & DATA_DIR_RX) &&
- tqspi->rx_dma_chan)
- dmaengine_terminate_all(tqspi->rx_dma_chan);
- }
-
- /* Abort transfer by resetting pio/dma bit */
- if (!tqspi->is_curr_dma_xfer) {
- cmd1 = tegra_qspi_readl
- (tqspi,
- QSPI_COMMAND1);
- cmd1 &= ~QSPI_PIO;
- tegra_qspi_writel
- (tqspi, cmd1,
- QSPI_COMMAND1);
- } else {
- dma_ctl = tegra_qspi_readl
- (tqspi,
- QSPI_DMA_CTL);
- dma_ctl &= ~QSPI_DMA_EN;
- tegra_qspi_writel(tqspi, dma_ctl,
- QSPI_DMA_CTL);
+ /*
+ * Check if hardware completed the transfer
+ * even though interrupt was lost or delayed.
+ * If so, process the completion and continue.
+ */
+ ret = tegra_qspi_handle_timeout(tqspi);
+ if (ret < 0) {
+ /* Real timeout - clean up and fail */
+ dev_err(tqspi->dev, "transfer timeout\n");
+
+ /* Abort transfer by resetting pio/dma bit */
+ if (tqspi->is_curr_dma_xfer)
+ tegra_qspi_dma_stop(tqspi);
+ else
+ tegra_qspi_pio_stop(tqspi);
+
+ /* Reset controller if timeout happens */
+ tegra_qspi_reset(tqspi);
+
+ ret = -EIO;
+ goto exit;
}
-
- /* Reset controller if timeout happens */
- if (device_reset(tqspi->dev) < 0) {
- dev_warn_once(tqspi->dev,
- "device reset failed\n");
- tegra_qspi_mask_clear_irq(tqspi);
- }
- ret = -EIO;
- goto exit;
}
if (tqspi->tx_status || tqspi->rx_status) {
@@ -1275,16 +1332,23 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
ret = wait_for_completion_timeout(&tqspi->xfer_completion,
QSPI_DMA_TIMEOUT);
if (WARN_ON(ret == 0)) {
- dev_err(tqspi->dev, "transfer timeout\n");
- if (tqspi->is_curr_dma_xfer) {
- if ((tqspi->cur_direction & DATA_DIR_TX) && tqspi->tx_dma_chan)
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- if ((tqspi->cur_direction & DATA_DIR_RX) && tqspi->rx_dma_chan)
- dmaengine_terminate_all(tqspi->rx_dma_chan);
+ /*
+ * Check if hardware completed the transfer even though
+ * interrupt was lost or delayed. If so, process the
+ * completion and continue.
+ */
+ ret = tegra_qspi_handle_timeout(tqspi);
+ if (ret < 0) {
+ /* Real timeout - clean up and fail */
+ dev_err(tqspi->dev, "transfer timeout\n");
+
+ if (tqspi->is_curr_dma_xfer)
+ tegra_qspi_dma_stop(tqspi);
+
+ tegra_qspi_handle_error(tqspi);
+ ret = -EIO;
+ goto complete_xfer;
}
- tegra_qspi_handle_error(tqspi);
- ret = -EIO;
- goto complete_xfer;
}
if (tqspi->tx_status || tqspi->rx_status) {
--
2.17.1
prev parent reply other threads:[~2025-10-10 15:20 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-10 15:19 [PATCH v1 0/2] spi: tegra210-quad: Improve timeout handling under high system load Vishwaroop A
2025-10-10 15:20 ` [PATCH v1 1/2] spi: tegra210-quad: Fix timeout handling Vishwaroop A
2025-10-14 10:15 ` Thierry Reding
2025-10-10 15:20 ` Vishwaroop A [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251010152001.2399799-3-va@nvidia.com \
--to=va@nvidia.com \
--cc=broonie@kernel.org \
--cc=jonathanh@nvidia.com \
--cc=kyarlagadda@nvidia.com \
--cc=ldewangan@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-spi@vger.kernel.org \
--cc=linux-tegra@vger.kernel.org \
--cc=skomatineni@nvidia.com \
--cc=smangipudi@nvidia.com \
--cc=thierry.reding@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.