From: Vishwaroop A <va@nvidia.com>
To: Mark Brown <broonie@kernel.org>,
Thierry Reding <thierry.reding@gmail.com>,
Jonathan Hunter <jonathanh@nvidia.com>,
"Sowjanya Komatineni" <skomatineni@nvidia.com>,
Laxman Dewangan <ldewangan@nvidia.com>, <smangipudi@nvidia.com>,
<kyarlagadda@nvidia.com>
Cc: Vishwaroop A <va@nvidia.com>, <linux-spi@vger.kernel.org>,
<linux-tegra@vger.kernel.org>, <linux-kernel@vger.kernel.org>
Subject: [PATCH v1 2/2] spi: tegra210-quad: Check hardware status on timeout
Date: Fri, 10 Oct 2025 15:20:01 +0000 [thread overview]
Message-ID: <20251010152001.2399799-3-va@nvidia.com> (raw)
In-Reply-To: <20251010152001.2399799-1-va@nvidia.com>
Under high system load, QSPI interrupts can be delayed or blocked on the
target CPU, causing wait_for_completion_timeout() to report failure even
though the hardware successfully completed the transfer. This has been
observed in production during error injection, RAS firmware activity, and
CPU saturation scenarios.
When a timeout occurs, check the QSPI_RDY bit in QSPI_TRANS_STATUS to
determine if the hardware actually completed the transfer. If so, manually
invoke the completion handler to process the transfer successfully instead
of failing it.
This distinguishes lost/delayed interrupts from real hardware timeouts,
preventing unnecessary failures of transfers that completed successfully.
Signed-off-by: Vishwaroop A <va@nvidia.com>
---
drivers/spi/spi-tegra210-quad.c | 164 ++++++++++++++++++++++----------
1 file changed, 114 insertions(+), 50 deletions(-)
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index c2f880d08109..757e9fe23e0e 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -1019,17 +1019,22 @@ static void tegra_qspi_dump_regs(struct tegra_qspi *tqspi)
tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS));
}
-static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
+static void tegra_qspi_reset(struct tegra_qspi *tqspi)
{
- dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
- tegra_qspi_dump_regs(tqspi);
- tegra_qspi_flush_fifos(tqspi, true);
if (device_reset(tqspi->dev) < 0) {
dev_warn_once(tqspi->dev, "device reset failed\n");
tegra_qspi_mask_clear_irq(tqspi);
}
}
+static void tegra_qspi_handle_error(struct tegra_qspi *tqspi)
+{
+ dev_err(tqspi->dev, "error in transfer, fifo status 0x%08x\n", tqspi->status_reg);
+ tegra_qspi_dump_regs(tqspi);
+ tegra_qspi_flush_fifos(tqspi, true);
+ tegra_qspi_reset(tqspi);
+}
+
static void tegra_qspi_transfer_end(struct spi_device *spi)
{
struct tegra_qspi *tqspi = spi_controller_get_devdata(spi->controller);
@@ -1043,6 +1048,49 @@ static void tegra_qspi_transfer_end(struct spi_device *spi)
tegra_qspi_writel(tqspi, tqspi->def_command1_reg, QSPI_COMMAND1);
}
+static irqreturn_t handle_cpu_based_xfer(struct tegra_qspi *tqspi);
+static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi);
+
+/**
+ * tegra_qspi_handle_timeout - Handle transfer timeout with hardware check
+ * @tqspi: QSPI controller instance
+ *
+ * When a timeout occurs but hardware has completed the transfer (interrupt
+ * was lost or delayed), manually trigger transfer completion processing.
+ * This avoids failing transfers that actually succeeded.
+ *
+ * Returns: 0 if transfer was completed, -ETIMEDOUT if real timeout
+ */
+static int tegra_qspi_handle_timeout(struct tegra_qspi *tqspi)
+{
+ irqreturn_t ret;
+ u32 status;
+
+ /* Check if hardware actually completed the transfer */
+ status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
+ if (!(status & QSPI_RDY))
+ return -ETIMEDOUT;
+
+ /*
+ * Hardware completed but interrupt was lost/delayed. Manually
+ * process the completion by calling the appropriate handler.
+ */
+ dev_warn_ratelimited(tqspi->dev,
+ "QSPI interrupt timeout, but transfer complete\n");
+
+ /* Clear the transfer status */
+ status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
+ tegra_qspi_writel(tqspi, status, QSPI_TRANS_STATUS);
+
+ /* Manually trigger completion handler */
+ if (!tqspi->is_curr_dma_xfer)
+ ret = handle_cpu_based_xfer(tqspi);
+ else
+ ret = handle_dma_based_xfer(tqspi);
+
+ return (ret == IRQ_HANDLED) ? 0 : -EIO;
+}
+
static u32 tegra_qspi_cmd_config(bool is_ddr, u8 bus_width, u8 len)
{
u32 cmd_config = 0;
@@ -1074,6 +1122,30 @@ static u32 tegra_qspi_addr_config(bool is_ddr, u8 bus_width, u8 len)
return addr_config;
}
+static void tegra_qspi_dma_stop(struct tegra_qspi *tqspi)
+{
+ u32 value;
+
+ if ((tqspi->cur_direction & DATA_DIR_TX) && tqspi->tx_dma_chan)
+ dmaengine_terminate_all(tqspi->tx_dma_chan);
+
+ if ((tqspi->cur_direction & DATA_DIR_RX) && tqspi->rx_dma_chan)
+ dmaengine_terminate_all(tqspi->rx_dma_chan);
+
+ value = tegra_qspi_readl(tqspi, QSPI_DMA_CTL);
+ value &= ~QSPI_DMA_EN;
+ tegra_qspi_writel(tqspi, value, QSPI_DMA_CTL);
+}
+
+static void tegra_qspi_pio_stop(struct tegra_qspi *tqspi)
+{
+ u32 value;
+
+ value = tegra_qspi_readl(tqspi, QSPI_COMMAND1);
+ value &= ~QSPI_PIO;
+ tegra_qspi_writel(tqspi, value, QSPI_COMMAND1);
+}
+
static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
struct spi_message *msg)
{
@@ -1081,7 +1153,7 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
struct spi_transfer *xfer;
struct spi_device *spi = msg->spi;
u8 transfer_phase = 0;
- u32 cmd1 = 0, dma_ctl = 0;
+ u32 cmd1 = 0;
int ret = 0;
u32 address_value = 0;
u32 cmd_config = 0, addr_config = 0;
@@ -1148,43 +1220,28 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi,
QSPI_DMA_TIMEOUT);
if (WARN_ON_ONCE(ret == 0)) {
- dev_err_ratelimited(tqspi->dev,
- "QSPI Transfer failed with timeout\n");
- if (tqspi->is_curr_dma_xfer) {
- if ((tqspi->cur_direction & DATA_DIR_TX) &&
- tqspi->tx_dma_chan)
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- if ((tqspi->cur_direction & DATA_DIR_RX) &&
- tqspi->rx_dma_chan)
- dmaengine_terminate_all(tqspi->rx_dma_chan);
- }
-
- /* Abort transfer by resetting pio/dma bit */
- if (!tqspi->is_curr_dma_xfer) {
- cmd1 = tegra_qspi_readl
- (tqspi,
- QSPI_COMMAND1);
- cmd1 &= ~QSPI_PIO;
- tegra_qspi_writel
- (tqspi, cmd1,
- QSPI_COMMAND1);
- } else {
- dma_ctl = tegra_qspi_readl
- (tqspi,
- QSPI_DMA_CTL);
- dma_ctl &= ~QSPI_DMA_EN;
- tegra_qspi_writel(tqspi, dma_ctl,
- QSPI_DMA_CTL);
+ /*
+ * Check if hardware completed the transfer
+ * even though interrupt was lost or delayed.
+ * If so, process the completion and continue.
+ */
+ ret = tegra_qspi_handle_timeout(tqspi);
+ if (ret < 0) {
+ /* Real timeout - clean up and fail */
+ dev_err(tqspi->dev, "transfer timeout\n");
+
+ /* Abort transfer by resetting pio/dma bit */
+ if (tqspi->is_curr_dma_xfer)
+ tegra_qspi_dma_stop(tqspi);
+ else
+ tegra_qspi_pio_stop(tqspi);
+
+ /* Reset controller if timeout happens */
+ tegra_qspi_reset(tqspi);
+
+ ret = -EIO;
+ goto exit;
}
-
- /* Reset controller if timeout happens */
- if (device_reset(tqspi->dev) < 0) {
- dev_warn_once(tqspi->dev,
- "device reset failed\n");
- tegra_qspi_mask_clear_irq(tqspi);
- }
- ret = -EIO;
- goto exit;
}
if (tqspi->tx_status || tqspi->rx_status) {
@@ -1275,16 +1332,23 @@ static int tegra_qspi_non_combined_seq_xfer(struct tegra_qspi *tqspi,
ret = wait_for_completion_timeout(&tqspi->xfer_completion,
QSPI_DMA_TIMEOUT);
if (WARN_ON(ret == 0)) {
- dev_err(tqspi->dev, "transfer timeout\n");
- if (tqspi->is_curr_dma_xfer) {
- if ((tqspi->cur_direction & DATA_DIR_TX) && tqspi->tx_dma_chan)
- dmaengine_terminate_all(tqspi->tx_dma_chan);
- if ((tqspi->cur_direction & DATA_DIR_RX) && tqspi->rx_dma_chan)
- dmaengine_terminate_all(tqspi->rx_dma_chan);
+ /*
+ * Check if hardware completed the transfer even though
+ * interrupt was lost or delayed. If so, process the
+ * completion and continue.
+ */
+ ret = tegra_qspi_handle_timeout(tqspi);
+ if (ret < 0) {
+ /* Real timeout - clean up and fail */
+ dev_err(tqspi->dev, "transfer timeout\n");
+
+ if (tqspi->is_curr_dma_xfer)
+ tegra_qspi_dma_stop(tqspi);
+
+ tegra_qspi_handle_error(tqspi);
+ ret = -EIO;
+ goto complete_xfer;
}
- tegra_qspi_handle_error(tqspi);
- ret = -EIO;
- goto complete_xfer;
}
if (tqspi->tx_status || tqspi->rx_status) {
--
2.17.1
prev parent reply other threads:[~2025-10-10 15:20 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-10 15:19 [PATCH v1 0/2] spi: tegra210-quad: Improve timeout handling under high system load Vishwaroop A
2025-10-10 15:20 ` [PATCH v1 1/2] spi: tegra210-quad: Fix timeout handling Vishwaroop A
2025-10-14 10:15 ` Thierry Reding
2025-10-10 15:20 ` Vishwaroop A [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251010152001.2399799-3-va@nvidia.com \
--to=va@nvidia.com \
--cc=broonie@kernel.org \
--cc=jonathanh@nvidia.com \
--cc=kyarlagadda@nvidia.com \
--cc=ldewangan@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-spi@vger.kernel.org \
--cc=linux-tegra@vger.kernel.org \
--cc=skomatineni@nvidia.com \
--cc=smangipudi@nvidia.com \
--cc=thierry.reding@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).