From: Adrian Hunter <adrian.hunter@intel.com>
To: alexandre.belloni@bootlin.com
Cc: Frank.Li@nxp.com, linux-i3c@lists.infradead.org,
linux-kernel@vger.kernel.org
Subject: [PATCH V2 12/16] i3c: mipi-i3c-hci: Add DMA-mode recovery for internal controller errors
Date: Tue, 21 Apr 2026 20:54:31 +0300 [thread overview]
Message-ID: <20260421175435.122094-13-adrian.hunter@intel.com> (raw)
In-Reply-To: <20260421175435.122094-1-adrian.hunter@intel.com>
Handle internal I3C HCI errors when operating in DMA mode by adding a
simple recovery mechanism.
On detection of an internal controller error, mark recovery as needed and
attempt to restore operation by performing a software reset followed by
state restore. To keep recovery straightforward on this unlikely error
path, all currently queued transfers are terminated and completed with an
error.
This allows the controller to resume operation after internal failures
rather than remaining permanently stuck.
Note, internal errors indicated by INTR_HC_INTERNAL_ERR, cause the
controller to stop.
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
Changes in V2:
Rename completing_xfer to final_xfer
Add hci_dma_xfer_done() before checking for an already complete
transfer
Improve commit message
drivers/i3c/master/mipi-i3c-hci/cmd.h | 6 ++
drivers/i3c/master/mipi-i3c-hci/core.c | 1 +
drivers/i3c/master/mipi-i3c-hci/dma.c | 91 +++++++++++++++++++++++---
drivers/i3c/master/mipi-i3c-hci/hci.h | 1 +
4 files changed, 90 insertions(+), 9 deletions(-)
diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd.h b/drivers/i3c/master/mipi-i3c-hci/cmd.h
index b1bf87daa651..7bada7b4b2de 100644
--- a/drivers/i3c/master/mipi-i3c-hci/cmd.h
+++ b/drivers/i3c/master/mipi-i3c-hci/cmd.h
@@ -65,4 +65,10 @@ struct hci_cmd_ops {
extern const struct hci_cmd_ops mipi_i3c_hci_cmd_v1;
extern const struct hci_cmd_ops mipi_i3c_hci_cmd_v2;
+static inline void hci_cmd_set_resp_err(u32 *response, int resp_err)
+{
+ *response &= ~RESP_ERR_FIELD;
+ *response |= FIELD_PREP(RESP_ERR_FIELD, resp_err);
+}
+
#endif
diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
index 12a0122fb709..69dcf5dad3a5 100644
--- a/drivers/i3c/master/mipi-i3c-hci/core.c
+++ b/drivers/i3c/master/mipi-i3c-hci/core.c
@@ -668,6 +668,7 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id)
if (val & INTR_HC_INTERNAL_ERR) {
dev_err(&hci->master.dev, "Host Controller Internal Error\n");
val &= ~INTR_HC_INTERNAL_ERR;
+ hci->recovery_needed = true;
}
if (val)
diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c
index 41bbd912df7f..4ea623dbed52 100644
--- a/drivers/i3c/master/mipi-i3c-hci/dma.c
+++ b/drivers/i3c/master/mipi-i3c-hci/dma.c
@@ -9,6 +9,7 @@
*/
#include <linux/bitfield.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
@@ -258,6 +259,10 @@ static void hci_dma_init_rh(struct i3c_hci *hci, struct hci_rh_data *rh, int i)
rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE);
rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | RING_CTRL_RUN_STOP);
+ /*
+ * Do not clear the entries of rh->src_xfers because the recovery uses
+ * them. In other cases they should be NULL anyway.
+ */
rh->done_ptr = 0;
rh->ibi_chunk_ptr = 0;
rh->xfer_space = rh->xfer_entries;
@@ -362,7 +367,7 @@ static int hci_dma_init(struct i3c_hci *hci)
rh->resp = dma_alloc_coherent(rings->sysdev, resps_sz,
&rh->resp_dma, GFP_KERNEL);
rh->src_xfers =
- kmalloc_objs(*rh->src_xfers, rh->xfer_entries);
+ kzalloc_objs(*rh->src_xfers, rh->xfer_entries);
ret = -ENOMEM;
if (!rh->xfer || !rh->resp || !rh->src_xfers)
goto err_out;
@@ -572,13 +577,15 @@ static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh)
hci_dma_unmap_xfer(hci, xfer, 1);
rh->src_xfers[done_ptr] = NULL;
xfer->ring_entry = -1;
- xfer->response = resp;
if (tid != xfer->cmd_tid) {
dev_err(&hci->master.dev,
"response tid=%d when expecting %d\n",
tid, xfer->cmd_tid);
- /* TODO: do something about it? */
+ hci->recovery_needed = true;
+ if (!RESP_STATUS(resp))
+ hci_cmd_set_resp_err(&resp, RESP_ERR_HC_TERMINATED);
}
+ xfer->response = resp;
if (xfer == xfer->final_xfer || RESP_STATUS(resp))
complete(xfer->final_xfer->completion);
if (RESP_STATUS(resp))
@@ -635,6 +642,58 @@ static void hci_dma_unblock_enqueue(struct i3c_hci *hci)
}
}
+static void hci_dma_error_out_rh(struct i3c_hci *hci, struct hci_rh_data *rh)
+{
+ /*
+ * The entries of rh->src_xfers are not cleared by
+ * i3c_hci_reset_and_restore(), so can be used here.
+ */
+ for (int i = 0; i < rh->xfer_entries; i++) {
+ struct hci_xfer *xfer = rh->src_xfers[i];
+
+ if (!xfer)
+ continue;
+ hci_dma_unmap_xfer(hci, xfer, 1);
+ rh->src_xfers[i] = NULL;
+ xfer->ring_entry = -1;
+ hci_cmd_set_resp_err(&xfer->response, RESP_ERR_HC_TERMINATED);
+ if (xfer == xfer->final_xfer)
+ complete(xfer->final_xfer->completion);
+ }
+}
+
+static void hci_dma_error_out_all(struct i3c_hci *hci)
+{
+ struct hci_rings_data *rings = hci->io_data;
+
+ for (int i = 0; i < rings->total; i++)
+ hci_dma_error_out_rh(hci, &rings->headers[i]);
+}
+
+static void hci_dma_recovery(struct i3c_hci *hci)
+{
+ int ret;
+
+ dev_err(&hci->master.dev, "Attempting to recover from internal errors\n");
+
+ for (int i = 0; i < 3; i++) {
+ ret = i3c_hci_reset_and_restore(hci);
+ if (!ret)
+ break;
+ dev_err(&hci->master.dev, "Reset and restore failed, error %d\n", ret);
+ /* Just in case the controller is busy, give it some time */
+ msleep(1000);
+ }
+
+ spin_lock_irq(&hci->lock);
+ hci_dma_error_out_all(hci);
+ hci_dma_unblock_enqueue(hci);
+ hci->recovery_needed = false;
+ spin_unlock_irq(&hci->lock);
+
+ dev_err(&hci->master.dev, "Recovery %s\n", ret ? "failed!" : "done");
+}
+
static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
struct hci_xfer *xfer_list, int n)
{
@@ -650,6 +709,17 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
ring_status = rh_reg_read(RING_STATUS);
if (ring_status & RING_STATUS_RUNNING) {
+ /*
+ * The transfer may have already completed, especially
+ * if recovery has just run. Do nothing in that case.
+ */
+ hci_dma_xfer_done(hci, rh);
+ if (xfer_list->final_xfer->ring_entry < 0 &&
+ !hci->recovery_needed && !hci->enqueue_blocked &&
+ ring_status == (RING_STATUS_ENABLED | RING_STATUS_RUNNING)) {
+ spin_unlock_irq(&hci->lock);
+ return false;
+ }
hci->enqueue_blocked = true;
spin_unlock_irq(&hci->lock);
/* stop the ring */
@@ -657,12 +727,8 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
spin_lock_irq(&hci->lock);
ring_status = rh_reg_read(RING_STATUS);
if (ring_status & RING_STATUS_RUNNING) {
- /*
- * We're deep in it if ever this condition is ever met.
- * Hardware might still be writing to memory, etc.
- */
- dev_crit(&hci->master.dev, "unable to abort the ring\n");
- WARN_ON(1);
+ dev_err(&hci->master.dev, "Unable to abort the DMA ring\n");
+ hci->recovery_needed = true;
}
}
@@ -670,6 +736,13 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
hci_dma_xfer_done(hci, rh);
+ if (hci->recovery_needed) {
+ hci->enqueue_blocked = true;
+ spin_unlock_irq(&hci->lock);
+ hci_dma_recovery(hci);
+ return true;
+ }
+
for (i = 0; i < n; i++) {
struct hci_xfer *xfer = xfer_list + i;
int idx = xfer->ring_entry;
diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h
index a3151c26827e..4bf2c66c97b4 100644
--- a/drivers/i3c/master/mipi-i3c-hci/hci.h
+++ b/drivers/i3c/master/mipi-i3c-hci/hci.h
@@ -55,6 +55,7 @@ struct i3c_hci {
atomic_t next_cmd_tid;
bool irq_inactive;
bool enqueue_blocked;
+ bool recovery_needed;
wait_queue_head_t enqueue_wait_queue;
u32 caps;
unsigned int quirks;
--
2.51.0
--
linux-i3c mailing list
linux-i3c@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-i3c
next prev parent reply other threads:[~2026-04-21 17:55 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-21 17:54 [PATCH V2 00/16] i3c: mipi-i3c-hci: DMA abort, recovery and related improvements Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 01/16] i3c: mipi-i3c-hci: Fix suspend behavior when bus disable falls back to software reset Adrian Hunter
2026-04-22 4:03 ` Frank Li
2026-04-21 17:54 ` [PATCH V2 02/16] i3c: mipi-i3c-hci: Preserve RUN bit when aborting DMA ring Adrian Hunter
2026-04-22 4:04 ` Frank Li
2026-04-21 17:54 ` [PATCH V2 03/16] i3c: mipi-i3c-hci: Prevent DMA enqueue while ring is aborting or in error Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 04/16] i3c: mipi-i3c-hci: Wait for DMA ring restart to complete Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 05/16] i3c: mipi-i3c-hci: Move hci_dma_xfer_done() definition Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 06/16] i3c: mipi-i3c-hci: Call hci_dma_xfer_done() from dequeue path Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 07/16] i3c: mipi-i3c-hci: Complete transfer lists immediately on error Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 08/16] i3c: mipi-i3c-hci: Avoid restarting DMA ring after aborting wrong transfer Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 09/16] i3c: mipi-i3c-hci: Add DMA ring abort/reset quirk for Intel controllers Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 10/16] i3c: mipi-i3c-hci: Add DMA ring abort " Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 11/16] i3c: mipi-i3c-hci: Factor out reset-and-restore helper Adrian Hunter
2026-04-21 17:54 ` Adrian Hunter [this message]
2026-04-21 17:54 ` [PATCH V2 13/16] i3c: mipi-i3c-hci: Wait for NoOp commands to complete Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 14/16] i3c: mipi-i3c-hci: Base timeouts on actual transfer start time Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 15/16] i3c: mipi-i3c-hci: Consolidate DMA ring allocation Adrian Hunter
2026-04-21 17:54 ` [PATCH V2 16/16] i3c: mipi-i3c-hci: Increase DMA transfer ring size to maximum Adrian Hunter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260421175435.122094-13-adrian.hunter@intel.com \
--to=adrian.hunter@intel.com \
--cc=Frank.Li@nxp.com \
--cc=alexandre.belloni@bootlin.com \
--cc=linux-i3c@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox