From: Tony Battersby <tonyb@cybernetics.com>
To: Nilesh Javali <njavali@marvell.com>,
GR-QLogic-Storage-Upstream@marvell.com,
"James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>,
"Martin K. Petersen" <martin.petersen@oracle.com>
Cc: linux-scsi <linux-scsi@vger.kernel.org>,
target-devel@vger.kernel.org, scst-devel@lists.sourceforge.net,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: [SCST PATCH 08/15] scsi: qla2xxx: fix oops during cmd abort
Date: Mon, 8 Sep 2025 14:59:18 -0400 [thread overview]
Message-ID: <fa3d6bb5-eea0-4cc3-86c2-93874f439b42@cybernetics.com> (raw)
In-Reply-To: <80974286-f8ac-4eff-9439-c05fe38716b1@cybernetics.com>
This patch applies to the out-of-tree SCST project, not to the Linux
kernel. Apply when importing the upstream patch with the same title.
SCST addendum:
- In sqa_on_hw_pending_cmd_timeout(), call qlt_send_term_exchange()
first and then restart the timeout. After another timeout, force the
cmd to finish without waiting for a response from the HW.
Signed-off-by: Tony Battersby <tonyb@cybernetics.com>
---
qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c | 172 +++++++++++++-----
1 file changed, 122 insertions(+), 50 deletions(-)
diff --git a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
index e885b9711..9371dad06 100644
--- a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
+++ b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
@@ -187,6 +187,7 @@ static struct cmd_state_name {
{QLA_TGT_STATE_NEED_DATA, "NeedData"},
{QLA_TGT_STATE_DATA_IN, "DataIn"},
{QLA_TGT_STATE_PROCESSED, "Processed"},
+ {QLA_TGT_STATE_DONE, "Done"},
};
static char *cmdstate_to_str(uint8_t state)
@@ -497,23 +498,14 @@ static void sqa_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
{
struct scst_cmd *scst_cmd = cmd->scst_cmd;
int rx_status;
- unsigned long flags;
TRACE_ENTRY();
- spin_lock_irqsave(&cmd->cmd_lock, flags);
- if (cmd->aborted) {
- spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
+ if (unlikely(cmd->aborted)) {
scst_set_cmd_error(scst_cmd,
SCST_LOAD_SENSE(scst_sense_internal_failure));
- scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
- SCST_CONTEXT_THREAD);
- return;
- }
- spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
- if (cmd->write_data_transferred) {
+ rx_status = SCST_RX_STATUS_ERROR_SENSE_SET;
+ } else if (likely(cmd->write_data_transferred)) {
rx_status = SCST_RX_STATUS_SUCCESS;
} else {
rx_status = SCST_RX_STATUS_ERROR_SENSE_SET;
@@ -691,6 +683,7 @@ static void sqa_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd)
TRACE_ENTRY();
+ cmd->state = QLA_TGT_STATE_DONE;
cmd->trc_flags |= TRC_CMD_DONE;
scst_tgt_cmd_done(scst_cmd, scst_work_context);
@@ -1522,9 +1515,10 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd)
cmd = scst_cmd_get_tgt_priv(scst_cmd);
if (scst_cmd_aborted_on_xmit(scst_cmd)) {
- TRACE_MGMT_DBG("sqatgt(%ld/%d): CMD_ABORTED cmd[%p]",
- cmd->vha->host_no, cmd->vha->vp_idx,
- cmd);
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: skipping send response for aborted cmd",
+ cmd->vha->host_no, cmd->vha->vp_idx,
+ scst_cmd_get_tag(scst_cmd));
qlt_abort_cmd(cmd);
scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_ABORTED);
scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_DIRECT);
@@ -1841,18 +1835,26 @@ static int sqa_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd,
return t32;
}
+/*
+ * Prevent the CTIO completion handler from finding the given cmd, which will
+ * cause the CTIO to be ignored.
+ */
static void sqa_cleanup_hw_pending_cmd(scsi_qla_host_t *vha,
struct qla_tgt_cmd *cmd)
{
+ struct req_que *req = cmd->qpair->req;
uint32_t h;
- struct qla_qpair *qpair = cmd->qpair;
- for (h = 1; h < qpair->req->num_outstanding_cmds; h++) {
- if (qpair->req->outstanding_cmds[h] == (srb_t *)cmd) {
- printk(KERN_INFO "Clearing handle %d for cmd %p", h,
- cmd);
- //TRACE_DBG("Clearing handle %d for cmd %p", h, cmd);
- qpair->req->outstanding_cmds[h] = NULL;
+ cmd->cmd_sent_to_fw = 0;
+ cmd->trc_flags |= TRC_CTIO_IGNORED;
+
+ for (h = 1; h < req->num_outstanding_cmds; h++) {
+ if (req->outstanding_cmds[h] == (srb_t *)cmd) {
+ PRINT_INFO(
+ "sqatgt(%ld/%d): tag %lld: handle %x: detaching cmd from handle; CTIO will be ignored",
+ vha->host_no, vha->vp_idx,
+ se_cmd_tag(&cmd->se_cmd), h);
+ req->outstanding_cmds[h] = NULL;
break;
}
}
@@ -1863,50 +1865,120 @@ static void sqa_on_hw_pending_cmd_timeout(struct scst_cmd *scst_cmd)
struct qla_tgt_cmd *cmd = scst_cmd_get_tgt_priv(scst_cmd);
struct scsi_qla_host *vha = cmd->vha;
struct qla_qpair *qpair = cmd->qpair;
- uint8_t aborted = cmd->aborted;
unsigned long flags;
+ bool advance_cmd = false;
TRACE_ENTRY();
- TRACE_MGMT_DBG("sqatgt(%ld/%d): Cmd %p HW pending for too long (state %s) %s; %s;",
- vha->host_no, vha->vp_idx, cmd,
- cmdstate_to_str((uint8_t)cmd->state),
- cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw",
- aborted ? "aborted" : "not aborted");
-
- qlt_abort_cmd(cmd);
+ scst_cmd_get(scst_cmd);
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: HW pending for too long (state %s) %s; %s",
+ vha->host_no, vha->vp_idx, scst_cmd_get_tag(scst_cmd),
+ cmdstate_to_str((uint8_t)cmd->state),
+ cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw",
+ cmd->aborted ? "aborted" : "not aborted");
+
switch (cmd->state) {
case QLA_TGT_STATE_NEW:
case QLA_TGT_STATE_DATA_IN:
- PRINT_ERROR("sqa(%ld): A command in state (%s) should not be HW pending. %s",
- vha->host_no, cmdstate_to_str((uint8_t)cmd->state),
- aborted ? "aborted" : "not aborted");
- break;
+ case QLA_TGT_STATE_DONE:
+ PRINT_ERROR(
+ "sqatgt(%ld/%d): tag %lld: A command in state (%s) should not be HW pending. %s",
+ vha->host_no, vha->vp_idx, scst_cmd_get_tag(scst_cmd),
+ cmdstate_to_str((uint8_t)cmd->state),
+ cmd->aborted ? "aborted" : "not aborted");
+ goto out_unlock;
case QLA_TGT_STATE_NEED_DATA:
- /* the abort will nudge it out of FW */
- TRACE_MGMT_DBG("Force rx_data cmd %p", cmd);
- sqa_cleanup_hw_pending_cmd(vha, cmd);
- scst_set_cmd_error(scst_cmd,
- SCST_LOAD_SENSE(scst_sense_internal_failure));
- scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
- SCST_CONTEXT_THREAD);
- break;
case QLA_TGT_STATE_PROCESSED:
- if (!cmd->cmd_sent_to_fw)
- PRINT_ERROR("sqa(%ld): command should not be in HW pending. It's already processed. ",
- vha->host_no);
- else
- TRACE_MGMT_DBG("Force finishing cmd %p", cmd);
- sqa_cleanup_hw_pending_cmd(vha, cmd);
- scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED);
- scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_THREAD);
break;
}
+
+ /* Handle race with normal CTIO completion. */
+ if (!cmd->cmd_sent_to_fw) {
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: cmd not sent to fw; assuming just completed",
+ vha->host_no, vha->vp_idx,
+ scst_cmd_get_tag(scst_cmd));
+ goto out_unlock;
+ }
+
+ /* Check for chip reset or a timeout after sending a term exchange. */
+ if (!qpair->fw_started ||
+ cmd->reset_count != qpair->chip_reset ||
+ (cmd->sent_term_exchg &&
+ time_is_before_jiffies(cmd->jiffies_at_term_exchg +
+ SQA_MAX_HW_PENDING_TIME * HZ / 2))) {
+ /*
+ * Timeout waiting for a response from the hw, so force the cmd
+ * to finish without waiting any longer. Note that this is
+ * slightly dangerous if the hw is still using the DMA
+ * mapping, so try term exchange first and see if that works.
+ */
+
+ sqa_cleanup_hw_pending_cmd(vha, cmd);
+
+ qlt_unmap_sg(vha, cmd);
+
+ advance_cmd = true;
+ } else {
+ /*
+ * We still expect a CTIO response from the hw. Terminating the
+ * exchange should force the CTIO response to happen sooner.
+ */
+ if (!cmd->sent_term_exchg)
+ qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1);
+ }
+
+ if (advance_cmd) {
+ switch (cmd->state) {
+ case QLA_TGT_STATE_NEED_DATA:
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: force rx_data",
+ vha->host_no, vha->vp_idx,
+ scst_cmd_get_tag(scst_cmd));
+ cmd->state = QLA_TGT_STATE_DATA_IN;
+ scst_set_cmd_error(scst_cmd,
+ SCST_LOAD_SENSE(scst_sense_internal_failure));
+ scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
+ SCST_CONTEXT_THREAD);
+ break;
+
+ case QLA_TGT_STATE_PROCESSED:
+ TRACE_MGMT_DBG(
+ "sqatgt(%ld/%d): tag %lld: force finishing cmd",
+ vha->host_no, vha->vp_idx,
+ scst_cmd_get_tag(scst_cmd));
+ scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED);
+ scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_THREAD);
+ break;
+
+ default:
+ break;
+ }
+ } else {
+ /*
+ * Restart the timer so that this function is called again
+ * after another timeout. This is similar to
+ * scst_update_hw_pending_start() except that we also set
+ * cmd_hw_pending to 1.
+ *
+ * IRQs are already OFF.
+ */
+ spin_lock(&scst_cmd->sess->sess_list_lock);
+ scst_cmd->cmd_hw_pending = 1;
+ scst_cmd->hw_pending_start = jiffies;
+ spin_unlock(&scst_cmd->sess->sess_list_lock);
+ }
+
+out_unlock:
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+ scst_cmd_put(scst_cmd);
+
TRACE_EXIT();
}
--
2.43.0
next prev parent reply other threads:[~2025-09-08 18:59 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-08 18:45 [PATCH 00/15] qla2xxx target mode improvements Tony Battersby
2025-09-08 18:47 ` [PATCH 01/15] Revert "scsi: qla2xxx: Perform lockless command completion in abort path" Tony Battersby
2025-09-08 18:48 ` [PATCH 02/15] scsi: qla2xxx: fix initiator mode with qlini_mode=exclusive Tony Battersby
2025-09-08 18:50 ` [PATCH 03/15] scsi: qla2xxx: fix lost interrupts with qlini_mode=disabled Tony Battersby
2025-09-08 18:51 ` [PATCH 04/15] scsi: qla2xxx: use reinit_completion on mbx_intr_comp Tony Battersby
2025-09-08 18:53 ` [PATCH 05/15] scsi: qla2xxx: remove code for unsupported hardware Tony Battersby
2025-09-08 18:54 ` [PATCH 06/15] scsi: qla2xxx: improve debug output for term exchange Tony Battersby
2025-09-08 18:56 ` [PATCH 07/15] scsi: qla2xxx: fix term exchange when cmd_sent_to_fw == 1 Tony Battersby
2025-09-08 18:58 ` [PATCH 08/15] scsi: qla2xxx: fix oops during cmd abort Tony Battersby
2025-09-08 18:59 ` Tony Battersby [this message]
2025-09-11 14:21 ` Dmitry Bogdanov
2025-09-24 19:41 ` Tony Battersby
2025-09-24 19:43 ` [PATCH v2 09/16] scsi: qla2xxx: fix races with aborting commands Tony Battersby
2025-09-24 19:45 ` [SCST PATCH " Tony Battersby
2025-09-25 8:42 ` [DMARC Error]Re: [PATCH 08/15] scsi: qla2xxx: fix oops during cmd abort Dmitry Bogdanov
2025-09-08 19:01 ` [PATCH 09/15] scsi: qla2xxx: improve checks in qlt_xmit_response / qlt_rdy_to_xfer Tony Battersby
2025-09-08 19:02 ` [PATCH 10/15] scsi: qla2xxx: fix TMR failure handling Tony Battersby
2025-09-12 14:36 ` Dmitry Bogdanov
2025-09-16 16:04 ` Tony Battersby
2025-09-17 13:06 ` [DMARC Error]Re: " Dmitry Bogdanov
2025-09-17 20:38 ` Tony Battersby
2025-09-08 19:04 ` [PATCH 11/15] scsi: qla2xxx: fix invalid memory access with big CDBs Tony Battersby
2025-09-08 19:05 ` [SCST PATCH " Tony Battersby
2025-09-08 19:07 ` [PATCH 12/15] scsi: qla2xxx: add cmd->rsp_sent Tony Battersby
2025-09-08 19:08 ` [SCST PATCH " Tony Battersby
2025-09-15 13:47 ` [PATCH " Dmitry Bogdanov
2025-09-24 20:04 ` Tony Battersby
2025-09-08 19:09 ` [PATCH 13/15] scsi: qla2xxx: improve cmd logging Tony Battersby
2025-09-08 19:10 ` [PATCH 14/15] scsi: qla2xxx: add back SRR support Tony Battersby
2025-09-08 19:11 ` [SCST PATCH " Tony Battersby
2025-09-25 12:49 ` [PATCH " Xose Vazquez Perez
2025-09-25 15:30 ` Xose Vazquez Perez
2025-09-25 16:04 ` Tony Battersby
2025-09-25 17:00 ` Tony Battersby
2025-09-25 19:30 ` Tony Battersby
2025-09-08 19:13 ` [PATCH 15/15] scsi: qla2xxx: improve safety of cmd lookup by handle Tony Battersby
2025-09-08 19:14 ` [SCST PATCH] qla2x00t-32gbit: add on_abort_cmd callback Tony Battersby
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=fa3d6bb5-eea0-4cc3-86c2-93874f439b42@cybernetics.com \
--to=tonyb@cybernetics.com \
--cc=GR-QLogic-Storage-Upstream@marvell.com \
--cc=James.Bottomley@HansenPartnership.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=njavali@marvell.com \
--cc=scst-devel@lists.sourceforge.net \
--cc=target-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox