Linux SCSI subsystem development
 help / color / mirror / Atom feed
From: Tony Battersby <tonyb@cybernetics.com>
To: Nilesh Javali <njavali@marvell.com>,
	GR-QLogic-Storage-Upstream@marvell.com,
	"James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>,
	"Martin K. Petersen" <martin.petersen@oracle.com>
Cc: linux-scsi <linux-scsi@vger.kernel.org>,
	target-devel@vger.kernel.org, scst-devel@lists.sourceforge.net,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: [SCST PATCH 08/15] scsi: qla2xxx: fix oops during cmd abort
Date: Mon, 8 Sep 2025 14:59:18 -0400	[thread overview]
Message-ID: <fa3d6bb5-eea0-4cc3-86c2-93874f439b42@cybernetics.com> (raw)
In-Reply-To: <80974286-f8ac-4eff-9439-c05fe38716b1@cybernetics.com>

This patch applies to the out-of-tree SCST project, not to the Linux
kernel.  Apply when importing the upstream patch with the same title.

SCST addendum:

- In sqa_on_hw_pending_cmd_timeout(), call qlt_send_term_exchange()
  first and then restart the timeout.  After another timeout, force the
  cmd to finish without waiting for a response from the HW.

Signed-off-by: Tony Battersby <tonyb@cybernetics.com>
---
 qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c | 172 +++++++++++++-----
 1 file changed, 122 insertions(+), 50 deletions(-)

diff --git a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
index e885b9711..9371dad06 100644
--- a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
+++ b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c
@@ -187,6 +187,7 @@ static struct cmd_state_name {
 	{QLA_TGT_STATE_NEED_DATA, "NeedData"},
 	{QLA_TGT_STATE_DATA_IN, "DataIn"},
 	{QLA_TGT_STATE_PROCESSED, "Processed"},
+	{QLA_TGT_STATE_DONE, "Done"},
 };
 
 static char *cmdstate_to_str(uint8_t state)
@@ -497,23 +498,14 @@ static void sqa_qla2xxx_handle_data(struct qla_tgt_cmd *cmd)
 {
 	struct scst_cmd *scst_cmd = cmd->scst_cmd;
 	int rx_status;
-	unsigned long flags;
 
 	TRACE_ENTRY();
 
-	spin_lock_irqsave(&cmd->cmd_lock, flags);
-	if (cmd->aborted) {
-		spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
+	if (unlikely(cmd->aborted)) {
 		scst_set_cmd_error(scst_cmd,
 			SCST_LOAD_SENSE(scst_sense_internal_failure));
-		scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
-			SCST_CONTEXT_THREAD);
-		return;
-	}
-	spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
-	if (cmd->write_data_transferred) {
+		rx_status = SCST_RX_STATUS_ERROR_SENSE_SET;
+	} else if (likely(cmd->write_data_transferred)) {
 		rx_status = SCST_RX_STATUS_SUCCESS;
 	} else {
 		rx_status = SCST_RX_STATUS_ERROR_SENSE_SET;
@@ -691,6 +683,7 @@ static void sqa_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd)
 
 	TRACE_ENTRY();
 
+	cmd->state = QLA_TGT_STATE_DONE;
 	cmd->trc_flags |= TRC_CMD_DONE;
 	scst_tgt_cmd_done(scst_cmd, scst_work_context);
 
@@ -1522,9 +1515,10 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd)
 	cmd = scst_cmd_get_tgt_priv(scst_cmd);
 
 	if (scst_cmd_aborted_on_xmit(scst_cmd)) {
-		TRACE_MGMT_DBG("sqatgt(%ld/%d): CMD_ABORTED cmd[%p]",
-			cmd->vha->host_no, cmd->vha->vp_idx,
-			cmd);
+		TRACE_MGMT_DBG(
+		    "sqatgt(%ld/%d): tag %lld: skipping send response for aborted cmd",
+		    cmd->vha->host_no, cmd->vha->vp_idx,
+		    scst_cmd_get_tag(scst_cmd));
 		qlt_abort_cmd(cmd);
 		scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_ABORTED);
 		scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_DIRECT);
@@ -1841,18 +1835,26 @@ static int sqa_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd,
 	return t32;
 }
 
+/*
+ * Prevent the CTIO completion handler from finding the given cmd, which will
+ * cause the CTIO to be ignored.
+ */
 static void sqa_cleanup_hw_pending_cmd(scsi_qla_host_t *vha,
 	struct qla_tgt_cmd *cmd)
 {
+	struct req_que *req = cmd->qpair->req;
 	uint32_t h;
-	struct qla_qpair *qpair = cmd->qpair;
 
-	for (h = 1; h < qpair->req->num_outstanding_cmds; h++) {
-		if (qpair->req->outstanding_cmds[h] == (srb_t *)cmd) {
-			printk(KERN_INFO "Clearing handle %d for cmd %p", h,
-			       cmd);
-			//TRACE_DBG("Clearing handle %d for cmd %p", h, cmd);
-			qpair->req->outstanding_cmds[h] = NULL;
+	cmd->cmd_sent_to_fw = 0;
+	cmd->trc_flags |= TRC_CTIO_IGNORED;
+
+	for (h = 1; h < req->num_outstanding_cmds; h++) {
+		if (req->outstanding_cmds[h] == (srb_t *)cmd) {
+			PRINT_INFO(
+			    "sqatgt(%ld/%d): tag %lld: handle %x: detaching cmd from handle; CTIO will be ignored",
+			    vha->host_no, vha->vp_idx,
+			    se_cmd_tag(&cmd->se_cmd), h);
+			req->outstanding_cmds[h] = NULL;
 			break;
 		}
 	}
@@ -1863,50 +1865,120 @@ static void sqa_on_hw_pending_cmd_timeout(struct scst_cmd *scst_cmd)
 	struct qla_tgt_cmd *cmd = scst_cmd_get_tgt_priv(scst_cmd);
 	struct scsi_qla_host *vha = cmd->vha;
 	struct qla_qpair *qpair = cmd->qpair;
-	uint8_t aborted = cmd->aborted;
 	unsigned long flags;
+	bool advance_cmd = false;
 
 	TRACE_ENTRY();
-	TRACE_MGMT_DBG("sqatgt(%ld/%d): Cmd %p HW pending for too long (state %s) %s; %s;",
-		       vha->host_no, vha->vp_idx, cmd,
-		       cmdstate_to_str((uint8_t)cmd->state),
-		       cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw",
-		       aborted ? "aborted" : "not aborted");
-
 
-	qlt_abort_cmd(cmd);
+	scst_cmd_get(scst_cmd);
 
 	spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+
+	TRACE_MGMT_DBG(
+	    "sqatgt(%ld/%d): tag %lld: HW pending for too long (state %s) %s; %s",
+	    vha->host_no, vha->vp_idx, scst_cmd_get_tag(scst_cmd),
+	    cmdstate_to_str((uint8_t)cmd->state),
+	    cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw",
+	    cmd->aborted ? "aborted" : "not aborted");
+
 	switch (cmd->state) {
 	case QLA_TGT_STATE_NEW:
 	case QLA_TGT_STATE_DATA_IN:
-		PRINT_ERROR("sqa(%ld): A command in state (%s) should not be HW pending. %s",
-			vha->host_no, cmdstate_to_str((uint8_t)cmd->state),
-			aborted ? "aborted" : "not aborted");
-		break;
+	case QLA_TGT_STATE_DONE:
+		PRINT_ERROR(
+		    "sqatgt(%ld/%d): tag %lld: A command in state (%s) should not be HW pending. %s",
+		    vha->host_no, vha->vp_idx, scst_cmd_get_tag(scst_cmd),
+		    cmdstate_to_str((uint8_t)cmd->state),
+		    cmd->aborted ? "aborted" : "not aborted");
+		goto out_unlock;
 
 	case QLA_TGT_STATE_NEED_DATA:
-		/* the abort will nudge it out of FW */
-		TRACE_MGMT_DBG("Force rx_data cmd %p", cmd);
-		sqa_cleanup_hw_pending_cmd(vha, cmd);
-		scst_set_cmd_error(scst_cmd,
-		    SCST_LOAD_SENSE(scst_sense_internal_failure));
-		scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
-		    SCST_CONTEXT_THREAD);
-		break;
 	case QLA_TGT_STATE_PROCESSED:
-		if (!cmd->cmd_sent_to_fw)
-			PRINT_ERROR("sqa(%ld): command should not be in HW pending. It's already processed. ",
-				    vha->host_no);
-		else
-			TRACE_MGMT_DBG("Force finishing cmd %p", cmd);
-		sqa_cleanup_hw_pending_cmd(vha, cmd);
-		scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED);
-		scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_THREAD);
 		break;
 	}
+
+	/* Handle race with normal CTIO completion. */
+	if (!cmd->cmd_sent_to_fw) {
+		TRACE_MGMT_DBG(
+		    "sqatgt(%ld/%d): tag %lld: cmd not sent to fw; assuming just completed",
+		    vha->host_no, vha->vp_idx,
+		    scst_cmd_get_tag(scst_cmd));
+		goto out_unlock;
+	}
+
+	/* Check for chip reset or a timeout after sending a term exchange. */
+	if (!qpair->fw_started ||
+	    cmd->reset_count != qpair->chip_reset ||
+	    (cmd->sent_term_exchg &&
+	     time_is_before_jiffies(cmd->jiffies_at_term_exchg +
+				    SQA_MAX_HW_PENDING_TIME * HZ / 2))) {
+		/*
+		 * Timeout waiting for a response from the hw, so force the cmd
+		 * to finish without waiting any longer.  Note that this is
+		 * slightly dangerous if the hw is still using the DMA
+		 * mapping, so try term exchange first and see if that works.
+		 */
+
+		sqa_cleanup_hw_pending_cmd(vha, cmd);
+
+		qlt_unmap_sg(vha, cmd);
+
+		advance_cmd = true;
+	} else {
+		/*
+		 * We still expect a CTIO response from the hw.  Terminating the
+		 * exchange should force the CTIO response to happen sooner.
+		 */
+		if (!cmd->sent_term_exchg)
+			qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1);
+	}
+
+	if (advance_cmd) {
+		switch (cmd->state) {
+		case QLA_TGT_STATE_NEED_DATA:
+			TRACE_MGMT_DBG(
+			    "sqatgt(%ld/%d): tag %lld: force rx_data",
+			    vha->host_no, vha->vp_idx,
+			    scst_cmd_get_tag(scst_cmd));
+			cmd->state = QLA_TGT_STATE_DATA_IN;
+			scst_set_cmd_error(scst_cmd,
+			    SCST_LOAD_SENSE(scst_sense_internal_failure));
+			scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET,
+			    SCST_CONTEXT_THREAD);
+			break;
+
+		case QLA_TGT_STATE_PROCESSED:
+			TRACE_MGMT_DBG(
+			    "sqatgt(%ld/%d): tag %lld: force finishing cmd",
+			    vha->host_no, vha->vp_idx,
+			    scst_cmd_get_tag(scst_cmd));
+			scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED);
+			scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_THREAD);
+			break;
+
+		default:
+			break;
+		}
+	} else {
+		/*
+		 * Restart the timer so that this function is called again
+		 * after another timeout.  This is similar to
+		 * scst_update_hw_pending_start() except that we also set
+		 * cmd_hw_pending to 1.
+		 *
+		 * IRQs are already OFF.
+		 */
+		spin_lock(&scst_cmd->sess->sess_list_lock);
+		scst_cmd->cmd_hw_pending = 1;
+		scst_cmd->hw_pending_start = jiffies;
+		spin_unlock(&scst_cmd->sess->sess_list_lock);
+	}
+
+out_unlock:
 	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
+	scst_cmd_put(scst_cmd);
+
 	TRACE_EXIT();
 }
 
-- 
2.43.0



  reply	other threads:[~2025-09-08 18:59 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-08 18:45 [PATCH 00/15] qla2xxx target mode improvements Tony Battersby
2025-09-08 18:47 ` [PATCH 01/15] Revert "scsi: qla2xxx: Perform lockless command completion in abort path" Tony Battersby
2025-09-08 18:48 ` [PATCH 02/15] scsi: qla2xxx: fix initiator mode with qlini_mode=exclusive Tony Battersby
2025-09-08 18:50 ` [PATCH 03/15] scsi: qla2xxx: fix lost interrupts with qlini_mode=disabled Tony Battersby
2025-09-08 18:51 ` [PATCH 04/15] scsi: qla2xxx: use reinit_completion on mbx_intr_comp Tony Battersby
2025-09-08 18:53 ` [PATCH 05/15] scsi: qla2xxx: remove code for unsupported hardware Tony Battersby
2025-09-08 18:54 ` [PATCH 06/15] scsi: qla2xxx: improve debug output for term exchange Tony Battersby
2025-09-08 18:56 ` [PATCH 07/15] scsi: qla2xxx: fix term exchange when cmd_sent_to_fw == 1 Tony Battersby
2025-09-08 18:58 ` [PATCH 08/15] scsi: qla2xxx: fix oops during cmd abort Tony Battersby
2025-09-08 18:59   ` Tony Battersby [this message]
2025-09-11 14:21   ` Dmitry Bogdanov
2025-09-24 19:41     ` Tony Battersby
2025-09-24 19:43       ` [PATCH v2 09/16] scsi: qla2xxx: fix races with aborting commands Tony Battersby
2025-09-24 19:45         ` [SCST PATCH " Tony Battersby
2025-09-25  8:42       ` [DMARC Error]Re: [PATCH 08/15] scsi: qla2xxx: fix oops during cmd abort Dmitry Bogdanov
2025-09-08 19:01 ` [PATCH 09/15] scsi: qla2xxx: improve checks in qlt_xmit_response / qlt_rdy_to_xfer Tony Battersby
2025-09-08 19:02 ` [PATCH 10/15] scsi: qla2xxx: fix TMR failure handling Tony Battersby
2025-09-12 14:36   ` Dmitry Bogdanov
2025-09-16 16:04     ` Tony Battersby
2025-09-17 13:06       ` [DMARC Error]Re: " Dmitry Bogdanov
2025-09-17 20:38         ` Tony Battersby
2025-09-08 19:04 ` [PATCH 11/15] scsi: qla2xxx: fix invalid memory access with big CDBs Tony Battersby
2025-09-08 19:05   ` [SCST PATCH " Tony Battersby
2025-09-08 19:07 ` [PATCH 12/15] scsi: qla2xxx: add cmd->rsp_sent Tony Battersby
2025-09-08 19:08   ` [SCST PATCH " Tony Battersby
2025-09-15 13:47   ` [PATCH " Dmitry Bogdanov
2025-09-24 20:04     ` Tony Battersby
2025-09-08 19:09 ` [PATCH 13/15] scsi: qla2xxx: improve cmd logging Tony Battersby
2025-09-08 19:10 ` [PATCH 14/15] scsi: qla2xxx: add back SRR support Tony Battersby
2025-09-08 19:11   ` [SCST PATCH " Tony Battersby
2025-09-25 12:49   ` [PATCH " Xose Vazquez Perez
2025-09-25 15:30     ` Xose Vazquez Perez
2025-09-25 16:04       ` Tony Battersby
2025-09-25 17:00         ` Tony Battersby
2025-09-25 19:30           ` Tony Battersby
2025-09-08 19:13 ` [PATCH 15/15] scsi: qla2xxx: improve safety of cmd lookup by handle Tony Battersby
2025-09-08 19:14 ` [SCST PATCH] qla2x00t-32gbit: add on_abort_cmd callback Tony Battersby

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fa3d6bb5-eea0-4cc3-86c2-93874f439b42@cybernetics.com \
    --to=tonyb@cybernetics.com \
    --cc=GR-QLogic-Storage-Upstream@marvell.com \
    --cc=James.Bottomley@HansenPartnership.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=njavali@marvell.com \
    --cc=scst-devel@lists.sourceforge.net \
    --cc=target-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox