linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out
@ 2025-06-16 16:26 Karan Tilak Kumar
  2025-06-16 16:26 ` [PATCH v5 2/4] scsi: fnic: Turn off FDMI ACTIVE flags on link down Karan Tilak Kumar
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Karan Tilak Kumar @ 2025-06-16 16:26 UTC (permalink / raw)
  To: sebaddel
  Cc: arulponn, djhawar, gcboffa, mkai2, satishkh, aeasi, jejb,
	martin.petersen, linux-scsi, linux-kernel, jmeneghi, revers,
	dan.carpenter, Karan Tilak Kumar, stable

When both the RHBA and RPA FDMI requests time out, fnic reuses a frame
to send ABTS for each of them. On send completion, this causes an
attempt to free the same frame twice that leads to a crash.

Fix crash by allocating separate frames for RHBA and RPA,
and modify ABTS logic accordingly.

Tested by checking MDS for FDMI information.
Tested by using instrumented driver to:
Drop PLOGI response
Drop RHBA response
Drop RPA response
Drop RHBA and RPA response
Drop PLOGI response + ABTS response
Drop RHBA response + ABTS response
Drop RPA response + ABTS response
Drop RHBA and RPA response + ABTS response for both of them

Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI")
Reviewed-by: Sesidhar Baddela <sebaddel@cisco.com>
Reviewed-by: Arulprabhu Ponnusamy <arulponn@cisco.com>
Reviewed-by: Gian Carlo Boffa <gcboffa@cisco.com>
Tested-by: Arun Easi <aeasi@cisco.com>
Co-developed-by: Arun Easi <aeasi@cisco.com>
Signed-off-by: Arun Easi <aeasi@cisco.com>
Tested-by: Karan Tilak Kumar <kartilak@cisco.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Karan Tilak Kumar <kartilak@cisco.com>
---
Changes between v4 and v5:
    - Incorporate review comments from John:
	- Refactor patches

Changes between v3 and v4:
    - Incorporate review comments from Dan:
	- Remove comments from Cc tag

Changes between v2 and v3:
    - Incorporate review comments from Dan:
	- Add Cc to stable

Changes between v1 and v2:
    - Incorporate review comments from Dan:
        - Add Fixes tag
---
 drivers/scsi/fnic/fdls_disc.c | 113 +++++++++++++++++++++++++---------
 drivers/scsi/fnic/fnic.h      |   2 +-
 drivers/scsi/fnic/fnic_fdls.h |   1 +
 3 files changed, 87 insertions(+), 29 deletions(-)

diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c
index c2b6f4eb338e..0ee1b74967b9 100644
--- a/drivers/scsi/fnic/fdls_disc.c
+++ b/drivers/scsi/fnic/fdls_disc.c
@@ -763,47 +763,69 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport)
 	iport->fabric.timer_pending = 1;
 }
 
-static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
+static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
+		uint16_t oxid)
 {
-	uint8_t *frame;
+	struct fc_frame_header *pfdmi_abts;
 	uint8_t d_id[3];
+	uint8_t *frame;
 	struct fnic *fnic = iport->fnic;
-	struct fc_frame_header *pfabric_abts;
-	unsigned long fdmi_tov;
-	uint16_t oxid;
-	uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
-			sizeof(struct fc_frame_header);
 
 	frame = fdls_alloc_frame(iport);
 	if (frame == NULL) {
 		FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num,
 				"Failed to allocate frame to send FDMI ABTS");
-		return;
+		return NULL;
 	}
 
-	pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
+	pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
 	fdls_init_fabric_abts_frame(frame, iport);
 
 	hton24(d_id, FC_FID_MGMT_SERV);
-	FNIC_STD_SET_D_ID(*pfabric_abts, d_id);
+	FNIC_STD_SET_D_ID(*pfdmi_abts, d_id);
+	FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid);
+
+	return frame;
+}
+
+static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
+{
+	uint8_t *frame;
+	unsigned long fdmi_tov;
+	uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
+			sizeof(struct fc_frame_header);
 
 	if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
-		oxid = iport->active_oxid_fdmi_plogi;
-		FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+		frame = fdls_alloc_init_fdmi_abts_frame(iport,
+						iport->active_oxid_fdmi_plogi);
+		if (frame == NULL)
+			return;
+
 		fnic_send_fcoe_frame(iport, frame, frame_size);
 	} else {
 		if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
-			oxid = iport->active_oxid_fdmi_rhba;
-			FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+			frame = fdls_alloc_init_fdmi_abts_frame(iport,
+						iport->active_oxid_fdmi_rhba);
+			if (frame == NULL)
+				return;
+
 			fnic_send_fcoe_frame(iport, frame, frame_size);
 		}
 		if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
-			oxid = iport->active_oxid_fdmi_rpa;
-			FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
+			frame = fdls_alloc_init_fdmi_abts_frame(iport,
+						iport->active_oxid_fdmi_rpa);
+			if (frame == NULL) {
+				if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
+					goto arm_timer;
+				else
+					return;
+			}
+
 			fnic_send_fcoe_frame(iport, frame, frame_size);
 		}
 	}
 
+arm_timer:
 	fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov);
 	mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov));
 	iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING;
@@ -2244,6 +2266,21 @@ void fdls_fabric_timer_callback(struct timer_list *t)
 	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 }
 
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport)
+{
+	struct fnic *fnic = iport->fnic;
+
+	iport->fabric.fdmi_pending = 0;
+	/* If max retries not exhausted, start over from fdmi plogi */
+	if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
+		iport->fabric.fdmi_retry++;
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+					 "Retry FDMI PLOGI. FDMI retry: %d",
+					 iport->fabric.fdmi_retry);
+		fdls_send_fdmi_plogi(iport);
+	}
+}
+
 void fdls_fdmi_timer_callback(struct timer_list *t)
 {
 	struct fnic_fdls_fabric_s *fabric = from_timer(fabric, t, fdmi_timer);
@@ -2289,14 +2326,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
 	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
 		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
 
-	iport->fabric.fdmi_pending = 0;
-	/* If max retries not exhaused, start over from fdmi plogi */
-	if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
-		iport->fabric.fdmi_retry++;
-		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
-					 "retry fdmi timer %d", iport->fabric.fdmi_retry);
-		fdls_send_fdmi_plogi(iport);
-	}
+	fdls_fdmi_retry_plogi(iport);
 	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
 		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
 	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
@@ -3714,11 +3744,32 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
 	switch (FNIC_FRAME_TYPE(oxid)) {
 	case FNIC_FRAME_TYPE_FDMI_PLOGI:
 		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
+
+		iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
+		iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
 		break;
 	case FNIC_FRAME_TYPE_FDMI_RHBA:
+		iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
+
+		/* If RPA is still pending, don't turn off ABORT PENDING.
+		 * We count on the timer to detect the ABTS timeout and take
+		 * corrective action.
+		 */
+		if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING))
+			iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+
 		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba);
 		break;
 	case FNIC_FRAME_TYPE_FDMI_RPA:
+		iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
+
+		/* If RHBA is still pending, don't turn off ABORT PENDING.
+		 * We count on the timer to detect the ABTS timeout and take
+		 * corrective action.
+		 */
+		if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
+			iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+
 		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa);
 		break;
 	default:
@@ -3728,10 +3779,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
 		break;
 	}
 
-	timer_delete_sync(&iport->fabric.fdmi_timer);
-	iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
-
-	fdls_send_fdmi_plogi(iport);
+	/*
+	 * Only if ABORT PENDING is off, delete the timer, and if no other
+	 * operations are pending, retry FDMI.
+	 * Otherwise, let the timer pop and take the appropriate action.
+	 */
+	if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
+		timer_delete_sync(&iport->fabric.fdmi_timer);
+		if (!iport->fabric.fdmi_pending)
+			fdls_fdmi_retry_plogi(iport);
+	}
 }
 
 static void
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 6c5f6046b1f5..86e293ce530d 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -30,7 +30,7 @@
 
 #define DRV_NAME		"fnic"
 #define DRV_DESCRIPTION		"Cisco FCoE HBA Driver"
-#define DRV_VERSION		"1.8.0.0"
+#define DRV_VERSION		"1.8.0.1"
 #define PFX			DRV_NAME ": "
 #define DFX                     DRV_NAME "%d: "
 
diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h
index 8e610b65ad57..531d0b37e450 100644
--- a/drivers/scsi/fnic/fnic_fdls.h
+++ b/drivers/scsi/fnic/fnic_fdls.h
@@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport,
 bool fdls_delete_tport(struct fnic_iport_s *iport,
 		       struct fnic_tport_s *tport);
 void fdls_fdmi_timer_callback(struct timer_list *t);
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport);
 
 /* fnic_fcs.c */
 void fnic_fdls_init(struct fnic *fnic, int usefip);
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v5 2/4] scsi: fnic: Turn off FDMI ACTIVE flags on link down
  2025-06-16 16:26 [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Karan Tilak Kumar
@ 2025-06-16 16:26 ` Karan Tilak Kumar
  2025-06-16 16:26 ` [PATCH v5 3/4] scsi: fnic: Add and improve logs in FDMI and FDMI ABTS paths Karan Tilak Kumar
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Karan Tilak Kumar @ 2025-06-16 16:26 UTC (permalink / raw)
  To: sebaddel
  Cc: arulponn, djhawar, gcboffa, mkai2, satishkh, aeasi, jejb,
	martin.petersen, linux-scsi, linux-kernel, jmeneghi, revers,
	dan.carpenter, Karan Tilak Kumar, stable

When the link goes down and comes up, FDMI requests are not sent out
anymore.
Fix bug by turning off FNIC_FDMI_ACTIVE when the link goes down.

Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI")
Reviewed-by: Sesidhar Baddela <sebaddel@cisco.com>
Reviewed-by: Arulprabhu Ponnusamy <arulponn@cisco.com>
Reviewed-by: Gian Carlo Boffa <gcboffa@cisco.com>
Reviewed-by: Arun Easi <aeasi@cisco.com>
Tested-by: Karan Tilak Kumar <kartilak@cisco.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Karan Tilak Kumar <kartilak@cisco.com>
---
Changes between v4 and v5:
    - Incorporate review comments from John:
	- Refactor patches
	- Increment driver version number

Changes between v3 and v4:
    - Incorporate review comments from Dan:
	- Remove comments from Cc tag

Changes between v2 and v3:
    - Incorporate review comments from Dan:
	- Add Cc to stable

Changes between v1 and v2:
    - Incorporate review comments from Dan:
	- Add Fixes tag
---
 drivers/scsi/fnic/fdls_disc.c | 9 ++++++---
 drivers/scsi/fnic/fnic.h      | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c
index 0ee1b74967b9..146744ca97c2 100644
--- a/drivers/scsi/fnic/fdls_disc.c
+++ b/drivers/scsi/fnic/fdls_disc.c
@@ -5027,9 +5027,12 @@ void fnic_fdls_link_down(struct fnic_iport_s *iport)
 		fdls_delete_tport(iport, tport);
 	}
 
-	if ((fnic_fdmi_support == 1) && (iport->fabric.fdmi_pending > 0)) {
-		timer_delete_sync(&iport->fabric.fdmi_timer);
-		iport->fabric.fdmi_pending = 0;
+	if (fnic_fdmi_support == 1) {
+		if (iport->fabric.fdmi_pending > 0) {
+			timer_delete_sync(&iport->fabric.fdmi_timer);
+			iport->fabric.fdmi_pending = 0;
+		}
+		iport->flags &= ~FNIC_FDMI_ACTIVE;
 	}
 
 	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 86e293ce530d..c2fdc6553e62 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -30,7 +30,7 @@
 
 #define DRV_NAME		"fnic"
 #define DRV_DESCRIPTION		"Cisco FCoE HBA Driver"
-#define DRV_VERSION		"1.8.0.1"
+#define DRV_VERSION		"1.8.0.2"
 #define PFX			DRV_NAME ": "
 #define DFX                     DRV_NAME "%d: "
 
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v5 3/4] scsi: fnic: Add and improve logs in FDMI and FDMI ABTS paths
  2025-06-16 16:26 [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Karan Tilak Kumar
  2025-06-16 16:26 ` [PATCH v5 2/4] scsi: fnic: Turn off FDMI ACTIVE flags on link down Karan Tilak Kumar
@ 2025-06-16 16:26 ` Karan Tilak Kumar
  2025-06-16 16:26 ` [PATCH v5 4/4] scsi: fnic: Set appropriate logging level for log message Karan Tilak Kumar
  2025-07-08  8:54 ` [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Hannes Reinecke
  3 siblings, 0 replies; 6+ messages in thread
From: Karan Tilak Kumar @ 2025-06-16 16:26 UTC (permalink / raw)
  To: sebaddel
  Cc: arulponn, djhawar, gcboffa, mkai2, satishkh, aeasi, jejb,
	martin.petersen, linux-scsi, linux-kernel, jmeneghi, revers,
	dan.carpenter, Karan Tilak Kumar

Add logs in FDMI and FDMI ABTS paths.
Modify log text in these paths.

Reviewed-by: Sesidhar Baddela <sebaddel@cisco.com>
Reviewed-by: Arulprabhu Ponnusamy <arulponn@cisco.com>
Reviewed-by: Gian Carlo Boffa <gcboffa@cisco.com>
Reviewed-by: Arun Easi <aeasi@cisco.com>
Reviewed-by: John Meneghini <jmeneghi@redhat.com>
Signed-off-by: Karan Tilak Kumar <kartilak@cisco.com>
---
 drivers/scsi/fnic/fdls_disc.c | 65 +++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c
index 146744ca97c2..14691db4d5f9 100644
--- a/drivers/scsi/fnic/fdls_disc.c
+++ b/drivers/scsi/fnic/fdls_disc.c
@@ -791,6 +791,7 @@ static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
 static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
 {
 	uint8_t *frame;
+	struct fnic *fnic = iport->fnic;
 	unsigned long fdmi_tov;
 	uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
 			sizeof(struct fc_frame_header);
@@ -801,6 +802,9 @@ static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
 		if (frame == NULL)
 			return;
 
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			 "0x%x: FDLS send FDMI PLOGI abts. iport->fabric.state: %d oxid: 0x%x",
+			 iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_plogi);
 		fnic_send_fcoe_frame(iport, frame, frame_size);
 	} else {
 		if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
@@ -809,6 +813,9 @@ static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
 			if (frame == NULL)
 				return;
 
+			FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+				 "0x%x: FDLS send FDMI RHBA abts. iport->fabric.state: %d oxid: 0x%x",
+				 iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rhba);
 			fnic_send_fcoe_frame(iport, frame, frame_size);
 		}
 		if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
@@ -821,6 +828,9 @@ static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
 					return;
 			}
 
+			FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+				 "0x%x: FDLS send FDMI RPA abts. iport->fabric.state: %d oxid: 0x%x",
+				 iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rpa);
 			fnic_send_fcoe_frame(iport, frame, frame_size);
 		}
 	}
@@ -829,6 +839,10 @@ static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
 	fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov);
 	mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov));
 	iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING;
+
+	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+		 "0x%x: iport->fabric.fdmi_pending: 0x%x",
+		 iport->fcid, iport->fabric.fdmi_pending);
 }
 
 static void fdls_send_fabric_flogi(struct fnic_iport_s *iport)
@@ -2292,7 +2306,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
 	spin_lock_irqsave(&fnic->fnic_lock, flags);
 
 	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
-		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+		"iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
 
 	if (!iport->fabric.fdmi_pending) {
 		/* timer expired after fdmi responses received. */
@@ -2300,7 +2314,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
 		return;
 	}
 	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
-		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+		"iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
 
 	/* if not abort pending, send an abort */
 	if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
@@ -2309,26 +2323,37 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
 		return;
 	}
 	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
-		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+		"iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
 
 	/* ABTS pending for an active fdmi request that is pending.
 	 * That means FDMI ABTS timed out
 	 * Schedule to free the OXID after 2*r_a_tov and proceed
 	 */
 	if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			"FDMI PLOGI ABTS timed out. Schedule oxid free: 0x%x\n",
+			iport->active_oxid_fdmi_plogi);
 		fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_plogi);
 	} else {
-		if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
+		if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
+			FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+						"FDMI RHBA ABTS timed out. Schedule oxid free: 0x%x\n",
+						iport->active_oxid_fdmi_rhba);
 			fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rhba);
-		if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING)
+		}
+		if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
+			FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+						"FDMI RPA ABTS timed out. Schedule oxid free: 0x%x\n",
+						iport->active_oxid_fdmi_rpa);
 			fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rpa);
+		}
 	}
 	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
-		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+		"iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
 
 	fdls_fdmi_retry_plogi(iport);
 	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
-		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
+		"iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending);
 	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
 }
 
@@ -3743,12 +3768,26 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
 
 	switch (FNIC_FRAME_TYPE(oxid)) {
 	case FNIC_FRAME_TYPE_FDMI_PLOGI:
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			"Received FDMI PLOGI ABTS rsp with oxid: 0x%x", oxid);
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			 "0x%x: iport->fabric.fdmi_pending: 0x%x",
+			 iport->fcid, iport->fabric.fdmi_pending);
 		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
 
 		iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
 		iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			 "0x%x: iport->fabric.fdmi_pending: 0x%x",
+			 iport->fcid, iport->fabric.fdmi_pending);
 		break;
 	case FNIC_FRAME_TYPE_FDMI_RHBA:
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			"Received FDMI RHBA ABTS rsp with oxid: 0x%x", oxid);
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			 "0x%x: iport->fabric.fdmi_pending: 0x%x",
+			 iport->fcid, iport->fabric.fdmi_pending);
+
 		iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
 
 		/* If RPA is still pending, don't turn off ABORT PENDING.
@@ -3759,8 +3798,17 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
 			iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
 
 		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba);
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			 "0x%x: iport->fabric.fdmi_pending: 0x%x",
+			 iport->fcid, iport->fabric.fdmi_pending);
 		break;
 	case FNIC_FRAME_TYPE_FDMI_RPA:
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			"Received FDMI RPA ABTS rsp with oxid: 0x%x", oxid);
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			 "0x%x: iport->fabric.fdmi_pending: 0x%x",
+			 iport->fcid, iport->fabric.fdmi_pending);
+
 		iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
 
 		/* If RHBA is still pending, don't turn off ABORT PENDING.
@@ -3771,6 +3819,9 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
 			iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
 
 		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa);
+		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+			 "0x%x: iport->fabric.fdmi_pending: 0x%x",
+			 iport->fcid, iport->fabric.fdmi_pending);
 		break;
 	default:
 		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v5 4/4] scsi: fnic: Set appropriate logging level for log message
  2025-06-16 16:26 [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Karan Tilak Kumar
  2025-06-16 16:26 ` [PATCH v5 2/4] scsi: fnic: Turn off FDMI ACTIVE flags on link down Karan Tilak Kumar
  2025-06-16 16:26 ` [PATCH v5 3/4] scsi: fnic: Add and improve logs in FDMI and FDMI ABTS paths Karan Tilak Kumar
@ 2025-06-16 16:26 ` Karan Tilak Kumar
  2025-07-08  8:54 ` [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Hannes Reinecke
  3 siblings, 0 replies; 6+ messages in thread
From: Karan Tilak Kumar @ 2025-06-16 16:26 UTC (permalink / raw)
  To: sebaddel
  Cc: arulponn, djhawar, gcboffa, mkai2, satishkh, aeasi, jejb,
	martin.petersen, linux-scsi, linux-kernel, jmeneghi, revers,
	dan.carpenter, Karan Tilak Kumar

Replace KERN_INFO with KERN_DEBUG for a log message.

Reviewed-by: Sesidhar Baddela <sebaddel@cisco.com>
Reviewed-by: Arulprabhu Ponnusamy <arulponn@cisco.com>
Reviewed-by: Gian Carlo Boffa <gcboffa@cisco.com>
Reviewed-by: Arun Easi <aeasi@cisco.com>
Signed-off-by: Karan Tilak Kumar <kartilak@cisco.com>
---
Changes between v4 and v5:
    - Incorporate review comments from John:
	- Refactor patches
---
 drivers/scsi/fnic/fnic_scsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 7133b254cbe4..75b29a018d1f 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -1046,7 +1046,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, unsigned int cq_ind
 		if (icmnd_cmpl->scsi_status == SAM_STAT_TASK_SET_FULL)
 			atomic64_inc(&fnic_stats->misc_stats.queue_fulls);
 
-		FNIC_SCSI_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->host, fnic->fnic_num,
 				"xfer_len: %llu", xfer_len);
 		break;
 
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out
  2025-06-16 16:26 [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Karan Tilak Kumar
                   ` (2 preceding siblings ...)
  2025-06-16 16:26 ` [PATCH v5 4/4] scsi: fnic: Set appropriate logging level for log message Karan Tilak Kumar
@ 2025-07-08  8:54 ` Hannes Reinecke
  2025-07-08 17:01   ` Karan Tilak Kumar (kartilak)
  3 siblings, 1 reply; 6+ messages in thread
From: Hannes Reinecke @ 2025-07-08  8:54 UTC (permalink / raw)
  To: Karan Tilak Kumar, sebaddel
  Cc: arulponn, djhawar, gcboffa, mkai2, satishkh, aeasi, jejb,
	martin.petersen, linux-scsi, linux-kernel, jmeneghi, revers,
	dan.carpenter, stable

On 6/16/25 18:26, Karan Tilak Kumar wrote:
> When both the RHBA and RPA FDMI requests time out, fnic reuses a frame
> to send ABTS for each of them. On send completion, this causes an
> attempt to free the same frame twice that leads to a crash.
> 
> Fix crash by allocating separate frames for RHBA and RPA,
> and modify ABTS logic accordingly.
> 
> Tested by checking MDS for FDMI information.
> Tested by using instrumented driver to:
> Drop PLOGI response
> Drop RHBA response
> Drop RPA response
> Drop RHBA and RPA response
> Drop PLOGI response + ABTS response
> Drop RHBA response + ABTS response
> Drop RPA response + ABTS response
> Drop RHBA and RPA response + ABTS response for both of them
> 
> Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI")
> Reviewed-by: Sesidhar Baddela <sebaddel@cisco.com>
> Reviewed-by: Arulprabhu Ponnusamy <arulponn@cisco.com>
> Reviewed-by: Gian Carlo Boffa <gcboffa@cisco.com>
> Tested-by: Arun Easi <aeasi@cisco.com>
> Co-developed-by: Arun Easi <aeasi@cisco.com>
> Signed-off-by: Arun Easi <aeasi@cisco.com>
> Tested-by: Karan Tilak Kumar <kartilak@cisco.com>
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Karan Tilak Kumar <kartilak@cisco.com>
> ---
> Changes between v4 and v5:
>      - Incorporate review comments from John:
> 	- Refactor patches
> 
> Changes between v3 and v4:
>      - Incorporate review comments from Dan:
> 	- Remove comments from Cc tag
> 
> Changes between v2 and v3:
>      - Incorporate review comments from Dan:
> 	- Add Cc to stable
> 
> Changes between v1 and v2:
>      - Incorporate review comments from Dan:
>          - Add Fixes tag
> ---
>   drivers/scsi/fnic/fdls_disc.c | 113 +++++++++++++++++++++++++---------
>   drivers/scsi/fnic/fnic.h      |   2 +-
>   drivers/scsi/fnic/fnic_fdls.h |   1 +
>   3 files changed, 87 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c
> index c2b6f4eb338e..0ee1b74967b9 100644
> --- a/drivers/scsi/fnic/fdls_disc.c
> +++ b/drivers/scsi/fnic/fdls_disc.c
> @@ -763,47 +763,69 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport)
>   	iport->fabric.timer_pending = 1;
>   }
>   
> -static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
> +static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
> +		uint16_t oxid)
>   {
> -	uint8_t *frame;
> +	struct fc_frame_header *pfdmi_abts;
>   	uint8_t d_id[3];
> +	uint8_t *frame;
>   	struct fnic *fnic = iport->fnic;
> -	struct fc_frame_header *pfabric_abts;
> -	unsigned long fdmi_tov;
> -	uint16_t oxid;
> -	uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
> -			sizeof(struct fc_frame_header);
>   
>   	frame = fdls_alloc_frame(iport);
>   	if (frame == NULL) {
>   		FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num,
>   				"Failed to allocate frame to send FDMI ABTS");
> -		return;
> +		return NULL;
>   	}
>   
> -	pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
> +	pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
>   	fdls_init_fabric_abts_frame(frame, iport);
>   
>   	hton24(d_id, FC_FID_MGMT_SERV);
> -	FNIC_STD_SET_D_ID(*pfabric_abts, d_id);
> +	FNIC_STD_SET_D_ID(*pfdmi_abts, d_id);
> +	FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid);
> +
> +	return frame;
> +}
> +
> +static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
> +{
> +	uint8_t *frame;
> +	unsigned long fdmi_tov;
> +	uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
> +			sizeof(struct fc_frame_header);
>   
>   	if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
> -		oxid = iport->active_oxid_fdmi_plogi;
> -		FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> +		frame = fdls_alloc_init_fdmi_abts_frame(iport,
> +						iport->active_oxid_fdmi_plogi);
> +		if (frame == NULL)
> +			return;
> +
>   		fnic_send_fcoe_frame(iport, frame, frame_size);
>   	} else {
>   		if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
> -			oxid = iport->active_oxid_fdmi_rhba;
> -			FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> +			frame = fdls_alloc_init_fdmi_abts_frame(iport,
> +						iport->active_oxid_fdmi_rhba);
> +			if (frame == NULL)
> +				return;
> +
>   			fnic_send_fcoe_frame(iport, frame, frame_size);
>   		}
>   		if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
> -			oxid = iport->active_oxid_fdmi_rpa;
> -			FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> +			frame = fdls_alloc_init_fdmi_abts_frame(iport,
> +						iport->active_oxid_fdmi_rpa);
> +			if (frame == NULL) {
> +				if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
> +					goto arm_timer;
> +				else
> +					return;
> +			}
> +
>   			fnic_send_fcoe_frame(iport, frame, frame_size);
>   		}
>   	}
>   
> +arm_timer:
>   	fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov);
>   	mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov));
>   	iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING;
> @@ -2244,6 +2266,21 @@ void fdls_fabric_timer_callback(struct timer_list *t)
>   	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
>   }
>   
> +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport)
> +{
> +	struct fnic *fnic = iport->fnic;
> +
> +	iport->fabric.fdmi_pending = 0;
> +	/* If max retries not exhausted, start over from fdmi plogi */
> +	if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
> +		iport->fabric.fdmi_retry++;
> +		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> +					 "Retry FDMI PLOGI. FDMI retry: %d",
> +					 iport->fabric.fdmi_retry);
> +		fdls_send_fdmi_plogi(iport);
> +	}
> +}
> +
>   void fdls_fdmi_timer_callback(struct timer_list *t)
>   {
>   	struct fnic_fdls_fabric_s *fabric = from_timer(fabric, t, fdmi_timer);
> @@ -2289,14 +2326,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
>   	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
>   		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
>   
> -	iport->fabric.fdmi_pending = 0;
> -	/* If max retries not exhaused, start over from fdmi plogi */
> -	if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
> -		iport->fabric.fdmi_retry++;
> -		FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> -					 "retry fdmi timer %d", iport->fabric.fdmi_retry);
> -		fdls_send_fdmi_plogi(iport);
> -	}
> +	fdls_fdmi_retry_plogi(iport);
>   	FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
>   		"fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
>   	spin_unlock_irqrestore(&fnic->fnic_lock, flags);
> @@ -3714,11 +3744,32 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
>   	switch (FNIC_FRAME_TYPE(oxid)) {
>   	case FNIC_FRAME_TYPE_FDMI_PLOGI:
>   		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
> +
> +		iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
> +		iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
>   		break;
>   	case FNIC_FRAME_TYPE_FDMI_RHBA:
> +		iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
> +
> +		/* If RPA is still pending, don't turn off ABORT PENDING.
> +		 * We count on the timer to detect the ABTS timeout and take
> +		 * corrective action.
> +		 */
> +		if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING))
> +			iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> +
>   		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba);
>   		break;
>   	case FNIC_FRAME_TYPE_FDMI_RPA:
> +		iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
> +
> +		/* If RHBA is still pending, don't turn off ABORT PENDING.
> +		 * We count on the timer to detect the ABTS timeout and take
> +		 * corrective action.
> +		 */
> +		if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
> +			iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> +
>   		fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa);
>   		break;
>   	default:
> @@ -3728,10 +3779,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
>   		break;
>   	}
>   
> -	timer_delete_sync(&iport->fabric.fdmi_timer);
> -	iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> -
> -	fdls_send_fdmi_plogi(iport);
> +	/*
> +	 * Only if ABORT PENDING is off, delete the timer, and if no other
> +	 * operations are pending, retry FDMI.
> +	 * Otherwise, let the timer pop and take the appropriate action.
> +	 */
> +	if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
> +		timer_delete_sync(&iport->fabric.fdmi_timer);
> +		if (!iport->fabric.fdmi_pending)
> +			fdls_fdmi_retry_plogi(iport);
> +	}
>   }
>   
>   static void
> diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
> index 6c5f6046b1f5..86e293ce530d 100644
> --- a/drivers/scsi/fnic/fnic.h
> +++ b/drivers/scsi/fnic/fnic.h
> @@ -30,7 +30,7 @@
>   
>   #define DRV_NAME		"fnic"
>   #define DRV_DESCRIPTION		"Cisco FCoE HBA Driver"
> -#define DRV_VERSION		"1.8.0.0"
> +#define DRV_VERSION		"1.8.0.1"
>   #define PFX			DRV_NAME ": "
>   #define DFX                     DRV_NAME "%d: "
>   
> diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h
> index 8e610b65ad57..531d0b37e450 100644
> --- a/drivers/scsi/fnic/fnic_fdls.h
> +++ b/drivers/scsi/fnic/fnic_fdls.h
> @@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport,
>   bool fdls_delete_tport(struct fnic_iport_s *iport,
>   		       struct fnic_tport_s *tport);
>   void fdls_fdmi_timer_callback(struct timer_list *t);
> +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport);
>   
>   /* fnic_fcs.c */
>   void fnic_fdls_init(struct fnic *fnic, int usefip);

Please make the version change a separate patch and add it as the last 
patch in the series. That way you'll have better tracking if all patches
from that series are applied.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out
  2025-07-08  8:54 ` [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Hannes Reinecke
@ 2025-07-08 17:01   ` Karan Tilak Kumar (kartilak)
  0 siblings, 0 replies; 6+ messages in thread
From: Karan Tilak Kumar (kartilak) @ 2025-07-08 17:01 UTC (permalink / raw)
  To: Hannes Reinecke, Sesidhar Baddela (sebaddel)
  Cc: Arulprabhu Ponnusamy (arulponn), Dhanraj Jhawar (djhawar),
	Gian Carlo Boffa (gcboffa), Masa Kai (mkai2),
	Satish Kharat (satishkh), Arun Easi (aeasi), jejb@linux.ibm.com,
	martin.petersen@oracle.com, linux-scsi@vger.kernel.org,
	linux-kernel@vger.kernel.org, jmeneghi@redhat.com,
	revers@redhat.com, dan.carpenter@linaro.org,
	stable@vger.kernel.org

On Tuesday, July 8, 2025 1:54 AM, Hannes Reinecke <hare@suse.de> wrote:
>
> On 6/16/25 18:26, Karan Tilak Kumar wrote:
> > When both the RHBA and RPA FDMI requests time out, fnic reuses a frame
> > to send ABTS for each of them. On send completion, this causes an
> > attempt to free the same frame twice that leads to a crash.
> >
> > Fix crash by allocating separate frames for RHBA and RPA,
> > and modify ABTS logic accordingly.
> >
> > Tested by checking MDS for FDMI information.
> > Tested by using instrumented driver to:
> > Drop PLOGI response
> > Drop RHBA response
> > Drop RPA response
> > Drop RHBA and RPA response
> > Drop PLOGI response + ABTS response
> > Drop RHBA response + ABTS response
> > Drop RPA response + ABTS response
> > Drop RHBA and RPA response + ABTS response for both of them
> >
> > Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI")
> > Reviewed-by: Sesidhar Baddela <sebaddel@cisco.com>
> > Reviewed-by: Arulprabhu Ponnusamy <arulponn@cisco.com>
> > Reviewed-by: Gian Carlo Boffa <gcboffa@cisco.com>
> > Tested-by: Arun Easi <aeasi@cisco.com>
> > Co-developed-by: Arun Easi <aeasi@cisco.com>
> > Signed-off-by: Arun Easi <aeasi@cisco.com>
> > Tested-by: Karan Tilak Kumar <kartilak@cisco.com>
> > Cc: <stable@vger.kernel.org>
> > Signed-off-by: Karan Tilak Kumar <kartilak@cisco.com>
> > ---
> > Changes between v4 and v5:
> >      - Incorporate review comments from John:
> >     - Refactor patches
> >
> > Changes between v3 and v4:
> >      - Incorporate review comments from Dan:
> >     - Remove comments from Cc tag
> >
> > Changes between v2 and v3:
> >      - Incorporate review comments from Dan:
> >     - Add Cc to stable
> >
> > Changes between v1 and v2:
> >      - Incorporate review comments from Dan:
> >          - Add Fixes tag
> > ---
> >   drivers/scsi/fnic/fdls_disc.c | 113 +++++++++++++++++++++++++---------
> >   drivers/scsi/fnic/fnic.h      |   2 +-
> >   drivers/scsi/fnic/fnic_fdls.h |   1 +
> >   3 files changed, 87 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c
> > index c2b6f4eb338e..0ee1b74967b9 100644
> > --- a/drivers/scsi/fnic/fdls_disc.c
> > +++ b/drivers/scsi/fnic/fdls_disc.c
> > @@ -763,47 +763,69 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport)
> >     iport->fabric.timer_pending = 1;
> >   }
> >
> > -static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
> > +static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
> > +           uint16_t oxid)
> >   {
> > -   uint8_t *frame;
> > +   struct fc_frame_header *pfdmi_abts;
> >     uint8_t d_id[3];
> > +   uint8_t *frame;
> >     struct fnic *fnic = iport->fnic;
> > -   struct fc_frame_header *pfabric_abts;
> > -   unsigned long fdmi_tov;
> > -   uint16_t oxid;
> > -   uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
> > -                   sizeof(struct fc_frame_header);
> >
> >     frame = fdls_alloc_frame(iport);
> >     if (frame == NULL) {
> >             FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num,
> >                             "Failed to allocate frame to send FDMI ABTS");
> > -           return;
> > +           return NULL;
> >     }
> >
> > -   pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
> > +   pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
> >     fdls_init_fabric_abts_frame(frame, iport);
> >
> >     hton24(d_id, FC_FID_MGMT_SERV);
> > -   FNIC_STD_SET_D_ID(*pfabric_abts, d_id);
> > +   FNIC_STD_SET_D_ID(*pfdmi_abts, d_id);
> > +   FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid);
> > +
> > +   return frame;
> > +}
> > +
> > +static void fdls_send_fdmi_abts(struct fnic_iport_s *iport)
> > +{
> > +   uint8_t *frame;
> > +   unsigned long fdmi_tov;
> > +   uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
> > +                   sizeof(struct fc_frame_header);
> >
> >     if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
> > -           oxid = iport->active_oxid_fdmi_plogi;
> > -           FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> > +           frame = fdls_alloc_init_fdmi_abts_frame(iport,
> > +                                           iport->active_oxid_fdmi_plogi);
> > +           if (frame == NULL)
> > +                   return;
> > +
> >             fnic_send_fcoe_frame(iport, frame, frame_size);
> >     } else {
> >             if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {
> > -                   oxid = iport->active_oxid_fdmi_rhba;
> > -                   FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> > +                   frame = fdls_alloc_init_fdmi_abts_frame(iport,
> > +                                           iport->active_oxid_fdmi_rhba);
> > +                   if (frame == NULL)
> > +                           return;
> > +
> >                     fnic_send_fcoe_frame(iport, frame, frame_size);
> >             }
> >             if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
> > -                   oxid = iport->active_oxid_fdmi_rpa;
> > -                   FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
> > +                   frame = fdls_alloc_init_fdmi_abts_frame(iport,
> > +                                           iport->active_oxid_fdmi_rpa);
> > +                   if (frame == NULL) {
> > +                           if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
> > +                                   goto arm_timer;
> > +                           else
> > +                                   return;
> > +                   }
> > +
> >                     fnic_send_fcoe_frame(iport, frame, frame_size);
> >             }
> >     }
> >
> > +arm_timer:
> >     fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov);
> >     mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov));
> >     iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING;
> > @@ -2244,6 +2266,21 @@ void fdls_fabric_timer_callback(struct timer_list *t)
> >     spin_unlock_irqrestore(&fnic->fnic_lock, flags);
> >   }
> >
> > +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport)
> > +{
> > +   struct fnic *fnic = iport->fnic;
> > +
> > +   iport->fabric.fdmi_pending = 0;
> > +   /* If max retries not exhausted, start over from fdmi plogi */
> > +   if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
> > +           iport->fabric.fdmi_retry++;
> > +           FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> > +                                    "Retry FDMI PLOGI. FDMI retry: %d",
> > +                                    iport->fabric.fdmi_retry);
> > +           fdls_send_fdmi_plogi(iport);
> > +   }
> > +}
> > +
> >   void fdls_fdmi_timer_callback(struct timer_list *t)
> >   {
> >     struct fnic_fdls_fabric_s *fabric = from_timer(fabric, t, fdmi_timer);
> > @@ -2289,14 +2326,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t)
> >     FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> >             "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
> >
> > -   iport->fabric.fdmi_pending = 0;
> > -   /* If max retries not exhaused, start over from fdmi plogi */
> > -   if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
> > -           iport->fabric.fdmi_retry++;
> > -           FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> > -                                    "retry fdmi timer %d", iport->fabric.fdmi_retry);
> > -           fdls_send_fdmi_plogi(iport);
> > -   }
> > +   fdls_fdmi_retry_plogi(iport);
> >     FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
> >             "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
> >     spin_unlock_irqrestore(&fnic->fnic_lock, flags);
> > @@ -3714,11 +3744,32 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
> >     switch (FNIC_FRAME_TYPE(oxid)) {
> >     case FNIC_FRAME_TYPE_FDMI_PLOGI:
> >             fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
> > +
> > +           iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
> > +           iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> >             break;
> >     case FNIC_FRAME_TYPE_FDMI_RHBA:
> > +           iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
> > +
> > +           /* If RPA is still pending, don't turn off ABORT PENDING.
> > +            * We count on the timer to detect the ABTS timeout and take
> > +            * corrective action.
> > +            */
> > +           if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING))
> > +                   iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> > +
> >             fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba);
> >             break;
> >     case FNIC_FRAME_TYPE_FDMI_RPA:
> > +           iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
> > +
> > +           /* If RHBA is still pending, don't turn off ABORT PENDING.
> > +            * We count on the timer to detect the ABTS timeout and take
> > +            * corrective action.
> > +            */
> > +           if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
> > +                   iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> > +
> >             fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa);
> >             break;
> >     default:
> > @@ -3728,10 +3779,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport,
> >             break;
> >     }
> >
> > -   timer_delete_sync(&iport->fabric.fdmi_timer);
> > -   iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
> > -
> > -   fdls_send_fdmi_plogi(iport);
> > +   /*
> > +    * Only if ABORT PENDING is off, delete the timer, and if no other
> > +    * operations are pending, retry FDMI.
> > +    * Otherwise, let the timer pop and take the appropriate action.
> > +    */
> > +   if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
> > +           timer_delete_sync(&iport->fabric.fdmi_timer);
> > +           if (!iport->fabric.fdmi_pending)
> > +                   fdls_fdmi_retry_plogi(iport);
> > +   }
> >   }
> >
> >   static void
> > diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
> > index 6c5f6046b1f5..86e293ce530d 100644
> > --- a/drivers/scsi/fnic/fnic.h
> > +++ b/drivers/scsi/fnic/fnic.h
> > @@ -30,7 +30,7 @@
> >
> >   #define DRV_NAME          "fnic"
> >   #define DRV_DESCRIPTION           "Cisco FCoE HBA Driver"
> > -#define DRV_VERSION                "1.8.0.0"
> > +#define DRV_VERSION                "1.8.0.1"
> >   #define PFX                       DRV_NAME ": "
> >   #define DFX                     DRV_NAME "%d: "
> >
> > diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h
> > index 8e610b65ad57..531d0b37e450 100644
> > --- a/drivers/scsi/fnic/fnic_fdls.h
> > +++ b/drivers/scsi/fnic/fnic_fdls.h
> > @@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport,
> >   bool fdls_delete_tport(struct fnic_iport_s *iport,
> >                    struct fnic_tport_s *tport);
> >   void fdls_fdmi_timer_callback(struct timer_list *t);
> > +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport);
> >
> >   /* fnic_fcs.c */
> >   void fnic_fdls_init(struct fnic *fnic, int usefip);
>
> Please make the version change a separate patch and add it as the last
> patch in the series. That way you'll have better tracking if all patches
> from that series are applied.
>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke                  Kernel Storage Architect
> hare@suse.de                                +49 911 74053 688
> SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
> HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
>

Thanks for your review and feedback Hannes.
Yes, that was the format with the prior versions of the patches.
However, we received feedback that since two of those patches were critical fixes,
and the others were debug enhancements, that we make some modifications to this format.
That's why we went along with this approach.

Regards,
Karan

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-07-08 17:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-16 16:26 [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Karan Tilak Kumar
2025-06-16 16:26 ` [PATCH v5 2/4] scsi: fnic: Turn off FDMI ACTIVE flags on link down Karan Tilak Kumar
2025-06-16 16:26 ` [PATCH v5 3/4] scsi: fnic: Add and improve logs in FDMI and FDMI ABTS paths Karan Tilak Kumar
2025-06-16 16:26 ` [PATCH v5 4/4] scsi: fnic: Set appropriate logging level for log message Karan Tilak Kumar
2025-07-08  8:54 ` [PATCH v5 1/4] scsi: fnic: Fix crash in fnic_wq_cmpl_handler when FDMI times out Hannes Reinecke
2025-07-08 17:01   ` Karan Tilak Kumar (kartilak)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).