[PATCH 22/42] lpfc: Fix deadlock on host_lock during cable pulls

linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: James Smart <jsmart2021@gmail.com>
To: linux-scsi@vger.kernel.org
Cc: James Smart <jsmart2021@gmail.com>,
	Dick Kennedy <dick.kennedy@broadcom.com>
Subject: [PATCH 22/42] lpfc: Fix deadlock on host_lock during cable pulls
Date: Wed, 14 Aug 2019 16:56:52 -0700	[thread overview]
Message-ID: <20190814235712.4487-23-jsmart2021@gmail.com> (raw)
In-Reply-To: <20190814235712.4487-1-jsmart2021@gmail.com>

During cable pull testing, a deadlock was seen among
lpfc_nlp_counters(), lpfc_mbx_process_link_up() and
lpfc_work_list_done(). They are all waiting on the
shost->host_lock.

The issue is that all of these cases disable interrupts (raise irq)
when taking the lock, but use spin_unlock_irq() when unlocking. The
unlock path will unconditionally re-enable interrupts, even
in cases where the irq state should be preserved. The re-enablement
allowed the other paths to execute, which then caused the
deadlock.

Fix by converting the lock/unlock to irqsave/irqrestore.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
---
 drivers/scsi/lpfc/lpfc_hbadisc.c | 47 ++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index a47db99784ab..44e779e4c885 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -118,6 +118,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
 	struct lpfc_work_evt *evtp;
 	int  put_node;
 	int  put_rport;
+	unsigned long iflags;
 
 	rdata = rport->dd_data;
 	ndlp = rdata->pnode;
@@ -170,22 +171,22 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
 	}
 
 	shost = lpfc_shost_from_vport(vport);
-	spin_lock_irq(shost->host_lock);
+	spin_lock_irqsave(shost->host_lock, iflags);
 	ndlp->nlp_flag |= NLP_IN_DEV_LOSS;
-	spin_unlock_irq(shost->host_lock);
+	spin_unlock_irqrestore(shost->host_lock, iflags);
 
 	/* We need to hold the node by incrementing the reference
 	 * count until this queued work is done
 	 */
 	evtp->evt_arg1  = lpfc_nlp_get(ndlp);
 
-	spin_lock_irq(&phba->hbalock);
+	spin_lock_irqsave(&phba->hbalock, iflags);
 	if (evtp->evt_arg1) {
 		evtp->evt = LPFC_EVT_DEV_LOSS;
 		list_add_tail(&evtp->evt_listp, &phba->work_list);
 		lpfc_worker_wake_up(phba);
 	}
-	spin_unlock_irq(&phba->hbalock);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
 
 	return;
 }
@@ -212,14 +213,15 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
 	int  put_node;
 	int warn_on = 0;
 	int fcf_inuse = 0;
+	unsigned long iflags;
 
 	rport = ndlp->rport;
 	vport = ndlp->vport;
 	shost = lpfc_shost_from_vport(vport);
 
-	spin_lock_irq(shost->host_lock);
+	spin_lock_irqsave(shost->host_lock, iflags);
 	ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
-	spin_unlock_irq(shost->host_lock);
+	spin_unlock_irqrestore(shost->host_lock, iflags);
 
 	if (!rport)
 		return fcf_inuse;
@@ -3115,8 +3117,9 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 	int rc;
 	struct fcf_record *fcf_record;
 	uint32_t fc_flags = 0;
+	unsigned long iflags;
 
-	spin_lock_irq(&phba->hbalock);
+	spin_lock_irqsave(&phba->hbalock, iflags);
 	phba->fc_linkspeed = bf_get(lpfc_mbx_read_top_link_spd, la);
 
 	if (!(phba->hba_flag & HBA_FCOE_MODE)) {
@@ -3213,12 +3216,12 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 		vport->fc_myDID = phba->fc_pref_DID;
 		fc_flags |= FC_LBIT;
 	}
-	spin_unlock_irq(&phba->hbalock);
+	spin_unlock_irqrestore(&phba->hbalock, iflags);
 
 	if (fc_flags) {
-		spin_lock_irq(shost->host_lock);
+		spin_lock_irqsave(shost->host_lock, iflags);
 		vport->fc_flag |= fc_flags;
-		spin_unlock_irq(shost->host_lock);
+		spin_unlock_irqrestore(shost->host_lock, iflags);
 	}
 
 	lpfc_linkup(phba);
@@ -3292,22 +3295,22 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 		 * The driver is expected to do FIP/FCF. Call the port
 		 * and get the FCF Table.
 		 */
-		spin_lock_irq(&phba->hbalock);
+		spin_lock_irqsave(&phba->hbalock, iflags);
 		if (phba->hba_flag & FCF_TS_INPROG) {
-			spin_unlock_irq(&phba->hbalock);
+			spin_unlock_irqrestore(&phba->hbalock, iflags);
 			return;
 		}
 		/* This is the initial FCF discovery scan */
 		phba->fcf.fcf_flag |= FCF_INIT_DISC;
-		spin_unlock_irq(&phba->hbalock);
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
 		lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_DISCOVERY,
 				"2778 Start FCF table scan at linkup\n");
 		rc = lpfc_sli4_fcf_scan_read_fcf_rec(phba,
 						     LPFC_FCOE_FCF_GET_FIRST);
 		if (rc) {
-			spin_lock_irq(&phba->hbalock);
+			spin_lock_irqsave(&phba->hbalock, iflags);
 			phba->fcf.fcf_flag &= ~FCF_INIT_DISC;
-			spin_unlock_irq(&phba->hbalock);
+			spin_unlock_irqrestore(&phba->hbalock, iflags);
 			goto out;
 		}
 		/* Reset FCF roundrobin bmask for new discovery */
@@ -3366,6 +3369,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	MAILBOX_t *mb = &pmb->u.mb;
 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
 	uint8_t attn_type;
+	unsigned long iflags;
 
 	/* Unblock ELS traffic */
 	pring = lpfc_phba_elsring(phba);
@@ -3387,12 +3391,12 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 
 	memcpy(&phba->alpa_map[0], mp->virt, 128);
 
-	spin_lock_irq(shost->host_lock);
+	spin_lock_irqsave(shost->host_lock, iflags);
 	if (bf_get(lpfc_mbx_read_top_pb, la))
 		vport->fc_flag |= FC_BYPASSED_MODE;
 	else
 		vport->fc_flag &= ~FC_BYPASSED_MODE;
-	spin_unlock_irq(shost->host_lock);
+	spin_unlock_irqrestore(shost->host_lock, iflags);
 
 	if (phba->fc_eventTag <= la->eventTag) {
 		phba->fc_stat.LinkMultiEvent++;
@@ -3403,12 +3407,12 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 
 	phba->fc_eventTag = la->eventTag;
 	if (phba->sli_rev < LPFC_SLI_REV4) {
-		spin_lock_irq(&phba->hbalock);
+		spin_lock_irqsave(&phba->hbalock, iflags);
 		if (bf_get(lpfc_mbx_read_top_mm, la))
 			phba->sli.sli_flag |= LPFC_MENLO_MAINT;
 		else
 			phba->sli.sli_flag &= ~LPFC_MENLO_MAINT;
-		spin_unlock_irq(&phba->hbalock);
+		spin_unlock_irqrestore(&phba->hbalock, iflags);
 	}
 
 	phba->link_events++;
@@ -4196,8 +4200,9 @@ static void
 lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
 {
 	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+	unsigned long iflags;
 
-	spin_lock_irq(shost->host_lock);
+	spin_lock_irqsave(shost->host_lock, iflags);
 	switch (state) {
 	case NLP_STE_UNUSED_NODE:
 		vport->fc_unused_cnt += count;
@@ -4227,7 +4232,7 @@ lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
 			vport->fc_npr_cnt += count;
 		break;
 	}
-	spin_unlock_irq(shost->host_lock);
+	spin_unlock_irqrestore(shost->host_lock, iflags);
 }
 
 static void
-- 
2.13.7

next prev parent reply	other threads:[~2019-08-14 23:57 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-14 23:56 [PATCH 00/42] lpfc: Update lpfc to revision 12.4.0.0 James Smart
2019-08-14 23:56 ` [PATCH 01/42] lpfc: Limit xri count for kdump environment James Smart
2019-08-14 23:56 ` [PATCH 02/42] lpfc: Fix PLOGI failure with high remoteport count James Smart
2019-08-14 23:56 ` [PATCH 03/42] lpfc: Fix ELS field alignments James Smart
2019-08-14 23:56 ` [PATCH 04/42] lpfc: Fix crash on driver unload in wq free James Smart
2019-08-14 23:56 ` [PATCH 05/42] lpfc: Fix failure to clear non-zero eq_delay after io rate reduction James Smart
2019-08-14 23:56 ` [PATCH 06/42] lpfc: Fix leak of ELS completions on adapter reset James Smart
2019-08-14 23:56 ` [PATCH 07/42] lpfc: Fix port relogin failure due to GID_FT interaction James Smart
2019-08-14 23:56 ` [PATCH 08/42] lpfc: Fix discovery when target has no GID_FT information James Smart
2019-08-14 23:56 ` [PATCH 09/42] lpfc: Fix ADISC reception terminating login state if a NVME target James Smart
2019-08-14 23:56 ` [PATCH 10/42] lpfc: Fix issuing init_vpi mbox on SLI-3 card James Smart
2019-08-14 23:56 ` [PATCH 11/42] lpfc: Fix Oops in nvme_register with target logout/login James Smart
2019-08-14 23:56 ` [PATCH 12/42] lpfc: Fix irq raising in lpfc_sli_hba_down James Smart
2019-08-14 23:56 ` [PATCH 13/42] lpfc: Fix oops when fewer hdwqs than cpus James Smart
2019-08-14 23:56 ` [PATCH 14/42] lpfc: Fix FLOGI handling across multiple link up/down conditions James Smart
2019-08-14 23:56 ` [PATCH 15/42] lpfc: Fix null ptr oops updating lpfc_devloss_tmo via sysfs attribute James Smart
2019-08-14 23:56 ` [PATCH 16/42] lpfc: Fix devices that don't return after devloss followed by rediscovery James Smart
2019-08-14 23:56 ` [PATCH 17/42] lpfc: Fix loss of remote port after devloss due to lack of RPIs James Smart
2019-08-14 23:56 ` [PATCH 18/42] lpfc: Fix propagation of devloss_tmo setting to nvme transport James Smart
2019-08-14 23:56 ` [PATCH 19/42] lpfc: Fix sg_seg_cnt for HBAs that don't support NVME James Smart
2019-08-14 23:56 ` [PATCH 20/42] lpfc: Fix driver nvme rescan logging James Smart
2019-08-14 23:56 ` [PATCH 21/42] lpfc: Fix error in remote port address change James Smart
2019-08-14 23:56 ` James Smart [this message]
2019-08-14 23:56 ` [PATCH 23/42] lpfc: Fix crash due to port reset racing vs adapter error handling James Smart
2019-08-14 23:56 ` [PATCH 24/42] lpfc: Fix too many sg segments spamming in kernel log James Smart
2019-08-14 23:56 ` [PATCH 25/42] lpfc: Fix hang when downloading fw on port enabled for nvme James Smart
2019-08-14 23:56 ` [PATCH 26/42] lpfc: Fix nvme target mode ABTSing a received ABTS James Smart
2019-08-14 23:56 ` [PATCH 27/42] lpfc: Fix nvme sg_seg_cnt display if HBA does not support NVME James Smart
2019-08-14 23:56 ` [PATCH 28/42] lpfc: Fix sli4 adapter initialization with MSI James Smart
2019-08-14 23:56 ` [PATCH 29/42] lpfc: Fix upcall to bsg done in non-success cases James Smart
2019-08-14 23:57 ` [PATCH 30/42] lpfc: Fix Max Frame Size value shown in fdmishow output James Smart
2019-08-14 23:57 ` [PATCH 31/42] lpfc: Fix reported physical link speed on a disabled trunked link James Smart
2019-08-14 23:57 ` [PATCH 32/42] lpfc: Fix BlockGuard enablement on FCoE adapters James Smart
2019-08-14 23:57 ` [PATCH 33/42] lpfc: Fix nvme first burst module parameter description James Smart
2019-08-14 23:57 ` [PATCH 34/42] lpfc: Fix coverity warnings James Smart
2019-08-14 23:57 ` [PATCH 35/42] lpfc: Add simple unlikely optimizations to reduce NVME latency James Smart
2019-08-14 23:57 ` [PATCH 36/42] lpfc: Migrate to %px and %pf in kernel print calls James Smart
2019-08-14 23:57 ` [PATCH 37/42] lpfc: Add first and second level hardware revisions to sysfs reporting James Smart
2019-08-14 23:57 ` [PATCH 38/42] lpfc: Add MDS driver loopback diagnostics support James Smart
2019-08-14 23:57 ` [PATCH 39/42] lpfc: Support dynamic unbounded SGL lists on G7 hardware James Smart
2019-08-14 23:57 ` [PATCH 40/42] lpfc: Add NVMe sequence level error recovery support James Smart
2019-08-14 23:57 ` [PATCH 41/42] lpfc: Merge per-protocol WQ/CQ pairs into single per-cpu pair James Smart
2019-08-14 23:57 ` [PATCH 42/42] lpfc: Update lpfc version to 12.4.0.0 James Smart
2019-08-20  3:06 ` [PATCH 00/42] lpfc: Update lpfc to revision 12.4.0.0 Martin K. Petersen
2019-08-27 13:31   ` Hannes Reinecke
2019-08-28  0:10     ` James Smart

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:a47db99784a dfblob:44e779e4c88 )
 OR (
bs:"[PATCH 22/42] lpfc: Fix deadlock on host_lock during cable pulls" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190814235712.4487-23-jsmart2021@gmail.com \
    --to=jsmart2021@gmail.com \
    --cc=dick.kennedy@broadcom.com \
    --cc=linux-scsi@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).