public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Quinn Tran <qutran@marvell.com>,
	Himanshu Madhani <hmadhani@marvell.com>,
	"Ewan D . Milne" <emilne@redhat.com>,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	Sasha Levin <sashal@kernel.org>,
	linux-scsi@vger.kernel.org
Subject: [PATCH AUTOSEL 5.1 15/60] scsi: qla2xxx: Add cleanup for PCI EEH recovery
Date: Tue,  4 Jun 2019 19:21:25 -0400	[thread overview]
Message-ID: <20190604232212.6753-15-sashal@kernel.org> (raw)
In-Reply-To: <20190604232212.6753-1-sashal@kernel.org>

From: Quinn Tran <qutran@marvell.com>

[ Upstream commit 5386a4e6c7fecd282d265a24d930a74ba3c5917b ]

During EEH error recovery testing it was discovered that driver's reset()
callback partially frees resources used by driver, leaving some stale
memory.  After reset() is done and when resume() callback in driver uses
old data which results into error leaving adapter disabled due to PCIe
error.

This patch does cleanup for EEH recovery code path and prevents adapter
from getting disabled.

Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Himanshu Madhani <hmadhani@marvell.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/qla2xxx/qla_os.c | 221 +++++++++++++---------------------
 1 file changed, 82 insertions(+), 139 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 91f576d743fe..d377e50a6c19 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -6838,6 +6838,78 @@ qla2x00_release_firmware(void)
 	mutex_unlock(&qla_fw_lock);
 }
 
+static void qla_pci_error_cleanup(scsi_qla_host_t *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
+	struct qla_qpair *qpair = NULL;
+	struct scsi_qla_host *vp;
+	fc_port_t *fcport;
+	int i;
+	unsigned long flags;
+
+	ha->chip_reset++;
+
+	ha->base_qpair->chip_reset = ha->chip_reset;
+	for (i = 0; i < ha->max_qpairs; i++) {
+		if (ha->queue_pair_map[i])
+			ha->queue_pair_map[i]->chip_reset =
+			    ha->base_qpair->chip_reset;
+	}
+
+	/* purge MBox commands */
+	if (atomic_read(&ha->num_pend_mbx_stage3)) {
+		clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
+		complete(&ha->mbx_intr_comp);
+	}
+
+	i = 0;
+
+	while (atomic_read(&ha->num_pend_mbx_stage3) ||
+	    atomic_read(&ha->num_pend_mbx_stage2) ||
+	    atomic_read(&ha->num_pend_mbx_stage1)) {
+		msleep(20);
+		i++;
+		if (i > 50)
+			break;
+	}
+
+	ha->flags.purge_mbox = 0;
+
+	mutex_lock(&ha->mq_lock);
+	list_for_each_entry(qpair, &base_vha->qp_list, qp_list_elem)
+		qpair->online = 0;
+	mutex_unlock(&ha->mq_lock);
+
+	qla2x00_mark_all_devices_lost(vha, 0);
+
+	spin_lock_irqsave(&ha->vport_slock, flags);
+	list_for_each_entry(vp, &ha->vp_list, list) {
+		atomic_inc(&vp->vref_count);
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
+		qla2x00_mark_all_devices_lost(vp, 0);
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		atomic_dec(&vp->vref_count);
+	}
+	spin_unlock_irqrestore(&ha->vport_slock, flags);
+
+	/* Clear all async request states across all VPs. */
+	list_for_each_entry(fcport, &vha->vp_fcports, list)
+		fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT);
+
+	spin_lock_irqsave(&ha->vport_slock, flags);
+	list_for_each_entry(vp, &ha->vp_list, list) {
+		atomic_inc(&vp->vref_count);
+		spin_unlock_irqrestore(&ha->vport_slock, flags);
+		list_for_each_entry(fcport, &vp->vp_fcports, list)
+			fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT);
+		spin_lock_irqsave(&ha->vport_slock, flags);
+		atomic_dec(&vp->vref_count);
+	}
+	spin_unlock_irqrestore(&ha->vport_slock, flags);
+}
+
+
 static pci_ers_result_t
 qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 {
@@ -6863,20 +6935,7 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 		return PCI_ERS_RESULT_CAN_RECOVER;
 	case pci_channel_io_frozen:
 		ha->flags.eeh_busy = 1;
-		/* For ISP82XX complete any pending mailbox cmd */
-		if (IS_QLA82XX(ha)) {
-			ha->flags.isp82xx_fw_hung = 1;
-			ql_dbg(ql_dbg_aer, vha, 0x9001, "Pci channel io frozen\n");
-			qla82xx_clear_pending_mbx(vha);
-		}
-		qla2x00_free_irqs(vha);
-		pci_disable_device(pdev);
-		/* Return back all IOs */
-		qla2x00_abort_all_cmds(vha, DID_RESET << 16);
-		if (ql2xmqsupport || ql2xnvmeenable) {
-			set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags);
-			qla2xxx_wake_dpc(vha);
-		}
+		qla_pci_error_cleanup(vha);
 		return PCI_ERS_RESULT_NEED_RESET;
 	case pci_channel_io_perm_failure:
 		ha->flags.pci_channel_io_perm_failure = 1;
@@ -6930,122 +6989,14 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev)
 		return PCI_ERS_RESULT_RECOVERED;
 }
 
-static uint32_t
-qla82xx_error_recovery(scsi_qla_host_t *base_vha)
-{
-	uint32_t rval = QLA_FUNCTION_FAILED;
-	uint32_t drv_active = 0;
-	struct qla_hw_data *ha = base_vha->hw;
-	int fn;
-	struct pci_dev *other_pdev = NULL;
-
-	ql_dbg(ql_dbg_aer, base_vha, 0x9006,
-	    "Entered %s.\n", __func__);
-
-	set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
-
-	if (base_vha->flags.online) {
-		/* Abort all outstanding commands,
-		 * so as to be requeued later */
-		qla2x00_abort_isp_cleanup(base_vha);
-	}
-
-
-	fn = PCI_FUNC(ha->pdev->devfn);
-	while (fn > 0) {
-		fn--;
-		ql_dbg(ql_dbg_aer, base_vha, 0x9007,
-		    "Finding pci device at function = 0x%x.\n", fn);
-		other_pdev =
-		    pci_get_domain_bus_and_slot(pci_domain_nr(ha->pdev->bus),
-		    ha->pdev->bus->number, PCI_DEVFN(PCI_SLOT(ha->pdev->devfn),
-		    fn));
-
-		if (!other_pdev)
-			continue;
-		if (atomic_read(&other_pdev->enable_cnt)) {
-			ql_dbg(ql_dbg_aer, base_vha, 0x9008,
-			    "Found PCI func available and enable at 0x%x.\n",
-			    fn);
-			pci_dev_put(other_pdev);
-			break;
-		}
-		pci_dev_put(other_pdev);
-	}
-
-	if (!fn) {
-		/* Reset owner */
-		ql_dbg(ql_dbg_aer, base_vha, 0x9009,
-		    "This devfn is reset owner = 0x%x.\n",
-		    ha->pdev->devfn);
-		qla82xx_idc_lock(ha);
-
-		qla82xx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
-		    QLA8XXX_DEV_INITIALIZING);
-
-		qla82xx_wr_32(ha, QLA82XX_CRB_DRV_IDC_VERSION,
-		    QLA82XX_IDC_VERSION);
-
-		drv_active = qla82xx_rd_32(ha, QLA82XX_CRB_DRV_ACTIVE);
-		ql_dbg(ql_dbg_aer, base_vha, 0x900a,
-		    "drv_active = 0x%x.\n", drv_active);
-
-		qla82xx_idc_unlock(ha);
-		/* Reset if device is not already reset
-		 * drv_active would be 0 if a reset has already been done
-		 */
-		if (drv_active)
-			rval = qla82xx_start_firmware(base_vha);
-		else
-			rval = QLA_SUCCESS;
-		qla82xx_idc_lock(ha);
-
-		if (rval != QLA_SUCCESS) {
-			ql_log(ql_log_info, base_vha, 0x900b,
-			    "HW State: FAILED.\n");
-			qla82xx_clear_drv_active(ha);
-			qla82xx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
-			    QLA8XXX_DEV_FAILED);
-		} else {
-			ql_log(ql_log_info, base_vha, 0x900c,
-			    "HW State: READY.\n");
-			qla82xx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
-			    QLA8XXX_DEV_READY);
-			qla82xx_idc_unlock(ha);
-			ha->flags.isp82xx_fw_hung = 0;
-			rval = qla82xx_restart_isp(base_vha);
-			qla82xx_idc_lock(ha);
-			/* Clear driver state register */
-			qla82xx_wr_32(ha, QLA82XX_CRB_DRV_STATE, 0);
-			qla82xx_set_drv_active(base_vha);
-		}
-		qla82xx_idc_unlock(ha);
-	} else {
-		ql_dbg(ql_dbg_aer, base_vha, 0x900d,
-		    "This devfn is not reset owner = 0x%x.\n",
-		    ha->pdev->devfn);
-		if ((qla82xx_rd_32(ha, QLA82XX_CRB_DEV_STATE) ==
-		    QLA8XXX_DEV_READY)) {
-			ha->flags.isp82xx_fw_hung = 0;
-			rval = qla82xx_restart_isp(base_vha);
-			qla82xx_idc_lock(ha);
-			qla82xx_set_drv_active(base_vha);
-			qla82xx_idc_unlock(ha);
-		}
-	}
-	clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
-
-	return rval;
-}
-
 static pci_ers_result_t
 qla2xxx_pci_slot_reset(struct pci_dev *pdev)
 {
 	pci_ers_result_t ret = PCI_ERS_RESULT_DISCONNECT;
 	scsi_qla_host_t *base_vha = pci_get_drvdata(pdev);
 	struct qla_hw_data *ha = base_vha->hw;
-	struct rsp_que *rsp;
-	int rc, retries = 10;
+	int rc;
+	struct qla_qpair *qpair = NULL;
 
 	ql_dbg(ql_dbg_aer, base_vha, 0x9004,
 	    "Slot Reset.\n");
@@ -7074,24 +7025,16 @@ qla2xxx_pci_slot_reset(struct pci_dev *pdev)
 		goto exit_slot_reset;
 	}
 
-	rsp = ha->rsp_q_map[0];
-	if (qla2x00_request_irqs(ha, rsp))
-		goto exit_slot_reset;
 
 	if (ha->isp_ops->pci_config(base_vha))
 		goto exit_slot_reset;
 
-	if (IS_QLA82XX(ha)) {
-		if (qla82xx_error_recovery(base_vha) == QLA_SUCCESS) {
-			ret = PCI_ERS_RESULT_RECOVERED;
-			goto exit_slot_reset;
-		} else
-			goto exit_slot_reset;
-	}
-
-	while (ha->flags.mbox_busy && retries--)
-		msleep(1000);
+	mutex_lock(&ha->mq_lock);
+	list_for_each_entry(qpair, &base_vha->qp_list, qp_list_elem)
+		qpair->online = 1;
+	mutex_unlock(&ha->mq_lock);
 
+	base_vha->flags.online = 1;
 	set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
 	if (ha->isp_ops->abort_isp(base_vha) == QLA_SUCCESS)
 		ret =  PCI_ERS_RESULT_RECOVERED;
@@ -7115,13 +7058,13 @@ qla2xxx_pci_resume(struct pci_dev *pdev)
 	ql_dbg(ql_dbg_aer, base_vha, 0x900f,
 	    "pci_resume.\n");
 
+	ha->flags.eeh_busy = 0;
+
 	ret = qla2x00_wait_for_hba_online(base_vha);
 	if (ret != QLA_SUCCESS) {
 		ql_log(ql_log_fatal, base_vha, 0x9002,
 		    "The device failed to resume I/O from slot/link_reset.\n");
 	}
-
-	ha->flags.eeh_busy = 0;
 }
 
 static void
-- 
2.20.1


  parent reply	other threads:[~2019-06-04 23:32 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-04 23:21 [PATCH AUTOSEL 5.1 01/60] x86/uaccess, kcov: Disable stack protector Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 02/60] ALSA: seq: Protect in-kernel ioctl calls with mutex Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 03/60] ALSA: seq: Fix race of get-subscription call vs port-delete ioctls Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 04/60] Revert "ALSA: seq: Protect in-kernel ioctl calls with mutex" Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 05/60] ALSA: seq: Cover unsubscribe_port() in list_mutex Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 06/60] driver core: platform: Fix the usage of platform device name(pdev->name) Sasha Levin
2019-06-05  4:58   ` Greg Kroah-Hartman
2019-06-15 22:26     ` Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 07/60] Drivers: misc: fix out-of-bounds access in function param_set_kgdbts_var Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 08/60] configfs: fix possible use-after-free in configfs_register_group Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 09/60] f2fs: fix to avoid accessing xattr across the boundary Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 10/60] ipc: prevent lockup on alloc_msg and free_msg Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 11/60] drivers/perf: arm_spe: Don't error on high-order pages for aux buf Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 12/60] bpf: sockmap, only stop/flush strp if it was enabled at some point Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 13/60] bpf: sockmap remove duplicate queue free Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 14/60] bpf: sockmap fix msg->sg.size account on ingress skb Sasha Levin
2019-06-04 23:21 ` Sasha Levin [this message]
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 16/60] scsi: qedi: remove memset/memcpy to nfunc and use func instead Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 17/60] scsi: qedi: remove set but not used variables 'cdev' and 'udev' Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 18/60] scsi: lpfc: resolve lockdep warnings Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 19/60] scsi: lpfc: correct rcu unlock issue in lpfc_nvme_info_show Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 20/60] scsi: lpfc: add check for loss of ndlp when sending RRQ Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 21/60] arm64: Print physical address of page table base in show_pte() Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 22/60] net: macb: fix error format in dev_err() Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 23/60] enetc: Fix NULL dma address unmap for Tx BD extensions Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 24/60] bpf, tcp: correctly handle DONT_WAIT flags and timeo == 0 Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 25/60] arm64/mm: Inhibit huge-vmap with ptdump Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 26/60] tools/bpftool: move set_max_rlimit() before __bpf_object__open_xattr() Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 27/60] selftests/bpf: fix bpf_get_current_task Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 28/60] nvme-pci: Fix controller freeze wait disabling Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 29/60] nvme: fix srcu locking on error return in nvme_get_ns_from_disk Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 30/60] nvme: remove the ifdef around nvme_nvm_ioctl Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 31/60] nvme: merge nvme_ns_ioctl into nvme_ioctl Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 32/60] nvme: release namespace SRCU protection before performing controller ioctls Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 33/60] nvme: fix memory leak for power latency tolerance Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 34/60] platform/x86: pmc_atom: Add Lex 3I380D industrial PC to critclk_systems DMI table Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 35/60] platform/x86: pmc_atom: Add several Beckhoff Automation boards " Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 36/60] scsi: myrs: Fix uninitialized variable Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 37/60] scsi: bnx2fc: fix incorrect cast to u64 on shift operation Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 38/60] drm/amdgpu: keep stolen memory on picasso Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 39/60] libnvdimm: Fix compilation warnings with W=1 Sasha Levin
2019-06-04 23:21 ` [PATCH AUTOSEL 5.1 40/60] selftests: fib_rule_tests: fix local IPv4 address typo Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190604232212.6753-15-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=emilne@redhat.com \
    --cc=hmadhani@marvell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=qutran@marvell.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox