* [PATCH 3/13]: PCI Err: IPR scsi device driver recovery
@ 2005-06-28 23:58 Linas Vepstas
2005-07-07 13:28 ` Brian King
0 siblings, 1 reply; 2+ messages in thread
From: Linas Vepstas @ 2005-06-28 23:58 UTC (permalink / raw)
To: linux-kernel, Benjamin Herrenschmidt, long
Cc: Hidetoshi Seto, Greg KH, ak, Paul Mackerras, linuxppc64-dev,
linux-pci, johnrose
[-- Attachment #1: Type: text/plain, Size: 236 bytes --]
pci-err-3-ipr.patch
Adds PCI error recovery callbacks to the IPR SCSI controller
driver. Tested and seems to work well; a variant of this already
ships in the Novell/SUSE SLES9 SP2 kernel.
Signed-off-by: Linas Vepstas <linas@linas.org>
[-- Attachment #2: pci-err-3-ipr.patch --]
[-- Type: text/plain, Size: 4279 bytes --]
--- linux-2.6.12-git10/drivers/scsi/ipr.c.linas-orig 2005-06-22 15:26:14.000000000 -0500
+++ linux-2.6.12-git10/drivers/scsi/ipr.c 2005-06-22 17:05:14.000000000 -0500
@@ -5326,6 +5326,88 @@ static void ipr_initiate_ioa_reset(struc
shutdown_type);
}
+#ifdef CONFIG_SCSI_IPR_EEH_RECOVERY
+
+/**
+ * ipr_reset_freeze - reset job step used while the PCI slot is frozen
+ * @ipr_cmd: command block driving the reset job
+ *
+ * If the PCI slot is frozen, hold off all I/O activity; then, as soon
+ * as the slot is available again, initiate an adapter reset.
+ *
+ * Queues @ipr_cmd at the tail of its ioa_cfg's pending_q and sets its
+ * done callback to ipr_reset_ioa_job.
+ *
+ * Return: always IPR_RC_JOB_RETURN.
+ */
+static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd)
+{
+ list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q);
+ ipr_cmd->done = ipr_reset_ioa_job;
+ return IPR_RC_JOB_RETURN;
+}
+
+/**
+ * ipr_eeh_frozen - called when the slot has experienced a PCI bus error
+ * @pdev: PCI device whose slot has been frozen
+ *
+ * This routine is called to tell us that the PCI bus is down. Nothing
+ * can be done with the adapter at this point, so start a reset job
+ * whose first step is ipr_reset_freeze and leave the driver in a
+ * holding pattern, waiting for the PCI bus to come back.
+ */
+static void ipr_eeh_frozen(struct pci_dev *pdev)
+{
+	struct ipr_ioa_cfg *cfg = pci_get_drvdata(pdev);
+	unsigned long lock_flags = 0;
+
+	/* The reset job is kicked off under the SCSI host lock. */
+	spin_lock_irqsave(cfg->host->host_lock, lock_flags);
+	_ipr_initiate_ioa_reset(cfg, ipr_reset_freeze, IPR_SHUTDOWN_NONE);
+	spin_unlock_irqrestore(cfg->host->host_lock, lock_flags);
+}
+
+/**
+ * ipr_eeh_slot_reset - called when the PCI slot has been reset
+ * @pdev: PCI device whose slot was reset
+ *
+ * This routine is called by the PCI error recovery code after the PCI
+ * slot has been reset, just before we should resume normal operations.
+ * It re-enables the device, bus mastering and the interrupt line, then
+ * starts an adapter reset beginning at ipr_reset_restore_cfg_space.
+ *
+ * Return: PCIERR_RESULT_RECOVERED once the adapter reset has been
+ * initiated, or PCIERR_RESULT_DISCONNECT if the device could not be
+ * re-enabled.
+ */
+static int ipr_eeh_slot_reset (struct pci_dev *pdev)
+{
+	unsigned long flags = 0;
+	struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
+
+	/*
+	 * Do not touch the adapter if it could not be re-enabled; report
+	 * the failure instead of claiming that recovery succeeded.
+	 */
+	if (pci_enable_device(pdev))
+		return PCIERR_RESULT_DISCONNECT;
+
+	pci_set_master(pdev);
+	enable_irq(pdev->irq);
+
+	spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
+	_ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_restore_cfg_space,
+				IPR_SHUTDOWN_NONE);
+	spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
+
+	return PCIERR_RESULT_RECOVERED;
+}
+
+/**
+ * ipr_eeh_perm_failure - handle a permanent PCI bus failure
+ * @pdev: PCI device whose bus has permanently failed
+ *
+ * This routine is called when the PCI bus has permanently failed.
+ * It should purge all pending I/O and shut down the device driver
+ * (close and unload).
+ *
+ * XXX Needs to be implemented: the body is currently empty, so this
+ * function does nothing. The disabled lines below are only a sketch;
+ * they use ipr_cmd and rc, neither of which is declared here.
+ */
+static void ipr_eeh_perm_failure (struct pci_dev *pdev)
+{
+#if 0 // XXXXXXXXXXXXXXXXXXXXXXX
+ ipr_cmd->job_step = ipr_reset_shutdown_ioa;
+ rc = IPR_RC_JOB_CONTINUE;
+#endif
+}
+
+/**
+ * ipr_eeh_error_detected - PCI error recovery entry point
+ * @pdev: PCI device on which the error was detected
+ * @state: channel state reported for the device
+ *
+ * Dispatches on @state: a permanently failed channel is handed to
+ * ipr_eeh_perm_failure, a frozen channel to ipr_eeh_frozen.
+ *
+ * Return: PCIERR_RESULT_DISCONNECT for pci_channel_io_perm_failure,
+ * PCIERR_RESULT_NEED_RESET for every other state.
+ */
+static int ipr_eeh_error_detected(struct pci_dev *pdev,
+				  enum pci_channel_state state)
+{
+	if (state == pci_channel_io_perm_failure) {
+		ipr_eeh_perm_failure(pdev);
+		return PCIERR_RESULT_DISCONNECT;
+	}
+
+	if (state == pci_channel_io_frozen)
+		ipr_eeh_frozen(pdev);
+
+	return PCIERR_RESULT_NEED_RESET;
+}
+#endif
+
/**
* ipr_probe_ioa_part2 - Initializes IOAs found in ipr_probe_ioa(..)
* @ioa_cfg: ioa cfg struct
@@ -6068,6 +6150,10 @@ static struct pci_driver ipr_driver = {
.id_table = ipr_pci_table,
.probe = ipr_probe,
.remove = ipr_remove,
+ .err_handler = {
+ .error_detected = ipr_eeh_error_detected,
+ .slot_reset = ipr_eeh_slot_reset,
+ },
.driver = {
.shutdown = ipr_shutdown,
},
--- linux-2.6.12-git10/drivers/scsi/Kconfig.linas-orig 2005-06-22 15:26:14.000000000 -0500
+++ linux-2.6.12-git10/drivers/scsi/Kconfig 2005-06-22 15:28:29.000000000 -0500
@@ -1065,6 +1065,14 @@ config SCSI_IPR_DUMP
If you enable this support, the iprdump daemon can be used
to capture adapter failure analysis information.
+config SCSI_IPR_EEH_RECOVERY
+ bool "Enable PCI bus error recovery"
+ depends on SCSI_IPR && PPC_PSERIES
+ help
+ If you say Y here, the driver will be able to recover from
+ PCI bus errors on many PowerPC platforms. IBM pSeries users
+ should answer Y.
+
config SCSI_ZALON
tristate "Zalon SCSI support"
depends on GSC && SCSI
--- linux-2.6.12-git10/arch/ppc64/configs/pSeries_defconfig.linas-orig 2005-06-17 14:48:29.000000000 -0500
+++ linux-2.6.12-git10/arch/ppc64/configs/pSeries_defconfig 2005-06-22 15:30:33.000000000 -0500
@@ -314,6 +314,7 @@ CONFIG_SCSI_IPR
CONFIG_SCSI_IPR=y
CONFIG_SCSI_IPR_TRACE=y
CONFIG_SCSI_IPR_DUMP=y
+CONFIG_SCSI_IPR_EEH_RECOVERY=y
# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
CONFIG_SCSI_QLA2XXX=y
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH 3/13]: PCI Err: IPR scsi device driver recovery
2005-06-28 23:58 [PATCH 3/13]: PCI Err: IPR scsi device driver recovery Linas Vepstas
@ 2005-07-07 13:28 ` Brian King
0 siblings, 0 replies; 2+ messages in thread
From: Brian King @ 2005-07-07 13:28 UTC (permalink / raw)
To: Linas Vepstas
Cc: linux-kernel, Benjamin Herrenschmidt, long, Hidetoshi Seto,
Greg KH, ak, Paul Mackerras, linuxppc64-dev, linux-pci, johnrose
Linas Vepstas wrote:
> +/** This routine is called when the PCI bus has permanently
> + * failed. This routine should purge all pending I/O and
> + * shut down the device driver (close and unload).
> + * XXX Needs to be implemented.
> + */
> +static void ipr_eeh_perm_failure (struct pci_dev *pdev)
> +{
> +#if 0 // XXXXXXXXXXXXXXXXXXXXXXX
> + ipr_cmd->job_step = ipr_reset_shutdown_ioa;
> + rc = IPR_RC_JOB_CONTINUE;
> +#endif
> +}
What were your plans here? What can the device driver rely on here?
Are interrupts disabled? Will pci config accesses all fail?
Should the driver attempt to talk to the pci adapter at all, or should
it simply clean up after it?
--
Brian King
eServer Storage I/O
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2005-07-07 13:32 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-06-28 23:58 [PATCH 3/13]: PCI Err: IPR scsi device driver recovery Linas Vepstas
2005-07-07 13:28 ` Brian King
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox