All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Kok, Auke" <auke-jan.h.kok@intel.com>
To: "Garzik, Jeff" <jgarzik@pobox.com>
Cc: netdev@vger.kernel.org, akpm@osdl.org, "Brandeburg,
	Jesse" <jesse.brandeburg@intel.com>,
	"Kok, Auke" <auke-jan.h.kok@intel.com>,
	"Kok, Auke" <auke@foo-projects.org>,
	"Ronciak, John" <john.ronciak@intel.com>
Subject: [PATCH 24/26] ixgb: Add PCI Error recovery callbacks
Date: Tue, 29 Aug 2006 09:45:08 -0700	[thread overview]
Message-ID: <20060829164507.6872.90994.stgit@gitlost.site> (raw)
In-Reply-To: <20060829164153.6872.1713.stgit@gitlost.site>


Adds PCI Error recovery callbacks to the Intel 10-gigabit ethernet ixgb
device driver.  Lightly tested, works.

"Zhang, Yanmin" <yanmin_zhang@linux.intel.com> wrote:

Both pci_disable_device and ixgb_down would access the device.  It doesn't
follow Documentation/pci-error-recovery.txt that error_detected shouldn't do
any access to the device.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
---

 drivers/net/ixgb/ixgb_main.c |  112 ++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 111 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index a5da48a..abca75f 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -118,15 +118,26 @@ static void ixgb_restore_vlan(struct ixg
 static void ixgb_netpoll(struct net_device *dev);
 #endif
 
-/* Exported from other modules */
+static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev,
+	                     enum pci_channel_state state);
+static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev);
+static void ixgb_io_resume (struct pci_dev *pdev);
 
+/* Exported from other modules */
 extern void ixgb_check_options(struct ixgb_adapter *adapter);
 
+static struct pci_error_handlers ixgb_err_handler = {
+	.error_detected = ixgb_io_error_detected,
+	.slot_reset = ixgb_io_slot_reset,
+	.resume = ixgb_io_resume,
+};
+
 static struct pci_driver ixgb_driver = {
 	.name     = ixgb_driver_name,
 	.id_table = ixgb_pci_tbl,
 	.probe    = ixgb_probe,
 	.remove   = __devexit_p(ixgb_remove),
+	.err_handler = &ixgb_err_handler
 };
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
@@ -1550,6 +1561,11 @@ void
 ixgb_update_stats(struct ixgb_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+
+	/* Prevent stats update while adapter is being reset */
+	if (pdev->error_state && pdev->error_state != pci_channel_io_normal)
+		return;
 
 	if((netdev->flags & IFF_PROMISC) || (netdev->flags & IFF_ALLMULTI) ||
 	   (netdev->mc_count > IXGB_MAX_NUM_MULTICAST_ADDRESSES)) {
@@ -2205,4 +2221,98 @@ static void ixgb_netpoll(struct net_devi
 }
 #endif
 
+/**
+ * ixgb_io_error_detected() - called when PCI error is detected
+ * @pdev:   pointer to pci device with error
+ * @state:  pci channel state after error
+ *
+ * Called by the PCI subsystem whenever a PCI bus error is detected. Calls
+ * ixgb_down() if the netdev is running, then disables the PCI device.
+ */
+static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev,
+			             enum pci_channel_state state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgb_adapter *adapter = netdev->priv;
+
+	if(netif_running(netdev))
+		ixgb_down(adapter, TRUE);
+
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ixgb_io_slot_reset - called after the pci bus has been reset.
+ * @pdev:   pointer to pci device with error
+ *
+ * This callback is called after the PCI bus has been reset.
+ * Basically, this tries to restart the card from scratch.
+ * This is a shortened version of the device probe/discovery code,
+ * it resembles the first half of the ixgb_probe() routine.
+ */
+static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgb_adapter *adapter = netdev->priv;
+
+	if(pci_enable_device(pdev)) {
+		DPRINTK(PROBE, ERR, "Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/* Perform card reset only on one instance of the card */
+	if (0 != PCI_FUNC (pdev->devfn))
+		return PCI_ERS_RESULT_RECOVERED;
+
+	pci_set_master(pdev);
+
+	netif_carrier_off(netdev);
+	netif_stop_queue(netdev);
+	ixgb_reset(adapter);
+
+	/* Make sure the EEPROM is good */
+	if(!ixgb_validate_eeprom_checksum(&adapter->hw)) {
+		DPRINTK(PROBE, ERR, "After reset, the EEPROM checksum is not valid.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	ixgb_get_ee_mac_addr(&adapter->hw, netdev->dev_addr);
+	memcpy(netdev->perm_addr, netdev->dev_addr, netdev->addr_len);
+
+	if(!is_valid_ether_addr(netdev->perm_addr)) {
+		DPRINTK(PROBE, ERR, "After reset, invalid MAC address.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * ixgb_io_resume - called when it's OK to resume normal operations
+ * @pdev:   pointer to pci device with error
+ *
+ * The error recovery driver tells us that it's OK to resume
+ * normal operation. Implementation resembles the second half
+ * of the ixgb_probe() routine.
+ */
+static void ixgb_io_resume (struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgb_adapter *adapter = netdev->priv;
+
+	pci_set_master(pdev);
+
+	if(netif_running(netdev)) {
+		if(ixgb_up(adapter)) {
+			printk ("ixgb: can't bring device back up after reset\n");
+			return;
+		}
+	}
+
+	netif_device_attach(netdev);
+	mod_timer(&adapter->watchdog_timer, jiffies);
+}
+
 /* ixgb_main.c */



---
Auke Kok <auke-jan.h.kok@intel.com>

  parent reply	other threads:[~2006-08-29 16:36 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-08-29 16:41 [PATCH 00/26] e100, e1000, ixgb updates Kok, Auke
2006-08-29 16:44 ` [PATCH 02/26] e1000: IRQ resources cleanup Kok, Auke
2006-08-29 16:44 ` [PATCH 03/26] e1000: e1000_probe " Kok, Auke
2006-08-29 16:44 ` [PATCH 04/26] e1000: ring buffers " Kok, Auke
2006-08-29 16:44 ` [PATCH 05/26] e1000: error out if we cannot enable PCI device on resume Kok, Auke
2006-08-29 16:44 ` [PATCH 06/26] e1000: remove unused part_num reading code Kok, Auke
2006-08-29 16:44 ` [PATCH 07/26] e1000: Use module param array code Kok, Auke
2006-08-29 20:50   ` Jeff Garzik
2006-08-29 16:44 ` [PATCH 08/26] e1000: Deprecate mii-tool SIOCMIIREG ioctl Kok, Auke
2006-08-29 20:49   ` Jeff Garzik
2006-08-29 16:44 ` [PATCH 09/26] e1000: unify WoL capability detection code Kok, Auke
2006-08-29 20:51   ` Jeff Garzik
2006-08-29 21:54     ` Auke Kok
2006-08-29 16:44 ` [PATCH 10/26] e1000: Add PCI ID 0x10a4 for our new 4-port PCI-Express device Kok, Auke
2006-08-29 16:44 ` [PATCH 11/26] e1000: clean up skb allocation code, patch submitted by Christoph Kok, Auke
2006-08-29 20:52   ` Jeff Garzik
2006-08-29 22:15     ` Auke Kok
2006-08-29 16:44 ` [PATCH 12/26] e1000: Increment driver version to 7.2.7-k2 Kok, Auke
2006-08-29 16:44 ` [PATCH 13/26] e100: Convert e100 to use netdev_alloc_skb() Kok, Auke
2006-08-29 16:44 ` [PATCH 14/26] e100: fix error recovery Kok, Auke
2006-08-29 20:55   ` Jeff Garzik
2006-08-29 16:44 ` [PATCH 15/26] e100: reduce time under spinlock Kok, Auke
2006-08-29 20:55   ` Jeff Garzik
2006-08-29 22:10     ` Auke Kok
2006-08-29 16:44 ` [PATCH 16/26] e100: remove skb->dev assignment Kok, Auke
2006-08-29 16:44 ` [PATCH 17/26] e100: increment version to 3.5.16-k2 Kok, Auke
2006-08-29 16:44 ` [PATCH 18/26] ixgb: Convert dev_alloc_skb to netdev_alloc_skb Kok, Auke
2006-08-29 16:44 ` [PATCH 19/26] ixgb: convert dev->priv to netdev_priv(dev) Kok, Auke
2006-08-29 16:44 ` [PATCH 20/26] ixgb: Set a constant blink rate for ixgb adapter identify (1sec on, 1sec off) Kok, Auke
2006-08-29 16:45 ` [PATCH 21/26] ixgb: recalculate after how many descriptors to wake the queue Kok, Auke
2006-08-29 21:00   ` Jeff Garzik
2006-08-29 16:45 ` [PATCH 22/26] ixgb: Cache-align all TX components of the adapter struct Kok, Auke
2006-08-29 17:33   ` Eric Dumazet
2006-08-29 20:59     ` Jeff Garzik
2006-08-29 21:01       ` Auke Kok
2006-08-29 22:20       ` Auke Kok
2006-08-29 16:45 ` [PATCH 23/26] ixgb: Add buffer_info and test like e1000 has Kok, Auke
2006-08-29 16:45 ` Kok, Auke [this message]
2006-08-29 16:45 ` [PATCH 25/26] ixgb: remove skb->dev assignment Kok, Auke
2006-08-29 16:45 ` [PATCH 26/26] ixgb: Increment version to 1.0.112-k2 Kok, Auke
2006-08-29 21:01   ` Jeff Garzik
     [not found] ` <20060829164415.6872.84250.stgit@gitlost.site>
2006-08-29 16:52   ` [PATCH 01/26] e1000: Whitespace cleanup, cosmetic changes Auke Kok
2006-08-31 21:26 ` [PATCH 00/26] e100, e1000, ixgb updates Auke Kok
2006-08-31 21:48   ` Jeff Garzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060829164507.6872.90994.stgit@gitlost.site \
    --to=auke-jan.h.kok@intel.com \
    --cc=akpm@osdl.org \
    --cc=auke@foo-projects.org \
    --cc=jesse.brandeburg@intel.com \
    --cc=jgarzik@pobox.com \
    --cc=john.ronciak@intel.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.