netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Kok, Auke" <auke-jan.h.kok@intel.com>
To: "Garzik, Jeff" <jgarzik@pobox.com>
Cc: netdev@vger.kernel.org, akpm@osdl.org, "Brandeburg,
	Jesse" <jesse.brandeburg@intel.com>,
	"Kok, Auke" <auke-jan.h.kok@intel.com>,
	"Kok, Auke" <auke@foo-projects.org>,
	"Ronciak, John" <john.ronciak@intel.com>
Subject: [PATCH 24/26] ixgb: Add PCI Error recovery callbacks
Date: Tue, 29 Aug 2006 09:45:08 -0700	[thread overview]
Message-ID: <20060829164507.6872.90994.stgit@gitlost.site> (raw)
In-Reply-To: <20060829164153.6872.1713.stgit@gitlost.site>


Adds PCI Error recovery callbacks to the Intel 10-gigabit ethernet ixgb
device driver.  Lightly tested, works.

"Zhang, Yanmin" <yanmin_zhang@linux.intel.com> wrote:

Both pci_disable_device and ixgb_down would access the device.  It doesn't
follow Documentation/pci-error-recovery.txt that error_detected shouldn't do
any access to the device.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
---

 drivers/net/ixgb/ixgb_main.c |  112 ++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 111 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index a5da48a..abca75f 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -118,15 +118,26 @@ static void ixgb_restore_vlan(struct ixg
 static void ixgb_netpoll(struct net_device *dev);
 #endif
 
-/* Exported from other modules */
+static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev,
+	                     enum pci_channel_state state);
+static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev);
+static void ixgb_io_resume (struct pci_dev *pdev);
 
+/* Exported from other modules */
 extern void ixgb_check_options(struct ixgb_adapter *adapter);
 
+static struct pci_error_handlers ixgb_err_handler = {
+	.error_detected = ixgb_io_error_detected,
+	.slot_reset = ixgb_io_slot_reset,
+	.resume = ixgb_io_resume,
+};
+
 static struct pci_driver ixgb_driver = {
 	.name     = ixgb_driver_name,
 	.id_table = ixgb_pci_tbl,
 	.probe    = ixgb_probe,
 	.remove   = __devexit_p(ixgb_remove),
+	.err_handler = &ixgb_err_handler
 };
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
@@ -1550,6 +1561,11 @@ void
 ixgb_update_stats(struct ixgb_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+
+	/* Prevent stats update while adapter is being reset */
+	if (pdev->error_state && pdev->error_state != pci_channel_io_normal)
+		return;
 
 	if((netdev->flags & IFF_PROMISC) || (netdev->flags & IFF_ALLMULTI) ||
 	   (netdev->mc_count > IXGB_MAX_NUM_MULTICAST_ADDRESSES)) {
@@ -2205,4 +2221,98 @@ static void ixgb_netpoll(struct net_devi
 }
 #endif
 
+/**
+ * ixgb_io_error_detected() - called when PCI error is detected
+ * @pdev    pointer to pci device with error
+ * @state   pci channel state after error
+ *
+ * This callback is called by the PCI subsystem whenever
+ * a PCI bus error is detected.
+ */
+static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev,
+			             enum pci_channel_state state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgb_adapter *adapter = netdev->priv;
+
+	if(netif_running(netdev))
+		ixgb_down(adapter, TRUE);
+
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ixgb_io_slot_reset - called after the pci bus has been reset.
+ * @pdev    pointer to pci device with error
+ *
+ * This callback is called after the PCI bus has been reset.
+ * Basically, this tries to restart the card from scratch.
+ * This is a shortened version of the device probe/discovery code,
+ * it resembles the first-half of the ixgb_probe() routine.
+ */
+static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgb_adapter *adapter = netdev->priv;
+
+	if(pci_enable_device(pdev)) {
+		DPRINTK(PROBE, ERR, "Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/* Perform card reset only on one instance of the card */
+	if (0 != PCI_FUNC (pdev->devfn))
+		return PCI_ERS_RESULT_RECOVERED;
+
+	pci_set_master(pdev);
+
+	netif_carrier_off(netdev);
+	netif_stop_queue(netdev);
+	ixgb_reset(adapter);
+
+	/* Make sure the EEPROM is good */
+	if(!ixgb_validate_eeprom_checksum(&adapter->hw)) {
+		DPRINTK(PROBE, ERR, "After reset, the EEPROM checksum is not valid.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	ixgb_get_ee_mac_addr(&adapter->hw, netdev->dev_addr);
+	memcpy(netdev->perm_addr, netdev->dev_addr, netdev->addr_len);
+
+	if(!is_valid_ether_addr(netdev->perm_addr)) {
+		DPRINTK(PROBE, ERR, "After reset, invalid MAC address.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * ixgb_io_resume - called when it's OK to resume normal operations
+ * @pdev    pointer to pci device with error
+ *
+ * The error recovery driver tells us that it's OK to resume
+ * normal operation. Implementation resembles the second-half
+ * of the ixgb_probe() routine.
+ */
+static void ixgb_io_resume (struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgb_adapter *adapter = netdev->priv;
+
+	pci_set_master(pdev);
+
+	if(netif_running(netdev)) {
+		if(ixgb_up(adapter)) {
+			printk ("ixgb: can't bring device back up after reset\n");
+			return;
+		}
+	}
+
+	netif_device_attach(netdev);
+	mod_timer(&adapter->watchdog_timer, jiffies);
+}
+
 /* ixgb_main.c */



---
Auke Kok <auke-jan.h.kok@intel.com>

  parent reply	other threads:[~2006-08-29 16:36 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-08-29 16:41 [PATCH 00/26] e100, e1000, ixgb updates Kok, Auke
2006-08-29 16:44 ` [PATCH 02/26] e1000: IRQ resources cleanup Kok, Auke
2006-08-29 16:44 ` [PATCH 03/26] e1000: e1000_probe " Kok, Auke
2006-08-29 16:44 ` [PATCH 04/26] e1000: ring buffers " Kok, Auke
2006-08-29 16:44 ` [PATCH 05/26] e1000: error out if we cannot enable PCI device on resume Kok, Auke
2006-08-29 16:44 ` [PATCH 06/26] e1000: remove unused part_num reading code Kok, Auke
2006-08-29 16:44 ` [PATCH 07/26] e1000: Use module param array code Kok, Auke
2006-08-29 20:50   ` Jeff Garzik
2006-08-29 16:44 ` [PATCH 08/26] e1000: Deprecate mii-tool SIOCMIIREG ioctl Kok, Auke
2006-08-29 20:49   ` Jeff Garzik
2006-08-29 16:44 ` [PATCH 09/26] e1000: unify WoL capability detection code Kok, Auke
2006-08-29 20:51   ` Jeff Garzik
2006-08-29 21:54     ` Auke Kok
2006-08-29 16:44 ` [PATCH 10/26] e1000: Add PCI ID 0x10a4 for our new 4-port PCI-Express device Kok, Auke
2006-08-29 16:44 ` [PATCH 11/26] e1000: clean up skb allocation code, patch submitted by Christoph Kok, Auke
2006-08-29 20:52   ` Jeff Garzik
2006-08-29 22:15     ` Auke Kok
2006-08-29 16:44 ` [PATCH 12/26] e1000: Increment driver version to 7.2.7-k2 Kok, Auke
2006-08-29 16:44 ` [PATCH 13/26] e100: Convert e100 to use netdev_alloc_skb() Kok, Auke
2006-08-29 16:44 ` [PATCH 14/26] e100: fix error recovery Kok, Auke
2006-08-29 20:55   ` Jeff Garzik
2006-08-29 16:44 ` [PATCH 15/26] e100: reduce time under spinlock Kok, Auke
2006-08-29 20:55   ` Jeff Garzik
2006-08-29 22:10     ` Auke Kok
2006-08-29 16:44 ` [PATCH 16/26] e100: remove skb->dev assignment Kok, Auke
2006-08-29 16:44 ` [PATCH 17/26] e100: increment version to 3.5.16-k2 Kok, Auke
2006-08-29 16:44 ` [PATCH 18/26] ixgb: Convert dev_alloc_skb to netdev_alloc_skb Kok, Auke
2006-08-29 16:44 ` [PATCH 19/26] ixgb: convert dev->priv to netdev_priv(dev) Kok, Auke
2006-08-29 16:44 ` [PATCH 20/26] ixgb: Set a constant blink rate for ixgb adapter identify (1sec on, 1sec off) Kok, Auke
2006-08-29 16:45 ` [PATCH 21/26] ixgb: recalculate after how many descriptors to wake the queue Kok, Auke
2006-08-29 21:00   ` Jeff Garzik
2006-08-29 16:45 ` [PATCH 22/26] ixgb: Cache-align all TX components of the adapter struct Kok, Auke
2006-08-29 17:33   ` Eric Dumazet
2006-08-29 20:59     ` Jeff Garzik
2006-08-29 21:01       ` Auke Kok
2006-08-29 22:20       ` Auke Kok
2006-08-29 16:45 ` [PATCH 23/26] ixgb: Add buffer_info and test like e1000 has Kok, Auke
2006-08-29 16:45 ` Kok, Auke [this message]
2006-08-29 16:45 ` [PATCH 25/26] ixgb: remove skb->dev assignment Kok, Auke
2006-08-29 16:45 ` [PATCH 26/26] ixgb: Increment version to 1.0.112-k2 Kok, Auke
2006-08-29 21:01   ` Jeff Garzik
     [not found] ` <20060829164415.6872.84250.stgit@gitlost.site>
2006-08-29 16:52   ` [PATCH 01/26] e1000: Whitespace cleanup, cosmetic changes Auke Kok
2006-08-31 21:26 ` [PATCH 00/26] e100, e1000, ixgb updates Auke Kok
2006-08-31 21:48   ` Jeff Garzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060829164507.6872.90994.stgit@gitlost.site \
    --to=auke-jan.h.kok@intel.com \
    --cc=akpm@osdl.org \
    --cc=auke@foo-projects.org \
    --cc=jesse.brandeburg@intel.com \
    --cc=jgarzik@pobox.com \
    --cc=john.ronciak@intel.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).