netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, gospo@redhat.com,
	Bruce Allan <bruce.w.allan@intel.com>,
	Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Subject: [net-next-2.6 PATCH 23/23] e1000e: update Tx Unit hang detection message
Date: Sat, 21 Nov 2009 01:28:56 -0800	[thread overview]
Message-ID: <20091121092855.5715.11832.stgit@localhost.localdomain> (raw)
In-Reply-To: <20091121092126.5715.41618.stgit@localhost.localdomain>

From: Bruce Allan <bruce.w.allan@intel.com>

The Tx unit hang detection code in e1000e also detects hangs caused by
other hardware components (e.g. Rx, DMA units), but it is not possible to
detect exactly which component is hung, so it has always assumed a Tx unit
hang.  When dumping a message to the system log because of a hang, this
patch adds more data to help narrow down the cause of the issue and makes
the message non-Tx-specific.  Because this new code reads PHY registers,
an operation which can sleep, move it off to a workqueue.  This and all
previously existing work tasks in the driver are now cancelled when the
driver is removed.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/e1000e/e1000.h  |    1 +
 drivers/net/e1000e/netdev.c |   46 ++++++++++++++++++++++++++++++++++++-------
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index 5ac8675..c9fcef7 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -364,6 +364,7 @@ struct e1000_adapter {
 	struct work_struct downshift_task;
 	struct work_struct update_phy_task;
 	struct work_struct led_blink_task;
+	struct work_struct print_hang_task;
 };
 
 struct e1000_info {
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 39f01d9..11a5274 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -544,15 +544,27 @@ static void e1000_put_txbuf(struct e1000_adapter *adapter,
 	buffer_info->time_stamp = 0;
 }
 
-static void e1000_print_tx_hang(struct e1000_adapter *adapter)
+static void e1000_print_hw_hang(struct work_struct *work)
 {
+	struct e1000_adapter *adapter = container_of(work,
+	                                             struct e1000_adapter,
+	                                             print_hang_task);
 	struct e1000_ring *tx_ring = adapter->tx_ring;
 	unsigned int i = tx_ring->next_to_clean;
 	unsigned int eop = tx_ring->buffer_info[i].next_to_watch;
 	struct e1000_tx_desc *eop_desc = E1000_TX_DESC(*tx_ring, eop);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 phy_status, phy_1000t_status, phy_ext_status;
+	u16 pci_status;
+
+	e1e_rphy(hw, PHY_STATUS, &phy_status);
+	e1e_rphy(hw, PHY_1000T_STATUS, &phy_1000t_status);
+	e1e_rphy(hw, PHY_EXT_STATUS, &phy_ext_status);
 
-	/* detected Tx unit hang */
-	e_err("Detected Tx Unit Hang:\n"
+	pci_read_config_word(adapter->pdev, PCI_STATUS, &pci_status);
+
+	/* detected Hardware unit hang */
+	e_err("Detected Hardware Unit Hang:\n"
 	      "  TDH                  <%x>\n"
 	      "  TDT                  <%x>\n"
 	      "  next_to_use          <%x>\n"
@@ -561,7 +573,12 @@ static void e1000_print_tx_hang(struct e1000_adapter *adapter)
 	      "  time_stamp           <%lx>\n"
 	      "  next_to_watch        <%x>\n"
 	      "  jiffies              <%lx>\n"
-	      "  next_to_watch.status <%x>\n",
+	      "  next_to_watch.status <%x>\n"
+	      "MAC Status             <%x>\n"
+	      "PHY Status             <%x>\n"
+	      "PHY 1000BASE-T Status  <%x>\n"
+	      "PHY Extended Status    <%x>\n"
+	      "PCI Status             <%x>\n",
 	      readl(adapter->hw.hw_addr + tx_ring->head),
 	      readl(adapter->hw.hw_addr + tx_ring->tail),
 	      tx_ring->next_to_use,
@@ -569,7 +586,12 @@ static void e1000_print_tx_hang(struct e1000_adapter *adapter)
 	      tx_ring->buffer_info[eop].time_stamp,
 	      eop,
 	      jiffies,
-	      eop_desc->upper.fields.status);
+	      eop_desc->upper.fields.status,
+	      er32(STATUS),
+	      phy_status,
+	      phy_1000t_status,
+	      phy_ext_status,
+	      pci_status);
 }
 
 /**
@@ -643,14 +665,16 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
 	}
 
 	if (adapter->detect_tx_hung) {
-		/* Detect a transmit hang in hardware, this serializes the
-		 * check with the clearing of time_stamp and movement of i */
+		/*
+		 * Detect a transmit hang in hardware, this serializes the
+		 * check with the clearing of time_stamp and movement of i
+		 */
 		adapter->detect_tx_hung = 0;
 		if (tx_ring->buffer_info[i].time_stamp &&
 		    time_after(jiffies, tx_ring->buffer_info[i].time_stamp
 			       + (adapter->tx_timeout_factor * HZ))
 		    && !(er32(STATUS) & E1000_STATUS_TXOFF)) {
-			e1000_print_tx_hang(adapter);
+			schedule_work(&adapter->print_hang_task);
 			netif_stop_queue(netdev);
 		}
 	}
@@ -5118,6 +5142,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 	INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
 	INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
 	INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
+	INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang);
 
 	/* Initialize link parameters. User can change them with ethtool */
 	adapter->hw.mac.autoneg = 1;
@@ -5241,6 +5266,11 @@ static void __devexit e1000_remove(struct pci_dev *pdev)
 	del_timer_sync(&adapter->watchdog_timer);
 	del_timer_sync(&adapter->phy_info_timer);
 
+	cancel_work_sync(&adapter->reset_task);
+	cancel_work_sync(&adapter->watchdog_task);
+	cancel_work_sync(&adapter->downshift_task);
+	cancel_work_sync(&adapter->update_phy_task);
+	cancel_work_sync(&adapter->print_hang_task);
 	flush_scheduled_work();
 
 	/*


      parent reply	other threads:[~2009-11-21  9:29 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-21  9:22 [net-next-2.6 PATCH 01/23] e1000e: check WoL mode is among set of supported modes Jeff Kirsher
2009-11-21  9:22 ` [net-next-2.6 PATCH 02/23] e1000e: add missing tests for 82583 in ethtool functions Jeff Kirsher
2009-11-21  9:22 ` [net-next-2.6 PATCH 03/23] e1000e: clearing interrupt timers causes descriptors to get flushed Jeff Kirsher
2009-11-21  9:22 ` [net-next-2.6 PATCH 04/23] e1000e: function pointers for ethtool set/get offloads Jeff Kirsher
2009-11-21  9:23 ` [net-next-2.6 PATCH 05/23] e1000e: don't clean Rx ring while resetting Jeff Kirsher
2009-11-21  9:23 ` [net-next-2.6 PATCH 06/23] e1000e: link reporting problems Jeff Kirsher
2009-11-21  9:23 ` [net-next-2.6 PATCH 07/23] e1000e: improper return code signage Jeff Kirsher
2009-11-21  9:24 ` [net-next-2.6 PATCH 08/23] e1000e: disable K1 on PCH LOM when in PHY loopback mode Jeff Kirsher
2009-11-21  9:24 ` [net-next-2.6 PATCH 09/23] e1000e: Incorrect MII Link beat reporting Jeff Kirsher
2009-11-21  9:24 ` [net-next-2.6 PATCH 10/23] e1000e: cleanup redundant #include's Jeff Kirsher
2009-11-21  9:25 ` [net-next-2.6 PATCH 11/23] e1000e: consolidate two dbug macros into one simpler one Jeff Kirsher
2009-11-21  9:25 ` [net-next-2.6 PATCH 12/23] e1000e: cleanup ops function pointers Jeff Kirsher
2009-11-21  9:25 ` [net-next-2.6 PATCH 13/23] e1000e: update copyright information Jeff Kirsher
2009-11-21  9:26 ` [net-next-2.6 PATCH 14/23] e1000e: remove comments regarding a non-existent api module Jeff Kirsher
2009-11-21  9:26 ` [net-next-2.6 PATCH 15/23] e1000e: provide comment for 82571 workaround Jeff Kirsher
2009-11-21  9:26 ` [net-next-2.6 PATCH 16/23] e1000e: set bools to true/false instead of 1/0 Jeff Kirsher
2009-11-21  9:27 ` [net-next-2.6 PATCH 17/23] e1000e: cleanup - shift indentation left by exiting early in e1000_tso Jeff Kirsher
2009-11-21  9:27 ` [net-next-2.6 PATCH 18/23] e1000e: cleanup functions that clear hardware statistics Jeff Kirsher
2009-11-21  9:27 ` [net-next-2.6 PATCH 19/23] e1000e: set pm_qos DMA latency requirement per interface when needed Jeff Kirsher
2009-11-21  9:27 ` [net-next-2.6 PATCH 20/23] e1000e: do not error out on identification LED init failure Jeff Kirsher
2009-11-21  9:28 ` [net-next-2.6 PATCH 21/23] e1000e: remove redundant might_sleep() Jeff Kirsher
2009-11-21  9:28 ` [net-next-2.6 PATCH 22/23] e1000e: cosmetic - group local variables of the same type Jeff Kirsher
2009-11-21  9:28 ` Jeff Kirsher [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091121092855.5715.11832.stgit@localhost.localdomain \
    --to=jeffrey.t.kirsher@intel.com \
    --cc=bruce.w.allan@intel.com \
    --cc=davem@davemloft.net \
    --cc=gospo@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).