Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next 10/11] igb: move TX hang check flag into ring->flags
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change moves the Tx hang check into the ring flags.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |    1 -
 drivers/net/ethernet/intel/igb/igb_main.c |    6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 04dd6d7..5def94c 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -230,7 +230,6 @@ struct igb_ring {
 			struct igb_tx_queue_stats tx_stats;
 			struct u64_stats_sync tx_syncp;
 			struct u64_stats_sync tx_syncp2;
-			bool detect_tx_hung;
 		};
 		/* RX */
 		struct {
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 689c2c2..c4c3acf 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3684,7 +3684,7 @@ static void igb_watchdog_task(struct work_struct *work)
 		}
 
 		/* Force detection of hung controller every watchdog period */
-		tx_ring->detect_tx_hung = true;
+		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
 	}
 
 	/* Cause software interrupt to ensure rx ring is cleaned */
@@ -5651,14 +5651,14 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 	q_vector->tx.total_bytes += total_bytes;
 	q_vector->tx.total_packets += total_packets;
 
-	if (tx_ring->detect_tx_hung) {
+	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
 		struct e1000_hw *hw = &adapter->hw;
 
 		eop_desc = tx_buffer->next_to_watch;
 
 		/* Detect a transmit hang in hardware, this serializes the
 		 * check with the clearing of time_stamp and movement of i */
-		tx_ring->detect_tx_hung = false;
+		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
 		if (eop_desc &&
 		    time_after(jiffies, tx_buffer->time_stamp +
 			       (adapter->tx_timeout_factor * HZ)) &&
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 11/11] igb: add support for NETIF_F_RXHASH
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch adds support for Rx hashing.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |   52 +++++++++++++++++++---------
 1 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c4c3acf..b3a2e3d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1931,23 +1931,32 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 		dev_info(&pdev->dev,
 			"PHY reset is blocked due to SOL/IDER session.\n");
 
-	netdev->hw_features = NETIF_F_SG |
-			   NETIF_F_IP_CSUM |
-			   NETIF_F_IPV6_CSUM |
-			   NETIF_F_TSO |
-			   NETIF_F_TSO6 |
-			   NETIF_F_RXCSUM |
-			   NETIF_F_HW_VLAN_RX;
-
-	netdev->features = netdev->hw_features |
-			   NETIF_F_HW_VLAN_TX |
-			   NETIF_F_HW_VLAN_FILTER;
-
-	netdev->vlan_features |= NETIF_F_TSO;
-	netdev->vlan_features |= NETIF_F_TSO6;
-	netdev->vlan_features |= NETIF_F_IP_CSUM;
-	netdev->vlan_features |= NETIF_F_IPV6_CSUM;
-	netdev->vlan_features |= NETIF_F_SG;
+	/*
+	 * features is initialized to 0 in allocation, it might have bits
+	 * set by igb_sw_init so we should use an or instead of an
+	 * assignment.
+	 */
+	netdev->features |= NETIF_F_SG |
+			    NETIF_F_IP_CSUM |
+			    NETIF_F_IPV6_CSUM |
+			    NETIF_F_TSO |
+			    NETIF_F_TSO6 |
+			    NETIF_F_RXHASH |
+			    NETIF_F_RXCSUM |
+			    NETIF_F_HW_VLAN_RX |
+			    NETIF_F_HW_VLAN_TX;
+
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features;
+
+	/* set this bit last since it cannot be part of hw_features */
+	netdev->features |= NETIF_F_HW_VLAN_FILTER;
+
+	netdev->vlan_features |= NETIF_F_TSO |
+				 NETIF_F_TSO6 |
+				 NETIF_F_IP_CSUM |
+				 NETIF_F_IPV6_CSUM |
+				 NETIF_F_SG;
 
 	if (pci_using_dac) {
 		netdev->features |= NETIF_F_HIGHDMA;
@@ -5757,6 +5766,14 @@ static inline void igb_rx_checksum(struct igb_ring *ring,
 		le32_to_cpu(rx_desc->wb.upper.status_error));
 }
 
+static inline void igb_rx_hash(struct igb_ring *ring,
+			       union e1000_adv_rx_desc *rx_desc,
+			       struct sk_buff *skb)
+{
+	if (ring->netdev->features & NETIF_F_RXHASH)
+		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+}
+
 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
 			    union e1000_adv_rx_desc *rx_desc,
 			    struct sk_buff *skb)
@@ -5889,6 +5906,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 		}
 
 		igb_rx_hwtstamp(q_vector, rx_desc, skb);
+		igb_rx_hash(rx_ring, rx_desc, skb);
 		igb_rx_checksum(rx_ring, rx_desc, skb);
 
 		if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 08/11] igb: leave staterr in place and instead us a helper function to check bits
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Instead of doing a byte swap on the staterr bits in the Rx descriptor we can
save ourselves a bit of space and some CPU time by instead just testing for
the various bits out of the RX descriptor directly.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h         |    7 +++
 drivers/net/ethernet/intel/igb/igb_ethtool.c |    5 +--
 drivers/net/ethernet/intel/igb/igb_main.c    |   55 ++++++++++++++-----------
 3 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 8a15d43..04dd6d7 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -257,6 +257,13 @@ enum e1000_ring_flags_t {
 #define IGB_TX_CTXTDESC(R, i)	    \
 	(&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i]))
 
+/* igb_test_staterr - tests bits within Rx descriptor status and error fields */
+static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc,
+				      const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
 /* igb_desc_unused - calculate if we have unused descriptors */
 static inline int igb_desc_unused(struct igb_ring *ring)
 {
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 0a9e309..43873eb 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1577,16 +1577,14 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
 	union e1000_adv_rx_desc *rx_desc;
 	struct igb_rx_buffer *rx_buffer_info;
 	struct igb_tx_buffer *tx_buffer_info;
-	u32 staterr;
 	u16 rx_ntc, tx_ntc, count = 0;
 
 	/* initialize next to clean and descriptor values */
 	rx_ntc = rx_ring->next_to_clean;
 	tx_ntc = tx_ring->next_to_clean;
 	rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
-	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 
-	while (staterr & E1000_RXD_STAT_DD) {
+	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
 		/* check rx buffer */
 		rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc];
 
@@ -1615,7 +1613,6 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
 
 		/* fetch next descriptor */
 		rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
-		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 	}
 
 	/* re-map buffers to ring, store next to clean values */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 33b250f..6917c33 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -5720,12 +5720,13 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 }
 
 static inline void igb_rx_checksum(struct igb_ring *ring,
-				   u32 status_err, struct sk_buff *skb)
+				   union e1000_adv_rx_desc *rx_desc,
+				   struct sk_buff *skb)
 {
 	skb_checksum_none_assert(skb);
 
 	/* Ignore Checksum bit is set */
-	if (status_err & E1000_RXD_STAT_IXSM)
+	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
 		return;
 
 	/* Rx checksum disabled via ethtool */
@@ -5733,8 +5734,9 @@ static inline void igb_rx_checksum(struct igb_ring *ring,
 		return;
 
 	/* TCP/UDP checksum error bit is set */
-	if (status_err &
-	    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
+	if (igb_test_staterr(rx_desc,
+			     E1000_RXDEXT_STATERR_TCPE |
+			     E1000_RXDEXT_STATERR_IPE)) {
 		/*
 		 * work around errata with sctp packets where the TCPE aka
 		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
@@ -5750,19 +5752,26 @@ static inline void igb_rx_checksum(struct igb_ring *ring,
 		return;
 	}
 	/* It must be a TCP or UDP packet with a valid checksum */
-	if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
+	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
+				      E1000_RXD_STAT_UDPCS))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
+	dev_dbg(ring->dev, "cksum success: bits %08X\n",
+		le32_to_cpu(rx_desc->wb.upper.status_error));
 }
 
-static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
-                                   struct sk_buff *skb)
+static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
+			    union e1000_adv_rx_desc *rx_desc,
+			    struct sk_buff *skb)
 {
 	struct igb_adapter *adapter = q_vector->adapter;
 	struct e1000_hw *hw = &adapter->hw;
 	u64 regval;
 
+	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
+				       E1000_RXDADV_STAT_TS))
+		return;
+
 	/*
 	 * If this bit is set, then the RX registers contain the time stamp. No
 	 * other packet will be time stamped until we read these registers, so
@@ -5774,7 +5783,7 @@ static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
 	 * If nothing went wrong, then it should have a shared tx_flags that we
 	 * can turn into a skb_shared_hwtstamps.
 	 */
-	if (staterr & E1000_RXDADV_STAT_TSIP) {
+	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
 		u32 *stamp = (u32 *)skb->data;
 		regval = le32_to_cpu(*(stamp + 2));
 		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
@@ -5808,14 +5817,12 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 	union e1000_adv_rx_desc *rx_desc;
 	const int current_node = numa_node_id();
 	unsigned int total_bytes = 0, total_packets = 0;
-	u32 staterr;
 	u16 cleaned_count = igb_desc_unused(rx_ring);
 	u16 i = rx_ring->next_to_clean;
 
 	rx_desc = IGB_RX_DESC(rx_ring, i);
-	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 
-	while (staterr & E1000_RXD_STAT_DD) {
+	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
 		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
 		struct sk_buff *skb = buffer_info->skb;
 		union e1000_adv_rx_desc *next_rxd;
@@ -5868,7 +5875,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 			buffer_info->page_dma = 0;
 		}
 
-		if (!(staterr & E1000_RXD_STAT_EOP)) {
+		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
 			struct igb_rx_buffer *next_buffer;
 			next_buffer = &rx_ring->rx_buffer_info[i];
 			buffer_info->skb = next_buffer->skb;
@@ -5878,25 +5885,26 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 			goto next_desc;
 		}
 
-		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
+		if (igb_test_staterr(rx_desc,
+				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
 			dev_kfree_skb_any(skb);
 			goto next_desc;
 		}
 
-		if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
-			igb_rx_hwtstamp(q_vector, staterr, skb);
-		total_bytes += skb->len;
-		total_packets++;
-
-		igb_rx_checksum(rx_ring, staterr, skb);
-
-		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+		igb_rx_hwtstamp(q_vector, rx_desc, skb);
+		igb_rx_checksum(rx_ring, rx_desc, skb);
 
-		if (staterr & E1000_RXD_STAT_VP) {
+		if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
 			u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
 
 			__vlan_hwaccel_put_tag(skb, vid);
 		}
+
+		total_bytes += skb->len;
+		total_packets++;
+
+		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
 		napi_gro_receive(&q_vector->napi, skb);
 
 		budget--;
@@ -5913,7 +5921,6 @@ next_desc:
 
 		/* use prefetched values */
 		rx_desc = next_rxd;
-		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 	}
 
 	rx_ring->next_to_clean = i;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 09/11] igb: fix recent VLAN changes that would leave VLANs disabled after reset
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch cleans up several issues with VLANs on igb after the recent
changes that were meant to leave the VLANs enabled/disable via the
netdev->features flags.

Specifically the Rx VLAN settings were being dropped after reset due to the
fact that they were not being restored correctly.  In addition I removed
the IRQ disable/enable since those were in place to protect the setting of
vlgrp.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |   18 ++++--------------
 1 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 6917c33..689c2c2 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2065,8 +2065,6 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_register;
 
-	igb_vlan_mode(netdev, netdev->features);
-
 	/* carrier off reporting is important to ethtool even BEFORE open */
 	netif_carrier_off(netdev);
 
@@ -5050,7 +5048,6 @@ static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
 			}
 
 			adapter->vf_data[vf].vlans_enabled++;
-			return 0;
 		}
 	} else {
 		if (i < E1000_VLVF_ARRAY_SIZE) {
@@ -6315,10 +6312,9 @@ static void igb_vlan_mode(struct net_device *netdev, u32 features)
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 	u32 ctrl, rctl;
+	bool enable = !!(features & NETIF_F_HW_VLAN_RX);
 
-	igb_irq_disable(adapter);
-
-	if (features & NETIF_F_HW_VLAN_RX) {
+	if (enable) {
 		/* enable VLAN tag insert/strip */
 		ctrl = rd32(E1000_CTRL);
 		ctrl |= E1000_CTRL_VME;
@@ -6336,9 +6332,6 @@ static void igb_vlan_mode(struct net_device *netdev, u32 features)
 	}
 
 	igb_rlpml_set(adapter);
-
-	if (!test_bit(__IGB_DOWN, &adapter->state))
-		igb_irq_enable(adapter);
 }
 
 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
@@ -6363,11 +6356,6 @@ static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	int pf_id = adapter->vfs_allocated_count;
 	s32 err;
 
-	igb_irq_disable(adapter);
-
-	if (!test_bit(__IGB_DOWN, &adapter->state))
-		igb_irq_enable(adapter);
-
 	/* remove vlan from VLVF table array */
 	err = igb_vlvf_set(adapter, vid, false, pf_id);
 
@@ -6382,6 +6370,8 @@ static void igb_restore_vlan(struct igb_adapter *adapter)
 {
 	u16 vid;
 
+	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
+
 	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
 		igb_vlan_rx_add_vid(adapter->netdev, vid);
 }
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 05/11] igb: Move ITR related data into work container within the q_vector
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change moves information related to interrupt throttle rate
configuration into a separate q_vector sub-structure called a work
container. A similar change has already been made for ixgbe and this work
is based off of that.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_defines.h |    3 +
 drivers/net/ethernet/intel/igb/igb.h           |   32 +++--
 drivers/net/ethernet/intel/igb/igb_ethtool.c   |    4 +-
 drivers/net/ethernet/intel/igb/igb_main.c      |  203 +++++++++++-------------
 4 files changed, 119 insertions(+), 123 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 7b8ddd8..68558be 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -409,6 +409,9 @@
 #define E1000_ICS_DRSTA     E1000_ICR_DRSTA     /* Device Reset Aserted */
 
 /* Extended Interrupt Cause Set */
+/* E1000_EITR_CNT_IGNR is only for 82576 and newer */
+#define E1000_EITR_CNT_IGNR     0x80000000 /* Don't reset counters on write */
+
 
 /* Transmit Descriptor Control */
 /* Enable the counting of descriptors still to be processed. */
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 0cc4702..e0dea22 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -42,8 +42,11 @@
 
 struct igb_adapter;
 
-/* ((1000000000ns / (6000ints/s * 1024ns)) << 2 = 648 */
-#define IGB_START_ITR 648
+/* Interrupt defines */
+#define IGB_START_ITR                    648 /* ~6000 ints/sec */
+#define IGB_4K_ITR                       980
+#define IGB_20K_ITR                      196
+#define IGB_70K_ITR                       56
 
 /* TX/RX descriptor defines */
 #define IGB_DEFAULT_TXD                  256
@@ -175,15 +178,23 @@ struct igb_rx_queue_stats {
 	u64 alloc_failed;
 };
 
+struct igb_ring_container {
+	struct igb_ring *ring;		/* pointer to linked list of rings */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 work_limit;			/* total work allowed per interrupt */
+	u8 count;			/* total number of rings in vector */
+	u8 itr;				/* current ITR setting for ring */
+};
+
 struct igb_q_vector {
-	struct igb_adapter *adapter; /* backlink */
-	struct igb_ring *rx_ring;
-	struct igb_ring *tx_ring;
-	struct napi_struct napi;
+	struct igb_adapter *adapter;	/* backlink */
+	int cpu;			/* CPU for DCA */
+	u32 eims_value;			/* EIMS mask value */
 
-	u32 eims_value;
-	u16 cpu;
-	u16 tx_work_limit;
+	struct igb_ring_container rx, tx;
+
+	struct napi_struct napi;
 
 	u16 itr_val;
 	u8 set_itr;
@@ -213,9 +224,6 @@ struct igb_ring {
 	u16 next_to_clean ____cacheline_aligned_in_smp;
 	u16 next_to_use;
 
-	unsigned int total_bytes;
-	unsigned int total_packets;
-
 	union {
 		/* TX */
 		struct {
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index c364d84..0a9e309 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2009,8 +2009,8 @@ static int igb_set_coalesce(struct net_device *netdev,
 
 	for (i = 0; i < adapter->num_q_vectors; i++) {
 		struct igb_q_vector *q_vector = adapter->q_vector[i];
-		q_vector->tx_work_limit = adapter->tx_work_limit;
-		if (q_vector->rx_ring)
+		q_vector->tx.work_limit = adapter->tx_work_limit;
+		if (q_vector->rx.ring)
 			q_vector->itr_val = adapter->rx_itr_setting;
 		else
 			q_vector->itr_val = adapter->tx_itr_setting;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 91ae368..a5597de 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -737,10 +737,10 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
 	int rx_queue = IGB_N0_QUEUE;
 	int tx_queue = IGB_N0_QUEUE;
 
-	if (q_vector->rx_ring)
-		rx_queue = q_vector->rx_ring->reg_idx;
-	if (q_vector->tx_ring)
-		tx_queue = q_vector->tx_ring->reg_idx;
+	if (q_vector->rx.ring)
+		rx_queue = q_vector->rx.ring->reg_idx;
+	if (q_vector->tx.ring)
+		tx_queue = q_vector->tx.ring->reg_idx;
 
 	switch (hw->mac.type) {
 	case e1000_82575:
@@ -923,15 +923,15 @@ static int igb_request_msix(struct igb_adapter *adapter)
 
 		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
 
-		if (q_vector->rx_ring && q_vector->tx_ring)
+		if (q_vector->rx.ring && q_vector->tx.ring)
 			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
-			        q_vector->rx_ring->queue_index);
-		else if (q_vector->tx_ring)
+				q_vector->rx.ring->queue_index);
+		else if (q_vector->tx.ring)
 			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
-			        q_vector->tx_ring->queue_index);
-		else if (q_vector->rx_ring)
+				q_vector->tx.ring->queue_index);
+		else if (q_vector->rx.ring)
 			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
-			        q_vector->rx_ring->queue_index);
+				q_vector->rx.ring->queue_index);
 		else
 			sprintf(q_vector->name, "%s-unused", netdev->name);
 
@@ -1110,8 +1110,9 @@ static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
 {
 	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
 
-	q_vector->rx_ring = adapter->rx_ring[ring_idx];
-	q_vector->rx_ring->q_vector = q_vector;
+	q_vector->rx.ring = adapter->rx_ring[ring_idx];
+	q_vector->rx.ring->q_vector = q_vector;
+	q_vector->rx.count++;
 	q_vector->itr_val = adapter->rx_itr_setting;
 	if (q_vector->itr_val && q_vector->itr_val <= 3)
 		q_vector->itr_val = IGB_START_ITR;
@@ -1122,10 +1123,11 @@ static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
 {
 	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
 
-	q_vector->tx_ring = adapter->tx_ring[ring_idx];
-	q_vector->tx_ring->q_vector = q_vector;
+	q_vector->tx.ring = adapter->tx_ring[ring_idx];
+	q_vector->tx.ring->q_vector = q_vector;
+	q_vector->tx.count++;
 	q_vector->itr_val = adapter->tx_itr_setting;
-	q_vector->tx_work_limit = adapter->tx_work_limit;
+	q_vector->tx.work_limit = adapter->tx_work_limit;
 	if (q_vector->itr_val && q_vector->itr_val <= 3)
 		q_vector->itr_val = IGB_START_ITR;
 }
@@ -3756,33 +3758,24 @@ static void igb_update_ring_itr(struct igb_q_vector *q_vector)
 	int new_val = q_vector->itr_val;
 	int avg_wire_size = 0;
 	struct igb_adapter *adapter = q_vector->adapter;
-	struct igb_ring *ring;
 	unsigned int packets;
 
 	/* For non-gigabit speeds, just fix the interrupt rate at 4000
 	 * ints/sec - ITR timer value of 120 ticks.
 	 */
 	if (adapter->link_speed != SPEED_1000) {
-		new_val = 976;
+		new_val = IGB_4K_ITR;
 		goto set_itr_val;
 	}
 
-	ring = q_vector->rx_ring;
-	if (ring) {
-		packets = ACCESS_ONCE(ring->total_packets);
-
-		if (packets)
-			avg_wire_size = ring->total_bytes / packets;
-	}
+	packets = q_vector->rx.total_packets;
+	if (packets)
+		avg_wire_size = q_vector->rx.total_bytes / packets;
 
-	ring = q_vector->tx_ring;
-	if (ring) {
-		packets = ACCESS_ONCE(ring->total_packets);
-
-		if (packets)
-			avg_wire_size = max_t(u32, avg_wire_size,
-			                      ring->total_bytes / packets);
-	}
+	packets = q_vector->tx.total_packets;
+	if (packets)
+		avg_wire_size = max_t(u32, avg_wire_size,
+				      q_vector->tx.total_bytes / packets);
 
 	/* if avg_wire_size isn't set no work was done */
 	if (!avg_wire_size)
@@ -3800,9 +3793,11 @@ static void igb_update_ring_itr(struct igb_q_vector *q_vector)
 	else
 		new_val = avg_wire_size / 2;
 
-	/* when in itr mode 3 do not exceed 20K ints/sec */
-	if (adapter->rx_itr_setting == 3 && new_val < 196)
-		new_val = 196;
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (new_val < IGB_20K_ITR &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		new_val = IGB_20K_ITR;
 
 set_itr_val:
 	if (new_val != q_vector->itr_val) {
@@ -3810,14 +3805,10 @@ set_itr_val:
 		q_vector->set_itr = 1;
 	}
 clear_counts:
-	if (q_vector->rx_ring) {
-		q_vector->rx_ring->total_bytes = 0;
-		q_vector->rx_ring->total_packets = 0;
-	}
-	if (q_vector->tx_ring) {
-		q_vector->tx_ring->total_bytes = 0;
-		q_vector->tx_ring->total_packets = 0;
-	}
+	q_vector->rx.total_bytes = 0;
+	q_vector->rx.total_packets = 0;
+	q_vector->tx.total_bytes = 0;
+	q_vector->tx.total_packets = 0;
 }
 
 /**
@@ -3833,106 +3824,102 @@ clear_counts:
  *      parameter (see igb_param.c)
  *      NOTE:  These calculations are only valid when operating in a single-
  *             queue environment.
- * @adapter: pointer to adapter
- * @itr_setting: current q_vector->itr_val
- * @packets: the number of packets during this measurement interval
- * @bytes: the number of bytes during this measurement interval
+ * @q_vector: pointer to q_vector
+ * @ring_container: ring info to update the itr for
  **/
-static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
-				   int packets, int bytes)
+static void igb_update_itr(struct igb_q_vector *q_vector,
+			   struct igb_ring_container *ring_container)
 {
-	unsigned int retval = itr_setting;
+	unsigned int packets = ring_container->total_packets;
+	unsigned int bytes = ring_container->total_bytes;
+	u8 itrval = ring_container->itr;
 
+	/* no packets, exit with status unchanged */
 	if (packets == 0)
-		goto update_itr_done;
+		return;
 
-	switch (itr_setting) {
+	switch (itrval) {
 	case lowest_latency:
 		/* handle TSO and jumbo frames */
 		if (bytes/packets > 8000)
-			retval = bulk_latency;
+			itrval = bulk_latency;
 		else if ((packets < 5) && (bytes > 512))
-			retval = low_latency;
+			itrval = low_latency;
 		break;
 	case low_latency:  /* 50 usec aka 20000 ints/s */
 		if (bytes > 10000) {
 			/* this if handles the TSO accounting */
 			if (bytes/packets > 8000) {
-				retval = bulk_latency;
+				itrval = bulk_latency;
 			} else if ((packets < 10) || ((bytes/packets) > 1200)) {
-				retval = bulk_latency;
+				itrval = bulk_latency;
 			} else if ((packets > 35)) {
-				retval = lowest_latency;
+				itrval = lowest_latency;
 			}
 		} else if (bytes/packets > 2000) {
-			retval = bulk_latency;
+			itrval = bulk_latency;
 		} else if (packets <= 2 && bytes < 512) {
-			retval = lowest_latency;
+			itrval = lowest_latency;
 		}
 		break;
 	case bulk_latency: /* 250 usec aka 4000 ints/s */
 		if (bytes > 25000) {
 			if (packets > 35)
-				retval = low_latency;
+				itrval = low_latency;
 		} else if (bytes < 1500) {
-			retval = low_latency;
+			itrval = low_latency;
 		}
 		break;
 	}
 
-update_itr_done:
-	return retval;
+	/* clear work counters since we have the values we need */
+	ring_container->total_bytes = 0;
+	ring_container->total_packets = 0;
+
+	/* write updated itr to ring container */
+	ring_container->itr = itrval;
 }
 
-static void igb_set_itr(struct igb_adapter *adapter)
+static void igb_set_itr(struct igb_q_vector *q_vector)
 {
-	struct igb_q_vector *q_vector = adapter->q_vector[0];
-	u16 current_itr;
+	struct igb_adapter *adapter = q_vector->adapter;
 	u32 new_itr = q_vector->itr_val;
+	u8 current_itr = 0;
 
 	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
 	if (adapter->link_speed != SPEED_1000) {
 		current_itr = 0;
-		new_itr = 4000;
+		new_itr = IGB_4K_ITR;
 		goto set_itr_now;
 	}
 
-	adapter->rx_itr = igb_update_itr(adapter,
-				    adapter->rx_itr,
-				    q_vector->rx_ring->total_packets,
-				    q_vector->rx_ring->total_bytes);
+	igb_update_itr(q_vector, &q_vector->tx);
+	igb_update_itr(q_vector, &q_vector->rx);
 
-	adapter->tx_itr = igb_update_itr(adapter,
-				    adapter->tx_itr,
-				    q_vector->tx_ring->total_packets,
-				    q_vector->tx_ring->total_bytes);
-	current_itr = max(adapter->rx_itr, adapter->tx_itr);
+	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
 
 	/* conservative mode (itr 3) eliminates the lowest_latency setting */
-	if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
+	if (current_itr == lowest_latency &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
 		current_itr = low_latency;
 
 	switch (current_itr) {
 	/* counts and packets in update_itr are dependent on these numbers */
 	case lowest_latency:
-		new_itr = 56;  /* aka 70,000 ints/sec */
+		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
 		break;
 	case low_latency:
-		new_itr = 196; /* aka 20,000 ints/sec */
+		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
 		break;
 	case bulk_latency:
-		new_itr = 980; /* aka 4,000 ints/sec */
+		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
 		break;
 	default:
 		break;
 	}
 
 set_itr_now:
-	q_vector->rx_ring->total_bytes = 0;
-	q_vector->rx_ring->total_packets = 0;
-	q_vector->tx_ring->total_bytes = 0;
-	q_vector->tx_ring->total_packets = 0;
-
 	if (new_itr != q_vector->itr_val) {
 		/* this attempts to bias the interrupt rate towards Bulk
 		 * by adding intermediate steps when interrupt rate is
@@ -3940,7 +3927,7 @@ set_itr_now:
 		new_itr = new_itr > q_vector->itr_val ?
 		             max((new_itr * q_vector->itr_val) /
 		                 (new_itr + (q_vector->itr_val >> 2)),
-		                 new_itr) :
+				 new_itr) :
 			     new_itr;
 		/* Don't write the value here; it resets the adapter's
 		 * internal timer, and causes us to delay far longer than
@@ -4760,7 +4747,7 @@ static void igb_write_itr(struct igb_q_vector *q_vector)
 	if (adapter->hw.mac.type == e1000_82575)
 		itr_val |= itr_val << 16;
 	else
-		itr_val |= 0x8000000;
+		itr_val |= E1000_EITR_CNT_IGNR;
 
 	writel(itr_val, q_vector->itr_register);
 	q_vector->set_itr = 0;
@@ -4788,8 +4775,8 @@ static void igb_update_dca(struct igb_q_vector *q_vector)
 	if (q_vector->cpu == cpu)
 		goto out_no_update;
 
-	if (q_vector->tx_ring) {
-		int q = q_vector->tx_ring->reg_idx;
+	if (q_vector->tx.ring) {
+		int q = q_vector->tx.ring->reg_idx;
 		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
 		if (hw->mac.type == e1000_82575) {
 			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
@@ -4802,8 +4789,8 @@ static void igb_update_dca(struct igb_q_vector *q_vector)
 		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
 		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
 	}
-	if (q_vector->rx_ring) {
-		int q = q_vector->rx_ring->reg_idx;
+	if (q_vector->rx.ring) {
+		int q = q_vector->rx.ring->reg_idx;
 		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
 		if (hw->mac.type == e1000_82575) {
 			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
@@ -5447,16 +5434,14 @@ static irqreturn_t igb_intr(int irq, void *data)
 	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
 	 * need for the IMC write */
 	u32 icr = rd32(E1000_ICR);
-	if (!icr)
-		return IRQ_NONE;  /* Not our interrupt */
-
-	igb_write_itr(q_vector);
 
 	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
 	 * not set, then the adapter didn't send an interrupt */
 	if (!(icr & E1000_ICR_INT_ASSERTED))
 		return IRQ_NONE;
 
+	igb_write_itr(q_vector);
+
 	if (icr & E1000_ICR_DRSTA)
 		schedule_work(&adapter->reset_task);
 
@@ -5477,15 +5462,15 @@ static irqreturn_t igb_intr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
+void igb_ring_irq_enable(struct igb_q_vector *q_vector)
 {
 	struct igb_adapter *adapter = q_vector->adapter;
 	struct e1000_hw *hw = &adapter->hw;
 
-	if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
-	    (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
-		if (!adapter->msix_entries)
-			igb_set_itr(adapter);
+	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
+	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
+		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
+			igb_set_itr(q_vector);
 		else
 			igb_update_ring_itr(q_vector);
 	}
@@ -5514,10 +5499,10 @@ static int igb_poll(struct napi_struct *napi, int budget)
 	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
 		igb_update_dca(q_vector);
 #endif
-	if (q_vector->tx_ring)
+	if (q_vector->tx.ring)
 		clean_complete = igb_clean_tx_irq(q_vector);
 
-	if (q_vector->rx_ring)
+	if (q_vector->rx.ring)
 		clean_complete &= igb_clean_rx_irq(q_vector, budget);
 
 	/* If all work not completed, return budget and keep polling */
@@ -5597,11 +5582,11 @@ static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 {
 	struct igb_adapter *adapter = q_vector->adapter;
-	struct igb_ring *tx_ring = q_vector->tx_ring;
+	struct igb_ring *tx_ring = q_vector->tx.ring;
 	struct igb_tx_buffer *tx_buffer;
 	union e1000_adv_tx_desc *tx_desc, *eop_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
-	unsigned int budget = q_vector->tx_work_limit;
+	unsigned int budget = q_vector->tx.work_limit;
 	unsigned int i = tx_ring->next_to_clean;
 
 	if (test_bit(__IGB_DOWN, &adapter->state))
@@ -5687,8 +5672,8 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 	tx_ring->tx_stats.bytes += total_bytes;
 	tx_ring->tx_stats.packets += total_packets;
 	u64_stats_update_end(&tx_ring->tx_syncp);
-	tx_ring->total_bytes += total_bytes;
-	tx_ring->total_packets += total_packets;
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
 
 	if (tx_ring->detect_tx_hung) {
 		struct e1000_hw *hw = &adapter->hw;
@@ -5837,7 +5822,7 @@ static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
 
 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 {
-	struct igb_ring *rx_ring = q_vector->rx_ring;
+	struct igb_ring *rx_ring = q_vector->rx.ring;
 	union e1000_adv_rx_desc *rx_desc;
 	const int current_node = numa_node_id();
 	unsigned int total_bytes = 0, total_packets = 0;
@@ -5954,8 +5939,8 @@ next_desc:
 	rx_ring->rx_stats.packets += total_packets;
 	rx_ring->rx_stats.bytes += total_bytes;
 	u64_stats_update_end(&rx_ring->rx_syncp);
-	rx_ring->total_packets += total_packets;
-	rx_ring->total_bytes += total_bytes;
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
 
 	if (cleaned_count)
 		igb_alloc_rx_buffers(rx_ring, cleaned_count);
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 06/11] igb: cleanup IVAR configuration
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change is meant to cleanup some of the IVAR register configuration.
igb_assign_vector had become pretty large with multiple copies of the same
general code for setting the IVAR. This change consolidates most of that
code by adding the igb_write_ivar function which allows us just to compute
the index and offset and then use that information to setup the IVAR.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |  120 +++++++++++++---------------
 1 files changed, 56 insertions(+), 64 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index a5597de..bcc1c28 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -727,15 +727,40 @@ err:
 	return -ENOMEM;
 }
 
+/**
+ *  igb_write_ivar - configure ivar for given MSI-X vector
+ *  @hw: pointer to the HW structure
+ *  @msix_vector: vector number we are allocating to a given ring
+ *  @index: row index of IVAR register to write within IVAR table
+ *  @offset: column offset of in IVAR, should be multiple of 8
+ *
+ *  This function is intended to handle the writing of the IVAR register
+ *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
+ *  each containing an cause allocation for an Rx and Tx ring, and a
+ *  variable number of rows depending on the number of queues supported.
+ **/
+static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
+			   int index, int offset)
+{
+	u32 ivar = array_rd32(E1000_IVAR0, index);
+
+	/* clear any bits that are currently set */
+	ivar &= ~((u32)0xFF << offset);
+
+	/* write vector and valid bit */
+	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
+
+	array_wr32(E1000_IVAR0, index, ivar);
+}
+
 #define IGB_N0_QUEUE -1
 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
 {
-	u32 msixbm = 0;
 	struct igb_adapter *adapter = q_vector->adapter;
 	struct e1000_hw *hw = &adapter->hw;
-	u32 ivar, index;
 	int rx_queue = IGB_N0_QUEUE;
 	int tx_queue = IGB_N0_QUEUE;
+	u32 msixbm = 0;
 
 	if (q_vector->rx.ring)
 		rx_queue = q_vector->rx.ring->reg_idx;
@@ -758,72 +783,39 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
 		q_vector->eims_value = msixbm;
 		break;
 	case e1000_82576:
-		/* 82576 uses a table-based method for assigning vectors.
-		   Each queue has a single entry in the table to which we write
-		   a vector number along with a "valid" bit.  Sadly, the layout
-		   of the table is somewhat counterintuitive. */
-		if (rx_queue > IGB_N0_QUEUE) {
-			index = (rx_queue & 0x7);
-			ivar = array_rd32(E1000_IVAR0, index);
-			if (rx_queue < 8) {
-				/* vector goes into low byte of register */
-				ivar = ivar & 0xFFFFFF00;
-				ivar |= msix_vector | E1000_IVAR_VALID;
-			} else {
-				/* vector goes into third byte of register */
-				ivar = ivar & 0xFF00FFFF;
-				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
-			}
-			array_wr32(E1000_IVAR0, index, ivar);
-		}
-		if (tx_queue > IGB_N0_QUEUE) {
-			index = (tx_queue & 0x7);
-			ivar = array_rd32(E1000_IVAR0, index);
-			if (tx_queue < 8) {
-				/* vector goes into second byte of register */
-				ivar = ivar & 0xFFFF00FF;
-				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
-			} else {
-				/* vector goes into high byte of register */
-				ivar = ivar & 0x00FFFFFF;
-				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
-			}
-			array_wr32(E1000_IVAR0, index, ivar);
-		}
+		/*
+		 * 82576 uses a table that essentially consists of 2 columns
+		 * with 8 rows.  The ordering is column-major so we use the
+		 * lower 3 bits as the row index, and the 4th bit as the
+		 * column offset.
+		 */
+		if (rx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       rx_queue & 0x7,
+				       (rx_queue & 0x8) << 1);
+		if (tx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       tx_queue & 0x7,
+				       ((tx_queue & 0x8) << 1) + 8);
 		q_vector->eims_value = 1 << msix_vector;
 		break;
 	case e1000_82580:
 	case e1000_i350:
-		/* 82580 uses the same table-based approach as 82576 but has fewer
-		   entries as a result we carry over for queues greater than 4. */
-		if (rx_queue > IGB_N0_QUEUE) {
-			index = (rx_queue >> 1);
-			ivar = array_rd32(E1000_IVAR0, index);
-			if (rx_queue & 0x1) {
-				/* vector goes into third byte of register */
-				ivar = ivar & 0xFF00FFFF;
-				ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
-			} else {
-				/* vector goes into low byte of register */
-				ivar = ivar & 0xFFFFFF00;
-				ivar |= msix_vector | E1000_IVAR_VALID;
-			}
-			array_wr32(E1000_IVAR0, index, ivar);
-		}
-		if (tx_queue > IGB_N0_QUEUE) {
-			index = (tx_queue >> 1);
-			ivar = array_rd32(E1000_IVAR0, index);
-			if (tx_queue & 0x1) {
-				/* vector goes into high byte of register */
-				ivar = ivar & 0x00FFFFFF;
-				ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
-			} else {
-				/* vector goes into second byte of register */
-				ivar = ivar & 0xFFFF00FF;
-				ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
-			}
-			array_wr32(E1000_IVAR0, index, ivar);
-		}
+		/*
+		 * On 82580 and newer adapters the scheme is similar to 82576
+		 * however instead of ordering column-major we have things
+		 * ordered row-major.  So we traverse the table by using
+		 * bit 0 as the column offset, and the remaining bits as the
+		 * row index.
+		 */
+		if (rx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       rx_queue >> 1,
+				       (rx_queue & 0x1) << 4);
+		if (tx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       tx_queue >> 1,
+				       ((tx_queue & 0x1) << 4) + 8);
 		q_vector->eims_value = 1 << msix_vector;
 		break;
 	default:
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 07/11] igb: retire the RX_CSUM flag and use the netdev flag instead
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Since the netdev now has its' own checksum flag to indicate if Rx checksum
is enabled we might as well use that instead of using the ring flag.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |    1 -
 drivers/net/ethernet/intel/igb/igb_main.c |   22 ++++++----------------
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index e0dea22..8a15d43 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -243,7 +243,6 @@ struct igb_ring {
 };
 
 enum e1000_ring_flags_t {
-	IGB_RING_FLAG_RX_CSUM,
 	IGB_RING_FLAG_RX_SCTP_CSUM,
 	IGB_RING_FLAG_TX_CTX_IDX,
 	IGB_RING_FLAG_TX_DETECT_HANG
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index bcc1c28..33b250f 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -709,8 +709,6 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 		ring->queue_index = i;
 		ring->dev = &adapter->pdev->dev;
 		ring->netdev = adapter->netdev;
-		/* enable rx checksum */
-		set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
 		/* set flag indicating ring supports SCTP checksum offload */
 		if (adapter->hw.mac.type >= e1000_82576)
 			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
@@ -1764,19 +1762,8 @@ static u32 igb_fix_features(struct net_device *netdev, u32 features)
 
 static int igb_set_features(struct net_device *netdev, u32 features)
 {
-	struct igb_adapter *adapter = netdev_priv(netdev);
-	int i;
 	u32 changed = netdev->features ^ features;
 
-	for (i = 0; i < adapter->num_rx_queues; i++) {
-		if (features & NETIF_F_RXCSUM)
-			set_bit(IGB_RING_FLAG_RX_CSUM,
-				&adapter->rx_ring[i]->flags);
-		else
-			clear_bit(IGB_RING_FLAG_RX_CSUM,
-				  &adapter->rx_ring[i]->flags);
-	}
-
 	if (changed & NETIF_F_HW_VLAN_RX)
 		igb_vlan_mode(netdev, features);
 
@@ -5737,9 +5724,12 @@ static inline void igb_rx_checksum(struct igb_ring *ring,
 {
 	skb_checksum_none_assert(skb);
 
-	/* Ignore Checksum bit is set or checksum is disabled through ethtool */
-	if (!test_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags) ||
-	     (status_err & E1000_RXD_STAT_IXSM))
+	/* Ignore Checksum bit is set */
+	if (status_err & E1000_RXD_STAT_IXSM)
+		return;
+
+	/* Rx checksum disabled via ethtool */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
 		return;
 
 	/* TCP/UDP checksum error bit is set */
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 03/11] igb: avoid unnecessary conversions from u16 to int
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

There are a number of places where we have values that are stored as u16
but are being converted to int unnecessarily.  In order to avoid that we
should convert all variables that deal with the next_to_clean, next_to_use,
and count to u16 values.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_ethtool.c |    5 +++--
 drivers/net/ethernet/intel/igb/igb_main.c    |    9 ++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 174540f..c364d84 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1577,8 +1577,8 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
 	union e1000_adv_rx_desc *rx_desc;
 	struct igb_rx_buffer *rx_buffer_info;
 	struct igb_tx_buffer *tx_buffer_info;
-	int rx_ntc, tx_ntc, count = 0;
 	u32 staterr;
+	u16 rx_ntc, tx_ntc, count = 0;
 
 	/* initialize next to clean and descriptor values */
 	rx_ntc = rx_ring->next_to_clean;
@@ -1630,7 +1630,8 @@ static int igb_run_loopback_test(struct igb_adapter *adapter)
 {
 	struct igb_ring *tx_ring = &adapter->test_tx_ring;
 	struct igb_ring *rx_ring = &adapter->test_rx_ring;
-	int i, j, lc, good_cnt, ret_val = 0;
+	u16 i, j, lc, good_cnt;
+	int ret_val = 0;
 	unsigned int size = IGB_RX_HDR_LEN;
 	netdev_tx_t tx_ret_val;
 	struct sk_buff *skb;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 1c234f0..150bf4e 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -338,14 +338,13 @@ static void igb_dump(struct igb_adapter *adapter)
 	struct net_device *netdev = adapter->netdev;
 	struct e1000_hw *hw = &adapter->hw;
 	struct igb_reg_info *reginfo;
-	int n = 0;
 	struct igb_ring *tx_ring;
 	union e1000_adv_tx_desc *tx_desc;
 	struct my_u0 { u64 a; u64 b; } *u0;
 	struct igb_ring *rx_ring;
 	union e1000_adv_rx_desc *rx_desc;
 	u32 staterr;
-	int i = 0;
+	u16 i, n;
 
 	if (!netif_msg_hw(adapter))
 		return;
@@ -3169,7 +3168,7 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
 {
 	struct igb_tx_buffer *buffer_info;
 	unsigned long size;
-	unsigned int i;
+	u16 i;
 
 	if (!tx_ring->tx_buffer_info)
 		return;
@@ -4285,7 +4284,7 @@ dma_error:
 	tx_ring->next_to_use = i;
 }
 
-static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
+static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
 {
 	struct net_device *netdev = tx_ring->netdev;
 
@@ -4311,7 +4310,7 @@ static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
 	return 0;
 }
 
-static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
+static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
 {
 	if (igb_desc_unused(tx_ring) >= size)
 		return 0;
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 04/11] igb: Consolidate all of the ring feature flags into a single value
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change moves all of the ring flags into a single value.  The advantage
to this is that there is one central area for all of these flags and they
can all make use of the set/test bit operations.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |   10 ++++++----
 drivers/net/ethernet/intel/igb/igb_main.c |   23 +++++++++++++----------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index de35c02..0cc4702 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -234,10 +234,12 @@ struct igb_ring {
 	dma_addr_t dma;                /* phys address of the ring */
 };
 
-#define IGB_RING_FLAG_RX_CSUM        0x00000001 /* RX CSUM enabled */
-#define IGB_RING_FLAG_RX_SCTP_CSUM   0x00000002 /* SCTP CSUM offload enabled */
-
-#define IGB_RING_FLAG_TX_CTX_IDX     0x00000001 /* HW requires context index */
+enum e1000_ring_flags_t {
+	IGB_RING_FLAG_RX_CSUM,
+	IGB_RING_FLAG_RX_SCTP_CSUM,
+	IGB_RING_FLAG_TX_CTX_IDX,
+	IGB_RING_FLAG_TX_DETECT_HANG
+};
 
 #define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS)
 
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 150bf4e..91ae368 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -697,7 +697,7 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 		ring->netdev = adapter->netdev;
 		/* For 82575, context index must be unique per ring. */
 		if (adapter->hw.mac.type == e1000_82575)
-			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
+			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
 		adapter->tx_ring[i] = ring;
 	}
 
@@ -709,10 +709,11 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 		ring->queue_index = i;
 		ring->dev = &adapter->pdev->dev;
 		ring->netdev = adapter->netdev;
-		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
+		/* enable rx checksum */
+		set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
 		/* set flag indicating ring supports SCTP checksum offload */
 		if (adapter->hw.mac.type >= e1000_82576)
-			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
+			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
 		adapter->rx_ring[i] = ring;
 	}
 
@@ -1775,9 +1776,11 @@ static int igb_set_features(struct net_device *netdev, u32 features)
 
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		if (features & NETIF_F_RXCSUM)
-			adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
+			set_bit(IGB_RING_FLAG_RX_CSUM,
+				&adapter->rx_ring[i]->flags);
 		else
-			adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
+			clear_bit(IGB_RING_FLAG_RX_CSUM,
+				  &adapter->rx_ring[i]->flags);
 	}
 
 	if (changed & NETIF_F_HW_VLAN_RX)
@@ -3965,7 +3968,7 @@ void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
 	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
 
 	/* For 82575, context index must be unique per ring. */
-	if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
+	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
 		mss_l4len_idx |= tx_ring->reg_idx << 4;
 
 	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
@@ -4132,7 +4135,7 @@ static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
 
 	/* 82575 requires a unique index per ring if any offload is enabled */
 	if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
-	    (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX))
+	    test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
 		olinfo_status |= tx_ring->reg_idx << 4;
 
 	/* insert L4 checksum */
@@ -5758,7 +5761,7 @@ static inline void igb_rx_checksum(struct igb_ring *ring,
 	skb_checksum_none_assert(skb);
 
 	/* Ignore Checksum bit is set or checksum is disabled through ethtool */
-	if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
+	if (!test_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags) ||
 	     (status_err & E1000_RXD_STAT_IXSM))
 		return;
 
@@ -5770,8 +5773,8 @@ static inline void igb_rx_checksum(struct igb_ring *ring,
 		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
 		 * packets, (aka let the stack check the crc32c)
 		 */
-		if ((skb->len == 60) &&
-		    (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
+		if (!((skb->len == 60) &&
+		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
 			u64_stats_update_begin(&ring->rx_syncp);
 			ring->rx_stats.csum_err++;
 			u64_stats_update_end(&ring->rx_syncp);
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 02/11] igb: push data into first igb_tx_buffer sooner to reduce stack usage
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Alexander Duyck, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Instead of storing most of the data for the TX hot path in the stack until
we are ready to write the descriptor we can save ourselves some time and
effort by pushing the SKB, tx_flags, gso_size, bytecount, and protocol into
the first igb_tx_buffer since that is where we will end up putting it
anyway.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |    1 +
 drivers/net/ethernet/intel/igb/igb_main.c |  103 +++++++++++++++--------------
 2 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 77793a9..de35c02 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -146,6 +146,7 @@ struct igb_tx_buffer {
 	struct sk_buff *skb;
 	unsigned int bytecount;
 	u16 gso_segs;
+	__be16 protocol;
 	dma_addr_t dma;
 	u32 length;
 	u32 tx_flags;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 862dd7c..1c234f0 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3975,10 +3975,11 @@ void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
 	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
 }
 
-static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
-			  u32 tx_flags, __be16 protocol, u8 *hdr_len)
+static int igb_tso(struct igb_ring *tx_ring,
+		   struct igb_tx_buffer *first,
+		   u8 *hdr_len)
 {
-	int err;
+	struct sk_buff *skb = first->skb;
 	u32 vlan_macip_lens, type_tucmd;
 	u32 mss_l4len_idx, l4len;
 
@@ -3986,7 +3987,7 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
 		return 0;
 
 	if (skb_header_cloned(skb)) {
-		err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+		int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 		if (err)
 			return err;
 	}
@@ -3994,7 +3995,7 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
 	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
 	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
 
-	if (protocol == __constant_htons(ETH_P_IP)) {
+	if (first->protocol == __constant_htons(ETH_P_IP)) {
 		struct iphdr *iph = ip_hdr(skb);
 		iph->tot_len = 0;
 		iph->check = 0;
@@ -4003,16 +4004,26 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
 							 IPPROTO_TCP,
 							 0);
 		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
+		first->tx_flags |= IGB_TX_FLAGS_TSO |
+				   IGB_TX_FLAGS_CSUM |
+				   IGB_TX_FLAGS_IPV4;
 	} else if (skb_is_gso_v6(skb)) {
 		ipv6_hdr(skb)->payload_len = 0;
 		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 						       &ipv6_hdr(skb)->daddr,
 						       0, IPPROTO_TCP, 0);
+		first->tx_flags |= IGB_TX_FLAGS_TSO |
+				   IGB_TX_FLAGS_CSUM;
 	}
 
+	/* compute header lengths */
 	l4len = tcp_hdrlen(skb);
 	*hdr_len = skb_transport_offset(skb) + l4len;
 
+	/* update gso size and bytecount with header size */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
 	/* MSS L4LEN IDX */
 	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
 	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
@@ -4020,26 +4031,26 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb,
 	/* VLAN MACLEN IPLEN */
 	vlan_macip_lens = skb_network_header_len(skb);
 	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
-	vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
 
 	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
 
 	return 1;
 }
 
-static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
-			       u32 tx_flags, __be16 protocol)
+static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
 {
+	struct sk_buff *skb = first->skb;
 	u32 vlan_macip_lens = 0;
 	u32 mss_l4len_idx = 0;
 	u32 type_tucmd = 0;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		if (!(tx_flags & IGB_TX_FLAGS_VLAN))
-			return false;
+		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
+			return;
 	} else {
 		u8 l4_hdr = 0;
-		switch (protocol) {
+		switch (first->protocol) {
 		case __constant_htons(ETH_P_IP):
 			vlan_macip_lens |= skb_network_header_len(skb);
 			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
@@ -4053,7 +4064,7 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
 			if (unlikely(net_ratelimit())) {
 				dev_warn(tx_ring->dev,
 				 "partial checksum but proto=%x!\n",
-				 protocol);
+				 first->protocol);
 			}
 			break;
 		}
@@ -4081,14 +4092,15 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb,
 			}
 			break;
 		}
+
+		/* update TX checksum flag */
+		first->tx_flags |= IGB_TX_FLAGS_CSUM;
 	}
 
 	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
-	vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
 
 	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
-
-	return (skb->ip_summed == CHECKSUM_PARTIAL);
 }
 
 static __le32 igb_tx_cmd_type(u32 tx_flags)
@@ -4113,8 +4125,9 @@ static __le32 igb_tx_cmd_type(u32 tx_flags)
 	return cmd_type;
 }
 
-static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
-				   struct igb_ring *tx_ring)
+static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
+				 union e1000_adv_tx_desc *tx_desc,
+				 u32 tx_flags, unsigned int paylen)
 {
 	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
 
@@ -4132,7 +4145,7 @@ static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
 			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
 	}
 
-	return cpu_to_le32(olinfo_status);
+	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
 }
 
 /*
@@ -4140,12 +4153,13 @@ static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen,
  * maintain a power of two alignment we have to limit ourselves to 32K.
  */
 #define IGB_MAX_TXD_PWR	15
-#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
+#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
 
-static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
-		       struct igb_tx_buffer *first, u32 tx_flags,
+static void igb_tx_map(struct igb_ring *tx_ring,
+		       struct igb_tx_buffer *first,
 		       const u8 hdr_len)
 {
+	struct sk_buff *skb = first->skb;
 	struct igb_tx_buffer *tx_buffer_info;
 	union e1000_adv_tx_desc *tx_desc;
 	dma_addr_t dma;
@@ -4154,24 +4168,12 @@ static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
 	unsigned int size = skb_headlen(skb);
 	unsigned int paylen = skb->len - hdr_len;
 	__le32 cmd_type;
+	u32 tx_flags = first->tx_flags;
 	u16 i = tx_ring->next_to_use;
-	u16 gso_segs;
-
-	if (tx_flags & IGB_TX_FLAGS_TSO)
-		gso_segs = skb_shinfo(skb)->gso_segs;
-	else
-		gso_segs = 1;
-
-	/* multiply data chunks by size of headers */
-	first->bytecount = paylen + (gso_segs * hdr_len);
-	first->gso_segs = gso_segs;
-	first->skb = skb;
 
 	tx_desc = IGB_TX_DESC(tx_ring, i);
 
-	tx_desc->read.olinfo_status =
-		igb_tx_olinfo_status(tx_flags, paylen, tx_ring);
-
+	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
 	cmd_type = igb_tx_cmd_type(tx_flags);
 
 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
@@ -4181,7 +4183,6 @@ static void igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb,
 	/* record length, and DMA address */
 	first->length = size;
 	first->dma = dma;
-	first->tx_flags = tx_flags;
 	tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
 	for (;;) {
@@ -4336,6 +4337,12 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 		return NETDEV_TX_BUSY;
 	}
 
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 		tx_flags |= IGB_TX_FLAGS_TSTAMP;
@@ -4346,22 +4353,17 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
 	}
 
-	/* record the location of the first descriptor for this packet */
-	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+	first->protocol = protocol;
 
-	tso = igb_tso(tx_ring, skb, tx_flags, protocol, &hdr_len);
-	if (tso < 0) {
+	tso = igb_tso(tx_ring, first, &hdr_len);
+	if (tso < 0)
 		goto out_drop;
-	} else if (tso) {
-		tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM;
-		if (protocol == htons(ETH_P_IP))
-			tx_flags |= IGB_TX_FLAGS_IPV4;
-	} else if (igb_tx_csum(tx_ring, skb, tx_flags, protocol) &&
-		   (skb->ip_summed == CHECKSUM_PARTIAL)) {
-		tx_flags |= IGB_TX_FLAGS_CSUM;
-	}
+	else if (!tso)
+		igb_tx_csum(tx_ring, first);
 
-	igb_tx_map(tx_ring, skb, first, tx_flags, hdr_len);
+	igb_tx_map(tx_ring, first, hdr_len);
 
 	/* Make sure there is space in the ring for the next send. */
 	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
@@ -4369,7 +4371,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 
 out_drop:
-	dev_kfree_skb_any(skb);
+	igb_unmap_and_free_tx_resource(tx_ring, first);
+
 	return NETDEV_TX_OK;
 }
 
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 01/11] e1000e: locking bug introduced by commit 67fd4fcb
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1318347952-25068-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

Commit 67fd4fcb (e1000e: convert to stats64) added the ability to update
statistics more accurately and on-demand through the net_device_ops
.ndo_get_stats64 hook, but introduced a locking bug on 82577/8/9 when
linked at half-duplex (seen on kernels with CONFIG_DEBUG_ATOMIC_SLEEP=y and
CONFIG_PROVE_LOCKING=y).  The commit introduced code paths that caused a
mutex to be locked in atomic contexts, e.g. an rcu_read_lock is held when
irqbalance reads the stats from /sys/class/net/ethX/statistics causing the
mutex to be locked to read the Phy half-duplex statistics registers.

The mutex was originally introduced to prevent concurrent accesses of
resources (the NVM and Phy) shared by the driver, firmware and hardware
a few years back when there was an issue with the NVM getting corrupted.
It was later split into two mutexes - one for the NVM and one for the Phy
when it was determined the NVM, unlike the Phy, should not be protected by
the software/firmware/hardware semaphore (arbitration of which is done in
part with the SWFLAG bit in the EXTCNF_CTRL register).  This latter
semaphore should be sufficient to prevent resource contention of the Phy in
the driver (i.e. the mutex for Phy accesses is not needed), but to be sure
the mutex is replaced with an atomic bit flag which will warn if any
contention is possible.

Also add additional debug output to help determine when the sw/fw/hw
semaphore is owned by the firmware or hardware.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Reported-by: Francois Romieu <romieu@fr.zoreil.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/e1000.h   |    1 +
 drivers/net/ethernet/intel/e1000e/ich8lan.c |   21 +++++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 7877b9c..9fe18d1 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -469,6 +469,7 @@ struct e1000_info {
 enum e1000_state_t {
 	__E1000_TESTING,
 	__E1000_RESETTING,
+	__E1000_ACCESS_SHARED_RESOURCE,
 	__E1000_DOWN
 };
 
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 4f70974..6a17c62 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -852,8 +852,6 @@ static void e1000_release_nvm_ich8lan(struct e1000_hw *hw)
 	mutex_unlock(&nvm_mutex);
 }
 
-static DEFINE_MUTEX(swflag_mutex);
-
 /**
  *  e1000_acquire_swflag_ich8lan - Acquire software control flag
  *  @hw: pointer to the HW structure
@@ -866,7 +864,12 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw)
 	u32 extcnf_ctrl, timeout = PHY_CFG_TIMEOUT;
 	s32 ret_val = 0;
 
-	mutex_lock(&swflag_mutex);
+	if (test_and_set_bit(__E1000_ACCESS_SHARED_RESOURCE,
+			     &hw->adapter->state)) {
+		WARN(1, "e1000e: %s: contention for Phy access\n",
+		     hw->adapter->netdev->name);
+		return -E1000_ERR_PHY;
+	}
 
 	while (timeout) {
 		extcnf_ctrl = er32(EXTCNF_CTRL);
@@ -878,7 +881,7 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw)
 	}
 
 	if (!timeout) {
-		e_dbg("SW/FW/HW has locked the resource for too long.\n");
+		e_dbg("SW has already locked the resource.\n");
 		ret_val = -E1000_ERR_CONFIG;
 		goto out;
 	}
@@ -898,7 +901,9 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw)
 	}
 
 	if (!timeout) {
-		e_dbg("Failed to acquire the semaphore.\n");
+		e_dbg("Failed to acquire the semaphore, FW or HW has it: "
+		      "FWSM=0x%8.8x EXTCNF_CTRL=0x%8.8x)\n",
+		      er32(FWSM), extcnf_ctrl);
 		extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG;
 		ew32(EXTCNF_CTRL, extcnf_ctrl);
 		ret_val = -E1000_ERR_CONFIG;
@@ -907,7 +912,7 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw)
 
 out:
 	if (ret_val)
-		mutex_unlock(&swflag_mutex);
+		clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state);
 
 	return ret_val;
 }
@@ -932,7 +937,7 @@ static void e1000_release_swflag_ich8lan(struct e1000_hw *hw)
 		e_dbg("Semaphore unexpectedly released by sw/fw/hw\n");
 	}
 
-	mutex_unlock(&swflag_mutex);
+	clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state);
 }
 
 /**
@@ -3139,7 +3144,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
 	msleep(20);
 
 	if (!ret_val)
-		mutex_unlock(&swflag_mutex);
+		clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state);
 
 	if (ctrl & E1000_CTRL_PHY_RST) {
 		ret_val = hw->phy.ops.get_cfg_done(hw);
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 00/11 v2][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2011-10-11 15:45 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

The following series contains updates to e1000e and igb.  This
version of the series contains the following changes:

- Added the e1000e bug fix patch
- Dropped the igb patch "igb: Use node specific allocations for
  the q_vectors and rings" based on feedback

Based on the feedback, I am working on re-working the igb patch that
was dropped in this series and will re-submit it later on.

The remain 10 igb patches are unchanged from the previous pull request
and are the continuation of the cleanups and refactoring that Alex has done.
After this series there are 4-5 more patches to complete the work
that Alex has done on igb.

The following are changes since commit 3ed6f6958c0ac21958285d8648f14d34da4bbcb3:
  ll_temac: convert to SKB paged frag API.
and are available in the git repository at
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next.git
or
  git://github.com/Jkirsher/net-next.git

Alexander Duyck (10):
  igb: push data into first igb_tx_buffer sooner to reduce stack usage
  igb: avoid unnecessary conversions from u16 to int
  igb: Consolidate all of the ring feature flags into a single value
  igb: Move ITR related data into work container within the q_vector
  igb: cleanup IVAR configuration
  igb: retire the RX_CSUM flag and use the netdev flag instead
  igb: leave staterr in place and instead us a helper function to check
    bits
  igb: fix recent VLAN changes that would leave VLANs disabled after
    reset
  igb: move TX hang check flag into ring->flags
  igb: add support for NETIF_F_RXHASH

Bruce Allan (1):
  e1000e: locking bug introduced by commit 67fd4fcb

 drivers/net/ethernet/intel/e1000e/e1000.h      |    1 +
 drivers/net/ethernet/intel/e1000e/ich8lan.c    |   21 +-
 drivers/net/ethernet/intel/igb/e1000_defines.h |    3 +
 drivers/net/ethernet/intel/igb/igb.h           |   50 ++-
 drivers/net/ethernet/intel/igb/igb_ethtool.c   |   14 +-
 drivers/net/ethernet/intel/igb/igb_main.c      |  595 ++++++++++++------------
 6 files changed, 347 insertions(+), 337 deletions(-)

-- 
1.7.6.4

^ permalink raw reply

* Re: e100 + VLANs?
From: David Lamparter @ 2011-10-11 15:29 UTC (permalink / raw)
  To: Michael Tokarev; +Cc: Eric Dumazet, David Lamparter, jeffrey.t.kirsher, netdev
In-Reply-To: <4E943CF7.6030905@msgid.tls.msk.ru>

On Tue, Oct 11, 2011 at 04:56:23PM +0400, Michael Tokarev wrote:
> So it looks like the card merely ignores these packets.
> 
> So little result for so much efforts... :(
> 
> But it is not really that bad I think - it is an obsolete hardware.

The knowledge and code for this is actually around line 1142 of e100.c:
        if (nic->mac >= mac_82558_D101_A4) {
                config->fc_disable = 0x1;       /* 1=Tx fc off, 0=Tx fc on */
                config->mwi_enable = 0x1;       /* 1=enable, 0=disable */
                config->standard_tcb = 0x0;     /* 1=standard, 0=extended */
                config->rx_long_ok = 0x1;       /* 1=VLANs ok, 0=standard */

where rx_long_ok is the configuration bit to enable frame reception
for >1514 byte frames. I guess your card is < mac_82558_D101_A4...

(cf. "Intel 8255x 10/100 Mbps Ethernet Controller Family Open Source
Software Developer Manual" page 78/86 - "Long Receive OK. This bit is
reserved on the 82557 and should be set to 0. When this bit is set on
the 82558 or 82559, the device considers received frames that have
a data field longer than 1500 bytes as good frames.")

-David

^ permalink raw reply

* Update
From: Account Update @ 2011-10-11 15:22 UTC (permalink / raw)






This message is sent automatically from Our Webmaster  
AdministratorUpgrade/Maintenance Program periodically sent to all our  
Email User for Upgrade/Maintenance. Just before this message was sent,  
you have 18 Megabytes (MB) Message Storage in your mail.

help us re-set your WEBSPACE on our database prior to maintaining your  
Email Storage Capacity. To prevent your account from being deleted,  
you must reply to this e-mail and provide us with the  
Information&#146;s below for confirmation that you still operate this  
email on regular basis:

Email Account Username ( )
Email Account password ( )
Reconfirm Account Password ( )

Your account will be automatically deleted if you fail to confirm your  
account withing 24 Hours.
WEBMAIL TECHNICAL SUPPORT TEAM

^ permalink raw reply

* Re: [PATCH] bonding: L2L3 xmit doesn't support IPv6
From: Andy Gospodarek @ 2011-10-11 14:33 UTC (permalink / raw)
  To: Yinglin Sun; +Cc: Jay Vosburgh, Andy Gospodarek, netdev
In-Reply-To: <1318052205-21991-1-git-send-email-Yinglin.Sun@emc.com>

On Fri, Oct 07, 2011 at 10:36:45PM -0700, Yinglin Sun wrote:
> Add IPv6 support in L2L3 xmit policy.
> L3L4 doesn't support IPv6 either, and I'll try to fix that later.
> 
> Signed-off-by: Yinglin Sun <Yinglin.Sun@emc.com>
> ---
>  drivers/net/bonding/bond_main.c |    7 +++++++
>  1 files changed, 7 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 6d79b78..d6fd282 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -41,8 +41,10 @@
>  #include <linux/ptrace.h>
>  #include <linux/ioport.h>
>  #include <linux/in.h>
> +#include <linux/in6.h>
>  #include <net/ip.h>
>  #include <linux/ip.h>
> +#include <linux/ipv6.h>
>  #include <linux/tcp.h>
>  #include <linux/udp.h>
>  #include <linux/slab.h>
> @@ -3372,10 +3374,15 @@ static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
>  {
>  	struct ethhdr *data = (struct ethhdr *)skb->data;
>  	struct iphdr *iph = ip_hdr(skb);
> +	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
>  
>  	if (skb->protocol == htons(ETH_P_IP)) {
>  		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
>  			(data->h_dest[5] ^ data->h_source[5])) % count;
> +	} else if (skb->protocol == htons(ETH_P_IPV6)) {
> +		return ((ntohl(ipv6h->saddr.s6_addr32[3] ^
> +			       ipv6h->daddr.s6_addr32[3]) & 0xffff) ^
> +			(data->h_dest[5] ^ data->h_source[5])) % count;
>  	}
>  

There have been some attempts to add support for ipv6 hashing this in
the past, but none have been committed.  The best one I had seen was one
that did some extensive testing one a wide variety of ipv6 traffic and
it showed nice traffic distribution.  I'm not sure if it was ever posted
upstream, so I will see if I can dig it up.

Can you quantify how traffic was distributed with this algorithm?

^ permalink raw reply

* Re: Hang with "runaway loop modprobe net-pf-1" on Linux 3.0.4
From: Shea Levy @ 2011-10-11 14:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: David S. Miller, Eric Dumazet, Alban Crequy, Andrew Morton,
	Davide Libenzi, netdev
In-Reply-To: <4E877A22.6070605@shealevy.com>

On 10/1/11 4:37 PM, Shea Levy wrote:
> On 09/29/2011 08:38 AM, shea@shealevy.com wrote:
>> Hello,
>>
>> My initrd mounts /proc and /sys, puts a tmpfs at /dev, then loads 
>> ext4. On
>> 2.6.39 (and earlier), this works fine and the initrd init continues
>> happily. On 3.0.4, however, the kernel spits out
>>
>>> request_module: runaway loop modprobe net-pf-1
>> a few times then hangs. If I drop into a shell before loading ext4 
>> and run
>> modprobe unix, the same problem occurs. The modprobe used is dynamically
>> linked with glibc (which is included in the initrd), and it is the same
>> modprobe on the working and non-working systems. From my searches, it
>> appears that CONFIG_UNIX=y instead of =m would probably solve this
>> problem, but I'd rather keep it as a module if it's possible to get it
>> working. What's going wrong here? Why does it work on 2.6.39 but not
>> 3.0.4?
>>
>> Regards,
>> Shea Levy
>>
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe 
>> linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>
> CC'ing af_unix.c maintainers. Also, I had the same problem with 3.0.
> -- 
> To unsubscribe from this list: send the line "unsubscribe 
> linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
This ended up being a problem with how my distro builds its initrd, 
sorry for the noise.

For the curious: The problem had to do with moving the depmod logic out 
of the main Makefile and into scripts/depmod.sh. My distro doesn't use a 
single global directory for all modules, so we needed to remove the '-b 
"$INSTALL_MOD_PATH"' from the depmod command that's run after module 
installation. We did this with a sed on the Makefile, which was a noop 
once that code was moved into a different file.

^ permalink raw reply

* Re: [RFC] net: remove erroneous sk null assignment in timestamping
From: Richard Cochran @ 2011-10-11 13:34 UTC (permalink / raw)
  To: Johannes Berg; +Cc: Eric Dumazet, David Miller, netdev
In-Reply-To: <1318069935.3991.25.camel@jlt3.sipsolutions.net>

On Sat, Oct 08, 2011 at 12:32:15PM +0200, Johannes Berg wrote:
> On Sat, 2011-10-08 at 10:57 +0200, Eric Dumazet wrote:
> > Check following commit changelog to get some information on this.
> > 
> > commit 2b85a34e911bf483c27cfdd124aeb1605145dc80
> > Author: Eric Dumazet <eric.dumazet@gmail.com>
> > Date:   Thu Jun 11 02:55:43 2009 -0700
> > 
> >     net: No more expensive sock_hold()/sock_put() on each tx
...
> There's one thing I still miss though: It seems to me that if you have a
> reference to a socket that has been sk_free()'ed (which is possible
> since it might still have sk_wmem_alloc > 0) you can't sock_hold() that
> socket. That feels a bit unexpected -- and might happen in the code
> Richard just suggested.

Yes, I have been trying to see how to solve this, but it looks like I
am out of luck. 

Even if I use skb_set_owner_w() in skb_clone_tx_timestamp(), still the
sock might go away during skb_orphan() in sock_queue_err_skb().

It is no good to take sock_hold() in skb_complete_tx_timestamp(),
since, as you point out, it might not be safe to call.

So, I wonder, when is it safe to call sock_hold?

Are the 101 odd callers protected against the situation where the last
sock_out() has already happened?

Thanks,

Richard

^ permalink raw reply

* Re: [RFC PATCH net 1/2] [BUGFIX] bonding: use local function pointer of bond->recv_probe in bond_handle_frame
From: Eric Dumazet @ 2011-10-11 13:23 UTC (permalink / raw)
  To: HAYASAKA Mitsuo
  Cc: Américo Wang, Jay Vosburgh, Andy Gospodarek, netdev,
	linux-kernel, yrl.pp-manager.tt
In-Reply-To: <4E943E53.6010601@hitachi.com>

Le mardi 11 octobre 2011 à 22:02 +0900, HAYASAKA Mitsuo a écrit :
> Hi WANG Cong
> 
> Thank you for your comments.
> 
> (2011/10/07 22:24), Américo Wang wrote:
> > On Fri, Oct 7, 2011 at 8:49 PM, Mitsuo Hayasaka
> > <mitsuo.hayasaka.hu@hitachi.com> wrote:
> >> The bond->recv_probe is called in bond_handle_frame() when
> >> a packet is received, but bond_close() sets it to NULL. So,
> >> a panic occurs when both functions work in parallel.
> >>
> >> Why this happen:
> >> After null pointer check of bond->recv_probe, an sk_buff is
> >> duplicated and bond->recv_probe is called in bond_handle_frame.
> >> So, a panic occurs when bond_close() is called between the
> >> check and call of bond->recv_probe.
> >>
> >> Patch:
> >> This patch uses a local function pointer of bond->recv_probe
> >> in bond_handle_frame(). So, it can avoid the null pointer
> >> dereference.
> >>
> > 
> > Hmm, I don't doubt it can fix the problem, I am wondering if
> > bond->recv_probe should be protected by bond->lock...
> 
> Indeed, in general any resources should be protected from the asynchronous
> workers.
> 
> At first, I thought it should be handled with lock protection, as well.
> However, I guess that using bond->lock on this kind of hot-path may
> introduces unnecessary overhead. In addition, this code works well
> without the strict lock protection. So, I think this change is the
> right way to fix it.

Maybe, but then ACCESS_ONCE() is needed to prevent compiler to
'optimize' the temporary variable.

Or use rcu_dereference() to make the whole thing really safe and self
documented.

^ permalink raw reply

* [patch] cipso: remove an unneeded NULL check in cipso_v4_doi_add()
From: Dan Carpenter @ 2011-10-11 13:22 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev, kernel-janitors

We dereference doi_def on the line before the NULL check.  It has
been this way since 2008.  I checked all the callers and doi_def is
always non-NULL here.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2c2a98e..86f3b88 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -476,7 +476,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def,
 	doi = doi_def->doi;
 	doi_type = doi_def->type;
 
-	if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
+	if (doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
 		goto doi_add_return;
 	for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT; iter++) {
 		switch (doi_def->tags[iter]) {

^ permalink raw reply related

* Re: [net-next PATCH] net: allow vlan traffic to be received under bond
From: John Fastabend @ 2011-10-11 13:16 UTC (permalink / raw)
  To: Jesse Gross
  Cc: Jiri Pirko, davem@davemloft.net, netdev@vger.kernel.org,
	fubar@us.ibm.com
In-Reply-To: <CAEP_g=83tCQMVPOTT82GBw6GamodpqU-SAeYyx9noWnzbGfUpg@mail.gmail.com>

On 10/10/2011 7:43 PM, Jesse Gross wrote:
> On Mon, Oct 10, 2011 at 7:07 PM, John Fastabend
> <john.r.fastabend@intel.com> wrote:
>> On 10/10/2011 3:37 PM, Jiri Pirko wrote:
>>> Mon, Oct 10, 2011 at 09:16:41PM CEST, john.r.fastabend@intel.com wrote:
>>>> The following configuration used to work as I expected. At least
>>>> we could use the fcoe interfaces to do MPIO and the bond0 iface
>>>> to do load balancing or failover.
>>>>
>>>>       ---eth2.228-fcoe
>>>>       |
>>>> eth2 -----|
>>>>          |
>>>>          |---- bond0
>>>>          |
>>>> eth3 -----|
>>>>       |
>>>>       ---eth3.228-fcoe
>>>>
>>>> This worked because of a change we added to allow inactive slaves
>>>> to rx 'exact' matches. This functionality was kept intact with the
>>>> rx_handler mechanism. However now the vlan interface attached to the
>>>> active slave never receives traffic because the bonding rx_handler
>>>> updates the skb->dev and goto's another_round. Previously, the
>>>> vlan_do_receive() logic was called before the bonding rx_handler.
>>>>
>>>> Now by the time vlan_do_receive calls vlan_find_dev() the
>>>> skb->dev is set to bond0 and it is clear no vlan is attached
>>>> to this iface. The vlan lookup fails.
>>>>
>>>> This patch moves the VLAN check above the rx_handler. A VLAN
>>>> tagged frame is now routed to the eth2.228-fcoe iface in the
>>>> above schematic. Untagged frames continue to the bond0 as
>>>> normal. This case also remains intact,
>>>>
>>>> eth2 --> bond0 --> vlan.228
>>>>
>>>> Here the skb is VLAN tagged but the vlan lookup fails on eth2
>>>> causing the bonding rx_handler to be called. On the second
>>>> pass the vlan lookup is on the bond0 iface and completes as
>>>> expected.
>>>>
>>>> Putting a VLAN.228 on both the bond0 and eth2 device will
>>>> result in eth2.228 receiving the skb. I don't think this is
>>>> completely unexpected and was the result prior to the rx_handler
>>>> result.
>>>>
>>>> Note, the same setup is also used for other storage traffic that
>>>> MPIO is used with eg. iSCSI and similar setups can be contrived
>>>> without storage protocols.
>>>>
>>>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>>>> ---

[...]

>> Maybe Jesse, can comment though on why this commit that moved (and
>> cleaned up) the vlan tag handling put the vlan_do_receive below the
>> rx_handler rather than above it. Was this intended to fix something?
> 
> The original reason was to ensure packets received from NICs that do
> stripping behaved the same as those that don't.  At the time, the
> packets with inline vlan tags were handled by the same code that
> handled upper layer protocols so it was important that code for vlan
> stripped tags be immediately before that.  Otherwise, packets might be
> taken either by the bridge hook or vlan code depending the the type of
> device.
> 
> However, that's no longer an issue because we now emulate vlan
> acceleration by untagging packets at the beginning of
> __netif_receive_skb(), so the code path will always be the same.
> Furthermore, based on feedback received since that patch it seems
> pretty clear that people prefer the behavior where vlan devices take
> traffic first, so now that we can have both that and consistent
> behavior it seems to be the way to go.
> 
> This looks correct to me:
> Acked-by: Jesse Gross <jesse@nicira.com>

Thanks for the nice summary Jesse.

^ permalink raw reply

* Re: [net-next PATCH] net: allow vlan traffic to be received under bond
From: John Fastabend @ 2011-10-11 13:13 UTC (permalink / raw)
  To: Hans Schillstrom
  Cc: Jesse Gross, Jiri Pirko, davem@davemloft.net,
	netdev@vger.kernel.org, fubar@us.ibm.com
In-Reply-To: <201110111308.53152.hans.schillstrom@ericsson.com>

On 10/11/2011 4:08 AM, Hans Schillstrom wrote:
> Hello
> On Tuesday 11 October 2011 04:43:03 Jesse Gross wrote:
>> On Mon, Oct 10, 2011 at 7:07 PM, John Fastabend
>> <john.r.fastabend@intel.com> wrote:
>>> On 10/10/2011 3:37 PM, Jiri Pirko wrote:
>>>> Mon, Oct 10, 2011 at 09:16:41PM CEST, john.r.fastabend@intel.com wrote:
>>>>> The following configuration used to work as I expected. At least
>>>>> we could use the fcoe interfaces to do MPIO and the bond0 iface
>>>>> to do load balancing or failover.
>>>>>
>>>>>       ---eth2.228-fcoe
>>>>>       |
>>>>> eth2 -----|
>>>>>          |
>>>>>          |---- bond0
>>>>>          |
>>>>> eth3 -----|
>>>>>       |
>>>>>       ---eth3.228-fcoe
>>>>>
>>>>> This worked because of a change we added to allow inactive slaves
>>>>> to rx 'exact' matches. This functionality was kept intact with the
>>>>> rx_handler mechanism. However now the vlan interface attached to the
>>>>> active slave never receives traffic because the bonding rx_handler
>>>>> updates the skb->dev and goto's another_round. Previously, the
>>>>> vlan_do_receive() logic was called before the bonding rx_handler.
>>>>>
>>>>> Now by the time vlan_do_receive calls vlan_find_dev() the
>>>>> skb->dev is set to bond0 and it is clear no vlan is attached
>>>>> to this iface. The vlan lookup fails.
>>>>>
>>>>> This patch moves the VLAN check above the rx_handler. A VLAN
>>>>> tagged frame is now routed to the eth2.228-fcoe iface in the
>>>>> above schematic. Untagged frames continue to the bond0 as
>>>>> normal. This case also remains intact,
>>>>>
>>>>> eth2 --> bond0 --> vlan.228
>>>>>
>>>>> Here the skb is VLAN tagged but the vlan lookup fails on eth2
>>>>> causing the bonding rx_handler to be called. On the second
>>>>> pass the vlan lookup is on the bond0 iface and completes as
>>>>> expected.
>>>>>
>>>>> Putting a VLAN.228 on both the bond0 and eth2 device will
>>>>> result in eth2.228 receiving the skb. I don't think this is
>>>>> completely unexpected and was the result prior to the rx_handler
>>>>> result.
> 
> I think this OK, but I do have a question
> if bond0 is in Active/Backup mode, eth2 and eth3 got the same MAC.addr,
> what about the VLAN:s ?
> (or is just one of thme working ??)
> 

The VLAN MAC address will not be managed by the bond. In the
storage case a SAN mac may be used (NETDEV_HW_ADDR_T_SAN).
Otherwise the MAC can be managed normally.

Both VLANs will receive frames but in some modes only to packet
handlers that have exact matches. See bond_should_deliver_exact_match().

.John.

^ permalink raw reply

* Re: [RFC PATCH net 1/2] [BUGFIX] bonding: use local function pointer of bond->recv_probe in bond_handle_frame
From: HAYASAKA Mitsuo @ 2011-10-11 13:02 UTC (permalink / raw)
  To: Américo Wang
  Cc: Jay Vosburgh, Andy Gospodarek, netdev, linux-kernel,
	yrl.pp-manager.tt
In-Reply-To: <CAM_iQpXZmX2GPb99tF7xMEup52o8AfeUi+0awvHr7C9LVWTjEg@mail.gmail.com>

Hi WANG Cong

Thank you for your comments.

(2011/10/07 22:24), Américo Wang wrote:
> On Fri, Oct 7, 2011 at 8:49 PM, Mitsuo Hayasaka
> <mitsuo.hayasaka.hu@hitachi.com> wrote:
>> The bond->recv_probe is called in bond_handle_frame() when
>> a packet is received, but bond_close() sets it to NULL. So,
>> a panic occurs when both functions work in parallel.
>>
>> Why this happen:
>> After null pointer check of bond->recv_probe, an sk_buff is
>> duplicated and bond->recv_probe is called in bond_handle_frame.
>> So, a panic occurs when bond_close() is called between the
>> check and call of bond->recv_probe.
>>
>> Patch:
>> This patch uses a local function pointer of bond->recv_probe
>> in bond_handle_frame(). So, it can avoid the null pointer
>> dereference.
>>
> 
> Hmm, I don't doubt it can fix the problem, I am wondering if
> bond->recv_probe should be protected by bond->lock...

Indeed, in general any resources should be protected from the asynchronous
workers.

At first, I thought it should be handled with lock protection, as well.
However, I guess that using bond->lock on this kind of hot-path may
introduces unnecessary overhead. In addition, this code works well
without the strict lock protection. So, I think this change is the
right way to fix it.

Thanks.

^ permalink raw reply

* Re: e100 + VLANs?
From: Michael Tokarev @ 2011-10-11 12:56 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Lamparter, jeffrey.t.kirsher, netdev
In-Reply-To: <1318334674.2538.6.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

11.10.2011 16:04, Eric Dumazet wrote:
> Le mardi 11 octobre 2011 à 15:59 +0400, Michael Tokarev a écrit :
[]
>> (and it works).  With -s 1469, no ICMP packets can be seen
>> on e100 side.
>>
>> So it still may be the hardware... :)
>
> Yes, my patch was not needed, I was not sure sizeof(struct rfd) was
> included or not in the rfd.size setting (apparently its not needed)

Ok, good to know!

> Any counters increase in "ethtool -S eth0" ?

None at all.  Number of interrupts gets increased by 2 about every
2 seconds, even when the other side is doing ping -f, or not pinging
at all.

So it looks like the card merely ignores these packets.

So little result for so much efforts... :(

But it is not really that bad I think - it is an obsolete hardware.

Thank you for all the help!

/mjt

^ permalink raw reply

* Re: e100 + VLANs?
From: Eric Dumazet @ 2011-10-11 12:04 UTC (permalink / raw)
  To: Michael Tokarev; +Cc: David Lamparter, jeffrey.t.kirsher, netdev
In-Reply-To: <4E942F8A.90205@msgid.tls.msk.ru>

Le mardi 11 octobre 2011 à 15:59 +0400, Michael Tokarev a écrit :

> Hmm.  This does not _exactly_ work.
> 
> I applied it on top of 2.6.32, patch applied but with some fuzz - I checked
> manually and it appears to be ok.
> 
> Now, with this patch applied, I see on the e100 side:
> 
> 00:1f:c6:ef:e5:1b > 00:90:27:30:6d:1c, ethertype 802.1Q (0x8100), length 1504: vlan 6, p 0, ethertype IPv4, truncated-ip - 1 bytes missing! (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 1487)
>     10.48.6.1 > 10.48.6.2: ICMP echo request, id 27613, seq 6, length 1467
> 
> when pinging it with -s 1459 (1487 total size).
> 
> I added extra +40 for RFD_BUF_LEN define and recompiled
> (this resulted in RFD_BUF_LEN=1560, - I've added a printk
> just to be sure).
> 
> Now I see the same effect as before the patch:  maximum
> packet size that goes and can be seen on the e100 side
> is 1468(1496) (ping), which results in this on e100 side:
> 
> 15:54:44.830941 00:1f:c6:ef:e5:1b > 00:90:27:30:6d:1c, ethertype 802.1Q (0x8100), length 1514: vlan 6, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 1496)
>     10.48.6.1 > 10.48.6.2: ICMP echo request, id 28735, seq 4, length 1476
> 15:54:44.831025 00:90:27:30:6d:1c > 00:1f:c6:ef:e5:1b, ethertype 802.1Q (0x8100), length 1514: vlan 6, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 12010, offset 0, flags [none], proto ICMP (1), length 1496)
>     10.48.6.2 > 10.48.6.1: ICMP echo reply, id 28735, seq 4, length 1476
> 
> (and it works).  With -s 1469, no ICMP packets can be seen
> on e100 side.
> 
> So it still may be the hardware... :)

Yes, my patch was not needed, I was not sure sizeof(struct rfd) was
included or not in the rfd.size setting (apparently its not needed)

Any counters increase in "ethtool -S eth0" ?

^ permalink raw reply

* [PATCH net-next] ipv6: Remove superfluous NULL pointer check in ipv6_local_rxpmtu
From: Steffen Klassert @ 2011-10-11 12:01 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

The pointer to mtu_info is taken from the common buffer
of the skb, thus it can't be a NULL pointer. This patch
removes this check on mtu_info.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/datagram.c |    4 ----
 1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index b46e9f8..e248069 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -297,10 +297,6 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	ipv6_addr_copy(&iph->daddr, &fl6->daddr);
 
 	mtu_info = IP6CBMTU(skb);
-	if (!mtu_info) {
-		kfree_skb(skb);
-		return;
-	}
 
 	mtu_info->ip6m_mtu = mtu;
 	mtu_info->ip6m_addr.sin6_family = AF_INET6;
-- 
1.7.0.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox