Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next-2.6 PATCH 11/20] igb: add a flags value to the ring
From: Jeff Kirsher @ 2009-10-28  1:52 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch adds a flags value to the ring that cleans up some of the last
remaining items from the ring in order to help separate it from the adapter
struct.  By implementing these flags it becomes possible for different rings
to support different functions such as rx checksumming.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb.h         |   12 ++++-
 drivers/net/igb/igb_ethtool.c |   13 ++++--
 drivers/net/igb/igb_main.c    |   93 +++++++++++++++++++----------------------
 3 files changed, 60 insertions(+), 58 deletions(-)

diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 6a67fa2..0c30c5e 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -192,6 +192,8 @@ struct igb_ring {
 	unsigned int total_bytes;
 	unsigned int total_packets;
 
+	u32 flags;
+
 	union {
 		/* TX */
 		struct {
@@ -206,6 +208,13 @@ struct igb_ring {
 	};
 };
 
+#define IGB_RING_FLAG_RX_CSUM        0x00000001 /* RX CSUM enabled */
+#define IGB_RING_FLAG_RX_SCTP_CSUM   0x00000002 /* SCTP CSUM offload enabled */
+
+#define IGB_RING_FLAG_TX_CTX_IDX     0x00000001 /* HW requires context index */
+
+#define IGB_ADVTXD_DCMD (E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS)
+
 #define E1000_RX_DESC_ADV(R, i)	    \
 	(&(((union e1000_adv_rx_desc *)((R).desc))[i]))
 #define E1000_TX_DESC_ADV(R, i)	    \
@@ -245,7 +254,6 @@ struct igb_adapter {
 	/* TX */
 	struct igb_ring *tx_ring;      /* One per active queue */
 	unsigned long tx_queue_len;
-	u32 txd_cmd;
 	u32 gotc;
 	u64 gotc_old;
 	u64 tpt_old;
@@ -303,8 +311,6 @@ struct igb_adapter {
 #define IGB_FLAG_HAS_MSI           (1 << 0)
 #define IGB_FLAG_DCA_ENABLED       (1 << 1)
 #define IGB_FLAG_QUAD_PORT_A       (1 << 2)
-#define IGB_FLAG_NEED_CTX_IDX      (1 << 3)
-#define IGB_FLAG_RX_CSUM_DISABLED  (1 << 4)
 
 enum e1000_state_t {
 	__IGB_TESTING,
diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c
index f62430b..c44dede 100644
--- a/drivers/net/igb/igb_ethtool.c
+++ b/drivers/net/igb/igb_ethtool.c
@@ -279,17 +279,20 @@ static int igb_set_pauseparam(struct net_device *netdev,
 static u32 igb_get_rx_csum(struct net_device *netdev)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
-	return !(adapter->flags & IGB_FLAG_RX_CSUM_DISABLED);
+	return !!(adapter->rx_ring[0].flags & IGB_RING_FLAG_RX_CSUM);
 }
 
 static int igb_set_rx_csum(struct net_device *netdev, u32 data)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
+	int i;
 
-	if (data)
-		adapter->flags &= ~IGB_FLAG_RX_CSUM_DISABLED;
-	else
-		adapter->flags |= IGB_FLAG_RX_CSUM_DISABLED;
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		if (data)
+			adapter->rx_ring[i].flags |= IGB_RING_FLAG_RX_CSUM;
+		else
+			adapter->rx_ring[i].flags &= ~IGB_RING_FLAG_RX_CSUM;
+	}
 
 	return 0;
 }
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index bdd7bf0..00f3f2d 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -437,13 +437,21 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 		ring->count = adapter->tx_ring_count;
 		ring->queue_index = i;
 		ring->pdev = adapter->pdev;
+		/* For 82575, context index must be unique per ring. */
+		if (adapter->hw.mac.type == e1000_82575)
+			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
 	}
+
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		struct igb_ring *ring = &(adapter->rx_ring[i]);
 		ring->count = adapter->rx_ring_count;
 		ring->queue_index = i;
 		ring->pdev = adapter->pdev;
 		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
+		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
+		/* set flag indicating ring supports SCTP checksum offload */
+		if (adapter->hw.mac.type >= e1000_82576)
+			ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
 	}
 
 	igb_cache_ring_register(adapter);
@@ -1517,16 +1525,6 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 
 	igb_get_bus_info_pcie(hw);
 
-	/* set flags */
-	switch (hw->mac.type) {
-	case e1000_82575:
-		adapter->flags |= IGB_FLAG_NEED_CTX_IDX;
-		break;
-	case e1000_82576:
-	default:
-		break;
-	}
-
 	hw->phy.autoneg_wait_to_complete = false;
 	hw->mac.adaptive_ifs = true;
 
@@ -2149,9 +2147,6 @@ static void igb_configure_tx(struct igb_adapter *adapter)
 
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
-
-	/* Setup Transmit Descriptor Settings for eop descriptor */
-	adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS;
 }
 
 /**
@@ -3272,8 +3267,7 @@ set_itr_now:
 #define IGB_TX_FLAGS_VLAN_MASK	0xffff0000
 #define IGB_TX_FLAGS_VLAN_SHIFT	16
 
-static inline int igb_tso_adv(struct igb_adapter *adapter,
-			      struct igb_ring *tx_ring,
+static inline int igb_tso_adv(struct igb_ring *tx_ring,
 			      struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
 {
 	struct e1000_adv_tx_context_desc *context_desc;
@@ -3335,8 +3329,8 @@ static inline int igb_tso_adv(struct igb_adapter *adapter,
 	mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
 
 	/* For 82575, context index must be unique per ring. */
-	if (adapter->flags & IGB_FLAG_NEED_CTX_IDX)
-		mss_l4len_idx |= tx_ring->queue_index << 4;
+	if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
+		mss_l4len_idx |= tx_ring->reg_idx << 4;
 
 	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
 	context_desc->seqnum_seed = 0;
@@ -3353,9 +3347,8 @@ static inline int igb_tso_adv(struct igb_adapter *adapter,
 	return true;
 }
 
-static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
-					struct igb_ring *tx_ring,
-					struct sk_buff *skb, u32 tx_flags)
+static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
+				   struct sk_buff *skb, u32 tx_flags)
 {
 	struct e1000_adv_tx_context_desc *context_desc;
 	struct pci_dev *pdev = tx_ring->pdev;
@@ -3417,11 +3410,9 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
 
 		context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
 		context_desc->seqnum_seed = 0;
-		if (adapter->flags & IGB_FLAG_NEED_CTX_IDX)
+		if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
 			context_desc->mss_l4len_idx =
-				cpu_to_le32(tx_ring->queue_index << 4);
-		else
-			context_desc->mss_l4len_idx = 0;
+				cpu_to_le32(tx_ring->reg_idx << 4);
 
 		buffer_info->time_stamp = jiffies;
 		buffer_info->next_to_watch = i;
@@ -3492,8 +3483,7 @@ static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
 	return count + 1;
 }
 
-static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
-				    struct igb_ring *tx_ring,
+static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
 				    int tx_flags, int count, u32 paylen,
 				    u8 hdr_len)
 {
@@ -3525,10 +3515,11 @@ static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
 		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
 	}
 
-	if ((adapter->flags & IGB_FLAG_NEED_CTX_IDX) &&
-	    (tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_TSO |
+	if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
+	    (tx_flags & (IGB_TX_FLAGS_CSUM |
+	                 IGB_TX_FLAGS_TSO |
 			 IGB_TX_FLAGS_VLAN)))
-		olinfo_status |= tx_ring->queue_index << 4;
+		olinfo_status |= tx_ring->reg_idx << 4;
 
 	olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
 
@@ -3545,7 +3536,7 @@ static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
 			i = 0;
 	}
 
-	tx_desc->read.cmd_type_len |= cpu_to_le32(adapter->txd_cmd);
+	tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
 	/* Force memory writes to complete before letting h/w
 	 * know there are new descriptors to fetch.  (Only
 	 * applicable for weak-ordered memory model archs,
@@ -3644,17 +3635,17 @@ static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
 		tx_flags |= IGB_TX_FLAGS_IPV4;
 
 	first = tx_ring->next_to_use;
-	tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags,
-					      &hdr_len) : 0;
-
-	if (tso < 0) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
+	if (skb_is_gso(skb)) {
+		tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
+		if (tso < 0) {
+			dev_kfree_skb_any(skb);
+			return NETDEV_TX_OK;
+		}
 	}
 
 	if (tso)
 		tx_flags |= IGB_TX_FLAGS_TSO;
-	else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags) &&
+	else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
 	         (skb->ip_summed == CHECKSUM_PARTIAL))
 		tx_flags |= IGB_TX_FLAGS_CSUM;
 
@@ -3664,17 +3655,18 @@ static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
 	 */
 	count = igb_tx_map_adv(tx_ring, skb, first);
 
-	if (count) {
-		igb_tx_queue_adv(adapter, tx_ring, tx_flags, count,
-			         skb->len, hdr_len);
-		/* Make sure there is space in the ring for the next send. */
-		igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
-	} else {
+	if (!count) {
 		dev_kfree_skb_any(skb);
 		tx_ring->buffer_info[first].time_stamp = 0;
 		tx_ring->next_to_use = first;
+		return NETDEV_TX_OK;
 	}
 
+	igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
+
+	/* Make sure there is space in the ring for the next send. */
+	igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
+
 	return NETDEV_TX_OK;
 }
 
@@ -4800,15 +4792,15 @@ static void igb_receive_skb(struct igb_q_vector *q_vector,
 }
 
 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
-                                       struct igb_adapter *adapter,
 				       u32 status_err, struct sk_buff *skb)
 {
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Ignore Checksum bit is set or checksum is disabled through ethtool */
-	if ((status_err & E1000_RXD_STAT_IXSM) ||
-	    (adapter->flags & IGB_FLAG_RX_CSUM_DISABLED))
+	if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
+	     (status_err & E1000_RXD_STAT_IXSM))
 		return;
+
 	/* TCP/UDP checksum error bit is set */
 	if (status_err &
 	    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
@@ -4817,9 +4809,10 @@ static inline void igb_rx_checksum_adv(struct igb_ring *ring,
 		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
 		 * packets, (aka let the stack check the crc32c)
 		 */
-		if (!((adapter->hw.mac.type == e1000_82576) &&
-		      (skb->len == 60)))
+		if ((skb->len == 60) &&
+		    (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
 			ring->rx_stats.csum_err++;
+
 		/* let the stack verify checksum errors */
 		return;
 	}
@@ -4827,7 +4820,7 @@ static inline void igb_rx_checksum_adv(struct igb_ring *ring,
 	if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	dev_dbg(&adapter->pdev->dev, "cksum success: bits %08X\n", status_err);
+	dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
 }
 
 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
@@ -4978,7 +4971,7 @@ send_up:
 		total_bytes += skb->len;
 		total_packets++;
 
-		igb_rx_checksum_adv(rx_ring, adapter, staterr, skb);
+		igb_rx_checksum_adv(rx_ring, staterr, skb);
 
 		skb->protocol = eth_type_trans(skb, netdev);
 		skb_record_rx_queue(skb, rx_ring->queue_index);


^ permalink raw reply related

* [net-next-2.6 PATCH 12/20] igb: place a pointer to the netdev struct in the ring itself
From: Jeff Kirsher @ 2009-10-28  1:53 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change adds a pointer to the netdev to the ring itself.  The idea is
that at some point in the future it will be possible to support multiple
netdevs from a single adapter struct.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb.h      |    3 ++-
 drivers/net/igb/igb_main.c |   29 ++++++++++++++---------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 0c30c5e..2416c12 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -175,9 +175,10 @@ struct igb_q_vector {
 
 struct igb_ring {
 	struct igb_q_vector *q_vector; /* backlink to q_vector */
-	void *desc;                    /* descriptor ring memory */
+	struct net_device *netdev;     /* back pointer to net_device */
 	struct pci_dev *pdev;          /* pci device for dma mapping */
 	dma_addr_t dma;                /* phys address of the ring */
+	void *desc;                    /* descriptor ring memory */
 	unsigned int size;             /* length of desc. ring in bytes */
 	unsigned int count;            /* number of desc. in the ring */
 	u16 next_to_use;
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 00f3f2d..3dc8e88 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -101,7 +101,6 @@ static void igb_update_phy_info(unsigned long);
 static void igb_watchdog(unsigned long);
 static void igb_watchdog_task(struct work_struct *);
 static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *,
-					   struct net_device *,
 					   struct igb_ring *);
 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
 				      struct net_device *);
@@ -437,6 +436,7 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 		ring->count = adapter->tx_ring_count;
 		ring->queue_index = i;
 		ring->pdev = adapter->pdev;
+		ring->netdev = adapter->netdev;
 		/* For 82575, context index must be unique per ring. */
 		if (adapter->hw.mac.type == e1000_82575)
 			ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
@@ -447,6 +447,7 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
 		ring->count = adapter->rx_ring_count;
 		ring->queue_index = i;
 		ring->pdev = adapter->pdev;
+		ring->netdev = adapter->netdev;
 		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
 		ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
 		/* set flag indicating ring supports SCTP checksum offload */
@@ -3550,9 +3551,10 @@ static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
 	mmiowb();
 }
 
-static int __igb_maybe_stop_tx(struct net_device *netdev,
-			       struct igb_ring *tx_ring, int size)
+static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
 {
+	struct net_device *netdev = tx_ring->netdev;
+
 	netif_stop_subqueue(netdev, tx_ring->queue_index);
 
 	/* Herbert's original patch had:
@@ -3571,19 +3573,17 @@ static int __igb_maybe_stop_tx(struct net_device *netdev,
 	return 0;
 }
 
-static int igb_maybe_stop_tx(struct net_device *netdev,
-			     struct igb_ring *tx_ring, int size)
+static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
 {
 	if (igb_desc_unused(tx_ring) >= size)
 		return 0;
-	return __igb_maybe_stop_tx(netdev, tx_ring, size);
+	return __igb_maybe_stop_tx(tx_ring, size);
 }
 
 static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
-					   struct net_device *netdev,
 					   struct igb_ring *tx_ring)
 {
-	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
 	unsigned int first;
 	unsigned int tx_flags = 0;
 	u8 hdr_len = 0;
@@ -3606,7 +3606,7 @@ static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
 	 *       + 1 desc for skb->data,
 	 *       + 1 desc for context descriptor,
 	 * otherwise try next time */
-	if (igb_maybe_stop_tx(netdev, tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
+	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
 		/* this is a hard error */
 		return NETDEV_TX_BUSY;
 	}
@@ -3665,7 +3665,7 @@ static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
 	igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
 
 	/* Make sure there is space in the ring for the next send. */
-	igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
+	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
 
 	return NETDEV_TX_OK;
 }
@@ -3684,7 +3684,7 @@ static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
 	 * to a flow.  Right now, performance is impacted slightly negatively
 	 * if using multiple tx queues.  If the stack breaks away from a
 	 * single qdisc implementation, we can look at this again. */
-	return igb_xmit_frame_ring_adv(skb, netdev, tx_ring);
+	return igb_xmit_frame_ring_adv(skb, tx_ring);
 }
 
 /**
@@ -4667,7 +4667,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 {
 	struct igb_adapter *adapter = q_vector->adapter;
 	struct igb_ring *tx_ring = q_vector->tx_ring;
-	struct net_device *netdev = adapter->netdev;
+	struct net_device *netdev = tx_ring->netdev;
 	struct e1000_hw *hw = &adapter->hw;
 	struct igb_buffer *buffer_info;
 	struct sk_buff *skb;
@@ -4841,8 +4841,8 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
                                  int *work_done, int budget)
 {
 	struct igb_adapter *adapter = q_vector->adapter;
-	struct net_device *netdev = adapter->netdev;
 	struct igb_ring *rx_ring = q_vector->rx_ring;
+	struct net_device *netdev = rx_ring->netdev;
 	struct e1000_hw *hw = &adapter->hw;
 	struct pci_dev *pdev = rx_ring->pdev;
 	union e1000_adv_rx_desc *rx_desc , *next_rxd;
@@ -5018,8 +5018,7 @@ next_desc:
 static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
 				     int cleaned_count)
 {
-	struct igb_adapter *adapter = rx_ring->q_vector->adapter;
-	struct net_device *netdev = adapter->netdev;
+	struct net_device *netdev = rx_ring->netdev;
 	union e1000_adv_rx_desc *rx_desc;
 	struct igb_buffer *buffer_info;
 	struct sk_buff *skb;


^ permalink raw reply related

* [net-next-2.6 PATCH 13/20] igb: move the multiple receive queue configuration into separate function
From: Jeff Kirsher @ 2009-10-28  1:53 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch moves the multiple receive queue configuration into a separate
function from igb_configure_rx.  We can essentially do the configuration for
the multiple receive queues just prior to enabling the RX and this will allow
us to separate the queue enablement from the receive queue layout
configuration.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb_main.c |  217 +++++++++++++++++++++++---------------------
 1 files changed, 111 insertions(+), 106 deletions(-)

diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 3dc8e88..ea05604 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -82,6 +82,7 @@ static int igb_setup_all_tx_resources(struct igb_adapter *);
 static int igb_setup_all_rx_resources(struct igb_adapter *);
 static void igb_free_all_tx_resources(struct igb_adapter *);
 static void igb_free_all_rx_resources(struct igb_adapter *);
+static void igb_setup_mrqc(struct igb_adapter *);
 void igb_update_stats(struct igb_adapter *);
 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
 static void __devexit igb_remove(struct pci_dev *pdev);
@@ -1115,6 +1116,7 @@ static void igb_configure(struct igb_adapter *adapter)
 	igb_restore_vlan(adapter);
 
 	igb_setup_tctl(adapter);
+	igb_setup_mrqc(adapter);
 	igb_setup_rctl(adapter);
 
 	igb_configure_tx(adapter);
@@ -1157,7 +1159,6 @@ int igb_up(struct igb_adapter *adapter)
 	if (adapter->msix_entries)
 		igb_configure_msix(adapter);
 
-	igb_vmm_control(adapter);
 	igb_set_vmolr(hw, adapter->vfs_allocated_count);
 
 	/* Clear any pending interrupts. */
@@ -1928,7 +1929,6 @@ static int igb_open(struct net_device *netdev)
 	 * clean_rx handler before we do so.  */
 	igb_configure(adapter);
 
-	igb_vmm_control(adapter);
 	igb_set_vmolr(hw, adapter->vfs_allocated_count);
 
 	err = igb_request_irq(adapter);
@@ -2217,6 +2217,111 @@ static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
 }
 
 /**
+ * igb_setup_mrqc - configure the multiple receive queue control registers
+ * @adapter: Board private structure
+ **/
+static void igb_setup_mrqc(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 mrqc, rxcsum;
+	u32 j, num_rx_queues, shift = 0, shift2 = 0;
+	union e1000_reta {
+		u32 dword;
+		u8  bytes[4];
+	} reta;
+	static const u8 rsshash[40] = {
+		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
+		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
+		0xae, 0x7b, 0x30, 0xb4,	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
+		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
+
+	/* Fill out hash function seeds */
+	for (j = 0; j < 10; j++) {
+		u32 rsskey = rsshash[(j * 4)];
+		rsskey |= rsshash[(j * 4) + 1] << 8;
+		rsskey |= rsshash[(j * 4) + 2] << 16;
+		rsskey |= rsshash[(j * 4) + 3] << 24;
+		array_wr32(E1000_RSSRK(0), j, rsskey);
+	}
+
+	num_rx_queues = adapter->num_rx_queues;
+
+	if (adapter->vfs_allocated_count) {
+		/* 82575 and 82576 supports 2 RSS queues for VMDq */
+		switch (hw->mac.type) {
+		case e1000_82576:
+			shift = 3;
+			num_rx_queues = 2;
+			break;
+		case e1000_82575:
+			shift = 2;
+			shift2 = 6;
+		default:
+			break;
+		}
+	} else {
+		if (hw->mac.type == e1000_82575)
+			shift = 6;
+	}
+
+	for (j = 0; j < (32 * 4); j++) {
+		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
+		if (shift2)
+			reta.bytes[j & 3] |= num_rx_queues << shift2;
+		if ((j & 3) == 3)
+			wr32(E1000_RETA(j >> 2), reta.dword);
+	}
+
+	/*
+	 * Disable raw packet checksumming so that RSS hash is placed in
+	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
+	 * offloads as they are enabled by default
+	 */
+	rxcsum = rd32(E1000_RXCSUM);
+	rxcsum |= E1000_RXCSUM_PCSD;
+
+	if (adapter->hw.mac.type >= e1000_82576)
+		/* Enable Receive Checksum Offload for SCTP */
+		rxcsum |= E1000_RXCSUM_CRCOFL;
+
+	/* Don't need to set TUOFL or IPOFL, they default to 1 */
+	wr32(E1000_RXCSUM, rxcsum);
+
+	/* If VMDq is enabled then we set the appropriate mode for that, else
+	 * we default to RSS so that an RSS hash is calculated per packet even
+	 * if we are only using one queue */
+	if (adapter->vfs_allocated_count) {
+		if (hw->mac.type > e1000_82575) {
+			/* Set the default pool for the PF's first queue */
+			u32 vtctl = rd32(E1000_VT_CTL);
+			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
+				   E1000_VT_CTL_DISABLE_DEF_POOL);
+			vtctl |= adapter->vfs_allocated_count <<
+				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
+			wr32(E1000_VT_CTL, vtctl);
+		}
+		if (adapter->num_rx_queues > 1)
+			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
+		else
+			mrqc = E1000_MRQC_ENABLE_VMDQ;
+	} else {
+		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
+	}
+	igb_vmm_control(adapter);
+
+	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
+		 E1000_MRQC_RSS_FIELD_IPV4_TCP);
+	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
+		 E1000_MRQC_RSS_FIELD_IPV6_TCP);
+	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
+		 E1000_MRQC_RSS_FIELD_IPV6_UDP);
+	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
+		 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
+
+	wr32(E1000_MRQC, mrqc);
+}
+
+/**
  * igb_setup_rctl - configure the receive control registers
  * @adapter: Board private structure
  **/
@@ -2298,29 +2403,6 @@ static void igb_rlpml_set(struct igb_adapter *adapter)
 }
 
 /**
- * igb_configure_vt_default_pool - Configure VT default pool
- * @adapter: board private structure
- *
- * Configure the default pool
- **/
-static void igb_configure_vt_default_pool(struct igb_adapter *adapter)
-{
-	struct e1000_hw *hw = &adapter->hw;
-	u16 pf_id = adapter->vfs_allocated_count;
-	u32 vtctl;
-
-	/* not in sr-iov mode - do nothing */
-	if (!pf_id)
-		return;
-
-	vtctl = rd32(E1000_VT_CTL);
-	vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
-		   E1000_VT_CTL_DISABLE_DEF_POOL);
-	vtctl |= pf_id << E1000_VT_CTL_DEFAULT_POOL_SHIFT;
-	wr32(E1000_VT_CTL, vtctl);
-}
-
-/**
  * igb_configure_rx_ring - Configure a receive ring after Reset
  * @adapter: board private structure
  * @ring: receive ring to be configured
@@ -2391,85 +2473,8 @@ static void igb_configure_rx_ring(struct igb_adapter *adapter,
  **/
 static void igb_configure_rx(struct igb_adapter *adapter)
 {
-	struct e1000_hw *hw = &adapter->hw;
-	u32 rctl, rxcsum;
 	int i;
 
-	/* disable receives while setting up the descriptors */
-	rctl = rd32(E1000_RCTL);
-	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
-	wrfl();
-	mdelay(10);
-
-	if (adapter->itr_setting > 3)
-		wr32(E1000_ITR, adapter->itr);
-
-	/* Setup the HW Rx Head and Tail Descriptor Pointers and
-	 * the Base and Length of the Rx Descriptor Ring */
-	for (i = 0; i < adapter->num_rx_queues; i++)
-		igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
-
-	if (adapter->num_rx_queues > 1) {
-		u32 random[10];
-		u32 mrqc;
-		u32 j, shift;
-		union e1000_reta {
-			u32 dword;
-			u8  bytes[4];
-		} reta;
-
-		get_random_bytes(&random[0], 40);
-
-		if (hw->mac.type >= e1000_82576)
-			shift = 0;
-		else
-			shift = 6;
-		for (j = 0; j < (32 * 4); j++) {
-			reta.bytes[j & 3] =
-				adapter->rx_ring[(j % adapter->num_rx_queues)].reg_idx << shift;
-			if ((j & 3) == 3)
-				writel(reta.dword,
-				       hw->hw_addr + E1000_RETA(0) + (j & ~3));
-		}
-		if (adapter->vfs_allocated_count)
-			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
-		else
-			mrqc = E1000_MRQC_ENABLE_RSS_4Q;
-
-		/* Fill out hash function seeds */
-		for (j = 0; j < 10; j++)
-			array_wr32(E1000_RSSRK(0), j, random[j]);
-
-		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
-			 E1000_MRQC_RSS_FIELD_IPV4_TCP);
-		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
-			 E1000_MRQC_RSS_FIELD_IPV6_TCP);
-		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
-			 E1000_MRQC_RSS_FIELD_IPV6_UDP);
-		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
-			 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
-
-		wr32(E1000_MRQC, mrqc);
-	} else if (adapter->vfs_allocated_count) {
-		/* Enable multi-queue for sr-iov */
-		wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
-	}
-
-	/* Enable Receive Checksum Offload for TCP and UDP */
-	rxcsum = rd32(E1000_RXCSUM);
-	/* Disable raw packet checksumming */
-	rxcsum |= E1000_RXCSUM_PCSD;
-
-	if (adapter->hw.mac.type == e1000_82576)
-		/* Enable Receive Checksum Offload for SCTP */
-		rxcsum |= E1000_RXCSUM_CRCOFL;
-
-	/* Don't need to set TUOFL or IPOFL, they default to 1 */
-	wr32(E1000_RXCSUM, rxcsum);
-
-	/* Set the default pool for the PF's first queue */
-	igb_configure_vt_default_pool(adapter);
-
 	/* set UTA to appropriate mode */
 	igb_set_uta(adapter);
 
@@ -2477,10 +2482,10 @@ static void igb_configure_rx(struct igb_adapter *adapter)
 	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
 	                 adapter->vfs_allocated_count);
 
-	igb_rlpml_set(adapter);
-
-	/* Enable Receives */
-	wr32(E1000_RCTL, rctl);
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring */
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
 }
 
 /**


^ permalink raw reply related

* [net-next-2.6 PATCH 14/20] igb: delay VF reset notification until after interrupts are enabled
From: Jeff Kirsher @ 2009-10-28  1:53 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This update delays the VF reset notification until after interrupts are
enabled.  Otherwise there is a chance of having the VF try to reset itself too
soon and being ignored by the PF as a result.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb_main.c |   33 +++++++++++++++++++++++----------
 1 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index ea05604..24a119e 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1165,6 +1165,13 @@ int igb_up(struct igb_adapter *adapter)
 	rd32(E1000_ICR);
 	igb_irq_enable(adapter);
 
+	/* notify VFs that reset has been completed */
+	if (adapter->vfs_allocated_count) {
+		u32 reg_data = rd32(E1000_CTRL_EXT);
+		reg_data |= E1000_CTRL_EXT_PFRSTD;
+		wr32(E1000_CTRL_EXT, reg_data);
+	}
+
 	netif_tx_start_all_queues(adapter->netdev);
 
 	/* Fire a link change interrupt to start the watchdog. */
@@ -1948,6 +1955,13 @@ static int igb_open(struct net_device *netdev)
 
 	igb_irq_enable(adapter);
 
+	/* notify VFs that reset has been completed */
+	if (adapter->vfs_allocated_count) {
+		u32 reg_data = rd32(E1000_CTRL_EXT);
+		reg_data |= E1000_CTRL_EXT_PFRSTD;
+		wr32(E1000_CTRL_EXT, reg_data);
+	}
+
 	netif_tx_start_all_queues(netdev);
 
 	/* Fire a link status change interrupt to start the watchdog. */
@@ -5785,19 +5799,18 @@ static int igb_set_vf_mac(struct igb_adapter *adapter,
 static void igb_vmm_control(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
-	u32 reg_data;
 
-	if (!adapter->vfs_allocated_count)
+	/* replication is not supported for 82575 */
+	if (hw->mac.type == e1000_82575)
 		return;
 
-	/* VF's need PF reset indication before they
-	 * can send/receive mail */
-	reg_data = rd32(E1000_CTRL_EXT);
-	reg_data |= E1000_CTRL_EXT_PFRSTD;
-	wr32(E1000_CTRL_EXT, reg_data);
-
-	igb_vmdq_set_loopback_pf(hw, true);
-	igb_vmdq_set_replication_pf(hw, true);
+	if (adapter->vfs_allocated_count) {
+		igb_vmdq_set_loopback_pf(hw, true);
+		igb_vmdq_set_replication_pf(hw, true);
+	} else {
+		igb_vmdq_set_loopback_pf(hw, false);
+		igb_vmdq_set_replication_pf(hw, false);
+	}
 }
 
 /* igb_main.c */


^ permalink raw reply related

* [net-next-2.6 PATCH 15/20] igb: setup vlan tag replication stripping in igb_vmm_control
From: Jeff Kirsher @ 2009-10-28  1:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This update adds vlan tag stripping for inter-vf communications to the
igb_vmm_control configuration function.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/e1000_82575.h |    9 +++++++++
 drivers/net/igb/igb_main.c    |   11 +++++++++++
 2 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/drivers/net/igb/e1000_82575.h b/drivers/net/igb/e1000_82575.h
index 7be3a0b..9418683 100644
--- a/drivers/net/igb/e1000_82575.h
+++ b/drivers/net/igb/e1000_82575.h
@@ -203,6 +203,15 @@ struct e1000_adv_tx_context_desc {
 #define E1000_IOVCTL 0x05BBC
 #define E1000_IOVCTL_REUSE_VFQ 0x00000001
 
+#define E1000_RPLOLR_STRVLAN   0x40000000
+#define E1000_RPLOLR_STRCRC    0x80000000
+
+#define E1000_DTXCTL_8023LL     0x0004
+#define E1000_DTXCTL_VLAN_ADDED 0x0008
+#define E1000_DTXCTL_OOS_ENABLE 0x0010
+#define E1000_DTXCTL_MDP_EN     0x0020
+#define E1000_DTXCTL_SPOOF_INT  0x0040
+
 #define ALL_QUEUES   0xFFFF
 
 void igb_vmdq_set_loopback_pf(struct e1000_hw *, bool);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 24a119e..9dd290c 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -5799,11 +5799,22 @@ static int igb_set_vf_mac(struct igb_adapter *adapter,
 static void igb_vmm_control(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
+	u32 reg;
 
 	/* replication is not supported for 82575 */
 	if (hw->mac.type == e1000_82575)
 		return;
 
+	/* enable replication vlan tag stripping */
+	reg = rd32(E1000_RPLOLR);
+	reg |= E1000_RPLOLR_STRVLAN;
+	wr32(E1000_RPLOLR, reg);
+
+	/* notify HW that the MAC is adding vlan tags */
+	reg = rd32(E1000_DTXCTL);
+	reg |= E1000_DTXCTL_VLAN_ADDED;
+	wr32(E1000_DTXCTL, reg);
+
 	if (adapter->vfs_allocated_count) {
 		igb_vmdq_set_loopback_pf(hw, true);
 		igb_vmdq_set_replication_pf(hw, true);


^ permalink raw reply related

* [net-next-2.6 PATCH 16/20] igb: re-use ring configuration code in ethtool testing
From: Jeff Kirsher @ 2009-10-28  1:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

Since all of the ring code is now specific to the ring instead of the adapter
struct it is possible to cut a large section of code out of the ethtool
testing configuration since we can just use the existing functions to
configure the rings.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb.h         |   14 +++
 drivers/net/igb/igb_ethtool.c |  185 +++++++++--------------------------------
 drivers/net/igb/igb_main.c    |   29 ++----
 3 files changed, 60 insertions(+), 168 deletions(-)

diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 2416c12..8b189a0 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -223,6 +223,15 @@ struct igb_ring {
 #define E1000_TX_CTXTDESC_ADV(R, i)	    \
 	(&(((struct e1000_adv_tx_context_desc *)((R).desc))[i]))
 
+/* igb_desc_unused - calculate if we have unused descriptors */
+static inline int igb_desc_unused(struct igb_ring *ring)
+{
+	if (ring->next_to_clean > ring->next_to_use)
+		return ring->next_to_clean - ring->next_to_use - 1;
+
+	return ring->count + ring->next_to_clean - ring->next_to_use - 1;
+}
+
 /* board specific private data structure */
 
 struct igb_adapter {
@@ -336,6 +345,11 @@ extern int igb_setup_tx_resources(struct igb_ring *);
 extern int igb_setup_rx_resources(struct igb_ring *);
 extern void igb_free_tx_resources(struct igb_ring *);
 extern void igb_free_rx_resources(struct igb_ring *);
+extern void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
+extern void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
+extern void igb_setup_tctl(struct igb_adapter *);
+extern void igb_setup_rctl(struct igb_adapter *);
+extern void igb_alloc_rx_buffers_adv(struct igb_ring *, int);
 extern void igb_update_stats(struct igb_adapter *);
 extern void igb_set_ethtool_ops(struct net_device *);
 
diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c
index c44dede..80afd8a 100644
--- a/drivers/net/igb/igb_ethtool.c
+++ b/drivers/net/igb/igb_ethtool.c
@@ -1245,116 +1245,49 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
 
 static void igb_free_desc_rings(struct igb_adapter *adapter)
 {
-	struct igb_ring *tx_ring = &adapter->test_tx_ring;
-	struct igb_ring *rx_ring = &adapter->test_rx_ring;
-	struct pci_dev *pdev = adapter->pdev;
-	int i;
-
-	if (tx_ring->desc && tx_ring->buffer_info) {
-		for (i = 0; i < tx_ring->count; i++) {
-			struct igb_buffer *buf = &(tx_ring->buffer_info[i]);
-			if (buf->dma)
-				pci_unmap_single(pdev, buf->dma, buf->length,
-						 PCI_DMA_TODEVICE);
-			if (buf->skb)
-				dev_kfree_skb(buf->skb);
-		}
-	}
-
-	if (rx_ring->desc && rx_ring->buffer_info) {
-		for (i = 0; i < rx_ring->count; i++) {
-			struct igb_buffer *buf = &(rx_ring->buffer_info[i]);
-			if (buf->dma)
-				pci_unmap_single(pdev, buf->dma,
-						 IGB_RXBUFFER_2048,
-						 PCI_DMA_FROMDEVICE);
-			if (buf->skb)
-				dev_kfree_skb(buf->skb);
-		}
-	}
-
-	if (tx_ring->desc) {
-		pci_free_consistent(pdev, tx_ring->size, tx_ring->desc,
-				    tx_ring->dma);
-		tx_ring->desc = NULL;
-	}
-	if (rx_ring->desc) {
-		pci_free_consistent(pdev, rx_ring->size, rx_ring->desc,
-				    rx_ring->dma);
-		rx_ring->desc = NULL;
-	}
-
-	kfree(tx_ring->buffer_info);
-	tx_ring->buffer_info = NULL;
-	kfree(rx_ring->buffer_info);
-	rx_ring->buffer_info = NULL;
-
-	return;
+	igb_free_tx_resources(&adapter->test_tx_ring);
+	igb_free_rx_resources(&adapter->test_rx_ring);
 }
 
 static int igb_setup_desc_rings(struct igb_adapter *adapter)
 {
-	struct e1000_hw *hw = &adapter->hw;
 	struct igb_ring *tx_ring = &adapter->test_tx_ring;
 	struct igb_ring *rx_ring = &adapter->test_rx_ring;
-	struct pci_dev *pdev = adapter->pdev;
-	struct igb_buffer *buffer_info;
-	u32 rctl;
+	struct e1000_hw *hw = &adapter->hw;
 	int i, ret_val;
 
 	/* Setup Tx descriptor ring and Tx buffers */
+	tx_ring->count = IGB_DEFAULT_TXD;
+	tx_ring->pdev = adapter->pdev;
+	tx_ring->netdev = adapter->netdev;
+	tx_ring->reg_idx = adapter->vfs_allocated_count;
 
-	if (!tx_ring->count)
-		tx_ring->count = IGB_DEFAULT_TXD;
-
-	tx_ring->buffer_info = kcalloc(tx_ring->count,
-				       sizeof(struct igb_buffer),
-				       GFP_KERNEL);
-	if (!tx_ring->buffer_info) {
+	if (igb_setup_tx_resources(tx_ring)) {
 		ret_val = 1;
 		goto err_nomem;
 	}
 
-	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
-	tx_ring->size = ALIGN(tx_ring->size, 4096);
-	tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
-					     &tx_ring->dma);
-	if (!tx_ring->desc) {
-		ret_val = 2;
-		goto err_nomem;
-	}
-	tx_ring->next_to_use = tx_ring->next_to_clean = 0;
-
-	wr32(E1000_TDBAL(0),
-			((u64) tx_ring->dma & 0x00000000FFFFFFFF));
-	wr32(E1000_TDBAH(0), ((u64) tx_ring->dma >> 32));
-	wr32(E1000_TDLEN(0),
-			tx_ring->count * sizeof(union e1000_adv_tx_desc));
-	wr32(E1000_TDH(0), 0);
-	wr32(E1000_TDT(0), 0);
-	wr32(E1000_TCTL,
-			E1000_TCTL_PSP | E1000_TCTL_EN |
-			E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT |
-			E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT);
+	igb_setup_tctl(adapter);
+	igb_configure_tx_ring(adapter, tx_ring);
 
 	for (i = 0; i < tx_ring->count; i++) {
 		union e1000_adv_tx_desc *tx_desc;
-		struct sk_buff *skb;
 		unsigned int size = 1024;
+		struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
 
-		tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
-		skb = alloc_skb(size, GFP_KERNEL);
 		if (!skb) {
-			ret_val = 3;
+			ret_val = 2;
 			goto err_nomem;
 		}
 		skb_put(skb, size);
-		buffer_info = &tx_ring->buffer_info[i];
-		buffer_info->skb = skb;
-		buffer_info->length = skb->len;
-		buffer_info->dma = pci_map_single(pdev, skb->data, skb->len,
-		                                  PCI_DMA_TODEVICE);
-		tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
+		tx_ring->buffer_info[i].skb = skb;
+		tx_ring->buffer_info[i].length = skb->len;
+		tx_ring->buffer_info[i].dma =
+			pci_map_single(tx_ring->pdev, skb->data, skb->len,
+				       PCI_DMA_TODEVICE);
+		tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
+		tx_desc->read.buffer_addr =
+			cpu_to_le64(tx_ring->buffer_info[i].dma);
 		tx_desc->read.olinfo_status = cpu_to_le32(skb->len) <<
 		                              E1000_ADVTXD_PAYLEN_SHIFT;
 		tx_desc->read.cmd_type_len = cpu_to_le32(skb->len);
@@ -1366,62 +1299,25 @@ static int igb_setup_desc_rings(struct igb_adapter *adapter)
 	}
 
 	/* Setup Rx descriptor ring and Rx buffers */
-
-	if (!rx_ring->count)
-		rx_ring->count = IGB_DEFAULT_RXD;
-
-	rx_ring->buffer_info = kcalloc(rx_ring->count,
-				       sizeof(struct igb_buffer),
-				       GFP_KERNEL);
-	if (!rx_ring->buffer_info) {
-		ret_val = 4;
-		goto err_nomem;
-	}
-
-	rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc);
-	rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
-					     &rx_ring->dma);
-	if (!rx_ring->desc) {
-		ret_val = 5;
+	rx_ring->count = IGB_DEFAULT_RXD;
+	rx_ring->pdev = adapter->pdev;
+	rx_ring->netdev = adapter->netdev;
+	rx_ring->rx_buffer_len = IGB_RXBUFFER_2048;
+	rx_ring->reg_idx = adapter->vfs_allocated_count;
+
+	if (igb_setup_rx_resources(rx_ring)) {
+		ret_val = 3;
 		goto err_nomem;
 	}
-	rx_ring->next_to_use = rx_ring->next_to_clean = 0;
 
-	rctl = rd32(E1000_RCTL);
-	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
-	wr32(E1000_RDBAL(0),
-			((u64) rx_ring->dma & 0xFFFFFFFF));
-	wr32(E1000_RDBAH(0),
-			((u64) rx_ring->dma >> 32));
-	wr32(E1000_RDLEN(0), rx_ring->size);
-	wr32(E1000_RDH(0), 0);
-	wr32(E1000_RDT(0), 0);
-	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
-	rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
-		(adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
-	wr32(E1000_RCTL, rctl);
-	wr32(E1000_SRRCTL(0), E1000_SRRCTL_DESCTYPE_ADV_ONEBUF);
+	/* set the default queue to queue 0 of PF */
+	wr32(E1000_MRQC, adapter->vfs_allocated_count << 3);
 
-	for (i = 0; i < rx_ring->count; i++) {
-		union e1000_adv_rx_desc *rx_desc;
-		struct sk_buff *skb;
+	/* enable receive ring */
+	igb_setup_rctl(adapter);
+	igb_configure_rx_ring(adapter, rx_ring);
 
-		buffer_info = &rx_ring->buffer_info[i];
-		rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
-		skb = alloc_skb(IGB_RXBUFFER_2048 + NET_IP_ALIGN,
-				GFP_KERNEL);
-		if (!skb) {
-			ret_val = 6;
-			goto err_nomem;
-		}
-		skb_reserve(skb, NET_IP_ALIGN);
-		buffer_info->skb = skb;
-		buffer_info->dma = pci_map_single(pdev, skb->data,
-		                                  IGB_RXBUFFER_2048,
-		                                  PCI_DMA_FROMDEVICE);
-		rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
-		memset(skb->data, 0x00, skb->len);
-	}
+	igb_alloc_rx_buffers_adv(rx_ring, igb_desc_unused(rx_ring));
 
 	return 0;
 
@@ -1576,15 +1472,12 @@ static int igb_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size)
 
 static int igb_run_loopback_test(struct igb_adapter *adapter)
 {
-	struct e1000_hw *hw = &adapter->hw;
 	struct igb_ring *tx_ring = &adapter->test_tx_ring;
 	struct igb_ring *rx_ring = &adapter->test_rx_ring;
-	struct pci_dev *pdev = adapter->pdev;
-	int i, j, k, l, lc, good_cnt;
-	int ret_val = 0;
+	int i, j, k, l, lc, good_cnt, ret_val = 0;
 	unsigned long time;
 
-	wr32(E1000_RDT(0), rx_ring->count - 1);
+	writel(rx_ring->count - 1, rx_ring->tail);
 
 	/* Calculate the loop count based on the largest descriptor ring
 	 * The idea is to wrap the largest ring a number of times using 64
@@ -1601,7 +1494,7 @@ static int igb_run_loopback_test(struct igb_adapter *adapter)
 		for (i = 0; i < 64; i++) { /* send the packets */
 			igb_create_lbtest_frame(tx_ring->buffer_info[k].skb,
 						1024);
-			pci_dma_sync_single_for_device(pdev,
+			pci_dma_sync_single_for_device(tx_ring->pdev,
 				tx_ring->buffer_info[k].dma,
 				tx_ring->buffer_info[k].length,
 				PCI_DMA_TODEVICE);
@@ -1609,12 +1502,12 @@ static int igb_run_loopback_test(struct igb_adapter *adapter)
 			if (k == tx_ring->count)
 				k = 0;
 		}
-		wr32(E1000_TDT(0), k);
+		writel(k, tx_ring->tail);
 		msleep(200);
 		time = jiffies; /* set the start time for the receive */
 		good_cnt = 0;
 		do { /* receive the sent packets */
-			pci_dma_sync_single_for_cpu(pdev,
+			pci_dma_sync_single_for_cpu(rx_ring->pdev,
 					rx_ring->buffer_info[l].dma,
 					IGB_RXBUFFER_2048,
 					PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 9dd290c..576a4fa 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -91,8 +91,6 @@ static int igb_open(struct net_device *);
 static int igb_close(struct net_device *);
 static void igb_configure_tx(struct igb_adapter *);
 static void igb_configure_rx(struct igb_adapter *);
-static void igb_setup_tctl(struct igb_adapter *);
-static void igb_setup_rctl(struct igb_adapter *);
 static void igb_clean_all_tx_rings(struct igb_adapter *);
 static void igb_clean_all_rx_rings(struct igb_adapter *);
 static void igb_clean_tx_ring(struct igb_ring *);
@@ -120,7 +118,6 @@ static void igb_setup_dca(struct igb_adapter *);
 static bool igb_clean_tx_irq(struct igb_q_vector *);
 static int igb_poll(struct napi_struct *, int);
 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
-static void igb_alloc_rx_buffers_adv(struct igb_ring *, int);
 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
 static void igb_reset_task(struct work_struct *);
@@ -310,17 +307,6 @@ static char *igb_get_time_str(struct igb_adapter *adapter,
 #endif
 
 /**
- * igb_desc_unused - calculate if we have unused descriptors
- **/
-static int igb_desc_unused(struct igb_ring *ring)
-{
-	if (ring->next_to_clean > ring->next_to_use)
-		return ring->next_to_clean - ring->next_to_use - 1;
-
-	return ring->count + ring->next_to_clean - ring->next_to_use - 1;
-}
-
-/**
  * igb_init_module - Driver Registration Routine
  *
  * igb_init_module is the first routine called when the driver is
@@ -2087,7 +2073,7 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
  * igb_setup_tctl - configure the transmit control registers
  * @adapter: Board private structure
  **/
-static void igb_setup_tctl(struct igb_adapter *adapter)
+void igb_setup_tctl(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 tctl;
@@ -2116,8 +2102,8 @@ static void igb_setup_tctl(struct igb_adapter *adapter)
  *
  * Configure a transmit ring after a reset.
  **/
-static void igb_configure_tx_ring(struct igb_adapter *adapter,
-                                  struct igb_ring *ring)
+void igb_configure_tx_ring(struct igb_adapter *adapter,
+                           struct igb_ring *ring)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 txdctl;
@@ -2339,7 +2325,7 @@ static void igb_setup_mrqc(struct igb_adapter *adapter)
  * igb_setup_rctl - configure the receive control registers
  * @adapter: Board private structure
  **/
-static void igb_setup_rctl(struct igb_adapter *adapter)
+void igb_setup_rctl(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 rctl;
@@ -2423,8 +2409,8 @@ static void igb_rlpml_set(struct igb_adapter *adapter)
  *
  * Configure the Rx unit of the MAC after a reset.
  **/
-static void igb_configure_rx_ring(struct igb_adapter *adapter,
-                                  struct igb_ring *ring)
+void igb_configure_rx_ring(struct igb_adapter *adapter,
+                           struct igb_ring *ring)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u64 rdba = ring->dma;
@@ -5034,8 +5020,7 @@ next_desc:
  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
  * @adapter: address of board private structure
  **/
-static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
-				     int cleaned_count)
+void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
 {
 	struct net_device *netdev = rx_ring->netdev;
 	union e1000_adv_rx_desc *rx_desc;


^ permalink raw reply related

* [net-next-2.6 PATCH 17/20] igb: make tx ring map and free functionality non-static
From: Jeff Kirsher @ 2009-10-28  1:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change makes a minor change to the xmit_frame_ring_adv function in that
it moves 2 checks from it into the xmit_frame_adv since the checks were not
ring specific.  In addition it exports the xmit_frame_ring_adv and the
unmap_and_free_tx_resource calls so that they can be used by other code such
as the ethtool loopback testing calls.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb.h      |    3 +++
 drivers/net/igb/igb_main.c |   35 ++++++++++++++++-------------------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 8b189a0..6c35c90 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -349,6 +349,9 @@ extern void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
 extern void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
 extern void igb_setup_tctl(struct igb_adapter *);
 extern void igb_setup_rctl(struct igb_adapter *);
+extern netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *, struct igb_ring *);
+extern void igb_unmap_and_free_tx_resource(struct igb_ring *,
+					   struct igb_buffer *);
 extern void igb_alloc_rx_buffers_adv(struct igb_ring *, int);
 extern void igb_update_stats(struct igb_adapter *);
 extern void igb_set_ethtool_ops(struct net_device *);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 576a4fa..c9fda11 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -99,10 +99,7 @@ static void igb_set_rx_mode(struct net_device *);
 static void igb_update_phy_info(unsigned long);
 static void igb_watchdog(unsigned long);
 static void igb_watchdog_task(struct work_struct *);
-static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *,
-					   struct igb_ring *);
-static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
-				      struct net_device *);
+static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
 static struct net_device_stats *igb_get_stats(struct net_device *);
 static int igb_change_mtu(struct net_device *, int);
 static int igb_set_mac(struct net_device *, void *);
@@ -2521,8 +2518,8 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
 		igb_free_tx_resources(&adapter->tx_ring[i]);
 }
 
-static void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
-					   struct igb_buffer *buffer_info)
+void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
+				    struct igb_buffer *buffer_info)
 {
 	buffer_info->dma = 0;
 	if (buffer_info->skb) {
@@ -3585,8 +3582,8 @@ static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
 	return __igb_maybe_stop_tx(tx_ring, size);
 }
 
-static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
-					   struct igb_ring *tx_ring)
+netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
+				    struct igb_ring *tx_ring)
 {
 	struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
 	unsigned int first;
@@ -3596,16 +3593,6 @@ static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
 	int tso = 0;
 	union skb_shared_tx *shtx;
 
-	if (test_bit(__IGB_DOWN, &adapter->state)) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
-	if (skb->len <= 0) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
 	/* need: 1 descriptor per page,
 	 *       + 2 desc gap to keep tail from touching head,
 	 *       + 1 desc for skb->data,
@@ -3680,8 +3667,18 @@ static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct igb_ring *tx_ring;
-
 	int r_idx = 0;
+
+	if (test_bit(__IGB_DOWN, &adapter->state)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (skb->len <= 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
 	r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
 	tx_ring = adapter->multi_tx_table[r_idx];
 


^ permalink raw reply related

* [net-next-2.6 PATCH 18/20] igb: make ethtool use core xmit map and free functionality
From: Jeff Kirsher @ 2009-10-28  1:55 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change adds a clean_rx/tx_irq type function call to the ethtool loopback
testing which allows us to test the core transmit and receive functionality in
the driver.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb_ethtool.c |  156 +++++++++++++++++++++++------------------
 1 files changed, 89 insertions(+), 67 deletions(-)

diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c
index 80afd8a..aa05f00 100644
--- a/drivers/net/igb/igb_ethtool.c
+++ b/drivers/net/igb/igb_ethtool.c
@@ -1254,7 +1254,7 @@ static int igb_setup_desc_rings(struct igb_adapter *adapter)
 	struct igb_ring *tx_ring = &adapter->test_tx_ring;
 	struct igb_ring *rx_ring = &adapter->test_rx_ring;
 	struct e1000_hw *hw = &adapter->hw;
-	int i, ret_val;
+	int ret_val;
 
 	/* Setup Tx descriptor ring and Tx buffers */
 	tx_ring->count = IGB_DEFAULT_TXD;
@@ -1270,34 +1270,6 @@ static int igb_setup_desc_rings(struct igb_adapter *adapter)
 	igb_setup_tctl(adapter);
 	igb_configure_tx_ring(adapter, tx_ring);
 
-	for (i = 0; i < tx_ring->count; i++) {
-		union e1000_adv_tx_desc *tx_desc;
-		unsigned int size = 1024;
-		struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
-
-		if (!skb) {
-			ret_val = 2;
-			goto err_nomem;
-		}
-		skb_put(skb, size);
-		tx_ring->buffer_info[i].skb = skb;
-		tx_ring->buffer_info[i].length = skb->len;
-		tx_ring->buffer_info[i].dma =
-			pci_map_single(tx_ring->pdev, skb->data, skb->len,
-				       PCI_DMA_TODEVICE);
-		tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
-		tx_desc->read.buffer_addr =
-			cpu_to_le64(tx_ring->buffer_info[i].dma);
-		tx_desc->read.olinfo_status = cpu_to_le32(skb->len) <<
-		                              E1000_ADVTXD_PAYLEN_SHIFT;
-		tx_desc->read.cmd_type_len = cpu_to_le32(skb->len);
-		tx_desc->read.cmd_type_len |= cpu_to_le32(E1000_TXD_CMD_EOP |
-		                                          E1000_TXD_CMD_IFCS |
-		                                          E1000_TXD_CMD_RS |
-		                                          E1000_ADVTXD_DTYP_DATA |
-		                                          E1000_ADVTXD_DCMD_DEXT);
-	}
-
 	/* Setup Rx descriptor ring and Rx buffers */
 	rx_ring->count = IGB_DEFAULT_RXD;
 	rx_ring->pdev = adapter->pdev;
@@ -1470,14 +1442,78 @@ static int igb_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size)
 	return 13;
 }
 
+static int igb_clean_test_rings(struct igb_ring *rx_ring,
+                                struct igb_ring *tx_ring,
+                                unsigned int size)
+{
+	union e1000_adv_rx_desc *rx_desc;
+	struct igb_buffer *buffer_info;
+	int rx_ntc, tx_ntc, count = 0;
+	u32 staterr;
+
+	/* initialize next to clean and descriptor values */
+	rx_ntc = rx_ring->next_to_clean;
+	tx_ntc = tx_ring->next_to_clean;
+	rx_desc = E1000_RX_DESC_ADV(*rx_ring, rx_ntc);
+	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+
+	while (staterr & E1000_RXD_STAT_DD) {
+		/* check rx buffer */
+		buffer_info = &rx_ring->buffer_info[rx_ntc];
+
+		/* unmap rx buffer, will be remapped by alloc_rx_buffers */
+		pci_unmap_single(rx_ring->pdev,
+		                 buffer_info->dma,
+				 rx_ring->rx_buffer_len,
+				 PCI_DMA_FROMDEVICE);
+		buffer_info->dma = 0;
+
+		/* verify contents of skb */
+		if (!igb_check_lbtest_frame(buffer_info->skb, size))
+			count++;
+
+		/* unmap buffer on tx side */
+		buffer_info = &tx_ring->buffer_info[tx_ntc];
+		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
+
+		/* increment rx/tx next to clean counters */
+		rx_ntc++;
+		if (rx_ntc == rx_ring->count)
+			rx_ntc = 0;
+		tx_ntc++;
+		if (tx_ntc == tx_ring->count)
+			tx_ntc = 0;
+
+		/* fetch next descriptor */
+		rx_desc = E1000_RX_DESC_ADV(*rx_ring, rx_ntc);
+		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+	}
+
+	/* re-map buffers to ring, store next to clean values */
+	igb_alloc_rx_buffers_adv(rx_ring, count);
+	rx_ring->next_to_clean = rx_ntc;
+	tx_ring->next_to_clean = tx_ntc;
+
+	return count;
+}
+
 static int igb_run_loopback_test(struct igb_adapter *adapter)
 {
 	struct igb_ring *tx_ring = &adapter->test_tx_ring;
 	struct igb_ring *rx_ring = &adapter->test_rx_ring;
-	int i, j, k, l, lc, good_cnt, ret_val = 0;
-	unsigned long time;
+	int i, j, lc, good_cnt, ret_val = 0;
+	unsigned int size = 1024;
+	netdev_tx_t tx_ret_val;
+	struct sk_buff *skb;
+
+	/* allocate test skb */
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (!skb)
+		return 11;
 
-	writel(rx_ring->count - 1, rx_ring->tail);
+	/* place data into test skb */
+	igb_create_lbtest_frame(skb, size);
+	skb_put(skb, size);
 
 	/* Calculate the loop count based on the largest descriptor ring
 	 * The idea is to wrap the largest ring a number of times using 64
@@ -1489,50 +1525,36 @@ static int igb_run_loopback_test(struct igb_adapter *adapter)
 	else
 		lc = ((rx_ring->count / 64) * 2) + 1;
 
-	k = l = 0;
 	for (j = 0; j <= lc; j++) { /* loop count loop */
-		for (i = 0; i < 64; i++) { /* send the packets */
-			igb_create_lbtest_frame(tx_ring->buffer_info[k].skb,
-						1024);
-			pci_dma_sync_single_for_device(tx_ring->pdev,
-				tx_ring->buffer_info[k].dma,
-				tx_ring->buffer_info[k].length,
-				PCI_DMA_TODEVICE);
-			k++;
-			if (k == tx_ring->count)
-				k = 0;
-		}
-		writel(k, tx_ring->tail);
-		msleep(200);
-		time = jiffies; /* set the start time for the receive */
+		/* reset count of good packets */
 		good_cnt = 0;
-		do { /* receive the sent packets */
-			pci_dma_sync_single_for_cpu(rx_ring->pdev,
-					rx_ring->buffer_info[l].dma,
-					IGB_RXBUFFER_2048,
-					PCI_DMA_FROMDEVICE);
-
-			ret_val = igb_check_lbtest_frame(
-					     rx_ring->buffer_info[l].skb, 1024);
-			if (!ret_val)
+
+		/* place 64 packets on the transmit queue*/
+		for (i = 0; i < 64; i++) {
+			skb_get(skb);
+			tx_ret_val = igb_xmit_frame_ring_adv(skb, tx_ring);
+			if (tx_ret_val == NETDEV_TX_OK)
 				good_cnt++;
-			l++;
-			if (l == rx_ring->count)
-				l = 0;
-			/* time + 20 msecs (200 msecs on 2.4) is more than
-			 * enough time to complete the receives, if it's
-			 * exceeded, break and error off
-			 */
-		} while (good_cnt < 64 && jiffies < (time + 20));
+		}
+
 		if (good_cnt != 64) {
-			ret_val = 13; /* ret_val is the same as mis-compare */
+			ret_val = 12;
 			break;
 		}
-		if (jiffies >= (time + 20)) {
-			ret_val = 14; /* error code for time out error */
+
+		/* allow 200 milliseconds for packets to go from tx to rx */
+		msleep(200);
+
+		good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size);
+		if (good_cnt != 64) {
+			ret_val = 13;
 			break;
 		}
 	} /* end loop count loop */
+
+	/* free the original skb */
+	kfree_skb(skb);
+
 	return ret_val;
 }
 


^ permalink raw reply related

* [net-next-2.6 PATCH 19/20] igb: add single vector msi-x testing to interrupt test
From: Jeff Kirsher @ 2009-10-28  1:55 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change adds testing of the first msix vector to the interrupt testing.
This should help with determining the cause of interrupt issues when they are
encountered.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb_ethtool.c |   27 +++++++++++++++++----------
 1 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c
index aa05f00..65c538f 100644
--- a/drivers/net/igb/igb_ethtool.c
+++ b/drivers/net/igb/igb_ethtool.c
@@ -1123,32 +1123,36 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
 	*data = 0;
 
 	/* Hook up test interrupt handler just for this test */
-	if (adapter->msix_entries)
-		/* NOTE: we don't test MSI-X interrupts here, yet */
-		return 0;
+	if (adapter->msix_entries) {
+		if (request_irq(adapter->msix_entries[0].vector,
+		                &igb_test_intr, 0, netdev->name, adapter)) {
+			*data = 1;
+			return -1;
+		}
 
-	if (adapter->flags & IGB_FLAG_HAS_MSI) {
+	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
 		shared_int = false;
-		if (request_irq(irq, &igb_test_intr, 0, netdev->name, netdev)) {
+		if (request_irq(irq,
+		                &igb_test_intr, 0, netdev->name, adapter)) {
 			*data = 1;
 			return -1;
 		}
 	} else if (!request_irq(irq, &igb_test_intr, IRQF_PROBE_SHARED,
-				netdev->name, netdev)) {
+				netdev->name, adapter)) {
 		shared_int = false;
 	} else if (request_irq(irq, &igb_test_intr, IRQF_SHARED,
-		 netdev->name, netdev)) {
+		 netdev->name, adapter)) {
 		*data = 1;
 		return -1;
 	}
 	dev_info(&adapter->pdev->dev, "testing %s interrupt\n",
 		(shared_int ? "shared" : "unshared"));
 	/* Disable all the interrupts */
-	wr32(E1000_IMC, 0xFFFFFFFF);
+	wr32(E1000_IMC, ~0);
 	msleep(10);
 
 	/* Define all writable bits for ICS */
-	switch(hw->mac.type) {
+	switch (hw->mac.type) {
 	case e1000_82575:
 		ics_mask = 0x37F47EDD;
 		break;
@@ -1238,7 +1242,10 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
 	msleep(10);
 
 	/* Unhook test interrupt handler */
-	free_irq(irq, netdev);
+	if (adapter->msix_entries)
+		free_irq(adapter->msix_entries[0].vector, adapter);
+	else
+		free_irq(irq, adapter);
 
 	return *data;
 }


^ permalink raw reply related

* [net-next-2.6 PATCH 20/20] igb: cleanup "todo" code found in igb_ethtool.c
From: Jeff Kirsher @ 2009-10-28  1:55 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Alexander Duyck, Jeff Kirsher
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This patch moves some defines into the e1000_regs.h file since this is the
correct place for register defines and not inside of igb_ethtool.c

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/e1000_regs.h  |    7 +++++++
 drivers/net/igb/igb_ethtool.c |   11 +----------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/igb/e1000_regs.h b/drivers/net/igb/e1000_regs.h
index 76c3389..e06c3b7 100644
--- a/drivers/net/igb/e1000_regs.h
+++ b/drivers/net/igb/e1000_regs.h
@@ -288,10 +288,17 @@ enum {
 #define E1000_MTA      0x05200  /* Multicast Table Array - RW Array */
 #define E1000_RA       0x05400  /* Receive Address - RW Array */
 #define E1000_RA2      0x054E0  /* 2nd half of receive address array - RW Array */
+#define E1000_PSRTYPE(_i)       (0x05480 + ((_i) * 4))
 #define E1000_RAL(_i)  (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
                                        (0x054E0 + ((_i - 16) * 8)))
 #define E1000_RAH(_i)  (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
                                        (0x054E4 + ((_i - 16) * 8)))
+#define E1000_IP4AT_REG(_i)     (0x05840 + ((_i) * 8))
+#define E1000_IP6AT_REG(_i)     (0x05880 + ((_i) * 4))
+#define E1000_WUPM_REG(_i)      (0x05A00 + ((_i) * 4))
+#define E1000_FFMT_REG(_i)      (0x09000 + ((_i) * 8))
+#define E1000_FFVT_REG(_i)      (0x09800 + ((_i) * 8))
+#define E1000_FFLT_REG(_i)      (0x05F00 + ((_i) * 8))
 #define E1000_VFTA     0x05600  /* VLAN Filter Table Array - RW Array */
 #define E1000_VT_CTL   0x0581C  /* VMDq Control - RW */
 #define E1000_WUC      0x05800  /* Wakeup Control - RW */
diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c
index 65c538f..048a615 100644
--- a/drivers/net/igb/igb_ethtool.c
+++ b/drivers/net/igb/igb_ethtool.c
@@ -502,19 +502,10 @@ static void igb_get_regs(struct net_device *netdev,
 	regs_buff[119] = adapter->stats.scvpc;
 	regs_buff[120] = adapter->stats.hrmpc;
 
-	/* These should probably be added to e1000_regs.h instead */
-	#define E1000_PSRTYPE_REG(_i) (0x05480 + ((_i) * 4))
-	#define E1000_IP4AT_REG(_i)   (0x05840 + ((_i) * 8))
-	#define E1000_IP6AT_REG(_i)   (0x05880 + ((_i) * 4))
-	#define E1000_WUPM_REG(_i)    (0x05A00 + ((_i) * 4))
-	#define E1000_FFMT_REG(_i)    (0x09000 + ((_i) * 8))
-	#define E1000_FFVT_REG(_i)    (0x09800 + ((_i) * 8))
-	#define E1000_FFLT_REG(_i)    (0x05F00 + ((_i) * 8))
-
 	for (i = 0; i < 4; i++)
 		regs_buff[121 + i] = rd32(E1000_SRRCTL(i));
 	for (i = 0; i < 4; i++)
-		regs_buff[125 + i] = rd32(E1000_PSRTYPE_REG(i));
+		regs_buff[125 + i] = rd32(E1000_PSRTYPE(i));
 	for (i = 0; i < 4; i++)
 		regs_buff[129 + i] = rd32(E1000_RDBAL(i));
 	for (i = 0; i < 4; i++)


^ permalink raw reply related

* Re: [PATCH 3/3] net: TCP thin dupack
From: William Allen Simpson @ 2009-10-28  2:43 UTC (permalink / raw)
  To: Andreas Petlund; +Cc: netdev, linux-kernel, shemminger, ilpo.jarvinen, davem
In-Reply-To: <4AE7207D.8090402@simula.no>

Andreas Petlund wrote:
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index e64368d..f4a05ff 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -97,6 +97,7 @@ enum {
>  #define TCP_CONGESTION		13	/* Congestion control algorithm */
>  #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
>  #define TCP_THIN_RM_EXPB        15      /* Remove exp. backoff for thin streams*/
> +#define TCP_THIN_DUPACK         16      /* Fast retrans. after 1 dupack */
>  
I've not had the chance to examine the rest, but I've been poking at a
patch series that's used 15 for over a year, so could you try 16 and 17?

^ permalink raw reply

* Re: [PATCH 1/3] net: TCP thin-stream detection
From: William Allen Simpson @ 2009-10-28  3:09 UTC (permalink / raw)
  To: Andreas Petlund; +Cc: netdev, linux-kernel, shemminger, ilpo.jarvinen, davem
In-Reply-To: <4AE72075.4070702@simula.no>

Andreas Petlund wrote:
> +/* Determines whether this is a thin stream (which may suffer from
> + * increased latency). Used to trigger latency-reducing mechanisms.
> + */
> +static inline unsigned int tcp_stream_is_thin(const struct tcp_sock *tp)
> +{
> +	return tp->packets_out < 4;
> +}
> +
This bothers me a bit.  Having just looked at your Linux presentation,
and not (yet) read your papers, it seems much of your justification was
with 1 packet per RTT.  Here, you seem to be concentrating on 4, probably
because many implementations quickly ramp up to 4.

But there's a fair amount of experience showing that ramping to 4 is
problematic on congested paths, especially wireless networks.  Fast
retransmit in that case would be disastrous.

A decade or so ago, I worked on a fair number of interactive games, and
I agree that this can be a problem, although I've never
been a fan of turning off the Nagle algorithm.  My solution has always
been a heartbeat, rather than trying to shoehorn this into TCP.

Also, I've not seen any discussion on the end-to-end interest list.

^ permalink raw reply

* Re: [PATCH 2/3] net: TCP thin linear timeouts
From: William Allen Simpson @ 2009-10-28  3:20 UTC (permalink / raw)
  To: Andreas Petlund; +Cc: netdev, linux-kernel, shemminger, ilpo.jarvinen, davem
In-Reply-To: <4AE72079.4030504@simula.no>

Sorry to be too picky about the naming, but "rm_expb" really doesn't
mean what is actually done.  Perhaps TCP_THIN_LINEAR_BACKOFF and
sysctl_tcp_thin_linear_backoff?

Also, as debated on some other recent patches, shouldn't the global
sysctl specify the default, and the per socket option specify the
forced override?

^ permalink raw reply

* [patch net-next]atl1c: duplicate atl1c_get_tpd
From: jie.yang @ 2009-10-28  5:14 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, Jie Yang

From: Jie Yang <jie.yang@atheros.com>

Remove the duplicate atl1c_get_tpd call; it may cause the hardware to send wrong packets.

Signed-off-by: Jie Yang <jie.yang@atheros.com>

---

 drivers/net/atl1c/atl1c_main.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index 1372e9a..3b8801a 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -1981,8 +1981,6 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter,
 		else {
 			use_tpd = atl1c_get_tpd(adapter, type);
 			memcpy(use_tpd, tpd, sizeof(struct atl1c_tpd_desc));
-			use_tpd = atl1c_get_tpd(adapter, type);
-			memcpy(use_tpd, tpd, sizeof(struct atl1c_tpd_desc));
 		}
 		buffer_info = atl1c_get_tx_buffer(adapter, use_tpd);
 		buffer_info->length = buf_len - mapped_len;

^ permalink raw reply related

* Re: [patch net-next]atl1c: duplicate atl1c_get_tpd
From: David Miller @ 2009-10-28  5:31 UTC (permalink / raw)
  To: jie.yang; +Cc: netdev, linux-kernel
In-Reply-To: <12567068551959-git-send-email-jie.yang@atheros.com>

From: <jie.yang@atheros.com>
Date: Wed, 28 Oct 2009 13:14:15 +0800

> From: Jie Yang <jie.yang@atheros.com>
> 
> remove duplicate atl1c_get_tpd, it may cause hardware to send wrong packets.
> 
> Signed-off-by: Jie Yang <jie.yang@atheros.com>

Applied, thanks.

^ permalink raw reply

* [net-next-2.6 PATCH] vxge: Configure the number of transmit descriptors per packet to MAX_SKB_FRAGS + 1.
From: Sreenivasa Honnur @ 2009-10-28  5:49 UTC (permalink / raw)
  To: davem; +Cc: netdev, support

- Configure the number of transmit descriptors per packet to MAX_SKB_FRAGS + 1.

Signed-off-by: Sreenivasa Honnur <sreenivasa.honnur@neterion.com>
Signed-off-by: Ramkrishna Vepa <ram.vepa@neterion.com>
---
diff -urpN orig/drivers/net/vxge/vxge-main.c patch1/drivers/net/vxge/vxge-main.c
--- orig/drivers/net/vxge/vxge-main.c	2009-10-26 23:28:15.000000000 -0700
+++ patch1/drivers/net/vxge/vxge-main.c	2009-10-26 23:32:07.000000000 -0700
@@ -3612,11 +3612,12 @@ static int __devinit vxge_config_vpaths(
 		device_config->vp_config[i].fifo.enable =
 						VXGE_HW_FIFO_ENABLE;
 		device_config->vp_config[i].fifo.max_frags =
-				MAX_SKB_FRAGS;
+				MAX_SKB_FRAGS + 1;
 		device_config->vp_config[i].fifo.memblock_size =
 			VXGE_HW_MIN_FIFO_MEMBLOCK_SIZE;
 
-		txdl_size = MAX_SKB_FRAGS * sizeof(struct vxge_hw_fifo_txd);
+		txdl_size = device_config->vp_config[i].fifo.max_frags *
+				sizeof(struct vxge_hw_fifo_txd);
 		txdl_per_memblock = VXGE_HW_MIN_FIFO_MEMBLOCK_SIZE / txdl_size;
 
 		device_config->vp_config[i].fifo.fifo_blocks =
diff -urpN orig/drivers/net/vxge/vxge-version.h patch1/drivers/net/vxge/vxge-version.h
--- orig/drivers/net/vxge/vxge-version.h	2009-10-26 23:28:15.000000000 -0700
+++ patch1/drivers/net/vxge/vxge-version.h	2009-10-27 02:19:18.000000000 -0700
@@ -18,6 +18,6 @@
 #define VXGE_VERSION_MAJOR	"2"
 #define VXGE_VERSION_MINOR	"0"
 #define VXGE_VERSION_FIX	"6"
-#define VXGE_VERSION_BUILD	"18707"
+#define VXGE_VERSION_BUILD	"18937"
 #define VXGE_VERSION_FOR	"k"
 #endif


^ permalink raw reply

* Re: [PATCH] net: fold network name hash (v2)
From: Eric Dumazet @ 2009-10-28  6:07 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Miller, netdev, linux-kernel, akpm, torvalds, opurdila,
	viro
In-Reply-To: <20091027150436.56e673cd@nehalam>

Stephen Hemminger a écrit :
> The full_name_hash does not produce a value that is evenly distributed
> over the lower 8 bits. This causes name hash to be unbalanced with large
> number of names. There is a standard function to fold in upper bits
> so use that.
> 
> This is independent of possible improvements to full_name_hash()
> in future.

>  static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
>  {
>  	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
> -	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
> +	return &net->dev_name_head[hash_long(hash, NETDEV_HASHBITS)];
>  }
>  
>  static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)

full_name_hash() returns an "unsigned int", which is guaranteed to be 32 bits

You should therefore use hash_32(hash, NETDEV_HASHBITS),
not hash_long() that maps to hash_64() on 64 bit arches, which is
slower and certainly not any better with a 32bits input.



/*
 * Hash a whole name in one call: start from init_name_hash(), feed each
 * of the 'len' bytes of 'name' through partial_name_hash() in order, and
 * finish with end_name_hash().
 */
static inline unsigned int
full_name_hash(const unsigned char *name, unsigned int len)
{
        unsigned long acc = init_name_hash();
        unsigned int i;

        for (i = 0; i < len; i++)
                acc = partial_name_hash(name[i], acc);

        return end_name_hash(acc);
}

/*
 * Reduce a 32-bit value to a 'bits'-wide hash: multiply by
 * GOLDEN_RATIO_PRIME_32 and return the top 'bits' bits of the product.
 */
static inline u32 hash_32(u32 val, unsigned int bits)
{
        const unsigned int discard = 32 - bits;
        u32 product;

        /*
         * Written as a plain multiply: some cpus have a fast multiplier,
         * and on the others gcc will turn this into shifts.
         */
        product = val * GOLDEN_RATIO_PRIME_32;

        /* The high bits are the more random ones, so keep those. */
        return product >> discard;
}


/*
 * Reduce a 64-bit value to a 'bits'-wide hash.
 *
 * 'n' is shifted cumulatively (by 18, 51, 54, 57, 61 and 63 bits in
 * total), and each shifted copy is subtracted from or added to 'hash'.
 * The net result, modulo 2^64, is
 *
 *   val * (1 - 2^18 - 2^51 + 2^54 - 2^57 + 2^61 + 2^63)
 *
 * of which the top 'bits' bits are returned.
 *
 * NOTE(review): that multiplier works out to 0x9e37fffffffc0001, which
 * looks like GOLDEN_RATIO_PRIME_64 -- confirm against linux/hash.h.
 * NOTE(review): bits == 0 would make the final shift 64 bits wide;
 * presumably callers never pass 0 -- verify.
 */
static inline u64 hash_64(u64 val, unsigned int bits)
{
        u64 hash = val;

        /*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
        u64 n = hash;
        n <<= 18;               /* n = val << 18 */
        hash -= n;
        n <<= 33;               /* n = val << 51 */
        hash -= n;
        n <<= 3;                /* n = val << 54 */
        hash += n;
        n <<= 3;                /* n = val << 57 */
        hash -= n;
        n <<= 4;                /* n = val << 61 */
        hash += n;
        n <<= 2;                /* n = val << 63 */
        hash += n;

        /* High bits are more random, so use them. */
        return hash >> (64 - bits);
}

^ permalink raw reply

* Re: iproute uses too small of a receive buffer
From: Eric Dumazet @ 2009-10-28  7:01 UTC (permalink / raw)
  To: Ben Greear; +Cc: Stephen Hemminger, NetDev
In-Reply-To: <4AE78297.9000909@candelatech.com>

Ben Greear a écrit :
> 
> Probably the right way is to give a cmd-line arg to set the buffer size
> and also continue if the error is ENOBUFs (but print some error out
> so users know they have issues).  I can make the attempt if that
> sounds good to you.

Real fix is to realloc buffer at receive time, no need for user setting.

In my testing I saw it reach 1 Mbyte
write(2, "REALLOC buflen 8192\n"..., 20) = 20
write(2, "REALLOC buflen 16384\n"..., 21) = 21
write(2, "REALLOC buflen 32768\n"..., 21) = 21
write(2, "REALLOC buflen 65536\n"..., 21) = 21
write(2, "REALLOC buflen 131072\n"..., 22) = 22
write(2, "REALLOC buflen 262144\n"..., 22) = 22
write(2, "REALLOC buflen 524288\n"..., 22) = 22


[iproute2] realloc buffer in rtnl_listen

# ip monitor route
netlink receive error No buffer space available (105)
Dump terminated 

Reported-by: Ben Greear<greearb@candelatech.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index b68e2fd..134ce7f 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -392,8 +392,14 @@ int rtnl_listen(struct rtnl_handle *rtnl,
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
 	};
-	char   buf[8192];
+	char   *buf;
+	size_t buflen = 8192;
 
+	buf = malloc(buflen);
+	if (buf == NULL) {
+		fprintf(stderr, "netlink could not alloc %lu bytes\n", buflen);
+		return -1;
+	}
 	memset(&nladdr, 0, sizeof(nladdr));
 	nladdr.nl_family = AF_NETLINK;
 	nladdr.nl_pid = 0;
@@ -401,12 +407,20 @@ int rtnl_listen(struct rtnl_handle *rtnl,
 
 	iov.iov_base = buf;
 	while (1) {
-		iov.iov_len = sizeof(buf);
+		iov.iov_len = buflen;
 		status = recvmsg(rtnl->fd, &msg, 0);
 
 		if (status < 0) {
 			if (errno == EINTR || errno == EAGAIN)
 				continue;
+			if (errno == ENOBUFS) {
+				buf = realloc(buf, buflen * 2);
+				if (buf) {
+					buflen *= 2;
+					iov.iov_base = buf;
+					continue;
+				}
+			}
 			fprintf(stderr, "netlink receive error %s (%d)\n",
 				strerror(errno), errno);
 			return -1;

^ permalink raw reply related

* Re: iproute uses too small of a receive buffer
From: Eric Dumazet @ 2009-10-28  7:09 UTC (permalink / raw)
  Cc: Ben Greear, Stephen Hemminger, NetDev
In-Reply-To: <4AE7EC65.8000600@gmail.com>

Eric Dumazet a écrit :
> Ben Greear a écrit :
>> Probably the right way is to give a cmd-line arg to set the buffer size
>> and also continue if the error is ENOBUFs (but print some error out
>> so users know they have issues).  I can make the attempt if that
>> sounds good to you.
> 
> Real fix is to realloc buffer at receive time, no need for user setting.
> 

Then, another problem is that some information can be dropped at kernel level
when socket rcvbuf is full (ip monitor too slow to read its socket)

That's hard to fix because you need to tweak /proc/sys/net/core/rmem_max



^ permalink raw reply

* Re: PATCH 23/10]Optimize the upload speed for PPP connection.
From: fangxiaozhi 00110321 @ 2009-10-28  7:30 UTC (permalink / raw)
  To: davem, william.allen.simpson; +Cc: netdev, kernel, zihan, greg, haegar


Thanks for your advice.

But PAGE_SIZE is generally 4096; isn't that too large?

If PAGE_SIZE really is appropriate, then I can resubmit the patch.

Thanks very much.
----- Original Message ----- 
From: "David Miller" <davem@davemloft.net>
To: <william.allen.simpson@gmail.com>
Cc: <huananhu@huawei.com>; <netdev@vger.kernel.org>; <linux-kernel@vger.kernel.org>; <zihan@huawei.com>; <greg@kroah.com>; <haegar@sdinet.de>
Sent: Saturday, October 24, 2009 9:46 PM
Subject: Re: PATCH 23/10]Optimize the upload speed for PPP connection.


> From: William Allen Simpson <william.allen.simpson@gmail.com>
> Date: Fri, 23 Oct 2009 07:46:08 -0400
> 
>> Concur.  I'd go further than that, my code usually made room for at
>> least
>> a full MTU (MRU) with HDLC escaping.  To minimize context switches,
>> that
>> should be 3014 ((1500 MRU + 2 FCS + 4 header) * 2 escapes + 2 flags).
>> 
>> Even in the old days, when memory was tight, context switches and
>> interrupt
>> time were more expensive, too.  PPP is supposed to scale to OC-192.
> 
> Actually I'd like to see ->obuf allocated externally and then
> make it simply PAGE_SIZE.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
******************************************************************************************
 This email and its attachments contain confidential information from HUAWEI, which is intended only for the person or entity whose address is listed above. Any use of the information contained here in any way (including, but not limited to, total or partial disclosure, reproduction, or dissemination) by persons other than the intended recipient(s) is prohibited. If you receive this email in error, please notify the sender by phone or email
 immediately and delete it!
 *****************************************************************************************

^ permalink raw reply

* Re: iproute uses too small of a receive buffer
From: Eric Dumazet @ 2009-10-28  7:37 UTC (permalink / raw)
  To: Ben Greear, Stephen Hemminger; +Cc: NetDev
In-Reply-To: <4AE7EC65.8000600@gmail.com>

Eric Dumazet a écrit :
> Ben Greear a écrit :
>> Probably the right way is to give a cmd-line arg to set the buffer size
>> and also continue if the error is ENOBUFs (but print some error out
>> so users know they have issues).  I can make the attempt if that
>> sounds good to you.
> 
> Real fix is to realloc buffer at receive time, no need for user setting.
> 
> In my testings I saw it reaching 1 Mbyte
> write(2, "REALLOC buflen 8192\n"..., 20) = 20
> write(2, "REALLOC buflen 16384\n"..., 21) = 21
> write(2, "REALLOC buflen 32768\n"..., 21) = 21
> write(2, "REALLOC buflen 65536\n"..., 21) = 21
> write(2, "REALLOC buflen 131072\n"..., 22) = 22
> write(2, "REALLOC buflen 262144\n"..., 22) = 22
> write(2, "REALLOC buflen 524288\n"..., 22) = 22
> 
> 
> [iproute2] realloc buffer in rtnl_listen
> 
> # ip monitor route
> netlink receive error No buffer space available (105)
> Dump terminated 
> 
> Reported-by: Ben Greear<greearb@candelatech.com>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Oops, this was wrong, Ben was right, sorry...

An ENOBUFS error is a flag that reports to the user that some information was dropped,
not that the user-supplied buffer at recv() time is not big enough.

I was surprised that the buffer could reach 1 Mbyte while RCVBUF was 32768 or so.



^ permalink raw reply

* Re: iproute uses too small of a receive buffer
From: Eric Dumazet @ 2009-10-28  7:52 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Ben Greear, NetDev
In-Reply-To: <20091027162434.6dc31b2d@nehalam>

Stephen Hemminger a écrit :
> 
> Just having larger buffer isn't guarantee of success. Allocating
> a huge buffer is not going to work on embedded.
> 

Please note we do not allocate a big buffer, only allow more small skbs
to be queued on socket receive queue.

If memory is not available, skb allocation will eventually fail
and be reported as well, embedded or not.

I vote for allowing 1024*1024 bytes instead of 32768,
and eventually user should be warned that it is capped by 
/proc/sys/net/core/rmem_max


> Why not have it continue after one error.

Yes, but the caller of 'ip monitor' just restarts it anyway

^ permalink raw reply

* Re: iproute uses too small of a receive buffer
From: David Miller @ 2009-10-28  7:55 UTC (permalink / raw)
  To: eric.dumazet; +Cc: shemminger, greearb, netdev
In-Reply-To: <4AE7F859.7020105@gmail.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 28 Oct 2009 08:52:57 +0100

> Stephen Hemminger a écrit :
>> 
>> Just having larger buffer isn't guarantee of success. Allocating
>> a huge buffer is not going to work on embedded.
>> 
> 
> Please note we do not allocate a big buffer, only allow more small skbs
> to be queued on socket receive queue.
> 
> If memory is not available, skb allocation will eventually fail
> and be reported as well, embedded or not.
> 
> I vote for allowing 1024*1024 bytes instead of 32768,
> and eventually user should be warned that it is capped by 
> /proc/sys/net/core/rmem_max

This discussion constantly reminds me of:

/*
 *	skb should fit one page. This choice is good for headerless malloc.
 *	But we should limit to 8K so that userspace does not have to
 *	use enormous buffer sizes on recvmsg() calls just to avoid
 *	MSG_TRUNC when PAGE_SIZE is very large.
 */
#if PAGE_SIZE < 8192UL
#define NLMSG_GOODSIZE	SKB_WITH_OVERHEAD(PAGE_SIZE)
#else
#define NLMSG_GOODSIZE	SKB_WITH_OVERHEAD(8192UL)
#endif

#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)

^ permalink raw reply

* Re: [PATCH] udev: create empty regular files to represent net interfaces
From: Kay Sievers @ 2009-10-28  8:23 UTC (permalink / raw)
  To: Matt Domsch
  Cc: dann frazier, linux-hotplug, Narendra_K, netdev, Jordan_Hargrave,
	Charles_Rose, Ben Hutchings
In-Reply-To: <20091027205551.GA31963@auslistsprd01.us.dell.com>

On Tue, Oct 27, 2009 at 21:55, Matt Domsch <Matt_Domsch@dell.com> wrote:
> On Thu, Oct 22, 2009 at 12:36:20AM -0600, dann frazier wrote:
>> Here's a proof of concept to further the discussion..
>>
>> The default filename uses the format:
>>   /dev/netdev/by-ifindex/$ifindex
>>
>> This provides the infrastructure to permit udev rules to create aliases for
>> network devices using symlinks, for example:
>>
>>   /dev/netdev/by-name/eth0 -> ../by-ifindex/1
>>   /dev/netdev/by-biosname/LOM0 -> ../by-ifindex/3
>>
>> A library (such as the proposed libnetdevname) could use this information
>> to provide an alias->realname mapping for network utilities.
>
> yes, this could work, as IFINDEX is already exported in the uevents,
> and that's the primary value udev needs to set up the mapping.
>
> While I like the little ifindex2name script you've got, I think udev
> could simply call if_indextoname() to get this, and not call an
> external program?  I suppose it could be a really really simple
> external program too.

What's the point of all this? Why would udev ever need to find the
name of a device by the ifindex? The device name is the primary value
for the kernel events udev acts on.

> I'd be fine with this approach.  It has the advantages of not
> requiring a kernel change at all, and not creating a whole character
> device which would be useless.  And it doesn't preclude someone in the
> future from creating a char device for network devices should they so
> choose.
>
> Kay, what say you as udev owner?

That all sounds very much like something which will hit us back some
day. I'm not sure, if udev should publish such dead text files in
/dev, it does not seem to fit the usual APIs/assumptions where /sys
and /dev match, and libudev provides access to both. It all sounds
more like a database for a possible netdevname library, which does not
need to be public in /dev, right?

Thanks,
Kay
--
To unsubscribe from this list: send the line "unsubscribe linux-hotplug" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [net-next-2.6 PATCH 01/20] igb: add new data structure for handling interrupts and NAPI
From: David Miller @ 2009-10-28  8:28 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, alexander.h.duyck
In-Reply-To: <20091028014858.12470.99520.stgit@localhost.localdomain>


All 20 patches applied, thanks a lot.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox