Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 6/8] tg3: Prepare for larger rx ring sizes
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch adds two new variables to track the sizes of the standard and
jumbo rx producer rings.  The code is then converted to use these
variables instead of preprocessor constants.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |  119 ++++++++++++++++++++++++++++------------------------
 drivers/net/tg3.h |    2 +
 2 files changed, 66 insertions(+), 55 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 50b7e35..16848a9 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -101,9 +101,9 @@
  * You can't change the ring sizes, but you can change where you place
  * them in the NIC onboard memory.
  */
-#define TG3_RX_RING_SIZE		512
+#define TG3_RX_STD_RING_SIZE(tp)	512
 #define TG3_DEF_RX_RING_PENDING		200
-#define TG3_RX_JUMBO_RING_SIZE		256
+#define TG3_RX_JMB_RING_SIZE(tp)	256
 #define TG3_DEF_RX_JUMBO_RING_PENDING	100
 #define TG3_RSS_INDIR_TBL_SIZE		128
 
@@ -120,12 +120,12 @@
 #define TG3_TX_RING_SIZE		512
 #define TG3_DEF_TX_RING_PENDING		(TG3_TX_RING_SIZE - 1)
 
-#define TG3_RX_RING_BYTES	(sizeof(struct tg3_rx_buffer_desc) * \
-				 TG3_RX_RING_SIZE)
-#define TG3_RX_JUMBO_RING_BYTES	(sizeof(struct tg3_ext_rx_buffer_desc) * \
-				 TG3_RX_JUMBO_RING_SIZE)
-#define TG3_RX_RCB_RING_BYTES(tp) (sizeof(struct tg3_rx_buffer_desc) * \
-				 TG3_RX_RCB_RING_SIZE(tp))
+#define TG3_RX_STD_RING_BYTES(tp) \
+	(sizeof(struct tg3_rx_buffer_desc) * TG3_RX_STD_RING_SIZE(tp))
+#define TG3_RX_JMB_RING_BYTES(tp) \
+	(sizeof(struct tg3_ext_rx_buffer_desc) * TG3_RX_JMB_RING_SIZE(tp))
+#define TG3_RX_RCB_RING_BYTES(tp) \
+	(sizeof(struct tg3_rx_buffer_desc) * TG3_RX_RCB_RING_SIZE(tp))
 #define TG3_TX_RING_BYTES	(sizeof(struct tg3_tx_buffer_desc) * \
 				 TG3_TX_RING_SIZE)
 #define NEXT_TX(N)		(((N) + 1) & (TG3_TX_RING_SIZE - 1))
@@ -143,11 +143,11 @@
 #define TG3_RX_STD_MAP_SZ		TG3_RX_DMA_TO_MAP_SZ(TG3_RX_STD_DMA_SZ)
 #define TG3_RX_JMB_MAP_SZ		TG3_RX_DMA_TO_MAP_SZ(TG3_RX_JMB_DMA_SZ)
 
-#define TG3_RX_STD_BUFF_RING_SIZE \
-	(sizeof(struct ring_info) * TG3_RX_RING_SIZE)
+#define TG3_RX_STD_BUFF_RING_SIZE(tp) \
+	(sizeof(struct ring_info) * TG3_RX_STD_RING_SIZE(tp))
 
-#define TG3_RX_JMB_BUFF_RING_SIZE \
-	(sizeof(struct ring_info) * TG3_RX_JUMBO_RING_SIZE)
+#define TG3_RX_JMB_BUFF_RING_SIZE(tp) \
+	(sizeof(struct ring_info) * TG3_RX_JMB_RING_SIZE(tp))
 
 /* Due to a hardware bug, the 5701 can only DMA to memory addresses
  * that are at least dword aligned when used in PCIX mode.  The driver
@@ -4445,14 +4445,14 @@ static int tg3_alloc_rx_skb(struct tg3 *tp, struct tg3_rx_prodring_set *tpr,
 	src_map = NULL;
 	switch (opaque_key) {
 	case RXD_OPAQUE_RING_STD:
-		dest_idx = dest_idx_unmasked % TG3_RX_RING_SIZE;
+		dest_idx = dest_idx_unmasked & tp->rx_std_ring_mask;
 		desc = &tpr->rx_std[dest_idx];
 		map = &tpr->rx_std_buffers[dest_idx];
 		skb_size = tp->rx_pkt_map_sz;
 		break;
 
 	case RXD_OPAQUE_RING_JUMBO:
-		dest_idx = dest_idx_unmasked % TG3_RX_JUMBO_RING_SIZE;
+		dest_idx = dest_idx_unmasked & tp->rx_jmb_ring_mask;
 		desc = &tpr->rx_jmb[dest_idx].std;
 		map = &tpr->rx_jmb_buffers[dest_idx];
 		skb_size = TG3_RX_JMB_MAP_SZ;
@@ -4507,7 +4507,7 @@ static void tg3_recycle_rx(struct tg3_napi *tnapi,
 
 	switch (opaque_key) {
 	case RXD_OPAQUE_RING_STD:
-		dest_idx = dest_idx_unmasked % TG3_RX_RING_SIZE;
+		dest_idx = dest_idx_unmasked & tp->rx_std_ring_mask;
 		dest_desc = &dpr->rx_std[dest_idx];
 		dest_map = &dpr->rx_std_buffers[dest_idx];
 		src_desc = &spr->rx_std[src_idx];
@@ -4515,7 +4515,7 @@ static void tg3_recycle_rx(struct tg3_napi *tnapi,
 		break;
 
 	case RXD_OPAQUE_RING_JUMBO:
-		dest_idx = dest_idx_unmasked % TG3_RX_JUMBO_RING_SIZE;
+		dest_idx = dest_idx_unmasked & tp->rx_jmb_ring_mask;
 		dest_desc = &dpr->rx_jmb[dest_idx].std;
 		dest_map = &dpr->rx_jmb_buffers[dest_idx];
 		src_desc = &spr->rx_jmb[src_idx].std;
@@ -4715,7 +4715,8 @@ next_pkt:
 		(*post_ptr)++;
 
 		if (unlikely(rx_std_posted >= tp->rx_std_max_post)) {
-			tpr->rx_std_prod_idx = std_prod_idx % TG3_RX_RING_SIZE;
+			tpr->rx_std_prod_idx = std_prod_idx &
+					       tp->rx_std_ring_mask;
 			tw32_rx_mbox(TG3_RX_STD_PROD_IDX_REG,
 				     tpr->rx_std_prod_idx);
 			work_mask &= ~RXD_OPAQUE_RING_STD;
@@ -4739,13 +4740,14 @@ next_pkt_nopost:
 	/* Refill RX ring(s). */
 	if (!(tp->tg3_flags3 & TG3_FLG3_ENABLE_RSS)) {
 		if (work_mask & RXD_OPAQUE_RING_STD) {
-			tpr->rx_std_prod_idx = std_prod_idx % TG3_RX_RING_SIZE;
+			tpr->rx_std_prod_idx = std_prod_idx &
+					       tp->rx_std_ring_mask;
 			tw32_rx_mbox(TG3_RX_STD_PROD_IDX_REG,
 				     tpr->rx_std_prod_idx);
 		}
 		if (work_mask & RXD_OPAQUE_RING_JUMBO) {
-			tpr->rx_jmb_prod_idx = jmb_prod_idx %
-					       TG3_RX_JUMBO_RING_SIZE;
+			tpr->rx_jmb_prod_idx = jmb_prod_idx &
+					       tp->rx_jmb_ring_mask;
 			tw32_rx_mbox(TG3_RX_JMB_PROD_IDX_REG,
 				     tpr->rx_jmb_prod_idx);
 		}
@@ -4756,8 +4758,8 @@ next_pkt_nopost:
 		 */
 		smp_wmb();
 
-		tpr->rx_std_prod_idx = std_prod_idx % TG3_RX_RING_SIZE;
-		tpr->rx_jmb_prod_idx = jmb_prod_idx % TG3_RX_JUMBO_RING_SIZE;
+		tpr->rx_std_prod_idx = std_prod_idx & tp->rx_std_ring_mask;
+		tpr->rx_jmb_prod_idx = jmb_prod_idx & tp->rx_jmb_ring_mask;
 
 		if (tnapi != &tp->napi[1])
 			napi_schedule(&tp->napi[1].napi);
@@ -4813,9 +4815,11 @@ static int tg3_rx_prodring_xfer(struct tg3 *tp,
 		if (spr->rx_std_cons_idx < src_prod_idx)
 			cpycnt = src_prod_idx - spr->rx_std_cons_idx;
 		else
-			cpycnt = TG3_RX_RING_SIZE - spr->rx_std_cons_idx;
+			cpycnt = tp->rx_std_ring_mask + 1 -
+				 spr->rx_std_cons_idx;
 
-		cpycnt = min(cpycnt, TG3_RX_RING_SIZE - dpr->rx_std_prod_idx);
+		cpycnt = min(cpycnt,
+			     tp->rx_std_ring_mask + 1 - dpr->rx_std_prod_idx);
 
 		si = spr->rx_std_cons_idx;
 		di = dpr->rx_std_prod_idx;
@@ -4849,10 +4853,10 @@ static int tg3_rx_prodring_xfer(struct tg3 *tp,
 			dbd->addr_lo = sbd->addr_lo;
 		}
 
-		spr->rx_std_cons_idx = (spr->rx_std_cons_idx + cpycnt) %
-				       TG3_RX_RING_SIZE;
-		dpr->rx_std_prod_idx = (dpr->rx_std_prod_idx + cpycnt) %
-				       TG3_RX_RING_SIZE;
+		spr->rx_std_cons_idx = (spr->rx_std_cons_idx + cpycnt) &
+				       tp->rx_std_ring_mask;
+		dpr->rx_std_prod_idx = (dpr->rx_std_prod_idx + cpycnt) &
+				       tp->rx_std_ring_mask;
 	}
 
 	while (1) {
@@ -4869,10 +4873,11 @@ static int tg3_rx_prodring_xfer(struct tg3 *tp,
 		if (spr->rx_jmb_cons_idx < src_prod_idx)
 			cpycnt = src_prod_idx - spr->rx_jmb_cons_idx;
 		else
-			cpycnt = TG3_RX_JUMBO_RING_SIZE - spr->rx_jmb_cons_idx;
+			cpycnt = tp->rx_jmb_ring_mask + 1 -
+				 spr->rx_jmb_cons_idx;
 
 		cpycnt = min(cpycnt,
-			     TG3_RX_JUMBO_RING_SIZE - dpr->rx_jmb_prod_idx);
+			     tp->rx_jmb_ring_mask + 1 - dpr->rx_jmb_prod_idx);
 
 		si = spr->rx_jmb_cons_idx;
 		di = dpr->rx_jmb_prod_idx;
@@ -4906,10 +4911,10 @@ static int tg3_rx_prodring_xfer(struct tg3 *tp,
 			dbd->addr_lo = sbd->addr_lo;
 		}
 
-		spr->rx_jmb_cons_idx = (spr->rx_jmb_cons_idx + cpycnt) %
-				       TG3_RX_JUMBO_RING_SIZE;
-		dpr->rx_jmb_prod_idx = (dpr->rx_jmb_prod_idx + cpycnt) %
-				       TG3_RX_JUMBO_RING_SIZE;
+		spr->rx_jmb_cons_idx = (spr->rx_jmb_cons_idx + cpycnt) &
+				       tp->rx_jmb_ring_mask;
+		dpr->rx_jmb_prod_idx = (dpr->rx_jmb_prod_idx + cpycnt) &
+				       tp->rx_jmb_ring_mask;
 	}
 
 	return err;
@@ -6059,14 +6064,14 @@ static void tg3_rx_prodring_free(struct tg3 *tp,
 
 	if (tpr != &tp->napi[0].prodring) {
 		for (i = tpr->rx_std_cons_idx; i != tpr->rx_std_prod_idx;
-		     i = (i + 1) % TG3_RX_RING_SIZE)
+		     i = (i + 1) & tp->rx_std_ring_mask)
 			tg3_rx_skb_free(tp, &tpr->rx_std_buffers[i],
 					tp->rx_pkt_map_sz);
 
 		if (tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) {
 			for (i = tpr->rx_jmb_cons_idx;
 			     i != tpr->rx_jmb_prod_idx;
-			     i = (i + 1) % TG3_RX_JUMBO_RING_SIZE) {
+			     i = (i + 1) & tp->rx_jmb_ring_mask) {
 				tg3_rx_skb_free(tp, &tpr->rx_jmb_buffers[i],
 						TG3_RX_JMB_MAP_SZ);
 			}
@@ -6075,12 +6080,12 @@ static void tg3_rx_prodring_free(struct tg3 *tp,
 		return;
 	}
 
-	for (i = 0; i < TG3_RX_RING_SIZE; i++)
+	for (i = 0; i <= tp->rx_std_ring_mask; i++)
 		tg3_rx_skb_free(tp, &tpr->rx_std_buffers[i],
 				tp->rx_pkt_map_sz);
 
 	if (tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) {
-		for (i = 0; i < TG3_RX_JUMBO_RING_SIZE; i++)
+		for (i = 0; i <= tp->rx_jmb_ring_mask; i++)
 			tg3_rx_skb_free(tp, &tpr->rx_jmb_buffers[i],
 					TG3_RX_JMB_MAP_SZ);
 	}
@@ -6104,15 +6109,16 @@ static int tg3_rx_prodring_alloc(struct tg3 *tp,
 	tpr->rx_jmb_prod_idx = 0;
 
 	if (tpr != &tp->napi[0].prodring) {
-		memset(&tpr->rx_std_buffers[0], 0, TG3_RX_STD_BUFF_RING_SIZE);
+		memset(&tpr->rx_std_buffers[0], 0,
+		       TG3_RX_STD_BUFF_RING_SIZE(tp));
 		if (tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE)
 			memset(&tpr->rx_jmb_buffers[0], 0,
-			       TG3_RX_JMB_BUFF_RING_SIZE);
+			       TG3_RX_JMB_BUFF_RING_SIZE(tp));
 		goto done;
 	}
 
 	/* Zero out all descriptors. */
-	memset(tpr->rx_std, 0, TG3_RX_RING_BYTES);
+	memset(tpr->rx_std, 0, TG3_RX_STD_RING_BYTES(tp));
 
 	rx_pkt_dma_sz = TG3_RX_STD_DMA_SZ;
 	if ((tp->tg3_flags2 & TG3_FLG2_5780_CLASS) &&
@@ -6124,7 +6130,7 @@ static int tg3_rx_prodring_alloc(struct tg3 *tp,
 	 * stuff once.  This works because the card does not
 	 * write into the rx buffer posting rings.
 	 */
-	for (i = 0; i < TG3_RX_RING_SIZE; i++) {
+	for (i = 0; i <= tp->rx_std_ring_mask; i++) {
 		struct tg3_rx_buffer_desc *rxd;
 
 		rxd = &tpr->rx_std[i];
@@ -6151,12 +6157,12 @@ static int tg3_rx_prodring_alloc(struct tg3 *tp,
 	if (!(tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE))
 		goto done;
 
-	memset(tpr->rx_jmb, 0, TG3_RX_JUMBO_RING_BYTES);
+	memset(tpr->rx_jmb, 0, TG3_RX_JMB_RING_BYTES(tp));
 
 	if (!(tp->tg3_flags & TG3_FLAG_JUMBO_RING_ENABLE))
 		goto done;
 
-	for (i = 0; i < TG3_RX_JUMBO_RING_SIZE; i++) {
+	for (i = 0; i <= tp->rx_jmb_ring_mask; i++) {
 		struct tg3_rx_buffer_desc *rxd;
 
 		rxd = &tpr->rx_jmb[i].std;
@@ -6196,12 +6202,12 @@ static void tg3_rx_prodring_fini(struct tg3 *tp,
 	kfree(tpr->rx_jmb_buffers);
 	tpr->rx_jmb_buffers = NULL;
 	if (tpr->rx_std) {
-		pci_free_consistent(tp->pdev, TG3_RX_RING_BYTES,
+		pci_free_consistent(tp->pdev, TG3_RX_STD_RING_BYTES(tp),
 				    tpr->rx_std, tpr->rx_std_mapping);
 		tpr->rx_std = NULL;
 	}
 	if (tpr->rx_jmb) {
-		pci_free_consistent(tp->pdev, TG3_RX_JUMBO_RING_BYTES,
+		pci_free_consistent(tp->pdev, TG3_RX_JMB_RING_BYTES(tp),
 				    tpr->rx_jmb, tpr->rx_jmb_mapping);
 		tpr->rx_jmb = NULL;
 	}
@@ -6210,23 +6216,24 @@ static void tg3_rx_prodring_fini(struct tg3 *tp,
 static int tg3_rx_prodring_init(struct tg3 *tp,
 				struct tg3_rx_prodring_set *tpr)
 {
-	tpr->rx_std_buffers = kzalloc(TG3_RX_STD_BUFF_RING_SIZE, GFP_KERNEL);
+	tpr->rx_std_buffers = kzalloc(TG3_RX_STD_BUFF_RING_SIZE(tp),
+				      GFP_KERNEL);
 	if (!tpr->rx_std_buffers)
 		return -ENOMEM;
 
-	tpr->rx_std = pci_alloc_consistent(tp->pdev, TG3_RX_RING_BYTES,
+	tpr->rx_std = pci_alloc_consistent(tp->pdev, TG3_RX_STD_RING_BYTES(tp),
 					   &tpr->rx_std_mapping);
 	if (!tpr->rx_std)
 		goto err_out;
 
 	if (tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) {
-		tpr->rx_jmb_buffers = kzalloc(TG3_RX_JMB_BUFF_RING_SIZE,
+		tpr->rx_jmb_buffers = kzalloc(TG3_RX_JMB_BUFF_RING_SIZE(tp),
 					      GFP_KERNEL);
 		if (!tpr->rx_jmb_buffers)
 			goto err_out;
 
 		tpr->rx_jmb = pci_alloc_consistent(tp->pdev,
-						   TG3_RX_JUMBO_RING_BYTES,
+						   TG3_RX_JMB_RING_BYTES(tp),
 						   &tpr->rx_jmb_mapping);
 		if (!tpr->rx_jmb)
 			goto err_out;
@@ -9849,10 +9856,10 @@ static void tg3_get_ringparam(struct net_device *dev, struct ethtool_ringparam *
 {
 	struct tg3 *tp = netdev_priv(dev);
 
-	ering->rx_max_pending = TG3_RX_RING_SIZE - 1;
+	ering->rx_max_pending = tp->rx_std_ring_mask;
 	ering->rx_mini_max_pending = 0;
 	if (tp->tg3_flags & TG3_FLAG_JUMBO_RING_ENABLE)
-		ering->rx_jumbo_max_pending = TG3_RX_JUMBO_RING_SIZE - 1;
+		ering->rx_jumbo_max_pending = tp->rx_jmb_ring_mask;
 	else
 		ering->rx_jumbo_max_pending = 0;
 
@@ -9873,8 +9880,8 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
 	struct tg3 *tp = netdev_priv(dev);
 	int i, irq_sync = 0, err = 0;
 
-	if ((ering->rx_pending > TG3_RX_RING_SIZE - 1) ||
-	    (ering->rx_jumbo_pending > TG3_RX_JUMBO_RING_SIZE - 1) ||
+	if ((ering->rx_pending > tp->rx_std_ring_mask) ||
+	    (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) ||
 	    (ering->tx_pending > TG3_TX_RING_SIZE - 1) ||
 	    (ering->tx_pending <= MAX_SKB_FRAGS) ||
 	    ((tp->tg3_flags2 & TG3_FLG2_TSO_BUG) &&
@@ -13592,7 +13599,9 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 #endif
 	}
 
-	tp->rx_std_max_post = TG3_RX_RING_SIZE;
+	tp->rx_std_ring_mask = TG3_RX_STD_RING_SIZE(tp) - 1;
+	tp->rx_jmb_ring_mask = TG3_RX_JMB_RING_SIZE(tp) - 1;
+	tp->rx_std_max_post = tp->rx_std_ring_mask + 1;
 
 	/* Increment the rx prod index on the rx std ring by at most
 	 * 8 for these chips to workaround hw errata.
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 241e314..9763298 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2762,6 +2762,8 @@ struct tg3 {
 	void				(*write32_rx_mbox) (struct tg3 *, u32,
 							    u32);
 	u32				rx_copy_thresh;
+	u32				rx_std_ring_mask;
+	u32				rx_jmb_ring_mask;
 	u32				rx_pending;
 	u32				rx_jumbo_pending;
 	u32				rx_std_max_post;
-- 
1.7.2.2



^ permalink raw reply related

* [PATCH net-next 7/8] tg3: Add extended rx ring sizes for 5717 and 5719
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch increases the rx ring sizes for those ASIC revisions that
support larger rings.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   55 ++++++++++++++++++++++++++++++++++++++--------------
 drivers/net/tg3.h |    3 ++
 2 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 16848a9..98f7158 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -101,9 +101,15 @@
  * You can't change the ring sizes, but you can change where you place
  * them in the NIC onboard memory.
  */
-#define TG3_RX_STD_RING_SIZE(tp)	512
+#define TG3_RX_STD_RING_SIZE(tp) \
+	((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717 || \
+	  GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) ? \
+	 RX_STD_MAX_SIZE_5717 : 512)
 #define TG3_DEF_RX_RING_PENDING		200
-#define TG3_RX_JMB_RING_SIZE(tp)	256
+#define TG3_RX_JMB_RING_SIZE(tp) \
+	((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717 || \
+	  GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) ? \
+	 1024 : 256)
 #define TG3_DEF_RX_JUMBO_RING_PENDING	100
 #define TG3_RSS_INDIR_TBL_SIZE		128
 
@@ -113,9 +119,6 @@
  * hw multiply/modulo instructions.  Another solution would be to
  * replace things like '% foo' with '& (foo - 1)'.
  */
-#define TG3_RX_RCB_RING_SIZE(tp)	\
-	(((tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) && \
-	  !(tp->tg3_flags2 & TG3_FLG2_5780_CLASS)) ? 1024 : 512)
 
 #define TG3_TX_RING_SIZE		512
 #define TG3_DEF_TX_RING_PENDING		(TG3_TX_RING_SIZE - 1)
@@ -125,7 +128,7 @@
 #define TG3_RX_JMB_RING_BYTES(tp) \
 	(sizeof(struct tg3_ext_rx_buffer_desc) * TG3_RX_JMB_RING_SIZE(tp))
 #define TG3_RX_RCB_RING_BYTES(tp) \
-	(sizeof(struct tg3_rx_buffer_desc) * TG3_RX_RCB_RING_SIZE(tp))
+	(sizeof(struct tg3_rx_buffer_desc) * (tp->rx_ret_ring_mask + 1))
 #define TG3_TX_RING_BYTES	(sizeof(struct tg3_tx_buffer_desc) * \
 				 TG3_TX_RING_SIZE)
 #define NEXT_TX(N)		(((N) + 1) & (TG3_TX_RING_SIZE - 1))
@@ -4724,7 +4727,7 @@ next_pkt:
 		}
 next_pkt_nopost:
 		sw_idx++;
-		sw_idx &= (TG3_RX_RCB_RING_SIZE(tp) - 1);
+		sw_idx &= tp->rx_ret_ring_mask;
 
 		/* Refresh hw_idx to see if there is new work */
 		if (sw_idx == hw_idx) {
@@ -7612,8 +7615,8 @@ static void tg3_rings_reset(struct tg3 *tp)
 
 	if (tnapi->rx_rcb) {
 		tg3_set_bdinfo(tp, rxrcb, tnapi->rx_rcb_mapping,
-			       (TG3_RX_RCB_RING_SIZE(tp) <<
-				BDINFO_FLAGS_MAXLEN_SHIFT), 0);
+			       (tp->rx_ret_ring_mask + 1) <<
+				BDINFO_FLAGS_MAXLEN_SHIFT, 0);
 		rxrcb += TG3_BDINFO_SIZE;
 	}
 
@@ -7636,7 +7639,7 @@ static void tg3_rings_reset(struct tg3 *tp)
 		}
 
 		tg3_set_bdinfo(tp, rxrcb, tnapi->rx_rcb_mapping,
-			       (TG3_RX_RCB_RING_SIZE(tp) <<
+			       ((tp->rx_ret_ring_mask + 1) <<
 				BDINFO_FLAGS_MAXLEN_SHIFT), 0);
 
 		stblk += 8;
@@ -7949,10 +7952,14 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 			     BDINFO_FLAGS_DISABLED);
 		}
 
-		if (tp->tg3_flags3 & TG3_FLG3_5717_PLUS)
-			val = (RX_STD_MAX_SIZE_5705 << BDINFO_FLAGS_MAXLEN_SHIFT) |
-			      (TG3_RX_STD_DMA_SZ << 2);
-		else
+		if (tp->tg3_flags3 & TG3_FLG3_5717_PLUS) {
+			if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765)
+				val = RX_STD_MAX_SIZE_5705;
+			else
+				val = RX_STD_MAX_SIZE_5717;
+			val <<= BDINFO_FLAGS_MAXLEN_SHIFT;
+			val |= (TG3_RX_STD_DMA_SZ << 2);
+		} else
 			val = TG3_RX_STD_DMA_SZ << BDINFO_FLAGS_MAXLEN_SHIFT;
 	} else
 		val = RX_STD_MAX_SIZE_5705 << BDINFO_FLAGS_MAXLEN_SHIFT;
@@ -8235,7 +8242,11 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 
 	tw32(SNDBDC_MODE, SNDBDC_MODE_ENABLE | SNDBDC_MODE_ATTN_ENABLE);
 	tw32(RCVBDI_MODE, RCVBDI_MODE_ENABLE | RCVBDI_MODE_RCB_ATTN_ENAB);
-	tw32(RCVDBDI_MODE, RCVDBDI_MODE_ENABLE | RCVDBDI_MODE_INV_RING_SZ);
+	val = RCVDBDI_MODE_ENABLE | RCVDBDI_MODE_INV_RING_SZ;
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+		val |= RCVDBDI_MODE_LRG_RING_SZ;
+	tw32(RCVDBDI_MODE, val);
 	tw32(SNDDATAI_MODE, SNDDATAI_MODE_ENABLE);
 	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
 		tw32(SNDDATAI_MODE, SNDDATAI_MODE_ENABLE | 0x8);
@@ -12846,6 +12857,18 @@ static void inline vlan_features_add(struct net_device *dev, unsigned long flags
 #endif
 }
 
+static inline u32 tg3_rx_ret_ring_size(struct tg3 *tp)
+{
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+		return 4096;
+	else if ((tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) &&
+		 !(tp->tg3_flags2 & TG3_FLG2_5780_CLASS))
+		return 1024;
+	else
+		return 512;
+}
+
 static int __devinit tg3_get_invariants(struct tg3 *tp)
 {
 	static struct pci_device_id write_reorder_chipsets[] = {
@@ -13601,6 +13624,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 
 	tp->rx_std_ring_mask = TG3_RX_STD_RING_SIZE(tp) - 1;
 	tp->rx_jmb_ring_mask = TG3_RX_JMB_RING_SIZE(tp) - 1;
+	tp->rx_ret_ring_mask = tg3_rx_ret_ring_size(tp) - 1;
+
 	tp->rx_std_max_post = tp->rx_std_ring_mask + 1;
 
 	/* Increment the rx prod index on the rx std ring by at most
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 9763298..f6b709a 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -26,6 +26,7 @@
 #define TG3_RX_INTERNAL_RING_SZ_5906	32
 
 #define RX_STD_MAX_SIZE_5705		512
+#define RX_STD_MAX_SIZE_5717		2048
 #define RX_JUMBO_MAX_SIZE		0xdeadbeef /* XXX */
 
 /* First 256 bytes are a mirror of PCI config space. */
@@ -972,6 +973,7 @@
 #define  RCVDBDI_MODE_JUMBOBD_NEEDED	 0x00000004
 #define  RCVDBDI_MODE_FRM_TOO_BIG	 0x00000008
 #define  RCVDBDI_MODE_INV_RING_SZ	 0x00000010
+#define  RCVDBDI_MODE_LRG_RING_SZ	 0x00010000
 #define RCVDBDI_STATUS			0x00002404
 #define  RCVDBDI_STATUS_JUMBOBD_NEEDED	 0x00000004
 #define  RCVDBDI_STATUS_FRM_TOO_BIG	 0x00000008
@@ -2764,6 +2766,7 @@ struct tg3 {
 	u32				rx_copy_thresh;
 	u32				rx_std_ring_mask;
 	u32				rx_jmb_ring_mask;
+	u32				rx_ret_ring_mask;
 	u32				rx_pending;
 	u32				rx_jumbo_pending;
 	u32				rx_std_max_post;
-- 
1.7.2.2



^ permalink raw reply related

* [PATCH net-next 2/8] tg3: 5719: Prevent tx data corruption
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch enables a bit that prevents read DMA overflows and adjusts
the txmbuf margin from the hardware default.  The combination of these
modifications prevents a tx data corruption issue we were seeing on the
5719.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   12 +++++++++++-
 drivers/net/tg3.h |    8 +++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index d64fec1..4f35a5c 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -7857,7 +7857,10 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 	tw32(BUFMGR_DMA_HIGH_WATER,
 	     tp->bufmgr_config.dma_high_water);
 
-	tw32(BUFMGR_MODE, BUFMGR_MODE_ENABLE | BUFMGR_MODE_ATTN_ENABLE);
+	val = BUFMGR_MODE_ENABLE | BUFMGR_MODE_ATTN_ENABLE;
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719)
+		val |= BUFMGR_MODE_NO_TX_UNDERRUN;
+	tw32(BUFMGR_MODE, val);
 	for (i = 0; i < 2000; i++) {
 		if (tr32(BUFMGR_MODE) & BUFMGR_MODE_ENABLE)
 			break;
@@ -8037,6 +8040,13 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 		     val | TG3_RDMA_RSRVCTRL_FIFO_OFLW_FIX);
 	}
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) {
+		val = tr32(TG3_LSO_RD_DMA_CRPTEN_CTRL);
+		tw32(TG3_LSO_RD_DMA_CRPTEN_CTRL, val |
+		     TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_BD_4K |
+		     TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_LSO_4K);
+	}
+
 	/* Receive/send statistics. */
 	if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS) {
 		val = tr32(RCVLPC_STATS_ENABLE);
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 44733e4..ec62f05 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1225,6 +1225,7 @@
 #define  BUFMGR_MODE_ATTN_ENABLE	 0x00000004
 #define  BUFMGR_MODE_BM_TEST		 0x00000008
 #define  BUFMGR_MODE_MBLOW_ATTN_ENAB	 0x00000010
+#define  BUFMGR_MODE_NO_TX_UNDERRUN	 0x80000000
 #define BUFMGR_STATUS			0x00004404
 #define  BUFMGR_STATUS_ERROR		 0x00000004
 #define  BUFMGR_STATUS_MBLOW		 0x00000010
@@ -1306,7 +1307,12 @@
 
 #define TG3_RDMA_RSRVCTRL_REG		0x00004900
 #define TG3_RDMA_RSRVCTRL_FIFO_OFLW_FIX	 0x00000004
-/* 0x4904 --> 0x4c00 unused */
+/* 0x4904 --> 0x4910 unused */
+
+#define TG3_LSO_RD_DMA_CRPTEN_CTRL	0x00004910
+#define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_BD_4K	 0x00030000
+#define TG3_LSO_RD_DMA_CRPTEN_CTRL_BLEN_LSO_4K	 0x000c0000
+/* 0x4914 --> 0x4c00 unused */
 
 /* Write DMA control registers */
 #define WDMAC_MODE			0x00004c00
-- 
1.7.2.2



^ permalink raw reply related

* [PATCH net-next 0/8] tg3: Bugfixes and updates
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patchset implements some bugfixes, removes the 5724 device
ID and introduces extended rx buffer rings.



^ permalink raw reply

* [PATCH net-next 1/8] tg3: Fix potential netpoll crash
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

Up until now the tg3 driver would call netif_napi_add() for the maximum
number of NAPI instances the driver could use.  The problem is that
netpoll could call tg3_poll() on instances that are not active.  The net
effect is that the driver will crash attempting to dereference
uninitialized pointers.

The fix is to only allocate as many NAPI instances as the driver would
use in tg3_open() and delete them in tg3_close().

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |  111 +++++++++++++++++++++++++++++++----------------------
 1 files changed, 65 insertions(+), 46 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index fdb438d..d64fec1 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -752,42 +752,6 @@ static void tg3_int_reenable(struct tg3_napi *tnapi)
 		     HOSTCC_MODE_ENABLE | tnapi->coal_now);
 }
 
-static void tg3_napi_disable(struct tg3 *tp)
-{
-	int i;
-
-	for (i = tp->irq_cnt - 1; i >= 0; i--)
-		napi_disable(&tp->napi[i].napi);
-}
-
-static void tg3_napi_enable(struct tg3 *tp)
-{
-	int i;
-
-	for (i = 0; i < tp->irq_cnt; i++)
-		napi_enable(&tp->napi[i].napi);
-}
-
-static inline void tg3_netif_stop(struct tg3 *tp)
-{
-	tp->dev->trans_start = jiffies;	/* prevent tx timeout */
-	tg3_napi_disable(tp);
-	netif_tx_disable(tp->dev);
-}
-
-static inline void tg3_netif_start(struct tg3 *tp)
-{
-	/* NOTE: unconditional netif_tx_wake_all_queues is only
-	 * appropriate so long as all callers are assured to
-	 * have free tx slots (such as after tg3_init_hw)
-	 */
-	netif_tx_wake_all_queues(tp->dev);
-
-	tg3_napi_enable(tp);
-	tp->napi[0].hw_status->status |= SD_STATUS_UPDATED;
-	tg3_enable_ints(tp);
-}
-
 static void tg3_switch_clocks(struct tg3 *tp)
 {
 	u32 clock_ctrl;
@@ -4338,6 +4302,11 @@ static int tg3_setup_phy(struct tg3 *tp, int force_reset)
 	return err;
 }
 
+static inline int tg3_irq_sync(struct tg3 *tp)
+{
+	return tp->irq_sync;
+}
+
 /* This is called whenever we suspect that the system chipset is re-
  * ordering the sequence of MMIO to the tx send mailbox. The symptom
  * is bogus tx completions. We try to recover by setting the
@@ -5083,6 +5052,59 @@ tx_recovery:
 	return work_done;
 }
 
+static void tg3_napi_disable(struct tg3 *tp)
+{
+	int i;
+
+	for (i = tp->irq_cnt - 1; i >= 0; i--)
+		napi_disable(&tp->napi[i].napi);
+}
+
+static void tg3_napi_enable(struct tg3 *tp)
+{
+	int i;
+
+	for (i = 0; i < tp->irq_cnt; i++)
+		napi_enable(&tp->napi[i].napi);
+}
+
+static void tg3_napi_init(struct tg3 *tp)
+{
+	int i;
+
+	netif_napi_add(tp->dev, &tp->napi[0].napi, tg3_poll, 64);
+	for (i = 1; i < tp->irq_cnt; i++)
+		netif_napi_add(tp->dev, &tp->napi[i].napi, tg3_poll_msix, 64);
+}
+
+static void tg3_napi_fini(struct tg3 *tp)
+{
+	int i;
+
+	for (i = 0; i < tp->irq_cnt; i++)
+		netif_napi_del(&tp->napi[i].napi);
+}
+
+static inline void tg3_netif_stop(struct tg3 *tp)
+{
+	tp->dev->trans_start = jiffies;	/* prevent tx timeout */
+	tg3_napi_disable(tp);
+	netif_tx_disable(tp->dev);
+}
+
+static inline void tg3_netif_start(struct tg3 *tp)
+{
+	/* NOTE: unconditional netif_tx_wake_all_queues is only
+	 * appropriate so long as all callers are assured to
+	 * have free tx slots (such as after tg3_init_hw)
+	 */
+	netif_tx_wake_all_queues(tp->dev);
+
+	tg3_napi_enable(tp);
+	tp->napi[0].hw_status->status |= SD_STATUS_UPDATED;
+	tg3_enable_ints(tp);
+}
+
 static void tg3_irq_quiesce(struct tg3 *tp)
 {
 	int i;
@@ -5096,11 +5118,6 @@ static void tg3_irq_quiesce(struct tg3 *tp)
 		synchronize_irq(tp->napi[i].irq_vec);
 }
 
-static inline int tg3_irq_sync(struct tg3 *tp)
-{
-	return tp->irq_sync;
-}
-
 /* Fully shutdown all tg3 driver activity elsewhere in the system.
  * If irq_sync is non-zero, then the IRQ handler must be synchronized
  * with as well.  Most of the time, this is not necessary except when
@@ -8915,6 +8932,8 @@ static int tg3_open(struct net_device *dev)
 	if (err)
 		goto err_out1;
 
+	tg3_napi_init(tp);
+
 	tg3_napi_enable(tp);
 
 	for (i = 0; i < tp->irq_cnt; i++) {
@@ -9002,6 +9021,7 @@ err_out3:
 
 err_out2:
 	tg3_napi_disable(tp);
+	tg3_napi_fini(tp);
 	tg3_free_consistent(tp);
 
 err_out1:
@@ -9049,6 +9069,8 @@ static int tg3_close(struct net_device *dev)
 	memcpy(&tp->estats_prev, tg3_get_estats(tp),
 	       sizeof(tp->estats_prev));
 
+	tg3_napi_fini(tp);
+
 	tg3_free_consistent(tp);
 
 	tg3_set_power_state(tp, PCI_D3hot);
@@ -14599,13 +14621,10 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 		tnapi->consmbox = rcvmbx;
 		tnapi->prodmbox = sndmbx;
 
-		if (i) {
+		if (i)
 			tnapi->coal_now = HOSTCC_MODE_COAL_VEC1_NOW << (i - 1);
-			netif_napi_add(dev, &tnapi->napi, tg3_poll_msix, 64);
-		} else {
+		else
 			tnapi->coal_now = HOSTCC_MODE_NOW;
-			netif_napi_add(dev, &tnapi->napi, tg3_poll, 64);
-		}
 
 		if (!(tp->tg3_flags & TG3_FLAG_SUPPORT_MSIX))
 			break;
-- 
1.7.2.2



^ permalink raw reply related

* [PATCH net-next 3/8] tg3: Remove 5724 device ID
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This product was never released to the public.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |    2 --
 drivers/net/tg3.h |    1 -
 2 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 4f35a5c..93228aa 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -264,7 +264,6 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57788)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5717)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5718)},
-	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5724)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57781)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57785)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57761)},
@@ -12878,7 +12877,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 
 		if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717 ||
 		    tp->pdev->device == TG3PCI_DEVICE_TIGON3_5718 ||
-		    tp->pdev->device == TG3PCI_DEVICE_TIGON3_5724 ||
 		    tp->pdev->device == TG3PCI_DEVICE_TIGON3_5719)
 			pci_read_config_dword(tp->pdev,
 					      TG3PCI_GEN2_PRODID_ASICREV,
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index ec62f05..241e314 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -46,7 +46,6 @@
 #define  TG3PCI_DEVICE_TIGON3_5785_F	 0x16a0 /* 10/100 only */
 #define  TG3PCI_DEVICE_TIGON3_5717	 0x1655
 #define  TG3PCI_DEVICE_TIGON3_5718	 0x1656
-#define  TG3PCI_DEVICE_TIGON3_5724	 0x165c
 #define  TG3PCI_DEVICE_TIGON3_57781	 0x16b1
 #define  TG3PCI_DEVICE_TIGON3_57785	 0x16b5
 #define  TG3PCI_DEVICE_TIGON3_57761	 0x16b0
-- 
1.7.2.2



^ permalink raw reply related

* [PATCH net-next 5/8] tg3: Futureproof the loopback test
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

There are other multiqueue modes 5717 and 5719 devices can assume.  This
patch makes sure that the loopback test is safe, should those other
modes be enabled in the future.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index d76e718..50b7e35 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -10642,7 +10642,8 @@ static int tg3_run_loopback(struct tg3 *tp, int loopback_mode)
 	tnapi = &tp->napi[0];
 	rnapi = &tp->napi[0];
 	if (tp->irq_cnt > 1) {
-		rnapi = &tp->napi[1];
+		if (tp->tg3_flags3 & TG3_FLG3_ENABLE_RSS)
+			rnapi = &tp->napi[1];
 		if (tp->tg3_flags3 & TG3_FLG3_ENABLE_TSS)
 			tnapi = &tp->napi[1];
 	}
-- 
1.7.2.2



^ permalink raw reply related

* [PATCH net-next 8/8] tg3: Update version to 3.114
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch updates the tg3 version to 3.114.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 98f7158..9b134fd 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -69,10 +69,10 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define TG3_MAJ_NUM			3
-#define TG3_MIN_NUM			113
+#define TG3_MIN_NUM			114
 #define DRV_MODULE_VERSION	\
 	__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE	"August 2, 2010"
+#define DRV_MODULE_RELDATE	"September 30, 2010"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
-- 
1.7.2.2



^ permalink raw reply related

* [PATCH net-next 4/8] tg3: Cleanup missing VPD partno section
From: Matt Carlson @ 2010-09-30 20:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, andy, mcarlson

This patch cleans up the default VPD partno section.  New entries for
5717 asic rev devices were also added.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   71 ++++++++++++++++++++++++++++------------------------
 1 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 93228aa..d76e718 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12527,44 +12527,49 @@ partno:
 
 out_not_found:
 	kfree(vpd_data);
-	if (!tp->board_part_number[0])
+	if (tp->board_part_number[0])
 		return;
 
 out_no_vpd:
-	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717) {
+		if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717)
+			strcpy(tp->board_part_number, "BCM5717");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_5718)
+			strcpy(tp->board_part_number, "BCM5718");
+		else
+			goto nomatch;
+	} else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780) {
+		if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57780)
+			strcpy(tp->board_part_number, "BCM57780");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57760)
+			strcpy(tp->board_part_number, "BCM57760");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57790)
+			strcpy(tp->board_part_number, "BCM57790");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57788)
+			strcpy(tp->board_part_number, "BCM57788");
+		else
+			goto nomatch;
+	} else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765) {
+		if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57761)
+			strcpy(tp->board_part_number, "BCM57761");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57765)
+			strcpy(tp->board_part_number, "BCM57765");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57781)
+			strcpy(tp->board_part_number, "BCM57781");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57785)
+			strcpy(tp->board_part_number, "BCM57785");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57791)
+			strcpy(tp->board_part_number, "BCM57791");
+		else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57795)
+			strcpy(tp->board_part_number, "BCM57795");
+		else
+			goto nomatch;
+	} else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
 		strcpy(tp->board_part_number, "BCM95906");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57780)
-		strcpy(tp->board_part_number, "BCM57780");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57760)
-		strcpy(tp->board_part_number, "BCM57760");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57790)
-		strcpy(tp->board_part_number, "BCM57790");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57780 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57788)
-		strcpy(tp->board_part_number, "BCM57788");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57761)
-		strcpy(tp->board_part_number, "BCM57761");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57765)
-		strcpy(tp->board_part_number, "BCM57765");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57781)
-		strcpy(tp->board_part_number, "BCM57781");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57785)
-		strcpy(tp->board_part_number, "BCM57785");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57791)
-		strcpy(tp->board_part_number, "BCM57791");
-	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_57765 &&
-		 tp->pdev->device == TG3PCI_DEVICE_TIGON3_57795)
-		strcpy(tp->board_part_number, "BCM57795");
-	else
+	} else {
+nomatch:
 		strcpy(tp->board_part_number, "none");
+	}
 }
 
 static int __devinit tg3_fw_img_is_valid(struct tg3 *tp, u32 offset)
-- 
1.7.2.2



^ permalink raw reply related

* Re: [PATCH V3] fs: allow for more than 2^31 files
From: Robin Holt @ 2010-09-30 20:26 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, dipankar, holt, viro, bcrl, den, mingo, mszeredi,
	cmm, npiggin, xemul, linux-kernel, netdev
In-Reply-To: <1285645611.10438.27.camel@edumazet-laptop>

On Tue, Sep 28, 2010 at 05:46:51AM +0200, Eric Dumazet wrote:
> Le lundi 27 septembre 2010 à 15:36 -0700, David Miller a écrit :
...

> Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long
> integers, and change af_unix to use an atomic_long_t instead of
> atomic_t.
> 
> get_max_files() is changed to return an unsigned long.

I _THINK_ we actually want get_max_files to return a long and have
the files_stat_struct definitions be longs.  If we do not have it that
way, we could theoretically open enough files on a single cpu to make
get_nr_files return a negative without overflowing max_files.  That,
of course, would require an insane amount of memory, but I think it is
technically more correct.

> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1352,16 +1352,16 @@ static struct ctl_table fs_table[] = {
>  	{
>  		.procname	= "file-nr",
>  		.data		= &files_stat,
> -		.maxlen		= 3*sizeof(int),
> +		.maxlen		= sizeof(files_stat),
>  		.mode		= 0444,
> -		.proc_handler	= proc_nr_files,
> +		.proc_handler	= proc_doulongvec_minmax,

With this change, don't we lose the current nr_files value?  I think
you need proc_nr_files to stay as it was.  If you disagree, we should
probably eliminate the definitions for proc_nr_files as I don't believe
they are used anywhere else.

Thanks,
Robin

^ permalink raw reply

* wireless-testing (2.6.36-rc6):  inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
From: Ben Greear @ 2010-09-30 20:16 UTC (permalink / raw)
  To: linux-wireless@vger.kernel.org, NetDev

We saw this on a system that has two ath9k APs, some extra routing tables
and rules to use them, and a user-space 'bridge' that uses packet-sockets.

Aside from a few patches to help virtualize wireless devices (and none directly to ath9k),
this is today's wireless-testing tree.

=================================
[ INFO: inconsistent lock state ]
2.6.36-rc6-wl+ #20
---------------------------------
inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
kworker/u:0/5 [HC0[0]:SC0[0]:HE1:SE1] takes:
  (&(&list->lock)->rlock){+.?...}, at: [<c0741066>] packet_rcv+0x1f3/0x27a
{IN-SOFTIRQ-W} state was registered at:
   [<c0457036>] __lock_acquire+0x27f/0xb8c
   [<c045799d>] lock_acquire+0x5a/0x78
   [<c07638dc>] _raw_spin_lock+0x1b/0x2a
   [<c0741066>] packet_rcv+0x1f3/0x27a
   [<c06d04a6>] __netif_receive_skb+0x340/0x389
   [<c06d058d>] process_backlog+0x9e/0x16e
   [<c06d1104>] net_rx_action+0x99/0x17a
   [<c04393f2>] __do_softirq+0x86/0x111
   [<c04394b3>] do_softirq+0x36/0x5a
   [<c04395ec>] irq_exit+0x35/0x69
   [<c0403fb9>] do_IRQ+0x86/0x9a
   [<c04034ee>] common_interrupt+0x2e/0x40
   [<c06adb37>] cpuidle_idle_call+0x7f/0xb4
   [<c040227f>] cpu_idle+0x4e/0x6b
   [<c074f9c5>] rest_init+0x8d/0x92
   [<c097b8e0>] start_kernel+0x316/0x31b
   [<c097b0d0>] i386_start_kernel+0xd0/0xd7
irq event stamp: 62565
hardirqs last  enabled at (62565): [<c04adf88>] kmem_cache_alloc+0xa0/0xc5
hardirqs last disabled at (62564): [<c04adf3a>] kmem_cache_alloc+0x52/0xc5
softirqs last  enabled at (62562): [<c073ffe3>] run_filter+0x9b/0xa5
softirqs last disabled at (62560): [<c073ff59>] run_filter+0x11/0xa5

other info that might help us debug this:
4 locks held by kworker/u:0/5:
  #0:  ((wiphy_name(local->hw.wiphy))){+.+...}, at: [<c0443d2d>] process_one_work+0x173/0x2c3
  #1:  ((&sc->hw_check_work)){+.+...}, at: [<c0443d2d>] process_one_work+0x173/0x2c3
  #2:  (rcu_read_lock){.+.+..}, at: [<f8ff86f8>] ieee80211_tx_status+0x5c1/0x6b9 [mac80211]
  #3:  (rcu_read_lock){.+.+..}, at: [<c06cedec>] rcu_read_lock+0x0/0x21

stack backtrace:
Pid: 5, comm: kworker/u:0 Not tainted 2.6.36-rc6-wl+ #20
Call Trace:
  [<c0761c6a>] ? printk+0xf/0x15
  [<c0455f15>] valid_state+0x131/0x144
  [<c0456017>] mark_lock+0xef/0x1de
  [<c0456738>] ? check_usage_backwards+0x0/0x68
  [<c04570a4>] __lock_acquire+0x2ed/0xb8c
  [<c0455f03>] ? valid_state+0x11f/0x144
  [<c06dd853>] ? sk_run_filter+0x1d0/0x3c0
  [<c0455f46>] ? mark_lock+0x1e/0x1de
  [<c045799d>] lock_acquire+0x5a/0x78
  [<c0741066>] ? packet_rcv+0x1f3/0x27a
  [<c07638dc>] _raw_spin_lock+0x1b/0x2a
  [<c0741066>] ? packet_rcv+0x1f3/0x27a
  [<c0741066>] packet_rcv+0x1f3/0x27a
  [<c06d04a6>] __netif_receive_skb+0x340/0x389
  [<c06d0f3b>] netif_receive_skb+0x72/0x78
  [<f8ff87a0>] ieee80211_tx_status+0x669/0x6b9 [mac80211]
  [<c0450050>] ? ntp_start_leap_timer+0x4b/0x67
  [<f9091861>] ath_tx_complete_buf+0x1ba/0x219 [ath9k]
  [<c04563a9>] ? trace_hardirqs_on_caller+0x104/0x125
  [<f9092d9d>] ath_draintxq+0x179/0x2c8 [ath9k]
  [<f9094346>] ath_drain_all_txq+0x10f/0x11d [ath9k]
  [<f908e9d1>] ath_reset+0x40/0x15e [ath9k]
  [<f908f31e>] ath_hw_check+0x3c/0x47 [ath9k]
  [<c0443d77>] process_one_work+0x1bd/0x2c3
  [<c0443d2d>] ? process_one_work+0x173/0x2c3
  [<f908f2e2>] ? ath_hw_check+0x0/0x47 [ath9k]
  [<c0445422>] worker_thread+0xf7/0x1f7
  [<c044532b>] ? worker_thread+0x0/0x1f7
  [<c0447dff>] kthread+0x62/0x67
  [<c0447d9d>] ? kthread+0x0/0x67
  [<c0403506>] kernel_thread_helper+0x6/0x1a


Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply

* sending VLAN packets via packet_mmap
From: Phil Sutter @ 2010-09-30 19:24 UTC (permalink / raw)
  To: netdev; +Cc: Johann Baudy, Eric Dumazet

Hi,

support for VLAN tags in af_packet.c seems to be incomplete. While it's
possible to receive a full packet using SOCK_RAW, sending one will fail
due to size constraints. tpacket_snd() does not account for the
additional four bytes.

There are a few possible solutions to this problem. When searching for
the most appropriate one, I've been looking at tpacket_rcv() which
simply writes the whole frame out, setting tpacket2_hdr.tp_vlan_tci on
the go. So from a user's point of view, information is redundantly
available.

The actual problem in tpacket_snd() is this:

| reserve = dev->hard_header_len;
| [...]
| if (size_max > dev->mtu + reserve)
| 	size_max = dev->mtu + reserve;

I guess the check is there to avoid skb overflows on malicious data
input. Is this correct? Are there other reasons for its existence?

As af_packet.c has no knowledge about VLANs (other than a call to
vlan_tx_tag_get()), I guess avoiding expensive parsing of the inserted
data for the VLAN tag should be appropriate. Nevertheless the check from
above needs to account for the additional VLAN_HLEN when the tag exists.

So a rather trivial solution would be to drop the check completely
(given no other constraints, of course), thereby giving the user a
little more ability to break things. Alternatively, one could require
that tpacket2_hdr.tp_vlan_tci be set (at least non-zero) to identify
packets containing a VLAN tag and allow the additional size (probably
mostly consistent to the logic inside tpacket_rcv()).

A third solution could be like the second one, but not accepting
prebuilt packets including VLAN header at all and using
tpacket2_hdr.tp_vlan_tci together with vlan_put_tag() to instead insert
it from inside the kernel.

Hopefully I didn't overlook something crucial. Feel free to flame me if
that's the case! :)

Greetings, Phil

^ permalink raw reply

* Re: [PATCH 4/5] AF_UNIX: find peers on multicast Unix stream sockets
From: Alban Crequy @ 2010-09-30 19:24 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Stephen Hemminger, Cyrill Gorcunov,
	Alexey Dobriyan, Lennart Poettering, Kay Sievers, Ian Molton,
	netdev, linux-kernel
In-Reply-To: <1285351237.2478.7.camel@edumazet-laptop>

Le Fri, 24 Sep 2010 20:00:37 +0200,
Eric Dumazet <eric.dumazet@gmail.com> a écrit :

> Le vendredi 24 septembre 2010 à 18:25 +0100, Alban Crequy a écrit :
> 
> > @@ -1612,7 +1671,12 @@ static int unix_stream_sendmsg(struct kiocb
> > *kiocb, struct socket *sock, } else {
> >  		sunaddr = NULL;
> >  		err = -ENOTCONN;
> > -		other = NULL; /* FIXME: get the list of other
> > connection */
> > +		max_others = atomic_read(&unix_nr_multicast_socks);
> > +		others = kzalloc((max_others + 1) * sizeof(void
> > *), GFP_KERNEL);
> > +		unix_find_other(sock_net(sk), u->addr->name,
> > +		    u->addr->len, 0, u->addr->hash, 1, others,
> > max_others, &err);
> > +		other = others[0];
> > +		kfree(others);
> >  		if (!other)
> >  			goto out_err;
> >  	}
> 
> Seriously, this block sizing against unix_nr_multicast_socks is not
> scalable. What happens if we have 1000 sockets ?
> kzalloc() to clear 8000 bytes ?
> It's also unsafe.
> 
> (say you kzalloc() a buffer for 2 sockets, and another cpu inserts a
> new socket. unix_find_socket_byname() can overflow the buffer)
> 
> 
> You should use a list, and allocate elements in
> unix_find_socket_byname()
> 
> struct item {
> 	struct item *next;
> 	struct sock *s;
> };

Thanks for your review.

I cannot allocate elements directly in unix_find_socket_byname()
iteration after iteration because the spinlock "unix_table_lock" is
held. If I release the spinlock to allocate, the number of sockets in
the table may change.

I changed the code to count the sockets with the lock held and then
allocate. In the unfortunate case where the allocation is not big
enough (if another process inserts a new socket), it just tries again.

The code is available here. Please pull from:

git://git.collabora.co.uk/git/user/alban/linux-2.6.35.y/.git unix-multicast2

It is still a work in progress. Missing pieces:

- The flow control does not work correctly: poll/select does not match
  the reality

- Atomic delivery: if a process is killed or interrupted in the middle
  of a delivery, only a subset of the recipients will get the message 

- Some locking to provide the same delivery order to all the recipients
  when several senders run concurrently.

Feedback welcome,

Alban Crequy

^ permalink raw reply

* [PATCH net-next] cxgb4: remove a bogus PCI function number check
From: Dimitris Michailidis @ 2010-09-30 19:17 UTC (permalink / raw)
  To: netdev; +Cc: Dimitris Michailidis

Remove a bogus PCI function number check from the driver's .remove
method that causes pci_release_regions not to be called for function 0
if additional functions are attached and one of them is used as primary.

Signed-off-by: Dimitris Michailidis <dm@chelsio.com>
---
 drivers/net/cxgb4/cxgb4_main.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/cxgb4/cxgb4_main.c b/drivers/net/cxgb4/cxgb4_main.c
index 4fb08e3..22169a7 100644
--- a/drivers/net/cxgb4/cxgb4_main.c
+++ b/drivers/net/cxgb4/cxgb4_main.c
@@ -3863,7 +3863,7 @@ static void __devexit remove_one(struct pci_dev *pdev)
 		pci_disable_device(pdev);
 		pci_release_regions(pdev);
 		pci_set_drvdata(pdev, NULL);
-	} else if (PCI_FUNC(pdev->devfn) > 0)
+	} else
 		pci_release_regions(pdev);
 }
 
-- 
1.5.4


^ permalink raw reply related

* Re: pull request: wireless-2.6 2010-09-29
From: David Miller @ 2010-09-30 19:03 UTC (permalink / raw)
  To: linville; +Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20100929203352.GC2516@tuxdriver.com>

From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 29 Sep 2010 16:33:53 -0400

> Here are two more fixes intended for 2.6.36.  One fixes a use after
> free error, the other fixes a reported regression (bug 17722).  Both are
> reasonably small and well documented in the commit logs.

Pulled, thanks John.

^ permalink raw reply

* Re: Packet time delays on multi-core systems
From: Eric Dumazet @ 2010-09-30 18:52 UTC (permalink / raw)
  To: Alexey Vlasov; +Cc: Linux Kernel Mailing List, netdev
In-Reply-To: <20100930181556.GC4094@beaver.vrungel.ru>

Le jeudi 30 septembre 2010 à 22:15 +0400, Alexey Vlasov a écrit :
> On Thu, Sep 30, 2010 at 08:03:02PM +0200, Eric Dumazet wrote:
> > >  
> > > The last test were made already concerning such rx queue binding:
> > > # cat /proc/irq/60/smp_affinity
> > > 001000
> > > # cat /proc/irq/61/smp_affinity
> > > 010000
> > > # cat /proc/irq/62/smp_affinity
> > > 080000
> > > # cat /proc/irq/63/smp_affinity
> > > 800000
> > > 
> > 
> > Why 60, 61, 62, 63 ? This should be 753, 754, 755, 756
> 
> I've got several similar servers; interrupts 60-63 are on
> the one that I can test now, so this isn't a mistake.
> 

If you have a burst of 'LOG' matches, it can really slow down the whole
thing.

You should add a limiter (eg: no more than 5 messages per second)

http://netfilter.org/documentation/HOWTO/packet-filtering-HOWTO-7.html

	This module is most useful after a limit match, so you don't
	flood your logs.

^ permalink raw reply

* Re: RFC: MTU for serving NFS on Infiniband
From: Marc Aurele La France @ 2010-09-30 18:50 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Ben Hutchings, linux-kernel, netdev, David S. Miller,
	Alexey Kuznetsov, Pekka Savola (ipv6), James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy
In-Reply-To: <alpine.WNT.2.00.1008251408520.632@cluij.ucs.ualberta.ca>

On Thu, 26 Aug 2010, Marc Aurele La France wrote:
> I do want to thank you, however, for reminding me of TCP.  It's something 
> 20/20 hindsight says I should have checked out before starting this thread. 
> Logistically, it'll be a few days before I can do so though.  If that allows 
> me to increase the MTU all the way up to 65520, then this UDP thing will 
> likely remain unresolved.

Just to close off on this.  It's been a few weeks now, but moving to NFS 
over TCP allows me to increase the MTU all the way up to 65520 without 
issues.

Thanks for the help.

Marc.

+----------------------------------+----------------------------------+
|  Marc Aurele La France           |  work:   1-780-492-9310          |
|  Academic Information and        |  fax:    1-780-492-1729          |
|    Communications Technologies   |  email:  tsi@ualberta.ca         |
|  352 General Services Building   +----------------------------------+
|  University of Alberta           |                                  |
|  Edmonton, Alberta               |    Standard disclaimers apply    |
|  T6G 2H1                         |                                  |
|  CANADA                          |                                  |
+----------------------------------+----------------------------------+

^ permalink raw reply

* Re: [MeeGo-Dev][PATCH v3] Topcliff: Update PCH_CAN driver to 2.6.35
From: David Miller @ 2010-09-30 18:50 UTC (permalink / raw)
  To: wg-5Yr1BZd7O62+XT7JhA+gdA
  Cc: andrew.chih.howe.khor-ral2JQCrhuEAvxtiuMwx3w,
	socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
	sameo-VuQAYsv1563Yd54FQh9/CA,
	margie.foster-ral2JQCrhuEAvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	masa-korg-ECg8zkTtlr0C6LszWs/t0g,
	kok.howg.ewe-ral2JQCrhuEAvxtiuMwx3w,
	joel.clark-ral2JQCrhuEAvxtiuMwx3w,
	morinaga526-ECg8zkTtlr0C6LszWs/t0g,
	meego-dev-WXzIur8shnEAvxtiuMwx3w,
	yong.y.wang-ral2JQCrhuEAvxtiuMwx3w, chripell-VaTbYqLCNhc,
	qi.wang-ral2JQCrhuEAvxtiuMwx3w
In-Reply-To: <4CA4541F.5040804-5Yr1BZd7O62+XT7JhA+gdA@public.gmane.org>

From: Wolfgang Grandegger <wg-5Yr1BZd7O62+XT7JhA+gdA@public.gmane.org>
Date: Thu, 30 Sep 2010 11:10:55 +0200

> On 09/24/2010 12:24 PM, Masayuki Ohtak wrote:
>> +};
>> +
>> +static struct can_bittiming_const pch_can_bittiming_const = {
>> +	.name = KBUILD_MODNAME,
> 
> Not sure what KBUILD_MODNAME is. Should be "pch_can", the name of the
> driver.

That's what KBUILD_MODNAME will be defined to when this gets
compiled :-)

^ permalink raw reply

* Re: how to use secure_tcp_sequence_number
From: Eric Dumazet @ 2010-09-30 18:21 UTC (permalink / raw)
  To: Nicola Padovano; +Cc: netfilter-devel, netdev
In-Reply-To: <AANLkTikWop213k7cBXAaoCMw_8LouX-qauYEbHD=d1Vh@mail.gmail.com>

Le jeudi 30 septembre 2010 à 19:01 +0200, Nicola Padovano a écrit :
> If i attempt to insert the module that uses the
> secure_tcp_sequence_number i get this error message:
> 
> from insmod:  -1 Invalid module format
> and from dmesg: version magic '2.6.35.4 SMP mod_unload 586 ' should be
> '2.6.35.4 SMP mod_unload modversions 586 '
> 
> what's the matter?

diff --git a/drivers/char/random.c b/drivers/char/random.c
index caef35a..f5ef7ba 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1550,6 +1550,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 
 	return seq;
 }
+EXPORT_SYMBOL(secure_tcp_sequence_number);
 
 /* Generate secure starting point for ephemeral IPV4 transport port search */
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: Packet time delays on multi-core systems
From: Alexey Vlasov @ 2010-09-30 18:15 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Kernel Mailing List, netdev
In-Reply-To: <1285869782.2615.920.camel@edumazet-laptop>

On Thu, Sep 30, 2010 at 08:03:02PM +0200, Eric Dumazet wrote:
> >  
> > The last test were made already concerning such rx queue binding:
> > # cat /proc/irq/60/smp_affinity
> > 001000
> > # cat /proc/irq/61/smp_affinity
> > 010000
> > # cat /proc/irq/62/smp_affinity
> > 080000
> > # cat /proc/irq/63/smp_affinity
> > 800000
> > 
> 
> Why 60, 61, 62, 63 ? This should be 753, 754, 755, 756

I've got several similar servers; interrupts 60-63 are on
the one that I can test now, so this isn't a mistake.

-- 
BRGDS. Alexey Vlasov.

^ permalink raw reply

* Re: Packet time delays on multi-core systems
From: Eric Dumazet @ 2010-09-30 18:03 UTC (permalink / raw)
  To: Alexey Vlasov; +Cc: Linux Kernel Mailing List, netdev
In-Reply-To: <20100930173732.GB4094@beaver.vrungel.ru>

Le jeudi 30 septembre 2010 à 21:37 +0400, Alexey Vlasov a écrit :
> On Thu, Sep 30, 2010 at 02:44:29PM +0200, Eric Dumazet wrote:
> > Le jeudi 30 septembre 2010 à 16:23 +0400, Alexey Vlasov a écrit :
> > > On Thu, Sep 30, 2010 at 08:33:52AM +0200, Eric Dumazet wrote:
> > > > Le jeudi 30 septembre 2010 à 10:24 +0400, Alexey Vlasov a écrit :
> > > > > Here I found some dude with the same problem:
> > > > > http://lkml.org/lkml/2010/7/9/340
> > >  
> > > Well I put interrupts from NIC, namely tx/rx queues, to different
> > > processors and got normal pings by adding LOG rule.
> > > 
> > > I also found that overruns is constantly growing, I don't know if these are connected.
> > > RX packets:2831439546 errors:0 dropped:134726 overruns:947671733 frame:0
> > > TX packets:2880849825 errors:0 dropped:0 overruns:0 carrier:0
> > > 
> 
> Too early to be happy, concerning one rule- the situation got better, but still
> there are some time delays. But adding one more rule:
> -A INPUT -p all -m state --state INVALID -j LOG --log-prefix
> "ipsec:IN-INVALID "
> it got totally wrecked:
> ...
> 64 bytes from (10.0.2.17): icmp_seq=24 ttl=64 time=0.342 ms
> 64 bytes from (10.0.2.17): icmp_seq=25 ttl=64 time=1868 ms
> 64 bytes from (10.0.2.17): icmp_seq=26 ttl=64 time=1448 ms
> 64 bytes from (10.0.2.17): icmp_seq=27 ttl=64 time=447 ms
> 64 bytes from (10.0.2.17): icmp_seq=28 ttl=64 time=0.196 ms
> ...
> 100 packets transmitted, 100 received, 0% packet loss, time 99990ms
> rtt min/avg/max/mdev = 0.108/39.068/1868.663/237.507 ms, pipe 2
> 
> # iptables -L -v -n
> Chain INPUT (policy ACCEPT 601K packets, 475M bytes)
>  pkts bytes target     prot opt in     out     source               destination
>   275 11096 LOG        all  --  *      *       0.0.0.0/0            0.0.0.0/0           state INVALID LOG flags 0 level 4 prefix `ipsec:IN-INVALID '
> 
> Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
>  pkts bytes target     prot opt in     out     source               destination
> 
> Chain OUTPUT (policy ACCEPT 529K packets, 561M bytes)
>  pkts bytes target     prot opt in     out     source               destination
> 13979  839K LOG        tcp  --  *      *       0.0.0.0/0            0.0.0.0/0           tcp dpt:80 flags:0x17/0x02 LOG flags 8 level 4 prefix `ipsec:SYN-OUTPUT-DROP '
>  
> > > Here goes the typical distribution of interrups on new servers:
> > >            CPU0    CPU1    CPU2    CPU3 ... CPU23
> > > 752:         11       0       0       0 ...     0 PCI-MSI-edge eth0
> > > 753: 2799366721       0       0       0 ...     0 PCI-MSI-edge eth0-rx3
> > > 754: 2821840553       0       0       0 ...     0 PCI-MSI-edge eth0-rx2
> > > 755: 2786117044       0       0       0 ...     0 PCI-MSI-edge eth0-rx1
> > > 756: 2896099336       0       0       0 ...     0 PCI-MSI-edge eth0-rx0
> > > 757: 1808404680       0       0       0 ...     0 PCI-MSI-edge eth0-tx3
> > > 758: 1797855130       0       0       0 ...     0 PCI-MSI-edge eth0-tx2
> > > 759: 1807222032       0       0       0 ...     0 PCI-MSI-edge eth0-tx1
> > > 760: 1820309360       0       0       0 ...     0 PCI-MSI-edge eth0-tx0
> > > 
> > 
> > echo 01 >/proc/irq/*/eth0-rx0/../smp_affinity
> > echo 02 >/proc/irq/*/eth0-rx1/../smp_affinity
> > echo 04 >/proc/irq/*/eth0-rx2/../smp_affinity
> > echo 08 >/proc/irq/*/eth0-rx3/../smp_affinity
> > 
> > 
> > cat /proc/irq/*/eth0-rx0/../smp_affinity
> > cat /proc/irq/*/eth0-rx1/../smp_affinity
> > cat /proc/irq/*/eth0-rx2/../smp_affinity
> > cat /proc/irq/*/eth0-rx3/../smp_affinity
>  
> The last test were made already concerning such rx queue binding:
> # cat /proc/irq/60/smp_affinity
> 001000
> # cat /proc/irq/61/smp_affinity
> 010000
> # cat /proc/irq/62/smp_affinity
> 080000
> # cat /proc/irq/63/smp_affinity
> 800000
> 

Why 60, 61, 62, 63 ? This should be 753, 754, 755, 756



> Now ksoftirqd eats not only one processor but all the ones where I assigned the IRQs.
> 
> > > On the old ones:
> > >            CPU0       CPU1       CPU2  ...      CPU8
> > > 502:  522320256  522384039  522327386  ... 522380267 PCI-MSI-edge eth0
> > > 
> > 
> > What network driver is it (newbox), was it (old box) ?
> 
> newbox:
> 01:00.0 Ethernet controller: Intel Corporation 82575EB Gigabit Network
> Connection (rev 02)
> driver: igb
> version: 1.3.16-k2
> firmware-version: 2.1-0
> bus-info: 0000:01:00.0
> 
> oldbox:
> 05:00.0 Ethernet controller: Intel Corporation 80003ES2LAN Gigabit
> Ethernet Controller (Copper) (rev 01)
> driver: e1000e
> version: 0.3.3.3-k6
> firmware-version: 1.0-0
> bus-info: 0000:05:00.0
> 

^ permalink raw reply

* [PATCH net-next] net: introduce DST_NOCACHE flag
From: Eric Dumazet @ 2010-09-30 17:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

While doing stress tests with IP route cache disabled, and multi queue
devices, I noticed a very high contention on one rwlock used in
neighbour code.

When many cpus are trying to send frames (possibly using a high
performance multiqueue device) to the same neighbour, they fight for the
neigh->lock rwlock in order to call neigh_hh_init(), and fight on
hh->hh_refcnt (a pair of atomic_inc/atomic_dec_and_test())

But we don't need to call neigh_hh_init() for dst that are used only
once. It costs four atomic operations at least, on two contended cache
lines, plus the high contention on neigh->lock rwlock.

Introduce a new dst flag, DST_NOCACHE, that is set when dst was not
inserted in route cache.

With the stress test bench, sending 160000000 frames on one neighbour,
results are :

Before patch:

real	2m28.406s
user	0m11.781s
sys	36m17.964s


After patch:

real	1m26.532s
user	0m12.185s
sys	20m3.903s

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/dst.h    |    9 +++++----
 net/core/neighbour.c |    4 +++-
 net/ipv4/route.c     |    1 +
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index aa53fbc..a217c83 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -43,10 +43,11 @@ struct dst_entry {
 	short			error;
 	short			obsolete;
 	int			flags;
-#define DST_HOST		1
-#define DST_NOXFRM		2
-#define DST_NOPOLICY		4
-#define DST_NOHASH		8
+#define DST_HOST		0x0001
+#define DST_NOXFRM		0x0002
+#define DST_NOPOLICY		0x0004
+#define DST_NOHASH		0x0008
+#define DST_NOCACHE		0x0010
 	unsigned long		expires;
 
 	unsigned short		header_len;	/* more space at head required */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 96b1a74..b142a0d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1210,7 +1210,9 @@ int neigh_resolve_output(struct sk_buff *skb)
 	if (!neigh_event_send(neigh, skb)) {
 		int err;
 		struct net_device *dev = neigh->dev;
-		if (dev->header_ops->cache && !dst->hh) {
+		if (dev->header_ops->cache &&
+		    !dst->hh &&
+		    !(dst->flags & DST_NOCACHE)) {
 			write_lock_bh(&neigh->lock);
 			if (!dst->hh)
 				neigh_hh_init(neigh, dst, dst->ops->protocol);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98beda4..b0c7a87 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1107,6 +1107,7 @@ restart:
 		 * on the route gc list.
 		 */
 
+		rt->dst.flags |= DST_NOCACHE;
 		if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
 			int err = arp_bind_neighbour(&rt->dst);
 			if (err) {



^ permalink raw reply related

* Re: Packet time delays on multi-core systems
From: Alexey Vlasov @ 2010-09-30 17:37 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Kernel Mailing List, netdev
In-Reply-To: <1285850669.2615.426.camel@edumazet-laptop>

On Thu, Sep 30, 2010 at 02:44:29PM +0200, Eric Dumazet wrote:
> Le jeudi 30 septembre 2010 à 16:23 +0400, Alexey Vlasov a écrit :
> > On Thu, Sep 30, 2010 at 08:33:52AM +0200, Eric Dumazet wrote:
> > > Le jeudi 30 septembre 2010 à 10:24 +0400, Alexey Vlasov a écrit :
> > > > Here I found some dude with the same problem:
> > > > http://lkml.org/lkml/2010/7/9/340
> >  
> > Well I put interrupts from NIC, namely tx/rx queues, to different
> > processors and got normal pings by adding LOG rule.
> > 
> > I also found that overruns is constantly growing, I don't know if these are connected.
> > RX packets:2831439546 errors:0 dropped:134726 overruns:947671733 frame:0
> > TX packets:2880849825 errors:0 dropped:0 overruns:0 carrier:0
> > 

Too early to be happy, concerning one rule- the situation got better, but still
there are some time delays. But adding one more rule:
-A INPUT -p all -m state --state INVALID -j LOG --log-prefix
"ipsec:IN-INVALID "
it got totally wrecked:
...
64 bytes from (10.0.2.17): icmp_seq=24 ttl=64 time=0.342 ms
64 bytes from (10.0.2.17): icmp_seq=25 ttl=64 time=1868 ms
64 bytes from (10.0.2.17): icmp_seq=26 ttl=64 time=1448 ms
64 bytes from (10.0.2.17): icmp_seq=27 ttl=64 time=447 ms
64 bytes from (10.0.2.17): icmp_seq=28 ttl=64 time=0.196 ms
...
100 packets transmitted, 100 received, 0% packet loss, time 99990ms
rtt min/avg/max/mdev = 0.108/39.068/1868.663/237.507 ms, pipe 2

# iptables -L -v -n
Chain INPUT (policy ACCEPT 601K packets, 475M bytes)
 pkts bytes target     prot opt in     out     source               destination
  275 11096 LOG        all  --  *      *       0.0.0.0/0            0.0.0.0/0           state INVALID LOG flags 0 level 4 prefix `ipsec:IN-INVALID '

Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
 pkts bytes target     prot opt in     out     source               destination

Chain OUTPUT (policy ACCEPT 529K packets, 561M bytes)
 pkts bytes target     prot opt in     out     source               destination
13979  839K LOG        tcp  --  *      *       0.0.0.0/0            0.0.0.0/0           tcp dpt:80 flags:0x17/0x02 LOG flags 8 level 4 prefix `ipsec:SYN-OUTPUT-DROP '
 
> > Here goes the typical distribution of interrups on new servers:
> >            CPU0    CPU1    CPU2    CPU3 ... CPU23
> > 752:         11       0       0       0 ...     0 PCI-MSI-edge eth0
> > 753: 2799366721       0       0       0 ...     0 PCI-MSI-edge eth0-rx3
> > 754: 2821840553       0       0       0 ...     0 PCI-MSI-edge eth0-rx2
> > 755: 2786117044       0       0       0 ...     0 PCI-MSI-edge eth0-rx1
> > 756: 2896099336       0       0       0 ...     0 PCI-MSI-edge eth0-rx0
> > 757: 1808404680       0       0       0 ...     0 PCI-MSI-edge eth0-tx3
> > 758: 1797855130       0       0       0 ...     0 PCI-MSI-edge eth0-tx2
> > 759: 1807222032       0       0       0 ...     0 PCI-MSI-edge eth0-tx1
> > 760: 1820309360       0       0       0 ...     0 PCI-MSI-edge eth0-tx0
> > 
> 
> echo 01 >/proc/irq/*/eth0-rx0/../smp_affinity
> echo 02 >/proc/irq/*/eth0-rx1/../smp_affinity
> echo 04 >/proc/irq/*/eth0-rx2/../smp_affinity
> echo 08 >/proc/irq/*/eth0-rx3/../smp_affinity
> 
> 
> cat /proc/irq/*/eth0-rx0/../smp_affinity
> cat /proc/irq/*/eth0-rx1/../smp_affinity
> cat /proc/irq/*/eth0-rx2/../smp_affinity
> cat /proc/irq/*/eth0-rx3/../smp_affinity
 
The last test were made already concerning such rx queue binding:
# cat /proc/irq/60/smp_affinity
001000
# cat /proc/irq/61/smp_affinity
010000
# cat /proc/irq/62/smp_affinity
080000
# cat /proc/irq/63/smp_affinity
800000

Now ksoftirqd eats not only one processor but all the ones where I assigned the IRQs.

> > On the old ones:
> >            CPU0       CPU1       CPU2  ...      CPU8
> > 502:  522320256  522384039  522327386  ... 522380267 PCI-MSI-edge eth0
> > 
> 
> What network driver is it (newbox), was it (old box) ?

newbox:
01:00.0 Ethernet controller: Intel Corporation 82575EB Gigabit Network
Connection (rev 02)
driver: igb
version: 1.3.16-k2
firmware-version: 2.1-0
bus-info: 0000:01:00.0

oldbox:
05:00.0 Ethernet controller: Intel Corporation 80003ES2LAN Gigabit
Ethernet Controller (Copper) (rev 01)
driver: e1000e
version: 0.3.3.3-k6
firmware-version: 1.0-0
bus-info: 0000:05:00.0

-- 
BRGDS. Alexey Vlasov.

^ permalink raw reply

* Re: how to use secure_tcp_sequence_number
From: Nicola Padovano @ 2010-09-30 17:01 UTC (permalink / raw)
  To: netfilter-devel, netdev
In-Reply-To: <AANLkTimA6jJbCvtxFi41JbH_XSJtKo0FrDE=fQ2OiafY@mail.gmail.com>

If i attempt to insert the module that uses the
secure_tcp_sequence_number i get this error message:

from insmod:  -1 Invalid module format
and from dmesg: version magic '2.6.35.4 SMP mod_unload 586 ' should be
'2.6.35.4 SMP mod_unload modversions 586 '

what's the matter?

On Wed, Sep 29, 2010 at 6:46 PM, Nicola Padovano
<nicola.padovano@gmail.com> wrote:
> Hi all. How can I export the secure_tcp_sequence_number to use it in my modules?
>
> --
> Nicola Padovano
> e-mail: nicola.padovano@gmail.com
> web: http://npadovano.altervista.org
>
> "My only ambition is not be anything at all; it seems the most
> sensible thing" (C. Bukowski)
>



-- 
Nicola Padovano
e-mail: nicola.padovano@gmail.com
web: http://npadovano.altervista.org

"My only ambition is not be anything at all; it seems the most
sensible thing" (C. Bukowski)

^ permalink raw reply

* [net-next-2.6 PATCH 3/3] bonding: reread information about speed and duplex when interface goes up
From: Krzysztof Piotr Oledzki @ 2010-09-30 16:19 UTC (permalink / raw)
  To: fubar, bonding-devel, netdev

>From 43285224a785e90c7d4cff2be0766ca8df6ddfb9 Mon Sep 17 00:00:00 2001
From: Krzysztof Piotr Oledzki <ole@ans.pl>
Date: Thu, 30 Sep 2010 17:09:02 +0200
Subject: bonding: reread information about speed and duplex when interface goes up

When an interface is enslaved while it is down, bonding thinks
it has speed -1 even after it goes up. This leads to selecting
the wrong active interface in active/backup mode in mixed 10G/1G or
1G/100M environments.

before:
 bonding: bond0: link status definitely up for interface eth5, 100 Mbps full duplex.
 bonding: bond0: link status definitely up for interface eth0, 100 Mbps full duplex.

after:
 bonding: bond0: link status definitely up for interface eth5, 10000 Mbps full duplex.
 bonding: bond0: link status definitely up for interface eth0, 1000 Mbps full duplex.

Signed-off-by: Krzysztof Piotr Oledzki <ole@ans.pl>

---
 drivers/net/bonding/bond_main.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 721abc4..e409c14 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2368,6 +2368,8 @@ static void bond_miimon_commit(struct bonding *bond)
 				slave->state = BOND_STATE_BACKUP;
 			}

+			bond_update_speed_duplex(slave);
+
 			pr_info("%s: link status definitely up for interface %s, %d Mbps %s duplex.\n",
 				bond->dev->name, slave->dev->name,
 				slave->speed, slave->duplex ? "full" : "half");
-- 
1.7.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox