Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 2/6] tg3: Consolidate MAC loopback code
From: Matt Carlson @ 2011-08-19 23:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

The driver puts the device into MAC loopback mode in two separate
places.  This patch consolidates that code into a single routine.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c |   91 +++++++++++++++++++---------------
 1 files changed, 51 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 756e2bb..4529095 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -94,7 +94,6 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 	__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
 #define DRV_MODULE_RELDATE	"May 18, 2011"
 
-#define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
 #define TG3_DEF_TX_MODE		0
 #define TG3_DEF_MSG_ENABLE	  \
@@ -6343,6 +6342,34 @@ dma_error:
 	return NETDEV_TX_OK;
 }
 
+static void tg3_mac_loopback(struct tg3 *tp, bool enable)
+{
+	if (enable) {
+		tp->mac_mode &= ~(MAC_MODE_HALF_DUPLEX |
+				  MAC_MODE_PORT_MODE_MASK);
+
+		tp->mac_mode |= MAC_MODE_PORT_INT_LPBACK;
+
+		if (!tg3_flag(tp, 5705_PLUS))
+			tp->mac_mode |= MAC_MODE_LINK_POLARITY;
+
+		if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY)
+			tp->mac_mode |= MAC_MODE_PORT_MODE_MII;
+		else
+			tp->mac_mode |= MAC_MODE_PORT_MODE_GMII;
+	} else {
+		tp->mac_mode &= ~MAC_MODE_PORT_INT_LPBACK;
+
+		if (tg3_flag(tp, 5705_PLUS) ||
+		    (tp->phy_flags & TG3_PHYFLG_PHY_SERDES) ||
+		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700)
+			tp->mac_mode &= ~MAC_MODE_LINK_POLARITY;
+	}
+
+	tw32(MAC_MODE, tp->mac_mode);
+	udelay(40);
+}
+
 static void tg3_set_loopback(struct net_device *dev, u32 features)
 {
 	struct tg3 *tp = netdev_priv(dev);
@@ -6351,16 +6378,8 @@ static void tg3_set_loopback(struct net_device *dev, u32 features)
 		if (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK)
 			return;
 
-		/*
-		 * Clear MAC_MODE_HALF_DUPLEX or you won't get packets back in
-		 * loopback mode if Half-Duplex mode was negotiated earlier.
-		 */
-		tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX;
-
-		/* Enable internal MAC loopback mode */
-		tp->mac_mode |= MAC_MODE_PORT_INT_LPBACK;
 		spin_lock_bh(&tp->lock);
-		tw32(MAC_MODE, tp->mac_mode);
+		tg3_mac_loopback(tp, true);
 		netif_carrier_on(tp->dev);
 		spin_unlock_bh(&tp->lock);
 		netdev_info(dev, "Internal MAC loopback mode enabled.\n");
@@ -6368,10 +6387,8 @@ static void tg3_set_loopback(struct net_device *dev, u32 features)
 		if (!(tp->mac_mode & MAC_MODE_PORT_INT_LPBACK))
 			return;
 
-		/* Disable internal MAC loopback mode */
-		tp->mac_mode &= ~MAC_MODE_PORT_INT_LPBACK;
 		spin_lock_bh(&tp->lock);
-		tw32(MAC_MODE, tp->mac_mode);
+		tg3_mac_loopback(tp, false);
 		/* Force link status check */
 		tg3_setup_phy(tp, 1);
 		spin_unlock_bh(&tp->lock);
@@ -11269,27 +11286,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 	}
 	coal_now = tnapi->coal_now | rnapi->coal_now;
 
-	if (loopback_mode == TG3_MAC_LOOPBACK) {
-		/* HW errata - mac loopback fails in some cases on 5780.
-		 * Normal traffic and PHY loopback are not affected by
-		 * errata.  Also, the MAC loopback test is deprecated for
-		 * all newer ASIC revisions.
-		 */
-		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5780 ||
-		    tg3_flag(tp, CPMU_PRESENT))
-			return 0;
-
-		mac_mode = tp->mac_mode &
-			   ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
-		mac_mode |= MAC_MODE_PORT_INT_LPBACK;
-		if (!tg3_flag(tp, 5705_PLUS))
-			mac_mode |= MAC_MODE_LINK_POLARITY;
-		if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY)
-			mac_mode |= MAC_MODE_PORT_MODE_MII;
-		else
-			mac_mode |= MAC_MODE_PORT_MODE_GMII;
-		tw32(MAC_MODE, mac_mode);
-	} else {
+	if (loopback_mode != TG3_MAC_LOOPBACK) {
 		if (tp->phy_flags & TG3_PHYFLG_IS_FET) {
 			tg3_phy_fet_toggle_apd(tp, false);
 			val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED100;
@@ -11554,12 +11551,26 @@ static int tg3_test_loopback(struct tg3 *tp)
 	if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
 		tg3_phy_toggle_apd(tp, false);
 
-	if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_MAC_LOOPBACK))
-		err |= TG3_STD_LOOPBACK_FAILED << TG3_MAC_LOOPBACK_SHIFT;
+	/* HW errata - mac loopback fails in some cases on 5780.
+	 * Normal traffic and PHY loopback are not affected by
+	 * errata.  Also, the MAC loopback test is deprecated for
+	 * all newer ASIC revisions.
+	 */
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5780 &&
+	    !tg3_flag(tp, CPMU_PRESENT)) {
+		tg3_mac_loopback(tp, true);
 
-	if (tg3_flag(tp, JUMBO_RING_ENABLE) &&
-	    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_MAC_LOOPBACK))
-		err |= TG3_JMB_LOOPBACK_FAILED << TG3_MAC_LOOPBACK_SHIFT;
+		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_MAC_LOOPBACK))
+			err |= TG3_STD_LOOPBACK_FAILED <<
+			       TG3_MAC_LOOPBACK_SHIFT;
+
+		if (tg3_flag(tp, JUMBO_RING_ENABLE) &&
+		    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_MAC_LOOPBACK))
+			err |= TG3_JMB_LOOPBACK_FAILED <<
+			       TG3_MAC_LOOPBACK_SHIFT;
+
+		tg3_mac_loopback(tp, false);
+	}
 
 	if (!(tp->phy_flags & TG3_PHYFLG_PHY_SERDES) &&
 	    !tg3_flag(tp, USE_PHYLIB)) {
@@ -14335,7 +14346,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	if (tg3_flag(tp, ENABLE_APE))
 		tp->mac_mode = MAC_MODE_APE_TX_EN | MAC_MODE_APE_RX_EN;
 	else
-		tp->mac_mode = TG3_DEF_MAC_MODE;
+		tp->mac_mode = 0;
 
 	/* these are limited to 10/100 only */
 	if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 &&
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 4/6] tg3: Restructure tg3_test_loopback
From: Matt Carlson @ 2011-08-19 23:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

The tg3_test_loopback() function is starting to get more complicated as
more loopback tests are added.  This patch cleans up the code.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c |   80 +++++++++++++++++-----------------
 1 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 5e9d8a0..6827b4f 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -390,12 +390,13 @@ static const struct {
 static const struct {
 	const char string[ETH_GSTRING_LEN];
 } ethtool_test_keys[] = {
-	{ "nvram test     (online) " },
-	{ "link test      (online) " },
-	{ "register test  (offline)" },
-	{ "memory test    (offline)" },
-	{ "loopback test  (offline)" },
-	{ "interrupt test (offline)" },
+	{ "nvram test        (online) " },
+	{ "link test         (online) " },
+	{ "register test     (offline)" },
+	{ "memory test       (offline)" },
+	{ "mac loopback test (offline)" },
+	{ "phy loopback test (offline)" },
+	{ "interrupt test    (offline)" },
 };
 
 #define TG3_NUM_TEST	ARRAY_SIZE(ethtool_test_keys)
@@ -11310,10 +11311,6 @@ static int tg3_test_memory(struct tg3 *tp)
 	return err;
 }
 
-#define TG3_MAC_LOOPBACK	0
-#define TG3_PHY_LOOPBACK	1
-#define TG3_TSO_LOOPBACK	2
-
 #define TG3_TSO_MSS		500
 
 #define TG3_TSO_IP_HDR_LEN	20
@@ -11337,7 +11334,7 @@ static const u8 tg3_tso_header[] = {
 0x11, 0x11, 0x11, 0x11,
 };
 
-static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
+static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, bool tso_loopback)
 {
 	u32 rx_start_idx, rx_idx, tx_idx, opaque_key;
 	u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val;
@@ -11373,7 +11370,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 
 	tw32(MAC_RX_MTU_SIZE, tx_len + ETH_FCS_LEN);
 
-	if (loopback_mode == TG3_TSO_LOOPBACK) {
+	if (tso_loopback) {
 		struct iphdr *iph = (struct iphdr *)&tx_data[ETH_HLEN];
 
 		u32 hdr_len = TG3_TSO_IP_HDR_LEN + TG3_TSO_TCP_HDR_LEN +
@@ -11493,7 +11490,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 		rx_len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT)
 			 - ETH_FCS_LEN;
 
-		if (loopback_mode != TG3_TSO_LOOPBACK) {
+		if (!tso_loopback) {
 			if (rx_len != tx_len)
 				goto out;
 
@@ -11540,25 +11537,29 @@ out:
 #define TG3_STD_LOOPBACK_FAILED		1
 #define TG3_JMB_LOOPBACK_FAILED		2
 #define TG3_TSO_LOOPBACK_FAILED		4
+#define TG3_LOOPBACK_FAILED \
+	(TG3_STD_LOOPBACK_FAILED | \
+	 TG3_JMB_LOOPBACK_FAILED | \
+	 TG3_TSO_LOOPBACK_FAILED)
 
-#define TG3_MAC_LOOPBACK_SHIFT		0
-#define TG3_PHY_LOOPBACK_SHIFT		4
-#define TG3_LOOPBACK_FAILED		0x00000077
-
-static int tg3_test_loopback(struct tg3 *tp)
+static int tg3_test_loopback(struct tg3 *tp, u64 *data)
 {
-	int err = 0;
+	int err = -EIO;
 	u32 eee_cap;
 
-	if (!netif_running(tp->dev))
-		return TG3_LOOPBACK_FAILED;
-
 	eee_cap = tp->phy_flags & TG3_PHYFLG_EEE_CAP;
 	tp->phy_flags &= ~TG3_PHYFLG_EEE_CAP;
 
+	if (!netif_running(tp->dev)) {
+		data[0] = TG3_LOOPBACK_FAILED;
+		data[1] = TG3_LOOPBACK_FAILED;
+		goto done;
+	}
+
 	err = tg3_reset_hw(tp, 1);
 	if (err) {
-		err = TG3_LOOPBACK_FAILED;
+		data[0] = TG3_LOOPBACK_FAILED;
+		data[1] = TG3_LOOPBACK_FAILED;
 		goto done;
 	}
 
@@ -11580,14 +11581,12 @@ static int tg3_test_loopback(struct tg3 *tp)
 	    !tg3_flag(tp, CPMU_PRESENT)) {
 		tg3_mac_loopback(tp, true);
 
-		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_MAC_LOOPBACK))
-			err |= TG3_STD_LOOPBACK_FAILED <<
-			       TG3_MAC_LOOPBACK_SHIFT;
+		if (tg3_run_loopback(tp, ETH_FRAME_LEN, false))
+			data[0] |= TG3_STD_LOOPBACK_FAILED;
 
 		if (tg3_flag(tp, JUMBO_RING_ENABLE) &&
-		    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_MAC_LOOPBACK))
-			err |= TG3_JMB_LOOPBACK_FAILED <<
-			       TG3_MAC_LOOPBACK_SHIFT;
+		    tg3_run_loopback(tp, 9000 + ETH_HLEN, false))
+			data[0] |= TG3_JMB_LOOPBACK_FAILED;
 
 		tg3_mac_loopback(tp, false);
 	}
@@ -11605,23 +11604,22 @@ static int tg3_test_loopback(struct tg3 *tp)
 			mdelay(1);
 		}
 
-		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_PHY_LOOPBACK))
-			err |= TG3_STD_LOOPBACK_FAILED <<
-			       TG3_PHY_LOOPBACK_SHIFT;
+		if (tg3_run_loopback(tp, ETH_FRAME_LEN, false))
+			data[1] |= TG3_STD_LOOPBACK_FAILED;
 		if (tg3_flag(tp, TSO_CAPABLE) &&
-		    tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_TSO_LOOPBACK))
-			err |= TG3_TSO_LOOPBACK_FAILED <<
-			       TG3_PHY_LOOPBACK_SHIFT;
+		    tg3_run_loopback(tp, ETH_FRAME_LEN, true))
+			data[1] |= TG3_TSO_LOOPBACK_FAILED;
 		if (tg3_flag(tp, JUMBO_RING_ENABLE) &&
-		    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_PHY_LOOPBACK))
-			err |= TG3_JMB_LOOPBACK_FAILED <<
-			       TG3_PHY_LOOPBACK_SHIFT;
+		    tg3_run_loopback(tp, 9000 + ETH_HLEN, false))
+			data[1] |= TG3_JMB_LOOPBACK_FAILED;
 
 		/* Re-enable gphy autopowerdown. */
 		if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
 			tg3_phy_toggle_apd(tp, true);
 	}
 
+	err = (data[0] | data[1]) ? -EIO : 0;
+
 done:
 	tp->phy_flags |= eee_cap;
 
@@ -11676,18 +11674,20 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
 			etest->flags |= ETH_TEST_FL_FAILED;
 			data[2] = 1;
 		}
+
 		if (tg3_test_memory(tp) != 0) {
 			etest->flags |= ETH_TEST_FL_FAILED;
 			data[3] = 1;
 		}
-		if ((data[4] = tg3_test_loopback(tp)) != 0)
+
+		if (tg3_test_loopback(tp, &data[4]))
 			etest->flags |= ETH_TEST_FL_FAILED;
 
 		tg3_full_unlock(tp);
 
 		if (tg3_test_interrupt(tp) != 0) {
 			etest->flags |= ETH_TEST_FL_FAILED;
-			data[5] = 1;
+			data[6] = 1;
 		}
 
 		tg3_full_lock(tp, 0);
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 5/6] tg3: Add external loopback support to selftest
From: Matt Carlson @ 2011-08-19 23:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch adds external loopback support to tg3's ethtool selftest.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c |   97 +++++++++++++++++++++++++++++++----
 drivers/net/ethernet/broadcom/tg3.h |    3 +
 2 files changed, 90 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 6827b4f..eb3b5ce 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -396,6 +396,7 @@ static const struct {
 	{ "memory test       (offline)" },
 	{ "mac loopback test (offline)" },
 	{ "phy loopback test (offline)" },
+	{ "ext loopback test (offline)" },
 	{ "interrupt test    (offline)" },
 };
 
@@ -1680,6 +1681,36 @@ static void tg3_phy_fini(struct tg3 *tp)
 	}
 }
 
+static int tg3_phy_set_extloopbk(struct tg3 *tp)
+{
+	int err;
+	u32 val;
+
+	if (tp->phy_flags & TG3_PHYFLG_IS_FET)
+		return 0;
+
+	if ((tp->phy_id & TG3_PHY_ID_MASK) == TG3_PHY_ID_BCM5401) {
+		/* Cannot do read-modify-write on 5401 */
+		err = tg3_phy_auxctl_write(tp,
+					   MII_TG3_AUXCTL_SHDWSEL_AUXCTL,
+					   MII_TG3_AUXCTL_ACTL_EXTLOOPBK |
+					   0x4c20);
+		goto done;
+	}
+
+	err = tg3_phy_auxctl_read(tp,
+				  MII_TG3_AUXCTL_SHDWSEL_AUXCTL, &val);
+	if (err)
+		return err;
+
+	val |= MII_TG3_AUXCTL_ACTL_EXTLOOPBK;
+	err = tg3_phy_auxctl_write(tp,
+				   MII_TG3_AUXCTL_SHDWSEL_AUXCTL, val);
+
+done:
+	return err;
+}
+
 static void tg3_phy_fet_toggle_apd(struct tg3 *tp, bool enable)
 {
 	u32 phytest;
@@ -6371,14 +6402,17 @@ static void tg3_mac_loopback(struct tg3 *tp, bool enable)
 	udelay(40);
 }
 
-static void tg3_phy_lpbk_set(struct tg3 *tp, u32 speed)
+static int tg3_phy_lpbk_set(struct tg3 *tp, u32 speed, bool extlpbk)
 {
-	u32 val, bmcr, mac_mode;
+	u32 val, bmcr, mac_mode, ptest = 0;
 
 	tg3_phy_toggle_apd(tp, false);
 	tg3_phy_toggle_automdix(tp, 0);
 
-	bmcr = BMCR_LOOPBACK | BMCR_FULLDPLX;
+	if (extlpbk && tg3_phy_set_extloopbk(tp))
+		return -EIO;
+
+	bmcr = BMCR_FULLDPLX;
 	switch (speed) {
 	case SPEED_10:
 		break;
@@ -6396,6 +6430,20 @@ static void tg3_phy_lpbk_set(struct tg3 *tp, u32 speed)
 		}
 	}
 
+	if (extlpbk) {
+		if (!(tp->phy_flags & TG3_PHYFLG_IS_FET)) {
+			tg3_readphy(tp, MII_CTRL1000, &val);
+			val |= CTL1000_AS_MASTER |
+			       CTL1000_ENABLE_MASTER;
+			tg3_writephy(tp, MII_CTRL1000, val);
+		} else {
+			ptest = MII_TG3_FET_PTEST_TRIM_SEL |
+				MII_TG3_FET_PTEST_TRIM_2;
+			tg3_writephy(tp, MII_TG3_FET_PTEST, ptest);
+		}
+	} else
+		bmcr |= BMCR_LOOPBACK;
+
 	tg3_writephy(tp, MII_BMCR, bmcr);
 
 	/* The write needs to be flushed for the FETs */
@@ -6406,7 +6454,7 @@ static void tg3_phy_lpbk_set(struct tg3 *tp, u32 speed)
 
 	if ((tp->phy_flags & TG3_PHYFLG_IS_FET) &&
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) {
-		tg3_writephy(tp, MII_TG3_FET_PTEST,
+		tg3_writephy(tp, MII_TG3_FET_PTEST, ptest |
 			     MII_TG3_FET_PTEST_FRC_TX_LINK |
 			     MII_TG3_FET_PTEST_FRC_TX_LOCK);
 
@@ -6443,6 +6491,8 @@ static void tg3_phy_lpbk_set(struct tg3 *tp, u32 speed)
 
 	tw32(MAC_MODE, mac_mode);
 	udelay(40);
+
+	return 0;
 }
 
 static void tg3_set_loopback(struct net_device *dev, u32 features)
@@ -11542,7 +11592,7 @@ out:
 	 TG3_JMB_LOOPBACK_FAILED | \
 	 TG3_TSO_LOOPBACK_FAILED)
 
-static int tg3_test_loopback(struct tg3 *tp, u64 *data)
+static int tg3_test_loopback(struct tg3 *tp, u64 *data, bool do_extlpbk)
 {
 	int err = -EIO;
 	u32 eee_cap;
@@ -11553,6 +11603,8 @@ static int tg3_test_loopback(struct tg3 *tp, u64 *data)
 	if (!netif_running(tp->dev)) {
 		data[0] = TG3_LOOPBACK_FAILED;
 		data[1] = TG3_LOOPBACK_FAILED;
+		if (do_extlpbk)
+			data[2] = TG3_LOOPBACK_FAILED;
 		goto done;
 	}
 
@@ -11560,6 +11612,8 @@ static int tg3_test_loopback(struct tg3 *tp, u64 *data)
 	if (err) {
 		data[0] = TG3_LOOPBACK_FAILED;
 		data[1] = TG3_LOOPBACK_FAILED;
+		if (do_extlpbk)
+			data[2] = TG3_LOOPBACK_FAILED;
 		goto done;
 	}
 
@@ -11595,7 +11649,7 @@ static int tg3_test_loopback(struct tg3 *tp, u64 *data)
 	    !tg3_flag(tp, USE_PHYLIB)) {
 		int i;
 
-		tg3_phy_lpbk_set(tp, 0);
+		tg3_phy_lpbk_set(tp, 0, false);
 
 		/* Wait for link */
 		for (i = 0; i < 100; i++) {
@@ -11613,12 +11667,31 @@ static int tg3_test_loopback(struct tg3 *tp, u64 *data)
 		    tg3_run_loopback(tp, 9000 + ETH_HLEN, false))
 			data[1] |= TG3_JMB_LOOPBACK_FAILED;
 
+		if (do_extlpbk) {
+			tg3_phy_lpbk_set(tp, 0, true);
+
+			/* All link indications report up, but the hardware
+			 * isn't really ready for about 20 msec.  Double it
+			 * to be sure.
+			 */
+			mdelay(40);
+
+			if (tg3_run_loopback(tp, ETH_FRAME_LEN, false))
+				data[2] |= TG3_STD_LOOPBACK_FAILED;
+			if (tg3_flag(tp, TSO_CAPABLE) &&
+			    tg3_run_loopback(tp, ETH_FRAME_LEN, true))
+				data[2] |= TG3_TSO_LOOPBACK_FAILED;
+			if (tg3_flag(tp, JUMBO_RING_ENABLE) &&
+			    tg3_run_loopback(tp, 9000 + ETH_HLEN, false))
+				data[2] |= TG3_JMB_LOOPBACK_FAILED;
+		}
+
 		/* Re-enable gphy autopowerdown. */
 		if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
 			tg3_phy_toggle_apd(tp, true);
 	}
 
-	err = (data[0] | data[1]) ? -EIO : 0;
+	err = (data[0] | data[1] | data[2]) ? -EIO : 0;
 
 done:
 	tp->phy_flags |= eee_cap;
@@ -11630,6 +11703,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
 			  u64 *data)
 {
 	struct tg3 *tp = netdev_priv(dev);
+	bool doextlpbk = etest->flags & ETH_TEST_FL_EXTERNAL_LB;
 
 	if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) &&
 	    tg3_power_up(tp)) {
@@ -11644,7 +11718,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
 		etest->flags |= ETH_TEST_FL_FAILED;
 		data[0] = 1;
 	}
-	if (tg3_test_link(tp) != 0) {
+	if (!doextlpbk && tg3_test_link(tp)) {
 		etest->flags |= ETH_TEST_FL_FAILED;
 		data[1] = 1;
 	}
@@ -11680,14 +11754,17 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
 			data[3] = 1;
 		}
 
-		if (tg3_test_loopback(tp, &data[4]))
+		if (doextlpbk)
+			etest->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE;
+
+		if (tg3_test_loopback(tp, &data[4], doextlpbk))
 			etest->flags |= ETH_TEST_FL_FAILED;
 
 		tg3_full_unlock(tp);
 
 		if (tg3_test_interrupt(tp) != 0) {
 			etest->flags |= ETH_TEST_FL_FAILED;
-			data[6] = 1;
+			data[7] = 1;
 		}
 
 		tg3_full_lock(tp, 0);
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 2ea456d..d2976f3 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -2197,6 +2197,7 @@
 #define MII_TG3_AUXCTL_ACTL_TX_6DB	0x0400
 #define MII_TG3_AUXCTL_ACTL_SMDSP_ENA	0x0800
 #define MII_TG3_AUXCTL_ACTL_EXTPKTLEN	0x4000
+#define MII_TG3_AUXCTL_ACTL_EXTLOOPBK	0x8000
 
 #define MII_TG3_AUXCTL_SHDWSEL_PWRCTL	0x0002
 #define MII_TG3_AUXCTL_PCTL_WOL_EN	0x0008
@@ -2262,6 +2263,8 @@
 
 /* Fast Ethernet Tranceiver definitions */
 #define MII_TG3_FET_PTEST		0x17
+#define  MII_TG3_FET_PTEST_TRIM_SEL	0x0010
+#define  MII_TG3_FET_PTEST_TRIM_2	0x0002
 #define  MII_TG3_FET_PTEST_FRC_TX_LINK	0x1000
 #define  MII_TG3_FET_PTEST_FRC_TX_LOCK	0x0800
 
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 3/6] tg3: Pull phy int lpbk setup into separate func
From: Matt Carlson @ 2011-08-19 23:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch pulls out the internal phy loopback setup code into a
separate function.  This cleans up the loopback test code and makes it
available for NETIF_F_LOOPBACK support later.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c |  149 +++++++++++++++++++++--------------
 1 files changed, 90 insertions(+), 59 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 4529095..5e9d8a0 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6370,6 +6370,80 @@ static void tg3_mac_loopback(struct tg3 *tp, bool enable)
 	udelay(40);
 }
 
+static void tg3_phy_lpbk_set(struct tg3 *tp, u32 speed)
+{
+	u32 val, bmcr, mac_mode;
+
+	tg3_phy_toggle_apd(tp, false);
+	tg3_phy_toggle_automdix(tp, 0);
+
+	bmcr = BMCR_LOOPBACK | BMCR_FULLDPLX;
+	switch (speed) {
+	case SPEED_10:
+		break;
+	case SPEED_100:
+		bmcr |= BMCR_SPEED100;
+		break;
+	case SPEED_1000:
+	default:
+		if (tp->phy_flags & TG3_PHYFLG_IS_FET) {
+			speed = SPEED_100;
+			bmcr |= BMCR_SPEED100;
+		} else {
+			speed = SPEED_1000;
+			bmcr |= BMCR_SPEED1000;
+		}
+	}
+
+	tg3_writephy(tp, MII_BMCR, bmcr);
+
+	/* The write needs to be flushed for the FETs */
+	if (tp->phy_flags & TG3_PHYFLG_IS_FET)
+		tg3_readphy(tp, MII_BMCR, &bmcr);
+
+	udelay(40);
+
+	if ((tp->phy_flags & TG3_PHYFLG_IS_FET) &&
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) {
+		tg3_writephy(tp, MII_TG3_FET_PTEST,
+			     MII_TG3_FET_PTEST_FRC_TX_LINK |
+			     MII_TG3_FET_PTEST_FRC_TX_LOCK);
+
+		/* The write needs to be flushed for the AC131 */
+		tg3_readphy(tp, MII_TG3_FET_PTEST, &val);
+	}
+
+	/* Reset to prevent losing 1st rx packet intermittently */
+	if ((tp->phy_flags & TG3_PHYFLG_MII_SERDES) &&
+	    tg3_flag(tp, 5780_CLASS)) {
+		tw32_f(MAC_RX_MODE, RX_MODE_RESET);
+		udelay(10);
+		tw32_f(MAC_RX_MODE, tp->rx_mode);
+	}
+
+	mac_mode = tp->mac_mode &
+		   ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
+	if (speed == SPEED_1000)
+		mac_mode |= MAC_MODE_PORT_MODE_GMII;
+	else
+		mac_mode |= MAC_MODE_PORT_MODE_MII;
+
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) {
+		u32 masked_phy_id = tp->phy_id & TG3_PHY_ID_MASK;
+
+		if (masked_phy_id == TG3_PHY_ID_BCM5401)
+			mac_mode &= ~MAC_MODE_LINK_POLARITY;
+		else if (masked_phy_id == TG3_PHY_ID_BCM5411)
+			mac_mode |= MAC_MODE_LINK_POLARITY;
+
+		tg3_writephy(tp, MII_TG3_EXT_CTRL,
+			     MII_TG3_EXT_CTRL_LNK3_LED_MODE);
+	}
+
+	tw32(MAC_MODE, mac_mode);
+	udelay(40);
+}
+
 static void tg3_set_loopback(struct net_device *dev, u32 features)
 {
 	struct tg3 *tp = netdev_priv(dev);
@@ -11265,7 +11339,7 @@ static const u8 tg3_tso_header[] = {
 
 static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 {
-	u32 mac_mode, rx_start_idx, rx_idx, tx_idx, opaque_key;
+	u32 rx_start_idx, rx_idx, tx_idx, opaque_key;
 	u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val;
 	u32 budget;
 	struct sk_buff *skb, *rx_skb;
@@ -11286,56 +11360,6 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode)
 	}
 	coal_now = tnapi->coal_now | rnapi->coal_now;
 
-	if (loopback_mode != TG3_MAC_LOOPBACK) {
-		if (tp->phy_flags & TG3_PHYFLG_IS_FET) {
-			tg3_phy_fet_toggle_apd(tp, false);
-			val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED100;
-		} else
-			val = BMCR_LOOPBACK | BMCR_FULLDPLX | BMCR_SPEED1000;
-
-		tg3_phy_toggle_automdix(tp, 0);
-
-		tg3_writephy(tp, MII_BMCR, val);
-		udelay(40);
-
-		mac_mode = tp->mac_mode &
-			   ~(MAC_MODE_PORT_MODE_MASK | MAC_MODE_HALF_DUPLEX);
-		if (tp->phy_flags & TG3_PHYFLG_IS_FET) {
-			tg3_writephy(tp, MII_TG3_FET_PTEST,
-				     MII_TG3_FET_PTEST_FRC_TX_LINK |
-				     MII_TG3_FET_PTEST_FRC_TX_LOCK);
-			/* The write needs to be flushed for the AC131 */
-			if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785)
-				tg3_readphy(tp, MII_TG3_FET_PTEST, &val);
-			mac_mode |= MAC_MODE_PORT_MODE_MII;
-		} else
-			mac_mode |= MAC_MODE_PORT_MODE_GMII;
-
-		/* reset to prevent losing 1st rx packet intermittently */
-		if (tp->phy_flags & TG3_PHYFLG_MII_SERDES) {
-			tw32_f(MAC_RX_MODE, RX_MODE_RESET);
-			udelay(10);
-			tw32_f(MAC_RX_MODE, tp->rx_mode);
-		}
-		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700) {
-			u32 masked_phy_id = tp->phy_id & TG3_PHY_ID_MASK;
-			if (masked_phy_id == TG3_PHY_ID_BCM5401)
-				mac_mode &= ~MAC_MODE_LINK_POLARITY;
-			else if (masked_phy_id == TG3_PHY_ID_BCM5411)
-				mac_mode |= MAC_MODE_LINK_POLARITY;
-			tg3_writephy(tp, MII_TG3_EXT_CTRL,
-				     MII_TG3_EXT_CTRL_LNK3_LED_MODE);
-		}
-		tw32(MAC_MODE, mac_mode);
-
-		/* Wait for link */
-		for (i = 0; i < 100; i++) {
-			if (tr32(MAC_TX_STATUS) & TX_STATUS_LINK_UP)
-				break;
-			mdelay(1);
-		}
-	}
-
 	err = -EIO;
 
 	tx_len = pktsz;
@@ -11547,10 +11571,6 @@ static int tg3_test_loopback(struct tg3 *tp)
 			tw32(i, 0x0);
 	}
 
-	/* Turn off gphy autopowerdown. */
-	if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
-		tg3_phy_toggle_apd(tp, false);
-
 	/* HW errata - mac loopback fails in some cases on 5780.
 	 * Normal traffic and PHY loopback are not affected by
 	 * errata.  Also, the MAC loopback test is deprecated for
@@ -11574,6 +11594,17 @@ static int tg3_test_loopback(struct tg3 *tp)
 
 	if (!(tp->phy_flags & TG3_PHYFLG_PHY_SERDES) &&
 	    !tg3_flag(tp, USE_PHYLIB)) {
+		int i;
+
+		tg3_phy_lpbk_set(tp, 0);
+
+		/* Wait for link */
+		for (i = 0; i < 100; i++) {
+			if (tr32(MAC_TX_STATUS) & TX_STATUS_LINK_UP)
+				break;
+			mdelay(1);
+		}
+
 		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_PHY_LOOPBACK))
 			err |= TG3_STD_LOOPBACK_FAILED <<
 			       TG3_PHY_LOOPBACK_SHIFT;
@@ -11585,11 +11616,11 @@ static int tg3_test_loopback(struct tg3 *tp)
 		    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_PHY_LOOPBACK))
 			err |= TG3_JMB_LOOPBACK_FAILED <<
 			       TG3_PHY_LOOPBACK_SHIFT;
-	}
 
-	/* Re-enable gphy autopowerdown. */
-	if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
-		tg3_phy_toggle_apd(tp, true);
+		/* Re-enable gphy autopowerdown. */
+		if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
+			tg3_phy_toggle_apd(tp, true);
+	}
 
 done:
 	tp->phy_flags |= eee_cap;
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 6/6] tg3: Update version to 3.120
From: Matt Carlson @ 2011-08-19 23:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch updates the tg3 version to 3.120.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index eb3b5ce..b3251dc 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -89,10 +89,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 
 #define DRV_MODULE_NAME		"tg3"
 #define TG3_MAJ_NUM			3
-#define TG3_MIN_NUM			119
+#define TG3_MIN_NUM			120
 #define DRV_MODULE_VERSION	\
 	__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE	"May 18, 2011"
+#define DRV_MODULE_RELDATE	"August 18, 2011"
 
 #define TG3_DEF_RX_MODE		0
 #define TG3_DEF_TX_MODE		0
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 1/6] tg3: Remove dead code
From: Matt Carlson @ 2011-08-19 23:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

Now that CPMU devices don't do MAC loopback, all the CPMU power saving
mode adjustments are unneeded.  This patch removes the dead code.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c |   35 +----------------------------------
 1 files changed, 1 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index dc3fbf6..756e2bb 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -11527,7 +11527,7 @@ out:
 static int tg3_test_loopback(struct tg3 *tp)
 {
 	int err = 0;
-	u32 eee_cap, cpmuctrl = 0;
+	u32 eee_cap;
 
 	if (!netif_running(tp->dev))
 		return TG3_LOOPBACK_FAILED;
@@ -11554,32 +11554,6 @@ static int tg3_test_loopback(struct tg3 *tp)
 	if (tp->phy_flags & TG3_PHYFLG_ENABLE_APD)
 		tg3_phy_toggle_apd(tp, false);
 
-	if (tg3_flag(tp, CPMU_PRESENT)) {
-		int i;
-		u32 status;
-
-		tw32(TG3_CPMU_MUTEX_REQ, CPMU_MUTEX_REQ_DRIVER);
-
-		/* Wait for up to 40 microseconds to acquire lock. */
-		for (i = 0; i < 4; i++) {
-			status = tr32(TG3_CPMU_MUTEX_GNT);
-			if (status == CPMU_MUTEX_GNT_DRIVER)
-				break;
-			udelay(10);
-		}
-
-		if (status != CPMU_MUTEX_GNT_DRIVER) {
-			err = TG3_LOOPBACK_FAILED;
-			goto done;
-		}
-
-		/* Turn off link-based power management. */
-		cpmuctrl = tr32(TG3_CPMU_CTRL);
-		tw32(TG3_CPMU_CTRL,
-		     cpmuctrl & ~(CPMU_CTRL_LINK_SPEED_MODE |
-				  CPMU_CTRL_LINK_AWARE_MODE));
-	}
-
 	if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_MAC_LOOPBACK))
 		err |= TG3_STD_LOOPBACK_FAILED << TG3_MAC_LOOPBACK_SHIFT;
 
@@ -11587,13 +11561,6 @@ static int tg3_test_loopback(struct tg3 *tp)
 	    tg3_run_loopback(tp, 9000 + ETH_HLEN, TG3_MAC_LOOPBACK))
 		err |= TG3_JMB_LOOPBACK_FAILED << TG3_MAC_LOOPBACK_SHIFT;
 
-	if (tg3_flag(tp, CPMU_PRESENT)) {
-		tw32(TG3_CPMU_CTRL, cpmuctrl);
-
-		/* Release the mutex */
-		tw32(TG3_CPMU_MUTEX_GNT, CPMU_MUTEX_GNT_DRIVER);
-	}
-
 	if (!(tp->phy_flags & TG3_PHYFLG_PHY_SERDES) &&
 	    !tg3_flag(tp, USE_PHYLIB)) {
 		if (tg3_run_loopback(tp, ETH_FRAME_LEN, TG3_PHY_LOOPBACK))
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 0/6] tg3: Add external loopback support to selftest
From: Matt Carlson @ 2011-08-19 23:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patchset adds external loopback support to the driver's selftest code.



^ permalink raw reply

* Re: net: rps: support 802.1Q
From: Ben Hutchings @ 2011-08-20  1:12 UTC (permalink / raw)
  To: Changli Gao; +Cc: David S. Miller, Eric Dumazet, Tom Herbert, netdev
In-Reply-To: <CABa6K_FhG9_XrCfhJDhhUArFEeG5FqOoU_HN1vY-TOKmjMxBKQ@mail.gmail.com>

On Fri, 2011-08-19 at 23:05 +0800, Changli Gao wrote:
> On Fri, Aug 19, 2011 at 7:54 PM, Ben Hutchings
> <bhutchings@solarflare.com> wrote:
> >
> > Should this really be reading an unlimited number of tags?
> 
> Not unlimited, but it won't stop until reaching the end of the packet.

Right, I understand that the parsing is properly range-checked against
the length of the packet.

> >  What if an
> > attacker starts sending packets full of VLAN tags?  Since this runs
> > before netfilter, there would be no way to prevent those packets burning
> > our CPU time.  And if there are legitimately multiple VLAN tags, they
> > presumably won't all have the 802.1q Ethertype.
> >
> 
> Do we need to limit the number of rounds to stop this kind of "bad"
> packets from burning our CPU time?

Well, maybe.  Then again, the most effective way for an attacker to
waste a target's CPU time (aside from application-level vulnerabilities)
will often be just to send minimum size packets.

> Then,  __netif_receive_skb() has to
> be updated too, and so does the inspection of tunnels in __skb_get_rxhash().

Yes, if we agree this is something worth defending against then we would
need to be consistent in limiting any such parsing loop in pre-netfilter
processing.

> Is there a such limitation in xfrm?

It appears to be limited to 6 levels of encapsulation.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH v2] Proportional Rate Reduction for TCP.
From: Nandita Dukkipati @ 2011-08-20  1:28 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: David S. Miller, netdev, Tom Herbert, Matt Mathis, Yuchung Cheng
In-Reply-To: <alpine.DEB.2.00.1108191312170.12780@wel-95.cs.helsinki.fi>

Forgot to turn off gmail's rich formatting, so re-sending to the list.

On Fri, Aug 19, 2011 at 3:25 AM, Ilpo Järvinen
<ilpo.jarvinen@helsinki.fi> wrote:
>
> On Fri, 19 Aug 2011, Nandita Dukkipati wrote:
>
> > +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
> > +                                     int fast_rexmit, int flag)
> > +{
> > +     struct tcp_sock *tp = tcp_sk(sk);
> > +     int sndcnt = 0;
> > +     int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
> > +
> > +     if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
> > +             if (WARN_ON(!tp->prior_cwnd))
> > +                     tp->prior_cwnd = 1;
>
> This should still be made larger to avoid problems if it ever will be
> needed.

I am letting the value remain at 1, mainly because this is the valid
lowest non-zero value for snd_cwnd to take on. The main purpose of
this code is to catch any lurking bug outside of PRR which results in
an undesirable divide by 0 in PRR. I would like to fix that bug if I
find this code is executed.

>
> > +             sndcnt = DIV_ROUND_UP((u64)(tp->prr_delivered *
> > +                                         tp->snd_ssthresh),
> > +                                   (u64)tp->prior_cwnd) - tp->prr_out;
>
> I think you should pick one from include/linux/math64.h instead of letting
> gcc do the / operation all by itself. ...Obviously then the ROUND_UP part
> needs to be done manually (either using the remainder or pre-addition
> like DIV_ROUND_UP does).

Done. patch v3 used div_u64.

Thanks
Nandita

^ permalink raw reply

* Re: [PATCH v2] Proportional Rate Reduction for TCP.
From: Nandita Dukkipati @ 2011-08-20  1:29 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, therbert, mattmathis, ycheng
In-Reply-To: <20110819.032651.405436165608502880.davem@davemloft.net>

On Fri, Aug 19, 2011 at 3:26 AM, David Miller <davem@davemloft.net> wrote:
> From: Nandita Dukkipati <nanditad@google.com>
> Date: Fri, 19 Aug 2011 00:33:32 -0700
>
>> @@ -2830,9 +2830,14 @@ static int tcp_try_undo_loss(struct sock *sk)
>>  static inline void tcp_complete_cwr(struct sock *sk)
>>  {
>>       struct tcp_sock *tp = tcp_sk(sk);
>> -     /* Do not moderate cwnd if it's already undone in cwr or recovery */
>> -     if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
>> -             tp->snd_cwnd = tp->snd_ssthresh;
>> +
>> +     /* Do not moderate cwnd if it's already undone in cwr or recovery. */
>> +     if (tp->undo_marker) {
>> +
>> +             if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
>
> Please get rid of that empty line before the TCP_CA_CWR case.

Done.
>
>> +             sndcnt = DIV_ROUND_UP((u64)(tp->prr_delivered *
>> +                                         tp->snd_ssthresh),
>> +                                   (u64)tp->prior_cwnd) - tp->prr_out;
>
> This won't link on 32-bit unless __divdi3 libgcc routine is provided
> by the architecture.  To portably do 64-bit division you need to use
> do_div() or something based upon it.  Perhaps DIV_ROUND_UP_LL() will
> work best in this case.

patch v3 uses div_u64 which is based on do_div() for 32-bit archs.

Thanks
Nandita

^ permalink raw reply

* [PATCH v3] Proportional Rate Reduction for TCP.
From: Nandita Dukkipati @ 2011-08-20  1:29 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Tom Herbert, Matt Mathis, Yuchung Cheng,
	Nandita Dukkipati
In-Reply-To: <1313739212-2315-1-git-send-email-nanditad@google.com>

This patch implements Proportional Rate Reduction (PRR) for TCP.
PRR is an algorithm that determines TCP's sending rate in fast
recovery. PRR avoids excessive window reductions and aims for
the actual congestion window size at the end of recovery to be as
close as possible to the window determined by the congestion control
algorithm. PRR also improves accuracy of the amount of data sent
during loss recovery.

The patch implements the recommended flavor of PRR called PRR-SSRB
(Proportional rate reduction with slow start reduction bound) and
replaces the existing rate halving algorithm. PRR improves upon the
existing Linux fast recovery under a number of conditions including:
  1) burst losses where the losses implicitly reduce the amount of
outstanding data (pipe) below the ssthresh value selected by the
congestion control algorithm and,
  2) losses near the end of short flows where application runs out of
data to send.

As an example, with the existing rate halving implementation a single
loss event can cause a connection carrying short Web transactions to
go into the slow start mode after the recovery. This is because during
recovery Linux pulls the congestion window down to packets_in_flight+1
on every ACK. A short Web response often runs out of new data to send
and its pipe reduces to zero by the end of recovery when all its packets
are drained from the network. Subsequent HTTP responses using the same
connection will have to slow start to raise cwnd to ssthresh. PRR on
the other hand aims for the cwnd to be as close as possible to ssthresh
by the end of recovery.

A description of PRR and a discussion of its performance can be found at
the following links:
- IETF Draft:
    http://tools.ietf.org/html/draft-mathis-tcpm-proportional-rate-reduction-01
- IETF Slides:
    http://www.ietf.org/proceedings/80/slides/tcpm-6.pdf
    http://tools.ietf.org/agenda/81/slides/tcpm-2.pdf
- Paper to appear in Internet Measurements Conference (IMC) 2011:
    Improving TCP Loss Recovery
    Nandita Dukkipati, Matt Mathis, Yuchung Cheng

Signed-off-by: Nandita Dukkipati <nanditad@google.com>
---
Changelog since v2:
- Used div_u64 in tcp_update_cwnd_in_recovery() to ensure portable 64-bit division. 
- Removed an empty line in tcp_complete_cwr().

Changelog since v1:
- Took care of overflow for large congestion windows in tcp_update_cwnd_in_recovery().
- Renamed prr_cwnd to prior_cwnd.
- Renamed pkts_delivered to newly_acked_sacked.

 include/linux/tcp.h   |    4 +++
 net/ipv4/tcp_input.c  |   61 ++++++++++++++++++++++++++++++++++++++++++++-----
 net/ipv4/tcp_output.c |    7 +++++-
 3 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 531ede8..6b63b31 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -379,6 +379,10 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
+	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
+	u32	prr_delivered;	/* Number of newly delivered packets to
+				 * receiver in Recovery. */
+	u32	prr_out;	/* Total number of pkts sent during Recovery. */
 
  	u32	rcv_wnd;	/* Current receiver window		*/
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ea0d218..40408f1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2830,9 +2830,13 @@ static int tcp_try_undo_loss(struct sock *sk)
 static inline void tcp_complete_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* Do not moderate cwnd if it's already undone in cwr or recovery */
-	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
-		tp->snd_cwnd = tp->snd_ssthresh;
+
+	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
+	if (tp->undo_marker) {
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
+			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+		else /* PRR */
+			tp->snd_cwnd = tp->snd_ssthresh;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2950,6 +2954,41 @@ void tcp_simple_retransmit(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_simple_retransmit);
 
+/* This function implements the PRR algorithm, specifically the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the packets in flight is larger than ssthresh, PRR spreads the
+ *	cwnd reductions across a full RTT.
+ *   2) If packets in flight is lower than ssthresh (such as due to excess
+ *	losses and/or application stalls), do not perform any further cwnd
+ *	reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+					int fast_rexmit, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int sndcnt = 0;
+	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+		u64 dividend = 0;
+		if (WARN_ON(!tp->prior_cwnd))
+			tp->prior_cwnd = 1;
+		dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+			   tp->prior_cwnd - 1;
+		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+	} else {
+		sndcnt = min_t(int, delta,
+			       max_t(int, tp->prr_delivered - tp->prr_out,
+				     newly_acked_sacked) + 1);
+	}
+
+	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
@@ -2961,7 +3000,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
  * It does _not_ decide what to send, it is made in function
  * tcp_xmit_retransmit_queue().
  */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
+				  int newly_acked_sacked, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3111,13 +3151,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 
 		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
+		tp->prior_cwnd = tp->snd_cwnd;
+		tp->prr_delivered = 0;
+		tp->prr_out = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 		fast_rexmit = 1;
 	}
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tcp_cwnd_down(sk, flag);
+	tp->prr_delivered += newly_acked_sacked;
+	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3632,6 +3676,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets;
+	int prior_sacked = tp->sacked_out;
+	int newly_acked_sacked = 0;
 	int frto_cwnd = 0;
 
 	/* If the ack is older than previous acks
@@ -3703,6 +3749,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	/* See if we can take anything off of the retransmit queue. */
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
+	newly_acked_sacked = (prior_packets - prior_sacked) -
+			     (tp->packets_out - tp->sacked_out);
+
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
 	/* Guarantee sacktag reordering detection against wrap-arounds */
@@ -3715,7 +3764,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
-				      flag);
+				      newly_acked_sacked, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 882e0b0..ca50408 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1796,11 +1796,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		tcp_event_new_data_sent(sk, skb);
 
 		tcp_minshall_update(tp, mss_now, skb);
-		sent_pkts++;
+		sent_pkts += tcp_skb_pcount(skb);
 
 		if (push_one)
 			break;
 	}
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+		tp->prr_out += sent_pkts;
 
 	if (likely(sent_pkts)) {
 		tcp_cwnd_validate(sk);
@@ -2294,6 +2296,9 @@ begin_fwd:
 			return;
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
+		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+			tp->prr_out += tcp_skb_pcount(skb);
+
 		if (skb == tcp_write_queue_head(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
-- 
1.7.3.1


^ permalink raw reply related

* FILL AND SUBMIT THE ACCOUNT VERIFICATION FORM.
From: System Administrator @ 2011-08-20  1:39 UTC (permalink / raw)




Dear account owner,

Click in the link below to re-validate your account: https://
spreadsheets.google.com/spreadsheet/
viewform?formkey=dG1uWkhGcDUyTmJYUUpPZk1sck5xdHc6MQ

Warning!!! All Webmail. Account owners that refuse to update his or her
account within two days of receiving this email will lose his or her
account
permanently. AGB © upc cablecom GmbH 2011. We apologize for any
inconvenience this may have cause you. Thank you for using this Webmail
Account

System Administrator.
Customer Care Unit.
----------------------------------------------------------------
This e-mail has been sent via JARING webmail at http://www.jaring.my


^ permalink raw reply

* Re: [PATCH net-next] ipv4: one more case for non-local saddr in ICMP
From: Herbert Xu @ 2011-08-20  8:20 UTC (permalink / raw)
  To: David Miller; +Cc: ja, netdev
In-Reply-To: <20110819.034354.219449243171613725.davem@davemloft.net>

On Fri, Aug 19, 2011 at 03:43:54AM -0700, David Miller wrote:
> From: Julian Anastasov <ja@ssi.bg>
> Date: Mon, 15 Aug 2011 19:21:23 +0300 (EEST)
> 
> > 
> > 	May be there is one more case that we can avoid using
> > non-local source for ICMP errors: xfrm_lookup, num_xfrms = 0 when
> > reverse "Flow passes untransformed". Avoid using the input route
> > if xfrm_lookup returns same dst.
> > 
> > Signed-off-by: Julian Anastasov <ja@ssi.bg>
> > ---
> > 
> > 	In fact, should we use local IP in all cases when
> > sending ICMP? I'm asking it for the following case:
> > 
> > 	Large packet is forwarded but is rejected with ICMP FRAG
> > NEEDED. We usually send ICMP with local saddr instead of the
> > original non-local destination. What is the role of
> > this reverse check? May be after xfrm_decode_session_reverse
> > we should use 'fl4_dec.saddr = fl4->saddr;' so that xfrm_lookup
> > works with ICMP from local IP? What is right thing to do here?
> > I don't see code that looks in the embedded header...
> 
> Well.. this relookup behavior is guided by a special transform state
> XFRM_STATE_ICMP that the user must explicitly create IPSEC rules for.
> 
> Presumably they are going to add real transforms to such special IPSEC
> rules, not create NOP ones with no transforms.  And if they do create
> such IPSEC state with no transforms, perhaps the intention is to trigger
> the use of the non-local source.
> 
> The whole thing revolves around how Herbert envisions people implementing
> RFC4301 support using this new XFRM_STATE_ICMP thing.
> 
> Right?

The intention of XFRM_STATE_ICMP is to automatically allow inbound
IPsec-protected ICMP packets (remember that IPsec tunnels are not
automatically allowed, as that opens room for address spoofing).

Imagine if you have a policy P that allows IPsec packets with inner
addresses going from S to D.  The purpose of this is to ensure that
ICMP packets from D to S are automatically allowed.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* BUG: rt2x00usb: Vendor Request 0x07 failed
From: Marc Kleine-Budde @ 2011-08-20  8:42 UTC (permalink / raw)
  To: linux-wireless-u79uwXL29TY76Z2rM5mHXA
  Cc: Netdev-u79uwXL29TY76Z2rM5mHXA, Stanislaw Gruszka,
	Gertjan van Wingerde

[-- Attachment #1: Type: text/plain, Size: 5046 bytes --]

Hello,

I'm running a sheeva plug (ARM/kirkwood) with a rt2800 USB stick in AP mode.
Bus 001 Device 002: ID 1737:0071 Linksys WUSB600N v1 Dual-Band Wireless-N Network Adapter [Ralink RT2870]

kernel is v3.0.3 +
http://www.spinics.net/lists/linux-wireless/msg74377.html
(see http://git.pengutronix.de/?p=mkl/linux-2.6.git;a=shortlog;h=refs/heads/wireless/rt2x00/v3.0.3)

With the patch the oops is gone, but after a few hours it fails with:

[30971.764840] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x0438 with error -71.
[30975.519840] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x06 failed for offset 0x0408 with error -71.
[30978.890840] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x06 failed for offset 0x0408 with error -71.
[30982.389845] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x0438 with error -71.
[30987.200842] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x101c with error -71.
[30990.795843] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x101c with error -71.
...
[31306.426842] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x101c with error -71.
[31306.437366] phy0 -> rt2x00usb_regbusy_read: Error - Indirect register access failed: offset=0x0000101c, value=0xc003ec28
[31309.647841] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x06 failed for offset 0x6888 with error -71.
..
[31838.105841] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x101c with error -71.
[31838.116386] phy0 -> rt2x00usb_regbusy_read: Error - Indirect register access failed: offset=0x0000101c, value=0xc003ec28
[31841.327840] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x101c with error -71.
...
[32043.619842] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x101c with error -71.
[32046.830843] phy0 -> rt2x00usb_vendor_request: Error - Vendor Request 0x07 failed for offset 0x101c with error -71.
[32048.956352] INFO: task hostapd:1499 blocked for more than 120 seconds.
[32048.962920] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[32048.970801] hostapd         D c02d85e0     0  1499      1 0x00000000
[32048.977234] [<c02d85e0>] (schedule+0x478/0x4e8) from [<c02d9370>] (__mutex_lock_slowpath+0x64/0x88)
[32048.986352] [<c02d9370>] (__mutex_lock_slowpath+0x64/0x88) from [<bf378f5c>] (rt2x00usb_vendor_request_buff+0x24/0xb8 [rt2x00usb])
[32048.998185] [<bf378f5c>] (rt2x00usb_vendor_request_buff+0x24/0xb8 [rt2x00usb]) from [<bf38d034>] (rt2x00usb_register_read+0x34/0x44 [rt2800usb])
[32049.011243] [<bf38d034>] (rt2x00usb_register_read+0x34/0x44 [rt2800usb]) from [<bf381d74>] (rt2800_config_shared_key+0xc4/0x11c [rt2800lib])
[32049.023958] [<bf381d74>] (rt2800_config_shared_key+0xc4/0x11c [rt2800lib]) from [<bf36aac0>] (rt2x00mac_set_key+0x138/0x14c [rt2x00lib])
[32049.036410] [<bf36aac0>] (rt2x00mac_set_key+0x138/0x14c [rt2x00lib]) from [<bf34379c>] (ieee80211_key_disable_hw_accel+0x80/0xd0 [mac80211])
[32049.049221] [<bf34379c>] (ieee80211_key_disable_hw_accel+0x80/0xd0 [mac80211]) from [<bf343810>] (__ieee80211_key_destroy+0x24/0x6c [mac80211])
[32049.062285] [<bf343810>] (__ieee80211_key_destroy+0x24/0x6c [mac80211]) from [<bf343bf4>] (ieee80211_key_link+0x10c/0x134 [mac80211])
[32049.074471] [<bf343bf4>] (ieee80211_key_link+0x10c/0x134 [mac80211]) from [<bf33c3f8>] (ieee80211_add_key+0x104/0x130 [mac80211])
[32049.086392] [<bf33c3f8>] (ieee80211_add_key+0x104/0x130 [mac80211]) from [<bf306cc8>] (nl80211_new_key+0xec/0x110 [cfg80211])
[32049.097834] [<bf306cc8>] (nl80211_new_key+0xec/0x110 [cfg80211]) from [<c027542c>] (genl_rcv_msg+0x1bc/0x204)
[32049.107835] [<c027542c>] (genl_rcv_msg+0x1bc/0x204) from [<c02748e0>] (netlink_rcv_skb+0x50/0xac)
[32049.116769] [<c02748e0>] (netlink_rcv_skb+0x50/0xac) from [<c0275264>] (genl_rcv+0x18/0x24)
[32049.125172] [<c0275264>] (genl_rcv+0x18/0x24) from [<c02742ac>] (netlink_unicast+0x230/0x2cc)
[32049.133754] [<c02742ac>] (netlink_unicast+0x230/0x2cc) from [<c027466c>] (netlink_sendmsg+0x28c/0x324)
[32049.143132] [<c027466c>] (netlink_sendmsg+0x28c/0x324) from [<c0242a78>] (sock_sendmsg+0xac/0xd4)
[32049.152067] [<c0242a78>] (sock_sendmsg+0xac/0xd4) from [<c0243d50>] (__sys_sendmsg+0x1c4/0x258)
[32049.160829] [<c0243d50>] (__sys_sendmsg+0x1c4/0x258) from [<c0244830>] (sys_sendmsg+0x3c/0x60)
[32049.169510] [<c0244830>] (sys_sendmsg+0x3c/0x60) from [<c002fbc0>] (ret_fast_syscall+0x0/0x2c)

The vendor requests 0x06 and 0x07 keep failing, interleaved with some rt2x00usb_regbusy_read errors.

cheers,
Marc
-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]

^ permalink raw reply

* Re: Bridge stays down until a port is added
From: Marc Haber @ 2011-08-20  9:47 UTC (permalink / raw)
  To: netdev; +Cc: Sven-Haegar Koch, Stephen Hemminger
In-Reply-To: <alpine.DEB.2.02.1108121418090.11637@aurora>

Hi,

I was a little bit confused. The problem is that with IPv6 an IP
address configured on a bridge which is still in the NO-CARRIER state
will never leave tentative state and will thus not get useable.

On Fri, Aug 12, 2011 at 02:22:27PM +0200, Sven-Haegar Koch wrote:
> For me (using kernel 3.0.0) it seems to work as I expect it:
> 
> aurora:~# brctl addbr br0
> aurora:~# ifconfig br0 192.168.254.1 netmask 255.255.255.0 up
> aurora:~# ping 192.168.254.1
> PING 192.168.254.1 (192.168.254.1) 56(84) bytes of data.
> 64 bytes from 192.168.254.1: icmp_req=1 ttl=64 time=0.087 ms

Now try it with IPv6.

Greetings
Marc

-- 
-----------------------------------------------------------------------------
Marc Haber         | "I don't trust Computers. They | Mailadresse im Header
Mannheim, Germany  |  lose things."    Winona Ryder | Fon: *49 621 72739834
Nordisch by Nature |  How to make an American Quilt | Fax: *49 3221 2323190

^ permalink raw reply

* Re: [PATCH v2] Proportional Rate Reduction for TCP.
From: Ilpo Järvinen @ 2011-08-20 12:41 UTC (permalink / raw)
  To: Nandita Dukkipati
  Cc: David S. Miller, Netdev, Tom Herbert, Matt Mathis, Yuchung Cheng
In-Reply-To: <CAB_+Fg4z46iLv29jX6dXK6GSBbnPxWroq99V6Yr25vpCYrJBqw@mail.gmail.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1430 bytes --]

On Fri, 19 Aug 2011, Nandita Dukkipati wrote:

> Forgot to turn off gmail's rich formatting, so re-sending to the list.
> 
> On Fri, Aug 19, 2011 at 3:25 AM, Ilpo Järvinen
> <ilpo.jarvinen@helsinki.fi> wrote:
> >
> > On Fri, 19 Aug 2011, Nandita Dukkipati wrote:
> >
> > > +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
> > > +                                     int fast_rexmit, int flag)
> > > +{
> > > +     struct tcp_sock *tp = tcp_sk(sk);
> > > +     int sndcnt = 0;
> > > +     int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
> > > +
> > > +     if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
> > > +             if (WARN_ON(!tp->prior_cwnd))
> > > +                     tp->prior_cwnd = 1;
> >
> > This should still be made larger to avoid problems if it ever will be
> > needed.
> 
> I am letting the value remain at 1, mainly because this is the valid
> lowest non-zero value for snd_cwnd to take on. The main purpose of
> this code is to catch any lurking bug outside of PRR which results in
> an undesirable divide by 0 in PRR. I would like to fix that bug if I
> find this code is executed.

NACK, until this value is at least 2 * tp->snd_ssthresh. Or alternatively 
the fallback is removed so that we DBZ and do not end up wrecking the 
network.

Other than that I'm ok with the patch (assuming the branches I brought
up earlier are ok for everybody else).

-- 
 i.

^ permalink raw reply

* Re: KVM induced panic on 2.6.38[2367] & 2.6.39
From: Brad Campbell @ 2011-08-20 13:16 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Avi Kivity, CaT, Borislav Petkov, linux-kernel, kvm, netdev
In-Reply-To: <1307453874.3091.14.camel@edumazet-laptop>

On 07/06/11 21:37, Eric Dumazet wrote:
> Le mardi 07 juin 2011 à 21:27 +0800, Brad Campbell a écrit :
>> On 07/06/11 04:22, Eric Dumazet wrote:
>>
>>> Could you please try latest linux-2.6 tree ?
>>>
>>> We fixed many networking bugs that could explain your crash.
>>>
>>>
>>>
>>>
>>
>> No good I'm afraid.
>>
>> [  543.040056]
>> =============================================================================
>> [  543.040136] BUG ip_dst_cache: Padding overwritten.
>> 0xffff8803e4217ffe-0xffff8803e4217fff
>> [  543.040194]
>
> Thats pretty strange : These are the last two bytes of a page, set to
> 0x0000 (a 16 bit value)
>
> There is no way a dst field could actually sit on this location (its a
> padding), since a dst is a bit less than 256 bytes (0xe8), and each
> entry is aligned on a 64byte address.
>
> grep dst /proc/slabinfo
>
> ip_dst_cache       32823  62944    256   32    2 : tunables    0    0
> 0 : slabdata   1967   1967      0
>
> sizeof(struct rtable)=0xe8
>
>
>> -----------------------------------------------------------------------------
>> [  543.040198]
>> [  543.040298] INFO: Slab 0xffffea000d9e74d0 objects=25 used=25 fp=0x
>>          (null) flags=0x8000000000004081
>> [  543.040364] Pid: 4576, comm: kworker/1:2 Not tainted 3.0.0-rc2 #1
>> [  543.040415] Call Trace:
>> [  543.040472]  [<ffffffff810b9c1d>] ? slab_err+0xad/0xd0
>> [  543.040528]  [<ffffffff8102e034>] ? check_preempt_wakeup+0xa4/0x160
>> [  543.040595]  [<ffffffff810ba206>] ? slab_pad_check+0x126/0x170
>> [  543.040650]  [<ffffffff8133045b>] ? dst_destroy+0x8b/0x110
>> [  543.040701]  [<ffffffff810ba29a>] ? check_slab+0x4a/0xc0
>> [  543.040753]  [<ffffffff810baf2d>] ? free_debug_processing+0x2d/0x250
>> [  543.040808]  [<ffffffff810bb27b>] ? __slab_free+0x12b/0x140
>> [  543.040862]  [<ffffffff810bbe99>] ? kmem_cache_free+0x99/0xa0
>> [  543.040915]  [<ffffffff8133045b>] ? dst_destroy+0x8b/0x110
>> [  543.040967]  [<ffffffff813307f6>] ? dst_gc_task+0x196/0x1f0
>> [  543.041021]  [<ffffffff8104e954>] ? queue_delayed_work_on+0x154/0x160
>> [  543.041081]  [<ffffffff813066fe>] ? do_dbs_timer+0x20e/0x3d0
>> [  543.041133]  [<ffffffff81330660>] ? dst_alloc+0x180/0x180
>> [  543.041187]  [<ffffffff8104f28b>] ? process_one_work+0xfb/0x3b0
>> [  543.041242]  [<ffffffff8104f964>] ? worker_thread+0x144/0x3d0
>> [  543.041296]  [<ffffffff8102cc10>] ? __wake_up_common+0x50/0x80
>> [  543.041678]  [<ffffffff8104f820>] ? rescuer_thread+0x2e0/0x2e0
>> [  543.041729]  [<ffffffff8104f820>] ? rescuer_thread+0x2e0/0x2e0
>> [  543.041782]  [<ffffffff81053436>] ? kthread+0x96/0xa0
>> [  543.041835]  [<ffffffff813e1d14>] ? kernel_thread_helper+0x4/0x10
>> [  543.041890]  [<ffffffff810533a0>] ? kthread_worker_fn+0x120/0x120
>> [  543.041944]  [<ffffffff813e1d10>] ? gs_change+0xb/0xb
>> [  543.041993]  Padding 0xffff8803e4217f40:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.042718]  Padding 0xffff8803e4217f50:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.043433]  Padding 0xffff8803e4217f60:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.044155]  Padding 0xffff8803e4217f70:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.044866]  Padding 0xffff8803e4217f80:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.045590]  Padding 0xffff8803e4217f90:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.046311]  Padding 0xffff8803e4217fa0:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.047034]  Padding 0xffff8803e4217fb0:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.047755]  Padding 0xffff8803e4217fc0:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.048474]  Padding 0xffff8803e4217fd0:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.049203]  Padding 0xffff8803e4217fe0:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ
>> [  543.049909]  Padding 0xffff8803e4217ff0:  5a 5a 5a 5a 5a 5a 5a 5a 5a
>> 5a 5a 5a 5a 5a 00 00 ZZZZZZZZZZZZZZ..
>> [  543.050021] FIX ip_dst_cache: Restoring
>> 0xffff8803e4217f40-0xffff8803e4217fff=0x5a
>> [  543.050021]
>>
>> Dropped -mm, Hugh and Andrea from CC as this does not appear to be mm or
>> ksm related.
>>
>> I'll pare down the firewall and see if I can make it break easier with a
>> smaller test set.
>
> Hmm, not sure now :(
>
> Could you reproduce another bug please ?

I know this is an old one, but I recently purchased a second system to 
allow me to test and bisect this off-line (the live system is too much 
of a headache to bisect on).

brad@test:/raid10/src/linux-2.6$ git bisect log
git bisect start
# good: [9fe6206f400646a2322096b56c59891d530e8d51] Linux 2.6.35
git bisect good 9fe6206f400646a2322096b56c59891d530e8d51
# bad: [da5cabf80e2433131bf0ed8993abc0f7ea618c73] Linux 2.6.36-rc1
git bisect bad da5cabf80e2433131bf0ed8993abc0f7ea618c73
# bad: [0f477dd0851bdcee82923da66a7fc4a44cb1bc3d] Merge branch 
'x86-cpu-for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
git bisect bad 0f477dd0851bdcee82923da66a7fc4a44cb1bc3d
# bad: [3ff1c25927e3af61c6bf0e4ed959504058ae4565] phy/marvell: add 
88ec048 support
git bisect bad 3ff1c25927e3af61c6bf0e4ed959504058ae4565
# good: [05318bc905467237d4aa68a701f6e92a2b332218] Merge branch 'master' 
of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next-2.6
git bisect good 05318bc905467237d4aa68a701f6e92a2b332218
# bad: [2ba13ed678775195e8255b4e503c59d48b615bd8] Bluetooth: Remove 
check for supported mode
git bisect bad 2ba13ed678775195e8255b4e503c59d48b615bd8
# bad: [1e2cfeef060fa0270f9a2d66b1218c12c05062e0] Revert "tc35815: fix 
iomap leak"
git bisect bad 1e2cfeef060fa0270f9a2d66b1218c12c05062e0
# bad: [d9bed6bbd4f2a0120c93fed68605950651e1f225] isdn/gigaset: remove 
EXPERIMENTAL tag from GIGASET_CAPI
git bisect bad d9bed6bbd4f2a0120c93fed68605950651e1f225
# bad: [d117b6665847084cfe8a44b870f771153e18991d] fealnx: Use the 
instance of net_device_stats from net_device.
git bisect bad d117b6665847084cfe8a44b870f771153e18991d
# bad: [e490c1defec4236a6a131fe2d13bf7ba787c02f8] Merge branch 'master' 
of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
git bisect bad e490c1defec4236a6a131fe2d13bf7ba787c02f8
# bad: [0a17d8c744e44617a3c22e7af68b4c5c9c1c5dba] ixgbe: use NETIF_F_LRO
git bisect bad 0a17d8c744e44617a3c22e7af68b4c5c9c1c5dba
# bad: [ede3ef0d940ef052466f42c849390b23c6859abc] igb: fix PHY config 
access on 82580
git bisect bad ede3ef0d940ef052466f42c849390b23c6859abc
# good: [ee3cb6295144b0adfa75ccaca307643a6998b1e2] be2net: changes to 
properly provide phy details
git bisect good ee3cb6295144b0adfa75ccaca307643a6998b1e2
# bad: [7475271004b66e9c22e1bb28f240a38c5d6fe76e] x86: Drop 
CONFIG_MCORE2 check around setting of NET_IP_ALIGN
git bisect bad 7475271004b66e9c22e1bb28f240a38c5d6fe76e
brad@test:/raid10/src/linux-2.6$ git bisect log
git bisect start
# good: [9fe6206f400646a2322096b56c59891d530e8d51] Linux 2.6.35
git bisect good 9fe6206f400646a2322096b56c59891d530e8d51
# bad: [da5cabf80e2433131bf0ed8993abc0f7ea618c73] Linux 2.6.36-rc1
git bisect bad da5cabf80e2433131bf0ed8993abc0f7ea618c73
# bad: [0f477dd0851bdcee82923da66a7fc4a44cb1bc3d] Merge branch 
'x86-cpu-for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
git bisect bad 0f477dd0851bdcee82923da66a7fc4a44cb1bc3d
# bad: [3ff1c25927e3af61c6bf0e4ed959504058ae4565] phy/marvell: add 
88ec048 support
git bisect bad 3ff1c25927e3af61c6bf0e4ed959504058ae4565
# good: [05318bc905467237d4aa68a701f6e92a2b332218] Merge branch 'master' 
of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next-2.6
git bisect good 05318bc905467237d4aa68a701f6e92a2b332218
# bad: [2ba13ed678775195e8255b4e503c59d48b615bd8] Bluetooth: Remove 
check for supported mode
git bisect bad 2ba13ed678775195e8255b4e503c59d48b615bd8
# bad: [1e2cfeef060fa0270f9a2d66b1218c12c05062e0] Revert "tc35815: fix 
iomap leak"
git bisect bad 1e2cfeef060fa0270f9a2d66b1218c12c05062e0
# bad: [d9bed6bbd4f2a0120c93fed68605950651e1f225] isdn/gigaset: remove 
EXPERIMENTAL tag from GIGASET_CAPI
git bisect bad d9bed6bbd4f2a0120c93fed68605950651e1f225
# bad: [d117b6665847084cfe8a44b870f771153e18991d] fealnx: Use the 
instance of net_device_stats from net_device.
git bisect bad d117b6665847084cfe8a44b870f771153e18991d
# bad: [e490c1defec4236a6a131fe2d13bf7ba787c02f8] Merge branch 'master' 
of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
git bisect bad e490c1defec4236a6a131fe2d13bf7ba787c02f8
# bad: [0a17d8c744e44617a3c22e7af68b4c5c9c1c5dba] ixgbe: use NETIF_F_LRO
git bisect bad 0a17d8c744e44617a3c22e7af68b4c5c9c1c5dba
# bad: [ede3ef0d940ef052466f42c849390b23c6859abc] igb: fix PHY config 
access on 82580
git bisect bad ede3ef0d940ef052466f42c849390b23c6859abc
# good: [ee3cb6295144b0adfa75ccaca307643a6998b1e2] be2net: changes to 
properly provide phy details
git bisect good ee3cb6295144b0adfa75ccaca307643a6998b1e2
# bad: [7475271004b66e9c22e1bb28f240a38c5d6fe76e] x86: Drop 
CONFIG_MCORE2 check around setting of NET_IP_ALIGN
git bisect bad 7475271004b66e9c22e1bb28f240a38c5d6fe76e
brad@test:/raid10/src/linux-2.6$ git bisect good
7475271004b66e9c22e1bb28f240a38c5d6fe76e is the first bad commit
commit 7475271004b66e9c22e1bb28f240a38c5d6fe76e
Author: Alexander Duyck <alexander.h.duyck@intel.com>
Date:   Thu Jul 1 13:28:27 2010 +0000

     x86: Drop CONFIG_MCORE2 check around setting of NET_IP_ALIGN

     This patch removes the CONFIG_MCORE2 check from around 
NET_IP_ALIGN.  It is
     based on a suggestion from Andi Kleen.  The assumption is that 
there are
     not any x86 cores where unaligned access is really slow, and this 
change
     would allow for a performance improvement to still exist on 
configurations
     that are not necessarily optimized for Core 2.

     Cc: Andi Kleen <ak@linux.intel.com>
     Cc: Thomas Gleixner <tglx@linutronix.de>
     Cc: Ingo Molnar <mingo@redhat.com>
     Cc: "H. Peter Anvin" <hpa@zytor.com>
     Cc: x86@kernel.org
     Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
     Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
     Acked-by: H. Peter Anvin <hpa@zytor.com>
     Signed-off-by: David S. Miller <davem@davemloft.net>

:040000 040000 5a15867789080a2f67a74b17c4422f85b7a9fb4a 
b98769348bd765731ca3ff03b33764257e23226c M	arch

I can confirm this bug exists in the 3.0 kernel, however I'm unable to 
reproduce it on today's git.

So anyone using netfilter, kvm and bridge on kernels between 2.6.36-rc1 
and 3.0 may hit this bug, but it looks like it is fixed in the current 
3.1-rc kernels.

^ permalink raw reply

* Re: [RFC 0/0] Introducing a generic socket offload framework
From: jamal @ 2011-08-20 14:32 UTC (permalink / raw)
  To: San Mehat
  Cc: davem, mst, rusty, linux-kernel, virtualization, netdev,
	digitaleric, mikew, miche, maccarro
In-Reply-To: <CAPi7mHp==JPA-0vNZ+xWRzOuJYUxsUL3UY=o=VtMUsB-YcFHwQ@mail.gmail.com>

On Fri, 2011-08-19 at 07:58 -0700, San Mehat wrote:

> Can you explain a good use-case for SOCK_RAW in this type of
> environment? We were noodling it around locally and couldn't come up
> with one that we needed to support.

One that comes to mind is the case of Samir's app: you'd need to handle
some of the apps that ride on top of IP, typically using SOCK_RAW,
e.g. ping, OSPF -- essentially anything on IP that doesn't have transport
built into the kernel, etc.

> > Q: If you want this to be transparent to the apps, who/what is doing
> > the tagging of SOCK_HWASSIST? Clearly not the app if you don't want to
> > change it.
> 
> The decision of whether to tag a socket or not is made by the 'hardware'

As in some config interface? 

cheers,
jamal


^ permalink raw reply

* Re: [RFC] bridge: add netfilter hook for forwarding 802.1D group addresses
From: Bart De Schuymer @ 2011-08-20 15:31 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Lamparter, Nick Carter, Ed Swierk, netdev, bridge,
	netfilter-devel
In-Reply-To: <20110819135810.1a529ab2@nehalam.ftrdhcpuser.net>

Op 19/08/2011 22:58, Stephen Hemminger schreef:

> Signed-off-by: Stephen Hemminger<shemminger@vyatta.com>
>
> ---
>   include/linux/netfilter_bridge.h      |    5 ++++-
>   net/bridge/br_input.c                 |   15 ++++++++++++---
>   net/bridge/netfilter/ebtable_filter.c |   18 ++++++++++++++++--
>   3 files changed, 32 insertions(+), 6 deletions(-)
>
> --- a/include/linux/netfilter_bridge.h	2011-08-19 13:11:51.972125670 -0700
> +++ b/include/linux/netfilter_bridge.h	2011-08-19 13:13:36.452130443 -0700
> @@ -22,7 +22,10 @@
>   #define NF_BR_POST_ROUTING	4
>   /* Not really a hook, but used for the ebtables broute table */
>   #define NF_BR_BROUTING		5
> -#define NF_BR_NUMHOOKS		6
> +/* Packets to link local multicast addresses (01-80-C2-00-00-XX) */
> +#define NF_BR_LINK_LOCAL_IN	6
> +
> +#define NF_BR_NUMHOOKS		7
>

You will need to make sure you don't break backwards compatibility with 
the ebtables userspace tool. ebtables.h::struct ebt_replace is a 
structure used for communication between userspace and the kernel. It 
has the member hook_entry defined like this:
struct ebt_entries __user *hook_entry[NF_BR_NUMHOOKS];

cheers,
Bart



-- 
Bart De Schuymer
www.artinalgorithms.be

^ permalink raw reply

* Re: Bridge stays down until a port is added
From: Stephen Hemminger @ 2011-08-20 16:30 UTC (permalink / raw)
  To: Marc Haber; +Cc: netdev, Sven-Haegar Koch
In-Reply-To: <20110820094712.GC21307@torres.zugschlus.de>

On Sat, 20 Aug 2011 11:47:12 +0200
Marc Haber <mh+netdev@zugschlus.de> wrote:

> Hi,
> 
> I was a little bit confused. The problem is that with IPv6 an IP
> address configured on a bridge which is still in the NO-CARRIER state
> will never leave tentative state and will thus not get useable.
> 
> On Fri, Aug 12, 2011 at 02:22:27PM +0200, Sven-Haegar Koch wrote:
> > For me (using kernel 3.0.0) it seems to work as I expect it:
> > 
> > aurora:~# brctl addbr br0
> > aurora:~# ifconfig br0 192.168.254.1 netmask 255.255.255.0 up
> > aurora:~# ping 192.168.254.1
> > PING 192.168.254.1 (192.168.254.1) 56(84) bytes of data.
> > 64 bytes from 192.168.254.1: icmp_req=1 ttl=64 time=0.087 ms
> 
> Now try it with IPv6.
> 

The problem is that IPv6 Duplicate Address Detection needs to
work. This is not a simple problem.  If the bridge asserted
carrier with no ports then:

1. IPv6 address assigned and IPv6 decides it is okay.
2. Port added later
3. Another system has the same address.
*broke*

If you want to avoid DAD, then you can disable it
by setting /proc/sys/net/ipv6/conf/br0/accept_dad to 0.

^ permalink raw reply

* Re: [net-next PATCH 1/1] qlge: Adding Maintainer.
From: David Miller @ 2011-08-20 17:33 UTC (permalink / raw)
  To: jitendra.kalsaria; +Cc: netdev, ron.mercer, Dept_NX_Linux_NIC_Driver
In-Reply-To: <1313796415-3908-1-git-send-email-jitendra.kalsaria@qlogic.com>

From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
Date: Fri, 19 Aug 2011 16:26:55 -0700

> Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 0/6] tg3: Add external loopback support to selftest
From: David Miller @ 2011-08-20 17:38 UTC (permalink / raw)
  To: mcarlson; +Cc: netdev
In-Reply-To: <1313798304-26171-1-git-send-email-mcarlson@broadcom.com>

From: "Matt Carlson" <mcarlson@broadcom.com>
Date: Fri, 19 Aug 2011 16:58:18 -0700

> This patchset adds external loopback support to the driver's selftest code.

Even the very first patch doesn't apply, the tg3_test_loopback() function
lacks any of that "cpmuctrl" code that is being removed.

Now it does exist in the plain "net" tree, but if your patches have
such a dependency upon me pulling those changes into net-next FIRST,
how in the world do you expect me to know it unless you tell me?

^ permalink raw reply

* Re: [PATCH net-next 0/6] tg3: Add external loopback support to selftest
From: David Miller @ 2011-08-20 17:40 UTC (permalink / raw)
  To: mcarlson; +Cc: netdev
In-Reply-To: <20110820.103800.1829588763643418877.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Sat, 20 Aug 2011 10:38:00 -0700 (PDT)

> From: "Matt Carlson" <mcarlson@broadcom.com>
> Date: Fri, 19 Aug 2011 16:58:18 -0700
> 
>> This patchset adds external loopback support to the driver's selftest code.
> 
> Even the very first patch doesn't apply, the tg3_test_loopback() function
> lacks any of that "cpmuctrl" code that is being removed.
> 
> Now it does exist in the plain "net" tree, but if your patches have
> such a dependency upon me pulling those changes into net-next FIRST,
> how in the world do you expect me to know it unless you tell me?

Ignore me, I forgot that I applied this patch series already and tried
to apply it again :-)

^ permalink raw reply

* Re: [PATCH 04/13] bna: SKB Check and Drop Macros
From: David Miller @ 2011-08-20 18:05 UTC (permalink / raw)
  To: rmody; +Cc: netdev, adapter_linux_open_src_team, gkaraje
In-Reply-To: <1313789972-22711-5-git-send-email-rmody@brocade.com>

From: Rasesh Mody <rmody@brocade.com>
Date: Fri, 19 Aug 2011 14:39:23 -0700

> Add macros to check and drop skb from transmit path and return.
> 
> Signed-off-by: Gurunatha Karaje <gkaraje@brocade.com>
> Signed-off-by: Rasesh Mody <rmody@brocade.com>

Do not EVER create macros that have the side effect of doing
a function return.

This makes code impossible to read and audit, because it is not
possible to see just by looking at the macro invocation that the
function might be returned from at that moment.

^ permalink raw reply

* Re: [PATCH] virtio-net: Read MAC only after initializing MSI-X
From: Michael S. Tsirkin @ 2011-08-20 20:00 UTC (permalink / raw)
  To: Sasha Levin; +Cc: linux-kernel, Rusty Russell, virtualization, netdev, kvm
In-Reply-To: <1313771587.12243.16.camel@lappy>

On Fri, Aug 19, 2011 at 07:33:07PM +0300, Sasha Levin wrote:
> On Fri, 2011-08-19 at 18:23 +0300, Michael S. Tsirkin wrote:
> > On Sat, Aug 13, 2011 at 11:51:01AM +0300, Sasha Levin wrote:
> > > The MAC of a virtio-net device is located at the first field of the device
> > > specific header. This header is located at offset 20 if the device doesn't
> > > support MSI-X or offset 24 if it does.
> > > 
> > > Current code in virtnet_probe() used to probe the MAC before checking for
> > > MSI-X, which means that the read was always made from offset 20 regardless
> > > > of whether MSI-X is enabled or not.
> > > 
> > > This patch moves the MAC probe to after the detection of whether MSI-X is
> > > enabled. This way the MAC will be read from offset 24 if the device indeed
> > > supports MSI-X.
> > > 
> > > Cc: Rusty Russell <rusty@rustcorp.com.au>
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > Cc: virtualization@lists.linux-foundation.org
> > > Cc: netdev@vger.kernel.org
> > > Cc: kvm@vger.kernel.org
> > > Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
> > 
> > I am not sure I see a bug in virtio: the config space layout simply
> > changes as msix is enabled and disabled (and if you look at the latest
> > draft, also on whether 64 bit features are enabled).
> > It doesn't depend on msix capability being present in device.
> > 
> > The spec seems to be explicit enough:
> > 	If MSI-X is enabled for the device, two additional fields immediately
> > 	follow this header.
> > 
> > So I'm guessing the bug is in kvm tools which assume
> > same layout for when msix is enabled and disabled.
> > qemu-kvm seems to do the right thing so the device
> > seems to get the correct mac.
> 
> We assumed that PCI config space has a static layout like most other
> devices. Having a behavior of "First bit 20 does something, but after
> enabling MSI-X it does something completely different" sounds strange.

The layout is always virtio header followed by device specific header.
We started with a small header so when more data was added, we could not
extend the header unconditionally.

We can't change that behaviour for MSI-X now, guests and
hosts rely on it.

>
> I'm wondering why offsets of the config structure change during run time
> and are not statically defined when the device is started.

That's because of backwards compatibility with old guests.
When we know the guest is new, we expose new layout,
but old guests must see old layout.

> It's not like VIRTIO_F_FEATURES_HI can be disabled after it was enabled,

Yes it can, e.g. at guest reset. Generally features can be tweaked
any way guest likes until status is set to OK.

> or MSI-X can be simply disabled during run time.

Not sure what you mean by 'run time'. Guest can reset
or disable the device, change any parameters,
then re-enable.

> Maybe this is better solved by copying the way it was done in PCI itself
> with capability linked list?
> 
> -- 
> 
> Sasha.

There are any number of ways to lay out the structure.  I went for what
seemed the simplest one.  For MSI-X the train has left the station.  We
can probably still tweak where the high 32 bit features
for 64 bit features are.  No idea if it's worth it.

-- 
MST

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox