Netdev List
 help / color / mirror / Atom feed
* [net 2/7] bnx2x: Add link retry to 578xx-KR
From: Yaniv Rosner @ 2011-10-26  9:40 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1319622039-10553-1-git-send-email-yanivr@broadcom.com>

This fix solves a problem of no link on 578xx-KR by retrying to link, up to
four times, using the periodic function.

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x_link.c |  117 ++++++++++++++++++++++++++++++++++------
 drivers/net/bnx2x/bnx2x_link.h |    3 +
 2 files changed, 104 insertions(+), 16 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c
index 5777dcd..974a50d 100644
--- a/drivers/net/bnx2x/bnx2x_link.c
+++ b/drivers/net/bnx2x/bnx2x_link.c
@@ -261,6 +261,7 @@
 
 #define MAX_PACKET_SIZE					(9700)
 #define WC_UC_TIMEOUT					100
+#define MAX_KR_LINK_RETRY				4
 
 /**********************************************************/
 /*                     INTERFACE                          */
@@ -3578,6 +3579,11 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 	u16 val16 = 0, lane, bam37 = 0;
 	struct bnx2x *bp = params->bp;
 	DP(NETIF_MSG_LINK, "Enable Auto Negotiation for KR\n");
+
+	/* Disable Autoneg: re-enable it after adv is done. */
+	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0);
+
 	/* Check adding advertisement for 1G KX */
 	if (((vars->line_speed == SPEED_AUTO_NEG) &&
 	     (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) ||
@@ -3619,9 +3625,6 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 			 MDIO_WC_REG_CL72_USERB0_CL72_2P5_DEF_CTRL,
 			 0x03f0);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL,
-			 0x383f);
 
 	/* Advertised speeds */
 	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
@@ -3648,19 +3651,22 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 	/* Advertise pause */
 	bnx2x_ext_phy_set_pause(params, phy, vars);
 
-	/* Enable Autoneg */
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
-			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1000);
-
-	/* Over 1G - AN local device user page 1 */
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_DIGITAL3_UP1, 0x1f);
+	vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY;
 
 	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
 			MDIO_WC_REG_DIGITAL5_MISC7, &val16);
 
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 			 MDIO_WC_REG_DIGITAL5_MISC7, val16 | 0x100);
+
+	/* Over 1G - AN local device user page 1 */
+	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
+			MDIO_WC_REG_DIGITAL3_UP1, 0x1f);
+
+	/* Enable Autoneg */
+	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1000);
+
 }
 
 static void bnx2x_warpcore_set_10G_KR(struct bnx2x_phy *phy,
@@ -4129,6 +4135,85 @@ static int bnx2x_is_sfp_module_plugged(struct bnx2x_phy *phy,
 	else
 		return 0;
 }
+static int bnx2x_warpcore_get_sigdet(struct bnx2x_phy *phy,
+					struct link_params *params)
+{
+	u16 gp2_status_reg0, lane;
+	struct bnx2x *bp = params->bp;
+
+	lane = bnx2x_get_warpcore_lane(phy, params);
+
+	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_GP2_STATUS_GP_2_0,
+				 &gp2_status_reg0);
+
+	return (gp2_status_reg0 >> (8+lane)) & 0x1;
+}
+
+static void bnx2x_warpcore_config_runtime(struct bnx2x_phy *phy,
+				       struct link_params *params,
+				       struct link_vars *vars)
+{
+	struct bnx2x *bp = params->bp;
+	u32 serdes_net_if;
+	u16 gp_status1 = 0, lnkup = 0, lnkup_kr = 0;
+	u16 lane = bnx2x_get_warpcore_lane(phy, params);
+
+	vars->turn_to_run_wc_rt = vars->turn_to_run_wc_rt ? 0 : 1;
+
+	if (!vars->turn_to_run_wc_rt)
+		return;
+
+	/* return if there is no link partner */
+	if (!(bnx2x_warpcore_get_sigdet(phy, params))) {
+		DP(NETIF_MSG_LINK, "bnx2x_warpcore_get_sigdet false\n");
+		return;
+	}
+
+	if (vars->rx_tx_asic_rst) {
+		serdes_net_if = (REG_RD(bp, params->shmem_base +
+				offsetof(struct shmem_region, dev_info.
+				port_hw_config[params->port].default_cfg)) &
+				PORT_HW_CFG_NET_SERDES_IF_MASK);
+
+		switch (serdes_net_if) {
+		case PORT_HW_CFG_NET_SERDES_IF_KR:
+			/* Do we get link yet? */
+			bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD, 0x81d1,
+								&gp_status1);
+			lnkup = (gp_status1 >> (8+lane)) & 0x1;/* 1G */
+				/*10G KR*/
+			lnkup_kr = (gp_status1 >> (12+lane)) & 0x1;
+
+			DP(NETIF_MSG_LINK,
+				"gp_status1 0x%x\n", gp_status1);
+
+			if (lnkup_kr || lnkup) {
+					vars->rx_tx_asic_rst = 0;
+					DP(NETIF_MSG_LINK,
+					"link up, rx_tx_asic_rst 0x%x\n",
+					vars->rx_tx_asic_rst);
+			} else {
+				/*reset the lane to see if link comes up.*/
+				bnx2x_warpcore_reset_lane(bp, phy, 1);
+				bnx2x_warpcore_reset_lane(bp, phy, 0);
+
+				/* restart Autoneg */
+				bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+					MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1200);
+
+				vars->rx_tx_asic_rst--;
+				DP(NETIF_MSG_LINK, "0x%x retry left\n",
+				vars->rx_tx_asic_rst);
+			}
+			break;
+
+		default:
+			break;
+		}
+
+	} /*params->rx_tx_asic_rst*/
+
+}
 
 static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
 				       struct link_params *params,
@@ -12331,11 +12416,6 @@ void bnx2x_period_func(struct link_params *params, struct link_vars *vars)
 {
 	struct bnx2x *bp = params->bp;
 	u16 phy_idx;
-	if (!params) {
-		DP(NETIF_MSG_LINK, "Uninitialized params !\n");
-		return;
-	}
-
 	for (phy_idx = INT_PHY; phy_idx < MAX_PHYS; phy_idx++) {
 		if (params->phy[phy_idx].flags & FLAGS_TX_ERROR_CHECK) {
 			bnx2x_set_aer_mmd(params, &params->phy[phy_idx]);
@@ -12344,8 +12424,13 @@ void bnx2x_period_func(struct link_params *params, struct link_vars *vars)
 		}
 	}
 
-	if (CHIP_IS_E3(bp))
+	if (CHIP_IS_E3(bp)) {
+		struct bnx2x_phy *phy = &params->phy[INT_PHY];
+		bnx2x_set_aer_mmd(params, phy);
 		bnx2x_check_over_curr(params, vars);
+		bnx2x_warpcore_config_runtime(phy, params, vars);
+	}
+
 }
 
 u8 bnx2x_hw_lock_required(struct bnx2x *bp, u32 shmem_base, u32 shmem2_base)
diff --git a/drivers/net/bnx2x/bnx2x_link.h b/drivers/net/bnx2x/bnx2x_link.h
index c12db6d..2a46e63 100644
--- a/drivers/net/bnx2x/bnx2x_link.h
+++ b/drivers/net/bnx2x/bnx2x_link.h
@@ -303,6 +303,9 @@ struct link_vars {
 #define PERIODIC_FLAGS_LINK_EVENT	0x0001
 
 	u32 aeu_int_mask;
+	u8 rx_tx_asic_rst;
+	u8 turn_to_run_wc_rt;
+	u16 rsrv2;
 };
 
 /***********************************************************/
-- 
1.7.7.1

^ permalink raw reply related

* [net 3/7] bnx2x: Fix RX/TX problem caused by the MAC layer
From: Yaniv Rosner @ 2011-10-26  9:40 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1319622039-10553-1-git-send-email-yanivr@broadcom.com>

This patch fixes a problem in which the host stops receiving data after
restarting the interface. This issue is caused by a combination of incorrect
data path tap closure, along with missing MAC reset.

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x_link.c |   49 ++++++++++++++++++++++++++++-----------
 1 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c
index 974a50d..ea2a11d 100644
--- a/drivers/net/bnx2x/bnx2x_link.c
+++ b/drivers/net/bnx2x/bnx2x_link.c
@@ -1494,6 +1494,18 @@ static void bnx2x_set_xumac_nig(struct link_params *params,
 	       NIG_REG_P0_MAC_PAUSE_OUT_EN, tx_pause_en);
 }
 
+static void bnx2x_umac_disable(struct link_params *params)
+{
+	u32 umac_base = params->port ? GRCBASE_UMAC1 : GRCBASE_UMAC0;
+	struct bnx2x *bp = params->bp;
+	if (!(REG_RD(bp, MISC_REG_RESET_REG_2) &
+		   (MISC_REGISTERS_RESET_REG_2_UMAC0 << params->port)))
+		return;
+
+	/* Disable RX and TX */
+	REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, 0);
+}
+
 static void bnx2x_umac_enable(struct link_params *params,
 			    struct link_vars *vars, u8 lb)
 {
@@ -1603,8 +1615,9 @@ static u8 bnx2x_is_4_port_mode(struct bnx2x *bp)
 }
 
 /* Define the XMAC mode */
-static void bnx2x_xmac_init(struct bnx2x *bp, u32 max_speed)
+static void bnx2x_xmac_init(struct link_params *params, u32 max_speed)
 {
+	struct bnx2x *bp = params->bp;
 	u32 is_port4mode = bnx2x_is_4_port_mode(bp);
 
 	/**
@@ -1614,10 +1627,11 @@ static void bnx2x_xmac_init(struct bnx2x *bp, u32 max_speed)
 	* ports of the path
 	**/
 
-	if (is_port4mode && (REG_RD(bp, MISC_REG_RESET_REG_2) &
+	if ((CHIP_NUM(bp) == CHIP_NUM_57840) &&
+	    (REG_RD(bp, MISC_REG_RESET_REG_2) &
 	     MISC_REGISTERS_RESET_REG_2_XMAC)) {
-		DP(NETIF_MSG_LINK, "XMAC already out of reset"
-				   " in 4-port mode\n");
+		DP(NETIF_MSG_LINK,
+		   "XMAC already out of reset in 4-port mode\n");
 		return;
 	}
 
@@ -1681,10 +1695,6 @@ static void bnx2x_xmac_disable(struct link_params *params)
 		       (pfc_ctrl | (1<<1)));
 		DP(NETIF_MSG_LINK, "Disable XMAC on port %x\n", port);
 		REG_WR(bp, xmac_base + XMAC_REG_CTRL, 0);
-		usleep_range(1000, 1000);
-		bnx2x_set_xumac_nig(params, 0, 0);
-		REG_WR(bp, xmac_base + XMAC_REG_CTRL,
-		       XMAC_CTRL_REG_SOFT_RESET);
 	}
 }
 
@@ -1697,7 +1707,7 @@ static int bnx2x_xmac_enable(struct link_params *params,
 
 	xmac_base = (params->port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0;
 
-	bnx2x_xmac_init(bp, vars->line_speed);
+	bnx2x_xmac_init(params, vars->line_speed);
 
 	/*
 	 * This register determines on which events the MAC will assert
@@ -6309,8 +6319,10 @@ static int bnx2x_update_link_down(struct link_params *params,
 		       MISC_REGISTERS_RESET_REG_2_CLEAR,
 	       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
 	}
-	if (CHIP_IS_E3(bp))
+	if (CHIP_IS_E3(bp)) {
 		bnx2x_xmac_disable(params);
+		bnx2x_umac_disable(params);
+	}
 
 	return 0;
 }
@@ -11802,8 +11814,10 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 	/* Stop BigMac rx */
 	if (!CHIP_IS_E3(bp))
 		bnx2x_bmac_rx_disable(bp, port);
-	else
+	else {
 		bnx2x_xmac_disable(params);
+		bnx2x_umac_disable(params);
+	}
 	/* disable emac */
 	if (!CHIP_IS_E3(bp))
 		REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0);
@@ -11841,14 +11855,21 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 	if (params->phy[INT_PHY].link_reset)
 		params->phy[INT_PHY].link_reset(
 			&params->phy[INT_PHY], params);
-	/* reset BigMac */
-	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR,
-	       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
 
 	/* disable nig ingress interface */
 	if (!CHIP_IS_E3(bp)) {
+		/* reset BigMac */
+		REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR,
+		       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
 		REG_WR(bp, NIG_REG_BMAC0_IN_EN + port*4, 0);
 		REG_WR(bp, NIG_REG_EMAC0_IN_EN + port*4, 0);
+	} else {
+		u32 xmac_base = (params->port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0;
+		bnx2x_set_xumac_nig(params, 0, 0);
+		if (REG_RD(bp, MISC_REG_RESET_REG_2) &
+		    MISC_REGISTERS_RESET_REG_2_XMAC)
+			REG_WR(bp, xmac_base + XMAC_REG_CTRL,
+			       XMAC_CTRL_REG_SOFT_RESET);
 	}
 	vars->link_up = 0;
 	vars->phy_flags = 0;
-- 
1.7.7.1

^ permalink raw reply related

* [net 5/7] bnx2x: Enable changing speed when port type is PORT_DA
From: Yaniv Rosner @ 2011-10-26  9:40 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1319622039-10553-1-git-send-email-yanivr@broadcom.com>

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x_ethtool.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index cf3e479..24d7ada 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -326,6 +326,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 			PORT_HW_CFG_PHY_SELECTION_FIRST_PHY;
 		break;
 	case PORT_FIBRE:
+	case PORT_DA:
 		if (bp->port.supported[cfg_idx] & SUPPORTED_FIBRE)
 			break; /* no port change */
 
-- 
1.7.7.1

^ permalink raw reply related

* [net 4/7] bnx2x: Fix 54618se LED behavior
From: Yaniv Rosner @ 2011-10-26  9:40 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1319622039-10553-1-git-send-email-yanivr@broadcom.com>

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x_link.c |   44 ++++++++++++++++++++-------------------
 1 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c
index ea2a11d..5acf274 100644
--- a/drivers/net/bnx2x/bnx2x_link.c
+++ b/drivers/net/bnx2x/bnx2x_link.c
@@ -5993,7 +5993,13 @@ int bnx2x_set_led(struct link_params *params,
 		       SHARED_HW_CFG_LED_MAC1);
 
 		tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED);
-		EMAC_WR(bp, EMAC_REG_EMAC_LED, (tmp | EMAC_LED_OVERRIDE));
+		if (params->phy[EXT_PHY1].type ==
+			  PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE)
+			EMAC_WR(bp, EMAC_REG_EMAC_LED, tmp & 0xfff1);
+		else {
+			EMAC_WR(bp, EMAC_REG_EMAC_LED,
+				(tmp | EMAC_LED_OVERRIDE));
+		}
 		break;
 
 	case LED_MODE_OPER:
@@ -6046,8 +6052,15 @@ int bnx2x_set_led(struct link_params *params,
 			else
 				REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4,
 				       hw_led_mode);
+		} else if ((params->phy[EXT_PHY1].type ==
+			    PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE) &&
+			   (mode != LED_MODE_OPER)) {
+			REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, 0);
+			tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED);
+			EMAC_WR(bp, EMAC_REG_EMAC_LED, tmp | 0x3);
 		} else
-			REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, hw_led_mode);
+			REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4,
+			       hw_led_mode);
 
 		REG_WR(bp, NIG_REG_LED_CONTROL_OVERRIDE_TRAFFIC_P0 + port*4, 0);
 		/* Set blinking rate to ~15.9Hz */
@@ -6059,8 +6072,13 @@ int bnx2x_set_led(struct link_params *params,
 			       LED_BLINK_RATE_VAL_E1X_E2);
 		REG_WR(bp, NIG_REG_LED_CONTROL_BLINK_RATE_ENA_P0 +
 		       port*4, 1);
-		tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED);
-		EMAC_WR(bp, EMAC_REG_EMAC_LED, (tmp & (~EMAC_LED_OVERRIDE)));
+		if ((params->phy[EXT_PHY1].type !=
+		     PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE) &&
+		    (mode != LED_MODE_OPER)) {
+			tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED);
+			EMAC_WR(bp, EMAC_REG_EMAC_LED,
+				(tmp & (~EMAC_LED_OVERRIDE)));
+		}
 
 		if (CHIP_IS_E1(bp) &&
 		    ((speed == SPEED_2500) ||
@@ -10304,22 +10322,6 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy,
 	return 0;
 }
 
-static void bnx2x_54618se_set_link_led(struct bnx2x_phy *phy,
-				       struct link_params *params, u8 mode)
-{
-	struct bnx2x *bp = params->bp;
-	DP(NETIF_MSG_LINK, "54618SE set link led (mode=%x)\n", mode);
-	switch (mode) {
-	case LED_MODE_FRONT_PANEL_OFF:
-	case LED_MODE_OFF:
-	case LED_MODE_OPER:
-	case LED_MODE_ON:
-	default:
-		break;
-	}
-	return;
-}
-
 static void bnx2x_54618se_link_reset(struct bnx2x_phy *phy,
 				     struct link_params *params)
 {
@@ -11095,7 +11097,7 @@ static struct bnx2x_phy phy_54618se = {
 	.config_loopback = (config_loopback_t)bnx2x_54618se_config_loopback,
 	.format_fw_ver	= (format_fw_ver_t)NULL,
 	.hw_reset	= (hw_reset_t)NULL,
-	.set_link_led	= (set_link_led_t)bnx2x_54618se_set_link_led,
+	.set_link_led	= (set_link_led_t)NULL,
 	.phy_specific_func = (phy_specific_func_t)NULL
 };
 /*****************************************************************/
-- 
1.7.7.1

^ permalink raw reply related

* [net 1/7] bnx2x: Fix LED blink rate for 578xx
From: Yaniv Rosner @ 2011-10-26  9:40 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1319622039-10553-1-git-send-email-yanivr@broadcom.com>

Adjust blink rate on 578xx to fit its clock rate.

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x_link.c |   11 +++++++++--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c
index ba15bdc..5777dcd 100644
--- a/drivers/net/bnx2x/bnx2x_link.c
+++ b/drivers/net/bnx2x/bnx2x_link.c
@@ -45,6 +45,9 @@
 #define MCPR_IMC_COMMAND_READ_OP	1
 #define MCPR_IMC_COMMAND_WRITE_OP	2
 
+/* LED Blink rate that will achieve ~15.9Hz */
+#define LED_BLINK_RATE_VAL_E3		354
+#define LED_BLINK_RATE_VAL_E1X_E2	480
 /***********************************************************/
 /*			Shortcut definitions		   */
 /***********************************************************/
@@ -5953,8 +5956,12 @@ int bnx2x_set_led(struct link_params *params,
 
 		REG_WR(bp, NIG_REG_LED_CONTROL_OVERRIDE_TRAFFIC_P0 + port*4, 0);
 		/* Set blinking rate to ~15.9Hz */
-		REG_WR(bp, NIG_REG_LED_CONTROL_BLINK_RATE_P0 + port*4,
-		       LED_BLINK_RATE_VAL);
+		if (CHIP_IS_E3(bp))
+			REG_WR(bp, NIG_REG_LED_CONTROL_BLINK_RATE_P0 + port*4,
+			       LED_BLINK_RATE_VAL_E3);
+		else
+			REG_WR(bp, NIG_REG_LED_CONTROL_BLINK_RATE_P0 + port*4,
+			       LED_BLINK_RATE_VAL_E1X_E2);
 		REG_WR(bp, NIG_REG_LED_CONTROL_BLINK_RATE_ENA_P0 +
 		       port*4, 1);
 		tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED);
-- 
1.7.7.1

^ permalink raw reply related

* [net 6/7] bnx2x: use FW 7.0.29.0
From: Yaniv Rosner @ 2011-10-26  9:40 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1319622039-10553-1-git-send-email-yanivr@broadcom.com>

From: Dmitry Kravkov <dmitry@broadcom.com>

The FW includes the following fixes:
  1. (iSCSI) Arrival of un-solicited ASYNC message causes
     firmware to abort the connection with RST.
  2. (FCoE) There is a probability that truncated FCoE packet on
     RX path won't get detected which might lead to FW assert.
  3. (iSCSI) Arrival of target-initiated NOP-IN during intense
     ISCSI traffic might lead to FW assert.
  4. (iSCSI) Chip hangs in case of a retransmission not aligned
     to 4-bytes from the beginning of iSCSI PDU.
  5. (FCoE) Arrival of packets beyond task IO size can lead to crash.

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x_hsi.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_hsi.h b/drivers/net/bnx2x/bnx2x_hsi.h
index dc24de4..8beb5b5 100644
--- a/drivers/net/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/bnx2x/bnx2x_hsi.h
@@ -2548,7 +2548,7 @@ struct host_func_stats {
 
 #define BCM_5710_FW_MAJOR_VERSION			7
 #define BCM_5710_FW_MINOR_VERSION			0
-#define BCM_5710_FW_REVISION_VERSION		23
+#define BCM_5710_FW_REVISION_VERSION		29
 #define BCM_5710_FW_ENGINEERING_VERSION		0
 #define BCM_5710_FW_COMPILE_FLAGS			1
 
-- 
1.7.7.1

^ permalink raw reply related

* [net 7/7] bnx2x: update driver version to 1.70.30-0
From: Yaniv Rosner @ 2011-10-26  9:40 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1319622039-10553-1-git-send-email-yanivr@broadcom.com>

From: Dmitry Kravkov <dmitry@broadcom.com>

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 9a7eb3b..54975a2 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -23,8 +23,8 @@
  * (you will need to reboot afterwards) */
 /* #define BNX2X_STOP_ON_ERROR */
 
-#define DRV_MODULE_VERSION      "1.70.00-0"
-#define DRV_MODULE_RELDATE      "2011/06/13"
+#define DRV_MODULE_VERSION      "1.70.30-0"
+#define DRV_MODULE_RELDATE      "2011/10/25"
 #define BNX2X_BC_VER            0x040200
 
 #if defined(CONFIG_DCB)
-- 
1.7.7.1

^ permalink raw reply related

* Re: Bug#645308: tg3 broken for NetXtreme 5714S in squeeze 6.0.3 installer
From: Ben Hutchings @ 2011-10-26  7:41 UTC (permalink / raw)
  To: Matt Carlson; +Cc: Michael Chan, 645308@bugs.debian.org, Marc Haber, netdev
In-Reply-To: <20111026002032.GA31575@mcarlson.broadcom.com>

[-- Attachment #1: Type: text/plain, Size: 3089 bytes --]

On Tue, 2011-10-25 at 17:20 -0700, Matt Carlson wrote:
> On Mon, Oct 24, 2011 at 04:47:54PM -0700, Ben Hutchings wrote:
> > On Mon, 2011-10-24 at 14:24 -0700, Matt Carlson wrote:
> > > On Fri, Oct 21, 2011 at 05:19:39AM -0700, Ben Hutchings wrote:
> > > > On Fri, 2011-10-21 at 11:08 +0200, Marc Haber wrote:
> > > > > On Fri, Oct 21, 2011 at 11:00:46AM +0200, Marc Haber wrote:
> > > > > > On Thu, Oct 20, 2011 at 05:28:34AM +0100, Ben Hutchings wrote:
> > > > > > > I don't see any changes that would obviously change the way this device
> > > > > > > is reconfigured during a down/up cycle.  There were some changes to
> > > > > > > power management that should just let the PCI core do some work that the
> > > > > > > driver used to, but it's possible that the result isn't quite the same.
> > > > > > > I built a module with those reverted; source and binary attached.  Could
> > > > > > > you test that?  I checked that d-i does include an insmod command.
> > > > > > 
> > > > > > The squeeze 6.0.3 installer with the shipped tg3.ko replaced with
> > > > > > yours boots and networks just fine without any workaround and without
> > > > > > manual interaction.
> > > > > 
> > > > > I was a bit fast on that. The interface now fails right in the middle
> > > > > of installation and needs the modprobe -r, modprobe stunt to network
> > > > > again.
> > > > 
> > > > Matt, Michael,
> > > > 
> > > > The tg3 driver has regressed for the 5714S since Linux 2.6.32.  Marc
> > > > Haber found this in the backported version included in our stable
> > > > update, but also confirmed it in Linux 3.0.
> > > > 
> > > > Bringing the interface down and then up again (which the installer does
> > > > for some reason) can leave it unable to pass traffic (possibly after
> > > > working for a few packets) until the module is reloaded.
> > > > 
> > > > I asked Marc to check whether reverting the power management changes
> > > > (071697e2bcd8dff2af4d6fdd6525c2324f89553b,
> > > > d237d9ecf06a00f0ebca657958cf2a1e92940796) made a difference, but it
> > > > doesn't seem to.
> > > > 
> > > > There is more information in the bug log at
> > > > <http://bugs.debian.org/645308>.
> > > 
> > > Where can I get the sources for this driver?  Commit
> > > 9e975cc291d80d5e4562d6bed15ec171e896d69b, entitled
> > > "tg3: Fix io failures after chip reset" has been a common source of
> > > problems.
> > 
> > Our current package has Linux 3.0.6 which includes the backport of that
> > change.  However, it is *not* included in my backport to 2.6.32 so it
> > doesn't explain the original report.
> > 
> > The backported version can be found in:
> > 
> > git://anonscm.debian.org/kernel/linux-2.6.git squeeze
> 
> The kernel version of that repository is 3.0.0-rc1.  Am I looking in the
> right place?

Look at the squeeze branch, not master.

Ben.

> But you're right.  The version of the driver in that repository does not
> have the change.
> 
> 

-- 
Ben Hutchings
Reality is just a crutch for people who can't handle science fiction.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply

* Re: [PATCH V2 2/4] MIPS: Add board support for Loongson1B
From: Kelvin Cheung @ 2011-10-26  7:48 UTC (permalink / raw)
  To: Giuseppe CAVALLARO
  Cc: Wu Zhangjin, linux-mips, linux-kernel, ralf, r0bertz, netdev
In-Reply-To: <4EA7B4D7.9000101@st.com>

It's perfect now.
Please add me to CC list when you send the new patch.

Thanks a lot for your help.

2011/10/26, Giuseppe CAVALLARO <peppe.cavallaro@st.com>:
> Hello Kelvin
>
> On 10/26/2011 6:27 AM, Kelvin Cheung wrote:
>> Hi Giuseppe,
>>
>> This patch works well on Loongson1B platform except one thing.
>> The rx checksum offload of normal descriptor is disabled by default.
>> So, I enabled this function. And one minor tweak is added to your patch.
>> What about your opinion?
>
> Yes, I had not enabled the rx coe. I'm resending your patch (v3) where I
> fixed a problem. Old mac10/100 has no rx csum in HW. So I added an extra
> check.
> Let me consider it the final version. ;-)
>
> Thanks a lot for your effort.
> I'll send it for net-next now.
>
> Regards
> Peppe
>
>>
>> BTW, tx checksum insertion works now.
>>
>> 2011/10/25, Giuseppe CAVALLARO <peppe.cavallaro@st.com>:
>>> On 10/25/2011 9:09 AM, Giuseppe CAVALLARO wrote:
>>>> On 10/25/2011 4:12 AM, Kelvin Cheung wrote:
>>>>> 2011/10/24, Giuseppe CAVALLARO <peppe.cavallaro@st.com>:
>>>>>> On 10/24/2011 4:05 PM, Kelvin Cheung wrote:
>>>>>>> 2011/10/24, Giuseppe CAVALLARO <peppe.cavallaro@st.com>:
>>>>>>>> Hello Kelvin.
>>>>>>>>
>>>>>>>> On 10/24/2011 12:36 PM, Kelvin Cheung wrote:
>>>>>>>>
>>>>>>>> [snip]
>>>>>>>>
>>>>>>>>> According to datasheet of Loongson 1B, the buffer size in RX/TX
>>>>>>>>> descriptor is only 2KB. So the Loongson1B's GMAC could not handle
>>>>>>>>> jumbo frames. And the second buffer is useless in this case. Am I
>>>>>>>>> right? Is there a better way than ifdef CONFIG_MACH_LOONGSON1 to
>>>>>>>>> avoid duplicate code?
>>>>>>>>
>>>>>>>> Sorry for my misunderstanding.
>>>>>>>>
>>>>>>>> I think you have to use the normal descriptor and remove the
>>>>>>>> enh_desc
>>>>>>>> from the platform w/o modifying the driver at all.
>>>>>>>>
>>>>>>>> The driver will be able to select/configure all automatically (also
>>>>>>>> jumbo).
>>>>>>>>
>>>>>>>> Let me know.
>>>>>>>
>>>>>>> That's the problem.
>>>>>>> The bitfield definition of Loongson1B is also different from normal
>>>>>>> descriptor.
>>>>>>
>>>>>> The problem is not in the Loongson1B gmac.
>>>>>
>>>>> I found that the bit checksum_insertion does not exist in the normal
>>>>> descriptor.
>>>>>
>>>>>> The normal descriptor fields in the stmmac refer to an old synopsys
>>>>>> databook.
>>>>>
>>>>> Could you send me the new databook of Synopsys GMAC?
>>>>>
>>>>>> New chips have the same structure you have added; so we should fix
>>>>>> this
>>>>>> in the driver w/o breaking the compatibility for old chips.
>>>>>
>>>>> Agree.
>>>>>
>>>>>> I kindly ask you to confirm if the currently normal descriptor
>>>>>> structure
>>>>>> (w/o your changes) doesn't work on your platform.
>>>>>> Did you test it?
>>>>>
>>>>> Well, the normal descriptor works on my platform except TX checksum
>>>>> offload.
>>>>
>>>> ok! I suspected that.
>>>>
>>>>
>>>>>>> Moreover, I want to enable the TX checksum offload function which is
>>>>>>> not supported in normal descriptor.
>>>>>>> Any suggestions?
>>>>>>
>>>>>> It is supported but you have to pass from the platform: tx_coe = 1.
>>>>>
>>>>> I noticed that the flag csum_insertion is passed to
>>>>> ndesc_prepare_tx_desc() in stmmac_xmit(). But ndesc_prepare_tx_desc()
>>>>> just ignores it.
>>>>> In other words, the TX checksum offload function is disabled in normal
>>>>> descriptor currently.
>>>>>
>>>>> Should we fix this problem for normal descriptor?
>>>>
>>>> Yes, we should. If you agree, I'll update the normal descriptor
>>>> structure to yours. This is the normal descriptor used in newer GMAC.
>>>> Tx csum will be done for normal descriptors in case of these GMAC
>>>> devices and not for old MAC10/100. For the MAC10/100 some bits for
>>>> normal descriptors are reserved and won't be used at all.
>>>>
>>>> I'll also verify that the patch doesn't break the back-compatibility
>>>> with old MAC10/100. I have the HW where doing the tests.
>>>>
>>>> After that, I'll prepare the patch for net-next and for your kernel.
>>>
>>> Hello Kelvin
>>>
>>> attached the patch tested on my development kernel.
>>> It runs fine on old and new mac devices.
>>>
>>> Can you try it on your side? Hmm, it is likely it won't apply fine on
>>> your tree but you know the changes ;-).
>>>
>>> If ok, I'll rework it for net-next and send it to the mailing list.
>>>
>>> Thanks
>>> Peppe
>>>
>>>>
>>>>>
>>>>>> Peppe
>>>>>>>
>>>>>>>> Note:
>>>>>>>> IIRC, there is a bit difference in case of normal descriptors for
>>>>>>>> Synopsys databook newer than the 1.91 (I used for testing this
>>>>>>>> mode).
>>>>>>>> In any case, I remember that, on some platforms, the normal
>>>>>>>> descriptors
>>>>>>>> have been used w/o problems also on these new chip generations.
>>>>>>>>
>>>>>>>> Peppe
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>
>>>>>>
>>>>>
>>>>>
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>>
>>>
>>>
>>
>>
>
>


-- 
Best Regards!
Kelvin

^ permalink raw reply

* Re: [PATCH net-next] ipv4: use IS_ENABLED() macro to cleanup code
From: Ben Hutchings @ 2011-10-26  8:00 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1319610874.18883.8.camel@edumazet-laptop>

On Wed, 2011-10-26 at 08:34 +0200, Eric Dumazet wrote:
> Le mardi 25 octobre 2011 à 19:30 -0400, David Miller a écrit :
> 
> > net/ipv4/ip_sockglue.c: In function ‘do_ip_setsockopt’:
> > net/ipv4/ip_sockglue.c:523:29: error: ‘LOOPBACK4_IPV6’ undeclared (first use in this function)
> > net/ipv4/ip_sockglue.c:523:29: note: each undeclared identifier is reported only once for each function it appears in
> > 
> > This fails because ip_sockglue.c guards the net/transp_v6.h header
> > inclusion with a real CPP guard.
> 
> Yep, it seems compiler was not able to perform optimisation of dead
> code.
[...]

No, the compiler is just checking the code further than you wanted
before optimising it away.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH net-next] ipv4: use IS_ENABLED() macro to cleanup code
From: Eric Dumazet @ 2011-10-26  8:05 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: David Miller, netdev
In-Reply-To: <1319616054.11727.19.camel@deadeye>

Le mercredi 26 octobre 2011 à 10:00 +0200, Ben Hutchings a écrit :
> On Wed, 2011-10-26 at 08:34 +0200, Eric Dumazet wrote:
> > Le mardi 25 octobre 2011 à 19:30 -0400, David Miller a écrit :
> > 
> > > net/ipv4/ip_sockglue.c: In function ‘do_ip_setsockopt’:
> > > net/ipv4/ip_sockglue.c:523:29: error: ‘LOOPBACK4_IPV6’ undeclared (first use in this function)
> > > net/ipv4/ip_sockglue.c:523:29: note: each undeclared identifier is reported only once for each function it appears in
> > > 
> > > This fails because ip_sockglue.c guards the net/transp_v6.h header
> > > inclusion with a real CPP guard.
> > 
> > Yep, it seems compiler was not able to perform optimisation of dead
> > code.
> [...]
> 
> No, the compiler is just checking the code further than you wanted
> before optimising it away.
> 

Well, I was not saying compiler was guilty here.

Dead code must be parsed and compiled too.

^ permalink raw reply

* [PATCH 1/2] net/smsc911x: Always wait for the chip to be ready
From: Linus Walleij @ 2011-10-26  8:05 UTC (permalink / raw)
  To: netdev, Steve Glendinning; +Cc: Mathieu Poirer, Robert Marklund, Linus Walleij

From: Robert Marklund <robert.marklund@stericsson.com>

Wait for the chip to be ready before any access to it. On the
Snowball platform we need to enable an external regulator before
the chip comes online, and then it happens that the device is
not yet ready at probe time, so let's wait for it.

Signed-off-by: Robert Marklund <robert.marklund@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/net/ethernet/smsc/smsc911x.c |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index d2be42a..8843071 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -1937,6 +1937,7 @@ static int __devinit smsc911x_init(struct net_device *dev)
 {
 	struct smsc911x_data *pdata = netdev_priv(dev);
 	unsigned int byte_test;
+	unsigned int to = 100;
 
 	SMSC_TRACE(pdata, probe, "Driver Parameters:");
 	SMSC_TRACE(pdata, probe, "LAN base: 0x%08lX",
@@ -1952,6 +1953,17 @@ static int __devinit smsc911x_init(struct net_device *dev)
 		return -ENODEV;
 	}
 
+	/*
+	 * poll the READY bit in PMT_CTRL. Any other access to the device is
+	 * forbidden while this bit isn't set. Try for 100ms
+	 */
+	while (!(smsc911x_reg_read(pdata, PMT_CTRL) & PMT_CTRL_READY_) && --to)
+		udelay(1000);
+	if (to == 0) {
+		pr_err("Device not READY in 100ms aborting\n");
+		return -ENODEV;
+	}
+
 	/* Check byte ordering */
 	byte_test = smsc911x_reg_read(pdata, BYTE_TEST);
 	SMSC_TRACE(pdata, probe, "BYTE_TEST: 0x%08X", byte_test);
-- 
1.7.3.2

^ permalink raw reply related

* [PATCH] HFSC (7) & (8) documentation + assorted changes
From: Mike Frysinger @ 2011-10-26  8:15 UTC (permalink / raw)
  To: stephen.hemminger, netdev; +Cc: Michal Soltys

From: Michal Soltys <soltys@ziu.info>

This patch adds detailed documentation for HFSC scheduler. It roughly
follows HFSC paper, but tries to not rely too much on math side of things.
Post-paper/Linux specific subjects (timer resolution, ul service curve, etc.)
are also discussed.

I've read it many times over, but it's a lengthy chunk of text - so try
to be understanding in case I made some mistakes.

tc-hfsc(7): explains algorithm in detail (very long)
tc-hfsc(8): explains command line options briefly
tc(8): adds references to new man pages
Makefile: adds man7 directory to install target
q_hfsc.c: minimal help text changes, consistency with tc-hfsc(8)

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
Note: i can't find any sign off from Michal Soltys, so we'll probably need
	him to post it before we can merge ...

 Makefile           |    2 +
 man/man7/tc-hfsc.7 |  525 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 man/man8/tc-hfsc.8 |   61 ++++++
 man/man8/tc-stab.8 |  156 ++++++++++++++++
 man/man8/tc.8      |    3 +
 tc/q_hfsc.c        |    6 +-
 6 files changed, 752 insertions(+), 1 deletions(-)
 create mode 100644 man/man7/tc-hfsc.7
 create mode 100644 man/man8/tc-hfsc.8
 create mode 100644 man/man8/tc-stab.8

diff --git a/Makefile b/Makefile
index d1ace1f..c6e4943 100644
--- a/Makefile
+++ b/Makefile
@@ -60,6 +60,8 @@ install: all
 	install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
 	install -m 0755 -d $(DESTDIR)$(MANDIR)/man8
 	install -m 0644 $(shell find man/man8 -maxdepth 1 -type f) $(DESTDIR)$(MANDIR)/man8
+	install -m 0755 -d $(DESTDIR)$(MANDIR)/man7
+	install -m 0644 $(shell find man/man7 -maxdepth 1 -type f) $(DESTDIR)$(MANDIR)/man7
 	ln -sf tc-bfifo.8  $(DESTDIR)$(MANDIR)/man8/tc-pfifo.8
 	ln -sf lnstat.8  $(DESTDIR)$(MANDIR)/man8/rtstat.8
 	ln -sf lnstat.8  $(DESTDIR)$(MANDIR)/man8/ctstat.8
diff --git a/man/man7/tc-hfsc.7 b/man/man7/tc-hfsc.7
new file mode 100644
index 0000000..bcdea7b
--- /dev/null
+++ b/man/man7/tc-hfsc.7
@@ -0,0 +1,525 @@
+.TH HFSC 7 "25 February 2009" iproute2 Linux
+.ce 1
+\fBHIERARCHICAL FAIR SERVICE CURVE\fR
+.
+.SH "HISTORY & INTRODUCTION"
+.
+HFSC \- \fBHierarchical Fair Service Curve\fR was first presented at
+SIGCOMM'97. Developed as a part of ALTQ (ALTernative Queuing) on NetBSD, found
+its way quickly to other BSD systems, and then a few years ago became part of
+the linux kernel. Still, it's not the most popular scheduling algorithm \-
+especially if compared to HTB \- and it's not well documented from enduser's
+perspective. This introduction aims to explain how HFSC works without
+going too deep into the math side of things (although some of it will be
+inevitable).
+
+In short HFSC aims to:
+.
+.RS 4
+.IP \fB1)\fR 4
+guarantee precise bandwidth and delay allocation for all leaf classes (realtime
+criterion)
+.IP \fB2)\fR
+allocate excess bandwidth fairly as specified by class hierarchy (linkshare &
+upperlimit criterion)
+.IP \fB3)\fR
+minimize any discrepancy between the service curve and the actual amount of
+service provided during linksharing
+.RE
+.PP
+.
+The main "selling" point of HFSC is feature \fB(1)\fR, which is achieved by
+using nonlinear service curves (more about what it actually is later). This is
+particularly useful in VoIP or games, where not only guarantee of consistent
+bandwidth is important, but initial delay of a data stream as well. Note that
+it matters only for leaf classes (where the actual queues are) \- thus class
+hierarchy is ignored in realtime case.
+
+Feature \fB(2)\fR is well, obvious \- any algorithm featuring class hierarchy
+(such as HTB or CBQ) strives to achieve that. HFSC does that well, although
+you might end with unusual situations, if you define service curves carelessly
+\- see section CORNER CASES for examples.
+
+Feature \fB(3)\fR is mentioned due to the nature of the problem. There may be
+situations where it's either not possible to guarantee service of all curves at
+the same time, and/or it's impossible to do so fairly. Both will be explained
+later. Note that this is mainly related to interior (aka aggregate) classes, as
+the leafs are already handled by \fB(1)\fR. Still \- it's perfectly possible to
+create a leaf class w/o realtime service, and in such case \- the caveats will
+naturally extend to leaf classes as well.
+
+.SH ABBREVIATIONS
+For the remaining part of the document, we'll use following shortcuts:
+.nf
+.RS 4
+
+RT \- realtime
+LS \- linkshare
+UL \- upperlimit
+SC \- service curve
+.fi
+.
+.SH "BASICS OF HFSC"
+.
+To understand how HFSC works, we must first introduce a service curve.
+Overall, it's a nondecreasing function of some time unit, returning amount of
+service (allowed or allocated amount of bandwidth) by some specific point in
+time. The purpose of it should be subconsciously obvious \- if a class was
+allowed to transfer not less than the amount specified by its service curve \-
+then service curve is not violated.
+
+Still \- we need more elaborate criterion than just the above (although in
+most generic case it can be reduced to it). The criterion has to take two
+things into account:
+.
+.RS 4
+.IP \(bu 4
+idling periods
+.IP \(bu
+ability to "look back", so if during current active period service curve is violated, maybe it
+isn't if we count excess bandwidth received during earlier active period(s)
+.RE
+.PP
+Let's define the criterion as follows:
+.RS 4
+.nf
+.IP "\fB(1)\fR" 4
+For each t1, there must exist t0 in set B, so S(t1\-t0)\~<=\~w(t0,t1)
+.fi
+.RE
+.
+.PP
+Here 'w' denotes the amount of service received during some time period between t0
+and t1. B is a set of all times, where a session becomes active after idling
+period (further denoted as 'becoming backlogged'). For a clearer picture,
+imagine two situations:
+.
+.RS 4
+.IP \fBa)\fR 4
+our session was active during two periods, with a small time gap between them
+.IP \fBb)\fR
+as in (a), but with a larger gap
+.RE
+.
+.PP
+Consider \fB(a)\fR \- if the service received during both periods meets
+\fB(1)\fR, then all is good. But what if it doesn't do so during the 2nd
+period ? If the amount of service received during the 1st period is bigger
+than the service curve, then it might compensate for smaller service during
+the 2nd period \fIand\fR the gap \- if the gap is small enough.
+
+If the gap is larger \fB(b)\fR \- then it's less likely to happen (unless the
+excess bandwidth allocated during the 1st part was really large). Still, the
+larger the gap \- the less interesting is what happened in the past (e.g. 10
+minutes ago) \- what matters is the current traffic that just started.
+
+From HFSC's perspective, more interesting is answering the following question:
+when should we start transferring packets, so a service curve of a class is not
+violated. Or rephrasing it: How much X() amount of service should a session
+receive by time t, so the service curve is not violated. Function X() defined
+as below is the basic building block of HFSC, used in: eligible, deadline,
+virtual\-time and fit\-time curves. Of course, X() is based on equation
+\fB(1)\fR and is defined recursively:
+
+.RS 4
+.IP \(bu 4
+At the 1st backlogged period beginning function X is initialized to generic
+service curve assigned to a class
+.IP \(bu
+At any subsequent backlogged period, X() is:
+.nf
+\fBmin(X() from previous period ; w(t0)+S(t\-t0) for t>=t0),\fR
+.fi
+\&... where t0 denotes the beginning of the current backlogged period.
+.RE
+.
+.PP
+HFSC uses either linear, or two\-piece linear service curves. In case of
+linear or two\-piece linear convex functions (first slope < second slope),
+min() in X's definition reduces to the 2nd argument. But in case of two\-piece
+concave functions, the 1st argument might quickly become lesser for some
+t>=t0. Note, that for some backlogged period, X() is defined only from that
+period's beginning. We also define X^(\-1)(w) as smallest t>=t0, for which
+X(t)\~=\~w. We have to define it this way, as X() is usually not an injection.
+
+The above generic X() can be one of the following:
+.
+.RS 4
+.IP "E()" 4
+In realtime criterion, selects packets eligible for sending. If none are
+eligible, HFSC will use linkshare criterion. Eligible time \&'et' is calculated
+with reference to packets' heads ( et\~=\~E^(\-1)(w) ). It's based on RT
+service curve, \fIbut in case of a convex curve, uses its 2nd slope only.\fR
+.IP "D()"
+In realtime criterion, selects the most suitable packet from the ones chosen
+by E(). Deadline time \&'dt' corresponds to packets' tails
+(dt\~=\~D^(\-1)(w+l), where \&'l' is packet's length). Based on RT service
+curve.
+.IP "V()"
+In linkshare criterion, arbitrates which packet to send next. Note that V() is
+function of a virtual time \- see \fBLINKSHARING CRITERION\fR section for
+details.  Virtual time \&'vt' corresponds to packets' heads
+(vt\~=\~V^(\-1)(w)). Based on LS service curve.
+.IP "F()"
+An extension to linkshare criterion, used to limit at which speed linkshare
+criterion is allowed to dequeue. Fit\-time 'ft' corresponds to packets' heads
+as well (ft\~=\~F^(\-1)(w)). Based on UL service curve.
+.RE
+
+Be sure to make clean distinction between session's RT, LS and UL service
+curves and the above "utility" functions.
+.
+.SH "REALTIME CRITERION"
+.
+RT criterion \fIignores class hierarchy\fR and guarantees precise bandwidth and
+delay allocation. We say that packet is eligible for sending, when current real
+time is bigger than eligible time. From all packets eligible, the one most
+suited for sending, is the one with the smallest deadline time. Sounds simple,
+but consider the following example:
+
+Interface 10mbit, two classes, both with two\-piece linear service curves:
+.RS 4
+.IP \(bu 4
+1st class \- 2mbit for 100ms, then 7mbit (convex \- 1st slope < 2nd slope)
+.IP \(bu
+2nd class \- 7mbit for 100ms, then 2mbit (concave \- 1st slope > 2nd slope)
+.RE
+.PP
+Assume for a moment, that we only use D() for both finding eligible packets,
+and choosing the most fitting one, thus eligible time would be computed as
+D^(\-1)(w) and deadline time would be computed as D^(\-1)(w+l).  If the 2nd
+class starts sending packets 1 second after the 1st class, it's of course
+impossible to guarantee 14mbit, as the interface capability is only 10mbit.
+The only workaround in this scenario is to allow the 1st class to send the
+packets earlier than would normally be allowed. That's where separate E() comes
+to help.  Putting all the math aside (see HFSC paper for details), E() for RT
+concave service curve is just like D(), but for the RT convex service curve \-
+it's constructed using \fIonly\fR RT service curve's 2nd slope (in our example
+\- 7mbit).
+
+The effect of such E() \- packets will be sent earlier, and at the same time
+D() \fIwill\fR be updated \- so current deadline time calculated from it will
+be bigger. Thus, when the 2nd class starts sending packets later, both the 1st
+and the 2nd class will be eligible, but the 2nd session's deadline time will be
+smaller and its packets will be sent first. When the 1st class becomes idle at
+some later point, the 2nd class will be able to "buffer" up again for later
+active period of the 1st class.
+
+A short remark \- in a situation, where the total amount of bandwidth
+available on the interface is bigger than the allocated total realtime parts
+(imagine interface 10 mbit, but 1mbit/2mbit and 2mbit/1mbit classes), the sole
+speed of the interface could suffice to guarantee the times.
+
+Important part of RT criterion is that apart from updating its D() and E(),
+also V() used by LS criterion is updated. Generally the RT criterion is
+secondary to LS one, and used \fIonly\fR if there's a risk of violating precise
+realtime requirements. Still, the "participation" in bandwidth distributed by
+LS criterion is there, so V() has to be updated along the way. LS criterion can
+then properly compensate for non\-ideal fair sharing situation, caused by RT
+scheduling. If you use UL service curve its F() will be updated as well (UL
+service curve is an extension to LS one \- see \fBUPPERLIMIT CRITERION\fR
+section).
+
+Anyway \- careless specification of LS and RT service curves can lead to
+potentially undesired situations (see CORNER CASES for examples). This wasn't
+the case in HFSC paper where LS and RT service curves couldn't be specified
+separately.
+
+.SH "LINKSHARING CRITERION"
+.
+LS criterion's task is to distribute bandwidth according to specified class
+hierarchy. Contrary to RT criterion, there're no comparisons between current
+real time and virtual time \- the decision is based solely on direct comparison
+of virtual times of all active subclasses \- the one with the smallest vt wins
+and gets scheduled. One immediate conclusion from this fact is that absolute
+values don't matter \- only ratios between them (so for example, two children
+classes with simple linear 1mbit service curves will get the same treatment
+from LS criterion's perspective, as if they were 5mbit). The other conclusion
+is, that in perfectly fluid system with linear curves, all virtual times across
+whole class hierarchy would be equal.
+
+Why is V() defined in terms of virtual time (and what is it)?
+
+Imagine an example: class A with two children \- A1 and A2, both with let's say
+10mbit SCs. If A2 is idle, A1 receives all the bandwidth of A (and updates its
+V() in the process). When A2 becomes active, A1's virtual time is already
+\fIfar\fR bigger than A2's one. Considering the type of decision made by LS
+criterion, A1 would become idle for a lot of time. We can workaround this
+situation by adjusting virtual time of the class becoming active \- we do that
+by getting such time "up to date". HFSC uses a mean of the smallest and the
+biggest virtual time of currently active children fit for sending. As it's not
+real time anymore (excluding trivial case of situation where all classes become
+active at the same time, and never become idle), it's called virtual time.
+
+Such approach has its price though. The problem is analogous to what was
+presented in previous section and is caused by non\-linearity of service
+curves:
+.IP 1) 4
+either it's impossible to guarantee both service curves and satisfy fairness
+during certain time periods:
+
+.RS 4
+Recall the example from RT section, slightly modified (with 3mbit slopes
+instead of 2mbit ones):
+
+.IP \(bu 4
+1st class \- 3mbit for 100ms, then 7mbit (convex \- 1st slope < 2nd slope)
+.IP \(bu
+2nd class \- 7mbit for 100ms, then 3mbit (concave \- 1st slope > 2nd slope)
+
+.PP
+They sum up nicely to 10mbit \- interface's capacity. But if we wanted to only
+use LS for guarantees and fairness \- it simply won't work. In LS context,
+only V() is used for making decision which class to schedule. If the 2nd class
+becomes active when the 1st one is in its second slope, the fairness will be
+preserved \- ratio will be 1:1 (7mbit:7mbit), but LS itself is of course
+unable to guarantee the absolute values themselves \- as it would have to go
+beyond of what the interface is capable of.
+.RE
+
+.IP 2) 4
+and/or it's impossible to guarantee service curves of all classes at all
+
+.RS 4
+Even if we didn't use virtual time and allowed a session to be "punished",
+there's a possibility that service curves of all classes couldn't be
+guaranteed for a brief period. Consider following, a bit more complicated
+example:
+
+Root interface, classes A and B with concave and convex curve (summing up to
+root), A1 & A2 (children of A), \fIboth\fR with concave curves summing up to A,
+B1 & B2 (children of B), \fIboth\fR with convex curves summing up to B.
+
+Assume that A2, B1 and B2 are constantly backlogged, and at some later point
+A1 becomes backlogged. We can easily choose slopes, so that even if we
+"punish" A2 for earlier excess bandwidth received, A1 will have no chance of
+getting bandwidth corresponding to its first slope. Following from the above
+example:
+
+.nf
+A  \- 7mbit, then 3mbit
+A1 \- 5mbit, then 2mbit
+A2 \- 2mbit, then 1mbit
+
+B  \- 3mbit, then 7mbit
+B1 \- 2mbit, then 5mbit
+B2 \- 1mbit, then 2mbit
+.fi
+
+At the point when A1 starts sending, it should get 5mbit to not violate its
+service curve. A2 gets punished and doesn't send at all, B1 and B2 both keep
+sending at their 5mbit and 2mbit. But as you can see, we already are beyond
+interface's capacity \- at 12mbit. A1 could get 3mbit at most. If we used
+virtual times and kept fairness property, A1 and A2 would send at 3mbit
+together with 5:2 ratio (so respectively at ~2.14mbit and ~0.86mbit).
+.RE
+.
+.SH "UPPERLIMIT CRITERION"
+.
+UL criterion is an extension to LS one, that permits sending packets only
+if current real time is bigger than fit\-time ('ft'). So the modified LS
+criterion becomes: choose the smallest virtual time from all active children,
+such that fit\-time < current real time also holds. Fit\-time is calculated
+from F(), which is based on UL service curve. As you can see, its role is
+kinda similar to E() used in RT criterion. Also, for obvious reasons \- you
+can't specify UL service curve without LS one.
+
+Main purpose of UL service curve is to limit HFSC to bandwidth available on the
+upstream router (think adsl home modem/router, and linux server as
+nat/firewall/etc. with 100mbit+ connection to mentioned modem/router).
+Typically, it's used to create a single class directly under root, setting
+linear UL service curve to available bandwidth \- and then creating your class
+structure from that class downwards. Of course, you're free to add UL service
+(linear or not) curve to any class with LS criterion.
+
+Important part about UL service curve is, that whenever at some point in time
+a class doesn't qualify for linksharing due to its fit\-time, the next time it
+does qualify, it will update its virtual time to the smallest virtual time of
+all active children fit for linksharing. This way, one of the main things LS
+criterion tries to achieve \- equality of all virtual times across whole
+hierarchy \- is preserved (in perfectly fluid system with only linear curves,
+all virtual times would be equal).
+
+Without that, 'vt' would lag behind other virtual times, and could cause
+problems. Consider interface with capacity 10mbit, and following leaf classes
+(just in case you're skipping this text quickly \- this example shows behavior
+that \f(BIdoesn't happen\fR):
+
+.nf
+A \- ls 5.0mbit
+B \- ls 2.5mbit
+C \- ls 2.5mbit, ul 2.5mbit
+.fi
+
+If B was idle, while A and C were constantly backlogged, they would normally
+(as far as LS criterion is concerned) divide bandwidth in 2:1 ratio. But due
+to UL service curve in place, C would get at most 2.5mbit, and A would get the
+remaining 7.5mbit. The longer the backlogged period, the more virtual times of
+A and C would drift apart. If B became backlogged at some later point in time,
+its virtual time would be set to (A's\~vt\~+\~C's\~vt)/2, thus blocking A from
+sending any traffic, until B's virtual time catches up with A.
+.
+.SH "SEPARATE LS / RT SCs"
+.
+Another difference from original HFSC paper, is that RT and LS SCs can be
+specified separately. Moreover \- leaf classes are allowed to have only either
+RT SC or LS SC. For interior classes, only LS SCs make sense \- Any RT SC will
+be ignored.
+.
+.SH "CORNER CASES"
+.
+Separate service curves for LS and RT criteria can lead to certain traps,
+that come from "fighting" between ideal linksharing and enforced realtime
+guarantees. Those situations didn't exist in original HFSC paper, where
+specifying separate LS / RT service curves was not discussed.
+
+Consider interface with capacity 10mbit, with following leaf classes:
+
+.nf
+A \- ls 5.0mbit, rt 8mbit
+B \- ls 2.5mbit
+C \- ls 2.5mbit
+.fi
+
+Imagine A and C are constantly backlogged. As B is idle, A and C would divide
+bandwidth in 2:1 ratio, considering LS service curve (so in theory \- 6.66 and
+3.33). Alas RT criterion takes priority, so A will get 8mbit and LS will be
+able to compensate class C for only 2 mbit \- this will cause discrepancy
+between virtual times of A and C.
+
+Assume this situation lasts for a lot of time with no idle periods, and
+suddenly B becomes active. B's virtual time will be updated to
+(A's\~vt\~+\~C's\~vt)/2, effectively landing in the middle between A's and C's
+virtual time. The effect \- B, having no RT guarantees, will be punished and
+will not be allowed to transfer until C's virtual time catches up.
+
+If the interface had higher capacity \- for example 100mbit, this example
+would behave perfectly fine though.
+
+Let's look a bit closer at the above example \- it "cleverly" invalidates one
+of the basic things LS criterion tries to achieve \- equality of all virtual
+times across class hierarchy. Leaf classes without RT service curves are
+literally left to their own fate (governed by messed up virtual times).
+
+Also \- it doesn't make much sense. Class A will always be guaranteed up to
+8mbit, and this is more than any absolute bandwidth that could happen from its
+LS criterion (excluding trivial case of only A being active). If the bandwidth
+taken by A is smaller than absolute value from LS criterion, the unused part
+will be automatically assigned to other active classes (as A has idling periods
+in such case). The only "advantage" is, that even in case of low bandwidth on
+average, bursts would be handled at the speed defined by RT criterion. Still,
+if extra speed is needed (e.g. due to latency), non linear service curves
+should be used in such case.
+
+In other words \- LS criterion is meaningless in the above example.
+
+You can quickly "workaround" it by making sure each leaf class has RT service
+curve assigned (thus guaranteeing all of them will get some bandwidth), but it
+doesn't make it any more valid.
+.
+.SH "LINUX AND TIMER RESOLUTION"
+.
+In certain situations, the scheduler can throttle itself and set up a
+so\-called watchdog to wake up the dequeue function at some time later. In case of HFSC
+it happens when for example no packet is eligible for scheduling, and UL
+service curve is used to limit the speed at which LS criterion is allowed to
+dequeue packets. It's called throttling, and accuracy of it is dependent on
+how the kernel is compiled.
+
+There're 3 important options in modern kernels, as far as timers' resolution
+goes: \&'tickless system', \&'high resolution timer support' and \&'timer
+frequency'.
+
+If you have \&'tickless system' enabled, then the timer interrupt will trigger
+as slowly as possible, but each time a scheduler throttles itself (or any
+other part of the kernel needs better accuracy), the rate will be increased as
+needed / possible. The ceiling is \&'timer frequency' if \&'high
+resolution timer support' is not available or not compiled in. Otherwise it's
+hardware dependent and can go \fIfar\fR beyond the highest \&'timer frequency'
+setting available.
+
+If \&'tickless system' is not enabled, the timer will trigger at a fixed rate
+specified by \&'timer frequency' \- regardless if high resolution timers are
+or aren't available.
+
+It is important to keep those settings in mind, as in scenario like: no
+tickless, no HR timers, frequency set to 100hz \- throttling accuracy would be
+at 10ms. It doesn't automatically mean you would be limited to ~0.8mbit/s
+(assuming packets at ~1KB) \- as long as your queues are prepared to cover for
+timer inaccuracy. Of course, in case of e.g. locally generated udp traffic \-
+appropriate socket size is needed as well. Short example to make it more
+understandable (assume hardcore anti\-schedule settings \- HZ=100, no HR
+timers, no tickless):
+
+.nf
+tc qdisc add dev eth0 root handle 1:0 hfsc default 1
+tc class add dev eth0 parent 1:0 classid 1:1 hfsc rt m2 10mbit
+.fi
+
+Assuming packet of ~1KB size and HZ=100, that averages to ~0.8mbit \- anything
+beyond it (e.g. the above example with specified rate over 10x bigger) will
+require appropriate queuing and cause bursts every ~10 ms.  As you can
+imagine, any HFSC's RT guarantees will be seriously invalidated by that.
+Aforementioned example is mainly important if you deal with old hardware \- as
+it's particularly popular for home server chores. Even then, you can easily
+set HZ=1000 and have very accurate scheduling for typical adsl speeds.
+
+Anything modern (apic or even hpet msi based timers + \&'tickless system')
+will provide enough accuracy for superb 1gbit scheduling. For example, on one
+of basically cheap dual core AMD boards I have with following settings:
+
+.nf
+tc qdisc add dev eth0 parent root handle 1:0 hfsc default 1
+tc class add dev eth0 parent 1:0 classid 1:1 hfsc rt m2 300mbit
+.fi
+
+And simple:
+
+.nf
+nc \-u dst.host.com 54321 </dev/zero
+nc \-l \-p 54321 >/dev/null
+.fi
+
+\&...will yield following effects over period of ~10 seconds (taken from
+/proc/interrupts):
+
+.nf
+319: 42124229   0  HPET_MSI\-edge  hpet2 (before)
+319: 42436214   0  HPET_MSI\-edge  hpet2 (after 10s.)
+.fi
+
+That's roughly 31000/s. Now compare it with HZ=1000 setting. The obvious
+drawback of it is that cpu load can be rather extensive with servicing that
+many timer interrupts. Example with 300mbit RT service curve on 1gbit link is
+particularly ugly, as it requires a lot of throttling with minuscule delays.
+
+Also note that it's just an example showing capability of current hardware.
+The above example (essentially 300mbit TBF emulator) is pointless on internal
+interface to begin with \- you will pretty much always want regular LS service
+curve there, and in such scenario HFSC simply doesn't throttle at all.
+
+300mbit RT service curve (selected columns from mpstat \-P ALL 1):
+
+.nf
+10:56:43 PM  CPU  %sys     %irq   %soft   %idle
+10:56:44 PM  all  20.10    6.53   34.67   37.19
+10:56:44 PM    0  35.00    0.00   63.00    0.00
+10:56:44 PM    1   4.95   12.87    6.93   73.27
+.fi
+
+So, in rare case you need those speeds with only RT service curve, or with UL
+service curve \- remember about drawbacks.
+.
+.SH "LAYER2 ADAPTATION"
+.
+Please refer to \fBtc\-stab\fR(8)
+.
+.SH "SEE ALSO"
+.
+\fBtc\fR(8), \fBtc\-hfsc\fR(8), \fBtc\-stab\fR(8)
+
+Please direct bugreports and patches to: <netdev@vger.kernel.org>
+.
+.SH "AUTHOR"
+.
+Manpage created by Michal Soltys (soltys@ziu.info)
diff --git a/man/man8/tc-hfsc.8 b/man/man8/tc-hfsc.8
new file mode 100644
index 0000000..22018c0
--- /dev/null
+++ b/man/man8/tc-hfsc.8
@@ -0,0 +1,61 @@
+.TH HFSC 8 "25 February 2009" iproute2 Linux
+.
+.SH NAME
+HFSC \- Hierarchical Fair Service Curve's control under linux
+.
+.SH SYNOPSIS
+.nf
+tc qdisc add ... hfsc [ \fBdefault\fR CLASSID ]
+
+tc class add ... hfsc [ [ \fBrt\fR SC ] [ \fBls\fR SC ] | [ \fBsc\fR SC ] ] [ \fBul\fR SC ]
+
+\fBrt\fR : realtime service curve
+\fBls\fR : linkshare service curve
+\fBsc\fR : rt+ls service curve
+\fBul\fR : upperlimit service curve
+
+\(bu at least one of \fBrt\fR, \fBls\fR or \fBsc\fR must be specified
+\(bu \fBul\fR can only be specified with \fBls\fR or \fBsc\fR
+.
+.IP "SC := [ [ \fBm1\fR BPS ] \fBd\fR SEC ] \fBm2\fR BPS"
+\fBm1\fR : slope of the first segment
+\fBd\fR  : x\-coordinate of intersection
+\fBm2\fR : slope of the second segment
+.PP
+.IP "SC := [ [ \fBumax\fR BYTE ] \fBdmax\fR SEC ] \fBrate\fR BPS"
+\fBumax\fR : maximum unit of work
+\fBdmax\fR : maximum delay
+\fBrate\fR : rate
+.PP
+.fi
+For description of BYTE, BPS and SEC \- please see \fBUNITS\fR
+section of \fBtc\fR(8).
+.
+.SH DESCRIPTION (qdisc)
+HFSC qdisc has only one optional parameter \- \fBdefault\fR.  CLASSID specifies
+the minor part of the default classid, where packets not classified by other
+means (e.g. u32 filter, CLASSIFY target of iptables) will be enqueued. If
+\fBdefault\fR is not specified, unclassified packets will be dropped.
+.
+.SH DESCRIPTION (class)
+HFSC class is used to create a class hierarchy for HFSC scheduler. For
+explanation of the algorithm, and the meaning behind \fBrt\fR, \fBls\fR,
+\fBsc\fR and \fBul\fR service curves \- please refer to \fBtc\-hfsc\fR(7).
+
+As you can see in \fBSYNOPSIS\fR, service curve (SC) can be specified in two
+ways. Either as maximum delay for certain amount of work, or as a bandwidth
+assigned for certain amount of time. Obviously, \fBm1\fR is simply
+\fBumax\fR/\fBdmax\fR.
+
+Both \fBm2\fR and \fBrate\fR are mandatory. If you omit other
+parameters, you will specify linear service curve.
+.
+.SH "SEE ALSO"
+.
+\fBtc\fR(8), \fBtc\-hfsc\fR(7), \fBtc\-stab\fR(8)
+
+Please direct bugreports and patches to: <netdev@vger.kernel.org>
+.
+.SH "AUTHOR"
+.
+Manpage created by Michal Soltys (soltys@ziu.info)
diff --git a/man/man8/tc-stab.8 b/man/man8/tc-stab.8
new file mode 100644
index 0000000..1442a69
--- /dev/null
+++ b/man/man8/tc-stab.8
@@ -0,0 +1,156 @@
+.TH STAB 8 "25 February 2009" iproute2 Linux
+.
+.SH NAME
+tc\-stab \- Generic size table manipulations
+.
+.SH SYNOPSIS
+.nf
+tc qdisc add ... stab \\
+.RS 4
+[ \fBmtu\fR BYTES ] [ \fBtsize\fR SLOTS ] \\
+[ \fBmpu\fR BYTES ] [ \fBoverhead\fR BYTES ] [ \fBlinklayer\fR TYPE ] ...
+.RE
+
+TYPE := adsl | atm | ethernet
+.fi
+
+For the description of BYTES \- please refer to the \fBUNITS\fR
+section of \fBtc\fR(8).
+
+.IP \fBmtu\fR 4
+.br
+maximum packet size we create size table for, assumed 2048 if not specified explicitly
+.IP \fBtsize\fR
+.br
+required table size, assumed 512 if not specified explicitly
+.IP \fBmpu\fR
+.br
+minimum packet size used in computations
+.IP \fBoverhead\fR
+.br
+per\-packet size overhead (can be negative) used in computations
+.IP \fBlinklayer\fR
+.br
+required linklayer adaptation.
+.PP
+.
+.SH DESCRIPTION
+.
+Size tables allow manipulation of packet size, as seen by whole scheduler
+framework (of course, the actual packet size remains the same). Adjusted packet
+size is calculated only once \- when a qdisc enqueues the packet. Initial root
+enqueue initializes it to the real packet's size.
+
+Each qdisc can use different size table, but the adjusted size is stored in
+area shared by whole qdisc hierarchy attached to the interface (technically,
+it's stored in skb). The effect is, that if you have such setup, the last qdisc
+with a stab in a chain "wins". For example, consider HFSC with simple pfifo
+attached to one of its leaf classes. If that pfifo qdisc has stab defined, it
+will override lengths calculated during HFSC's enqueue, and in turn, whenever
+HFSC tries to dequeue a packet, it will use potentially invalid size in its
+calculations. Normal setups will usually include stab defined only on root
+qdisc, but further overriding gives extra flexibility for less usual setups.
+
+Initial size table is calculated by \fBtc\fR tool using \fBmtu\fR and
+\fBtsize\fR parameters. The algorithm sets each slot's size to the smallest
+power of 2 value, so the whole \fBmtu\fR is covered by the size table. Neither
+\fBtsize\fR, nor \fBmtu\fR have to be power of 2 value, so the size
+table will usually support more than is required by \fBmtu\fR.
+
+For example, with \fBmtu\fR\~=\~1500 and \fBtsize\fR\~=\~128, a table with 128
+slots will be created, where slot 0 will correspond to sizes 0\-16, slot 1 to
+17\~\-\~32, \&..., slot 127 to 2033\~\-\~2048. Note, that the sizes
+are shifted 1 byte (normally you would expect 0\~\-\~15, 16\~\-\~31, \&...,
+2032\~\-\~2047). Sizes assigned to each slot depend on \fBlinklayer\fR parameter.
+
+Stab calculation is also safe for an unusual case, when a size assigned to a
+slot would be larger than 2^16\-1 (you will lose the accuracy though).
+
+During kernel part of packet size adjustment, \fBoverhead\fR will be added to
+original size, and after subtracting 1 (to land in the proper slot \- see above
+about shifting by 1 byte) slot will be calculated. If the size would cause
+overflow, more than 1 slot will be used to get the final size. It of course will
+affect accuracy, but it's only a guard against unusual situations.
+
+Currently there're two methods of creating values stored in the size table \-
+ethernet and atm (adsl):
+
+.IP ethernet 4
+.br
+This is basically 1\-1 mapping, so following our example from above
+(disregarding \fBmpu\fR for a moment) slot 0 would have 16, slot 1 would have 32
+and so on, up to slot 127 with 2048. Note, that \fBmpu\fR\~>\~0 must be
+specified, and slots that would get less than specified by \fBmpu\fR, will get
+\fBmpu\fR instead. If you don't specify \fBmpu\fR, the size table will not be
+created at all, although any \fBoverhead\fR value will be respected during
+calculations.
+.IP "atm, adsl"
+.br
+ATM linklayer consists of 53 byte cells, where each of them provides 48 bytes
+for payload. Also all the cells must be fully utilized, thus the last one is
+padded if/as necessary.
+
+When size table is calculated, adjusted size that fits properly into lowest
+amount of cells is assigned to a slot. For example, a 100 byte long packet
+requires three 48\-byte payloads, so the final size would require 3 ATM cells
+\- 159 bytes.
+
+For ATM size tables, 16\~bytes sized slots are perfectly enough. The default
+values of \fBmtu\fR and \fBtsize\fR create 4\~bytes sized slots.
+.PP
+.
+.SH "TYPICAL OVERHEADS"
+The following values are typical for different adsl scenarios (based on
+\fB[1]\fR and \fB[2]\fR):
+
+.nf
+LLC based:
+.RS 4
+PPPoA \- 14 (PPP \- 2, ATM \- 12)
+PPPoE \- 40+ (PPPoE \- 8, ATM \- 18, ethernet 14, possibly FCS \- 4+padding)
+Bridged \- 32 (ATM \- 18, ethernet 14, possibly FCS \- 4+padding)
+IPoA \- 16 (ATM \- 16)
+.RE
+
+VC Mux based:
+.RS 4
+PPPoA \- 10 (PPP \- 2, ATM \- 8)
+PPPoE \- 32+ (PPPoE \- 8, ATM \- 10, ethernet 14, possibly FCS \- 4+padding)
+Bridged \- 24+ (ATM \- 10, ethernet 14, possibly FCS \- 4+padding)
+IPoA \- 8 (ATM \- 8)
+.RE
+.fi
+\p There are a few important things regarding the above overheads:
+.
+.IP \(bu 4
+IPoA in LLC case requires SNAP, instead of LLC\-NLPID (see rfc2684) \- this is
+the reason, why it actually takes more space than PPPoA.
+.IP \(bu
+In rare cases, FCS might be preserved on protocols that include ethernet frame
+(Bridged and PPPoE).  In such situation, any ethernet specific padding
+guaranteeing 64 bytes long frame size has to be included as well (see rfc2684).
+In other words, it also guarantees that any packet you send will take
+a minimum of 2 ATM cells. You should set \fBmpu\fR accordingly for that.
+.IP \(bu
+When size table is consulted, and you're shaping traffic for the sake of
+another modem/router, ethernet header (without padding) will already be added
+to initial packet's length. You should compensate for that by subtracting 14
+from the above overheads in such case. If you're shaping directly on the router
+(for example, with speedtouch usb modem) using ppp daemon, layer2 header will
+not be added yet.
+
+For more thorough explanations, please see \fB[1]\fR and \fB[2]\fR.
+.
+.SH "SEE ALSO"
+.
+\fBtc\fR(8), \fBtc\-hfsc\fR(7), \fBtc\-hfsc\fR(8),
+.br
+\fB[1]\fR http://ace\-host.stuart.id.au/russell/files/tc/tc\-atm/
+.br
+\fB[2]\fR http://www.faqs.org/rfcs/rfc2684.html
+
+Please direct bugreports and patches to: <net...@vger.kernel.org>
+.
+.SH "AUTHOR"
+.
+Manpage created by Michal Soltys (sol...@ziu.info)
diff --git a/man/man8/tc.8 b/man/man8/tc.8
index bfc7f26..49df2d7 100644
--- a/man/man8/tc.8
+++ b/man/man8/tc.8
@@ -370,12 +370,15 @@ was written by Alexey N. Kuznetsov and added in Linux 2.2.
 .BR tc-choke (8),
 .BR tc-drr (8),
 .BR tc-htb (8),
+.BR tc-hfsc (8),
+.BR tc-hfsc (7),
 .BR tc-sfq (8),
 .BR tc-red (8),
 .BR tc-tbf (8),
 .BR tc-pfifo (8),
 .BR tc-bfifo (8),
 .BR tc-pfifo_fast (8),
+.BR tc-stab (8),
 .br
 .RB "User documentation at " http://lartc.org/ ", but please direct bugreports and patches to: " <netdev@vger.kernel.org>
 
diff --git a/tc/q_hfsc.c b/tc/q_hfsc.c
index b190c71..03539ec 100644
--- a/tc/q_hfsc.c
+++ b/tc/q_hfsc.c
@@ -43,7 +43,7 @@ explain_class(void)
 	fprintf(stderr,
 		"Usage: ... hfsc [ [ rt SC ] [ ls SC ] | [ sc SC ] ] [ ul SC ]\n"
 		"\n"
-		"SC := [ [ m1 BPS ] [ d SEC ] m2 BPS\n"
+		"SC := [ [ m1 BPS ] d SEC ] m2 BPS\n"
 		"\n"
 		" m1 : slope of first segment\n"
 		" d  : x-coordinate of intersection\n"
@@ -57,6 +57,10 @@ explain_class(void)
 		" dmax : maximum delay\n"
 		" rate : rate\n"
 		"\n"
+		"Remarks:\n"
+		" - at least one of 'rt', 'ls' or 'sc' must be specified\n"
+		" - 'ul' can only be specified with 'ls' or 'sc'\n"
+		"\n"
 	);
 }
 
-- 
1.7.6.1

^ permalink raw reply related

* [PATCH 2/2] net/smsc911x: Add regulator support
From: Linus Walleij @ 2011-10-26  8:05 UTC (permalink / raw)
  To: netdev, Steve Glendinning
  Cc: Mathieu Poirer, Robert Marklund, Mark Brown, Linus Walleij

From: Robert Marklund <robert.marklund@stericsson.com>

Add some basic regulator support for the power pins, as needed
by the ST-Ericsson Snowball platform that powers up the SMSC911
chip using an external regulator.

Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Robert Marklund <robert.marklund@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/net/ethernet/smsc/smsc911x.c |  128 +++++++++++++++++++++++++++++++---
 1 files changed, 117 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 8843071..fca01eb 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -44,6 +44,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/bug.h>
@@ -138,6 +139,10 @@ struct smsc911x_data {
 
 	/* register access functions */
 	const struct smsc911x_ops *ops;
+
+	/* regulators */
+	struct regulator *regulator_vddvario;
+	struct regulator *regulator_vdd33a;
 };
 
 /* Easy access to information */
@@ -362,6 +367,86 @@ out:
 	spin_unlock_irqrestore(&pdata->dev_lock, flags);
 }
 
+/*
+ * Enable or disable resources, currently just regulators.
+ */
+static int smsc911x_enable_disable_resources(struct platform_device *pdev,
+					     bool enable)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct smsc911x_data *pdata = netdev_priv(ndev);
+	int err = 0;
+
+	/* enable/disable regulator for vddvario */
+	if (pdata->regulator_vddvario) {
+		if (enable) {
+			err = regulator_enable(pdata->regulator_vddvario);
+			if (err < 0) {
+			  netdev_err(ndev, "regulator_enable failed for "
+				     "vddvario");
+			}
+		} else
+			err = regulator_disable(pdata->regulator_vddvario);
+	}
+
+	/* enable/disable regulator for vdd33a */
+	if (pdata->regulator_vdd33a) {
+		if (enable) {
+			err = regulator_enable(pdata->regulator_vdd33a);
+			if (err < 0) {
+				netdev_err(ndev, "regulator_enable failed for "
+					   "vdd33a");
+			}
+		} else
+			err = regulator_disable(pdata->regulator_vdd33a);
+	}
+	return err;
+}
+
+/*
+ * Request or free resources, currently just regulators.
+ *
+ * The SMSC911x has two power pins: vddvario and vdd33a, in designs where
+ * these are not always-on we need to request regulators to be turned on
+ * before we can try to access the device registers.
+ */
+static int smsc911x_request_free_resources(struct platform_device *pdev,
+		bool request)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct smsc911x_data *pdata = netdev_priv(ndev);
+	int err = 0;
+
+	/* Request regulator for vddvario */
+	if (request && !pdata->regulator_vddvario) {
+		pdata->regulator_vddvario = regulator_get(&pdev->dev,
+				"vddvario");
+		if (IS_ERR(pdata->regulator_vddvario)) {
+			netdev_err(ndev, "Failed to get regulator vddvario\n");
+			err = PTR_ERR(pdata->regulator_vddvario);
+			pdata->regulator_vddvario = NULL;
+		}
+	} else if (!request && pdata->regulator_vddvario) {
+		regulator_put(pdata->regulator_vddvario);
+		pdata->regulator_vddvario = NULL;
+	}
+
+	/* Request regulator for vdd33a */
+	if (request && !pdata->regulator_vdd33a) {
+		pdata->regulator_vdd33a = regulator_get(&pdev->dev, "vdd33a");
+		if (IS_ERR(pdata->regulator_vdd33a)) {
+			netdev_err(ndev, "Failed to get regulator vdd33a\n");
+			err = PTR_ERR(pdata->regulator_vdd33a);
+			pdata->regulator_vdd33a = NULL;
+		}
+	} else if (!request && pdata->regulator_vdd33a) {
+		regulator_put(pdata->regulator_vdd33a);
+		pdata->regulator_vdd33a = NULL;
+	}
+
+	return err;
+}
+
 /* waits for MAC not busy, with timeout.  Only called by smsc911x_mac_read
  * and smsc911x_mac_write, so assumes mac_lock is held */
 static int smsc911x_mac_complete(struct smsc911x_data *pdata)
@@ -2065,6 +2150,7 @@ static int __devexit smsc911x_drv_remove(struct platform_device *pdev)
 	struct net_device *dev;
 	struct smsc911x_data *pdata;
 	struct resource *res;
+	int retval;
 
 	dev = platform_get_drvdata(pdev);
 	BUG_ON(!dev);
@@ -2092,6 +2178,12 @@ static int __devexit smsc911x_drv_remove(struct platform_device *pdev)
 
 	iounmap(pdata->ioaddr);
 
+	if (smsc911x_enable_disable_resources(pdev, false))
+		pr_warn("Could not disable resource\n");
+
+	retval = smsc911x_request_free_resources(pdev, false);
+	/* ignore not all have regulators */
+
 	free_netdev(dev);
 
 	return 0;
@@ -2218,10 +2310,24 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	pdata->dev = dev;
 	pdata->msg_enable = ((1 << debug) - 1);
 
+	platform_set_drvdata(pdev, dev);
+
+	retval = smsc911x_request_free_resources(pdev, true);
+	if (retval) {
+		pr_err("Could request regulators needed aborting\n");
+		goto out_return_resources;
+	}
+
+	retval = smsc911x_enable_disable_resources(pdev, true);
+	if (retval) {
+		pr_err("Could enable regulators needed aborting\n");
+		goto out_disable_resources;
+	}
+
 	if (pdata->ioaddr == NULL) {
 		SMSC_WARN(pdata, probe, "Error smsc911x base address invalid");
 		retval = -ENOMEM;
-		goto out_free_netdev_2;
+		goto out_disable_resources;
 	}
 
 	retval = smsc911x_probe_config_dt(&pdata->config, np);
@@ -2233,7 +2339,7 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 
 	if (retval) {
 		SMSC_WARN(pdata, probe, "Error smsc911x config not found");
-		goto out_unmap_io_3;
+		goto out_disable_resources;
 	}
 
 	/* assume standard, non-shifted, access to HW registers */
@@ -2244,7 +2350,7 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 
 	retval = smsc911x_init(dev);
 	if (retval < 0)
-		goto out_unmap_io_3;
+		goto out_disable_resources;
 
 	/* configure irq polarity and type before connecting isr */
 	if (pdata->config.irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH)
@@ -2264,15 +2370,13 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	if (retval) {
 		SMSC_WARN(pdata, probe,
 			  "Unable to claim requested irq: %d", dev->irq);
-		goto out_unmap_io_3;
+		goto out_free_irq;
 	}
 
-	platform_set_drvdata(pdev, dev);
-
 	retval = register_netdev(dev);
 	if (retval) {
 		SMSC_WARN(pdata, probe, "Error %i registering device", retval);
-		goto out_unset_drvdata_4;
+		goto out_free_irq;
 	} else {
 		SMSC_TRACE(pdata, probe,
 			   "Network interface: \"%s\"", dev->name);
@@ -2321,12 +2425,14 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 
 out_unregister_netdev_5:
 	unregister_netdev(dev);
-out_unset_drvdata_4:
-	platform_set_drvdata(pdev, NULL);
+out_free_irq:
 	free_irq(dev->irq, dev);
-out_unmap_io_3:
+out_disable_resources:
+	(void)smsc911x_enable_disable_resources(pdev, false);
+out_return_resources:
+	(void)smsc911x_request_free_resources(pdev, false);
+	platform_set_drvdata(pdev, NULL);
 	iounmap(pdata->ioaddr);
-out_free_netdev_2:
 	free_netdev(dev);
 out_release_io_1:
 	release_mem_region(res->start, resource_size(res));
-- 
1.7.3.2

^ permalink raw reply related

* Re: [PATCH 2/2] net/smsc911x: Add regulator support
From: Mark Brown @ 2011-10-26  8:38 UTC (permalink / raw)
  To: Linus Walleij
  Cc: netdev, Steve Glendinning, Mathieu Poirer, Robert Marklund,
	Linus Walleij
In-Reply-To: <1319616356-6391-1-git-send-email-linus.walleij@stericsson.com>

On Wed, Oct 26, 2011 at 10:05:56AM +0200, Linus Walleij wrote:

> +	/* enable/disable regulator for vddvario */
> +	if (pdata->regulator_vddvario) {

This has the same issue as last time - if you've got conditional code
like this in the body of the driver something is going wrong.  Unless
the supply is genuinely optional and might not be physically present on
some systems the driver should fail if it can't get it.  The regulator
API will stub itself out when not in use.

^ permalink raw reply

* [virtio-spec: RFC PATCH] virtio-spec: introduce VIRTIO_NET_F_GUEST_ANNOUNCE
From: Jason Wang @ 2011-10-26  8:49 UTC (permalink / raw)
  To: netdev, rusty, qemu-devel, kvm, mst

Network connections in guest need to be kept after migration. This is done by
sending gratuitous packet and let switch learn new port of the mac
address. As hypervisor does not have the knowledge of guest network
configurations such as tagged vlan or ipv6, it may require guest to send
gratuitous packet.

This patch introduces a new feature bit of virtio network adapter -
VIRTIO_NET_F_GUEST_ANNOUNCE which is used to indicate the ability to send
gratuitous packets by guest. A new status bit - VIRTIO_NET_S_ANNOUNCE is also
introduced to notify the guest the need for sending gratuitous packet. When
guest notice this, it should clear it and send the gratuitous packet.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 virtio-spec.lyx |   45 ++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 44 insertions(+), 1 deletions(-)

diff --git a/virtio-spec.lyx b/virtio-spec.lyx
index 6426f8f..87733b9 100644
--- a/virtio-spec.lyx
+++ b/virtio-spec.lyx
@@ -4133,6 +4133,14 @@ VIRTIO_NET_F_CTRL_VLAN
 (19) Control channel VLAN filtering.
 \end_layout
 
+\begin_layout Description
+VIRTIO_NET_F_GUEST_ANNOUNCE
+\begin_inset space ~
+\end_inset
+
+(21) Guest can send gratuitous packets.
+\end_layout
+
 \end_deeper
 \begin_layout Description
 Device
@@ -4146,7 +4154,8 @@ configuration
 layout Two configuration fields are currently defined.
  The mac address field always exists (though is only valid if VIRTIO_NET_F_MAC
  is set), and the status field only exists if VIRTIO_NET_F_STATUS is set.
- Only one bit is currently defined for the status field: VIRTIO_NET_S_LINK_UP.
+ Two bits are currently defined for the status field: VIRTIO_NET_S_LINK_UP
+ and VIRTIO_NET_S_ANNOUNCE.
  
 \begin_inset listings
 inline false
@@ -4159,6 +4168,11 @@ status open
 
 \begin_layout Plain Layout
 
+#define VIRTIO_NET_S_ANNOUNCE	2
+\end_layout
+
+\begin_layout Plain Layout
+
 \end_layout
 
 \begin_layout Plain Layout
@@ -5015,6 +5029,35 @@ Both the VIRTIO_NET_CTRL_VLAN_ADD and VIRTIO_NET_CTRL_VLAN_DEL command take
  a 16-bit VLAN id as the command-specific-data.
 \end_layout
 
+\begin_layout Subsection*
+Gratuitous Packet Sending
+\end_layout
+
+\begin_layout Standard
+If the driver negotiates VIRTIO_NET_F_GUEST_ANNOUNCE, it can send gratuitous
+ packets.
+ Gratuitous packets are used to notify the change of physical link and
+ are usually sent after migration.
+ As the hypervisor does not have knowledge of the guest network configuration
+ (e.g.
+ tagged vlan), it asks the guest to send gratuitous packets by setting the
+ VIRTIO_NET_S_ANNOUNCE bit in the status field.
+ The guest needs to check the VIRTIO_NET_S_ANNOUNCE bit in the status field when it
+ notices a change of device configuration.
+\end_layout
+
+\begin_layout Standard
+Processing this notification involves:
+\end_layout
+
+\begin_layout Enumerate
+Clear VIRTIO_NET_S_ANNOUNCE bit in the status field.
+\end_layout
+
+\begin_layout Enumerate
+Send the gratuitous packets.
+\end_layout
+
 \begin_layout Chapter*
 Appendix D: Block Device
 \end_layout

^ permalink raw reply related

* Re: [patch net-next V5] net: introduce ethernet teaming device
From: Jiri Pirko @ 2011-10-26  8:49 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, davem, bhutchings, shemminger, fubar, andy, tgraf,
	ebiederm, mirqus, kaber, greearb, jesse, fbl, benjamin.poirier,
	jzupka
In-Reply-To: <1319549255.10883.16.camel@edumazet-laptop>

Tue, Oct 25, 2011 at 03:27:35PM CEST, eric.dumazet@gmail.com wrote:
>Le mardi 25 octobre 2011 à 15:03 +0200, Jiri Pirko a écrit :
>> This patch introduces new network device called team. It supposes to be
>> very fast, simple, userspace-driven alternative to existing bonding
>> driver.
>> 
>> Userspace library called libteam with couple of demo apps is available
>> here:
>> https://github.com/jpirko/libteam
>> Note it's still in its dipers atm.
>> 
>> team<->libteam use generic netlink for communication. That and rtnl
>> suppose to be the only way to configure team device, no sysfs etc.
>> 
>> Python binding basis for libteam was recently introduced (some need
>> still need to be done on it though). Daemon providing arpmon/miimon
>> active-backup functionality will be introduced shortly.
>> All what's necessary is already implemented in kernel team driver.
>> 
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>> 
>> v4->v5:
>> 	- team_change_mtu() uses team->lock while travesing though port
>> 	  list
>> 	- mac address changes are moved completely to jurisdiction of
>> 	  userspace daemon. This way the daemon can do FOM1, FOM2 and
>> 	  possibly other weird things with mac addresses.
>> 	  Only round-robin mode sets up all ports to bond's address then
>> 	  enslaved.
>> 	- Extended Kconfig text
>
>
>Following is not called under rcu, but rtnl or team spinlock held.
>
>Therefore, team_get_port_by_index_rcu() is not the right thing.

Yes, this is bug, I missed this.

>
>+static void __reconstruct_port_hlist(struct team *team, int rm_index)
>+{
>+       int i;
>+       struct team_port *port;
>+
>+       for (i = rm_index + 1; i < team->port_count; i++) {
>+               port = team_get_port_by_index_rcu(team, i);
>+               hlist_del_rcu(&port->hlist);
>+               port->index--;
>+               hlist_add_head_rcu(&port->hlist,
>+                                  team_port_index_hash(team, port->index));
>+       }
>+}
>+
>
>In fact, I claim most of your rcu_read_lock() in management side are
>bogus and obfuscate code.
>
>RCU is an exact science, not a commodity.
>
>When RCU is used, rcu_read_lock()/rcu_read_unlock() are used by readers,
>not managers :
>
>They should use a spin/mutex(rtnl usually in network land)
>and normal reads, no need for rcu_something
>
>Only writes must take care of concurrent readers (aka rcu_assign_pointer())
>
>For example:
>
>team_nl_fill_port_list_get_changed() should not use
>	list_for_each_entry_rcu(port, &team->port_list, list) {
>but a regular
>	list_for_each_entry(port, &team->port_list, list) {

Nod. This I missed as well.

>
>
>team_nl_team_get() should not play with RCU either.
>	It can use __dev_get_by_index() instead of dev_get_by_index_rcu()

Not true. RTNL is not held here.


>
>Comment in front of team_nl_team_get() is bogus as well :
>
>/*
> * Netlink cmd functions should be locked by following two functions.
> * To ensure team_uninit would not be called in between, hold rcu_read_lock
> * all the time.
> */
>
>How can holding rcu_read_lock() can prevent another cpu doing whatever he wants ?
>
>It seems you believe rcu_read_lock() is a read_lock(), but it isnt.

I'm aware. But in this particular case, holding rcu_read_lock
effectively does the thing. Because team_uninit is called from
rollback_registered_many() after synchronize_net is called. The thing is
that holding rcu_read_lock ensures that team->dev does not disappear
until team_nl_team_put() is called.

>
>Using right API is essential to get appropriate LOCKDEP semantic and
>code maintainability.
>
>
>

^ permalink raw reply

* Re: [PATCH 2/2] net/smsc911x: Add regulator support
From: Linus Walleij @ 2011-10-26  9:25 UTC (permalink / raw)
  To: Mark Brown
  Cc: netdev@vger.kernel.org, Steve Glendinning, Mathieu Poirer,
	Robert MARKLUND, Linus Walleij
In-Reply-To: <20111026083838.GA9157@opensource.wolfsonmicro.com>

On 10/26/2011 10:38 AM, Mark Brown wrote:
> On Wed, Oct 26, 2011 at 10:05:56AM +0200, Linus Walleij wrote:
>
>    
>> +	/* enable/disable regulator for vddvario */
>> +	if (pdata->regulator_vddvario) {
>>      
> This has the same issue as last time - if you've got conditional code
> like this in the body of the driver something is going wrong.  Unless
> the supply is genuinely optional and might not be physically present on
> some systems the driver should fail if it can't get it.  The regulator
> API will stub itself out when not in use.
>    

That solves the issue for platforms with no regulator
support at all.

Then we have platforms with regulator support, but no
regulator for this hardware, because that one happens
to be always-on in these systems.

And they do not have CONFIG_REGULATOR_DUMMY
either.

So the driver probe will fail.

How do we solve this?

Shall we have CONFIG_SMC911X select
REGULATOR_DUMMY?

Yours,
Linus Walleij

^ permalink raw reply

* Re: [PATCH] HFSC (7) & (8) documentation + assorted changes
From: Michal Soltys @ 2011-10-26  9:27 UTC (permalink / raw)
  To: Mike Frysinger; +Cc: stephen.hemminger, netdev
In-Reply-To: <1319616922-18034-1-git-send-email-vapier@gentoo.org>

On 26.10.2011 10:15, Mike Frysinger wrote:
>
> I've read it many times over, but it's a lengthy chunk of text - so try
> to be understanding in case I made some mistakes.
>

TBH, I planned to do some fixes / corrections (minor ones) to the 
version I submitted back then, though I had two doubts after looking at 
it after a bit of time:

- too long / too detailed / hard to read
   (or ITOW, following HFSC paper to closely to be of practical use for
   someone just wanting to know how it works, without [too] gritty
   details)
- some friendlier format with man export (asciidoc ? yodl ?
   reText ? etc. - not sure what's it the policy regarding iproute2)
   as otherwise it will be quite a burden to maintain / update.

If my memory serves right, one of the corner cases described in it might 
have been fixed by a patch I submitted a good few months ago, though 
I'll have to read it to be sure. And there're few other small 
long-overdue patches I'm sitting on (they won't change anything 
algorithmically, but at least one could be noted in the text - though 
that can be changed any time in the future, and if the patches get 
accepted at all of course).

Another thing was minor kernel patch to stab, and the docs assumed it 
was added (not sure if you corrected it).


I'll read this version and see how it differs (and try to recollect what 
I wanted to fix in the past).

^ permalink raw reply

* Re: [PATCH] HFSC (7) & (8) documentation + assorted changes
From: Mike Frysinger @ 2011-10-26 10:02 UTC (permalink / raw)
  To: Michal Soltys; +Cc: stephen.hemminger, netdev
In-Reply-To: <4EA7D268.1050504@ziu.info>

On Wed, Oct 26, 2011 at 05:27, Michal Soltys wrote:
> On 26.10.2011 10:15, Mike Frysinger wrote:
>> I've read it many times over, but it's a lengthy chunk of text - so try
>> to be understanding in case I made some mistakes.
>
> TBH, I planned to do some fixes / corrections (minor ones) to the version I
> submitted back then, though I had two doubts after looking at it after a bit
> of time:
>
> - too long / too detailed / hard to read
>  (or ITOW, following HFSC paper to closely to be of practical use for
>  someone just wanting to know how it works, without [too] gritty
>  details)

i think there should be something here about HFSC.  having too much
information in a man page is better than none at all imo.

> - some friendlier format with man export (asciidoc ? yodl ?
>  reText ? etc. - not sure what's it the policy regarding iproute2)
>  as otherwise it will be quite a burden to maintain / update.

that's orthogonal to the patch i think.  all the iproute2 man pages
are written in roff.  let's keep that sep from getting HFSC
documentation into the tree.

> If my memory serves right, one of the corner cases described in it might
> have been fixed by a patch I submitted a good few months ago, though I'll
> have to read it to be sure. And there're few other small long-overdue
> patches I'm sitting on (they won't change anything algorithmically, but at
> least one could be noted in the text - though that can be changed any time
> in the future, and if the patches get accepted at all of course).
>
> Another thing was minor kernel patch to stab, and the docs assumed it was
> added (not sure if you corrected it).

if you want to post updates to the content, i can take care of the
*roff formatting.
-mike

^ permalink raw reply

* Re: [PATCH 2/2] net/smsc911x: Add regulator support
From: Mark Brown @ 2011-10-26 10:17 UTC (permalink / raw)
  To: Linus Walleij
  Cc: netdev@vger.kernel.org, Steve Glendinning, Mathieu Poirer,
	Robert MARKLUND, Linus Walleij
In-Reply-To: <4EA7D1F5.2030904@stericsson.com>

On Wed, Oct 26, 2011 at 11:25:09AM +0200, Linus Walleij wrote:
> On 10/26/2011 10:38 AM, Mark Brown wrote:

> >This has the same issue as last time - if you've got conditional code
> >like this in the body of the driver something is going wrong.  Unless
> >the supply is genuinely optional and might not be physically present on
> >some systems the driver should fail if it can't get it.  The regulator
> >API will stub itself out when not in use.

> That solves the issue for platforms with no regulator
> support at all.

No, it solves the problem for all platforms.

> Then we have platforms with regulator support, but no
> regulator for this hardware, because that one happens
> to be always-on in these systems.

Right, this is extremely common and is exactly what the fixed voltage
regulator is there for - if you've got an always on regulator in your
system define a fixed voltage regulator to represent it.

> Shall we have CONFIG_SMC911X select
> REGULATOR_DUMMY?

No, nothing should be selecting that.  Users can enable it for their
systems if they want it.

^ permalink raw reply

* Re: [PATCH 2/2] net/smsc911x: Add regulator support
From: Linus Walleij @ 2011-10-26 10:44 UTC (permalink / raw)
  To: Mark Brown
  Cc: netdev@vger.kernel.org, Steve Glendinning, Mathieu Poirer,
	Robert MARKLUND, Linus Walleij
In-Reply-To: <20111026101750.GA2921@opensource.wolfsonmicro.com>

On 10/26/2011 12:17 PM, Mark Brown wrote:
>> Then we have platforms with regulator support, but no
>> regulator for this hardware, because that one happens
>> to be always-on in these systems.
>>      
> Right, this is extremely common and is exactly what the fixed voltage
> regulator is there for - if you've got an always on regulator in your
> system define a fixed voltage regulator to represent it.
>    

Aha you mean I need to go into the platform/board files
for all systems using the smsc911x and add a fixed voltage
regulator?

OK that can't be too hard...

Yours,
Linus Walleij

^ permalink raw reply

* RE: [PATCH 2/2] net/smsc911x: Add regulator support
From: Robert MARKLUND @ 2011-10-26 10:57 UTC (permalink / raw)
  To: Mark Brown, Linus WALLEIJ
  Cc: netdev@vger.kernel.org, Steve Glendinning, Mathieu Poirer,
	Linus Walleij
In-Reply-To: <20111026083838.GA9157@opensource.wolfsonmicro.com>

> -----Original Message-----
> From: Mark Brown [mailto:broonie@opensource.wolfsonmicro.com]
> Sent: den 26 oktober 2011 10:39
> To: Linus WALLEIJ
> Cc: netdev@vger.kernel.org; Steve Glendinning; Mathieu Poirer; Robert MARKLUND; Linus Walleij
> Subject: Re: [PATCH 2/2] net/smsc911x: Add regulator support
> 
> On Wed, Oct 26, 2011 at 10:05:56AM +0200, Linus Walleij wrote:
> 
> > +	/* enable/disable regulator for vddvario */
> > +	if (pdata->regulator_vddvario) {
> 
> This has the same issue as last time - if you've got conditional code
> like this in the body of the driver something is going wrong.  Unless
> the supply is genuinely optional and might not be physically present on
> some systems the driver should fail if it can't get it.  The regulator
> API will stub itself out when not in use.

My mistake I didn't remember to remove them.
If you review the rest of the code you see that the driver will not start if we can't get the regulators.
So that code will never run in the case where pdata->regulator_vddvario == 0.

So this will leave all the platforms using this driver and have full constraints to fail to start this driver.
If they don't create dummy regulators.

Just so we are on the clear with that.

/R

^ permalink raw reply

* [PATCH 2/2 v2] net/smsc911x: Add regulator support
From: Linus Walleij @ 2011-10-26 11:05 UTC (permalink / raw)
  To: netdev, Steve Glendinning
  Cc: Mathieu Poirer, Robert Marklund, Mark Brown, Linus Walleij

From: Robert Marklund <robert.marklund@stericsson.com>

Add some basic regulator support for the power pins, as needed
by the ST-Ericsson Snowball platform that powers up the SMSC911
chip using an external regulator.

Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Robert Marklund <robert.marklund@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
ChangeLog v1->v2:
- Don't check for NULL regulators and error out properly if the
  regulators can't be found. All platforms using the smsc911x
  and the regulator framework simultaneously need to provide some
  kind of regulator for it.
---
 drivers/net/ethernet/smsc/smsc911x.c |  123 +++++++++++++++++++++++++++++++---
 1 files changed, 112 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 8843071..70fb695 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -44,6 +44,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/bug.h>
@@ -138,6 +139,10 @@ struct smsc911x_data {
 
 	/* register access functions */
 	const struct smsc911x_ops *ops;
+
+	/* regulators */
+	struct regulator *regulator_vddvario;
+	struct regulator *regulator_vdd33a;
 };
 
 /* Easy access to information */
@@ -362,6 +367,81 @@ out:
 	spin_unlock_irqrestore(&pdata->dev_lock, flags);
 }
 
+/*
+ * Enable or disable resources, currently just regulators.
+ */
+static int smsc911x_enable_disable_resources(struct platform_device *pdev,
+					     bool enable)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct smsc911x_data *pdata = netdev_priv(ndev);
+	int err = 0;
+
+	/* enable/disable regulator for vddvario */
+	if (enable) {
+		err = regulator_enable(pdata->regulator_vddvario);
+		if (err < 0) {
+			netdev_err(ndev, "regulator_enable failed for "
+				   "vddvario");
+		}
+	} else
+		err = regulator_disable(pdata->regulator_vddvario);
+
+	/* enable/disable regulator for vdd33a */
+	if (enable) {
+		err = regulator_enable(pdata->regulator_vdd33a);
+		if (err < 0) {
+			netdev_err(ndev, "regulator_enable failed for "
+				   "vdd33a");
+		}
+	} else
+		err = regulator_disable(pdata->regulator_vdd33a);
+
+	return err;
+}
+
+/*
+ * Request or free resources, currently just regulators.
+ *
+ * The SMSC911x has two power pins: vddvario and vdd33a, in designs where
+ * these are not always-on we need to request regulators to be turned on
+ * before we can try to access the device registers.
+ */
+static int smsc911x_request_free_resources(struct platform_device *pdev,
+		bool request)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct smsc911x_data *pdata = netdev_priv(ndev);
+	int err = 0;
+
+	/* Request regulator for vddvario */
+	if (request) {
+		pdata->regulator_vddvario = regulator_get(&pdev->dev,
+				"vddvario");
+		if (IS_ERR(pdata->regulator_vddvario)) {
+			netdev_err(ndev, "Failed to get regulator vddvario\n");
+			err = PTR_ERR(pdata->regulator_vddvario);
+		}
+	} else {
+		regulator_put(pdata->regulator_vddvario);
+		pdata->regulator_vddvario = NULL;
+	}
+
+	/* Request regulator for vdd33a */
+	if (request) {
+		pdata->regulator_vdd33a = regulator_get(&pdev->dev, "vdd33a");
+		if (IS_ERR(pdata->regulator_vdd33a)) {
+			netdev_err(ndev, "Failed to get regulator vdd33a\n");
+			err = PTR_ERR(pdata->regulator_vdd33a);
+		}
+	} else {
+		regulator_put(pdata->regulator_vdd33a);
+		pdata->regulator_vdd33a = NULL;
+	}
+
+	return err;
+}
+
 /* waits for MAC not busy, with timeout.  Only called by smsc911x_mac_read
  * and smsc911x_mac_write, so assumes mac_lock is held */
 static int smsc911x_mac_complete(struct smsc911x_data *pdata)
@@ -2065,6 +2145,7 @@ static int __devexit smsc911x_drv_remove(struct platform_device *pdev)
 	struct net_device *dev;
 	struct smsc911x_data *pdata;
 	struct resource *res;
+	int retval;
 
 	dev = platform_get_drvdata(pdev);
 	BUG_ON(!dev);
@@ -2092,6 +2173,12 @@ static int __devexit smsc911x_drv_remove(struct platform_device *pdev)
 
 	iounmap(pdata->ioaddr);
 
+	if (smsc911x_enable_disable_resources(pdev, false))
+		pr_warn("Could not disable resource\n");
+
+	retval = smsc911x_request_free_resources(pdev, false);
+	/* ignore not all have regulators */
+
 	free_netdev(dev);
 
 	return 0;
@@ -2218,10 +2305,24 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	pdata->dev = dev;
 	pdata->msg_enable = ((1 << debug) - 1);
 
+	platform_set_drvdata(pdev, dev);
+
+	retval = smsc911x_request_free_resources(pdev, true);
+	if (retval) {
+		pr_err("Could request regulators needed aborting\n");
+		goto out_return_resources;
+	}
+
+	retval = smsc911x_enable_disable_resources(pdev, true);
+	if (retval) {
+		pr_err("Could enable regulators needed aborting\n");
+		goto out_disable_resources;
+	}
+
 	if (pdata->ioaddr == NULL) {
 		SMSC_WARN(pdata, probe, "Error smsc911x base address invalid");
 		retval = -ENOMEM;
-		goto out_free_netdev_2;
+		goto out_disable_resources;
 	}
 
 	retval = smsc911x_probe_config_dt(&pdata->config, np);
@@ -2233,7 +2334,7 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 
 	if (retval) {
 		SMSC_WARN(pdata, probe, "Error smsc911x config not found");
-		goto out_unmap_io_3;
+		goto out_disable_resources;
 	}
 
 	/* assume standard, non-shifted, access to HW registers */
@@ -2244,7 +2345,7 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 
 	retval = smsc911x_init(dev);
 	if (retval < 0)
-		goto out_unmap_io_3;
+		goto out_disable_resources;
 
 	/* configure irq polarity and type before connecting isr */
 	if (pdata->config.irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH)
@@ -2264,15 +2365,13 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 	if (retval) {
 		SMSC_WARN(pdata, probe,
 			  "Unable to claim requested irq: %d", dev->irq);
-		goto out_unmap_io_3;
+		goto out_free_irq;
 	}
 
-	platform_set_drvdata(pdev, dev);
-
 	retval = register_netdev(dev);
 	if (retval) {
 		SMSC_WARN(pdata, probe, "Error %i registering device", retval);
-		goto out_unset_drvdata_4;
+		goto out_free_irq;
 	} else {
 		SMSC_TRACE(pdata, probe,
 			   "Network interface: \"%s\"", dev->name);
@@ -2321,12 +2420,14 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev)
 
 out_unregister_netdev_5:
 	unregister_netdev(dev);
-out_unset_drvdata_4:
-	platform_set_drvdata(pdev, NULL);
+out_free_irq:
 	free_irq(dev->irq, dev);
-out_unmap_io_3:
+out_disable_resources:
+	(void)smsc911x_enable_disable_resources(pdev, false);
+out_return_resources:
+	(void)smsc911x_request_free_resources(pdev, false);
+	platform_set_drvdata(pdev, NULL);
 	iounmap(pdata->ioaddr);
-out_free_netdev_2:
 	free_netdev(dev);
 out_release_io_1:
 	release_mem_region(res->start, resource_size(res));
-- 
1.7.3.2

^ permalink raw reply related

* [linux-firmware v5 4/4] rtl_nic: add new firmware for RTL8402
From: Hayes Wang @ 2011-10-26 12:45 UTC (permalink / raw)
  To: dwmw2, ben; +Cc: romieu, netdev, Hayes Wang
In-Reply-To: <1319633135-25799-1-git-send-email-hayeswang@realtek.com>

Add new firmware:
1. rtl_nic/rtl8402-1.fw
   version: 0.0.1

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
---
 WHENCE               |    3 +++
 rtl_nic/rtl8402-1.fw |  Bin 0 -> 1824 bytes
 2 files changed, 3 insertions(+), 0 deletions(-)
 create mode 100644 rtl_nic/rtl8402-1.fw

diff --git a/WHENCE b/WHENCE
index 3d4dc0a..555008e 100644
--- a/WHENCE
+++ b/WHENCE
@@ -1673,6 +1673,9 @@ Version: 0.0.3
 File: rtl_nic/rtl8411-1.fw
 Version: 0.0.1
 
+File: rtl_nic/rtl8402-1.fw
+Version: 0.0.1
+
 Licence:
  * Copyright © 2011, Realtek Semiconductor Corporation
  *
diff --git a/rtl_nic/rtl8402-1.fw b/rtl_nic/rtl8402-1.fw
new file mode 100644
index 0000000000000000000000000000000000000000..82fa35d70994dc679a481dc946d8985cd2f7bb3e
GIT binary patch
literal 1824
zcmZ{lU5J!r6vv<WSa+gg*9+0Y<IcrGYoWd~*cx7qq_P+G0SOrfGv$VwTP*9c6j+_i
zAV`hG5+P~I(Tj|X%pw!qyPyx$$Uy93y6Yk;v5O#S`u*p9Uv_w92hN=5eV%j9|9?K7
z_YEP0sp*LaR!7wpm3p)?T3IPqqE*$=Rh0@y-Cu+M)434Nh3B_TPt+&3?VjBITwN~p
zXic=bK0dW`=9#H-HCkI8U3-5u9B2*8uTOu)FyxZS$-6u(SnA=3d3XugndlwnN5{;E
z_VBzZ);V-=^Y$z8UVja}9_g_ge2nJ?g8%4tbcki>ar1xc=7o>Uw`?_E7MYKuZ=N*2
z_KW!<>=u1xzVNL1uFK{FJP&-%`jYvbtQUM{o)ecX=0)iz&F?0L_HxNykv+wI5xI^W
zt;Ad#@GiioJU+*#a2mXZc@4Y8=r;WBAkKc|I6TGso}Ro^he74P&pcvX#^*xjT=lv5
zvw0rh|2)Qa_R6vsw~T$BGu5l}s`)v1bnE(%c{O7v+!8TV*Y8{AXRZ>z6Wf=u4=*E6
zo8O+qnAIK}B*!i#e5j@3J7=n2kyygc?|@rRpHiHO3<u>_cxMhv{rtM8pNGI!KTrK&
z{sP<%pl8s#;5v<N!sDv+cf^6$UvB<BJ|83E)h2#6?^2R?39jKaxH2E=?Nf<QQ+f9E
zw5c9GG1tEqSVgeJMLoF*rm&jI<qPts4uiy2Z-+Kh*SdKh^XaNJbD%l>7OeDH`3-04
zQY2Q`JO8W*LwytOEVioId@<Uxg-35cy1jjNexL3+Jyb1)5gs;A?`8Kca@Knf2dGg8
z4!Vb1$uV$8wxbJ&StoOJoO-RGUazC?L#v0&(aYb(CKGR%J>izYExsAgY3Whool0hw
zbq=4Io*CP7&HRrJv55IAzBPQ;jF~T4Z~o1?IG*-uNj=mb&0nhf-=c><ns36t?~wU}
zO?Z-r=3n<-F&p^5N6e9paXdYbke{B@xY_r~Z(yewss0_Tb-a7-1!H6KK4}sw52rkQ
z$B@I|r1sJUxTZbsXZ<H}JILFKo%Uq67_IloKL_KyFN1Z3d<H&X=6<4n<mXq+zob_A
z51D)LyLT5%t@mMbgEQ&tXyvY6wBeWL_y)EY!A{R+IlGbJdmB7T8)6Qh!eI%u{e*Ze
z^fC0_ByYzzy`yPA7UDYshVW{eZ+#!Dre|8_N0J!jM{b>=>`b1(Rx|nc`M9V0PKEqZ
KkMyn5n&1B#>OCI-

literal 0
HcmV?d00001

-- 
1.7.6.2

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox