Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 5/9] bnx2x: Add support for BCM84834
From: Yaniv Rosner @ 2012-11-27 13:46 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1354023996-3512-1-git-send-email-yanivr@broadcom.com>

Add support for the 10G-baseT PHY - BCM84834, which is the quad-port version of
the dual-port BCM84833.

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h  |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c |   81 +++++++++++++++++----
 2 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index a1a3ff4..1504e0a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -695,6 +695,7 @@ struct port_hw_cfg {		    /* port 0: 0x12c  port 1: 0x2bc */
 		#define PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BCM54618SE    0x00000e00
 		#define PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BCM8722       0x00000f00
 		#define PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BCM54616      0x00001000
+		#define PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BCM84834      0x00001100
 		#define PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_FAILURE       0x0000fd00
 		#define PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_NOT_CONN      0x0000ff00
 
@@ -751,6 +752,7 @@ struct port_hw_cfg {		    /* port 0: 0x12c  port 1: 0x2bc */
 		#define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE     0x00000e00
 		#define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8722        0x00000f00
 		#define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54616       0x00001000
+		#define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834       0x00001100
 		#define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT_WC      0x0000fc00
 		#define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE        0x0000fd00
 		#define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN       0x0000ff00
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index a5fe2b9..f5a310e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -9519,7 +9519,8 @@ static void bnx2x_save_848xx_spirom_version(struct bnx2x_phy *phy,
 {
 	u16 val, fw_ver1, fw_ver2, cnt;
 
-	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+	if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) ||
+	    (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834)) {
 		bnx2x_cl45_read(bp, phy, MDIO_CTL_DEVAD, 0x400f, &fw_ver1);
 		bnx2x_save_spirom_version(bp, port, fw_ver1 & 0xfff,
 				phy->ver_addr);
@@ -9619,7 +9620,8 @@ static void bnx2x_848xx_set_led(struct bnx2x *bp,
 			MDIO_PMA_REG_84823_CTL_SLOW_CLK_CNT_HIGH,
 			MDIO_PMA_REG_84823_BLINK_RATE_VAL_15P9HZ);
 
-	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833)
+	if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) ||
+	    (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834))
 		offset = MDIO_PMA_REG_84833_CTL_LED_CTL_1;
 	else
 		offset = MDIO_PMA_REG_84823_CTL_LED_CTL_1;
@@ -9643,7 +9645,8 @@ static void bnx2x_848xx_specific_func(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	switch (action) {
 	case PHY_INIT:
-		if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+		if ((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) &&
+		    (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834)) {
 			/* Save spirom version */
 			bnx2x_save_848xx_spirom_version(phy, bp, params->port);
 		}
@@ -9763,10 +9766,11 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy,
 	if (phy->req_duplex == DUPLEX_FULL)
 		autoneg_val |= (1<<8);
 
-	/* Always write this if this is not 84833.
-	 * For 84833, write it only when it's a forced speed.
+	/* Always write this if this is not 84833/4.
+	 * For 84833/4, write it only when it's a forced speed.
 	 */
-	if ((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) ||
+	if (((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) &&
+	     (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834)) ||
 		((autoneg_val & (1<<12)) == 0))
 		bnx2x_cl45_write(bp, phy,
 			 MDIO_AN_DEVAD,
@@ -10049,7 +10053,8 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 
 	/* Wait for GPHY to come out of reset */
 	msleep(50);
-	if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+	if ((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) &&
+	    (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834)) {
 		/* BCM84823 requires that XGXS links up first @ 10G for normal
 		 * behavior.
 		 */
@@ -10105,7 +10110,8 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 	DP(NETIF_MSG_LINK, "Multi_phy config = 0x%x, Media control = 0x%x\n",
 		   params->multi_phy_config, val);
 
-	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+	if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) ||
+	    (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834)) {
 		bnx2x_84833_pair_swap_cfg(phy, params, vars);
 
 		/* Keep AutogrEEEn disabled. */
@@ -10169,7 +10175,8 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 		vars->eee_status &= ~SHMEM_EEE_SUPPORTED_MASK;
 	}
 
-	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) {
+	if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) ||
+	    (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834)) {
 		/* Bring PHY out of super isolate mode as the final step. */
 		bnx2x_cl45_read(bp, phy,
 				MDIO_CTL_DEVAD,
@@ -11607,6 +11614,40 @@ static struct bnx2x_phy phy_84833 = {
 	.phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func
 };
 
+static const struct bnx2x_phy phy_84834 = {
+	.type		= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834,
+	.addr		= 0xff,
+	.def_md_devad	= 0,
+	.flags		= FLAGS_FAN_FAILURE_DET_REQ |
+			    FLAGS_REARM_LATCH_SIGNAL,
+	.rx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
+	.tx_preemphasis	= {0xffff, 0xffff, 0xffff, 0xffff},
+	.mdio_ctrl	= 0,
+	.supported	= (SUPPORTED_100baseT_Half |
+			   SUPPORTED_100baseT_Full |
+			   SUPPORTED_1000baseT_Full |
+			   SUPPORTED_10000baseT_Full |
+			   SUPPORTED_TP |
+			   SUPPORTED_Autoneg |
+			   SUPPORTED_Pause |
+			   SUPPORTED_Asym_Pause),
+	.media_type	= ETH_PHY_BASE_T,
+	.ver_addr	= 0,
+	.req_flow_ctrl	= 0,
+	.req_line_speed	= 0,
+	.speed_cap_mask	= 0,
+	.req_duplex	= 0,
+	.rsrv		= 0,
+	.config_init	= (config_init_t)bnx2x_848x3_config_init,
+	.read_status	= (read_status_t)bnx2x_848xx_read_status,
+	.link_reset	= (link_reset_t)bnx2x_848x3_link_reset,
+	.config_loopback = (config_loopback_t)NULL,
+	.format_fw_ver	= (format_fw_ver_t)bnx2x_848xx_format_ver,
+	.hw_reset	= (hw_reset_t)bnx2x_84833_hw_reset_phy,
+	.set_link_led	= (set_link_led_t)bnx2x_848xx_set_link_led,
+	.phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func
+};
+
 static struct bnx2x_phy phy_54618se = {
 	.type		= PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE,
 	.addr		= 0xff,
@@ -11888,6 +11929,9 @@ static int bnx2x_populate_ext_phy(struct bnx2x *bp,
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833:
 		*phy = phy_84833;
 		break;
+	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834:
+		*phy = phy_84834;
+		break;
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54616:
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE:
 		*phy = phy_54618se;
@@ -11944,9 +11988,10 @@ static int bnx2x_populate_ext_phy(struct bnx2x *bp,
 	}
 	phy->mdio_ctrl = bnx2x_get_emac_base(bp, mdc_mdio_access, port);
 
-	if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) &&
+	if (((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) ||
+	     (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834)) &&
 	    (phy->ver_addr)) {
-		/* Remove 100Mb link supported for BCM84833 when phy fw
+		/* Remove 100Mb link supported for BCM84833/4 when phy fw
 		 * version lower than or equal to 1.39
 		 */
 		u32 raw_ver = REG_RD(bp, phy->ver_addr);
@@ -13015,7 +13060,8 @@ static int bnx2x_84833_common_init_phy(struct bnx2x *bp,
 }
 
 static int bnx2x_84833_pre_init_phy(struct bnx2x *bp,
-					       struct bnx2x_phy *phy)
+				    struct bnx2x_phy *phy,
+				    u8 port)
 {
 	u16 val, cnt;
 	/* Wait for FW completing its initialization. */
@@ -13042,26 +13088,28 @@ static int bnx2x_84833_pre_init_phy(struct bnx2x *bp,
 			 MDIO_84833_TOP_CFG_XGPHY_STRAP1, val);
 
 	/* Save spirom version */
-	bnx2x_save_848xx_spirom_version(phy, bp, PORT_0);
+	bnx2x_save_848xx_spirom_version(phy, bp, port);
 	return 0;
 }
 
 int bnx2x_pre_init_phy(struct bnx2x *bp,
 				  u32 shmem_base,
 				  u32 shmem2_base,
-				  u32 chip_id)
+				  u32 chip_id,
+				  u8 port)
 {
 	int rc = 0;
 	struct bnx2x_phy phy;
 	if (bnx2x_populate_phy(bp, EXT_PHY1, shmem_base, shmem2_base,
-			       PORT_0, &phy)) {
+			       port, &phy) != 0) {
 		DP(NETIF_MSG_LINK, "populate_phy failed\n");
 		return -EINVAL;
 	}
 	bnx2x_set_mdio_clk(bp, chip_id, phy.mdio_ctrl);
 	switch (phy.type) {
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833:
-		rc = bnx2x_84833_pre_init_phy(bp, &phy);
+	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834:
+		rc = bnx2x_84833_pre_init_phy(bp, &phy, port);
 		break;
 	default:
 		break;
@@ -13098,6 +13146,7 @@ static int bnx2x_ext_phy_common_init(struct bnx2x *bp, u32 shmem_base_path[],
 						phy_index, chip_id);
 		break;
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833:
+	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84834:
 		/* GPIO3's are linked, and so both need to be toggled
 		 * to obtain required 2us pulse.
 		 */
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 4/9] bnx2x: Fix SFP+ current leakage
From: Yaniv Rosner @ 2012-11-27 13:46 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1354023996-3512-1-git-send-email-yanivr@broadcom.com>

Per measurements, the SFP+ suffered from small current leakage in two cases:
 - When no module was plugged and TX laser was disabled. The fix was to enable
   it, and when module is plugged in, check if it needs to be disabled.
 - When over-current event occurs due to invalid SFP+ module, the HW basically
   shuts down the current for this module, but the SW needs to complete this
   by issuing a power down via a GPIO.

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c |   91 +++++++++++-----------
 1 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index e054921..a5fe2b9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -4414,6 +4414,27 @@ static void bnx2x_warpcore_config_sfi(struct bnx2x_phy *phy,
 	}
 }
 
+static void bnx2x_sfp_e3_set_transmitter(struct link_params *params,
+					 struct bnx2x_phy *phy,
+					 u8 tx_en)
+{
+	struct bnx2x *bp = params->bp;
+	u32 cfg_pin;
+	u8 port = params->port;
+
+	cfg_pin = REG_RD(bp, params->shmem_base +
+			 offsetof(struct shmem_region,
+				  dev_info.port_hw_config[port].e3_sfp_ctrl)) &
+		PORT_HW_CFG_E3_TX_LASER_MASK;
+	/* Set the !tx_en since this pin is DISABLE_TX_LASER */
+	DP(NETIF_MSG_LINK, "Setting WC TX to %d\n", tx_en);
+
+	/* For 20G, the expected pin to be used is 3 pins after the current */
+	bnx2x_set_cfg_pin(bp, cfg_pin, tx_en ^ 1);
+	if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)
+		bnx2x_set_cfg_pin(bp, cfg_pin + 3, tx_en ^ 1);
+}
+
 static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
 				       struct link_params *params,
 				       struct link_vars *vars)
@@ -4474,9 +4495,14 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
 			break;
 
 		case PORT_HW_CFG_NET_SERDES_IF_SFI:
-			/* Issue Module detection */
+			/* Issue Module detection if module is plugged, or
+			 * enabled transmitter to avoid current leakage in case
+			 * no module is connected
+			 */
 			if (bnx2x_is_sfp_module_plugged(phy, params))
 				bnx2x_sfp_module_detection(phy, params);
+			else
+				bnx2x_sfp_e3_set_transmitter(params, phy, 1);
 
 			bnx2x_warpcore_config_sfi(phy, params);
 			break;
@@ -4513,27 +4539,6 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
 	DP(NETIF_MSG_LINK, "Exit config init\n");
 }
 
-static void bnx2x_sfp_e3_set_transmitter(struct link_params *params,
-					 struct bnx2x_phy *phy,
-					 u8 tx_en)
-{
-	struct bnx2x *bp = params->bp;
-	u32 cfg_pin;
-	u8 port = params->port;
-
-	cfg_pin = REG_RD(bp, params->shmem_base +
-				offsetof(struct shmem_region,
-				dev_info.port_hw_config[port].e3_sfp_ctrl)) &
-				PORT_HW_CFG_TX_LASER_MASK;
-	/* Set the !tx_en since this pin is DISABLE_TX_LASER */
-	DP(NETIF_MSG_LINK, "Setting WC TX to %d\n", tx_en);
-	/* For 20G, the expected pin to be used is 3 pins after the current */
-
-	bnx2x_set_cfg_pin(bp, cfg_pin, tx_en ^ 1);
-	if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)
-		bnx2x_set_cfg_pin(bp, cfg_pin + 3, tx_en ^ 1);
-}
-
 static void bnx2x_warpcore_link_reset(struct bnx2x_phy *phy,
 				      struct link_params *params)
 {
@@ -7833,7 +7838,6 @@ static int bnx2x_8726_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 }
 
 static void bnx2x_warpcore_power_module(struct link_params *params,
-					struct bnx2x_phy *phy,
 					u8 power)
 {
 	u32 pin_cfg;
@@ -7875,10 +7879,10 @@ static int bnx2x_warpcore_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 	addr32 = addr & (~0x3);
 	do {
 		if ((!is_init) && (cnt == I2C_WA_PWR_ITER)) {
-			bnx2x_warpcore_power_module(params, phy, 0);
+			bnx2x_warpcore_power_module(params, 0);
 			/* Note that 100us are not enough here */
 			usleep_range(1000, 2000);
-			bnx2x_warpcore_power_module(params, phy, 1);
+			bnx2x_warpcore_power_module(params, 1);
 		}
 		rc = bnx2x_bsc_read(params, phy, 0xa0, addr32, 0, byte_cnt,
 				    data_array);
@@ -8464,7 +8468,7 @@ static void bnx2x_warpcore_hw_reset(struct bnx2x_phy *phy,
 				    struct link_params *params)
 {
 	struct bnx2x *bp = params->bp;
-	bnx2x_warpcore_power_module(params, phy, 0);
+	bnx2x_warpcore_power_module(params, 0);
 	/* Put Warpcore in low power mode */
 	REG_WR(bp, MISC_REG_WC0_RESET, 0x0c0e);
 
@@ -8487,7 +8491,7 @@ static void bnx2x_power_sfp_module(struct link_params *params,
 		bnx2x_8727_power_module(params->bp, phy, power);
 		break;
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT:
-		bnx2x_warpcore_power_module(params, phy, power);
+		bnx2x_warpcore_power_module(params, power);
 		break;
 	default:
 		break;
@@ -8560,7 +8564,8 @@ int bnx2x_sfp_module_detection(struct bnx2x_phy *phy,
 	u32 val = REG_RD(bp, params->shmem_base +
 			     offsetof(struct shmem_region, dev_info.
 				     port_feature_config[params->port].config));
-
+	/* Enabled transmitter by default */
+	bnx2x_sfp_set_transmitter(params, phy, 1);
 	DP(NETIF_MSG_LINK, "SFP+ module plugged in/out detected on port %d\n",
 		 params->port);
 	/* Power up module */
@@ -8593,14 +8598,12 @@ int bnx2x_sfp_module_detection(struct bnx2x_phy *phy,
 	 */
 	bnx2x_set_limiting_mode(params, phy, edc_mode);
 
-	/* Enable transmit for this module if the module is approved, or
-	 * if unapproved modules should also enable the Tx laser
+	/* Disable transmit for this module if the module is not approved, and
+	 * laser needs to be disabled.
 	 */
-	if (rc == 0 ||
-	    (val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) !=
-	    PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_DISABLE_TX_LASER)
-		bnx2x_sfp_set_transmitter(params, phy, 1);
-	else
+	if ((rc) &&
+	    ((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) ==
+	     PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_DISABLE_TX_LASER))
 		bnx2x_sfp_set_transmitter(params, phy, 0);
 
 	return rc;
@@ -8612,11 +8615,13 @@ void bnx2x_handle_module_detect_int(struct link_params *params)
 	struct bnx2x_phy *phy;
 	u32 gpio_val;
 	u8 gpio_num, gpio_port;
-	if (CHIP_IS_E3(bp))
+	if (CHIP_IS_E3(bp)) {
 		phy = &params->phy[INT_PHY];
-	else
+		/* Always enable TX laser,will be disabled in case of fault */
+		bnx2x_sfp_set_transmitter(params, phy, 1);
+	} else {
 		phy = &params->phy[EXT_PHY1];
-
+	}
 	if (bnx2x_get_mod_abs_int_cfg(bp, params->chip_id, params->shmem_base,
 				      params->port, &gpio_num, &gpio_port) ==
 	    -EINVAL) {
@@ -8661,10 +8666,6 @@ void bnx2x_handle_module_detect_int(struct link_params *params)
 			DP(NETIF_MSG_LINK, "SFP+ module is not initialized\n");
 		}
 	} else {
-		u32 val = REG_RD(bp, params->shmem_base +
-				 offsetof(struct shmem_region, dev_info.
-					  port_feature_config[params->port].
-					  config));
 		bnx2x_set_gpio_int(bp, gpio_num,
 				   MISC_REGISTERS_GPIO_INT_OUTPUT_SET,
 				   gpio_port);
@@ -8672,10 +8673,6 @@ void bnx2x_handle_module_detect_int(struct link_params *params)
 		 * Disable transmit for this module
 		 */
 		phy->media_type = ETH_PHY_NOT_PRESENT;
-		if (((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) ==
-		     PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_DISABLE_TX_LASER) ||
-		    CHIP_IS_E3(bp))
-			bnx2x_sfp_set_transmitter(params, phy, 0);
 	}
 }
 
@@ -9415,6 +9412,7 @@ static u8 bnx2x_8727_read_status(struct bnx2x_phy *phy,
 			bnx2x_cl45_read(bp, phy,
 				MDIO_PMA_DEVAD,
 				MDIO_PMA_LASI_RXSTAT, &rx_alarm_status);
+			bnx2x_8727_power_module(params->bp, phy, 0);
 			return 0;
 		}
 	} /* Over current check */
@@ -13194,6 +13192,7 @@ static void bnx2x_check_over_curr(struct link_params *params,
 					    " error.\n",
 			 params->port);
 			vars->phy_flags |= PHY_OVER_CURRENT_FLAG;
+			bnx2x_warpcore_power_module(params, 0);
 		}
 	} else
 		vars->phy_flags &= ~PHY_OVER_CURRENT_FLAG;
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 2/9] bnx2x: Add support for 20G-KR2
From: Yaniv Rosner @ 2012-11-27 13:46 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1354023996-3512-1-git-send-email-yanivr@broadcom.com>

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h  |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c |  366 ++++++++++++++++++++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h  |   29 ++-
 4 files changed, 365 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index f478bed..525a9bc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -2167,6 +2167,8 @@ struct shmem2_region {
 	u32 reserved2;				/* Offset 0x148 */
 	u32 reserved3;				/* Offset 0x14C */
 	u32 reserved4;				/* Offset 0x150 */
+	u32 link_attr_sync[PORT_MAX];		/* Offset 0x154 */
+	#define LINK_ATTR_SYNC_KR2_ENABLE	(1<<0)
 };
 
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index c98da25..eb44b23 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -121,6 +121,7 @@
 #define	GP_STATUS_10G_XFI   MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_XFI
 #define	GP_STATUS_20G_DXGXS MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_20G_DXGXS
 #define	GP_STATUS_10G_SFI   MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_SFI
+#define	GP_STATUS_20G_KR2 MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_20G_KR2
 #define LINK_10THD		LINK_STATUS_SPEED_AND_DUPLEX_10THD
 #define LINK_10TFD		LINK_STATUS_SPEED_AND_DUPLEX_10TFD
 #define LINK_100TXHD		LINK_STATUS_SPEED_AND_DUPLEX_100TXHD
@@ -1664,7 +1665,10 @@ static void bnx2x_xmac_init(struct link_params *params, u32 max_speed)
 	 * ports of the path
 	 */
 
-	if ((CHIP_NUM(bp) == CHIP_NUM_57840_4_10) &&
+	if (((CHIP_NUM(bp) == CHIP_NUM_57840_4_10) ||
+	     (CHIP_NUM(bp) == CHIP_NUM_57840_2_20) ||
+	     (CHIP_NUM(bp) == CHIP_NUM_57840_OBSOLETE)) &&
+	    is_port4mode &&
 	    (REG_RD(bp, MISC_REG_RESET_REG_2) &
 	     MISC_REGISTERS_RESET_REG_2_XMAC)) {
 		DP(NETIF_MSG_LINK,
@@ -1760,6 +1764,18 @@ static int bnx2x_xmac_enable(struct link_params *params,
 	 */
 	REG_WR(bp, NIG_REG_EGRESS_EMAC0_PORT + params->port*4, 0);
 
+	/* When XMAC is in XLGMII mode, disable sending idles for fault
+	 * detection.
+	 */
+	if (!(params->phy[INT_PHY].flags & FLAGS_TX_ERROR_CHECK)) {
+		REG_WR(bp, xmac_base + XMAC_REG_RX_LSS_CTRL,
+		       (XMAC_RX_LSS_CTRL_REG_LOCAL_FAULT_DISABLE |
+			XMAC_RX_LSS_CTRL_REG_REMOTE_FAULT_DISABLE));
+		REG_WR(bp, xmac_base + XMAC_REG_CLEAR_RX_LSS_STATUS, 0);
+		REG_WR(bp, xmac_base + XMAC_REG_CLEAR_RX_LSS_STATUS,
+		       XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_LOCAL_FAULT_STATUS |
+		       XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_REMOTE_FAULT_STATUS);
+	}
 	/* Set Max packet size */
 	REG_WR(bp, xmac_base + XMAC_REG_RX_MAX_SIZE, 0x2710);
 
@@ -1780,6 +1796,12 @@ static int bnx2x_xmac_enable(struct link_params *params,
 	/* Enable TX and RX */
 	val = XMAC_CTRL_REG_TX_EN | XMAC_CTRL_REG_RX_EN;
 
+	/* Set MAC in XLGMII mode for dual-mode */
+	if ((vars->line_speed == SPEED_20000) &&
+	    (params->phy[INT_PHY].supported &
+	     SUPPORTED_20000baseKR2_Full))
+		val |= XMAC_CTRL_REG_XLGMII_ALIGN_ENB;
+
 	/* Check loopback mode */
 	if (lb)
 		val |= XMAC_CTRL_REG_LINE_LOCAL_LPBK;
@@ -2096,6 +2118,16 @@ static void bnx2x_update_mng(struct link_params *params, u32 link_status)
 			port_mb[params->port].link_status), link_status);
 }
 
+static void bnx2x_update_link_attr(struct link_params *params, u32 link_attr)
+{
+	struct bnx2x *bp = params->bp;
+
+	if (SHMEM2_HAS(bp, link_attr_sync))
+		REG_WR(bp, params->shmem2_base +
+		       offsetof(struct shmem2_region,
+				link_attr_sync[params->port]), link_attr);
+}
+
 static void bnx2x_update_pfc_nig(struct link_params *params,
 		struct link_vars *vars,
 		struct bnx2x_nig_brb_pfc_port_params *nig_params)
@@ -3147,6 +3179,15 @@ static void bnx2x_cl45_read_or_write(struct bnx2x *bp, struct bnx2x_phy *phy,
 	bnx2x_cl45_write(bp, phy, devad, reg, val | or_val);
 }
 
+static void bnx2x_cl45_read_and_write(struct bnx2x *bp,
+				      struct bnx2x_phy *phy,
+				      u8 devad, u16 reg, u16 and_val)
+{
+	u16 val;
+	bnx2x_cl45_read(bp, phy, devad, reg, &val);
+	bnx2x_cl45_write(bp, phy, devad, reg, val & and_val);
+}
+
 int bnx2x_phy_read(struct link_params *params, u8 phy_addr,
 		   u8 devad, u16 reg, u16 *ret_val)
 {
@@ -3551,6 +3592,44 @@ static u8 bnx2x_ext_phy_resolve_fc(struct bnx2x_phy *phy,
  * init configuration, and set/clear SGMII flag. Internal
  * phy init is done purely in phy_init stage.
  */
+static void bnx2x_warpcore_enable_AN_KR2(struct bnx2x_phy *phy,
+					 struct link_params *params,
+					 struct link_vars *vars)
+{
+	struct bnx2x *bp = params->bp;
+	u16 i;
+	static struct bnx2x_reg_set reg_set[] = {
+		/* Step 1 - Program the TX/RX alignment markers */
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL5, 0xa157},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL7, 0xcbe2},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL6, 0x7537},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL9, 0xa157},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL11, 0xcbe2},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL10, 0x7537},
+		/* Step 2 - Configure the NP registers */
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL73_USERB0_CTRL, 0x000a},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL1, 0x6400},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL3, 0x0620},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CODE_FIELD, 0x0157},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI1, 0x6464},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI2, 0x3150},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI3, 0x3150},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_BAM_CODE, 0x0157},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_UD_CODE, 0x0620}
+	};
+	DP(NETIF_MSG_LINK, "Enabling 20G-KR2\n");
+
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_CL49_USERB0_CTRL, (3<<6));
+
+	for (i = 0; i < sizeof(reg_set)/sizeof(struct bnx2x_reg_set); i++)
+		bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg,
+				 reg_set[i].val);
+
+	/* Start KR2 work-around timer which handles BCM8073 link-parner */
+	vars->link_attr_sync |= LINK_ATTR_SYNC_KR2_ENABLE;
+	bnx2x_update_link_attr(params, vars->link_attr_sync);
+}
 
 static void bnx2x_warpcore_set_lpi_passthrough(struct bnx2x_phy *phy,
 					       struct link_params *params)
@@ -3564,6 +3643,21 @@ static void bnx2x_warpcore_set_lpi_passthrough(struct bnx2x_phy *phy,
 				 MDIO_WC_REG_DIGITAL4_MISC5, 0xc000);
 }
 
+static void bnx2x_warpcore_restart_AN_KR(struct bnx2x_phy *phy,
+					 struct link_params *params)
+{
+	/* Restart autoneg on the leading lane only */
+	struct bnx2x *bp = params->bp;
+	u16 lane = bnx2x_get_warpcore_lane(phy, params);
+	CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
+			  MDIO_AER_BLOCK_AER_REG, lane);
+	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1200);
+
+	/* Restore AER */
+	bnx2x_set_aer_mmd(params, phy);
+}
+
 static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 					struct link_params *params,
 					struct link_vars *vars) {
@@ -3576,7 +3670,9 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 		{MDIO_WC_DEVAD, MDIO_WC_REG_RX66_CONTROL, 0x7415},
 		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_MISC2, 0x6190},
 		/* Disable Autoneg: re-enable it after adv is done. */
-		{MDIO_AN_DEVAD, MDIO_WC_REG_IEEE0BLK_MIICNTL, 0}
+		{MDIO_AN_DEVAD, MDIO_WC_REG_IEEE0BLK_MIICNTL, 0},
+		{MDIO_PMA_DEVAD, MDIO_WC_REG_PMD_KR_CONTROL, 0x2},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL72_USERB0_CL72_TX_FIR_TAP, 0},
 	};
 	DP(NETIF_MSG_LINK, "Enable Auto Negotiation for KR\n");
 	/* Set to default registers that may be overriden by 10G force */
@@ -3586,7 +3682,7 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 
 	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
 		MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL, &cl72_ctrl);
-	cl72_ctrl &= 0xf8ff;
+	cl72_ctrl &= 0x08ff;
 	cl72_ctrl |= 0x3800;
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 		MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL, cl72_ctrl);
@@ -3624,6 +3720,16 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 		     ((0x02 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) |
 		      (0x06 << MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) |
 		      (0x09 << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET)));
+	/* Configure the next lane if dual mode */
+	if (phy->flags & FLAGS_WC_DUAL_MODE)
+		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_TX0_TX_DRIVER + 0x10*(lane+1),
+				 ((0x02 <<
+				 MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) |
+				  (0x06 <<
+				   MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) |
+				  (0x09 <<
+				MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET)));
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 			 MDIO_WC_REG_CL72_USERB0_CL72_OS_DEF_CTRL,
 			 0x03f0);
@@ -3670,10 +3776,26 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 			MDIO_WC_REG_DIGITAL3_UP1, 0x1f);
 
-	/* Enable Autoneg */
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
-			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1200);
+	if (((phy->req_line_speed == SPEED_AUTO_NEG) &&
+	     (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) ||
+	    (phy->req_line_speed == SPEED_20000)) {
+
+		CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
+				  MDIO_AER_BLOCK_AER_REG, lane);
+
+		bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+					 MDIO_WC_REG_RX1_PCI_CTRL + (0x10*lane),
+					 (1<<11));
+
+		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_XGXS_X2_CONTROL3, 0x7);
+		bnx2x_set_aer_mmd(params, phy);
 
+		bnx2x_warpcore_enable_AN_KR2(phy, params, vars);
+	}
+
+	/* Enable Autoneg: only on the main lane */
+	bnx2x_warpcore_restart_AN_KR(phy, params);
 }
 
 static void bnx2x_warpcore_set_10G_KR(struct bnx2x_phy *phy,
@@ -3692,9 +3814,7 @@ static void bnx2x_warpcore_set_10G_KR(struct bnx2x_phy *phy,
 		{MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL3_UP1, 0x1},
 		{MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL5_MISC7, 0xa},
 		/* Leave cl72 training enable, needed for KR */
-		{MDIO_PMA_DEVAD,
-		MDIO_WC_REG_PMD_IEEE9BLK_TENGBASE_KR_PMD_CONTROL_REGISTER_150,
-		0x2}
+		{MDIO_PMA_DEVAD, MDIO_WC_REG_PMD_KR_CONTROL, 0x2}
 	};
 
 	for (i = 0; i < sizeof(reg_set)/sizeof(struct bnx2x_reg_set); i++)
@@ -3858,10 +3978,57 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy,
 			 MDIO_WC_REG_DSC2B0_DSC_MISC_CTRL0, (val & 0x7FFF));
 }
 
-static void bnx2x_warpcore_set_20G_KR2(struct bnx2x *bp,
-				       struct bnx2x_phy *phy)
+static void bnx2x_warpcore_set_20G_force_KR2(struct bnx2x_phy *phy,
+					     struct link_params *params)
 {
-	DP(NETIF_MSG_LINK, "KR2 still not supported !!!\n");
+	u16 val;
+	struct bnx2x *bp = params->bp;
+	/* Set global registers, so set AER lane to 0 */
+	CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
+			  MDIO_AER_BLOCK_AER_REG, 0);
+
+	/* Disable sequencer */
+	bnx2x_cl45_read_and_write(bp, phy, MDIO_WC_DEVAD,
+				  MDIO_WC_REG_XGXSBLK0_XGXSCONTROL, ~(1<<13));
+
+	bnx2x_set_aer_mmd(params, phy);
+
+	bnx2x_cl45_read_and_write(bp, phy, MDIO_PMA_DEVAD,
+				  MDIO_WC_REG_PMD_KR_CONTROL, ~(1<<1));
+	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+			 MDIO_AN_REG_CTRL, 0);
+	/* Turn off CL73 */
+	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
+			MDIO_WC_REG_CL73_USERB0_CTRL, &val);
+	val &= ~(1<<5);
+	val |= (1<<6);
+	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
+			 MDIO_WC_REG_CL73_USERB0_CTRL, val);
+
+	/* Set 20G KR2 force speed */
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_SERDESDIGITAL_MISC1, 0x1f);
+
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_DIGITAL4_MISC3, (1<<7));
+
+	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
+			MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL, &val);
+	val &= ~(3<<14);
+	val |= (1<<15);
+	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
+			 MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL, val);
+	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
+			 MDIO_WC_REG_CL72_USERB0_CL72_TX_FIR_TAP, 0x835A);
+
+	/* Enable sequencer (over lane 0) */
+	CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
+			  MDIO_AER_BLOCK_AER_REG, 0);
+
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_XGXSBLK0_XGXSCONTROL, (1<<13));
+
+	bnx2x_set_aer_mmd(params, phy);
 }
 
 static void bnx2x_warpcore_set_20G_DXGXS(struct bnx2x *bp,
@@ -4293,16 +4460,14 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
 
 			bnx2x_sfp_module_detection(phy, params);
 			break;
-
 		case PORT_HW_CFG_NET_SERDES_IF_KR2:
-			if (vars->line_speed != SPEED_20000) {
-				DP(NETIF_MSG_LINK, "Speed not supported yet\n");
-				return;
+			if (!params->loopback_mode) {
+				bnx2x_warpcore_enable_AN_KR(phy, params, vars);
+			} else {
+				DP(NETIF_MSG_LINK, "Setting KR 20G-Force\n");
+				bnx2x_warpcore_set_20G_force_KR2(phy, params);
 			}
-			DP(NETIF_MSG_LINK, "Setting 20G KR2\n");
-			bnx2x_warpcore_set_20G_KR2(bp, phy);
 			break;
-
 		default:
 			DP(NETIF_MSG_LINK,
 			   "Unsupported Serdes Net Interface 0x%x\n",
@@ -4413,8 +4578,9 @@ static void bnx2x_set_warpcore_loopback(struct bnx2x_phy *phy,
 	DP(NETIF_MSG_LINK, "Setting Warpcore loopback type %x, speed %d\n",
 		       params->loopback_mode, phy->req_line_speed);
 
-	if (phy->req_line_speed < SPEED_10000) {
-		/* 10/100/1000 */
+	if (phy->req_line_speed < SPEED_10000 ||
+	    phy->supported & SUPPORTED_20000baseKR2_Full) {
+		/* 10/100/1000/20G-KR2 */
 
 		/* Update those 1-copy registers */
 		CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
@@ -4427,18 +4593,20 @@ static void bnx2x_set_warpcore_loopback(struct bnx2x_phy *phy,
 		lane = bnx2x_get_warpcore_lane(phy, params);
 		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
 				MDIO_WC_REG_XGXSBLK1_LANECTRL2, &val16);
+		val16 |= (1<<lane);
+		if (phy->flags & FLAGS_WC_DUAL_MODE)
+			val16 |= (2<<lane);
 		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 				MDIO_WC_REG_XGXSBLK1_LANECTRL2,
-				val16 | (1<<lane));
+				val16);
 
 		/* Switch back to 4-copy registers */
 		bnx2x_set_aer_mmd(params, phy);
 	} else {
-		/* 10G & 20G */
+		/* 10G / 20G-DXGXS */
 		bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
 					 MDIO_WC_REG_COMBO_IEEE0_MIICTRL,
 					 0x4000);
-
 		bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
 					 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1);
 	}
@@ -4603,6 +4771,10 @@ void bnx2x_link_status_update(struct link_params *params,
 		params->feature_config_flags &=
 					~FEATURE_CONFIG_PFC_ENABLED;
 
+	if (SHMEM2_HAS(bp, link_attr_sync))
+		vars->link_attr_sync = SHMEM2_RD(bp,
+						 link_attr_sync[params->port]);
+
 	DP(NETIF_MSG_LINK, "link_status 0x%x  phy_link_up %x int_mask 0x%x\n",
 		 vars->link_status, vars->phy_link_up, vars->aeu_int_mask);
 	DP(NETIF_MSG_LINK, "line_speed %x  duplex %x  flow_ctrl 0x%x\n",
@@ -5332,6 +5504,7 @@ static int bnx2x_get_link_speed_duplex(struct bnx2x_phy *phy,
 			vars->link_status |= LINK_10GTFD;
 			break;
 		case GP_STATUS_20G_DXGXS:
+		case GP_STATUS_20G_KR2:
 			vars->line_speed = SPEED_20000;
 			vars->link_status |= LINK_20GTFD;
 			break;
@@ -5439,7 +5612,15 @@ static int bnx2x_warpcore_read_status(struct bnx2x_phy *phy,
 	int rc = 0;
 	lane = bnx2x_get_warpcore_lane(phy, params);
 	/* Read gp_status */
-	if (phy->req_line_speed > SPEED_10000) {
+	if ((params->loopback_mode) &&
+	    (phy->flags & FLAGS_WC_DUAL_MODE)) {
+		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
+				MDIO_WC_REG_DIGITAL5_LINK_STATUS, &link_up);
+		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
+				MDIO_WC_REG_DIGITAL5_LINK_STATUS, &link_up);
+		link_up &= 0x1;
+	} else if ((phy->req_line_speed > SPEED_10000) &&
+		(phy->supported & SUPPORTED_20000baseMLD2_Full)) {
 		u16 temp_link_up;
 		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
 				1, &temp_link_up);
@@ -5452,12 +5633,22 @@ static int bnx2x_warpcore_read_status(struct bnx2x_phy *phy,
 			bnx2x_ext_phy_resolve_fc(phy, params, vars);
 	} else {
 		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-				MDIO_WC_REG_GP2_STATUS_GP_2_1, &gp_status1);
+				MDIO_WC_REG_GP2_STATUS_GP_2_1,
+				&gp_status1);
 		DP(NETIF_MSG_LINK, "0x81d1 = 0x%x\n", gp_status1);
-		/* Check for either KR or generic link up. */
-		gp_status1 = ((gp_status1 >> 8) & 0xf) |
-			((gp_status1 >> 12) & 0xf);
-		link_up = gp_status1 & (1 << lane);
+		/* Check for either KR, 1G, or AN up. */
+		link_up = ((gp_status1 >> 8) |
+			   (gp_status1 >> 12) |
+			   (gp_status1)) &
+			(1 << lane);
+		if (phy->supported & SUPPORTED_20000baseKR2_Full) {
+			u16 an_link;
+			bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+					MDIO_AN_REG_STATUS, &an_link);
+			bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+					MDIO_AN_REG_STATUS, &an_link);
+			link_up |= (an_link & (1<<2));
+		}
 		if (link_up && SINGLE_MEDIA_DIRECT(params)) {
 			u16 pd, gp_status4;
 			if (phy->req_line_speed == SPEED_AUTO_NEG) {
@@ -5522,7 +5713,7 @@ static int bnx2x_warpcore_read_status(struct bnx2x_phy *phy,
 	if ((lane & 1) == 0)
 		gp_speed <<= 8;
 	gp_speed &= 0x3f00;
-
+	link_up = !!link_up;
 
 	rc = bnx2x_get_link_speed_duplex(phy, params, vars, link_up, gp_speed,
 					 duplex);
@@ -11564,9 +11755,11 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port,
 			phy->media_type = ETH_PHY_KR;
 			phy->flags |= FLAGS_WC_DUAL_MODE;
 			phy->supported &= (SUPPORTED_20000baseKR2_Full |
+					   SUPPORTED_Autoneg |
 					   SUPPORTED_FIBRE |
 					   SUPPORTED_Pause |
 					   SUPPORTED_Asym_Pause);
+			phy->flags &= ~FLAGS_TX_ERROR_CHECK;
 			break;
 		default:
 			DP(NETIF_MSG_LINK, "Unknown WC interface type 0x%x\n",
@@ -12018,13 +12211,17 @@ static void bnx2x_init_xgxs_loopback(struct link_params *params,
 				     struct link_vars *vars)
 {
 	struct bnx2x *bp = params->bp;
+	struct bnx2x_phy *int_phy = &params->phy[INT_PHY];
 		vars->link_up = 1;
 		vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
 		vars->duplex = DUPLEX_FULL;
 	if (params->req_line_speed[0] == SPEED_1000)
 			vars->line_speed = SPEED_1000;
+	else if ((params->req_line_speed[0] == SPEED_20000) ||
+		 (int_phy->flags & FLAGS_WC_DUAL_MODE))
+		vars->line_speed = SPEED_20000;
 	else
-			vars->line_speed = SPEED_10000;
+		vars->line_speed = SPEED_10000;
 
 	if (!USES_WARPCORE(bp))
 		bnx2x_xgxs_deassert(params);
@@ -13139,6 +13336,108 @@ static void bnx2x_sfp_tx_fault_detection(struct bnx2x_phy *phy,
 		}
 	}
 }
+static void bnx2x_disable_kr2(struct link_params *params,
+			      struct link_vars *vars,
+			      struct bnx2x_phy *phy)
+{
+	struct bnx2x *bp = params->bp;
+	int i;
+	static struct bnx2x_reg_set reg_set[] = {
+		/* Step 1 - Program the TX/RX alignment markers */
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL5, 0x7690},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL7, 0xe647},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL6, 0xc4f0},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL9, 0x7690},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL11, 0xe647},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL10, 0xc4f0},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL73_USERB0_CTRL, 0x000c},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL1, 0x6000},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL3, 0x0000},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CODE_FIELD, 0x0002},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI1, 0x0000},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI2, 0x0af7},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI3, 0x0af7},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_BAM_CODE, 0x0002},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_UD_CODE, 0x0000}
+	};
+	DP(NETIF_MSG_LINK, "Disabling 20G-KR2\n");
+
+	for (i = 0; i < sizeof(reg_set)/sizeof(struct bnx2x_reg_set); i++)
+		bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg,
+				 reg_set[i].val);
+	vars->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE;
+	bnx2x_update_link_attr(params, vars->link_attr_sync);
+
+	/* Restart AN on leading lane */
+	bnx2x_warpcore_restart_AN_KR(phy, params);
+}
+
+static void bnx2x_kr2_recovery(struct link_params *params,
+			       struct link_vars *vars,
+			       struct bnx2x_phy *phy)
+{
+	struct bnx2x *bp = params->bp;
+	DP(NETIF_MSG_LINK, "KR2 recovery\n");
+	bnx2x_warpcore_enable_AN_KR2(phy, params, vars);
+	bnx2x_warpcore_restart_AN_KR(phy, params);
+}
+
+static void bnx2x_check_kr2_wa(struct link_params *params,
+			       struct link_vars *vars,
+			       struct bnx2x_phy *phy)
+{
+	struct bnx2x *bp = params->bp;
+	u16 base_page, next_page, not_kr2_device, lane;
+	int sigdet = bnx2x_warpcore_get_sigdet(phy, params);
+
+	if (!sigdet) {
+		if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE))
+			bnx2x_kr2_recovery(params, vars, phy);
+		return;
+	}
+
+	lane = bnx2x_get_warpcore_lane(phy, params);
+	CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
+			  MDIO_AER_BLOCK_AER_REG, lane);
+	bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+			MDIO_AN_REG_LP_AUTO_NEG, &base_page);
+	bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD,
+			MDIO_AN_REG_LP_AUTO_NEG2, &next_page);
+	bnx2x_set_aer_mmd(params, phy);
+
+	/* CL73 has not begun yet */
+	if (base_page == 0) {
+		if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE))
+			bnx2x_kr2_recovery(params, vars, phy);
+		return;
+	}
+
+	/* In case NP bit is not set in the BasePage, or it is set,
+	 * but only KX is advertised, declare this link partner as non-KR2
+	 * device.
+	 */
+	not_kr2_device = (((base_page & 0x8000) == 0) ||
+			  (((base_page & 0x8000) &&
+			    ((next_page & 0xe0) == 0x2))));
+
+	/* In case KR2 is already disabled, check if we need to re-enable it */
+	if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) {
+		if (!not_kr2_device) {
+			DP(NETIF_MSG_LINK, "BP=0x%x, NP=0x%x\n", base_page,
+			   next_page);
+			bnx2x_kr2_recovery(params, vars, phy);
+		}
+		return;
+	}
+	/* KR2 is enabled, but not KR2 device */
+	if (not_kr2_device) {
+		/* Disable KR2 on both lanes */
+		DP(NETIF_MSG_LINK, "BP=0x%x, NP=0x%x\n", base_page, next_page);
+		bnx2x_disable_kr2(params, vars, phy);
+		return;
+	}
+}
+
 void bnx2x_period_func(struct link_params *params, struct link_vars *vars)
 {
 	u16 phy_idx;
@@ -13156,6 +13455,9 @@ void bnx2x_period_func(struct link_params *params, struct link_vars *vars)
 	if (CHIP_IS_E3(bp)) {
 		struct bnx2x_phy *phy = &params->phy[INT_PHY];
 		bnx2x_set_aer_mmd(params, phy);
+		if ((phy->supported & SUPPORTED_20000baseKR2_Full) &&
+		    (phy->speed_cap_mask & SPEED_20000))
+			bnx2x_check_kr2_wa(params, vars, phy);
 		bnx2x_check_over_curr(params, vars);
 		if (vars->rx_tx_asic_rst)
 			bnx2x_warpcore_config_runtime(phy, params, vars);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index ba981ce..33940fb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -347,6 +347,8 @@ struct link_vars {
 	u8 rx_tx_asic_rst;
 	u8 turn_to_run_wc_rt;
 	u16 rsrv2;
+	/* The same definitions as the shmem2 parameter */
+	u32 link_attr_sync;
 };
 
 /***********************************************************/
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index 7d93adb..f8d432a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -5498,6 +5498,7 @@
 #define XMAC_CTRL_REG_RX_EN					 (0x1<<1)
 #define XMAC_CTRL_REG_SOFT_RESET				 (0x1<<6)
 #define XMAC_CTRL_REG_TX_EN					 (0x1<<0)
+#define XMAC_CTRL_REG_XLGMII_ALIGN_ENB				 (0x1<<7)
 #define XMAC_PAUSE_CTRL_REG_RX_PAUSE_EN				 (0x1<<18)
 #define XMAC_PAUSE_CTRL_REG_TX_PAUSE_EN				 (0x1<<17)
 #define XMAC_PFC_CTRL_HI_REG_FORCE_PFC_XON			 (0x1<<1)
@@ -5518,11 +5519,14 @@
 #define XMAC_REG_PAUSE_CTRL					 0x68
 #define XMAC_REG_PFC_CTRL					 0x70
 #define XMAC_REG_PFC_CTRL_HI					 0x74
+#define XMAC_REG_RX_LSS_CTRL					 0x50
 #define XMAC_REG_RX_LSS_STATUS					 0x58
 /* [RW 14] Maximum packet size in receive direction; exclusive of preamble &
  * CRC in strip mode */
 #define XMAC_REG_RX_MAX_SIZE					 0x40
 #define XMAC_REG_TX_CTRL					 0x20
+#define XMAC_RX_LSS_CTRL_REG_LOCAL_FAULT_DISABLE		 (0x1<<0)
+#define XMAC_RX_LSS_CTRL_REG_REMOTE_FAULT_DISABLE		 (0x1<<1)
 /* [RW 16] Indirect access to the XX table of the XX protection mechanism.
    The fields are:[4:0] - tail pointer; 9:5] - Link List size; 14:10] -
    header pointer. */
@@ -6688,6 +6692,7 @@
 #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_XFI	0x1B00
 #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_20G_DXGXS	0x1E00
 #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_SFI	0x1F00
+#define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_20G_KR2	0x3900
 
 
 #define MDIO_REG_BANK_10G_PARALLEL_DETECT		0x8130
@@ -7062,7 +7067,8 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2	0x12
 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY	0x4000
 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ		0x8000
-#define MDIO_WC_REG_PMD_IEEE9BLK_TENGBASE_KR_PMD_CONTROL_REGISTER_150  0x96
+#define MDIO_WC_REG_PCS_STATUS2				0x0021
+#define MDIO_WC_REG_PMD_KR_CONTROL			0x0096
 #define MDIO_WC_REG_XGXSBLK0_XGXSCONTROL		0x8000
 #define MDIO_WC_REG_XGXSBLK0_MISCCONTROL1		0x800e
 #define MDIO_WC_REG_XGXSBLK1_DESKEW			0x8010
@@ -7094,6 +7100,7 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_PAR_DET_10G_STATUS			0x8130
 #define MDIO_WC_REG_PAR_DET_10G_CTRL			0x8131
 #define MDIO_WC_REG_XGXS_X2_CONTROL2			0x8141
+#define MDIO_WC_REG_XGXS_X2_CONTROL3			0x8142
 #define MDIO_WC_REG_XGXS_RX_LN_SWAP1			0x816B
 #define MDIO_WC_REG_XGXS_TX_LN_SWAP1			0x8169
 #define MDIO_WC_REG_GP2_STATUS_GP_2_0			0x81d0
@@ -7128,6 +7135,7 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET		0x0a
 #define MDIO_WC_REG_TX_FIR_TAP_POST_TAP_MASK		0x7c00
 #define MDIO_WC_REG_TX_FIR_TAP_ENABLE		0x8000
+#define MDIO_WC_REG_CL72_USERB0_CL72_TX_FIR_TAP		0x82e2
 #define MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL	0x82e3
 #define MDIO_WC_REG_CL72_USERB0_CL72_OS_DEF_CTRL	0x82e6
 #define MDIO_WC_REG_CL72_USERB0_CL72_BR_DEF_CTRL	0x82e7
@@ -7145,9 +7153,16 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_DIGITAL4_MISC5			0x833e
 #define MDIO_WC_REG_DIGITAL5_MISC6			0x8345
 #define MDIO_WC_REG_DIGITAL5_MISC7			0x8349
+#define MDIO_WC_REG_DIGITAL5_LINK_STATUS		0x834d
 #define MDIO_WC_REG_DIGITAL5_ACTUAL_SPEED		0x834e
 #define MDIO_WC_REG_DIGITAL6_MP5_NEXTPAGECTRL		0x8350
 #define MDIO_WC_REG_CL49_USERB0_CTRL			0x8368
+#define MDIO_WC_REG_CL73_USERB0_CTRL			0x8370
+#define MDIO_WC_REG_CL73_USERB0_USTAT			0x8371
+#define MDIO_WC_REG_CL73_BAM_CTRL1			0x8372
+#define MDIO_WC_REG_CL73_BAM_CTRL2			0x8373
+#define MDIO_WC_REG_CL73_BAM_CTRL3			0x8374
+#define MDIO_WC_REG_CL73_BAM_CODE_FIELD			0x837b
 #define MDIO_WC_REG_EEE_COMBO_CONTROL0			0x8390
 #define MDIO_WC_REG_TX66_CONTROL			0x83b0
 #define MDIO_WC_REG_RX66_CONTROL			0x83c0
@@ -7161,7 +7176,17 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_RX66_SCW3_MASK			0x83c9
 #define MDIO_WC_REG_FX100_CTRL1				0x8400
 #define MDIO_WC_REG_FX100_CTRL3				0x8402
-
+#define MDIO_WC_REG_CL82_USERB1_TX_CTRL5		0x8436
+#define MDIO_WC_REG_CL82_USERB1_TX_CTRL6		0x8437
+#define MDIO_WC_REG_CL82_USERB1_TX_CTRL7		0x8438
+#define MDIO_WC_REG_CL82_USERB1_TX_CTRL9		0x8439
+#define MDIO_WC_REG_CL82_USERB1_RX_CTRL10		0x843a
+#define MDIO_WC_REG_CL82_USERB1_RX_CTRL11		0x843b
+#define MDIO_WC_REG_ETA_CL73_OUI1			0x8453
+#define MDIO_WC_REG_ETA_CL73_OUI2			0x8454
+#define MDIO_WC_REG_ETA_CL73_OUI3			0x8455
+#define MDIO_WC_REG_ETA_CL73_LD_BAM_CODE		0x8456
+#define MDIO_WC_REG_ETA_CL73_LD_UD_CODE			0x8457
 #define MDIO_WC_REG_MICROBLK_CMD			0xffc2
 #define MDIO_WC_REG_MICROBLK_DL_STATUS			0xffc5
 #define MDIO_WC_REG_MICROBLK_CMD3			0xffcc
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 1/9] bnx2x: Activate LFA
From: Yaniv Rosner @ 2012-11-27 13:46 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner, Eilon Greenstein
In-Reply-To: <1354023996-3512-1-git-send-email-yanivr@broadcom.com>

In case Link Flap Avoidance feature is supported by the MCP, bnx2x will enable
it, and will pass the appropriate parameter when load request is sent to
the MCP.

Signed-off-by: Yaniv Rosner <yanivr@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |    3 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h  |    8 ++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |   12 ++++++++++++
 3 files changed, 22 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 54d522d..2d30979 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2248,7 +2248,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 			 DRV_PULSE_SEQ_MASK);
 		BNX2X_DEV_INFO("drv_pulse 0x%x\n", bp->fw_drv_pulse_wr_seq);
 
-		load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0);
+		load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ,
+					     DRV_MSG_CODE_LOAD_REQ_WITH_LFA);
 		if (!load_code) {
 			BNX2X_ERR("MCP response failure, aborting\n");
 			rc = -EBUSY;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 7eaa74b..f478bed 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -2159,6 +2159,14 @@ struct shmem2_region {
 	#define SHMEM_EEE_TIME_OUTPUT_BIT	   0x80000000
 
 	u32 sizeof_port_stats;
+
+	/* Link Flap Avoidance */
+	u32 lfa_host_addr[PORT_MAX];
+	u32 reserved1;
+
+	u32 reserved2;				/* Offset 0x148 */
+	u32 reserved3;				/* Offset 0x14C */
+	u32 reserved4;				/* Offset 0x150 */
 };
 
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 3519fed..6d125c2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -6267,6 +6267,10 @@ void bnx2x_pf_disable(struct bnx2x *bp)
 static void bnx2x__common_init_phy(struct bnx2x *bp)
 {
 	u32 shmem_base[2], shmem2_base[2];
+	/* Avoid common init in case MFW supports LFA */
+	if (SHMEM2_RD(bp, size) >
+	    (u32)offsetof(struct shmem2_region, lfa_host_addr[BP_PORT(bp)]))
+		return;
 	shmem_base[0] =  bp->common.shmem_base;
 	shmem2_base[0] = bp->common.shmem2_base;
 	if (!CHIP_IS_E1x(bp)) {
@@ -9859,6 +9863,14 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp)
 
 	bp->link_params.shmem_base = bp->common.shmem_base;
 	bp->link_params.shmem2_base = bp->common.shmem2_base;
+	if (SHMEM2_RD(bp, size) >
+	    (u32)offsetof(struct shmem2_region, lfa_host_addr[BP_PORT(bp)]))
+		bp->link_params.lfa_base =
+		REG_RD(bp, bp->common.shmem2_base +
+		       (u32)offsetof(struct shmem2_region,
+				     lfa_host_addr[BP_PORT(bp)]));
+	else
+		bp->link_params.lfa_base = 0;
 	BNX2X_DEV_INFO("shmem offset 0x%x  shmem2 offset 0x%x\n",
 		       bp->common.shmem_base, bp->common.shmem2_base);
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 0/9] bnx2x: Link changes
From: Yaniv Rosner @ 2012-11-27 13:46 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Yaniv Rosner

Hi Dave,
The following patch series describe some link changes.
Please consider applying it to net-next.

Thanks,
Yaniv

^ permalink raw reply

* Re: [PATCH 1/1] net: cpts: fix for build break after ARM SoC integration
From: Mugunthan V N @ 2012-11-27 11:25 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-arm-kernel, linux-omap, b-cousson, paul,
	richardcochran
In-Reply-To: <1354012034-31686-1-git-send-email-mugunthanvnm@ti.com>

On 11/27/2012 3:57 PM, Mugunthan V N wrote:
>    CC      drivers/net/ethernet/ti/cpts.o
> drivers/net/ethernet/ti/cpts.c:30:24: fatal error: plat/clock.h: No such file or directory
> compilation terminated.
> make[4]: *** [drivers/net/ethernet/ti/cpts.o] Error 1
> make[3]: *** [drivers/net/ethernet/ti] Error 2
> make[2]: *** [drivers/net/ethernet] Error 2
> make[1]: *** [drivers/net] Error 2
>
> fix for build break as the header file is removed from plat-omap as part of
> the below patch
>
> commit a135eaae524acba1509a3b19c97fae556e4da7cd
> Author: Paul Walmsley <paul@pwsan.com>
> Date:   Thu Sep 27 10:33:34 2012 -0600
>
>      ARM: OMAP: remove plat/clock.h
>
>      Remove arch/arm/plat-omap/include/plat/clock.h by merging it into
>      arch/arm/mach-omap1/clock.h and arch/arm/mach-omap2/clock.h.
>      The goal here is to facilitate ARM single image kernels by removing
>      includes via the "plat/" symlink.
The header removal patch can be found in linux-next git repo.

Regards
Mugunthan V N

^ permalink raw reply

* Re: linux-next: manual merge of the arm-soc tree with the net-next tree
From: Mugunthan V N @ 2012-11-27 11:22 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: Olof Johansson, Arnd Bergmann, linux-arm-kernel, linux-next,
	linux-kernel, Vaibhav Hiremath, David Miller, netdev,
	AnilKumar Ch, Benoit Cousson, Jon Hunter, Afzal Mohammed,
	Philip, Avinash
In-Reply-To: <20121126204546.ef1f1b55e903eda38a61d5d1@canb.auug.org.au>

On 11/26/2012 3:15 PM, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the arm-soc tree got a conflict in
> arch/arm/boot/dts/am33xx.dtsi between commit 1a39a65cba08 ("arm/dts:
> am33xx: Add CPSW and MDIO module nodes for AM33XX") from the net-next
> tree and commits 059b185d5345 ("ARM: dts: AM33XX: Add D_CAN device tree
> data") and 4c94ac29b5c1 ("ARM: dts: OMAP: Move interrupt-parent to the
> root node to avoid duplication") (and a few others that added more later
> nodes) from the arm-soc tree.
>
> I fixed it up (see below) and can carry the fix as necessary (no action
> is required).
>
Stephen Rothwell

The fix is correct and I have tested CPSW from linux-next.

Regards
Mugunthan V N

^ permalink raw reply

* Re: [net-next RFC v2] net_cls: traffic counter based on classification control cgroup
From: Glauber Costa @ 2012-11-27 11:03 UTC (permalink / raw)
  To: Alexey Perevalov; +Cc: netdev, cgroups, Kyungmin Park, Daniel Wagner
In-Reply-To: <50B49C6C.8030604@samsung.com>

On 11/27/2012 02:56 PM, Alexey Perevalov wrote:
> Hello.
> 
> It's second version of patch I already sent to netdev.
> 
> The main goal of this patch it's counting traffic for process placed to
> net_cls cgroup (ingress and egress).
> It's based on res_counters and holds counter per network interfaces.
> 
> Description of patch.
> It handles packets in net/core/dev.c for egress and in
> /net/ipv4/tcp.c|udp.c for ingress.
> These places were chosen because we need to know also network interface.
> 
> Cgroup fs interface provides following files additional to existing
> net_cls files:
> net_cls.ifacename.usage_in_bytes
> Containing rcv/snd lines.
> Also this patch adds to net_cls ability to handle a network device
> registration.
> 
> It could be included or excluded in compile time.
> I moved the menu entry for "Control group classifier" from network/QoS to
> General Option/Control Group.
> 
> I'm waiting for you comments.
>

Daniel Wagner is working on something a lot similar.

Maybe you should be in contact, in case you are not yet.

A few general comments:
1) res_counters are incredibly expensive. If you are more interested in
counting than you are in limiting, they may not be your best choice.

2) When Daniel exposed his use case to me, it gave me the impression
that "counting traffic" is something that is totally doable by having a
dedicated interface in a separate namespace. Basically, we already count
traffic (rx and tx) for all interfaces anyway, so it suggests that it
could be an interesting way to see the problem.

AFAIK, Daniel is still measuring this. But it would be great to know if
that could work for your use case as well.

Thanks.

^ permalink raw reply

* [net-next RFC v2] net_cls: traffic counter based on classification control cgroup
From: Alexey Perevalov @ 2012-11-27 10:56 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA, cgroups-u79uwXL29TY76Z2rM5mHXA
  Cc: Kyungmin Park

[-- Attachment #1: Type: text/plain, Size: 1176 bytes --]

Hello.

It's second version of patch I already sent to netdev.

The main goal of this patch it's counting traffic for process placed to 
net_cls cgroup (ingress and egress).
It's based on res_counters and holds counter per network interfaces.

Description of patch.
It handles packets in net/core/dev.c for egress and in 
/net/ipv4/tcp.c|udp.c for ingress.
These places were chosen because we need to know also network interface.

Cgroup fs interface provides following files additional to existing 
net_cls files:
net_cls.ifacename.usage_in_bytes
Containing rcv/snd lines.
Also this patch adds to net_cls ability to handle a network device 
registration.

It could be included or excluded in compile time.
I moved the menu entry for "Control group classifier" from network/QoS to
General Option/Control Group.

I'm waiting for you comments.


-- 
Best regards,
Alexey Perevalov,
Leading Software Engineer,
phone: +7 (495) 797 25 00 ext 3969
e-mail: a.perevalov-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org <mailto:a.perevalov-LDgkF/uwaJFWk0Htik3J/w@public.gmane.org>

Mobile group, Moscow Samsung Research Center
12 Dvintsev street, building 1
127018, Moscow, Russian Federation


[-- Attachment #2: 0001-Traffic-statistics-based-on-packet-classification-co.patch --]
[-- Type: text/x-patch, Size: 25252 bytes --]

>From bc59794d7fcc75de0c7a408860fd6ec3be8c50fe Mon Sep 17 00:00:00 2001
From: Alexey Perevalov <a.perevalov-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
Date: Fri, 26 Oct 2012 17:45:44 +0400
Subject: [PATCH] Traffic statistics based on packet classification control
 group

---
 include/net/cls_cgroup.h         |  203 ++++++++++++++++++++++++++++++++++----
 include/net/cls_counter_holder.h |   26 +++++
 init/Kconfig                     |   25 +++++
 kernel/cgroup.c                  |    2 +
 kernel/res_counter.c             |    4 +
 net/core/dev.c                   |    6 ++
 net/ipv4/tcp.c                   |   27 ++++-
 net/ipv4/udp.c                   |    6 ++
 net/sched/Kconfig                |   11 ---
 net/sched/Makefile               |    1 +
 net/sched/cls_cgroup.c           |  192 ++++++++++++++++++++++++++++++++++-
 net/sched/cls_counter_holder.c   |  145 +++++++++++++++++++++++++++
 12 files changed, 613 insertions(+), 35 deletions(-)
 create mode 100644 include/net/cls_counter_holder.h
 create mode 100644 net/sched/cls_counter_holder.c

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 2581638..3a6954f 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -17,50 +17,198 @@
 #include <linux/hardirq.h>
 #include <linux/rcupdate.h>
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+#include <linux/nsproxy.h>
+#include <linux/res_counter.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/cls_counter_holder.h>
+#include <net/sock.h>
+
+/*TODO hide all it to separate file*/
+
+struct cls_iface_cntrs {
+	char *dev_name;
+	struct res_counter snd_counter;
+	struct res_counter rcv_counter;
+	struct list_head link;
+};
+
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
+
 #if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
 struct cgroup_cls_state
 {
 	struct cgroup_subsys_state css;
 	u32 classid;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	struct cls_iface_cntrs iface_stats;
+#endif /*CONFIG_NET_CLS_COUNTER*/
 };
 
 extern void sock_update_classid(struct sock *sk, struct task_struct *task);
 
-#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
-static inline u32 task_cls_classid(struct task_struct *p)
+#if IS_MODULE(CONFIG_NET_CLS_CGROUP)
+static inline struct cgroup_cls_state *get_cls_cgroup(struct task_struct *p)
 {
-	u32 classid;
+	struct cgroup_subsys_state *css = task_subsys_state(p,
+		net_cls_subsys_id);
+	if (css)
+		return container_of(css,
+				       struct cgroup_cls_state, css);
+	return NULL;
+}
+#elif IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
+static inline struct cgroup_cls_state *get_cls_cgroup(struct task_struct *p)
+{
+	return container_of(task_subsys_state(p, net_cls_subsys_id),
+			       struct cgroup_cls_state, css);
+}
+#endif
 
-	if (in_interrupt())
-		return 0;
 
-	rcu_read_lock();
-	classid = container_of(task_subsys_state(p, net_cls_subsys_id),
-			       struct cgroup_cls_state, css)->classid;
-	rcu_read_unlock();
+#endif /*CONFIG_NET_CLS_CGROUP*/
 
-	return classid;
+#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline u32 skb_cls_classid(const struct sk_buff *skb)
+{
+	return (skb && skb->sk) ? skb->sk->sk_classid : 0;
+}
+
+static inline int get_ifindex_from_skb(const struct sk_buff *skb)
+{
+	int ifindex = 0;
+	if (skb)
+		ifindex = skb->skb_iif;
+	return ifindex;
+}
+
+static struct cls_iface_cntrs *find_cls_counter(
+	struct cgroup_cls_state *cls_cgroup,
+	const char *dev_name,
+	bool create)
+{
+	/*TODO Add lock*/
+	struct cls_iface_cntrs *entry = NULL;
+
+	if (!dev_name) {
+		pr_err("cls please provide valid dev name");
+		return NULL;
+	}
+
+	list_for_each_entry(entry, &cls_cgroup->iface_stats.link, link)
+		if (!strcmp(entry->dev_name, dev_name))
+			return entry;
+
+	if (!create)
+		return entry;
+
+	/*not found, insert*/
+	entry = kmalloc(sizeof(struct cls_iface_cntrs), GFP_ATOMIC);
+	entry->dev_name = kstrdup(dev_name, GFP_ATOMIC);
+	memset(&entry->rcv_counter, 0, sizeof(struct res_counter));
+	memset(&entry->snd_counter, 0, sizeof(struct res_counter));
+	res_counter_init(&entry->rcv_counter, NULL);
+	res_counter_init(&entry->snd_counter, NULL);
+	list_add_tail(&entry->link, &cls_cgroup->iface_stats.link);
+	return entry;
 }
-#elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
+
+static void charge_net_cls_snd(struct cgroup_cls_state *cls_cgroup,
+	const u32 copied, const char *dev_name)
+{
+	struct res_counter *fail_res;
+	int res;
+	struct cls_iface_cntrs *cnt = find_cls_counter(cls_cgroup,
+		dev_name, true);
+
+	if (!cnt)
+		return;
+
+	res = res_counter_charge(&cnt->snd_counter, copied, &fail_res);
+}
+
+static char *get_dev_name(const int ifindex)
+{
+	struct net *net = NULL;
+	struct nsproxy *nsproxy = NULL;
+	struct net_device *net_dev = NULL;
+
+	nsproxy = task_nsproxy(current);
+	if (!nsproxy) {
+		pr_debug("cls cant find task_nsproxy");
+		return NULL;
+	}
+
+	net = get_net(nsproxy->net_ns);
+	if (!net) {
+		pr_debug("cls cant find net");
+		return NULL;
+	}
+	net_dev = dev_get_by_index(net, ifindex);
+
+	return net_dev ? net_dev->name : NULL;
+}
+
+static void charge_net_cls_rcv(struct cgroup_cls_state *cls_cgroup,
+	const u32 copied, const int ifindex)
+{
+	char *dev_name = get_dev_name(ifindex);
+	struct res_counter *fail_res;
+	int res;
+	struct cls_iface_cntrs *cnt = find_cls_counter(cls_cgroup,
+		dev_name, true);
+
+	if (!cnt)
+		return;
+
+	res = res_counter_charge(&cnt->rcv_counter, copied, &fail_res);
+}
+
+static inline void count_cls_rcv(struct task_struct *p, const u32 copied, const int ifindex)
+{
+	struct cgroup_cls_state *cls_cgroup;
+
+	cls_cgroup = get_cls_cgroup(p);
+
+	if (cls_cgroup)
+		charge_net_cls_rcv(cls_cgroup, copied, ifindex);
+}
+
+static inline void count_cls_snd(u32 classid, const u32 copied,
+	const char *dev_name)
+{
+	struct cgroup_cls_state *cls_cgroup;
+
+	cls_cgroup = find_cls_cgroup_by_classid(classid);
+
+	if (cls_cgroup)
+		charge_net_cls_snd(cls_cgroup, copied, dev_name);
+}
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
 static inline u32 task_cls_classid(struct task_struct *p)
 {
-	struct cgroup_subsys_state *css;
-	u32 classid = 0;
+	int classid = 0;
+	struct cgroup_cls_state *cls_cgroup = NULL;
 
 	if (in_interrupt())
 		return 0;
 
 	rcu_read_lock();
-	css = task_subsys_state(p, net_cls_subsys_id);
-	if (css)
-		classid = container_of(css,
-				       struct cgroup_cls_state, css)->classid;
+
+	cls_cgroup = get_cls_cgroup(p);
+	if (cls_cgroup)
+		classid = cls_cgroup->classid;
+
 	rcu_read_unlock();
 
 	return classid;
 }
-#endif
-#else /* !CGROUP_NET_CLS_CGROUP */
+
+#else /* !CONFIG_NET_CLS_CGROUP */
 static inline void sock_update_classid(struct sock *sk, struct task_struct *task)
 {
 }
@@ -69,5 +217,20 @@ static inline u32 task_cls_classid(struct task_struct *p)
 {
 	return 0;
 }
-#endif /* CGROUP_NET_CLS_CGROUP */
+#endif /* CONFIG_NET_CLS_CGROUP */
+
+#if !IS_ENABLED(CONFIG_NET_CLS_CGROUP) || !IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline void count_cls_rcv(struct task_struct *p, const u32 copied, const int ifindex)
+{
+}
+
+static inline void count_cls_snd(u32 classid, const u32 copied,	const char *dev_name)
+{
+}
+
+static inline u32 skb_cls_classid(const struct sk_buff *skb)
+{
+	return 0;
+}
+#endif
 #endif  /* _NET_CLS_CGROUP_H */
diff --git a/include/net/cls_counter_holder.h b/include/net/cls_counter_holder.h
new file mode 100644
index 0000000..a129baa
--- /dev/null
+++ b/include/net/cls_counter_holder.h
@@ -0,0 +1,26 @@
+/*
+ * cls_counter_holder.c  Interface for holding references of the
+ *                       net cls cgroup instances.
+ *
+ * Authors:	Alexey Perevalov, <a.perevalov-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
+ *
+ * Changes:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_CLS_COUNTER_HOLDER_H_
+#define _NET_CLS_COUNTER_HOLDER_H_
+
+#include <net/cls_cgroup.h>
+
+struct cgroup_cls_state;
+
+void insert_cls_cgroup_entry(struct cgroup_cls_state *obj);
+void delete_cls_cgroup_entry(const u32 classid);
+struct cgroup_cls_state *find_cls_cgroup_by_classid(const u32 classid);
+
+
+#endif /* _NET_CLS_COUNTER_HOLDER_H_ */
diff --git a/init/Kconfig b/init/Kconfig
index 6fdd6e3..2e6af85 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -841,6 +841,31 @@ config CGROUP_HUGETLB
 	  control group is tracked in the third page lru pointer. This means
 	  that we cannot use the controller with huge page less than 3 pages.
 
+menuconfig NET_CLS_CGROUP
+	tristate "Control Group Classifier"
+	select NET_CLS
+	depends on CGROUPS
+	---help---
+	  Say Y here if you want to classify packets based on the control
+	  cgroup of their process.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_cgroup.
+
+if NET_CLS_CGROUP
+config NET_CLS_COUNTER
+	bool "Network traffic counter for network Control Group Classifier"
+	select NET_CLS
+	default n
+	depends on NET_CLS_CGROUP && RESOURCE_COUNTERS
+	---help---
+	  Say Y here if you want to count traffic associate with the control
+	  cgroup.
+
+	  To add functionality to cls_cgroup select y.
+
+endif #NET_CLS_CGROUP
+
 config CGROUP_PERF
 	bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
 	depends on PERF_EVENTS && CGROUPS
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 13774b3..68a4a53 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2966,6 +2966,8 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 	cgroup_cfts_commit(ss, NULL, false);
 	return -ENOENT;
 }
+EXPORT_SYMBOL_GPL(cgroup_rm_cftypes);
+
 
 /**
  * cgroup_task_count - count the number of tasks in a cgroup.
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index ad581aa..f5767af 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -13,6 +13,8 @@
 #include <linux/res_counter.h>
 #include <linux/uaccess.h>
 #include <linux/mm.h>
+#include <linux/export.h>
+
 
 void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
@@ -21,6 +23,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 	counter->soft_limit = RESOURCE_MAX;
 	counter->parent = parent;
 }
+EXPORT_SYMBOL(res_counter_init);
 
 int res_counter_charge_locked(struct res_counter *counter, unsigned long val,
 			      bool force)
@@ -170,6 +173,7 @@ u64 res_counter_read_u64(struct res_counter *counter, int member)
 	return *res_counter_member(counter, member);
 }
 #endif
+EXPORT_SYMBOL(res_counter_read_u64);
 
 int res_counter_memparse_write_strategy(const char *buf,
 					unsigned long long *res)
diff --git a/net/core/dev.c b/net/core/dev.c
index b4978e2..61c9a61 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/static_key.h>
 #include <net/flow_keys.h>
+#include <net/cls_cgroup.h>
 
 #include "net-sysfs.h"
 
@@ -2570,6 +2571,11 @@ int dev_queue_xmit(struct sk_buff *skb)
 	 */
 	rcu_read_lock_bh();
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	if (dev)
+		count_cls_snd(skb_cls_classid(skb), skb->len, dev->name);
+#endif
+
 	skb_update_prio(skb);
 
 	txq = netdev_pick_tx(dev, skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eace049..3013509 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -276,6 +276,7 @@
 #include <net/ip.h>
 #include <net/netdma.h>
 #include <net/sock.h>
+#include <net/cls_cgroup.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -1467,6 +1468,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 	u32 seq = tp->copied_seq;
 	u32 offset;
 	int copied = 0;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	int ifindex = 0;
+#endif
 
 	if (sk->sk_state == TCP_LISTEN)
 		return -ENOTCONN;
@@ -1509,6 +1513,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			++seq;
 			break;
 		}
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+		ifindex = get_ifindex_from_skb(skb);
+#endif
 		sk_eat_skb(sk, skb, false);
 		if (!desc->count)
 			break;
@@ -1519,8 +1526,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 	tcp_rcv_space_adjust(sk);
 
 	/* Clean up data we have read: This will do ACK frames. */
-	if (copied > 0)
+	if (copied > 0) {
 		tcp_cleanup_rbuf(sk, copied);
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+		count_cls_rcv(current, copied, ifindex);
+#endif
+	}
 	return copied;
 }
 EXPORT_SYMBOL(tcp_read_sock);
@@ -1548,6 +1559,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	bool copied_early = false;
 	struct sk_buff *skb;
 	u32 urg_hole = 0;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	int ifindex = 0;
+#endif
 
 	lock_sock(sk);
 
@@ -1872,6 +1886,9 @@ skip_copy:
 		if (tcp_hdr(skb)->fin)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK)) {
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+			ifindex = get_ifindex_from_skb(skb);
+#endif
 			sk_eat_skb(sk, skb, copied_early);
 			copied_early = false;
 		}
@@ -1881,6 +1898,9 @@ skip_copy:
 		/* Process the FIN. */
 		++*seq;
 		if (!(flags & MSG_PEEK)) {
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+			ifindex = get_ifindex_from_skb(skb);
+#endif
 			sk_eat_skb(sk, skb, copied_early);
 			copied_early = false;
 		}
@@ -1923,6 +1943,11 @@ skip_copy:
 	/* Clean up data we have read: This will do ACK frames. */
 	tcp_cleanup_rbuf(sk, copied);
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	if (copied > 0)
+		count_cls_rcv(current, copied, ifindex);
+#endif
+
 	release_sock(sk);
 	return copied;
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 79c8dbe..a143629 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -101,6 +101,7 @@
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <net/cls_cgroup.h>
 #include <net/net_namespace.h>
 #include <net/icmp.h>
 #include <net/route.h>
@@ -1254,6 +1255,11 @@ try_again:
 	if (flags & MSG_TRUNC)
 		err = ulen;
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	if (ulen > 0)
+		count_cls_rcv(current, ulen, get_ifindex_from_skb(skb));
+#endif
+
 out_free:
 	skb_free_datagram_locked(sk, skb);
 out:
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 62fb51f..926dedf 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -418,17 +418,6 @@ config NET_CLS_FLOW
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_flow.
 
-config NET_CLS_CGROUP
-	tristate "Control Group Classifier"
-	select NET_CLS
-	depends on CGROUPS
-	---help---
-	  Say Y here if you want to classify packets based on the control
-	  cgroup of their process.
-
-	  To compile this code as a module, choose M here: the
-	  module will be called cls_cgroup.
-
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 978cbf0..95dbb12 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
 obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
+obj-$(CONFIG_NET_CLS_COUNTER)   += cls_counter_holder.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 709b0fb..d032689 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -22,6 +22,16 @@
 #include <net/pkt_cls.h>
 #include <net/sock.h>
 #include <net/cls_cgroup.h>
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+#include <linux/rbtree.h>
+#include <linux/res_counter.h>
+#include <net/cls_counter_holder.h>
+
+static struct notifier_block counter_notifier;
+static const char *rcv_label = "rcv:";
+static const char *snd_label = "snd:";
+
+#endif
 
 static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
 {
@@ -46,11 +56,47 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
 	if (cgrp->parent)
 		cs->classid = cgrp_cls_state(cgrp->parent)->classid;
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	res_counter_init(&cs->iface_stats.snd_counter, NULL);
+	res_counter_init(&cs->iface_stats.rcv_counter, NULL);
+	cs->iface_stats.dev_name = 0;
+	INIT_LIST_HEAD(&cs->iface_stats.link);
+#endif
+
 	return &cs->css;
 }
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline void cgrp_counter_destroy(struct cgroup_cls_state *cs)
+{
+	struct list_head *pos, *q;
+	delete_cls_cgroup_entry(cs->classid);
+
+	list_for_each_safe(pos, q, &cs->iface_stats.link) {
+		struct cls_iface_cntrs *tmp = list_entry(
+			pos, struct cls_iface_cntrs, link);
+		list_del(pos);
+		if (!tmp)
+			continue;
+
+		if (!tmp->dev_name)
+			kfree(tmp->dev_name);
+		kfree(tmp);
+	}
+
+}
+#endif
+
 static void cgrp_destroy(struct cgroup *cgrp)
 {
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+
+	struct cgroup_cls_state *cs = cgrp_cls_state(cgrp);
+
+	if (!cs)
+		return;
+	cgrp_counter_destroy(cs);
+#endif
 	kfree(cgrp_cls_state(cgrp));
 }
 
@@ -81,9 +127,56 @@ static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
 	return cgrp_cls_state(cgrp)->classid;
 }
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static const char *extract_dev_name(const char *cgroup_file_name)
+{
+	const char *dot = strchr(cgroup_file_name, '.');
+	const size_t len = dot ? dot - cgroup_file_name : strlen(cgroup_file_name);
+
+	return kstrndup(cgroup_file_name, len, GFP_KERNEL);
+}
+
+static int read_stat(struct cgroup *cgrp, struct cftype *cft,
+		struct cgroup_map_cb *cb)
+{
+	struct cgroup_cls_state *cs = cgrp_cls_state(cgrp);
+	const char *dev_name = extract_dev_name(cft->name);
+	struct cls_iface_cntrs *res = find_cls_counter(cs, dev_name, false);
+
+	if (!res) {
+		pr_debug("cls cant read for cls");
+		return -EINVAL;
+	}
+
+	cb->fill(cb, rcv_label,
+		res_counter_read_u64(&res->rcv_counter, RES_USAGE));
+	cb->fill(cb, snd_label,
+		res_counter_read_u64(&res->snd_counter, RES_USAGE));
+
+	kfree(dev_name);
+	return 0;
+}
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
 static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
 {
-	cgrp_cls_state(cgrp)->classid = (u32) value;
+	struct cgroup_cls_state *cgrp_cls = cgrp_cls_state(cgrp);
+	u32 *classid = &cgrp_cls->classid;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	u32 oldclassid = *classid;
+
+	if(find_cls_cgroup_by_classid(value)) {
+		pr_err("cls: classid %llu already exists\n", value);
+		return -EINVAL;
+	}
+
+	insert_cls_cgroup_entry(cgrp_cls);
+
+	if (oldclassid)
+		delete_cls_cgroup_entry(oldclassid);
+#endif /*CONFIG_NET_CLS_COUNTER*/
+	*classid = (u32) value;
+
 	return 0;
 }
 
@@ -307,17 +400,107 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
 	.owner		=	THIS_MODULE,
 };
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline int add_cft_file_for_device(struct net_device *dev)
+{
+	struct cftype *cft;
+	int ret = 0;
+
+	if (!dev)
+		return ret;
+
+	cft = kmalloc(sizeof(struct cftype) * 2,
+		GFP_KERNEL);
+	/* *2 and last 0 fill for terminator */
+	memset(cft, 0, sizeof(struct cftype) * 2);
+
+	snprintf(cft->name, MAX_CFTYPE_NAME,
+		"%s.usage_in_bytes", dev->name);
+	cft->read_map = read_stat;
+	cft->private = RES_USAGE;
+	ret = cgroup_add_cftypes(&net_cls_subsys, cft);
+	if (ret)
+		pr_err("cls error adding cft for counting at " \
+			"cls_cgroup %d\n", ret);
+	return ret;
+}
+
+static int device_state_cb(struct notifier_block *nb,
+	unsigned long state, void *arg)
+{
+	struct net_device *net = (struct net_device *)arg;
+	if (!nb || !net) {
+		pr_err("Not valid arguments for net_device notifier cb\n");
+		return 0;
+	}
+
+	if (state == NETDEV_REGISTER) {
+		pr_info("cls New device %s\n", net->name);
+		return add_cft_file_for_device(net);
+	}
+	return 0;
+}
+
+static inline int init_cgroup_counter(void)
+{
+	int ret = 0;
+	struct net_device *dev;
+	counter_notifier.notifier_call = device_state_cb;
+
+	ret = register_netdevice_notifier(&counter_notifier);
+	if (ret)
+		pr_err("cls Cant register nofier\n");
+
+	for_each_netdev(&init_net, dev) {
+		ret = add_cft_file_for_device(dev);
+		if (ret)
+			goto unregister_notifier;
+	}
+
+	return ret;
+unregister_notifier:
+
+	unregister_netdevice_notifier(&counter_notifier);
+	return ret;
+}
+
+static void release_cft(void)
+{
+	struct list_head *pos, *q;
+        list_for_each_safe(pos, q, &net_cls_subsys.cftsets) {
+		struct cftype_set *set =
+                                list_entry(pos, struct cftype_set, node);
+		int ret = cgroup_rm_cftypes(&net_cls_subsys, set->cfts);
+		if (!ret) {
+			pr_err("cls cant remove cftypes\n");
+			break;
+		}
+
+                kfree(set->cfts);
+        }
+}
+#endif
+
 static int __init init_cgroup_cls(void)
 {
 	int ret;
-
 	ret = cgroup_load_subsys(&net_cls_subsys);
 	if (ret)
 		goto out;
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	ret = init_cgroup_counter();
+	if (ret)
+		goto unload;
+#endif
+
 	ret = register_tcf_proto_ops(&cls_cgroup_ops);
 	if (ret)
-		cgroup_unload_subsys(&net_cls_subsys);
+		goto unload;
+
+	return 0;
+unload:
+	cgroup_unload_subsys(&net_cls_subsys);
 
 out:
 	return ret;
@@ -327,6 +510,9 @@ static void __exit exit_cgroup_cls(void)
 {
 	unregister_tcf_proto_ops(&cls_cgroup_ops);
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	release_cft();
+#endif
 	cgroup_unload_subsys(&net_cls_subsys);
 }
 
diff --git a/net/sched/cls_counter_holder.c b/net/sched/cls_counter_holder.c
new file mode 100644
index 0000000..eb56298
--- /dev/null
+++ b/net/sched/cls_counter_holder.c
@@ -0,0 +1,145 @@
+/*
+ * net/sched/cls_counter_holder.c Interface for holding references of the
+ *                     net cls cgroup instances.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Perevalov <a.perevalov-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
+ */
+
+
+#include <linux/export.h>
+#include <linux/module.h>
+#include <net/cls_cgroup.h>
+#include <net/cls_counter_holder.h>
+
+static struct rb_root classid_tree = RB_ROOT;
+static DEFINE_SPINLOCK(classid_tree_lock);
+
+struct entry {
+	struct cgroup_cls_state *data;
+	struct rb_node node;
+};
+
+static struct entry *find_entry(struct rb_root *root, const u32 classid)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct entry *cls_entry = rb_entry(node, struct entry, node);
+		int result = 0;
+		if (!cls_entry || !cls_entry->data)
+			break;
+		result = cls_entry->data->classid - classid;
+
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return cls_entry;
+	}
+	return NULL;
+}
+
+void insert_cls_cgroup_entry(struct cgroup_cls_state *obj)
+{
+	struct rb_node **new;
+	struct rb_node *parent = NULL;
+	struct entry *new_entry;
+	unsigned long irq_flags = 0;
+
+	struct rb_root *root = &classid_tree;
+
+	spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+	new = &root->rb_node;
+
+        while (*new) {
+                struct entry *this = rb_entry(*new, struct entry, node);
+                /* Sort by classid, then by ifindex */
+                int result =
+                    (this->data->classid - obj->classid);
+                parent = *new;
+                if (result < 0)
+                        new = &((*new)->rb_left);
+                else if (result > 0)
+                        new = &((*new)->rb_right);
+                else
+                        goto unlock;
+        }
+
+	/* If we here, we need to insert new entry into tree */
+	new_entry = kmalloc(sizeof(struct entry), GFP_ATOMIC);
+	if (!new_entry)
+		goto unlock;
+
+	new_entry->data = obj;
+	/* Add new node and rebalance tree */
+	rb_link_node(&new_entry->node, parent, new);
+	rb_insert_color(&new_entry->node, root);
+
+unlock:
+	spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+EXPORT_SYMBOL(insert_cls_cgroup_entry);
+
+void delete_cls_cgroup_entry(const u32 classid)
+{
+	unsigned long irq_flags = 0;
+	struct entry *data = NULL;
+	struct rb_root *root = &classid_tree;
+	spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+	data = find_entry(root, classid);
+
+	if (data) {
+		rb_erase(&data->node, root);
+		kfree(data);
+	}
+	spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+EXPORT_SYMBOL(delete_cls_cgroup_entry);
+
+static void free_node(struct rb_node *root)
+{
+	struct entry *cur_entry = rb_entry(root, struct entry, node);
+	if (root->rb_left)
+		free_node(root->rb_left);
+	if (root->rb_right)
+		free_node(root->rb_right);
+	if (cur_entry)
+		kfree(cur_entry);
+}
+
+static void free_classid_tree(void)
+{
+	unsigned long irq_flags = 0;
+
+	spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+	free_node(classid_tree.rb_node);
+
+	spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+
+struct cgroup_cls_state *find_cls_cgroup_by_classid(const u32 classid)
+{
+	struct entry *cls_entry = find_entry(&classid_tree, classid);
+	if (cls_entry)
+		return cls_entry->data;
+
+	return NULL;
+}
+EXPORT_SYMBOL(find_cls_cgroup_by_classid);
+
+static void __exit exit_cls_counter_holder(void)
+{
+	free_classid_tree();
+}
+
+module_exit(exit_cls_counter_holder);
+MODULE_LICENSE("GPL");
-- 
1.7.9.5



^ permalink raw reply related

* [PATCH 1/1] net: cpts: fix for build break after ARM SoC integration
From: Mugunthan V N @ 2012-11-27 10:27 UTC (permalink / raw)
  To: netdev
  Cc: davem, linux-arm-kernel, linux-omap, b-cousson, paul,
	richardcochran, Mugunthan V N

  CC      drivers/net/ethernet/ti/cpts.o
drivers/net/ethernet/ti/cpts.c:30:24: fatal error: plat/clock.h: No such file or directory
compilation terminated.
make[4]: *** [drivers/net/ethernet/ti/cpts.o] Error 1
make[3]: *** [drivers/net/ethernet/ti] Error 2
make[2]: *** [drivers/net/ethernet] Error 2
make[1]: *** [drivers/net] Error 2

fix for build break as the header file is removed from plat-omap as part of
the below patch

commit a135eaae524acba1509a3b19c97fae556e4da7cd
Author: Paul Walmsley <paul@pwsan.com>
Date:   Thu Sep 27 10:33:34 2012 -0600

    ARM: OMAP: remove plat/clock.h

    Remove arch/arm/plat-omap/include/plat/clock.h by merging it into
    arch/arm/mach-omap1/clock.h and arch/arm/mach-omap2/clock.h.
    The goal here is to facilitate ARM single image kernels by removing
    includes via the "plat/" symlink.

Signed-off-by: Mugunthan V N <mugunthanvnm@ti.com>
---
 drivers/net/ethernet/ti/cpts.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 3377667..5e62c1a 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -27,8 +27,6 @@
 #include <linux/uaccess.h>
 #include <linux/workqueue.h>
 
-#include <plat/clock.h>
-
 #include "cpts.h"
 
 #ifdef CONFIG_TI_CPTS
-- 
1.7.0.4


^ permalink raw reply related

* [net-next rfc v7 3/3] virtio-net: change the number of queues through ethtool
From: Jason Wang @ 2012-11-27 10:16 UTC (permalink / raw)
  To: rusty, mst, krkumar2, virtualization, netdev, linux-kernel
  Cc: bhutchings, jwhan, shiyer, kvm
In-Reply-To: <1354011360-39479-1-git-send-email-jasowang@redhat.com>

This patch implement the {set|get}_channels method of ethool to allow user to
change the number of queues dymaically when the device is running. This would
let the user to configure it on demand.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c |   41 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 41 insertions(+), 0 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index bcaa6e5..f08ec2a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1578,10 +1578,51 @@ static struct virtio_driver virtio_net_driver = {
 #endif
 };
 
+/* TODO: Eliminate OOO packets during switching */
+static int virtnet_set_channels(struct net_device *dev,
+				struct ethtool_channels *channels)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	u16 queue_pairs = channels->combined_count;
+
+	/* We don't support separate rx/tx channels.
+	 * We don't allow setting 'other' channels.
+	 */
+	if (channels->rx_count || channels->tx_count || channels->other_count)
+		return -EINVAL;
+
+	/* Only two modes were support currently */
+	if (queue_pairs != vi->max_queue_pairs && queue_pairs != 1)
+		return -EINVAL;
+
+	vi->curr_queue_pairs = queue_pairs;
+	BUG_ON(virtnet_set_queues(vi));
+
+	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
+	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
+
+	return 0;
+}
+
+static void virtnet_get_channels(struct net_device *dev,
+				 struct ethtool_channels *channels)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+
+	channels->combined_count = vi->curr_queue_pairs;
+	channels->max_combined = vi->max_queue_pairs;
+	channels->max_other = 0;
+	channels->rx_count = 0;
+	channels->tx_count = 0;
+	channels->other_count = 0;
+}
+
 static const struct ethtool_ops virtnet_ethtool_ops = {
 	.get_drvinfo = virtnet_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = virtnet_get_ringparam,
+	.set_channels = virtnet_set_channels,
+	.get_channels = virtnet_get_channels,
 };
 
 static int __init init(void)
-- 
1.7.1

^ permalink raw reply related

* [net-next rfc v7 2/3] virtio_net: multiqueue support
From: Jason Wang @ 2012-11-27 10:15 UTC (permalink / raw)
  To: rusty, mst, krkumar2, virtualization, netdev, linux-kernel
  Cc: bhutchings, jwhan, shiyer, kvm
In-Reply-To: <1354011360-39479-1-git-send-email-jasowang@redhat.com>

This addes multiqueue support to virtio_net driver. In multiple queue modes, the
driver expects the number of queue paris is equal to the number of vcpus. To
eliminate the contention bettwen vcpus and virtqueues, per-cpu virtqueue pairs
were implemented through:

- select the txq based on the smp processor id.
- smp affinity hint were set to the vcpu that owns the queue pairs.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c        |  454 ++++++++++++++++++++++++++++++---------
 include/uapi/linux/virtio_net.h |   16 ++
 2 files changed, 371 insertions(+), 99 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7975133..bcaa6e5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -84,17 +84,25 @@ struct virtnet_info {
 	struct virtio_device *vdev;
 	struct virtqueue *cvq;
 	struct net_device *dev;
-	struct napi_struct napi;
-	struct send_queue sq;
-	struct receive_queue rq;
+	struct send_queue *sq;
+	struct receive_queue *rq;
 	unsigned int status;
 
+	/* Max # of queue pairs supported by the device */
+	u16 max_queue_pairs;
+
+	/* # of queue pairs currently used by the driver */
+	u16 curr_queue_pairs;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
 	/* Host will merge rx buffers for big packets (shake it! shake it!) */
 	bool mergeable_rx_bufs;
 
+	/* Has control virtqueue */
+	bool has_cvq;
+
 	/* enable config space updates */
 	bool config_enable;
 
@@ -126,6 +134,34 @@ struct padded_vnet_hdr {
 	char padding[6];
 };
 
+static const struct ethtool_ops virtnet_ethtool_ops;
+
+/*
+ * Converting between virtqueue no. and kernel tx/rx queue no.
+ * 0:rx0 1:tx0 2:cvq 3:rx1 4:tx1 ... 2N+1:rxN 2N+2:txN
+ */
+static int vq2txq(struct virtqueue *vq)
+{
+	int index = virtqueue_get_queue_index(vq);
+	return index == 1 ? 0 : (index - 2) / 2;
+}
+
+static int txq2vq(int txq)
+{
+	return txq ? 2 * txq + 2 : 1;
+}
+
+static int vq2rxq(struct virtqueue *vq)
+{
+	int index = virtqueue_get_queue_index(vq);
+	return index ? (index - 1) / 2 : 0;
+}
+
+static int rxq2vq(int rxq)
+{
+	return rxq ? 2 * rxq + 1 : 0;
+}
+
 static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
 {
 	return (struct skb_vnet_hdr *)skb->cb;
@@ -166,7 +202,7 @@ static void skb_xmit_done(struct virtqueue *vq)
 	virtqueue_disable_cb(vq);
 
 	/* We were probably waiting for more output buffers. */
-	netif_wake_queue(vi->dev);
+	netif_wake_subqueue(vi->dev, vq2txq(vq));
 }
 
 static void set_skb_frag(struct sk_buff *skb, struct page *page,
@@ -503,7 +539,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
-	struct receive_queue *rq = &vi->rq;
+	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
 
 	/* Schedule NAPI, Suppress further interrupts if successful. */
 	if (napi_schedule_prep(&rq->napi)) {
@@ -650,7 +686,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	struct send_queue *sq = &vi->sq;
+	int qnum = skb_get_queue_mapping(skb);
+	struct send_queue *sq = &vi->sq[qnum];
 	int capacity;
 
 	/* Free up any pending old buffers before queueing new ones. */
@@ -664,13 +701,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (likely(capacity == -ENOMEM)) {
 			if (net_ratelimit())
 				dev_warn(&dev->dev,
-					 "TX queue failure: out of memory\n");
+					 "TXQ (%d) failure: out of memory\n",
+					 qnum);
 		} else {
 			dev->stats.tx_fifo_errors++;
 			if (net_ratelimit())
 				dev_warn(&dev->dev,
-					 "Unexpected TX queue failure: %d\n",
-					 capacity);
+					 "Unexpected TXQ (%d) failure: %d\n",
+					 qnum, capacity);
 		}
 		dev->stats.tx_dropped++;
 		kfree_skb(skb);
@@ -685,12 +723,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Apparently nice girls don't return TX_BUSY; stop the queue
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (capacity < 2+MAX_SKB_FRAGS) {
-		netif_stop_queue(dev);
+		netif_stop_subqueue(dev, qnum);
 		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 			/* More just got used, free them then recheck. */
 			capacity += free_old_xmit_skbs(sq);
 			if (capacity >= 2+MAX_SKB_FRAGS) {
-				netif_start_queue(dev);
+				netif_start_subqueue(dev, qnum);
 				virtqueue_disable_cb(sq->vq);
 			}
 		}
@@ -758,23 +796,13 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
 static void virtnet_netpoll(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
 
-	napi_schedule(&vi->rq.napi);
+	for (i = 0; i < vi->curr_queue_pairs; i++)
+		napi_schedule(&vi->rq[i].napi);
 }
 #endif
 
-static int virtnet_open(struct net_device *dev)
-{
-	struct virtnet_info *vi = netdev_priv(dev);
-
-	/* Make sure we have some buffers: if oom use wq. */
-	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
-		schedule_delayed_work(&vi->rq.refill, 0);
-
-	virtnet_napi_enable(&vi->rq);
-	return 0;
-}
-
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should
@@ -830,13 +858,53 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
 	rtnl_unlock();
 }
 
+static int virtnet_set_queues(struct virtnet_info *vi)
+{
+	struct scatterlist sg;
+	struct virtio_net_ctrl_rfs s;
+	struct net_device *dev = vi->dev;
+
+	s.virtqueue_pairs = vi->curr_queue_pairs;
+	sg_init_one(&sg, &s, sizeof(s));
+
+	if (!vi->has_cvq)
+		return -EINVAL;
+
+	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RFS,
+				  VIRTIO_NET_CTRL_RFS_VQ_PAIRS_SET, &sg, 1, 0)){
+		dev_warn(&dev->dev, "Fail to set the number of queue pairs to"
+			 " %d\n", vi->curr_queue_pairs);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int virtnet_open(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		/* Make sure we have some buffers: if oom use wq. */
+		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
+			schedule_delayed_work(&vi->rq[i].refill, 0);
+		virtnet_napi_enable(&vi->rq[i]);
+	}
+
+	return 0;
+}
+
 static int virtnet_close(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
 
 	/* Make sure refill_work doesn't re-enable napi! */
-	cancel_delayed_work_sync(&vi->rq.refill);
-	napi_disable(&vi->rq.napi);
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		cancel_delayed_work_sync(&vi->rq[i].refill);
+		napi_disable(&vi->rq[i].napi);
+	}
 
 	return 0;
 }
@@ -948,8 +1016,8 @@ static void virtnet_get_ringparam(struct net_device *dev,
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
-	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq);
-	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq);
+	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
 	ring->rx_pending = ring->rx_max_pending;
 	ring->tx_pending = ring->tx_max_pending;
 }
@@ -967,12 +1035,6 @@ static void virtnet_get_drvinfo(struct net_device *dev,
 
 }
 
-static const struct ethtool_ops virtnet_ethtool_ops = {
-	.get_drvinfo = virtnet_get_drvinfo,
-	.get_link = ethtool_op_get_link,
-	.get_ringparam = virtnet_get_ringparam,
-};
-
 #define MIN_MTU 68
 #define MAX_MTU 65535
 
@@ -984,6 +1046,20 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+/* To avoid contending a lock hold by a vcpu who would exit to host, select the
+ * txq based on the processor id.
+ */
+static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
+		  smp_processor_id();
+
+	while (unlikely(txq >= dev->real_num_tx_queues))
+		txq -= dev->real_num_tx_queues;
+
+	return txq;
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
@@ -995,6 +1071,7 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_get_stats64     = virtnet_stats,
 	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
+	.ndo_select_queue     = virtnet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
@@ -1030,10 +1107,10 @@ static void virtnet_config_changed_work(struct work_struct *work)
 
 	if (vi->status & VIRTIO_NET_S_LINK_UP) {
 		netif_carrier_on(vi->dev);
-		netif_wake_queue(vi->dev);
+		netif_tx_wake_all_queues(vi->dev);
 	} else {
 		netif_carrier_off(vi->dev);
-		netif_stop_queue(vi->dev);
+		netif_tx_stop_all_queues(vi->dev);
 	}
 done:
 	mutex_unlock(&vi->config_lock);
@@ -1046,41 +1123,212 @@ static void virtnet_config_changed(struct virtio_device *vdev)
 	schedule_work(&vi->config_work);
 }
 
-static int init_vqs(struct virtnet_info *vi)
+static void free_receive_bufs(struct virtnet_info *vi)
+{
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		while (vi->rq[i].pages)
+			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+	}
+}
+
+/* Free memory allocated for send and receive queues */
+static void virtnet_free_queues(struct virtnet_info *vi)
 {
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
-	const char *names[] = { "input", "output", "control" };
-	int nvqs, err;
+	kfree(vi->rq);
+	vi->rq = NULL;
+	kfree(vi->sq);
+	vi->sq = NULL;
+}
 
-	/* We expect two virtqueues, receive then send,
-	 * and optionally control. */
-	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
+static void free_unused_bufs(struct virtnet_info *vi)
+{
+	void *buf;
+	int i;
 
-	err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names);
-	if (err)
-		return err;
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		struct virtqueue *vq = vi->sq[i].vq;
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+			dev_kfree_skb(buf);
+	}
 
-	vi->rq.vq = vqs[0];
-	vi->sq.vq = vqs[1];
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		struct virtqueue *vq = vi->rq[i].vq;
 
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vqs[2];
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+			if (vi->mergeable_rx_bufs || vi->big_packets)
+				give_pages(&vi->rq[i], buf);
+			else
+				dev_kfree_skb(buf);
+			--vi->rq[i].num;
+		}
+		BUG_ON(vi->rq[i].num != 0);
+	}
+}
 
+static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
+{
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		int cpu = set ? i : -1;
+		virtqueue_set_affinity(vi->rq[i].vq, cpu);
+		virtqueue_set_affinity(vi->sq[i].vq, cpu);
+	}
+}
+
+static void virtnet_del_vqs(struct virtnet_info *vi)
+{
+	struct virtio_device *vdev = vi->vdev;
+
+	virtnet_set_affinity(vi, false);
+
+	vdev->config->del_vqs(vdev);
+
+	virtnet_free_queues(vi);
+}
+
+static int virtnet_find_vqs(struct virtnet_info *vi)
+{
+	vq_callback_t **callbacks;
+	struct virtqueue **vqs;
+	int ret = -ENOMEM;
+	int i, total_vqs;
+	char **names;
+
+	/*
+	 * We expect 1 RX virtqueue followed by 1 TX virtqueue, followd by
+	 * possible control virtqueue, followed by RX/TX N-1 queue pairs used
+	 * in multiqueue mode.
+	 */
+	total_vqs = vi->max_queue_pairs * 2 +
+		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
+
+	/* Allocate space for find_vqs parameters */
+	vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
+	callbacks = kzalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
+	if (!vqs || !callbacks)
+		goto err_mem;
+	names = kzalloc(total_vqs * sizeof(*names), GFP_KERNEL);
+	if (!names)
+		goto err_mem;
+
+	/* Parameters for control virtqueue, if any */
+	if (vi->has_cvq) {
+		callbacks[2] = NULL;
+		names[2] = kasprintf(GFP_KERNEL, "control");
+	}
+
+	/* Allocate/initialize parameters for send/receive virtqueues */
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		callbacks[rxq2vq(i)] = skb_recv_done;
+		callbacks[txq2vq(i)] = skb_xmit_done;
+		names[rxq2vq(i)] = kasprintf(GFP_KERNEL, "input.%d", i);
+		names[txq2vq(i)] = kasprintf(GFP_KERNEL, "output.%d", i);
+	}
+
+	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
+					 (const char **)names);
+	if (ret)
+		goto err_names;
+
+	if (vi->has_cvq) {
+		vi->cvq = vqs[2];
 		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
 			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
 	}
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		vi->rq[i].vq = vqs[rxq2vq(i)];
+		vi->sq[i].vq = vqs[txq2vq(i)];
+	}
+
+	kfree(callbacks);
+	kfree(vqs);
+
+	return 0;
+
+err_names:
+	for (i = 0; i < total_vqs * 2; i ++)
+		kfree(names[i]);
+	kfree(names);
+
+err_mem:
+	kfree(callbacks);
+	kfree(vqs);
+
+	return ret;
+}
+
+static int virtnet_alloc_queues(struct virtnet_info *vi)
+{
+	int i;
+
+	vi->sq = kzalloc(sizeof(vi->sq[0]) * vi->max_queue_pairs, GFP_KERNEL);
+	vi->rq = kzalloc(sizeof(vi->rq[0]) * vi->max_queue_pairs, GFP_KERNEL);
+	if (!vi->rq || !vi->sq)
+		goto err;
+
+	/* setup initial receive and send queue parameters */
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		vi->rq[i].pages = NULL;
+		INIT_DELAYED_WORK(&vi->rq[i].refill, refill_work);
+		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
+			       napi_weight);
+
+		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
+		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
+	}
+
+
 	return 0;
+
+err:
+	virtnet_free_queues(vi);
+	return -ENOMEM;
+}
+
+static int init_vqs(struct virtnet_info *vi)
+{
+	int ret;
+
+	/* Allocate send & receive queues */
+	ret = virtnet_alloc_queues(vi);
+	if (ret)
+		goto err;
+
+	ret = virtnet_find_vqs(vi);
+	if (ret)
+		goto err_free;
+
+	virtnet_set_affinity(vi, true);
+	return 0;
+
+err_free:
+	virtnet_free_queues(vi);
+err:
+	return ret;
 }
 
 static int virtnet_probe(struct virtio_device *vdev)
 {
-	int err;
+	int i, err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
+	u16 curr_queue_pairs;
+
+	/* Find if host supports multiqueue virtio_net device */
+	err = virtio_config_val(vdev, VIRTIO_NET_F_RFS,
+				offsetof(struct virtio_net_config,
+				max_virtqueue_pairs), &curr_queue_pairs);
+
+	/* We need at least 2 queue's */
+	if (err)
+		curr_queue_pairs = 1;
 
 	/* Allocate ourselves a network device with room for our info */
-	dev = alloc_etherdev(sizeof(struct virtnet_info));
+	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), curr_queue_pairs);
 	if (!dev)
 		return -ENOMEM;
 
@@ -1126,22 +1374,17 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 	/* Set up our device-specific information */
 	vi = netdev_priv(dev);
-	netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight);
 	vi->dev = dev;
 	vi->vdev = vdev;
 	vdev->priv = vi;
-	vi->rq.pages = NULL;
 	vi->stats = alloc_percpu(struct virtnet_stats);
 	err = -ENOMEM;
 	if (vi->stats == NULL)
 		goto free;
 
-	INIT_DELAYED_WORK(&vi->rq.refill, refill_work);
 	mutex_init(&vi->config_lock);
 	vi->config_enable = true;
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
-	sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg));
-	sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg));
 
 	/* If we can receive ANY GSO packets, we must allocate large ones. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -1152,10 +1395,21 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
+		vi->has_cvq = true;
+
+	/* Use single tx/rx queue pair as default */
+	vi->curr_queue_pairs = 1;
+	vi->max_queue_pairs = curr_queue_pairs;
+
+	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
 	err = init_vqs(vi);
 	if (err)
 		goto free_stats;
 
+	netif_set_real_num_tx_queues(dev, 1);
+	netif_set_real_num_rx_queues(dev, 1);
+
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
@@ -1163,12 +1417,15 @@ static int virtnet_probe(struct virtio_device *vdev)
 	}
 
 	/* Last of all, set up some receive buffers. */
-	try_fill_recv(&vi->rq, GFP_KERNEL);
-
-	/* If we didn't even get one input buffer, we're useless. */
-	if (vi->rq.num == 0) {
-		err = -ENOMEM;
-		goto unregister;
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		try_fill_recv(&vi->rq[i], GFP_KERNEL);
+
+		/* If we didn't even get one input buffer, we're useless. */
+		if (vi->rq[i].num == 0) {
+			free_unused_bufs(vi);
+			err = -ENOMEM;
+			goto free_recv_bufs;
+		}
 	}
 
 	/* Assume link up if device can't report link status,
@@ -1181,13 +1438,20 @@ static int virtnet_probe(struct virtio_device *vdev)
 		netif_carrier_on(dev);
 	}
 
-	pr_debug("virtnet: registered device %s\n", dev->name);
+	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
+		 dev->name, curr_queue_pairs);
+
 	return 0;
 
-unregister:
+free_recv_bufs:
+	free_receive_bufs(vi);
 	unregister_netdev(dev);
+
 free_vqs:
-	vdev->config->del_vqs(vdev);
+	for (i = 0; i <curr_queue_pairs; i++)
+		cancel_delayed_work_sync(&vi->rq[i].refill);
+	virtnet_del_vqs(vi);
+
 free_stats:
 	free_percpu(vi->stats);
 free:
@@ -1195,28 +1459,6 @@ free:
 	return err;
 }
 
-static void free_unused_bufs(struct virtnet_info *vi)
-{
-	void *buf;
-	while (1) {
-		buf = virtqueue_detach_unused_buf(vi->sq.vq);
-		if (!buf)
-			break;
-		dev_kfree_skb(buf);
-	}
-	while (1) {
-		buf = virtqueue_detach_unused_buf(vi->rq.vq);
-		if (!buf)
-			break;
-		if (vi->mergeable_rx_bufs || vi->big_packets)
-			give_pages(&vi->rq, buf);
-		else
-			dev_kfree_skb(buf);
-		--vi->rq.num;
-	}
-	BUG_ON(vi->rq.num != 0);
-}
-
 static void remove_vq_common(struct virtnet_info *vi)
 {
 	vi->vdev->config->reset(vi->vdev);
@@ -1224,10 +1466,9 @@ static void remove_vq_common(struct virtnet_info *vi)
 	/* Free unused buffers in both send and recv, if any. */
 	free_unused_bufs(vi);
 
-	vi->vdev->config->del_vqs(vi->vdev);
+	free_receive_bufs(vi);
 
-	while (vi->rq.pages)
-		__free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0);
+	virtnet_del_vqs(vi);
 }
 
 static void __devexit virtnet_remove(struct virtio_device *vdev)
@@ -1253,6 +1494,7 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
 static int virtnet_freeze(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
+	int i;
 
 	/* Prevent config work handler from accessing the device */
 	mutex_lock(&vi->config_lock);
@@ -1260,10 +1502,14 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	mutex_unlock(&vi->config_lock);
 
 	netif_device_detach(vi->dev);
-	cancel_delayed_work_sync(&vi->rq.refill);
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		cancel_delayed_work_sync(&vi->rq[i].refill);
 
 	if (netif_running(vi->dev))
-		napi_disable(&vi->rq.napi);
+		for (i = 0; i < vi->max_queue_pairs; i++) {
+			napi_disable(&vi->rq[i].napi);
+			netif_napi_del(&vi->rq[i].napi);
+		}
 
 	remove_vq_common(vi);
 
@@ -1275,24 +1521,28 @@ static int virtnet_freeze(struct virtio_device *vdev)
 static int virtnet_restore(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
-	int err;
+	int err, i;
 
 	err = init_vqs(vi);
 	if (err)
 		return err;
 
 	if (netif_running(vi->dev))
-		virtnet_napi_enable(&vi->rq);
+		for (i = 0; i < vi->max_queue_pairs; i++)
+			virtnet_napi_enable(&vi->rq[i]);
 
 	netif_device_attach(vi->dev);
 
-	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
-		schedule_delayed_work(&vi->rq.refill, 0);
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
+			schedule_delayed_work(&vi->rq[i].refill, 0);
 
 	mutex_lock(&vi->config_lock);
 	vi->config_enable = true;
 	mutex_unlock(&vi->config_lock);
 
+	BUG_ON(virtnet_set_queues(vi));
+
 	return 0;
 }
 #endif
@@ -1310,7 +1560,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
 	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
-	VIRTIO_NET_F_GUEST_ANNOUNCE,
+	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_RFS,
 };
 
 static struct virtio_driver virtio_net_driver = {
@@ -1328,6 +1578,12 @@ static struct virtio_driver virtio_net_driver = {
 #endif
 };
 
+static const struct ethtool_ops virtnet_ethtool_ops = {
+	.get_drvinfo = virtnet_get_drvinfo,
+	.get_link = ethtool_op_get_link,
+	.get_ringparam = virtnet_get_ringparam,
+};
+
 static int __init init(void)
 {
 	return register_virtio_driver(&virtio_net_driver);
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 2470f54..6056cec 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -51,6 +51,7 @@
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
 #define VIRTIO_NET_F_GUEST_ANNOUNCE 21	/* Guest can announce device on the
 					 * network */
+#define VIRTIO_NET_F_RFS	22	/* Device supports multiple TXQ/RXQ */
 
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
 #define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */
@@ -60,6 +61,8 @@ struct virtio_net_config {
 	__u8 mac[6];
 	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
 	__u16 status;
+	/* Total number of RX/TX queues */
+	__u16 max_virtqueue_pairs;
 } __attribute__((packed));
 
 /* This is the first element of the scatter-gather list.  If you don't
@@ -166,4 +169,17 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_ANNOUNCE       3
  #define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0
 
+/*
+ * Control multiqueue
+ *
+ */
+struct virtio_net_ctrl_rfs {
+	u16 virtqueue_pairs;
+};
+
+#define VIRTIO_NET_CTRL_RFS   4
+ #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_SET        0
+ #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MIN        1
+ #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MAX        0x8000
+
 #endif /* _LINUX_VIRTIO_NET_H */
-- 
1.7.1

^ permalink raw reply related

* [net-next rfc v7 1/3] virtio-net: separate fields of sending/receiving queue from virtnet_info
From: Jason Wang @ 2012-11-27 10:15 UTC (permalink / raw)
  To: rusty, mst, krkumar2, virtualization, netdev, linux-kernel
  Cc: bhutchings, jwhan, shiyer, kvm
In-Reply-To: <1354011360-39479-1-git-send-email-jasowang@redhat.com>

To support multiqueue transmitq/receiveq, the first step is to separate queue
related structure from virtnet_info. This patch introduce send_queue and
receive_queue structure and use the pointer to them as the parameter in
functions handling sending/receiving.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c |  289 +++++++++++++++++++++++++---------------------
 1 files changed, 158 insertions(+), 131 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 26c502e..7975133 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -51,16 +51,44 @@ struct virtnet_stats {
 	u64 rx_packets;
 };
 
+/* Internal representation of a send virtqueue */
+struct send_queue {
+	/* Virtqueue associated with this send _queue */
+	struct virtqueue *vq;
+
+	/* TX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+};
+
+/* Internal representation of a receive virtqueue */
+struct receive_queue {
+	/* Virtqueue associated with this receive_queue */
+	struct virtqueue *vq;
+
+        struct napi_struct napi;
+
+        /* Number of input buffers, and max we've ever had. */
+        unsigned int num, max;
+
+	/* Work struct for refilling if we run low on memory. */
+	struct delayed_work refill;
+
+	/* Chain pages by the private ptr. */
+	struct page *pages;
+
+	/* RX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+};
+
 struct virtnet_info {
 	struct virtio_device *vdev;
-	struct virtqueue *rvq, *svq, *cvq;
+	struct virtqueue *cvq;
 	struct net_device *dev;
 	struct napi_struct napi;
+	struct send_queue sq;
+	struct receive_queue rq;
 	unsigned int status;
 
-	/* Number of input buffers, and max we've ever had. */
-	unsigned int num, max;
-
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -73,21 +101,11 @@ struct virtnet_info {
 	/* Active statistics */
 	struct virtnet_stats __percpu *stats;
 
-	/* Work struct for refilling if we run low on memory. */
-	struct delayed_work refill;
-
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
 	/* Lock for config space updates */
 	struct mutex config_lock;
-
-	/* Chain pages by the private ptr. */
-	struct page *pages;
-
-	/* fragments + linear part + virtio header */
-	struct scatterlist rx_sg[MAX_SKB_FRAGS + 2];
-	struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
 };
 
 struct skb_vnet_hdr {
@@ -117,22 +135,22 @@ static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
  * private is used to chain pages for big packets, put the whole
  * most recent used list in the beginning for reuse
  */
-static void give_pages(struct virtnet_info *vi, struct page *page)
+static void give_pages(struct receive_queue *rq, struct page *page)
 {
 	struct page *end;
 
-	/* Find end of list, sew whole thing into vi->pages. */
+	/* Find end of list, sew whole thing into vi->rq.pages. */
 	for (end = page; end->private; end = (struct page *)end->private);
-	end->private = (unsigned long)vi->pages;
-	vi->pages = page;
+	end->private = (unsigned long)rq->pages;
+	rq->pages = page;
 }
 
-static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 {
-	struct page *p = vi->pages;
+	struct page *p = rq->pages;
 
 	if (p) {
-		vi->pages = (struct page *)p->private;
+		rq->pages = (struct page *)p->private;
 		/* clear private here, it is used to chain pages */
 		p->private = 0;
 	} else
@@ -140,12 +158,12 @@ static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
 	return p;
 }
 
-static void skb_xmit_done(struct virtqueue *svq)
+static void skb_xmit_done(struct virtqueue *vq)
 {
-	struct virtnet_info *vi = svq->vdev->priv;
+	struct virtnet_info *vi = vq->vdev->priv;
 
 	/* Suppress further interrupts. */
-	virtqueue_disable_cb(svq);
+	virtqueue_disable_cb(vq);
 
 	/* We were probably waiting for more output buffers. */
 	netif_wake_queue(vi->dev);
@@ -167,9 +185,10 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
 }
 
 /* Called from bottom half context */
-static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+static struct sk_buff *page_to_skb(struct receive_queue *rq,
 				   struct page *page, unsigned int len)
 {
+	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct sk_buff *skb;
 	struct skb_vnet_hdr *hdr;
 	unsigned int copy, hdr_len, offset;
@@ -224,12 +243,12 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	}
 
 	if (page)
-		give_pages(vi, page);
+		give_pages(rq, page);
 
 	return skb;
 }
 
-static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
+static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
 {
 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 	struct page *page;
@@ -243,7 +262,7 @@ static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
 			skb->dev->stats.rx_length_errors++;
 			return -EINVAL;
 		}
-		page = virtqueue_get_buf(vi->rvq, &len);
+		page = virtqueue_get_buf(rq->vq, &len);
 		if (!page) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 skb->dev->name, hdr->mhdr.num_buffers);
@@ -256,14 +275,15 @@ static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
 
 		set_skb_frag(skb, page, 0, &len);
 
-		--vi->num;
+		--rq->num;
 	}
 	return 0;
 }
 
-static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
+static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 {
-	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtnet_info *vi = rq->vq->vdev->priv;
+	struct net_device *dev = vi->dev;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 	struct sk_buff *skb;
 	struct page *page;
@@ -273,7 +293,7 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
 		if (vi->mergeable_rx_bufs || vi->big_packets)
-			give_pages(vi, buf);
+			give_pages(rq, buf);
 		else
 			dev_kfree_skb(buf);
 		return;
@@ -285,14 +305,14 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
 		skb_trim(skb, len);
 	} else {
 		page = buf;
-		skb = page_to_skb(vi, page, len);
+		skb = page_to_skb(rq, page, len);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
-			give_pages(vi, page);
+			give_pages(rq, page);
 			return;
 		}
 		if (vi->mergeable_rx_bufs)
-			if (receive_mergeable(vi, skb)) {
+			if (receive_mergeable(rq, skb)) {
 				dev_kfree_skb(skb);
 				return;
 			}
@@ -359,8 +379,9 @@ frame_err:
 	dev_kfree_skb(skb);
 }
 
-static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
 {
+	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct sk_buff *skb;
 	struct skb_vnet_hdr *hdr;
 	int err;
@@ -372,77 +393,77 @@ static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
 	skb_put(skb, MAX_PACKET_LEN);
 
 	hdr = skb_vnet_hdr(skb);
-	sg_set_buf(vi->rx_sg, &hdr->hdr, sizeof hdr->hdr);
+	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
 
-	skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len);
+	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
 
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, 2, skb, gfp);
 	if (err < 0)
 		dev_kfree_skb(skb);
 
 	return err;
 }
 
-static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 {
 	struct page *first, *list = NULL;
 	char *p;
 	int i, err, offset;
 
-	/* page in vi->rx_sg[MAX_SKB_FRAGS + 1] is list tail */
+	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
 	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
-		first = get_a_page(vi, gfp);
+		first = get_a_page(rq, gfp);
 		if (!first) {
 			if (list)
-				give_pages(vi, list);
+				give_pages(rq, list);
 			return -ENOMEM;
 		}
-		sg_set_buf(&vi->rx_sg[i], page_address(first), PAGE_SIZE);
+		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
 
 		/* chain new page in list head to match sg */
 		first->private = (unsigned long)list;
 		list = first;
 	}
 
-	first = get_a_page(vi, gfp);
+	first = get_a_page(rq, gfp);
 	if (!first) {
-		give_pages(vi, list);
+		give_pages(rq, list);
 		return -ENOMEM;
 	}
 	p = page_address(first);
 
-	/* vi->rx_sg[0], vi->rx_sg[1] share the same page */
-	/* a separated vi->rx_sg[0] for virtio_net_hdr only due to QEMU bug */
-	sg_set_buf(&vi->rx_sg[0], p, sizeof(struct virtio_net_hdr));
+	/* rq->sg[0], rq->sg[1] share the same page */
+	/* a separated rq->sg[0] for virtio_net_hdr only due to QEMU bug */
+	sg_set_buf(&rq->sg[0], p, sizeof(struct virtio_net_hdr));
 
-	/* vi->rx_sg[1] for data packet, from offset */
+	/* rq->sg[1] for data packet, from offset */
 	offset = sizeof(struct padded_vnet_hdr);
-	sg_set_buf(&vi->rx_sg[1], p + offset, PAGE_SIZE - offset);
+	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
 
 	/* chain first in list head */
 	first->private = (unsigned long)list;
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, MAX_SKB_FRAGS + 2,
 				first, gfp);
 	if (err < 0)
-		give_pages(vi, first);
+		give_pages(rq, first);
 
 	return err;
 }
 
-static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
 	struct page *page;
 	int err;
 
-	page = get_a_page(vi, gfp);
+	page = get_a_page(rq, gfp);
 	if (!page)
 		return -ENOMEM;
 
-	sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE);
+	sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
 
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, 1, page, gfp);
 	if (err < 0)
-		give_pages(vi, page);
+		give_pages(rq, page);
 
 	return err;
 }
@@ -454,97 +475,101 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
  * before we're receiving packets, or from refill_work which is
  * careful to disable receiving (using napi_disable).
  */
-static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
+static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 {
+	struct virtnet_info *vi = rq->vq->vdev->priv;
 	int err;
 	bool oom;
 
 	do {
 		if (vi->mergeable_rx_bufs)
-			err = add_recvbuf_mergeable(vi, gfp);
+			err = add_recvbuf_mergeable(rq, gfp);
 		else if (vi->big_packets)
-			err = add_recvbuf_big(vi, gfp);
+			err = add_recvbuf_big(rq, gfp);
 		else
-			err = add_recvbuf_small(vi, gfp);
+			err = add_recvbuf_small(rq, gfp);
 
 		oom = err == -ENOMEM;
 		if (err < 0)
 			break;
-		++vi->num;
+		++rq->num;
 	} while (err > 0);
-	if (unlikely(vi->num > vi->max))
-		vi->max = vi->num;
-	virtqueue_kick(vi->rvq);
+	if (unlikely(rq->num > rq->max))
+		rq->max = rq->num;
+	virtqueue_kick(rq->vq);
 	return !oom;
 }
 
 static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
+	struct receive_queue *rq = &vi->rq;
+
 	/* Schedule NAPI, Suppress further interrupts if successful. */
-	if (napi_schedule_prep(&vi->napi)) {
+	if (napi_schedule_prep(&rq->napi)) {
 		virtqueue_disable_cb(rvq);
-		__napi_schedule(&vi->napi);
+		__napi_schedule(&rq->napi);
 	}
 }
 
-static void virtnet_napi_enable(struct virtnet_info *vi)
+static void virtnet_napi_enable(struct receive_queue *rq)
 {
-	napi_enable(&vi->napi);
+	napi_enable(&rq->napi);
 
 	/* If all buffers were filled by other side before we napi_enabled, we
 	 * won't get another interrupt, so process any outstanding packets
 	 * now.  virtnet_poll wants re-enable the queue, so we disable here.
 	 * We synchronize against interrupts via NAPI_STATE_SCHED */
-	if (napi_schedule_prep(&vi->napi)) {
-		virtqueue_disable_cb(vi->rvq);
+	if (napi_schedule_prep(&rq->napi)) {
+		virtqueue_disable_cb(rq->vq);
 		local_bh_disable();
-		__napi_schedule(&vi->napi);
+		__napi_schedule(&rq->napi);
 		local_bh_enable();
 	}
 }
 
 static void refill_work(struct work_struct *work)
 {
-	struct virtnet_info *vi;
+	struct receive_queue *rq =
+		container_of(work, struct receive_queue, refill.work);
 	bool still_empty;
 
-	vi = container_of(work, struct virtnet_info, refill.work);
-	napi_disable(&vi->napi);
-	still_empty = !try_fill_recv(vi, GFP_KERNEL);
-	virtnet_napi_enable(vi);
+	napi_disable(&rq->napi);
+	still_empty = !try_fill_recv(rq, GFP_KERNEL);
+	virtnet_napi_enable(rq);
 
 	/* In theory, this can happen: if we don't get any buffers in
 	 * we will *never* try to fill again. */
 	if (still_empty)
-		schedule_delayed_work(&vi->refill, HZ/2);
+		schedule_delayed_work(&rq->refill, HZ/2);
 }
 
 static int virtnet_poll(struct napi_struct *napi, int budget)
 {
-	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
+	struct receive_queue *rq =
+		container_of(napi, struct receive_queue, napi);
 	void *buf;
 	unsigned int len, received = 0;
 
 again:
 	while (received < budget &&
-	       (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
-		receive_buf(vi->dev, buf, len);
-		--vi->num;
+	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+		receive_buf(rq, buf, len);
+		--rq->num;
 		received++;
 	}
 
-	if (vi->num < vi->max / 2) {
-		if (!try_fill_recv(vi, GFP_ATOMIC))
-			schedule_delayed_work(&vi->refill, 0);
+	if (rq->num < rq->max / 2) {
+		if (!try_fill_recv(rq, GFP_ATOMIC))
+			schedule_delayed_work(&rq->refill, 0);
 	}
 
 	/* Out of packets? */
 	if (received < budget) {
 		napi_complete(napi);
-		if (unlikely(!virtqueue_enable_cb(vi->rvq)) &&
+		if (unlikely(!virtqueue_enable_cb(rq->vq)) &&
 		    napi_schedule_prep(napi)) {
-			virtqueue_disable_cb(vi->rvq);
+			virtqueue_disable_cb(rq->vq);
 			__napi_schedule(napi);
 			goto again;
 		}
@@ -553,13 +578,14 @@ again:
 	return received;
 }
 
-static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
+static unsigned int free_old_xmit_skbs(struct send_queue *sq)
 {
 	struct sk_buff *skb;
 	unsigned int len, tot_sgs = 0;
+	struct virtnet_info *vi = sq->vq->vdev->priv;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-	while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
+	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
 
 		u64_stats_update_begin(&stats->tx_syncp);
@@ -573,10 +599,11 @@ static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
 	return tot_sgs;
 }
 
-static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
+static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 {
 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
+	struct virtnet_info *vi = sq->vq->vdev->priv;
 
 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 
@@ -611,25 +638,26 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 
 	/* Encode metadata header at front. */
 	if (vi->mergeable_rx_bufs)
-		sg_set_buf(vi->tx_sg, &hdr->mhdr, sizeof hdr->mhdr);
+		sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
 	else
-		sg_set_buf(vi->tx_sg, &hdr->hdr, sizeof hdr->hdr);
+		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
 
-	hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
-	return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
+	hdr->num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	return virtqueue_add_buf(sq->vq, sq->sg, hdr->num_sg,
 				 0, skb, GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	struct send_queue *sq = &vi->sq;
 	int capacity;
 
 	/* Free up any pending old buffers before queueing new ones. */
-	free_old_xmit_skbs(vi);
+	free_old_xmit_skbs(sq);
 
 	/* Try to transmit */
-	capacity = xmit_skb(vi, skb);
+	capacity = xmit_skb(sq, skb);
 
 	/* This can happen with OOM and indirect buffers. */
 	if (unlikely(capacity < 0)) {
@@ -648,7 +676,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
-	virtqueue_kick(vi->svq);
+	virtqueue_kick(sq->vq);
 
 	/* Don't wait up for transmitted skbs to be freed. */
 	skb_orphan(skb);
@@ -658,12 +686,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (capacity < 2+MAX_SKB_FRAGS) {
 		netif_stop_queue(dev);
-		if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
+		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 			/* More just got used, free them then recheck. */
-			capacity += free_old_xmit_skbs(vi);
+			capacity += free_old_xmit_skbs(sq);
 			if (capacity >= 2+MAX_SKB_FRAGS) {
 				netif_start_queue(dev);
-				virtqueue_disable_cb(vi->svq);
+				virtqueue_disable_cb(sq->vq);
 			}
 		}
 	}
@@ -731,7 +759,7 @@ static void virtnet_netpoll(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
-	napi_schedule(&vi->napi);
+	napi_schedule(&vi->rq.napi);
 }
 #endif
 
@@ -740,10 +768,10 @@ static int virtnet_open(struct net_device *dev)
 	struct virtnet_info *vi = netdev_priv(dev);
 
 	/* Make sure we have some buffers: if oom use wq. */
-	if (!try_fill_recv(vi, GFP_KERNEL))
-		schedule_delayed_work(&vi->refill, 0);
+	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
+		schedule_delayed_work(&vi->rq.refill, 0);
 
-	virtnet_napi_enable(vi);
+	virtnet_napi_enable(&vi->rq);
 	return 0;
 }
 
@@ -807,8 +835,8 @@ static int virtnet_close(struct net_device *dev)
 	struct virtnet_info *vi = netdev_priv(dev);
 
 	/* Make sure refill_work doesn't re-enable napi! */
-	cancel_delayed_work_sync(&vi->refill);
-	napi_disable(&vi->napi);
+	cancel_delayed_work_sync(&vi->rq.refill);
+	napi_disable(&vi->rq.napi);
 
 	return 0;
 }
@@ -920,11 +948,10 @@ static void virtnet_get_ringparam(struct net_device *dev,
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
-	ring->rx_max_pending = virtqueue_get_vring_size(vi->rvq);
-	ring->tx_max_pending = virtqueue_get_vring_size(vi->svq);
+	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq);
+	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq);
 	ring->rx_pending = ring->rx_max_pending;
 	ring->tx_pending = ring->tx_max_pending;
-
 }
 
 
@@ -1034,8 +1061,8 @@ static int init_vqs(struct virtnet_info *vi)
 	if (err)
 		return err;
 
-	vi->rvq = vqs[0];
-	vi->svq = vqs[1];
+	vi->rq.vq = vqs[0];
+	vi->sq.vq = vqs[1];
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
 		vi->cvq = vqs[2];
@@ -1099,22 +1126,22 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 	/* Set up our device-specific information */
 	vi = netdev_priv(dev);
-	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
+	netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight);
 	vi->dev = dev;
 	vi->vdev = vdev;
 	vdev->priv = vi;
-	vi->pages = NULL;
+	vi->rq.pages = NULL;
 	vi->stats = alloc_percpu(struct virtnet_stats);
 	err = -ENOMEM;
 	if (vi->stats == NULL)
 		goto free;
 
-	INIT_DELAYED_WORK(&vi->refill, refill_work);
+	INIT_DELAYED_WORK(&vi->rq.refill, refill_work);
 	mutex_init(&vi->config_lock);
 	vi->config_enable = true;
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
-	sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
-	sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
+	sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg));
+	sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg));
 
 	/* If we can receive ANY GSO packets, we must allocate large ones. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -1136,10 +1163,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	}
 
 	/* Last of all, set up some receive buffers. */
-	try_fill_recv(vi, GFP_KERNEL);
+	try_fill_recv(&vi->rq, GFP_KERNEL);
 
 	/* If we didn't even get one input buffer, we're useless. */
-	if (vi->num == 0) {
+	if (vi->rq.num == 0) {
 		err = -ENOMEM;
 		goto unregister;
 	}
@@ -1172,22 +1199,22 @@ static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
 	while (1) {
-		buf = virtqueue_detach_unused_buf(vi->svq);
+		buf = virtqueue_detach_unused_buf(vi->sq.vq);
 		if (!buf)
 			break;
 		dev_kfree_skb(buf);
 	}
 	while (1) {
-		buf = virtqueue_detach_unused_buf(vi->rvq);
+		buf = virtqueue_detach_unused_buf(vi->rq.vq);
 		if (!buf)
 			break;
 		if (vi->mergeable_rx_bufs || vi->big_packets)
-			give_pages(vi, buf);
+			give_pages(&vi->rq, buf);
 		else
 			dev_kfree_skb(buf);
-		--vi->num;
+		--vi->rq.num;
 	}
-	BUG_ON(vi->num != 0);
+	BUG_ON(vi->rq.num != 0);
 }
 
 static void remove_vq_common(struct virtnet_info *vi)
@@ -1199,8 +1226,8 @@ static void remove_vq_common(struct virtnet_info *vi)
 
 	vi->vdev->config->del_vqs(vi->vdev);
 
-	while (vi->pages)
-		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
+	while (vi->rq.pages)
+		__free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0);
 }
 
 static void __devexit virtnet_remove(struct virtio_device *vdev)
@@ -1233,10 +1260,10 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	mutex_unlock(&vi->config_lock);
 
 	netif_device_detach(vi->dev);
-	cancel_delayed_work_sync(&vi->refill);
+	cancel_delayed_work_sync(&vi->rq.refill);
 
 	if (netif_running(vi->dev))
-		napi_disable(&vi->napi);
+		napi_disable(&vi->rq.napi);
 
 	remove_vq_common(vi);
 
@@ -1255,12 +1282,12 @@ static int virtnet_restore(struct virtio_device *vdev)
 		return err;
 
 	if (netif_running(vi->dev))
-		virtnet_napi_enable(vi);
+		virtnet_napi_enable(&vi->rq);
 
 	netif_device_attach(vi->dev);
 
-	if (!try_fill_recv(vi, GFP_KERNEL))
-		schedule_delayed_work(&vi->refill, 0);
+	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
+		schedule_delayed_work(&vi->rq.refill, 0);
 
 	mutex_lock(&vi->config_lock);
 	vi->config_enable = true;
-- 
1.7.1

^ permalink raw reply related

* [net-next rfc v7 0/3] Multiqueue virtio-net
From: Jason Wang @ 2012-11-27 10:15 UTC (permalink / raw)
  To: rusty, mst, krkumar2, virtualization, netdev, linux-kernel
  Cc: bhutchings, jwhan, shiyer, kvm

Hi all:

This series is an update version of multiqueue virtio-net driver based on
Krishna Kumar's work to let virtio-net use multiple rx/tx queues to do the
packets reception and transmission. Please review and comments.

A protype implementation of qemu-kvm support could by found in
git://github.com/jasowang/qemu-kvm-mq.git. To start a guest with two queues, you
could specify the queues parameters to both tap and virtio-net like:

./qemu-kvm -netdev tap,queues=2,... -device virtio-net-pci,queues=2,...

then enable the multiqueue through ethtool by:

ethtool -L eth0 combined 2

Changes from V6:
- Align the implementation with the RFC spec update v5
- Addressing Rusty's comments:
  * split the patches
  * rename to max_queue_pairs and curr_queue_pairs
  * remove the useless status
  * fix the hibernation bug
- Addressing Ben's comments:
  * check other parameters in ethtool_set_queues

Changes from v5:
- Align the implementation with the RFC spec update v4
- Switch the mode between single mode and multiqueue mode without reset
- Remove the 256 limitation of queues
- Use helpers to do the mapping between virtqueues and tx/rx queues
- Use commbined channels instead of separated rx/tx queus when do the queue
number configuartion
- Other coding style comments from Michael

Changes from V4:
- Add ability to negotiate the number of queues through control virtqueue
- Ethtool -{L|l} support and default the tx/rx queue number to 1
- Expose the API to set irq affinity instead of irq itself

Changes from V3:
- Rebase to the net-next
- Let queue 2 to be the control virtqueue to obey the spec
- Prodives irq affinity
- Choose txq based on processor id

Reference:
- Virtio spec RFC: http://patchwork.ozlabs.org/patch/201303/
- V6: https://lkml.org/lkml/2012/10/30/127
- V5: http://lwn.net/Articles/505388/
- V4: https://lkml.org/lkml/2012/6/25/120
- V2: http://lwn.net/Articles/467283/


Perf Numbers:

Will do some basic test and post as a reply to this mail.

Jason Wang (3):
  virtio-net: separate fields of sending/receiving queue from
    virtnet_info
  virtio_net: multiqueue support
  virtio-net: change the number of queues through ethtool

 drivers/net/virtio_net.c        |  716 ++++++++++++++++++++++++++++-----------
 include/uapi/linux/virtio_net.h |   16 +
 2 files changed, 536 insertions(+), 196 deletions(-)

^ permalink raw reply

* [RFC PATCH iproute2] Add mdb command to bridge
From: Cong Wang @ 2012-11-27  9:49 UTC (permalink / raw)
  To: netdev
  Cc: bridge, Cong Wang, Herbert Xu, Stephen Hemminger, David S. Miller,
	Thomas Graf, Jesper Dangaard Brouer
In-Reply-To: <1354009785-20014-1-git-send-email-amwang@redhat.com>

Warning: it is still a draft! :)

The output is ugly, I just want to prove it works.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 bridge/Makefile           |    2 +-
 bridge/br_common.h        |    1 +
 bridge/bridge.c           |    1 +
 bridge/mdb.c              |  200 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rtnetlink.h |    3 +
 5 files changed, 206 insertions(+), 1 deletions(-)
 create mode 100644 bridge/mdb.c

diff --git a/bridge/Makefile b/bridge/Makefile
index 9a6743e..67aceb4 100644
--- a/bridge/Makefile
+++ b/bridge/Makefile
@@ -1,4 +1,4 @@
-BROBJ = bridge.o fdb.o monitor.o link.o
+BROBJ = bridge.o fdb.o monitor.o link.o mdb.o
 
 include ../Config
 
diff --git a/bridge/br_common.h b/bridge/br_common.h
index 718ecb9..67fd75c 100644
--- a/bridge/br_common.h
+++ b/bridge/br_common.h
@@ -5,6 +5,7 @@ extern int print_fdb(const struct sockaddr_nl *who,
 		     struct nlmsghdr *n, void *arg);
 
 extern int do_fdb(int argc, char **argv);
+extern int do_mdb(int argc, char **argv);
 extern int do_monitor(int argc, char **argv);
 
 extern int preferred_family;
diff --git a/bridge/bridge.c b/bridge/bridge.c
index e2c33b0..1fcd365 100644
--- a/bridge/bridge.c
+++ b/bridge/bridge.c
@@ -43,6 +43,7 @@ static const struct cmd {
 	int (*func)(int argc, char **argv);
 } cmds[] = {
 	{ "fdb", 	do_fdb },
+	{ "mdb", 	do_mdb },
 	{ "monitor",	do_monitor },
 	{ "help",	do_help },
 	{ 0 }
diff --git a/bridge/mdb.c b/bridge/mdb.c
new file mode 100644
index 0000000..7873128
--- /dev/null
+++ b/bridge/mdb.c
@@ -0,0 +1,200 @@
+/*
+ * Get mdb table with netlink
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <linux/if_bridge.h>
+#include <linux/if_ether.h>
+#include <linux/neighbour.h>
+#include <string.h>
+#include <arpa/inet.h>
+
+#include "libnetlink.h"
+#include "br_common.h"
+#include "rt_names.h"
+#include "utils.h"
+
+int filter_index;
+
+static void usage(void)
+{
+	fprintf(stderr, "       bridge mdb {show} [ dev DEV ]\n");
+	exit(-1);
+}
+
+/* Bridge multicast database attributes
+ * [MDBA_MDB] = {
+ *    [MDBA_MCADDR]
+ *    [MDBA_BRPORT] = {
+ *        [MDBA_BRPORT_NO]
+ *    }
+ * }
+ * [MDBA_ROUTER] = {
+ *    [MDBA_BRPORT] = {
+ *        [MDBA_BRPORT_NO]
+ *    }
+ * }
+ */
+enum {
+        MDBA_UNSPEC,
+        MDBA_MDB,
+        MDBA_ROUTER,
+        __MDBA_MAX,
+};
+#define MDBA_MAX (__MDBA_MAX - 1)
+
+enum {
+        MDBA_MDB_UNSPEC,
+        MDBA_MDB_MCADDR,
+        MDBA_MDB_BRPORT,
+        __MDBA_MDB_MAX,
+};
+#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1)
+
+enum {
+        MDBA_BRPORT_UNSPEC,
+        MDBA_BRPORT_NO,
+        __MDBA_BRPORT_MAX,
+};
+#define MDBA_BRPORT_MAX (__MDBA_BRPORT_MAX - 1)
+
+
+struct br_port_msg {
+        int ifindex;
+};
+
+#ifndef MDBA_RTA
+#define MDBA_RTA(r) \
+	((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct br_port_msg))))
+#endif
+
+static void br_print_ports(FILE *f, struct rtattr *attr)
+{
+	uint16_t *port_no;
+	struct rtattr *i;
+	int rem;
+
+	fprintf(f, "\n      { ");
+
+	rem = RTA_PAYLOAD(attr);
+	for (i = RTA_DATA(attr); RTA_OK(i, rem); i = RTA_NEXT(i, rem)) {
+		port_no = RTA_DATA(i);
+		fprintf(f, "%u ", *port_no);
+	}
+	fprintf(f, "} ");
+}
+
+int print_mdb(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+	FILE *fp = arg;
+	struct br_port_msg *r = NLMSG_DATA(n);
+	int len = n->nlmsg_len;
+	struct rtattr * tb[MDBA_MAX+1];
+	struct rtattr * ports[MDBA_MDB_MAX+1];
+	SPRINT_BUF(abuf);
+
+	if (n->nlmsg_type != RTM_GETMDB) {
+		fprintf(stderr, "Not RTM_MDB: %08x %08x %08x\n",
+			n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+
+		return 0;
+	}
+
+	len -= NLMSG_LENGTH(sizeof(*r));
+	if (len < 0) {
+		fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+		return -1;
+	}
+
+	if (filter_index && filter_index != r->ifindex)
+		return 0;
+
+	if (!filter_index && r->ifindex)
+		fprintf(fp, "bridge dev %s ", ll_index_to_name(r->ifindex));
+
+	parse_rtattr(tb, MDBA_MAX, MDBA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+
+	if (tb[MDBA_MDB]) {
+		parse_rtattr_nested(ports, MDBA_MDB_MAX, tb[MDBA_MDB]);
+
+		if (ports[MDBA_MDB_MCADDR]) {
+			uint32_t addr = rta_getattr_u32(ports[MDBA_MDB_MCADDR]);
+			fprintf(fp, "multicast addr: %s ", inet_ntop(AF_INET, &addr, abuf, sizeof(abuf)));
+		}
+
+		if (ports[MDBA_MDB_BRPORT])
+			br_print_ports(fp, ports[MDBA_MDB_BRPORT]);
+	}
+
+	if (tb[MDBA_ROUTER]) {
+		parse_rtattr_nested(ports, MDBA_MDB_MAX, tb[MDBA_ROUTER]);
+
+		if (ports[MDBA_MDB_BRPORT])
+			br_print_ports(fp, ports[MDBA_MDB_BRPORT]);
+	}
+
+	fprintf(fp, "\n");
+	return 0;
+}
+
+static int mdb_show(int argc, char **argv)
+{
+	char *filter_dev = NULL;
+
+	while (argc > 0) {
+		if (strcmp(*argv, "dev") == 0) {
+			NEXT_ARG();
+			if (filter_dev)
+				duparg("dev", *argv);
+			filter_dev = *argv;
+		}
+		argc--; argv++;
+	}
+
+	if (filter_dev) {
+		filter_index = if_nametoindex(filter_dev);
+		if (filter_index == 0) {
+			fprintf(stderr, "Cannot find device \"%s\"\n",
+				filter_dev);
+			return -1;
+		}
+	}
+
+	if (rtnl_wilddump_request(&rth, PF_BRIDGE, RTM_GETMDB) < 0) {
+		perror("Cannot send dump request");
+		exit(1);
+	}
+
+	if (rtnl_dump_filter(&rth, print_mdb, stdout) < 0) {
+		fprintf(stderr, "Dump terminated\n");
+		exit(1);
+	}
+
+	return 0;
+}
+
+int do_mdb(int argc, char **argv)
+{
+	ll_init_map(&rth);
+
+	if (argc > 0) {
+		if (matches(*argv, "show") == 0 ||
+		    matches(*argv, "lst") == 0 ||
+		    matches(*argv, "list") == 0)
+			return mdb_show(argc-1, argv+1);
+		if (matches(*argv, "help") == 0)
+			usage();
+	} else
+		return mdb_show(0, NULL);
+
+	fprintf(stderr, "Command \"%s\" is unknown, try \"bridge mdb help\".\n", *argv);
+	exit(-1);
+}
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 0e3e0c1..f400b5e 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -120,6 +120,9 @@ enum {
 	RTM_SETDCB,
 #define RTM_SETDCB RTM_SETDCB
 
+	RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
-- 
1.7.7.6

^ permalink raw reply related

* [RFC PATCH 2/2] bridge: export multicast database via netlink
From: Cong Wang @ 2012-11-27  9:49 UTC (permalink / raw)
  To: netdev
  Cc: bridge, Cong Wang, Herbert Xu, Stephen Hemminger, David S. Miller,
	Thomas Graf, Jesper Dangaard Brouer
In-Reply-To: <1354009785-20014-1-git-send-email-amwang@redhat.com>

Based on net-next.

Warning: this patch is still a draft! :)

This patch exports bridge multicast database via netlink
message type RTM_GETMDB. Similar to fdb, but currently bridge-specific.
We may need to support modify multicast database too (RTM_{ADD,DEL}MDB).

So, the questions are:

1) Is this design okay?
2) Do we need to make it generic like fdb?
3) Do we need to support RTM_{ADD,DEL}MDB?

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 include/uapi/linux/if_bridge.h |   37 +++++++++
 include/uapi/linux/rtnetlink.h |    3 +
 net/bridge/Makefile            |    2 +-
 net/bridge/br_mdb.c            |  174 ++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_multicast.c      |    1 +
 net/bridge/br_private.h        |    1 +
 6 files changed, 217 insertions(+), 1 deletions(-)
 create mode 100644 net/bridge/br_mdb.c

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index b388579..f5655ea 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -116,4 +116,41 @@ enum {
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
+
+/* Bridge multicast database attributes
+ * [MDBA_MDB] = {
+ *    [MDBA_MCADDR]
+ *    [MDBA_BRPORT] = {
+ *        [MDBA_BRPORT_NO]
+ *    }
+ * }
+ * [MDBA_ROUTER] = {
+ *    [MDBA_BRPORT] = {
+ *        [MDBA_BRPORT_NO]
+ *    }
+ * }
+ */
+enum {
+	MDBA_UNSPEC,
+	MDBA_MDB,
+	MDBA_ROUTER,
+	__MDBA_MAX,
+};
+#define MDBA_MAX (__MDBA_MAX - 1)
+
+enum {
+	MDBA_MDB_UNSPEC,
+	MDBA_MDB_MCADDR,
+	MDBA_MDB_BRPORT,
+	__MDBA_MDB_MAX,
+};
+#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1)
+
+enum {
+	MDBA_BRPORT_UNSPEC,
+	MDBA_BRPORT_NO,
+	__MDBA_BRPORT_MAX,
+};
+#define MDBA_BRPORT_MAX (__MDBA_BRPORT_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_BRIDGE_H */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 3dee071..0df623f 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -125,6 +125,9 @@ enum {
 	RTM_GETNETCONF = 82,
 #define RTM_GETNETCONF RTM_GETNETCONF
 
+	RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index d0359ea..e859098 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -12,6 +12,6 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
 
 bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
 
-bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o
+bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
 
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
new file mode 100644
index 0000000..dc73091
--- /dev/null
+++ b/net/bridge/br_mdb.c
@@ -0,0 +1,174 @@
+#include <linux/err.h>
+#include <linux/if_ether.h>
+#include <linux/igmp.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+#endif
+
+#include "br_private.h"
+
+struct br_port_msg {
+	int ifindex;
+};
+
+static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
+			       u32 seq, struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_port *p;
+	struct hlist_node *n;
+	struct nlattr *nest, *nest2;
+
+	if (!br->multicast_router || hlist_empty(&br->router_list)) {
+		printk(KERN_INFO "no router on bridge\n");
+		return 0;
+	}
+
+	nest = nla_nest_start(skb, MDBA_ROUTER);
+	if (nest == NULL)
+		return -EMSGSIZE;
+	nest2 = nla_nest_start(skb, MDBA_MDB_BRPORT);
+	if (nest2 == NULL)
+		goto fail;
+
+	hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) {
+		if (p && nla_put_u16(skb, MDBA_BRPORT_NO, p->port_no)) {
+			nla_nest_cancel(skb, nest2);
+			goto fail;
+		}
+	}
+
+	nla_nest_end(skb, nest2);
+	nla_nest_end(skb, nest);
+	return 0;
+fail:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
+			    u32 seq, struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_mdb_htable *mdb;
+	struct nlattr *nest, *nest2;
+	unsigned int i;
+
+	if (br->multicast_disabled) {
+		printk(KERN_INFO "multicast is disabled on bridge\n");
+		return 0;
+	}
+
+	mdb = rcu_dereference(br->mdb);
+	if (!mdb) {
+		printk(KERN_INFO "no mdb on bridge\n");
+		return 0;
+	}
+
+	nest = nla_nest_start(skb, MDBA_MDB);
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	for (i = 0; i < mdb->max; i++) {
+		struct hlist_node *h;
+		struct net_bridge_mdb_entry *mp;
+		struct net_bridge_port_group *p, **pp;
+		struct net_bridge_port *port;
+
+		hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) {
+			if (nla_put_be32(skb, MDBA_MDB_MCADDR, mp->addr.u.ip4))
+				goto fail;
+
+			nest2 = nla_nest_start(skb, MDBA_MDB_BRPORT);
+			if (nest2 == NULL)
+				goto fail;
+
+			for (pp = &mp->ports;
+			     (p = rcu_dereference(*pp)) != NULL;
+			      pp = &p->next) {
+				port = p->port;
+				if (port) {
+					printk(KERN_INFO "port %u, mcaddr: %pI4\n", port->port_no, &mp->addr.u.ip4);
+					if (nla_put_u16(skb, MDBA_BRPORT_NO, port->port_no)) {
+						goto fail;
+					}
+				}
+			}
+
+			nla_nest_end(skb, nest2);
+		}
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+fail:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net_device *dev;
+	struct net *net = sock_net(skb->sk);
+	struct nlmsghdr *nlh;
+	u32 seq = cb->nlh->nlmsg_seq;
+	int idx = 0, s_idx;
+
+	s_idx = cb->args[0];
+
+	rcu_read_lock();
+	cb->seq = net->dev_base_seq;
+
+	for_each_netdev_rcu(net, dev) {
+		if (dev->priv_flags & IFF_EBRIDGE) {
+			struct br_port_msg *bpm;
+
+			if (idx < s_idx)
+				goto cont;
+
+			nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+					seq, RTM_GETMDB,
+					sizeof(*bpm), NLM_F_MULTI);
+			if (nlh == NULL)
+				break;
+
+			bpm = nlmsg_data(nlh);
+			bpm->ifindex = dev->ifindex;
+			if (br_mdb_fill_info(skb, cb, seq, dev) < 0) {
+				printk(KERN_INFO "br_mdb_fill_info failed\n");
+				goto fail;
+			}
+			if (br_rports_fill_info(skb, cb, seq, dev) < 0) {
+				printk(KERN_INFO "br_rports_fill_info failed\n");
+				goto fail;
+			}
+
+			nlmsg_end(skb, nlh);
+cont:
+			idx++;
+		}
+	}
+
+	rcu_read_unlock();
+	cb->args[0] = idx;
+	return skb->len;
+
+fail:
+	rcu_read_unlock();
+	nlmsg_cancel(skb, nlh);
+	return skb->len;
+}
+
+void br_mdb_init(void)
+{
+	rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
+}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2417434..be69cd4 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1584,6 +1584,7 @@ void br_multicast_init(struct net_bridge *br)
 		    br_multicast_querier_expired, (unsigned long)br);
 	setup_timer(&br->multicast_query_timer, br_multicast_query_expired,
 		    (unsigned long)br);
+	br_mdb_init();
 }
 
 void br_multicast_open(struct net_bridge *br)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index eb9cd42..bf0f6d5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -432,6 +432,7 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p,
 extern int br_multicast_toggle(struct net_bridge *br, unsigned long val);
 extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
 extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
+extern void br_mdb_init(void);
 
 static inline bool br_multicast_is_router(struct net_bridge *br)
 {
-- 
1.7.7.6

^ permalink raw reply related

* [RFC PATCH 1/2] bridge: export port_no and port_id via IFA_INFO_DATA
From: Cong Wang @ 2012-11-27  9:49 UTC (permalink / raw)
  To: netdev
  Cc: Cong Wang, bridge, Herbert Xu, Jesper Dangaard Brouer,
	Thomas Graf, Stephen Hemminger, David S. Miller

Based on net-next.

This patch exports port->port_no port->port_id in the end of IFA_INFO_DATA.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Cong Wang <amwang@redhat.com>

---
 include/uapi/linux/if_link.h |    2 ++
 net/bridge/br_netlink.c      |    8 +++++++-
 2 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index bb58aeb..9cd91a9 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -218,6 +218,8 @@ enum {
 	IFLA_BRPORT_MODE,	/* mode (hairpin)          */
 	IFLA_BRPORT_GUARD,	/* bpdu guard              */
 	IFLA_BRPORT_PROTECT,	/* root port protection    */
+	IFLA_BRPORT_NO,		/* port no                 */
+	IFLA_BRPORT_ID,		/* port id                 */
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 65429b9..7b7414e 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -28,6 +28,8 @@ static inline size_t br_port_info_size(void)
 		+ nla_total_size(1)	/* IFLA_BRPORT_MODE */
 		+ nla_total_size(1)	/* IFLA_BRPORT_GUARD */
 		+ nla_total_size(1)	/* IFLA_BRPORT_PROTECT */
+		+ nla_total_size(2)	/* IFLA_BRPORT_NO */
+		+ nla_total_size(2)	/* IFLA_BRPORT_ID */
 		+ 0;
 }
 
@@ -53,7 +55,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 	    nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) ||
 	    nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
 	    nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)))
+	    nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
+	    nla_put_u16(skb, IFLA_BRPORT_NO, p->port_no) ||
+	    nla_put_u16(skb, IFLA_BRPORT_ID, p->port_id))
 		return -EMSGSIZE;
 
 	return 0;
@@ -168,6 +172,8 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_MODE]	= { .type = NLA_U8 },
 	[IFLA_BRPORT_GUARD]	= { .type = NLA_U8 },
 	[IFLA_BRPORT_PROTECT]	= { .type = NLA_U8 },
+	[IFLA_BRPORT_NO]	= { .type = NLA_U16 },
+	[IFLA_BRPORT_ID]	= { .type = NLA_U16 },
 };
 
 /* Change the state of the port and notify spanning tree */
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH 2/2] ewrk3: remove outdated comment
From: Paul Bolle @ 2012-11-27  9:48 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, richard -rw- weinberger

Remove an outdated comment, that should have been removed in the
patch named "MODULE_PARM conversions" from early 2005.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
---
 drivers/net/ethernet/dec/ewrk3.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/dec/ewrk3.c b/drivers/net/ethernet/dec/ewrk3.c
index 2ee1c47..9f992b9 100644
--- a/drivers/net/ethernet/dec/ewrk3.c
+++ b/drivers/net/ethernet/dec/ewrk3.c
@@ -1910,7 +1910,6 @@ static struct net_device *ewrk3_devs[MAX_NUM_EWRK3S];
 static int ndevs;
 static int io[MAX_NUM_EWRK3S+1] = { 0x300, 0, };
 
-/* '21' below should really be 'MAX_NUM_EWRK3S' */
 module_param_array(io, int, NULL, 0);
 module_param_array(irq, byte, NULL, 0);
 MODULE_PARM_DESC(io, "EtherWORKS 3 I/O base address(es)");
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH 1/2] ewrk3: silence GCC warning
From: Paul Bolle @ 2012-11-27  9:47 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, richard -rw- weinberger

Building ewrk3.o triggers this GCC warning:
    drivers/net/ethernet/dec/ewrk3.c: In function '__check_irq':
    drivers/net/ethernet/dec/ewrk3.c:1915:1: warning: return from incompatible pointer type [enabled by default]

This can be trivially fixed by changing the 'irq' parameter from int to
byte (which is an alias for unsigned char for module parameters).

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
---
Only compile tested.

 drivers/net/ethernet/dec/ewrk3.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/dec/ewrk3.c b/drivers/net/ethernet/dec/ewrk3.c
index 17ae8c6..2ee1c47 100644
--- a/drivers/net/ethernet/dec/ewrk3.c
+++ b/drivers/net/ethernet/dec/ewrk3.c
@@ -1912,7 +1912,7 @@ static int io[MAX_NUM_EWRK3S+1] = { 0x300, 0, };
 
 /* '21' below should really be 'MAX_NUM_EWRK3S' */
 module_param_array(io, int, NULL, 0);
-module_param_array(irq, int, NULL, 0);
+module_param_array(irq, byte, NULL, 0);
 MODULE_PARM_DESC(io, "EtherWORKS 3 I/O base address(es)");
 MODULE_PARM_DESC(irq, "EtherWORKS 3 IRQ number(s)");
 
-- 
1.7.7.6

^ permalink raw reply related

* Re: BQL support in gianfar causes network hickup
From: Keitel, Tino (ALC NetworX GmbH) @ 2012-11-27  9:36 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tino Keitel, Paul Gortmaker, netdev@vger.kernel.org
In-Reply-To: <1353950255.7553.6.camel@edumazet-glaptop>

On Mo, 2012-11-26 at 09:17 -0800, Eric Dumazet wrote:
> On Mon, 2012-11-26 at 18:08 +0100, Keitel, Tino (ALC NetworX GmbH)
> wrote:
> > On Mo, 2012-11-26 at 08:34 -0800, Eric Dumazet wrote:
> > > On Mon, 2012-11-26 at 11:01 +0100, Tino Keitel wrote:
> > > > On Sat, Nov 24, 2012 at 15:43:36 -0800, Eric Dumazet wrote:
> > > > 
> > > > [...]
> > > > 
> > > > > Hmm, I wonder if BQL makes a particular bug showing more often.
> > > > > 
> > > > > I see gianfar uses a very small watchdog_timeo of 1 second, while many
> > > > > drivers use 5 seconds.
> > > > > 
> > > > > What happens if you change this to 5 seconds ?
> > > > 
> > > > I still got the trace and a failing ptp client.
> > > > 
> > > 
> > > Thanks. Is this bug easy to trigger ?
> > > 
> > > I suspect a core issue and a race, likely to happen on your (non x86)
> > > hardware
> > > 
> > > Could you add the following debugging patch ?
> > 
> > No visible difference:
> 
> OK it seems you trigger the problem fast !
> 
> Please try the following as well :

Hi,

yes, it can be triggered within 2 minutes.

The patch makes no difference:

NETDEV WATCHDOG: eth1 (fsl-gianfar): transmit queue 0 timed out
------------[ cut here ]------------
WARNING:
at /home/keitelt1/src/git/linux-stable/net/sched/sch_generic.c:255
Modules linked in:
NIP: c02448e0 LR: c02448e0 CTR: c01c19b8
REGS: c7ffbe40 TRAP: 0700   Not tainted  (3.7.0-rc6-dirty)
MSR: 00029032 <EE,ME,IR,DR,RI>  CR: 22002044  XER: 20000000
TASK = c03dd370[0] 'swapper' THREAD: c03fe000
GPR00: c02448e0 c7ffbef0 c03dd370 0000003f 00000001 c001aea8 00000000
00000001
GPR08: 00000001 c03e0000 00000000 0000009d 22002084 1008eb5c 07ffb000
ffffffff
GPR16: 00000004 c0362c7c c03dfbf8 00200000 c0411ed0 c0411cd0 c0411ad0
ffffffff
GPR24: 00000000 c749e1d8 00000004 c783c330 c0400000 c03e0000 c749e000
00000000
NIP [c02448e0] dev_watchdog+0x288/0x298
LR [c02448e0] dev_watchdog+0x288/0x298
Call Trace:
[c7ffbef0] [c02448e0] dev_watchdog+0x288/0x298 (unreliable)
[c7ffbf20] [c00267f8] call_timer_fn+0x6c/0xd8
[c7ffbf50] [c00269e4] run_timer_softirq+0x180/0x1f8
[c7ffbfa0] [c0021144] __do_softirq+0xc4/0x160
[c7ffbff0] [c000d0b8] call_do_softirq+0x14/0x24
[c03ffe90] [c00058e8] do_softirq+0x8c/0xb8
[c03ffeb0] [c0021358] irq_exit+0x98/0xb4
[c03ffec0] [c0009fb0] timer_interrupt+0x158/0x170
[c03ffee0] [c000f02c] ret_from_except+0x0/0x14
--- Exception: 901 at cpu_idle+0x94/0x100
    LR = cpu_idle+0x94/0x100
[c03fffa0] [c00088ec] cpu_idle+0x5c/0x100 (unreliable)
[c03fffc0] [c03b37b0] start_kernel+0x2dc/0x2f0
[c03ffff0] [00003438] 0x3438
Instruction dump:
7d2903a6 4e800421 80fe01fc 4bffff74 7fc3f378 4bfecb7d 7fc4f378 7fe6fb78
7c651b78 3c60c038 38637280 48090e69 <0fe00000> 39200001 993cc7c9
4bffffb8
---[ end trace 26a9da9c2717d65b ]---

Regards,
Tino



^ permalink raw reply

* TCP and reordering
From: Saku Ytti @ 2012-11-27  9:32 UTC (permalink / raw)
  To: netdev

Today due to fast retransmit performance on links which cause
reordering is appalling.

Is it too esoteric situation to handle gracefully? Couldn't we
maintain 'reorder' counter in socket, which is increment when we get
two copies of same packet after duplicate ack, if this counter is
sufficiently high in relation to packet loss, we could start delaying
duplicate acks as we'd expect to receive the sequence very soon.

--
  ++ytti

^ permalink raw reply

* Re: [RFC net-next PATCH V1 7/9] net: frag queue locking per hash bucket
From: Jesper Dangaard Brouer @ 2012-11-27  9:07 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Florian Westphal, netdev, Pablo Neira Ayuso,
	Thomas Graf, Cong Wang, Patrick McHardy, Paul E. McKenney,
	Herbert Xu
In-Reply-To: <20121123130836.18764.9297.stgit@dragon>

On Fri, 2012-11-23 at 14:08 +0100, Jesper Dangaard Brouer wrote:
> DO NOT apply - patch not finished, can cause on OOPS/PANIC during hash rebuild

Think I have fixed this issue.  And its even fixed in this patch, as the
oops occurred, when testing, with a slightly older version of this
patch.


> This patch implements per hash bucket locking for the frag queue
> hash.  This removes two write locks, and the only remaining write
> lock is for protecting hash rebuild.  This essentially reduce the
> readers-writer lock to a rebuild lock.

[... cut ...]
> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
> index 1620a21..447423f 100644
> --- a/net/ipv4/inet_fragment.c
> +++ b/net/ipv4/inet_fragment.c
[...]
> @@ -102,9 +112,17 @@ EXPORT_SYMBOL(inet_frags_exit_net);
>  
>  static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
>  {
> -	write_lock(&f->lock);
> +	struct inet_frag_bucket *hb;
> +	unsigned int hash;
> +
> +	read_lock(&f->lock);
> +	hash = f->hashfn(fq);
> +	hb = &f->hash[hash];

Before the read_lock, were _here_ below then hash calc.  Which is wrong,
and caused the oops, as the hash result can change, when rnd is changed,
during hash rebuild.

> +
> +	spin_lock_bh(&hb->chain_lock);
>  	hlist_del(&fq->list);
> -	write_unlock(&f->lock);
> +	spin_unlock_bh(&hb->chain_lock);
> +	read_unlock(&f->lock);
>  	inet_frag_lru_del(fq);
>  }

[... cut ...]

^ permalink raw reply

* pull-request: can-next 2012-11-27
From: Marc Kleine-Budde @ 2012-11-27  8:57 UTC (permalink / raw)
  To: netdev; +Cc: linux-can@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 2876 bytes --]

Hello David,

this is pull request is for net-next. Contains a patch by Andreas
Larsson, which enables the sja1000 of driver to work under sparc.
AnilKumar Ch contributed a patch to improve the c_can support under
omap, Olivier Sobrie's patch brings support for the CAN/USB dongles
from Kvaser. In a bunch of patches by me missing MODULE_ALIAS and/or
MODULE_DEVICE_TABLE entries were added to the CAN drivers.

regards,
Marc

---

The following changes since commit 03f52a0a554210d5049eeed9f1bb29047dc807cb:

  ip6mr: Add sizeof verification to MRT6_ASSERT and MT6_PIM (2012-11-26 17:35:58 -0500)

are available in the git repository at:

  git://gitorious.org/linux-can/linux-can-next.git for-davem

for you to fetch changes up to fc8f40b10e58f37ec07e7b0cf85d19e899bdf23f:

  can: mpc5xxx_can: add MODULE_DEVICE_TABLE (2012-11-27 09:49:36 +0100)

----------------------------------------------------------------
Andreas Larsson (1):
      can: sja1000: Make sja1000_of_platform selectable and compilable on SPARC

AnilKumar Ch (1):
      can: c_can: Add d_can raminit support

Marc Kleine-Budde (8):
      can: bfin_can: add MODULE_ALIAS
      can: ti_hecc: add MODULE_ALIAS
      can: sja1000_platform: add MODULE_ALIAS
      can: cc770_platform: add MODULE_ALIAS and MODULE_DEVICE_TABLE
      can: flexcan: add MODULE_DEVICE_TABLE
      can: at91_can: add MODULE_DEVICE_TABLE
      can: c_can_platform: add MODULE_DEVICE_TABLE
      can: mpc5xxx_can: add MODULE_DEVICE_TABLE

Olivier Sobrie (1):
      can: kvaser_usb: Add support for Kvaser CAN/USB devices

 drivers/net/can/at91_can.c                    |    1 +
 drivers/net/can/bfin_can.c                    |    1 +
 drivers/net/can/c_can/c_can.c                 |   12 +
 drivers/net/can/c_can/c_can.h                 |    3 +
 drivers/net/can/c_can/c_can_platform.c        |   30 +-
 drivers/net/can/cc770/cc770_platform.c        |    2 +
 drivers/net/can/flexcan.c                     |    2 +
 drivers/net/can/mscan/mpc5xxx_can.c           |    1 +
 drivers/net/can/sja1000/Kconfig               |    2 +-
 drivers/net/can/sja1000/sja1000_of_platform.c |    6 +-
 drivers/net/can/sja1000/sja1000_platform.c    |    1 +
 drivers/net/can/ti_hecc.c                     |    1 +
 drivers/net/can/usb/Kconfig                   |   29 +
 drivers/net/can/usb/Makefile                  |    1 +
 drivers/net/can/usb/kvaser_usb.c              | 1627 +++++++++++++++++++++++++
 15 files changed, 1715 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/can/usb/kvaser_usb.c

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 261 bytes --]

^ permalink raw reply

* Re: [net-next 06/10] ixgbe: eliminate Smatch warnings in ixgbe_debugfs.c
From: Dan Carpenter @ 2012-11-27  7:18 UTC (permalink / raw)
  To: Hay, Joshua A
  Cc: Kirsher, Jeffrey T, davem@davemloft.net, netdev@vger.kernel.org,
	gospo@redhat.com, sassmann@redhat.com
In-Reply-To: <4329753313F5A742A09953EE6CC1B3CE28C7D984@ORSMSX108.amr.corp.intel.com>

On Tue, Nov 27, 2012 at 01:06:17AM +0000, Hay, Joshua A wrote:
> The return value will be changed to len to preserve error codes returned from simple_write_to_buffer.
> 
> However, changing the logic preceding this return breaks these functions.  If simple_write_to_buffer returns a positive value, other actions are performed with this value.  With this patch, the function will return immediately with that value instead.  This will effectively break the ixgbe_debugfs write operations.
> 
> So ultimately, the change should be: 
> > +	len = simple_write_to_buffer(ixgbe_dbg_reg_ops_buf,
> > +				     sizeof(ixgbe_dbg_reg_ops_buf)-1,
> > +				     ppos,
> > +				     buffer,
> > +				     count);
> > +	if (len < 0)
> > +		return -EFAULT;
> 	
> 	if (len < 0)
> 		return len;
> 

Yes.  Sorry, I wasn't reading carefully before.  That looks fine.

regards,
dan carpenter

^ permalink raw reply

* Re: private netdev flags into UAPI?
From: Or Gerlitz @ 2012-11-27  6:51 UTC (permalink / raw)
  To: David Howells; +Cc: Or Gerlitz, netdev
In-Reply-To: <990.1353981789@warthog.procyon.org.uk>

On 27/11/2012 04:03, David Howells wrote:
> Or Gerlitz <or.gerlitz@gmail.com> wrote:
>
>> On Mon, Nov 26, 2012 at 11:22 AM, David Howells <dhowells@redhat.com> wrote:
>>> They were exposed to userspace already
>> So the script carries the bug into a new directory... why? AFAIK,
>> intentionally there's no way to read private flags from user space, so
>> what's the point in defining them there?
> How should the script know what's private and what's not?  By the
> encapsulation of code inside __KERNEL__ blocks.  In their absence, everything
> is assumed to be public - given it is already part of the UAPI.  I don't know
> that the code is private rather than the comment is wrong.
>
>

makes sense, but I have pointed on a bug in the final result, so this 
way or another, the fact that the bug
existed before doesn't mean we should carry it over.

Or.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox