Netdev List
 help / color / mirror / Atom feed
* Re: linux-next: build failure after merge of the net-next tree
From: viresh kumar @ 2012-06-20  8:45 UTC (permalink / raw)
  To: David Miller
  Cc: bhupesh.sharma@st.com, sfr@canb.auug.org.au,
	netdev@vger.kernel.org, linux-next@vger.kernel.org,
	linux-kernel@vger.kernel.org, federico.vaga@gmail.com,
	giancarlo.asnaghi@st.com, wg@grandegger.com, mkl@pengutronix.de,
	spear-devel@list.st.com
In-Reply-To: <20120620.012037.783895812206310043.davem@davemloft.net>

On 20/06/12 09:20, David Miller wrote:
> From: viresh kumar <viresh.kumar2@arm.com>
> Date: Wed, 20 Jun 2012 09:08:34 +0100
>
>> > Please see following patchset from me, that got applied in linux-next
>> >
>> > https://lkml.org/lkml/2012/4/24/154
>> >
>> > Please check if this patchset is present in your build repo. I believe it should be
>> > there. If it is, then you shouldn't get these errors.
> Well, then Stephen shouldn't get those errors either.
>
> But obviously he did.

Surprised. I am not able to get why this happen. Because we don't have HAVE_CLK defined
for x86_64, then the inline routines like following should come into play:

static inline unsigned long clk_get_rate(struct clk *clk)
{
        return 0;
}


Is there any tricky part for such routines with drivers compiled as modules?

--
Viresh

-- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium.  Thank you.

^ permalink raw reply

* [PATCH net-next 0/6] bnx2x: link patch series
From: Yuval Mintz @ 2012-06-20  8:49 UTC (permalink / raw)
  To: davem, netdev; +Cc: eilong, Yuval Mintz

Hi Dave,

This patch series change bnx2x's link code. It contains both
semantic cleanup of the code as well as several patches that
add new support for optical modules - better 1G SFP support,
SFP+ Tx Fault support and implementation of the new ethtool API
for accessing the module's eeprom (on the bnx2x side).

Please consider applying this patch-series to 'net-next'.

Thanks,
Yuval Mintz

^ permalink raw reply

* [PATCH net-next 1/6] bnx2x: sfp+ Tx fault detection added
From: Yuval Mintz @ 2012-06-20  8:49 UTC (permalink / raw)
  To: davem, netdev; +Cc: eilong, Yuval Mintz
In-Reply-To: <1340182175-916-1-git-send-email-yuvalmin@broadcom.com>

Adds the ability to identify sfp+ modules' Tx fault, and when such
occur shut down the link.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h  |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c |  112 ++++++++++++++++++---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h |    1 +
 3 files changed, 99 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 6b77630..e7c390c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -1150,6 +1150,7 @@ struct drv_port_mb {
 	u32 link_status;
 	/* Driver should update this field on any link change event */
 
+	#define LINK_STATUS_NONE				(0<<0)
 	#define LINK_STATUS_LINK_FLAG_MASK			0x00000001
 	#define LINK_STATUS_LINK_UP				0x00000001
 	#define LINK_STATUS_SPEED_AND_DUPLEX_MASK		0x0000001E
@@ -1207,6 +1208,7 @@ struct drv_port_mb {
 	#define LINK_STATUS_PFC_ENABLED				0x20000000
 
 	#define LINK_STATUS_PHYSICAL_LINK_FLAG			0x40000000
+	#define LINK_STATUS_SFP_TX_FAULT			0x80000000
 
 	u32 port_stx;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 91aa565..9e008e4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -4746,6 +4746,8 @@ void bnx2x_sync_link(struct link_params *params,
 		vars->mac_type = MAC_TYPE_NONE;
 		if (vars->link_status & LINK_STATUS_PHYSICAL_LINK_FLAG)
 			vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG;
+		if (vars->link_status & LINK_STATUS_SFP_TX_FAULT)
+			vars->phy_flags |= PHY_SFP_TX_FAULT_FLAG;
 	}
 }
 
@@ -12925,30 +12927,41 @@ static void bnx2x_check_over_curr(struct link_params *params,
 		vars->phy_flags &= ~PHY_OVER_CURRENT_FLAG;
 }
 
-static void bnx2x_analyze_link_error(struct link_params *params,
-				     struct link_vars *vars, u32 lss_status,
-				     u8 notify)
+/* Returns 0 if no change occured since last check; 1 otherwise. */
+static u8 bnx2x_analyze_link_error(struct link_params *params,
+				    struct link_vars *vars, u32 status,
+				    u32 phy_flag, u32 link_flag, u8 notify)
 {
 	struct bnx2x *bp = params->bp;
 	/* Compare new value with previous value */
 	u8 led_mode;
-	u32 half_open_conn = (vars->phy_flags & PHY_HALF_OPEN_CONN_FLAG) > 0;
+	u32 old_status = (vars->phy_flags & phy_flag) ? 1 : 0;
 
-	if ((lss_status ^ half_open_conn) == 0)
-		return;
+	if ((status ^ old_status) == 0)
+		return 0;
 
 	/* If values differ */
-	DP(NETIF_MSG_LINK, "Link changed:%x %x->%x\n", vars->link_up,
-		       half_open_conn, lss_status);
+	switch (phy_flag) {
+	case PHY_HALF_OPEN_CONN_FLAG:
+		DP(NETIF_MSG_LINK, "Analyze Remote Fault\n");
+		break;
+	case PHY_SFP_TX_FAULT_FLAG:
+		DP(NETIF_MSG_LINK, "Analyze TX Fault\n");
+		break;
+	default:
+		DP(NETIF_MSG_LINK, "Analyze UNKOWN\n");
+	}
+	DP(NETIF_MSG_LINK, "Link changed:[%x %x]->%x\n", vars->link_up,
+	   old_status, status);
 
 	/* a. Update shmem->link_status accordingly
 	 * b. Update link_vars->link_up
 	 */
-	if (lss_status) {
-		DP(NETIF_MSG_LINK, "Remote Fault detected !!!\n");
+	if (status) {
 		vars->link_status &= ~LINK_STATUS_LINK_UP;
+		vars->link_status |= link_flag;
 		vars->link_up = 0;
-		vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG;
+		vars->phy_flags |= phy_flag;
 
 		/* activate nig drain */
 		REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 1);
@@ -12957,10 +12970,10 @@ static void bnx2x_analyze_link_error(struct link_params *params,
 		 */
 		led_mode = LED_MODE_OFF;
 	} else {
-		DP(NETIF_MSG_LINK, "Remote Fault cleared\n");
 		vars->link_status |= LINK_STATUS_LINK_UP;
+		vars->link_status &= ~link_flag;
 		vars->link_up = 1;
-		vars->phy_flags &= ~PHY_HALF_OPEN_CONN_FLAG;
+		vars->phy_flags &= ~phy_flag;
 		led_mode = LED_MODE_OPER;
 
 		/* Clear nig drain */
@@ -12977,6 +12990,8 @@ static void bnx2x_analyze_link_error(struct link_params *params,
 	vars->periodic_flags |= PERIODIC_FLAGS_LINK_EVENT;
 	if (notify)
 		bnx2x_notify_link_changed(bp);
+
+	return 1;
 }
 
 /******************************************************************************
@@ -13018,7 +13033,9 @@ int bnx2x_check_half_open_conn(struct link_params *params,
 		if (REG_RD(bp, mac_base + XMAC_REG_RX_LSS_STATUS))
 			lss_status = 1;
 
-		bnx2x_analyze_link_error(params, vars, lss_status, notify);
+		bnx2x_analyze_link_error(params, vars, lss_status,
+					 PHY_HALF_OPEN_CONN_FLAG,
+					 LINK_STATUS_NONE, notify);
 	} else if (REG_RD(bp, MISC_REG_RESET_REG_2) &
 		   (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << params->port)) {
 		/* Check E1X / E2 BMAC */
@@ -13035,11 +13052,55 @@ int bnx2x_check_half_open_conn(struct link_params *params,
 		REG_RD_DMAE(bp, mac_base + lss_status_reg, wb_data, 2);
 		lss_status = (wb_data[0] > 0);
 
-		bnx2x_analyze_link_error(params, vars, lss_status, notify);
+		bnx2x_analyze_link_error(params, vars, lss_status,
+					 PHY_HALF_OPEN_CONN_FLAG,
+					 LINK_STATUS_NONE, notify);
 	}
 	return 0;
 }
+static void bnx2x_sfp_tx_fault_detection(struct bnx2x_phy *phy,
+					 struct link_params *params,
+					 struct link_vars *vars)
+{
+	struct bnx2x *bp = params->bp;
+	u32 cfg_pin, value = 0;
+	u8 led_change, port = params->port;
 
+	/* Get The SFP+ TX_Fault controlling pin ([eg]pio) */
+	cfg_pin = (REG_RD(bp, params->shmem_base + offsetof(struct shmem_region,
+			  dev_info.port_hw_config[port].e3_cmn_pin_cfg)) &
+		   PORT_HW_CFG_E3_TX_FAULT_MASK) >>
+		  PORT_HW_CFG_E3_TX_FAULT_SHIFT;
+
+	if (bnx2x_get_cfg_pin(bp, cfg_pin, &value)) {
+		DP(NETIF_MSG_LINK, "Failed to read pin 0x%02x\n", cfg_pin);
+		return;
+	}
+
+	led_change = bnx2x_analyze_link_error(params, vars, value,
+					      PHY_SFP_TX_FAULT_FLAG,
+					      LINK_STATUS_SFP_TX_FAULT, 1);
+
+	if (led_change) {
+		/* Change TX_Fault led, set link status for further syncs */
+		u8 led_mode;
+
+		if (vars->phy_flags & PHY_SFP_TX_FAULT_FLAG) {
+			led_mode = MISC_REGISTERS_GPIO_HIGH;
+			vars->link_status |= LINK_STATUS_SFP_TX_FAULT;
+		} else {
+			led_mode = MISC_REGISTERS_GPIO_LOW;
+			vars->link_status &= ~LINK_STATUS_SFP_TX_FAULT;
+		}
+
+		/* If module is unapproved, led should be on regardless */
+		if (!(phy->flags & FLAGS_SFP_NOT_APPROVED)) {
+			DP(NETIF_MSG_LINK, "Change TX_Fault LED: ->%x\n",
+			   led_mode);
+			bnx2x_set_e3_module_fault_led(params, led_mode);
+		}
+	}
+}
 void bnx2x_period_func(struct link_params *params, struct link_vars *vars)
 {
 	u16 phy_idx;
@@ -13058,7 +13119,26 @@ void bnx2x_period_func(struct link_params *params, struct link_vars *vars)
 		struct bnx2x_phy *phy = &params->phy[INT_PHY];
 		bnx2x_set_aer_mmd(params, phy);
 		bnx2x_check_over_curr(params, vars);
-		bnx2x_warpcore_config_runtime(phy, params, vars);
+		if (vars->rx_tx_asic_rst)
+			bnx2x_warpcore_config_runtime(phy, params, vars);
+
+		if ((REG_RD(bp, params->shmem_base +
+			    offsetof(struct shmem_region, dev_info.
+				port_hw_config[params->port].default_cfg))
+		    & PORT_HW_CFG_NET_SERDES_IF_MASK) ==
+		    PORT_HW_CFG_NET_SERDES_IF_SFI) {
+			if (bnx2x_is_sfp_module_plugged(phy, params)) {
+				bnx2x_sfp_tx_fault_detection(phy, params, vars);
+			} else if (vars->link_status &
+				LINK_STATUS_SFP_TX_FAULT) {
+				/* Clean trail, interrupt corrects the leds */
+				vars->link_status &= ~LINK_STATUS_SFP_TX_FAULT;
+				vars->phy_flags &= ~PHY_SFP_TX_FAULT_FLAG;
+				/* Update link status in the shared memory */
+				bnx2x_update_mng(params, vars->link_status);
+			}
+		}
+
 	}
 
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index e920800..cd1f271 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -307,6 +307,7 @@ struct link_vars {
 #define PHY_PHYSICAL_LINK_FLAG		(1<<2)
 #define PHY_HALF_OPEN_CONN_FLAG		(1<<3)
 #define PHY_OVER_CURRENT_FLAG		(1<<4)
+#define PHY_SFP_TX_FAULT_FLAG		(1<<5)
 
 	u8 mac_type;
 #define MAC_TYPE_NONE		0
-- 
1.7.9.rc2

^ permalink raw reply related

* [PATCH net-next 4/6] bnx2x: revised link register access
From: Yuval Mintz @ 2012-06-20  8:49 UTC (permalink / raw)
  To: davem, netdev; +Cc: eilong, Yuval Mintz, Yaniv Rosner
In-Reply-To: <1340182175-916-1-git-send-email-yuvalmin@broadcom.com>

This is a semantic change, cleaning some sections in which the bnx2x
handles the phy's registers.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Yaniv Rosner <yaniv.rosner@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c |  238 +++++++++-------------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h |    5 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h  |    1 +
 3 files changed, 100 insertions(+), 144 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 17ddb9f..fc7ff06 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -3755,44 +3755,35 @@ static u8 bnx2x_ext_phy_resolve_fc(struct bnx2x_phy *phy,
 static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 					struct link_params *params,
 					struct link_vars *vars) {
-	u16 val16 = 0, lane, bam37 = 0;
-	struct bnx2x *bp = params->bp;
+	u16 val16 = 0, lane, i;
+	struct bnx2x *bp = params->bp;
+	static struct bnx2x_reg_set reg_set[] = {
+		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7},
+		{MDIO_AN_DEVAD, MDIO_WC_REG_PAR_DET_10G_CTRL, 0},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL, 0},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_XGXSBLK1_LANECTRL0, 0xff},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_XGXSBLK1_LANECTRL1, 0x5555},
+		{MDIO_PMA_DEVAD, MDIO_WC_REG_IEEE0BLK_AUTONEGNP, 0x0},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_RX66_CONTROL, 0x7415},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_MISC2, 0x6190},
+		/* Disable Autoneg: re-enable it after adv is done. */
+		{MDIO_AN_DEVAD, MDIO_WC_REG_IEEE0BLK_MIICNTL, 0}
+	};
 	DP(NETIF_MSG_LINK, "Enable Auto Negotiation for KR\n");
 	/* Set to default registers that may be overriden by 10G force */
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7);
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
-			 MDIO_WC_REG_PAR_DET_10G_CTRL, 0);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL, 0);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_XGXSBLK1_LANECTRL0, 0xff);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_XGXSBLK1_LANECTRL1, 0x5555);
-	bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD,
-			 MDIO_WC_REG_IEEE0BLK_AUTONEGNP, 0x0);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_RX66_CONTROL, 0x7415);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_MISC2, 0x6190);
-	/* Disable Autoneg: re-enable it after adv is done. */
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
-			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0);
+	for (i = 0; i < sizeof(reg_set)/sizeof(struct bnx2x_reg_set); i++)
+		bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg,
+				 reg_set[i].val);
 
 	/* Check adding advertisement for 1G KX */
 	if (((vars->line_speed == SPEED_AUTO_NEG) &&
 	     (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) ||
 	    (vars->line_speed == SPEED_1000)) {
-		u16 sd_digital;
+		u32 addr = MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2;
 		val16 |= (1<<5);
 
 		/* Enable CL37 1G Parallel Detect */
-		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, &sd_digital);
-		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2,
-				 (sd_digital | 0x1));
-
+		bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD, addr, 0x1);
 		DP(NETIF_MSG_LINK, "Advertize 1G\n");
 	}
 	if (((vars->line_speed == SPEED_AUTO_NEG) &&
@@ -3802,7 +3793,7 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 		val16 |= (1<<7);
 		/* Enable 10G Parallel Detect */
 		bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
-				MDIO_WC_REG_PAR_DET_10G_CTRL, 1);
+				 MDIO_WC_REG_PAR_DET_10G_CTRL, 1);
 
 		DP(NETIF_MSG_LINK, "Advertize 10G\n");
 	}
@@ -3836,10 +3827,9 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 		   offsetof(struct shmem_region, dev_info.
 			    port_hw_config[params->port].default_cfg)) &
 	    PORT_HW_CFG_ENABLE_BAM_ON_KR_ENABLED) {
-		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-				MDIO_WC_REG_DIGITAL6_MP5_NEXTPAGECTRL, &bam37);
-		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_DIGITAL6_MP5_NEXTPAGECTRL, bam37 | 1);
+		bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+					 MDIO_WC_REG_DIGITAL6_MP5_NEXTPAGECTRL,
+					 1);
 		DP(NETIF_MSG_LINK, "Enable CL37 BAM on KR\n");
 	}
 
@@ -3853,11 +3843,8 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 		DP(NETIF_MSG_LINK, "Enable AN KR work-around\n");
 		vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY;
 	}
-	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_DIGITAL5_MISC7, &val16);
-
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_DIGITAL5_MISC7, val16 | 0x100);
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_DIGITAL5_MISC7, 0x100);
 
 	/* Over 1G - AN local device user page 1 */
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
@@ -3874,50 +3861,35 @@ static void bnx2x_warpcore_set_10G_KR(struct bnx2x_phy *phy,
 				      struct link_vars *vars)
 {
 	struct bnx2x *bp = params->bp;
-	u16 val;
-
-	/* Disable Autoneg */
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7);
-
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
-			 MDIO_WC_REG_PAR_DET_10G_CTRL, 0);
-
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL, 0x3f00);
-
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
-			 MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1, 0);
-
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
-			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x0);
-
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_DIGITAL3_UP1, 0x1);
-
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_DIGITAL5_MISC7, 0xa);
-
-	/* Disable CL36 PCS Tx */
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_XGXSBLK1_LANECTRL0, 0x0);
-
-	/* Double Wide Single Data Rate @ pll rate */
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_XGXSBLK1_LANECTRL1, 0xFFFF);
-
-	/* Leave cl72 training enable, needed for KR */
-	bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD,
+	u16 i;
+	static struct bnx2x_reg_set reg_set[] = {
+		/* Disable Autoneg */
+		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7},
+		{MDIO_AN_DEVAD, MDIO_WC_REG_PAR_DET_10G_CTRL, 0},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL,
+			0x3f00},
+		{MDIO_AN_DEVAD, MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1, 0},
+		{MDIO_AN_DEVAD, MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x0},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL3_UP1, 0x1},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL5_MISC7, 0xa},
+		/* Disable CL36 PCS Tx */
+		{MDIO_WC_DEVAD, MDIO_WC_REG_XGXSBLK1_LANECTRL0, 0x0},
+		/* Double Wide Single Data Rate @ pll rate */
+		{MDIO_WC_DEVAD, MDIO_WC_REG_XGXSBLK1_LANECTRL1, 0xFFFF},
+		/* Leave cl72 training enable, needed for KR */
+		{MDIO_PMA_DEVAD,
 		MDIO_WC_REG_PMD_IEEE9BLK_TENGBASE_KR_PMD_CONTROL_REGISTER_150,
-		0x2);
+		0x2}
+	};
+
+	for (i = 0; i < sizeof(reg_set)/sizeof(struct bnx2x_reg_set); i++)
+		bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg,
+				 reg_set[i].val);
 
 	/* Leave CL72 enabled */
-	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL,
-			 &val);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL,
-			 val | 0x3800);
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL,
+				 0x3800);
 
 	/* Set speed via PMA/PMD register */
 	bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD,
@@ -3953,16 +3925,12 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	u16 misc1_val, tap_val, tx_driver_val, lane, val;
 	/* Hold rxSeqStart */
-	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_DSC2B0_DSC_MISC_CTRL0, &val);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_DSC2B0_DSC_MISC_CTRL0, (val | 0x8000));
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_DSC2B0_DSC_MISC_CTRL0, 0x8000);
 
 	/* Hold tx_fifo_reset */
-	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X3, &val);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X3, (val | 0x1));
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X3, 0x1);
 
 	/* Disable CL73 AN */
 	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0);
@@ -3974,10 +3942,8 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy,
 			 MDIO_WC_REG_FX100_CTRL1, (val & 0xFFFA));
 
 	/* Disable 100FX Idle detect */
-	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_FX100_CTRL3, &val);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_FX100_CTRL3, (val | 0x0080));
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_FX100_CTRL3, 0x0080);
 
 	/* Set Block address to Remote PHY & Clear forced_speed[5] */
 	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
@@ -4038,16 +4004,12 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy,
 			 tx_driver_val);
 
 	/* Enable fiber mode, enable and invert sig_det */
-	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1, &val);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1, val | 0xd);
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1, 0xd);
 
 	/* Set Block address to Remote PHY & Set forced_speed[5], 40bit mode */
-	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_DIGITAL4_MISC3, &val);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_DIGITAL4_MISC3, val | 0x8080);
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_DIGITAL4_MISC3, 0x8080);
 
 	/* Enable LPI pass through */
 	DP(NETIF_MSG_LINK, "Configure WC for LPI pass through\n");
@@ -4245,40 +4207,35 @@ static void bnx2x_warpcore_clear_regs(struct bnx2x_phy *phy,
 				      u16 lane)
 {
 	struct bnx2x *bp = params->bp;
-	u16 val16;
-
+	u16 i;
+	static struct bnx2x_reg_set wc_regs[] = {
+		{MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_FX100_CTRL1, 0x014a},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_FX100_CTRL3, 0x0800},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL4_MISC3, 0x8008},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1,
+			0x0195},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2,
+			0x0007},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X3,
+			0x0002},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_MISC1, 0x6000},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_TX_FIR_TAP, 0x0000},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x2040},
+		{MDIO_WC_DEVAD, MDIO_WC_REG_COMBO_IEEE0_MIICTRL, 0x0140}
+	};
 	/* Set XFI clock comp as default. */
-	bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-			MDIO_WC_REG_RX66_CONTROL, &val16);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_RX66_CONTROL, val16 | (3<<13));
+	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+				 MDIO_WC_REG_RX66_CONTROL, (3<<13));
+
+	for (i = 0; i < sizeof(wc_regs)/sizeof(struct bnx2x_reg_set); i++)
+		bnx2x_cl45_write(bp, phy, wc_regs[i].devad, wc_regs[i].reg,
+				 wc_regs[i].val);
 
-	bnx2x_warpcore_reset_lane(bp, phy, 1);
-	bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_FX100_CTRL1, 0x014a);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_FX100_CTRL3, 0x0800);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_DIGITAL4_MISC3, 0x8008);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1, 0x0195);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x0007);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X3, 0x0002);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_SERDESDIGITAL_MISC1, 0x6000);
 	lane = bnx2x_get_warpcore_lane(phy, params);
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_TX_FIR_TAP, 0x0000);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 			 MDIO_WC_REG_TX0_TX_DRIVER + 0x10*lane, 0x0990);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x2040);
-	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-			 MDIO_WC_REG_COMBO_IEEE0_MIICTRL, 0x0140);
-	bnx2x_warpcore_reset_lane(bp, phy, 0);
+
 }
 
 static int bnx2x_get_mod_abs_int_cfg(struct bnx2x *bp,
@@ -4606,12 +4563,9 @@ static void bnx2x_set_warpcore_loopback(struct bnx2x_phy *phy,
 		CL22_WR_OVER_CL45(bp, phy, MDIO_REG_BANK_AER_BLOCK,
 				  MDIO_AER_BLOCK_AER_REG, 0);
 		/* Enable 1G MDIO (1-copy) */
-		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-				MDIO_WC_REG_XGXSBLK0_XGXSCONTROL,
-				&val16);
-		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-				MDIO_WC_REG_XGXSBLK0_XGXSCONTROL,
-				val16 | 0x10);
+		bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+					 MDIO_WC_REG_XGXSBLK0_XGXSCONTROL,
+					 0x10);
 		/* Set 1G loopback based on lane (1-copy) */
 		lane = bnx2x_get_warpcore_lane(phy, params);
 		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
@@ -4624,16 +4578,12 @@ static void bnx2x_set_warpcore_loopback(struct bnx2x_phy *phy,
 		bnx2x_set_aer_mmd(params, phy);
 	} else {
 		/* 10G & 20G */
-		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-				MDIO_WC_REG_COMBO_IEEE0_MIICTRL, &val16);
-		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-				MDIO_WC_REG_COMBO_IEEE0_MIICTRL, val16 |
-				 0x4000);
+		bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+					 MDIO_WC_REG_COMBO_IEEE0_MIICTRL,
+					 0x4000);
 
-		bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-				MDIO_WC_REG_IEEE0BLK_MIICNTL, &val16);
-		bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
-				MDIO_WC_REG_IEEE0BLK_MIICNTL, val16 | 0x1);
+		bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
+					 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1);
 	}
 }
 
@@ -10820,7 +10770,7 @@ static u8 bnx2x_54618se_read_status(struct bnx2x_phy *phy,
 
 	/* Get speed operation status */
 	bnx2x_cl22_read(bp, phy,
-			0x19,
+			MDIO_REG_GPHY_AUX_STATUS,
 			&legacy_status);
 	DP(NETIF_MSG_LINK, "54618SE read_status: 0x%x\n", legacy_status);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index cd1f271..7b6051b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -125,6 +125,11 @@ typedef void (*set_link_led_t)(struct bnx2x_phy *phy,
 			       struct link_params *params, u8 mode);
 typedef void (*phy_specific_func_t)(struct bnx2x_phy *phy,
 				    struct link_params *params, u32 action);
+struct bnx2x_reg_set {
+	u8  devad;
+	u16 reg;
+	u16 val;
+};
 
 struct bnx2x_phy {
 	u32 type;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index a78e356..f371e3c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -7160,6 +7160,7 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_REG_GPHY_EEE_1G		(0x1 << 2)
 #define MDIO_REG_GPHY_EEE_100		(0x1 << 1)
 #define MDIO_REG_GPHY_EEE_RESOLVED		0x803e
+#define MDIO_REG_GPHY_AUX_STATUS			0x19
 #define MDIO_REG_INTR_STATUS				0x1a
 #define MDIO_REG_INTR_MASK				0x1b
 #define MDIO_REG_INTR_MASK_LINK_STATUS			(0x1 << 1)
-- 
1.7.9.rc2

^ permalink raw reply related

* [PATCH net-next 2/6] bnx2x: link cleanup
From: Yuval Mintz @ 2012-06-20  8:49 UTC (permalink / raw)
  To: davem, netdev; +Cc: eilong, Yuval Mintz, Yaniv Rosner
In-Reply-To: <1340182175-916-1-git-send-email-yuvalmin@broadcom.com>

This patch does several things:
 1. Add static to function when possible.
 2. Correct comments.
 3. Change msleep(1) --> usleep_range(1000, 1000)
 4. Remove dead code.
 5. Change 'if(rc != 0)' --> if(rc)

Most of these changes are purely semantic.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Yaniv Rosner <yaniv.rosner@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c |  309 +++++++++++-----------
 1 files changed, 152 insertions(+), 157 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 9e008e4..121275e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -284,7 +284,6 @@
 #define ETS_E3B0_PBF_MIN_W_VAL				(10000)
 
 #define MAX_PACKET_SIZE					(9700)
-#define WC_UC_TIMEOUT					100
 #define MAX_KR_LINK_RETRY				4
 
 /**********************************************************/
@@ -1880,11 +1879,6 @@ static int bnx2x_emac_enable(struct link_params *params,
 	bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE,
 		      EMAC_TX_MODE_RESET);
 
-	if (CHIP_REV_IS_SLOW(bp)) {
-		/* config GMII mode */
-		val = REG_RD(bp, emac_base + EMAC_REG_EMAC_MODE);
-		EMAC_WR(bp, EMAC_REG_EMAC_MODE, (val | EMAC_MODE_PORT_GMII));
-	} else { /* ASIC */
 		/* pause enable/disable */
 		bnx2x_bits_dis(bp, emac_base + EMAC_REG_EMAC_RX_MODE,
 			       EMAC_RX_MODE_FLOW_EN);
@@ -1907,7 +1901,6 @@ static int bnx2x_emac_enable(struct link_params *params,
 		} else
 			bnx2x_bits_en(bp, emac_base + EMAC_REG_EMAC_TX_MODE,
 				      EMAC_TX_MODE_FLOW_EN);
-	}
 
 	/* KEEP_VLAN_TAG, promiscuous */
 	val = REG_RD(bp, emac_base + EMAC_REG_EMAC_RX_MODE);
@@ -1946,23 +1939,23 @@ static int bnx2x_emac_enable(struct link_params *params,
 		val &= ~0x810;
 	EMAC_WR(bp, EMAC_REG_EMAC_MODE, val);
 
-	/* enable emac */
+	/* Enable emac */
 	REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 1);
 
-	/* enable emac for jumbo packets */
+	/* Enable emac for jumbo packets */
 	EMAC_WR(bp, EMAC_REG_EMAC_RX_MTU_SIZE,
 		(EMAC_RX_MTU_SIZE_JUMBO_ENA |
 		 (ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD)));
 
-	/* strip CRC */
+	/* Strip CRC */
 	REG_WR(bp, NIG_REG_NIG_INGRESS_EMAC0_NO_CRC + port*4, 0x1);
 
-	/* disable the NIG in/out to the bmac */
+	/* Disable the NIG in/out to the bmac */
 	REG_WR(bp, NIG_REG_BMAC0_IN_EN + port*4, 0x0);
 	REG_WR(bp, NIG_REG_BMAC0_PAUSE_OUT_EN + port*4, 0x0);
 	REG_WR(bp, NIG_REG_BMAC0_OUT_EN + port*4, 0x0);
 
-	/* enable the NIG in/out to the emac */
+	/* Enable the NIG in/out to the emac */
 	REG_WR(bp, NIG_REG_EMAC0_IN_EN + port*4, 0x1);
 	val = 0;
 	if ((params->feature_config_flags &
@@ -1997,7 +1990,7 @@ static void bnx2x_update_pfc_bmac1(struct link_params *params,
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_CONTROL, wb_data, 2);
 
-	/* tx control */
+	/* TX control */
 	val = 0xc0;
 	if (!(params->feature_config_flags &
 	      FEATURE_CONFIG_PFC_ENABLED) &&
@@ -2057,7 +2050,7 @@ static void bnx2x_update_pfc_bmac2(struct link_params *params,
 		wb_data[0] &= ~(1<<2);
 	} else {
 		DP(NETIF_MSG_LINK, "PFC is disabled\n");
-		/* disable PFC RX & TX & STATS and set 8 COS */
+		/* Disable PFC RX & TX & STATS and set 8 COS */
 		wb_data[0] = 0x8;
 		wb_data[1] = 0;
 	}
@@ -2151,7 +2144,7 @@ static int bnx2x_pfc_brb_get_config_params(
 			PFC_E2_BRB_MAC_FULL_XOFF_THR_PAUSE;
 		config_val->pauseable_th.full_xon =
 			PFC_E2_BRB_MAC_FULL_XON_THR_PAUSE;
-		/* non pause able*/
+		/* Non pause able*/
 		config_val->non_pauseable_th.pause_xoff =
 			PFC_E2_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE;
 		config_val->non_pauseable_th.pause_xon =
@@ -2179,7 +2172,7 @@ static int bnx2x_pfc_brb_get_config_params(
 			PFC_E3A0_BRB_MAC_FULL_XOFF_THR_PAUSE;
 		config_val->pauseable_th.full_xon =
 			PFC_E3A0_BRB_MAC_FULL_XON_THR_PAUSE;
-		/* non pause able*/
+		/* Non pause able*/
 		config_val->non_pauseable_th.pause_xoff =
 			PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE;
 		config_val->non_pauseable_th.pause_xon =
@@ -2209,7 +2202,7 @@ static int bnx2x_pfc_brb_get_config_params(
 				PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_PAUSE;
 			config_val->pauseable_th.full_xon =
 				PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_PAUSE;
-			/* non pause able*/
+			/* Non pause able*/
 			config_val->non_pauseable_th.pause_xoff =
 			PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE;
 			config_val->non_pauseable_th.pause_xon =
@@ -2227,7 +2220,7 @@ static int bnx2x_pfc_brb_get_config_params(
 				PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_PAUSE;
 			config_val->pauseable_th.full_xon =
 				PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_PAUSE;
-			/* non pause able*/
+			/* Non pause able*/
 			config_val->non_pauseable_th.pause_xoff =
 				PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE;
 			config_val->non_pauseable_th.pause_xon =
@@ -2284,7 +2277,7 @@ static void bnx2x_pfc_brb_get_e3b0_config_params(
 
 			if (pfc_params->cos0_pauseable !=
 				pfc_params->cos1_pauseable) {
-				/* nonpauseable= Lossy + pauseable = Lossless*/
+				/* Nonpauseable= Lossy + pauseable = Lossless*/
 				e3b0_val->lb_guarantied =
 					PFC_E3B0_2P_MIX_PAUSE_LB_GUART;
 				e3b0_val->mac_0_class_t_guarantied =
@@ -2483,9 +2476,9 @@ static int bnx2x_update_pfc_brb(struct link_params *params,
 *  This function is needed because NIG ARB_CREDIT_WEIGHT_X are
 *  not continues and ARB_CREDIT_WEIGHT_0 + offset is suitable.
 ******************************************************************************/
-int bnx2x_pfc_nig_rx_priority_mask(struct bnx2x *bp,
-					      u8 cos_entry,
-					      u32 priority_mask, u8 port)
+static int bnx2x_pfc_nig_rx_priority_mask(struct bnx2x *bp,
+					   u8 cos_entry,
+					   u32 priority_mask, u8 port)
 {
 	u32 nig_reg_rx_priority_mask_add = 0;
 
@@ -2612,7 +2605,7 @@ static void bnx2x_update_pfc_nig(struct link_params *params,
 	REG_WR(bp, port ? NIG_REG_LLFC_EGRESS_SRC_ENABLE_1 :
 	       NIG_REG_LLFC_EGRESS_SRC_ENABLE_0, 0x7);
 
-	/* output enable for RX_XCM # IF */
+	/* Output enable for RX_XCM # IF */
 	REG_WR(bp, port ? NIG_REG_XCM1_OUT_EN :
 	       NIG_REG_XCM0_OUT_EN, xcm_out_en);
 
@@ -2661,10 +2654,10 @@ int bnx2x_update_pfc(struct link_params *params,
 
 	bnx2x_update_mng(params, vars->link_status);
 
-	/* update NIG params */
+	/* Update NIG params */
 	bnx2x_update_pfc_nig(params, vars, pfc_params);
 
-	/* update BRB params */
+	/* Update BRB params */
 	bnx2x_status = bnx2x_update_pfc_brb(params, vars, pfc_params);
 	if (bnx2x_status)
 		return bnx2x_status;
@@ -2719,7 +2712,7 @@ static int bnx2x_bmac1_enable(struct link_params *params,
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_BMAC_XGXS_CONTROL,
 		    wb_data, 2);
 
-	/* tx MAC SA */
+	/* TX MAC SA */
 	wb_data[0] = ((params->mac_addr[2] << 24) |
 		       (params->mac_addr[3] << 16) |
 		       (params->mac_addr[4] << 8) |
@@ -2728,7 +2721,7 @@ static int bnx2x_bmac1_enable(struct link_params *params,
 			params->mac_addr[1]);
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_TX_SOURCE_ADDR, wb_data, 2);
 
-	/* mac control */
+	/* MAC control */
 	val = 0x3;
 	if (is_lb) {
 		val |= 0x4;
@@ -2738,24 +2731,24 @@ static int bnx2x_bmac1_enable(struct link_params *params,
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_BMAC_CONTROL, wb_data, 2);
 
-	/* set rx mtu */
+	/* Set rx mtu */
 	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_MAX_SIZE, wb_data, 2);
 
 	bnx2x_update_pfc_bmac1(params, vars);
 
-	/* set tx mtu */
+	/* Set tx mtu */
 	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_TX_MAX_SIZE, wb_data, 2);
 
-	/* set cnt max size */
+	/* Set cnt max size */
 	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_CNT_MAX_SIZE, wb_data, 2);
 
-	/* configure safc */
+	/* Configure SAFC */
 	wb_data[0] = 0x1000200;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_LLFC_MSG_FLDS,
@@ -2789,7 +2782,7 @@ static int bnx2x_bmac2_enable(struct link_params *params,
 
 	udelay(30);
 
-	/* tx MAC SA */
+	/* TX MAC SA */
 	wb_data[0] = ((params->mac_addr[2] << 24) |
 		       (params->mac_addr[3] << 16) |
 		       (params->mac_addr[4] << 8) |
@@ -2808,18 +2801,18 @@ static int bnx2x_bmac2_enable(struct link_params *params,
 		    wb_data, 2);
 	udelay(30);
 
-	/* set rx mtu */
+	/* Set RX MTU */
 	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_RX_MAX_SIZE, wb_data, 2);
 	udelay(30);
 
-	/* set tx mtu */
+	/* Set TX MTU */
 	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_TX_MAX_SIZE, wb_data, 2);
 	udelay(30);
-	/* set cnt max size */
+	/* Set cnt max size */
 	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD - 2;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_CNT_MAX_SIZE, wb_data, 2);
@@ -2837,15 +2830,15 @@ static int bnx2x_bmac_enable(struct link_params *params,
 	u8 port = params->port;
 	struct bnx2x *bp = params->bp;
 	u32 val;
-	/* reset and unreset the BigMac */
+	/* Reset and unreset the BigMac */
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR,
 	       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
-	msleep(1);
+	usleep_range(1000, 1000);
 
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
 	       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
 
-	/* enable access for bmac registers */
+	/* Enable access for bmac registers */
 	REG_WR(bp, NIG_REG_BMAC0_REGS_OUT_EN + port*4, 0x1);
 
 	/* Enable BMAC according to BMAC type*/
@@ -2903,7 +2896,7 @@ static void bnx2x_bmac_rx_disable(struct bnx2x *bp, u8 port)
 					BIGMAC_REGISTER_BMAC_CONTROL,
 					wb_data, 2);
 		}
-		msleep(1);
+		usleep_range(1000, 1000);
 	}
 }
 
@@ -2915,10 +2908,10 @@ static int bnx2x_pbf_update(struct link_params *params, u32 flow_ctrl,
 	u32 init_crd, crd;
 	u32 count = 1000;
 
-	/* disable port */
+	/* Disable port */
 	REG_WR(bp, PBF_REG_DISABLE_NEW_TASK_PROC_P0 + port*4, 0x1);
 
-	/* wait for init credit */
+	/* Wait for init credit */
 	init_crd = REG_RD(bp, PBF_REG_P0_INIT_CRD + port*4);
 	crd = REG_RD(bp, PBF_REG_P0_CREDIT + port*8);
 	DP(NETIF_MSG_LINK, "init_crd 0x%x  crd 0x%x\n", init_crd, crd);
@@ -2942,18 +2935,18 @@ static int bnx2x_pbf_update(struct link_params *params, u32 flow_ctrl,
 	    line_speed == SPEED_1000 ||
 	    line_speed == SPEED_2500) {
 		REG_WR(bp, PBF_REG_P0_PAUSE_ENABLE + port*4, 1);
-		/* update threshold */
+		/* Update threshold */
 		REG_WR(bp, PBF_REG_P0_ARB_THRSH + port*4, 0);
-		/* update init credit */
+		/* Update init credit */
 		init_crd = 778;		/* (800-18-4) */
 
 	} else {
 		u32 thresh = (ETH_MAX_JUMBO_PACKET_SIZE +
 			      ETH_OVREHEAD)/16;
 		REG_WR(bp, PBF_REG_P0_PAUSE_ENABLE + port*4, 0);
-		/* update threshold */
+		/* Update threshold */
 		REG_WR(bp, PBF_REG_P0_ARB_THRSH + port*4, thresh);
-		/* update init credit */
+		/* Update init credit */
 		switch (line_speed) {
 		case SPEED_10000:
 			init_crd = thresh + 553 - 22;
@@ -2968,12 +2961,12 @@ static int bnx2x_pbf_update(struct link_params *params, u32 flow_ctrl,
 	DP(NETIF_MSG_LINK, "PBF updated to speed %d credit %d\n",
 		 line_speed, init_crd);
 
-	/* probe the credit changes */
+	/* Probe the credit changes */
 	REG_WR(bp, PBF_REG_INIT_P0 + port*4, 0x1);
 	msleep(5);
 	REG_WR(bp, PBF_REG_INIT_P0 + port*4, 0x0);
 
-	/* enable port */
+	/* Enable port */
 	REG_WR(bp, PBF_REG_DISABLE_NEW_TASK_PROC_P0 + port*4, 0x0);
 	return 0;
 }
@@ -3040,7 +3033,7 @@ static int bnx2x_cl22_write(struct bnx2x *bp,
 	REG_WR(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_MODE,
 	       mode & ~EMAC_MDIO_MODE_CLAUSE_45);
 
-	/* address */
+	/* Address */
 	tmp = ((phy->addr << 21) | (reg << 16) | val |
 	       EMAC_MDIO_COMM_COMMAND_WRITE_22 |
 	       EMAC_MDIO_COMM_START_BUSY);
@@ -3076,7 +3069,7 @@ static int bnx2x_cl22_read(struct bnx2x *bp,
 	REG_WR(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_MODE,
 	       mode & ~EMAC_MDIO_MODE_CLAUSE_45);
 
-	/* address */
+	/* Address */
 	val = ((phy->addr << 21) | (reg << 16) |
 	       EMAC_MDIO_COMM_COMMAND_READ_22 |
 	       EMAC_MDIO_COMM_START_BUSY);
@@ -3114,7 +3107,7 @@ static int bnx2x_cl45_read(struct bnx2x *bp, struct bnx2x_phy *phy,
 	if (phy->flags & FLAGS_MDC_MDIO_WA_B0)
 		bnx2x_bits_en(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS,
 			      EMAC_MDIO_STATUS_10MB);
-	/* address */
+	/* Address */
 	val = ((phy->addr << 21) | (devad << 16) | reg |
 	       EMAC_MDIO_COMM_COMMAND_ADDRESS |
 	       EMAC_MDIO_COMM_START_BUSY);
@@ -3135,7 +3128,7 @@ static int bnx2x_cl45_read(struct bnx2x *bp, struct bnx2x_phy *phy,
 		*ret_val = 0;
 		rc = -EFAULT;
 	} else {
-		/* data */
+		/* Data */
 		val = ((phy->addr << 21) | (devad << 16) |
 		       EMAC_MDIO_COMM_COMMAND_READ_45 |
 		       EMAC_MDIO_COMM_START_BUSY);
@@ -3183,7 +3176,7 @@ static int bnx2x_cl45_write(struct bnx2x *bp, struct bnx2x_phy *phy,
 		bnx2x_bits_en(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS,
 			      EMAC_MDIO_STATUS_10MB);
 
-	/* address */
+	/* Address */
 	tmp = ((phy->addr << 21) | (devad << 16) | reg |
 	       EMAC_MDIO_COMM_COMMAND_ADDRESS |
 	       EMAC_MDIO_COMM_START_BUSY);
@@ -3203,7 +3196,7 @@ static int bnx2x_cl45_write(struct bnx2x *bp, struct bnx2x_phy *phy,
 		netdev_err(bp->dev,  "MDC/MDIO access timeout\n");
 		rc = -EFAULT;
 	} else {
-		/* data */
+		/* Data */
 		tmp = ((phy->addr << 21) | (devad << 16) | val |
 		       EMAC_MDIO_COMM_COMMAND_WRITE_45 |
 		       EMAC_MDIO_COMM_START_BUSY);
@@ -3293,23 +3286,23 @@ static int bnx2x_bsc_read(struct link_params *params,
 
 	xfer_cnt = 16 - lc_addr;
 
-	/* enable the engine */
+	/* Enable the engine */
 	val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
 	val |= MCPR_IMC_COMMAND_ENABLE;
 	REG_WR(bp, MCP_REG_MCPR_IMC_COMMAND, val);
 
-	/* program slave device ID */
+	/* Program slave device ID */
 	val = (sl_devid << 16) | sl_addr;
 	REG_WR(bp, MCP_REG_MCPR_IMC_SLAVE_CONTROL, val);
 
-	/* start xfer with 0 byte to update the address pointer ???*/
+	/* Start xfer with 0 byte to update the address pointer ???*/
 	val = (MCPR_IMC_COMMAND_ENABLE) |
 	      (MCPR_IMC_COMMAND_WRITE_OP <<
 		MCPR_IMC_COMMAND_OPERATION_BITSHIFT) |
 		(lc_addr << MCPR_IMC_COMMAND_TRANSFER_ADDRESS_BITSHIFT) | (0);
 	REG_WR(bp, MCP_REG_MCPR_IMC_COMMAND, val);
 
-	/* poll for completion */
+	/* Poll for completion */
 	i = 0;
 	val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
 	while (((val >> MCPR_IMC_COMMAND_IMC_STATUS_BITSHIFT) & 0x3) != 1) {
@@ -3325,7 +3318,7 @@ static int bnx2x_bsc_read(struct link_params *params,
 	if (rc == -EFAULT)
 		return rc;
 
-	/* start xfer with read op */
+	/* Start xfer with read op */
 	val = (MCPR_IMC_COMMAND_ENABLE) |
 		(MCPR_IMC_COMMAND_READ_OP <<
 		MCPR_IMC_COMMAND_OPERATION_BITSHIFT) |
@@ -3333,7 +3326,7 @@ static int bnx2x_bsc_read(struct link_params *params,
 		  (xfer_cnt);
 	REG_WR(bp, MCP_REG_MCPR_IMC_COMMAND, val);
 
-	/* poll for completion */
+	/* Poll for completion */
 	i = 0;
 	val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
 	while (((val >> MCPR_IMC_COMMAND_IMC_STATUS_BITSHIFT) & 0x3) != 1) {
@@ -3436,7 +3429,7 @@ static u8 bnx2x_get_warpcore_lane(struct bnx2x_phy *phy,
 			port = port ^ 1;
 
 		lane = (port<<1) + path;
-	} else { /* two port mode - no port swap */
+	} else { /* Two port mode - no port swap */
 
 		/* Figure out path swap value */
 		path_swap_ovr =
@@ -3514,7 +3507,7 @@ static void bnx2x_serdes_deassert(struct bnx2x *bp, u8 port)
 
 	val = SERDES_RESET_BITS << (port*16);
 
-	/* reset and unreset the SerDes/XGXS */
+	/* Reset and unreset the SerDes/XGXS */
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_3_CLEAR, val);
 	udelay(500);
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_3_SET, val);
@@ -3535,7 +3528,7 @@ static void bnx2x_xgxs_deassert(struct link_params *params)
 
 	val = XGXS_RESET_BITS << (port*16);
 
-	/* reset and unreset the SerDes/XGXS */
+	/* Reset and unreset the SerDes/XGXS */
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_3_CLEAR, val);
 	udelay(500);
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_3_SET, val);
@@ -3627,7 +3620,7 @@ static void bnx2x_ext_phy_set_pause(struct link_params *params,
 {
 	u16 val;
 	struct bnx2x *bp = params->bp;
-	/* read modify write pause advertizing */
+	/* Read modify write pause advertizing */
 	bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_ADV_PAUSE, &val);
 
 	val &= ~MDIO_AN_REG_ADV_PAUSE_BOTH;
@@ -3945,7 +3938,7 @@ static void bnx2x_warpcore_set_10G_KR(struct bnx2x_phy *phy,
 	bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
 				 MDIO_WC_REG_RX66_CONTROL, 0xF9);
 
-	/* set and clear loopback to cause a reset to 64/66 decoder */
+	/* Set and clear loopback to cause a reset to 64/66 decoder */
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
 			 MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x4000);
 	bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD,
@@ -4373,7 +4366,7 @@ static void bnx2x_warpcore_config_runtime(struct bnx2x_phy *phy,
 	if (!vars->turn_to_run_wc_rt)
 		return;
 
-	/* return if there is no link partner */
+	/* Return if there is no link partner */
 	if (!(bnx2x_warpcore_get_sigdet(phy, params))) {
 		DP(NETIF_MSG_LINK, "bnx2x_warpcore_get_sigdet false\n");
 		return;
@@ -4407,7 +4400,7 @@ static void bnx2x_warpcore_config_runtime(struct bnx2x_phy *phy,
 				bnx2x_warpcore_reset_lane(bp, phy, 1);
 				bnx2x_warpcore_reset_lane(bp, phy, 0);
 
-				/* restart Autoneg */
+				/* Restart Autoneg */
 				bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
 					MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1200);
 
@@ -4645,8 +4638,9 @@ static void bnx2x_set_warpcore_loopback(struct bnx2x_phy *phy,
 }
 
 
-void bnx2x_sync_link(struct link_params *params,
-			   struct link_vars *vars)
+
+static void bnx2x_sync_link(struct link_params *params,
+			     struct link_vars *vars)
 {
 	struct bnx2x *bp = params->bp;
 	u8 link_10g_plus;
@@ -4719,7 +4713,7 @@ void bnx2x_sync_link(struct link_params *params,
 		    USES_WARPCORE(bp) &&
 		    (vars->line_speed == SPEED_1000))
 			vars->phy_flags |= PHY_SGMII_FLAG;
-		/* anything 10 and over uses the bmac */
+		/* Anything 10 and over uses the bmac */
 		link_10g_plus = (vars->line_speed >= SPEED_10000);
 
 		if (link_10g_plus) {
@@ -4733,7 +4727,7 @@ void bnx2x_sync_link(struct link_params *params,
 			else
 				vars->mac_type = MAC_TYPE_EMAC;
 		}
-	} else { /* link down */
+	} else { /* Link down */
 		DP(NETIF_MSG_LINK, "phy link down\n");
 
 		vars->phy_link_up = 0;
@@ -4742,7 +4736,7 @@ void bnx2x_sync_link(struct link_params *params,
 		vars->duplex = DUPLEX_FULL;
 		vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
 
-		/* indicate no mac active */
+		/* Indicate no mac active */
 		vars->mac_type = MAC_TYPE_NONE;
 		if (vars->link_status & LINK_STATUS_PHYSICAL_LINK_FLAG)
 			vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG;
@@ -4813,7 +4807,7 @@ static void bnx2x_set_master_ln(struct link_params *params,
 		     PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >>
 		    PORT_HW_CFG_LANE_SWAP_CFG_MASTER_SHIFT);
 
-	/* set the master_ln for AN */
+	/* Set the master_ln for AN */
 	CL22_RD_OVER_CL45(bp, phy,
 			  MDIO_REG_BANK_XGXS_BLOCK2,
 			  MDIO_XGXS_BLOCK2_TEST_MODE_LANE,
@@ -4836,7 +4830,7 @@ static int bnx2x_reset_unicore(struct link_params *params,
 			  MDIO_REG_BANK_COMBO_IEEE0,
 			  MDIO_COMBO_IEEE0_MII_CONTROL, &mii_control);
 
-	/* reset the unicore */
+	/* Reset the unicore */
 	CL22_WR_OVER_CL45(bp, phy,
 			  MDIO_REG_BANK_COMBO_IEEE0,
 			  MDIO_COMBO_IEEE0_MII_CONTROL,
@@ -4845,11 +4839,11 @@ static int bnx2x_reset_unicore(struct link_params *params,
 	if (set_serdes)
 		bnx2x_set_serdes_access(bp, params->port);
 
-	/* wait for the reset to self clear */
+	/* Wait for the reset to self clear */
 	for (i = 0; i < MDIO_ACCESS_TIMEOUT; i++) {
 		udelay(5);
 
-		/* the reset erased the previous bank value */
+		/* The reset erased the previous bank value */
 		CL22_RD_OVER_CL45(bp, phy,
 				  MDIO_REG_BANK_COMBO_IEEE0,
 				  MDIO_COMBO_IEEE0_MII_CONTROL,
@@ -5067,7 +5061,7 @@ static void bnx2x_set_autoneg(struct bnx2x_phy *phy,
 			  MDIO_CL73_IEEEB0_CL73_AN_CONTROL, reg_val);
 }
 
-/* program SerDes, forced speed */
+/* Program SerDes, forced speed */
 static void bnx2x_program_serdes(struct bnx2x_phy *phy,
 				 struct link_params *params,
 				 struct link_vars *vars)
@@ -5075,7 +5069,7 @@ static void bnx2x_program_serdes(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	u16 reg_val;
 
-	/* program duplex, disable autoneg and sgmii*/
+	/* Program duplex, disable autoneg and sgmii*/
 	CL22_RD_OVER_CL45(bp, phy,
 			  MDIO_REG_BANK_COMBO_IEEE0,
 			  MDIO_COMBO_IEEE0_MII_CONTROL, &reg_val);
@@ -5094,7 +5088,7 @@ static void bnx2x_program_serdes(struct bnx2x_phy *phy,
 	CL22_RD_OVER_CL45(bp, phy,
 			  MDIO_REG_BANK_SERDES_DIGITAL,
 			  MDIO_SERDES_DIGITAL_MISC1, &reg_val);
-	/* clearing the speed value before setting the right speed */
+	/* Clearing the speed value before setting the right speed */
 	DP(NETIF_MSG_LINK, "MDIO_REG_BANK_SERDES_DIGITAL = 0x%x\n", reg_val);
 
 	reg_val &= ~(MDIO_SERDES_DIGITAL_MISC1_FORCE_SPEED_MASK |
@@ -5123,7 +5117,7 @@ static void bnx2x_set_brcm_cl37_advertisement(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	u16 val = 0;
 
-	/* set extended capabilities */
+	/* Set extended capabilities */
 	if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G)
 		val |= MDIO_OVER_1G_UP1_2_5G;
 	if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
@@ -5143,7 +5137,7 @@ static void bnx2x_set_ieee_aneg_advertisement(struct bnx2x_phy *phy,
 {
 	struct bnx2x *bp = params->bp;
 	u16 val;
-	/* for AN, we are always publishing full duplex */
+	/* For AN, we are always publishing full duplex */
 
 	CL22_WR_OVER_CL45(bp, phy,
 			  MDIO_REG_BANK_COMBO_IEEE0,
@@ -5205,14 +5199,14 @@ static void bnx2x_initialize_sgmii_process(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	u16 control1;
 
-	/* in SGMII mode, the unicore is always slave */
+	/* In SGMII mode, the unicore is always slave */
 
 	CL22_RD_OVER_CL45(bp, phy,
 			  MDIO_REG_BANK_SERDES_DIGITAL,
 			  MDIO_SERDES_DIGITAL_A_1000X_CONTROL1,
 			  &control1);
 	control1 |= MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_INVERT_SIGNAL_DETECT;
-	/* set sgmii mode (and not fiber) */
+	/* Set sgmii mode (and not fiber) */
 	control1 &= ~(MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_FIBER_MODE |
 		      MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_AUTODET |
 		      MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_MSTR_MODE);
@@ -5221,9 +5215,9 @@ static void bnx2x_initialize_sgmii_process(struct bnx2x_phy *phy,
 			  MDIO_SERDES_DIGITAL_A_1000X_CONTROL1,
 			  control1);
 
-	/* if forced speed */
+	/* If forced speed */
 	if (!(vars->line_speed == SPEED_AUTO_NEG)) {
-		/* set speed, disable autoneg */
+		/* Set speed, disable autoneg */
 		u16 mii_control;
 
 		CL22_RD_OVER_CL45(bp, phy,
@@ -5244,16 +5238,16 @@ static void bnx2x_initialize_sgmii_process(struct bnx2x_phy *phy,
 				MDIO_COMBO_IEEO_MII_CONTROL_MAN_SGMII_SP_1000;
 			break;
 		case SPEED_10:
-			/* there is nothing to set for 10M */
+			/* There is nothing to set for 10M */
 			break;
 		default:
-			/* invalid speed for SGMII */
+			/* Invalid speed for SGMII */
 			DP(NETIF_MSG_LINK, "Invalid line_speed 0x%x\n",
 				  vars->line_speed);
 			break;
 		}
 
-		/* setting the full duplex */
+		/* Setting the full duplex */
 		if (phy->req_duplex == DUPLEX_FULL)
 			mii_control |=
 				MDIO_COMBO_IEEO_MII_CONTROL_FULL_DUPLEX;
@@ -5263,7 +5257,7 @@ static void bnx2x_initialize_sgmii_process(struct bnx2x_phy *phy,
 				  mii_control);
 
 	} else { /* AN mode */
-		/* enable and restart AN */
+		/* Enable and restart AN */
 		bnx2x_restart_autoneg(phy, params, 0);
 	}
 }
@@ -5359,7 +5353,7 @@ static void bnx2x_flow_ctrl_resolve(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE;
 
-	/* resolve from gp_status in case of AN complete and not sgmii */
+	/* Resolve from gp_status in case of AN complete and not sgmii */
 	if (phy->req_flow_ctrl != BNX2X_FLOW_CTRL_AUTO) {
 		/* Update the advertised flow-controled of LD/LP in AN */
 		if (phy->req_line_speed == SPEED_AUTO_NEG)
@@ -5583,7 +5577,7 @@ static int bnx2x_link_settings_status(struct bnx2x_phy *phy,
 				bnx2x_xgxs_an_resolve(phy, params, vars,
 						      gp_status);
 		}
-	} else { /* link_down */
+	} else { /* Link_down */
 		if ((phy->req_line_speed == SPEED_AUTO_NEG) &&
 		    SINGLE_MEDIA_DIRECT(params)) {
 			/* Check signal is detected */
@@ -5732,12 +5726,12 @@ static void bnx2x_set_gmii_tx_driver(struct link_params *params)
 	u16 tx_driver;
 	u16 bank;
 
-	/* read precomp */
+	/* Read precomp */
 	CL22_RD_OVER_CL45(bp, phy,
 			  MDIO_REG_BANK_OVER_1G,
 			  MDIO_OVER_1G_LP_UP2, &lp_up2);
 
-	/* bits [10:7] at lp_up2, positioned at [15:12] */
+	/* Bits [10:7] at lp_up2, positioned at [15:12] */
 	lp_up2 = (((lp_up2 & MDIO_OVER_1G_LP_UP2_PREEMPHASIS_MASK) >>
 		   MDIO_OVER_1G_LP_UP2_PREEMPHASIS_SHIFT) <<
 		  MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT);
@@ -5751,7 +5745,7 @@ static void bnx2x_set_gmii_tx_driver(struct link_params *params)
 				  bank,
 				  MDIO_TX0_TX_DRIVER, &tx_driver);
 
-		/* replace tx_driver bits [15:12] */
+		/* Replace tx_driver bits [15:12] */
 		if (lp_up2 !=
 		    (tx_driver & MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK)) {
 			tx_driver &= ~MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK;
@@ -5847,16 +5841,16 @@ static void bnx2x_xgxs_config_init(struct bnx2x_phy *phy,
 		     FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED))
 			bnx2x_set_preemphasis(phy, params);
 
-		/* forced speed requested? */
+		/* Forced speed requested? */
 		if (vars->line_speed != SPEED_AUTO_NEG ||
 		    (SINGLE_MEDIA_DIRECT(params) &&
 		     params->loopback_mode == LOOPBACK_EXT)) {
 			DP(NETIF_MSG_LINK, "not SGMII, no AN\n");
 
-			/* disable autoneg */
+			/* Disable autoneg */
 			bnx2x_set_autoneg(phy, params, vars, 0);
 
-			/* program speed and duplex */
+			/* Program speed and duplex */
 			bnx2x_program_serdes(phy, params, vars);
 
 		} else { /* AN_mode */
@@ -5865,14 +5859,14 @@ static void bnx2x_xgxs_config_init(struct bnx2x_phy *phy,
 			/* AN enabled */
 			bnx2x_set_brcm_cl37_advertisement(phy, params);
 
-			/* program duplex & pause advertisement (for aneg) */
+			/* Program duplex & pause advertisement (for aneg) */
 			bnx2x_set_ieee_aneg_advertisement(phy, params,
 							  vars->ieee_fc);
 
-			/* enable autoneg */
+			/* Enable autoneg */
 			bnx2x_set_autoneg(phy, params, vars, enable_cl73);
 
-			/* enable and restart AN */
+			/* Enable and restart AN */
 			bnx2x_restart_autoneg(phy, params, enable_cl73);
 		}
 
@@ -5908,12 +5902,12 @@ static int bnx2x_prepare_xgxs(struct bnx2x_phy *phy,
 		bnx2x_set_master_ln(params, phy);
 
 	rc = bnx2x_reset_unicore(params, phy, 0);
-	/* reset the SerDes and wait for reset bit return low */
-	if (rc != 0)
+	/* Reset the SerDes and wait for reset bit return low */
+	if (rc)
 		return rc;
 
 	bnx2x_set_aer_mmd(params, phy);
-	/* setting the masterLn_def again after the reset */
+	/* Setting the masterLn_def again after the reset */
 	if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT) {
 		bnx2x_set_master_ln(params, phy);
 		bnx2x_set_swap_lanes(params, phy);
@@ -5938,7 +5932,7 @@ static u16 bnx2x_wait_reset_complete(struct bnx2x *bp,
 				MDIO_PMA_REG_CTRL, &ctrl);
 		if (!(ctrl & (1<<15)))
 			break;
-		msleep(1);
+		usleep_range(1000, 1000);
 	}
 
 	if (cnt == 1000)
@@ -6169,7 +6163,7 @@ static void bnx2x_set_xgxs_loopback(struct bnx2x_phy *phy,
 		DP(NETIF_MSG_LINK, "XGXS 10G loopback enable\n");
 
 		if (!CHIP_IS_E3(bp)) {
-			/* change the uni_phy_addr in the nig */
+			/* Change the uni_phy_addr in the nig */
 			md_devad = REG_RD(bp, (NIG_REG_XGXS0_CTRL_MD_DEVAD +
 					       port*0x18));
 
@@ -6189,11 +6183,11 @@ static void bnx2x_set_xgxs_loopback(struct bnx2x_phy *phy,
 				  (MDIO_CL73_IEEEB0_CL73_AN_CONTROL & 0xf)),
 				 0x6041);
 		msleep(200);
-		/* set aer mmd back */
+		/* Set aer mmd back */
 		bnx2x_set_aer_mmd(params, phy);
 
 		if (!CHIP_IS_E3(bp)) {
-			/* and md_devad */
+			/* And md_devad */
 			REG_WR(bp, NIG_REG_XGXS0_CTRL_MD_DEVAD + port*0x18,
 			       md_devad);
 		}
@@ -6390,7 +6384,7 @@ int bnx2x_test_link(struct link_params *params, struct link_vars *vars,
 			  MDIO_REG_BANK_GP_STATUS,
 			  MDIO_GP_STATUS_TOP_AN_STATUS1,
 			  &gp_status);
-	/* link is up only if both local phy and external phy are up */
+	/* Link is up only if both local phy and external phy are up */
 	if (!(gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS))
 		return -ESRCH;
 	}
@@ -6512,7 +6506,7 @@ static int bnx2x_link_initialize(struct link_params *params,
 static void bnx2x_int_link_reset(struct bnx2x_phy *phy,
 				 struct link_params *params)
 {
-	/* reset the SerDes/XGXS */
+	/* Reset the SerDes/XGXS */
 	REG_WR(params->bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_3_CLEAR,
 	       (0x1ff << (params->port*16)));
 }
@@ -6545,10 +6539,10 @@ static int bnx2x_update_link_down(struct link_params *params,
 	DP(NETIF_MSG_LINK, "Port %x: Link is down\n", port);
 	bnx2x_set_led(params, vars, LED_MODE_OFF, 0);
 	vars->phy_flags &= ~PHY_PHYSICAL_LINK_FLAG;
-	/* indicate no mac active */
+	/* Indicate no mac active */
 	vars->mac_type = MAC_TYPE_NONE;
 
-	/* update shared memory */
+	/* Update shared memory */
 	vars->link_status &= ~(LINK_STATUS_SPEED_AND_DUPLEX_MASK |
 			       LINK_STATUS_LINK_UP |
 			       LINK_STATUS_PHYSICAL_LINK_FLAG |
@@ -6561,15 +6555,15 @@ static int bnx2x_update_link_down(struct link_params *params,
 	vars->line_speed = 0;
 	bnx2x_update_mng(params, vars->link_status);
 
-	/* activate nig drain */
+	/* Activate nig drain */
 	REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + port*4, 1);
 
-	/* disable emac */
+	/* Disable emac */
 	if (!CHIP_IS_E3(bp))
 		REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0);
 
 	msleep(10);
-	/* reset BigMac/Xmac */
+	/* Reset BigMac/Xmac */
 	if (CHIP_IS_E1x(bp) ||
 	    CHIP_IS_E2(bp)) {
 		bnx2x_bmac_rx_disable(bp, params->port);
@@ -6578,6 +6572,7 @@ static int bnx2x_update_link_down(struct link_params *params,
 	       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
 	}
 	if (CHIP_IS_E3(bp)) {
+		/* Prevent LPI Generation by chip */
 		REG_WR(bp, MISC_REG_CPMU_LP_FW_ENABLE_P0 + (params->port << 2),
 		       0);
 		REG_WR(bp, MISC_REG_CPMU_LP_DR_ENABLE, 0);
@@ -6668,10 +6663,10 @@ static int bnx2x_update_link_up(struct link_params *params,
 		rc |= bnx2x_pbf_update(params, vars->flow_ctrl,
 				       vars->line_speed);
 
-	/* disable drain */
+	/* Disable drain */
 	REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + port*4, 0);
 
-	/* update shared memory */
+	/* Update shared memory */
 	bnx2x_update_mng(params, vars->link_status);
 	bnx2x_update_mng_eee(params, vars->eee_status);
 	/* Check remote fault */
@@ -6739,7 +6734,7 @@ int bnx2x_link_update(struct link_params *params, struct link_vars *vars)
 	  REG_RD(bp, NIG_REG_XGXS0_STATUS_LINK10G + port*0x68),
 	  REG_RD(bp, NIG_REG_XGXS0_STATUS_LINK_STATUS + port*0x68));
 
-	/* disable emac */
+	/* Disable emac */
 	if (!CHIP_IS_E3(bp))
 		REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0);
 
@@ -6884,11 +6879,11 @@ int bnx2x_link_update(struct link_params *params, struct link_vars *vars)
 		} else if (prev_line_speed != vars->line_speed) {
 			REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4,
 			       0);
-			msleep(1);
+			usleep_range(1000, 1000);
 		}
 	}
 
-	/* anything 10 and over uses the bmac */
+	/* Anything 10 and over uses the bmac */
 	link_10g_plus = (vars->line_speed >= SPEED_10000);
 
 	bnx2x_link_int_ack(params, vars, link_10g_plus);
@@ -6954,7 +6949,7 @@ void bnx2x_ext_phy_hw_reset(struct bnx2x *bp, u8 port)
 {
 	bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_1,
 		       MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
-	msleep(1);
+	usleep_range(1000, 1000);
 	bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_1,
 		       MISC_REGISTERS_GPIO_OUTPUT_HIGH, port);
 }
@@ -7051,7 +7046,7 @@ static int bnx2x_8073_8727_external_rom_boot(struct bnx2x *bp,
 			 MDIO_PMA_REG_GEN_CTRL,
 			 0x0001);
 
-	/* ucode reboot and rst */
+	/* Ucode reboot and rst */
 	bnx2x_cl45_write(bp, phy,
 			 MDIO_PMA_DEVAD,
 			 MDIO_PMA_REG_GEN_CTRL,
@@ -7095,7 +7090,7 @@ static int bnx2x_8073_8727_external_rom_boot(struct bnx2x *bp,
 				MDIO_PMA_DEVAD,
 				MDIO_PMA_REG_M8051_MSGOUT_REG, &fw_msgout);
 
-		msleep(1);
+		usleep_range(1000, 1000);
 	} while (fw_ver1 == 0 || fw_ver1 == 0x4321 ||
 			((fw_msgout & 0xff) != 0x03 && (phy->type ==
 			PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8073)));
@@ -7267,7 +7262,7 @@ static int bnx2x_8073_config_init(struct bnx2x_phy *phy,
 	bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_1,
 		       MISC_REGISTERS_GPIO_OUTPUT_HIGH, gpio_port);
 
-	/* enable LASI */
+	/* Enable LASI */
 	bnx2x_cl45_write(bp, phy,
 			 MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, (1<<2));
 	bnx2x_cl45_write(bp, phy,
@@ -7415,7 +7410,7 @@ static u8 bnx2x_8073_read_status(struct bnx2x_phy *phy,
 
 	DP(NETIF_MSG_LINK, "8703 LASI status 0x%x\n", val1);
 
-	/* clear the interrupt LASI status register */
+	/* Clear the interrupt LASI status register */
 	bnx2x_cl45_read(bp, phy,
 			MDIO_PCS_DEVAD, MDIO_PCS_REG_STATUS, &val2);
 	bnx2x_cl45_read(bp, phy,
@@ -7794,7 +7789,7 @@ static int bnx2x_8726_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 		if ((val & MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK) ==
 		    MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_IDLE)
 			return 0;
-		msleep(1);
+		usleep_range(1000, 1000);
 	}
 	return -EINVAL;
 }
@@ -7876,7 +7871,7 @@ static int bnx2x_8727_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 	/* Wait appropriate time for two-wire command to finish before
 	 * polling the status register
 	 */
-	msleep(1);
+	usleep_range(1000, 1000);
 
 	/* Wait up to 500us for command complete status */
 	for (i = 0; i < 100; i++) {
@@ -7912,7 +7907,7 @@ static int bnx2x_8727_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 		if ((val & MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK) ==
 		    MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_IDLE)
 			return 0;
-		msleep(1);
+		usleep_range(1000, 1000);
 	}
 
 	return -EINVAL;
@@ -8091,7 +8086,7 @@ static int bnx2x_verify_sfp_module(struct bnx2x_phy *phy,
 		return 0;
 	}
 
-	/* format the warning message */
+	/* Format the warning message */
 	if (bnx2x_read_sfp_module_eeprom(phy,
 					 params,
 					 SFP_EEPROM_VENDOR_NAME_ADDR,
@@ -8472,7 +8467,7 @@ int bnx2x_sfp_module_detection(struct bnx2x_phy *phy,
 		DP(NETIF_MSG_LINK, "Failed to get valid module type\n");
 		return -EINVAL;
 	} else if (bnx2x_verify_sfp_module(phy, params) != 0) {
-		/* check SFP+ module compatibility */
+		/* Check SFP+ module compatibility */
 		DP(NETIF_MSG_LINK, "Module verification failed!!\n");
 		rc = -EINVAL;
 		/* Turn on fault module-detected led */
@@ -8603,7 +8598,7 @@ static u8 bnx2x_8706_8726_read_status(struct bnx2x_phy *phy,
 	bnx2x_sfp_mask_fault(bp, phy, MDIO_PMA_LASI_TXSTAT,
 			     MDIO_PMA_LASI_TXCTRL);
 
-	/* clear LASI indication*/
+	/* Clear LASI indication*/
 	bnx2x_cl45_read(bp, phy,
 			MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
 	bnx2x_cl45_read(bp, phy,
@@ -8800,7 +8795,7 @@ static void bnx2x_8726_external_rom_boot(struct bnx2x_phy *phy,
 			 MDIO_PMA_REG_GEN_CTRL,
 			 MDIO_PMA_REG_GEN_CTRL_ROM_RESET_INTERNAL_MP);
 
-	/* wait for 150ms for microcode load */
+	/* Wait for 150ms for microcode load */
 	msleep(150);
 
 	/* Disable serial boot control, tristates pins SS_N, SCK, MOSI, MISO */
@@ -9011,7 +9006,7 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy,
 	lasi_ctrl_val = 0x0006;
 
 	DP(NETIF_MSG_LINK, "Initializing BCM8727\n");
-	/* enable LASI */
+	/* Enable LASI */
 	bnx2x_cl45_write(bp, phy,
 			 MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL,
 			 rx_alarm_ctrl_val);
@@ -9733,7 +9728,7 @@ static int bnx2x_84833_cmd_hdlr(struct bnx2x_phy *phy,
 				MDIO_84833_CMD_HDLR_STATUS, &val);
 		if (val == PHY84833_STATUS_CMD_OPEN_FOR_CMDS)
 			break;
-		msleep(1);
+		usleep_range(1000, 1000);
 	}
 	if (idx >= PHY84833_CMDHDLR_WAIT) {
 		DP(NETIF_MSG_LINK, "FW cmd: FW not ready.\n");
@@ -9754,7 +9749,7 @@ static int bnx2x_84833_cmd_hdlr(struct bnx2x_phy *phy,
 		if ((val == PHY84833_STATUS_CMD_COMPLETE_PASS) ||
 			(val == PHY84833_STATUS_CMD_COMPLETE_ERROR))
 			break;
-		msleep(1);
+		usleep_range(1000, 1000);
 	}
 	if ((idx >= PHY84833_CMDHDLR_WAIT) ||
 		(val == PHY84833_STATUS_CMD_COMPLETE_ERROR)) {
@@ -9924,7 +9919,7 @@ static int bnx2x_8483x_disable_eee(struct bnx2x_phy *phy,
 	/* Prevent Phy from working in EEE and advertising it */
 	rc = bnx2x_84833_cmd_hdlr(phy, params,
 		PHY84833_CMD_SET_EEE_MODE, &cmd_args, 1);
-	if (rc != 0) {
+	if (rc) {
 		DP(NETIF_MSG_LINK, "EEE disable failed.\n");
 		return rc;
 	}
@@ -9947,7 +9942,7 @@ static int bnx2x_8483x_enable_eee(struct bnx2x_phy *phy,
 
 	rc = bnx2x_84833_cmd_hdlr(phy, params,
 		PHY84833_CMD_SET_EEE_MODE, &cmd_args, 1);
-	if (rc != 0) {
+	if (rc) {
 		DP(NETIF_MSG_LINK, "EEE enable failed.\n");
 		return rc;
 	}
@@ -9975,7 +9970,7 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 	u16 cmd_args[PHY84833_CMDHDLR_MAX_ARGS];
 	int rc = 0;
 
-	msleep(1);
+	usleep_range(1000, 1000);
 
 	if (!(CHIP_IS_E1(bp)))
 		port = BP_PATH(bp);
@@ -10064,7 +10059,7 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 		rc = bnx2x_84833_cmd_hdlr(phy, params,
 			PHY84833_CMD_SET_EEE_MODE, cmd_args,
 			PHY84833_CMDHDLR_MAX_ARGS);
-		if (rc != 0)
+		if (rc)
 			DP(NETIF_MSG_LINK, "Cfg AutogrEEEn failed.\n");
 	}
 	if (initialize)
@@ -10108,7 +10103,7 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 			vars->eee_status &= ~SHMEM_EEE_REQUESTED_BIT;
 
 		rc = bnx2x_8483x_eee_timers(params, vars);
-		if (rc != 0) {
+		if (rc) {
 			DP(NETIF_MSG_LINK, "Failed to configure EEE timers\n");
 			bnx2x_8483x_disable_eee(phy, params, vars);
 			return rc;
@@ -10121,7 +10116,7 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy,
 			rc = bnx2x_8483x_enable_eee(phy, params, vars);
 		else
 			rc = bnx2x_8483x_disable_eee(phy, params, vars);
-		if (rc != 0) {
+		if (rc) {
 			DP(NETIF_MSG_LINK, "Failed to set EEE advertisment\n");
 			return rc;
 		}
@@ -10222,7 +10217,7 @@ static u8 bnx2x_848xx_read_status(struct bnx2x_phy *phy,
 		}
 	}
 	if (link_up) {
-		DP(NETIF_MSG_LINK, "BCM84823: link speed is %d\n",
+		DP(NETIF_MSG_LINK, "BCM848x3: link speed is %d\n",
 			   vars->line_speed);
 		bnx2x_ext_phy_resolve_fc(phy, params, vars);
 
@@ -10633,7 +10628,7 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy,
 			MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH)
 		fc_val |= MDIO_AN_REG_ADV_PAUSE_PAUSE;
 
-	/* read all advertisement */
+	/* Read all advertisement */
 	bnx2x_cl22_read(bp, phy,
 			0x09,
 			&an_1000_val);
@@ -10670,7 +10665,7 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy,
 			0x09,
 			&an_1000_val);
 
-	/* set 100 speed advertisement */
+	/* Set 100 speed advertisement */
 	if (((phy->req_line_speed == SPEED_AUTO_NEG) &&
 			(phy->speed_cap_mask &
 			(PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL |
@@ -10684,7 +10679,7 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy,
 		DP(NETIF_MSG_LINK, "Advertising 100M\n");
 	}
 
-	/* set 10 speed advertisement */
+	/* Set 10 speed advertisement */
 	if (((phy->req_line_speed == SPEED_AUTO_NEG) &&
 			(phy->speed_cap_mask &
 			(PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL |
@@ -11050,7 +11045,7 @@ static u8 bnx2x_7101_read_status(struct bnx2x_phy *phy,
 	DP(NETIF_MSG_LINK, "10G-base-T PMA status 0x%x->0x%x\n",
 		   val2, val1);
 	link_up = ((val1 & 4) == 4);
-	/* if link is up print the AN outcome of the SFX7101 PHY */
+	/* If link is up print the AN outcome of the SFX7101 PHY */
 	if (link_up) {
 		bnx2x_cl45_read(bp, phy,
 				MDIO_AN_DEVAD, MDIO_AN_REG_MASTER_STATUS,
@@ -11062,7 +11057,7 @@ static u8 bnx2x_7101_read_status(struct bnx2x_phy *phy,
 		bnx2x_ext_phy_10G_an_resolve(bp, phy, vars);
 		bnx2x_ext_phy_resolve_fc(phy, params, vars);
 
-		/* read LP advertised speeds */
+		/* Read LP advertised speeds */
 		if (val2 & (1<<11))
 			vars->link_status |=
 				LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
@@ -12260,7 +12255,7 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars)
 	vars->mac_type = MAC_TYPE_NONE;
 	vars->phy_flags = 0;
 
-	/* disable attentions */
+	/* Disable attentions */
 	bnx2x_bits_dis(bp, NIG_REG_MASK_INTERRUPT_PORT0 + params->port*4,
 		       (NIG_MASK_XGXS0_LINK_STATUS |
 			NIG_MASK_XGXS0_LINK10G |
@@ -12320,7 +12315,7 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 	struct bnx2x *bp = params->bp;
 	u8 phy_index, port = params->port, clear_latch_ind = 0;
 	DP(NETIF_MSG_LINK, "Resetting the link of port %d\n", port);
-	/* disable attentions */
+	/* Disable attentions */
 	vars->link_status = 0;
 	bnx2x_update_mng(params, vars->link_status);
 	vars->eee_status &= ~(SHMEM_EEE_LP_ADV_STATUS_MASK |
@@ -12332,10 +12327,10 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 			NIG_MASK_SERDES0_LINK_STATUS |
 			NIG_MASK_MI_INT));
 
-	/* activate nig drain */
+	/* Activate nig drain */
 	REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + port*4, 1);
 
-	/* disable nig egress interface */
+	/* Disable nig egress interface */
 	if (!CHIP_IS_E3(bp)) {
 		REG_WR(bp, NIG_REG_BMAC0_OUT_EN + port*4, 0);
 		REG_WR(bp, NIG_REG_EGRESS_EMAC0_OUT_EN + port*4, 0);
@@ -12348,7 +12343,7 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 		bnx2x_xmac_disable(params);
 		bnx2x_umac_disable(params);
 	}
-	/* disable emac */
+	/* Disable emac */
 	if (!CHIP_IS_E3(bp))
 		REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0);
 
@@ -12356,7 +12351,7 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 	/* The PHY reset is controlled by GPIO 1
 	 * Hold it as vars low
 	 */
-	 /* clear link led */
+	 /* Clear link led */
 	bnx2x_set_mdio_clk(bp, params->chip_id, port);
 	bnx2x_set_led(params, vars, LED_MODE_OFF, 0);
 
@@ -12386,9 +12381,9 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
 		params->phy[INT_PHY].link_reset(
 			&params->phy[INT_PHY], params);
 
-	/* disable nig ingress interface */
+	/* Disable nig ingress interface */
 	if (!CHIP_IS_E3(bp)) {
-		/* reset BigMac */
+		/* Reset BigMac */
 		REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR,
 		       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
 		REG_WR(bp, NIG_REG_BMAC0_IN_EN + port*4, 0);
@@ -12445,7 +12440,7 @@ static int bnx2x_8073_common_init_phy(struct bnx2x *bp,
 			DP(NETIF_MSG_LINK, "populate_phy failed\n");
 			return -EINVAL;
 		}
-		/* disable attentions */
+		/* Disable attentions */
 		bnx2x_bits_dis(bp, NIG_REG_MASK_INTERRUPT_PORT0 +
 			       port_of_path*4,
 			       (NIG_MASK_XGXS0_LINK_STATUS |
@@ -12658,7 +12653,7 @@ static int bnx2x_8727_common_init_phy(struct bnx2x *bp,
 	/* Initiate PHY reset*/
 	bnx2x_set_gpio(bp, reset_gpio, MISC_REGISTERS_GPIO_OUTPUT_LOW,
 		       port);
-	msleep(1);
+	usleep_range(1000, 1000);
 	bnx2x_set_gpio(bp, reset_gpio, MISC_REGISTERS_GPIO_OUTPUT_HIGH,
 		       port);
 
@@ -12756,7 +12751,7 @@ static int bnx2x_84833_pre_init_phy(struct bnx2x *bp,
 				MDIO_PMA_REG_CTRL, &val);
 		if (!(val & (1<<15)))
 			break;
-		msleep(1);
+		usleep_range(1000, 1000);
 	}
 	if (cnt >= 1500) {
 		DP(NETIF_MSG_LINK, "84833 reset timeout\n");
@@ -12846,7 +12841,7 @@ static int bnx2x_ext_phy_common_init(struct bnx2x *bp, u32 shmem_base_path[],
 		break;
 	}
 
-	if (rc != 0)
+	if (rc)
 		netdev_err(bp->dev,  "Warning: PHY was not initialized,"
 				      " Port %d\n",
 			 0);
-- 
1.7.9.rc2

^ permalink raw reply related

* [PATCH net-next 5/6] bnx2x: 1G sfp+ module handling
From: Yuval Mintz @ 2012-06-20  8:49 UTC (permalink / raw)
  To: davem, netdev; +Cc: eilong, Yuval Mintz, Yaniv Rosner
In-Reply-To: <1340182175-916-1-git-send-email-yuvalmin@broadcom.com>

Automatically lower requested link speed to 1G in case 1G SFP+
module is detected.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Yaniv Rosner <yaniv.rosner@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c    |   16 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c   |  202 +++++++++++++-------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h   |   17 +-
 3 files changed, 151 insertions(+), 84 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 70c0881..819170e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -187,7 +187,8 @@ static int bnx2x_get_port_type(struct bnx2x *bp)
 	int port_type;
 	u32 phy_idx = bnx2x_get_cur_phy_idx(bp);
 	switch (bp->link_params.phy[phy_idx].media_type) {
-	case ETH_PHY_SFP_FIBER:
+	case ETH_PHY_SFPP_10G_FIBER:
+	case ETH_PHY_SFP_1G_FIBER:
 	case ETH_PHY_XFP_FIBER:
 	case ETH_PHY_KR:
 	case ETH_PHY_CX4:
@@ -220,6 +221,11 @@ static int bnx2x_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		(bp->port.supported[cfg_idx ^ 1] &
 		 (SUPPORTED_TP | SUPPORTED_FIBRE));
 	cmd->advertising = bp->port.advertising[cfg_idx];
+	if (bp->link_params.phy[bnx2x_get_cur_phy_idx(bp)].media_type ==
+	    ETH_PHY_SFP_1G_FIBER) {
+		cmd->supported &= ~(SUPPORTED_10000baseT_Full);
+		cmd->advertising &= ~(ADVERTISED_10000baseT_Full);
+	}
 
 	if ((bp->state == BNX2X_STATE_OPEN) && (bp->link_vars.link_up)) {
 		if (!(bp->flags & MF_FUNC_DIS)) {
@@ -295,7 +301,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	u32 advertising, cfg_idx, old_multi_phy_config, new_multi_phy_config;
-	u32 speed;
+	u32 speed, phy_idx;
 
 	if (IS_MF_SD(bp))
 		return 0;
@@ -550,9 +556,11 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 				   "10G half not supported\n");
 				return -EINVAL;
 			}
-
+			phy_idx = bnx2x_get_cur_phy_idx(bp);
 			if (!(bp->port.supported[cfg_idx]
-			      & SUPPORTED_10000baseT_Full)) {
+			      & SUPPORTED_10000baseT_Full) ||
+			    (bp->link_params.phy[phy_idx].media_type ==
+			     ETH_PHY_SFP_1G_FIBER)) {
 				DP(BNX2X_MSG_ETHTOOL,
 				   "10G full not supported\n");
 				return -EINVAL;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index fc7ff06..36845f7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -4374,6 +4374,23 @@ static void bnx2x_warpcore_config_runtime(struct bnx2x_phy *phy,
 	} /*params->rx_tx_asic_rst*/
 
 }
+static void bnx2x_warpcore_config_sfi(struct bnx2x_phy *phy,
+				      struct link_params *params)
+{
+	u16 lane = bnx2x_get_warpcore_lane(phy, params);
+	struct bnx2x *bp = params->bp;
+	bnx2x_warpcore_clear_regs(phy, params, lane);
+	if ((params->req_line_speed[LINK_CONFIG_IDX(INT_PHY)] ==
+	     SPEED_10000) &&
+	    (phy->media_type != ETH_PHY_SFP_1G_FIBER)) {
+		DP(NETIF_MSG_LINK, "Setting 10G SFI\n");
+		bnx2x_warpcore_set_10G_XFI(phy, params, 0);
+	} else {
+		DP(NETIF_MSG_LINK, "Setting 1G Fiber\n");
+		bnx2x_warpcore_set_sgmii_speed(phy, params, 1, 0);
+	}
+}
+
 static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
 				       struct link_params *params,
 				       struct link_vars *vars)
@@ -4434,19 +4451,11 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
 			break;
 
 		case PORT_HW_CFG_NET_SERDES_IF_SFI:
-
-			bnx2x_warpcore_clear_regs(phy, params, lane);
-			if (vars->line_speed == SPEED_10000) {
-				DP(NETIF_MSG_LINK, "Setting 10G SFI\n");
-				bnx2x_warpcore_set_10G_XFI(phy, params, 0);
-			} else if (vars->line_speed == SPEED_1000) {
-				DP(NETIF_MSG_LINK, "Setting 1G Fiber\n");
-				bnx2x_warpcore_set_sgmii_speed(
-						phy, params, 1, 0);
-			}
 			/* Issue Module detection */
 			if (bnx2x_is_sfp_module_plugged(phy, params))
 				bnx2x_sfp_module_detection(phy, params);
+
+			bnx2x_warpcore_config_sfi(phy, params);
 			break;
 
 		case PORT_HW_CFG_NET_SERDES_IF_DXGXS:
@@ -6355,7 +6364,9 @@ int bnx2x_test_link(struct link_params *params, struct link_vars *vars,
 		for (phy_index = EXT_PHY1; phy_index < params->num_phys;
 		      phy_index++) {
 			serdes_phy_type = ((params->phy[phy_index].media_type ==
-					    ETH_PHY_SFP_FIBER) ||
+					    ETH_PHY_SFPP_10G_FIBER) ||
+					   (params->phy[phy_index].media_type ==
+					    ETH_PHY_SFP_1G_FIBER) ||
 					   (params->phy[phy_index].media_type ==
 					    ETH_PHY_XFP_FIBER) ||
 					   (params->phy[phy_index].media_type ==
@@ -7892,7 +7903,7 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy,
 {
 	struct bnx2x *bp = params->bp;
 	u32 sync_offset = 0, phy_idx, media_types;
-	u8 val, check_limiting_mode = 0;
+	u8 val[2], check_limiting_mode = 0;
 	*edc_mode = EDC_MODE_LIMITING;
 
 	phy->media_type = ETH_PHY_UNSPECIFIED;
@@ -7900,13 +7911,13 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy,
 	if (bnx2x_read_sfp_module_eeprom(phy,
 					 params,
 					 SFP_EEPROM_CON_TYPE_ADDR,
-					 1,
-					 &val) != 0) {
+					 2,
+					 (u8 *)val) != 0) {
 		DP(NETIF_MSG_LINK, "Failed to read from SFP+ module EEPROM\n");
 		return -EINVAL;
 	}
 
-	switch (val) {
+	switch (val[0]) {
 	case SFP_EEPROM_CON_TYPE_VAL_COPPER:
 	{
 		u8 copper_module_type;
@@ -7944,13 +7955,29 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy,
 		break;
 	}
 	case SFP_EEPROM_CON_TYPE_VAL_LC:
-		phy->media_type = ETH_PHY_SFP_FIBER;
-		DP(NETIF_MSG_LINK, "Optic module detected\n");
 		check_limiting_mode = 1;
+		if ((val[1] & (SFP_EEPROM_COMP_CODE_SR_MASK |
+			       SFP_EEPROM_COMP_CODE_LR_MASK |
+			       SFP_EEPROM_COMP_CODE_LRM_MASK)) == 0) {
+			DP(NETIF_MSG_LINK, "1G Optic module detected\n");
+			phy->media_type = ETH_PHY_SFP_1G_FIBER;
+			phy->req_line_speed = SPEED_1000;
+		} else {
+			int idx, cfg_idx = 0;
+			DP(NETIF_MSG_LINK, "10G Optic module detected\n");
+			for (idx = INT_PHY; idx < MAX_PHYS; idx++) {
+				if (params->phy[idx].type == phy->type) {
+					cfg_idx = LINK_CONFIG_IDX(idx);
+					break;
+				}
+			}
+			phy->media_type = ETH_PHY_SFPP_10G_FIBER;
+			phy->req_line_speed = params->req_line_speed[cfg_idx];
+		}
 		break;
 	default:
 		DP(NETIF_MSG_LINK, "Unable to determine module type 0x%x !!!\n",
-			 val);
+			 val[0]);
 		return -EINVAL;
 	}
 	sync_offset = params->shmem_base +
@@ -8480,14 +8507,34 @@ void bnx2x_handle_module_detect_int(struct link_params *params)
 
 	/* Call the handling function in case module is detected */
 	if (gpio_val == 0) {
+		bnx2x_set_mdio_clk(bp, params->chip_id, params->port);
+		bnx2x_set_aer_mmd(params, phy);
+
 		bnx2x_power_sfp_module(params, phy, 1);
 		bnx2x_set_gpio_int(bp, gpio_num,
 				   MISC_REGISTERS_GPIO_INT_OUTPUT_CLR,
 				   gpio_port);
-		if (bnx2x_wait_for_sfp_module_initialized(phy, params) == 0)
+		if (bnx2x_wait_for_sfp_module_initialized(phy, params) == 0) {
 			bnx2x_sfp_module_detection(phy, params);
-		else
+			if (CHIP_IS_E3(bp)) {
+				u16 rx_tx_in_reset;
+				/* In case WC is out of reset, reconfigure the
+				 * link speed while taking into account 1G
+				 * module limitation.
+				 */
+				bnx2x_cl45_read(bp, phy,
+						MDIO_WC_DEVAD,
+						MDIO_WC_REG_DIGITAL5_MISC6,
+						&rx_tx_in_reset);
+				if (!rx_tx_in_reset) {
+					bnx2x_warpcore_reset_lane(bp, phy, 1);
+					bnx2x_warpcore_config_sfi(phy, params);
+					bnx2x_warpcore_reset_lane(bp, phy, 0);
+				}
+			}
+		} else {
 			DP(NETIF_MSG_LINK, "SFP+ module is not initialized\n");
+		}
 	} else {
 		u32 val = REG_RD(bp, params->shmem_base +
 				 offsetof(struct shmem_region, dev_info.
@@ -8939,6 +8986,63 @@ static void bnx2x_8727_hw_reset(struct bnx2x_phy *phy,
 		       MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
 }
 
+static void bnx2x_8727_config_speed(struct bnx2x_phy *phy,
+				    struct link_params *params)
+{
+	struct bnx2x *bp = params->bp;
+	u16 tmp1, val;
+	/* Set option 1G speed */
+	if ((phy->req_line_speed == SPEED_1000) ||
+	    (phy->media_type == ETH_PHY_SFP_1G_FIBER)) {
+		DP(NETIF_MSG_LINK, "Setting 1G force\n");
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x40);
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2, 0xD);
+		bnx2x_cl45_read(bp, phy,
+				MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2, &tmp1);
+		DP(NETIF_MSG_LINK, "1.7 = 0x%x\n", tmp1);
+		/* Power down the XAUI until link is up in case of dual-media
+		 * and 1G
+		 */
+		if (DUAL_MEDIA(params)) {
+			bnx2x_cl45_read(bp, phy,
+					MDIO_PMA_DEVAD,
+					MDIO_PMA_REG_8727_PCS_GP, &val);
+			val |= (3<<10);
+			bnx2x_cl45_write(bp, phy,
+					 MDIO_PMA_DEVAD,
+					 MDIO_PMA_REG_8727_PCS_GP, val);
+		}
+	} else if ((phy->req_line_speed == SPEED_AUTO_NEG) &&
+		   ((phy->speed_cap_mask &
+		     PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) &&
+		   ((phy->speed_cap_mask &
+		      PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) !=
+		   PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) {
+
+		DP(NETIF_MSG_LINK, "Setting 1G clause37\n");
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL, 0);
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_AN_DEVAD, MDIO_AN_REG_CL37_AN, 0x1300);
+	} else {
+		/* Since the 8727 has only single reset pin, need to set the 10G
+		 * registers although it is default
+		 */
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL,
+				 0x0020);
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_AN_DEVAD, MDIO_AN_REG_CL37_AN, 0x0100);
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x2040);
+		bnx2x_cl45_write(bp, phy,
+				 MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2,
+				 0x0008);
+	}
+}
+
 static int bnx2x_8727_config_init(struct bnx2x_phy *phy,
 				  struct link_params *params,
 				  struct link_vars *vars)
@@ -9008,56 +9112,7 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy,
 	bnx2x_cl45_read(bp, phy,
 			MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &tmp1);
 
-	/* Set option 1G speed */
-	if (phy->req_line_speed == SPEED_1000) {
-		DP(NETIF_MSG_LINK, "Setting 1G force\n");
-		bnx2x_cl45_write(bp, phy,
-				 MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x40);
-		bnx2x_cl45_write(bp, phy,
-				 MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2, 0xD);
-		bnx2x_cl45_read(bp, phy,
-				MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2, &tmp1);
-		DP(NETIF_MSG_LINK, "1.7 = 0x%x\n", tmp1);
-		/* Power down the XAUI until link is up in case of dual-media
-		 * and 1G
-		 */
-		if (DUAL_MEDIA(params)) {
-			bnx2x_cl45_read(bp, phy,
-					MDIO_PMA_DEVAD,
-					MDIO_PMA_REG_8727_PCS_GP, &val);
-			val |= (3<<10);
-			bnx2x_cl45_write(bp, phy,
-					 MDIO_PMA_DEVAD,
-					 MDIO_PMA_REG_8727_PCS_GP, val);
-		}
-	} else if ((phy->req_line_speed == SPEED_AUTO_NEG) &&
-		   ((phy->speed_cap_mask &
-		     PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) &&
-		   ((phy->speed_cap_mask &
-		      PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) !=
-		   PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) {
-
-		DP(NETIF_MSG_LINK, "Setting 1G clause37\n");
-		bnx2x_cl45_write(bp, phy,
-				 MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL, 0);
-		bnx2x_cl45_write(bp, phy,
-				 MDIO_AN_DEVAD, MDIO_AN_REG_CL37_AN, 0x1300);
-	} else {
-		/* Since the 8727 has only single reset pin, need to set the 10G
-		 * registers although it is default
-		 */
-		bnx2x_cl45_write(bp, phy,
-				 MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL,
-				 0x0020);
-		bnx2x_cl45_write(bp, phy,
-				 MDIO_AN_DEVAD, MDIO_AN_REG_CL37_AN, 0x0100);
-		bnx2x_cl45_write(bp, phy,
-				 MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x2040);
-		bnx2x_cl45_write(bp, phy,
-				 MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2,
-				 0x0008);
-	}
-
+	bnx2x_8727_config_speed(phy, params);
 	/* Set 2-wire transfer rate of SFP+ module EEPROM
 	 * to 100Khz since some DACs(direct attached cables) do
 	 * not work at 400Khz.
@@ -9184,6 +9239,9 @@ static void bnx2x_8727_handle_mod_abs(struct bnx2x_phy *phy,
 			bnx2x_sfp_module_detection(phy, params);
 		else
 			DP(NETIF_MSG_LINK, "SFP+ module is not initialized\n");
+
+		/* Reconfigure link speed based on module type limitations */
+		bnx2x_8727_config_speed(phy, params);
 	}
 
 	DP(NETIF_MSG_LINK, "8727 RX_ALARM_STATUS 0x%x\n",
@@ -11328,7 +11386,7 @@ static struct bnx2x_phy phy_8706 = {
 			   SUPPORTED_FIBRE |
 			   SUPPORTED_Pause |
 			   SUPPORTED_Asym_Pause),
-	.media_type	= ETH_PHY_SFP_FIBER,
+	.media_type	= ETH_PHY_SFPP_10G_FIBER,
 	.ver_addr	= 0,
 	.req_flow_ctrl	= 0,
 	.req_line_speed	= 0,
@@ -11667,7 +11725,7 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port,
 					   SUPPORTED_FIBRE |
 					   SUPPORTED_Pause |
 					   SUPPORTED_Asym_Pause);
-			phy->media_type = ETH_PHY_SFP_FIBER;
+			phy->media_type = ETH_PHY_SFPP_10G_FIBER;
 			break;
 		case PORT_HW_CFG_NET_SERDES_IF_KR:
 			phy->media_type = ETH_PHY_KR;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index 7b6051b..017236b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -168,14 +168,15 @@ struct bnx2x_phy {
 	u32 supported;
 
 	u32 media_type;
-#define	ETH_PHY_UNSPECIFIED 0x0
-#define	ETH_PHY_SFP_FIBER   0x1
-#define	ETH_PHY_XFP_FIBER   0x2
-#define	ETH_PHY_DA_TWINAX   0x3
-#define	ETH_PHY_BASE_T      0x4
-#define	ETH_PHY_KR          0xf0
-#define	ETH_PHY_CX4         0xf1
-#define	ETH_PHY_NOT_PRESENT 0xff
+#define	ETH_PHY_UNSPECIFIED	0x0
+#define	ETH_PHY_SFPP_10G_FIBER	0x1
+#define	ETH_PHY_XFP_FIBER		0x2
+#define	ETH_PHY_DA_TWINAX		0x3
+#define	ETH_PHY_BASE_T		0x4
+#define	ETH_PHY_SFP_1G_FIBER	0x5
+#define	ETH_PHY_KR		0xf0
+#define	ETH_PHY_CX4		0xf1
+#define	ETH_PHY_NOT_PRESENT	0xff
 
 	/* The address in which version is located*/
 	u32 ver_addr;
-- 
1.7.9.rc2

^ permalink raw reply related

* [PATCH net-next 6/6] bnx2x: link module eeprom
From: Yuval Mintz @ 2012-06-20  8:49 UTC (permalink / raw)
  To: davem, netdev; +Cc: eilong, Yuval Mintz, Yaniv Rosner
In-Reply-To: <1340182175-916-1-git-send-email-yuvalmin@broadcom.com>

Add the ethtool functionality of accessing optic modules'
information and eeprom to the bnx2x driver.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Yaniv Rosner <yaniv.rosner@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c    |   63 +++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c   |    9 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h   |    1 +
 3 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 819170e..0f158b6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1160,6 +1160,65 @@ static int bnx2x_get_eeprom(struct net_device *dev,
 	return rc;
 }
 
+static int bnx2x_get_module_eeprom(struct net_device *dev,
+				   struct ethtool_eeprom *ee,
+				   u8 *data)
+{
+	struct bnx2x *bp = netdev_priv(dev);
+	int rc = 0, phy_idx;
+	u8 *user_data = data;
+	int remaining_len = ee->len, xfer_size;
+	unsigned int page_off = ee->offset;
+
+	if (!netif_running(dev)) {
+		DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM,
+		   "cannot access eeprom when the interface is down\n");
+		return -EAGAIN;
+	}
+
+	phy_idx = bnx2x_get_cur_phy_idx(bp);
+	bnx2x_acquire_phy_lock(bp);
+	while (!rc && remaining_len > 0) {
+		xfer_size = (remaining_len > SFP_EEPROM_PAGE_SIZE) ?
+			SFP_EEPROM_PAGE_SIZE : remaining_len;
+		rc = bnx2x_read_sfp_module_eeprom(&bp->link_params.phy[phy_idx],
+						  &bp->link_params,
+						  page_off,
+						  xfer_size,
+						  user_data);
+		remaining_len -= xfer_size;
+		user_data += xfer_size;
+		page_off += xfer_size;
+	}
+
+	bnx2x_release_phy_lock(bp);
+	return rc;
+}
+
+static int bnx2x_get_module_info(struct net_device *dev,
+				 struct ethtool_modinfo *modinfo)
+{
+	struct bnx2x *bp = netdev_priv(dev);
+	int phy_idx;
+	if (!netif_running(dev)) {
+		DP(BNX2X_MSG_ETHTOOL  | BNX2X_MSG_NVM,
+		   "cannot access eeprom when the interface is down\n");
+		return -EAGAIN;
+	}
+
+	phy_idx = bnx2x_get_cur_phy_idx(bp);
+	switch (bp->link_params.phy[phy_idx].media_type) {
+	case ETH_PHY_SFPP_10G_FIBER:
+	case ETH_PHY_SFP_1G_FIBER:
+	case ETH_PHY_DA_TWINAX:
+		modinfo->type = ETH_MODULE_SFF_8079;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int bnx2x_nvram_write_dword(struct bnx2x *bp, u32 offset, u32 val,
 				   u32 cmd_flags)
 {
@@ -2915,7 +2974,9 @@ static const struct ethtool_ops bnx2x_ethtool_ops = {
 	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
 	.get_channels		= bnx2x_get_channels,
 	.set_channels		= bnx2x_set_channels,
-	.get_eee		= bnx2x_get_eee,
+	.get_module_info	= bnx2x_get_module_info,
+	.get_module_eeprom	= bnx2x_get_module_eeprom,
+	.get_eee		= bnx2x_get_eee,
 	.set_eee		= bnx2x_set_eee,
 };
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 36845f7..8803f77 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -7696,7 +7696,7 @@ static int bnx2x_8726_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	u16 val = 0;
 	u16 i;
-	if (byte_cnt > 16) {
+	if (byte_cnt > SFP_EEPROM_PAGE_SIZE) {
 		DP(NETIF_MSG_LINK,
 		   "Reading from eeprom is limited to 0xf\n");
 		return -EINVAL;
@@ -7765,7 +7765,8 @@ static int bnx2x_warpcore_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 	u32 data_array[4];
 	u16 addr32;
 	struct bnx2x *bp = params->bp;
-	if (byte_cnt > 16) {
+
+	if (byte_cnt > SFP_EEPROM_PAGE_SIZE) {
 		DP(NETIF_MSG_LINK,
 		   "Reading from eeprom is limited to 16 bytes\n");
 		return -EINVAL;
@@ -7795,7 +7796,7 @@ static int bnx2x_8727_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 	struct bnx2x *bp = params->bp;
 	u16 val, i;
 
-	if (byte_cnt > 16) {
+	if (byte_cnt > SFP_EEPROM_PAGE_SIZE) {
 		DP(NETIF_MSG_LINK,
 		   "Reading from eeprom is limited to 0xf\n");
 		return -EINVAL;
@@ -7878,7 +7879,7 @@ int bnx2x_read_sfp_module_eeprom(struct bnx2x_phy *phy,
 				 struct link_params *params, u16 addr,
 				 u8 byte_cnt, u8 *o_buf)
 {
-	int rc = -EINVAL;
+	int rc = -EOPNOTSUPP;
 	switch (phy->type) {
 	case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8726:
 		rc = bnx2x_8726_read_sfp_module_eeprom(phy, params, addr,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
index 017236b..c05f9d9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h
@@ -41,6 +41,7 @@
 #define SPEED_AUTO_NEG		0
 #define SPEED_20000		20000
 
+#define SFP_EEPROM_PAGE_SIZE			16
 #define SFP_EEPROM_VENDOR_NAME_ADDR		0x14
 #define SFP_EEPROM_VENDOR_NAME_SIZE		16
 #define SFP_EEPROM_VENDOR_OUI_ADDR		0x25
-- 
1.7.9.rc2

^ permalink raw reply related

* RE
From: Mrs Donna Kwok @ 2012-06-20  8:35 UTC (permalink / raw)


Hello,
Please I have a transaction for you, if interested contact me.
Donna Kwok.

^ permalink raw reply

* [PATCH net-next 3/6] bnx2x: treat 0 speed as link down (copper)
From: Yuval Mintz @ 2012-06-20  8:49 UTC (permalink / raw)
  To: davem, netdev; +Cc: eilong, Yuval Mintz, Yaniv Rosner
In-Reply-To: <1340182175-916-1-git-send-email-yuvalmin@broadcom.com>

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Yaniv Rosner <yaniv.rosner@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c |   22 ++++++++++++----------
 1 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 121275e..17ddb9f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -10179,17 +10179,19 @@ static u8 bnx2x_848xx_read_status(struct bnx2x_phy *phy,
 		DP(NETIF_MSG_LINK, "Legacy speed status = 0x%x\n",
 		   legacy_status);
 		link_up = ((legacy_status & (1<<11)) == (1<<11));
-		if (link_up) {
-			legacy_speed = (legacy_status & (3<<9));
-			if (legacy_speed == (0<<9))
-				vars->line_speed = SPEED_10;
-			else if (legacy_speed == (1<<9))
-				vars->line_speed = SPEED_100;
-			else if (legacy_speed == (2<<9))
-				vars->line_speed = SPEED_1000;
-			else /* Should not happen */
-				vars->line_speed = 0;
+		legacy_speed = (legacy_status & (3<<9));
+		if (legacy_speed == (0<<9))
+			vars->line_speed = SPEED_10;
+		else if (legacy_speed == (1<<9))
+			vars->line_speed = SPEED_100;
+		else if (legacy_speed == (2<<9))
+			vars->line_speed = SPEED_1000;
+		else { /* Should not happen: Treat as link down */
+			vars->line_speed = 0;
+			link_up = 0;
+		}
 
+		if (link_up) {
 			if (legacy_status & (1<<8))
 				vars->duplex = DUPLEX_FULL;
 			else
-- 
1.7.9.rc2

^ permalink raw reply related

* Re: [RFC net-next 07/14] Fix intel/ixgbe
From: Eric Dumazet @ 2012-06-20  8:55 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: Yuval Mintz, netdev, davem, eilong, Jeff Kirsher
In-Reply-To: <4FE0A0CD.5050909@intel.com>

On Tue, 2012-06-19 at 08:54 -0700, Alexander Duyck wrote:

> This patch doesn't limit the number of queues.  It is limiting the
> number of interrupts.  The two are not directly related as we can
> support multiple queues per interrupt.
> 
> Also this change assumes we are only using receive side scaling.  We
> have other features such as DCB, FCoE, and Flow Director which require
> additional queues.
> 

Yet, it would be good if ixgbe doesnt allocate 36 queues on a 4 cpu
machine.

"tc -s class show dev eth0" output is full of not used classes.

^ permalink raw reply

* Re: [PATCH] usbnet: Activate halt interrupt endpoint before re-submit URB
From: Oliver Neukum @ 2012-06-20  8:58 UTC (permalink / raw)
  To: Ming Lei; +Cc: Huajun Li, David Miller, stern, linux-usb, netdev
In-Reply-To: <CACVXFVPDV5v3gW5wz44kBsXm0KreHJnkvoxEPZhnLVoaTDw-Yw@mail.gmail.com>

Am Mittwoch, 20. Juni 2012, 10:07:55 schrieb Ming Lei:
> On Wed, Jun 20, 2012 at 4:00 PM, Oliver Neukum <oneukum@suse.de> wrote:
> >
> > Very well, on second thought, this patch makes sense.
> > Could you resend and I'll ack?
> 
> BTW, maybe it is better to add below
> 
>     usbnet_defer_kevent(dev, EVENT_STS_HALT);
> 
> for -EPIPE returned from usb_urb_submit if it will be resent.

Why? If it failed once it'll probably also fail the next time.
In that case we'd need to do something more intrusive
like resetting the device, but that cannot be done well
in the generic usbnet part.

	Regards
		Oliver

^ permalink raw reply

* Re: linux-next: build failure after merge of the net-next tree
From: Federico Vaga @ 2012-06-20  9:03 UTC (permalink / raw)
  To: Marc Kleine-Budde
  Cc: Bhupesh SHARMA, David Miller, sfr@canb.auug.org.au,
	netdev@vger.kernel.org, linux-next@vger.kernel.org,
	linux-kernel@vger.kernel.org, Giancarlo ASNAGHI,
	wg@grandegger.com
In-Reply-To: <4FE18A5E.204@pengutronix.de>

> I think the driver is currently used/tested only on ARM only, so what
> about adding a HAVE_CLK dependency, until Viresh Kumar's patches are
> available in net-next. Later we can remove it.

This driver is tested only on an x86 with AMBA peripherals on PCI. We 
are developing the clk side for this new platform. 

-- 
Federico Vaga

^ permalink raw reply

* Re: [PATCH] net: Update netdev_alloc_frag to work more efficiently with TCP and GRO
From: David Miller @ 2012-06-20  9:04 UTC (permalink / raw)
  To: eric.dumazet; +Cc: alexander.h.duyck, netdev, jeffrey.t.kirsher
In-Reply-To: <1340181847.4604.840.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 20 Jun 2012 10:44:07 +0200

> (put_page assumes order-0 page)

But it certainly can handle compound pages, I thought?

^ permalink raw reply

* Re: linux-next: build failure after merge of the net-next tree
From: David Miller @ 2012-06-20  9:06 UTC (permalink / raw)
  To: federico.vaga
  Cc: mkl, bhupesh.sharma, sfr, netdev, linux-next, linux-kernel,
	giancarlo.asnaghi, wg
In-Reply-To: <3170667.1PZa2330KE@harkonnen>

From: Federico Vaga <federico.vaga@gmail.com>
Date: Wed, 20 Jun 2012 11:03:08 +0200

>> I think the driver is currently used/tested only on ARM only, so what
>> about adding a HAVE_CLK dependency, until Viresh Kumar's patches are
>> available in net-next. Later we can remove it.
> 
> This driver is tested only on an x86 with AMBA peripherals on PCI. We 
> are developing the clk side for this new platform. 

Then the driver should NEVER have been submitted without the
required infrastructure in place first.

If what you say is true, I'm extremely pissed off about this now.

This submission was a complete cluster fuck, and it means the person
who submitted this to me didn't test the damn build even.

^ permalink raw reply

* Re: 10GBE performance drop with net.ipv4.tcp_timestamps=0
From: Stefan Priebe - Profihost AG @ 2012-06-20  9:06 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Netdev List
In-Reply-To: <1340181676.4604.838.camel@edumazet-glaptop>

Am 20.06.2012 10:41, schrieb Eric Dumazet:
> On Wed, 2012-06-20 at 10:21 +0200, Stefan Priebe - Profihost AG wrote:
>> Am 20.06.2012 09:22, schrieb Eric Dumazet:
> Yes, you already told that in subject line.
>
> single tcp flow ?
I use iperf - i think it uses just a single tcp flow. But i'm not sure.

> You seem to have a switch or something that drops packets in this case.
> You could try to rate limit to 9Gb/s and see if it is better.
Sadly i can't rate limit to 9Gbit/s on the switch.

> Here, I roughly have same bandwidth with tcp_timestamps on or off, with
> ixgbe cards and net-next kernels.
Mhm strange. Do you have any vague idea what could cause this? Any wrong 
reordering of the packets without tcp_timestamps?

Stefan

^ permalink raw reply

* Re: 10GBE performance drop with net.ipv4.tcp_timestamps=0
From: Stefan Priebe - Profihost AG @ 2012-06-20  9:12 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Netdev List
In-Reply-To: <4FE192A1.6000000@profihost.ag>

Am 20.06.2012 11:06, schrieb Stefan Priebe - Profihost AG:
>> You seem to have a switch or something that drops packets in this case.
>> You could try to rate limit to 9Gb/s and see if it is better.
> Sadly i can't rate limit to 9Gbit/s on the switch.
>
>> Here, I roughly have same bandwidth with tcp_timestamps on or off, with
>> ixgbe cards and net-next kernels.
> Mhm strange. Do you have any vague idea what could cause this? Any wrong
> reordering of the packets without tcp_timestamps?

I've now done another test without the switch. So both systems where 
direct attached and still the same. Without tcp_timstamps speed drops to 
2-4Gbit/s.

Stefan

^ permalink raw reply

* Re: [PATCH] net: Update netdev_alloc_frag to work more efficiently with TCP and GRO
From: Eric Dumazet @ 2012-06-20  9:14 UTC (permalink / raw)
  To: David Miller; +Cc: alexander.h.duyck, netdev, jeffrey.t.kirsher
In-Reply-To: <20120620.020401.1089501454050120720.davem@davemloft.net>

On Wed, 2012-06-20 at 02:04 -0700, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Wed, 20 Jun 2012 10:44:07 +0200
> 
> > (put_page assumes order-0 page)
> 
> But it certainly can handle compound pages, I thought?

Yes, I forgot the GFP_COMP ;)

Oh well, time for a coffee

^ permalink raw reply

* Re: 10GBE performance drop with net.ipv4.tcp_timestamps=0
From: Eric Dumazet @ 2012-06-20  9:16 UTC (permalink / raw)
  To: Stefan Priebe - Profihost AG; +Cc: Linux Netdev List
In-Reply-To: <4FE192A1.6000000@profihost.ag>

On Wed, 2012-06-20 at 11:06 +0200, Stefan Priebe - Profihost AG wrote:
> Am 20.06.2012 10:41, schrieb Eric Dumazet:
> > On Wed, 2012-06-20 at 10:21 +0200, Stefan Priebe - Profihost AG wrote:
> >> Am 20.06.2012 09:22, schrieb Eric Dumazet:
> > Yes, you already told that in subject line.
> >
> > single tcp flow ?
> I use iperf - i think it uses just a single tcp flow. But i'm not sure.
> 
> > You seem to have a switch or something that drops packets in this case.
> > You could try to rate limit to 9Gb/s and see if it is better.
> Sadly i can't rate limit to 9Gbit/s on the switch.

I was suggesting rate limiting on your linux sender machine, just to
verify if its indeed a problem on the path.

Its a matter of a "tc qdisc ..." command ;)

Or maybe iperf has an option for rate limiting.

^ permalink raw reply

* Re: 10GBE performance drop with net.ipv4.tcp_timestamps=0
From: Eric Dumazet @ 2012-06-20  9:17 UTC (permalink / raw)
  To: Stefan Priebe - Profihost AG; +Cc: Linux Netdev List
In-Reply-To: <4FE193FA.5030802@profihost.ag>

On Wed, 2012-06-20 at 11:12 +0200, Stefan Priebe - Profihost AG wrote:
> Am 20.06.2012 11:06, schrieb Stefan Priebe - Profihost AG:
> >> You seem to have a switch or something that drops packets in this case.
> >> You could try to rate limit to 9Gb/s and see if it is better.
> > Sadly i can't rate limit to 9Gbit/s on the switch.
> >
> >> Here, I roughly have same bandwidth with tcp_timestamps on or off, with
> >> ixgbe cards and net-next kernels.
> > Mhm strange. Do you have any vague idea what could cause this? Any wrong
> > reordering of the packets without tcp_timestamps?
> 
> I've now done another test without the switch. So both systems where 
> direct attached and still the same. Without tcp_timstamps speed drops to 
> 2-4Gbit/s.
> 
> Stefan


If you exchange sender/receiver role between linux kernel versions, is
it the same problem ?

^ permalink raw reply

* Re: 10GBE performance drop with net.ipv4.tcp_timestamps=0
From: Stefan Priebe - Profihost AG @ 2012-06-20  9:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Netdev List
In-Reply-To: <1340183854.4604.847.camel@edumazet-glaptop>

Am 20.06.2012 11:17, schrieb Eric Dumazet:
> On Wed, 2012-06-20 at 11:12 +0200, Stefan Priebe - Profihost AG wrote:
>> Am 20.06.2012 11:06, schrieb Stefan Priebe - Profihost AG:
>>>> You seem to have a switch or something that drops packets in this case.
>>>> You could try to rate limit to 9Gb/s and see if it is better.
>>> Sadly i can't rate limit to 9Gbit/s on the switch.

> If you exchange sender/receiver role between linux kernel versions, is
> it the same problem ?
I'm testing in both directions. So both are sending and receiving.

I've now made tests with only one sending an the other receiving.

When server B is the sender i get 4Gbit/s. When server A is the sender i 
get full 9,9Gbit/s.

Stefan

^ permalink raw reply

* Re: 10GBE performance drop with net.ipv4.tcp_timestamps=0
From: Eric Dumazet @ 2012-06-20  9:28 UTC (permalink / raw)
  To: Stefan Priebe - Profihost AG; +Cc: Linux Netdev List
In-Reply-To: <4FE196F4.3040900@profihost.ag>

On Wed, 2012-06-20 at 11:25 +0200, Stefan Priebe - Profihost AG wrote:
> Am 20.06.2012 11:17, schrieb Eric Dumazet:
> > On Wed, 2012-06-20 at 11:12 +0200, Stefan Priebe - Profihost AG wrote:
> >> Am 20.06.2012 11:06, schrieb Stefan Priebe - Profihost AG:
> >>>> You seem to have a switch or something that drops packets in this case.
> >>>> You could try to rate limit to 9Gb/s and see if it is better.
> >>> Sadly i can't rate limit to 9Gbit/s on the switch.
> 
> > If you exchange sender/receiver role between linux kernel versions, is
> > it the same problem ?
> I'm testing in both directions. So both are sending and receiving.
> 
> I've now made tests with only one sending an the other receiving.
> 
> When server B is the sender i get 4Gbit/s. When server A is the sender i 
> get full 9,9Gbit/s.
> 
> Stefan

To rule out bad hardware, could you try a 3.5-rc3 kernel on B ?

^ permalink raw reply

* Re: [PATCH v3] ipv4: Early TCP socket demux.
From: David Miller @ 2012-06-20  9:30 UTC (permalink / raw)
  To: ja; +Cc: netdev
In-Reply-To: <20120620.005753.545788933895952782.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Wed, 20 Jun 2012 00:57:53 -0700 (PDT)

> From: Julian Anastasov <ja@ssi.bg>
> Date: Wed, 20 Jun 2012 10:00:37 +0300 (EEST)
> 
>> 			if (skb->dev != dst->dev)
>> 				dst = NULL;
> 
> That makes the most sense.

Doesn't work, dst->dev is &net->loopback_dev for these locally
destined input routes.

We have to instead check rt->rt_iif or similar.

^ permalink raw reply

* Re: 10GBE performance drop with net.ipv4.tcp_timestamps=0
From: Stefan Priebe - Profihost AG @ 2012-06-20  9:33 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Linux Netdev List
In-Reply-To: <1340184528.4604.849.camel@edumazet-glaptop>

Am 20.06.2012 11:28, schrieb Eric Dumazet:
> On Wed, 2012-06-20 at 11:25 +0200, Stefan Priebe - Profihost AG wrote:
>> Am 20.06.2012 11:17, schrieb Eric Dumazet:
>>> On Wed, 2012-06-20 at 11:12 +0200, Stefan Priebe - Profihost AG wrote:
>>>> Am 20.06.2012 11:06, schrieb Stefan Priebe - Profihost AG:
>>>>>> You seem to have a switch or something that drops packets in this case.
>>>>>> You could try to rate limit to 9Gb/s and see if it is better.
>>>>> Sadly i can't rate limit to 9Gbit/s on the switch.
>>
>>> If you exchange sender/receiver role between linux kernel versions, is
>>> it the same problem ?
>> I'm testing in both directions. So both are sending and receiving.
>>
>> I've now made tests with only one sending an the other receiving.
>>
>> When server B is the sender i get 4Gbit/s. When server A is the sender i
>> get full 9,9Gbit/s.
>>
> To rule out bad hardware, could you try a 3.5-rc3 kernel on B ?

Sure. In that case i get 4Gbit/s in both variants. I also tried two 
other different machines same result.

Stefan

^ permalink raw reply

* [PATCH 00/17] Swap-over-NBD without deadlocking V12
From: Mel Gorman @ 2012-06-20  9:35 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux-MM, Linux-Netdev, LKML, David Miller, Neil Brown,
	Peter Zijlstra, Mike Christie, Eric B Munson, Mel Gorman

Changelog since V11
  o Rebase to 3.5-rc3
  o Correct order of page flag free				      (sebastian)

Changelog since V10
  o Rebase to 3.4-rc5
  o Coding style fixups						      (davem)
  o API consistency						      (davem)
  o Rename sk_allocation to sk_gfp_atomic and use only when necessary (davem)
  o Use static branches for sk_memalloc_socks			      (davem)
  o Use static branch checks in fast paths			      (davem)
  o Document concerns about PF_MEMALLOC leaking flags		      (davem)
  o Locking fix in slab						      (mel)

Changelog since V9
  o Rebase to 3.4-rc5
  o Clarify comment on why PF_MEMALLOC is cleared in softirq handling (akpm)
  o Only set page->pfmemalloc if ALLOC_NO_WATERMARKS was required     (rientjes)

Changelog since V8
  o Rebase to 3.4-rc2
  o Use page flag instead of slab fields to keep structures the same size
  o Properly detect allocations from softirq context that use PF_MEMALLOC
  o Ensure kswapd does not sleep while processes are throttled
  o Do not accidentally throttle !_GFP_FS processes indefinitely

Changelog since V7
  o Rebase to 3.3-rc2
  o Take greater care propagating page->pfmemalloc to skb
  o Propagate pfmemalloc from netdev_alloc_page to skb where possible
  o Release RCU lock properly on preempt kernel

Changelog since V6
  o Rebase to 3.1-rc8
  o Use wake_up instead of wake_up_interruptible()
  o Do not throttle kernel threads
  o Avoid a potential race between kswapd going to sleep and processes being
    throttled

Changelog since V5
  o Rebase to 3.1-rc5

Changelog since V4
  o Update comment clarifying what protocols can be used		(Michal)
  o Rebase to 3.0-rc3

Changelog since V3
  o Propogate pfmemalloc from packet fragment pages to skb		(Neil)
  o Rebase to 3.0-rc2

Changelog since V2
  o Document that __GFP_NOMEMALLOC overrides __GFP_MEMALLOC		(Neil)
  o Use wait_event_interruptible					(Neil)
  o Use !! when casting to bool to avoid any possibilitity of type
    truncation								(Neil)
  o Nicer logic when using skb_pfmemalloc_protocol			(Neil)

Changelog since V1
  o Rebase on top of mmotm
  o Use atomic_t for memalloc_socks		(David Miller)
  o Remove use of sk_memalloc_socks in vmscan	(Neil Brown)
  o Check throttle within prepare_to_wait	(Neil Brown)
  o Add statistics on throttling instead of printk

When a user or administrator requires swap for their application, they
create a swap partition and file, format it with mkswap and activate it
with swapon. Swap over the network is considered as an option in diskless
systems. The two likely scenarios are when blade servers are used as part
of a cluster where the form factor or maintenance costs do not allow the
use of disks and thin clients.

The Linux Terminal Server Project recommends the use of the
Network Block Device (NBD) for swap according to the manual at
https://sourceforge.net/projects/ltsp/files/Docs-Admin-Guide/LTSPManual.pdf/download
There is also documentation and tutorials on how to setup swap over NBD
at places like https://help.ubuntu.com/community/UbuntuLTSP/EnableNBDSWAP
The nbd-client also documents the use of NBD as swap. Despite this, the
fact is that a machine using NBD for swap can deadlock within minutes if
swap is used intensively. This patch series addresses the problem.

The core issue is that network block devices do not use mempools like
normal block devices do. As the host cannot control where they receive
packets from, they cannot reliably work out in advance how much memory
they might need. Some years ago, Peter Zijlstra developed a series of
patches that supported swap over an NFS that at least one distribution
is carrying within their kernels. This patch series borrows very heavily
from Peter's work to support swapping over NBD as a pre-requisite to
supporting swap-over-NFS. The bulk of the complexity is concerned with
preserving memory that is allocated from the PFMEMALLOC reserves for use
by the network layer which is needed for both NBD and NFS.

Patch 1 adds knowledge of the PFMEMALLOC reserves to SLAB and SLUB to
	preserve access to pages allocated under low memory situations
	to callers that are freeing memory.

Patch 2 introduces __GFP_MEMALLOC to allow access to the PFMEMALLOC
	reserves without setting PFMEMALLOC.

Patch 3 opens the possibility for softirqs to use PFMEMALLOC reserves
	for later use by network packet processing.

Patch 4 ignores memory policies when ALLOC_NO_WATERMARKS is set.

Patch 5 only sets page->pfmemalloc when ALLOC_NO_WATERMARKS was required

Patches 6-13 allows network processing to use PFMEMALLOC reserves when
	the socket has been marked as being used by the VM to clean pages. If
	packets are received and stored in pages that were allocated under
	low-memory situations and are unrelated to the VM, the packets
	are dropped.

	Patch 11 reintroduces __skb_alloc_page which the networking
	folk may object to but is needed in some cases to propogate
	pfmemalloc from a newly allocated page to an skb. If there is a
	strong objection, this patch can be dropped with the impact being
	that swap-over-network will be slower in some cases but it should
	not fail.

Patch 14 is a micro-optimisation to avoid a function call in the
	common case.

Patch 15 tags NBD sockets as being SOCK_MEMALLOC so they can use
	PFMEMALLOC if necessary.

Patch 16 notes that it is still possible for the PFMEMALLOC reserve
	to be depleted. To prevent this, direct reclaimers get throttled on
	a waitqueue if 50% of the PFMEMALLOC reserves are depleted.  It is
	expected that kswapd and the direct reclaimers already running
	will clean enough pages for the low watermark to be reached and
	the throttled processes are woken up.

Patch 17 adds a statistic to track how often processes get throttled

Some basic performance testing was run using kernel builds, netperf
on loopback for UDP and TCP, hackbench (pipes and sockets), iozone
and sysbench. Each of them were expected to use the sl*b allocators
reasonably heavily but there did not appear to be significant
performance variances.

For testing swap-over-NBD, a machine was booted with 2G of RAM with a
swapfile backed by NBD. 8*NUM_CPU processes were started that create
anonymous memory mappings and read them linearly in a loop. The total
size of the mappings were 4*PHYSICAL_MEMORY to use swap heavily under
memory pressure.

Without the patches and using SLUB, the machine locks up within minutes and
runs to completion with them applied. With SLAB, the story is different
as an unpatched kernel run to completion. However, the patched kernel
completed the test 45% faster.

MICRO
                                         3.5.0-rc2 3.5.0-rc2
					 vanilla     swapnbd
Unrecognised test vmscan-anon-mmap-write
MMTests Statistics: duration
Sys Time Running Test (seconds)             197.80    173.07
User+Sys Time Running Test (seconds)        206.96    182.03
Total Elapsed Time (seconds)               3240.70   1762.09


 drivers/block/nbd.c                               |    6 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c          |    2 +-
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c        |    2 +-
 drivers/net/ethernet/intel/igb/igb_main.c         |    2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |    2 +-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |    3 +-
 drivers/net/usb/cdc-phonet.c                      |    2 +-
 drivers/usb/gadget/f_phonet.c                     |    2 +-
 include/linux/gfp.h                               |   13 +-
 include/linux/mm_types.h                          |    9 +
 include/linux/mmzone.h                            |    1 +
 include/linux/page-flags.h                        |   28 +++
 include/linux/sched.h                             |    7 +
 include/linux/skbuff.h                            |   80 +++++++-
 include/linux/vm_event_item.h                     |    1 +
 include/net/sock.h                                |   26 ++-
 include/trace/events/gfpflags.h                   |    1 +
 kernel/softirq.c                                  |    9 +
 mm/page_alloc.c                                   |   46 ++++-
 mm/slab.c                                         |  216 +++++++++++++++++++--
 mm/slub.c                                         |   28 ++-
 mm/vmscan.c                                       |  131 ++++++++++++-
 mm/vmstat.c                                       |    1 +
 net/caif/caif_socket.c                            |    2 +-
 net/core/dev.c                                    |   53 ++++-
 net/core/filter.c                                 |    8 +
 net/core/skbuff.c                                 |  119 +++++++++---
 net/core/sock.c                                   |   56 +++++-
 net/ipv4/tcp_input.c                              |   12 +-
 net/ipv4/tcp_output.c                             |    9 +-
 net/ipv6/tcp_ipv6.c                               |    8 +-
 net/sctp/ulpevent.c                               |    2 +-
 32 files changed, 787 insertions(+), 100 deletions(-)

-- 
1.7.9.2

^ permalink raw reply

* [PATCH 01/17] mm: sl[au]b: Add knowledge of PFMEMALLOC reserve pages
From: Mel Gorman @ 2012-06-20  9:35 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux-MM, Linux-Netdev, LKML, David Miller, Neil Brown,
	Peter Zijlstra, Mike Christie, Eric B Munson, Mel Gorman
In-Reply-To: <1340184920-22288-1-git-send-email-mgorman@suse.de>

Allocations of pages below the min watermark run a risk of the
machine hanging due to a lack of memory. To prevent this, only
callers who have PF_MEMALLOC or TIF_MEMDIE set and are not processing
an interrupt are allowed to allocate with ALLOC_NO_WATERMARKS. Once
they are allocated to a slab though, nothing prevents other callers
consuming free objects within those slabs. This patch limits access
to slab pages that were alloced from the PFMEMALLOC reserves.

When this patch is applied, pages allocated from below the low watermark are
returned with page->pfmemalloc set and it is up to the caller to determine
how the page should be protected. SLAB restricts access to any page with
page->pfmemalloc set to callers which are known to able to access the
PFMEMALLOC reserve. If one is not available, an attempt is made to allocate
a new page rather than use a reserve. SLUB is a bit more relaxed in that
it only records if the current per-CPU page was allocated from PFMEMALLOC
reserve and uses another partial slab if the caller does not have the
necessary GFP or process flags. This was found to be sufficient in tests
to avoid hangs due to SLUB generally maintaining smaller lists than SLAB.

In low-memory conditions it does mean that !PFMEMALLOC allocators
can fail a slab allocation even though free objects are available
because they are being preserved for callers that are freeing pages.

[a.p.zijlstra@chello.nl: Original implementation]
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mm_types.h   |    9 +++
 include/linux/page-flags.h |   28 +++++++
 mm/internal.h              |    3 +
 mm/page_alloc.c            |   27 +++++--
 mm/slab.c                  |  192 +++++++++++++++++++++++++++++++++++++++-----
 mm/slub.c                  |   27 ++++++-
 6 files changed, 261 insertions(+), 25 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index dad95bd..4bcc5b9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -54,6 +54,15 @@ struct page {
 		union {
 			pgoff_t index;		/* Our offset within mapping. */
 			void *freelist;		/* slub first free object */
+			bool pfmemalloc;	/* If set by the page allocator,
+						 * ALLOC_PFMEMALLOC was set
+						 * and the low watermark was not
+						 * met implying that the system
+						 * is under some pressure. The
+						 * caller should try ensure
+						 * this page is only used to
+						 * free other pages.
+						 */
 		};
 
 		union {
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c88d2a9..e66eb0d 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -453,6 +453,34 @@ static inline int PageTransTail(struct page *page)
 }
 #endif
 
+/*
+ * If network-based swap is enabled, sl*b must keep track of whether pages
+ * were allocated from pfmemalloc reserves.
+ */
+static inline int PageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	return PageActive(page);
+}
+
+static inline void SetPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	SetPageActive(page);
+}
+
+static inline void __ClearPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	__ClearPageActive(page);
+}
+
+static inline void ClearPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	ClearPageActive(page);
+}
+
 #ifdef CONFIG_MMU
 #define __PG_MLOCKED		(1 << PG_mlocked)
 #else
diff --git a/mm/internal.h b/mm/internal.h
index 2ba87fb..2d0dd52 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -273,6 +273,9 @@ static inline struct page *mem_map_next(struct page *iter,
 #define __paginginit __init
 #endif
 
+/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
+bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
+
 /* Memory initialisation debug and verification */
 enum mminit_level {
 	MMINIT_WARNING,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4403009..a259384 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1507,6 +1507,7 @@ failed:
 #define ALLOC_HARDER		0x10 /* try to alloc harder */
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
+#define ALLOC_PFMEMALLOC	0x80 /* Caller has PF_MEMALLOC set */
 
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
@@ -2264,16 +2265,22 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
-	if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
-		if (!in_interrupt() &&
-		    ((current->flags & PF_MEMALLOC) ||
-		     unlikely(test_thread_flag(TIF_MEMDIE))))
+	if ((current->flags & PF_MEMALLOC) ||
+			unlikely(test_thread_flag(TIF_MEMDIE))) {
+		alloc_flags |= ALLOC_PFMEMALLOC;
+
+		if (likely(!(gfp_mask & __GFP_NOMEMALLOC)) && !in_interrupt())
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
 
 	return alloc_flags;
 }
 
+bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
+{
+	return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_PFMEMALLOC);
+}
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2461,10 +2468,18 @@ nopage:
 	warn_alloc_failed(gfp_mask, order, NULL);
 	return page;
 got_pg:
+	/*
+	 * page->pfmemalloc is set when the caller had PFMEMALLOC set or is
+	 * been OOM killed. The expectation is that the caller is taking
+	 * steps that will free more memory. The caller should avoid the
+	 * page being used for !PFMEMALLOC purposes.
+	 */
+	page->pfmemalloc = !!(alloc_flags & ALLOC_PFMEMALLOC);
+
 	if (kmemcheck_enabled)
 		kmemcheck_pagealloc_alloc(page, order, gfp_mask);
-	return page;
 
+	return page;
 }
 
 /*
@@ -2515,6 +2530,8 @@ retry_cpuset:
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
+	else
+		page->pfmemalloc = false;
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 
diff --git a/mm/slab.c b/mm/slab.c
index e901a36..b190cac 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -123,6 +123,8 @@
 
 #include <trace/events/kmem.h>
 
+#include	"internal.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -151,6 +153,12 @@
 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
 #endif
 
+/*
+ * true if a page was allocated from pfmemalloc reserves for network-based
+ * swap
+ */
+static bool pfmemalloc_active __read_mostly;
+
 /* Legal flag mask for kmem_cache_create(). */
 #if DEBUG
 # define CREATE_MASK	(SLAB_RED_ZONE | \
@@ -256,9 +264,30 @@ struct array_cache {
 			 * Must have this definition in here for the proper
 			 * alignment of array_cache. Also simplifies accessing
 			 * the entries.
+			 *
+			 * Entries should not be directly dereferenced as
+			 * entries belonging to slabs marked pfmemalloc will
+			 * have the lower bits set SLAB_OBJ_PFMEMALLOC
 			 */
 };
 
+#define SLAB_OBJ_PFMEMALLOC	1
+static inline bool is_obj_pfmemalloc(void *objp)
+{
+	return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
+}
+
+static inline void set_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
+	return;
+}
+
+static inline void clear_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
+}
+
 /*
  * bootstrap: The caches do not work without cpuarrays anymore, but the
  * cpuarrays are allocated from the generic caches...
@@ -951,6 +980,102 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	return nc;
 }
 
+static inline bool is_slab_pfmemalloc(struct slab *slabp)
+{
+	struct page *page = virt_to_page(slabp->s_mem);
+
+	return PageSlabPfmemalloc(page);
+}
+
+/* Clears ac->pfmemalloc if no slabs have pfmalloc set */
+static void check_ac_pfmemalloc(struct kmem_cache *cachep,
+						struct array_cache *ac)
+{
+	struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()];
+	struct slab *slabp;
+	unsigned long flags;
+
+	if (!pfmemalloc_active)
+		return;
+
+	spin_lock_irqsave(&l3->list_lock, flags);
+	list_for_each_entry(slabp, &l3->slabs_full, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_partial, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_free, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	pfmemalloc_active = false;
+out:
+	spin_unlock_irqrestore(&l3->list_lock, flags);
+}
+
+static void *ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
+						gfp_t flags, bool force_refill)
+{
+	int i;
+	void *objp = ac->entry[--ac->avail];
+
+	/* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
+	if (unlikely(is_obj_pfmemalloc(objp))) {
+		struct kmem_list3 *l3;
+
+		if (gfp_pfmemalloc_allowed(flags)) {
+			clear_obj_pfmemalloc(&objp);
+			return objp;
+		}
+
+		/* The caller cannot use PFMEMALLOC objects, find another one */
+		for (i = 1; i < ac->avail; i++) {
+			/* If a !PFMEMALLOC object is found, swap them */
+			if (!is_obj_pfmemalloc(ac->entry[i])) {
+				objp = ac->entry[i];
+				ac->entry[i] = ac->entry[ac->avail];
+				ac->entry[ac->avail] = objp;
+				return objp;
+			}
+		}
+
+		/*
+		 * If there are empty slabs on the slabs_free list and we are
+		 * being forced to refill the cache, mark this one !pfmemalloc.
+		 */
+		l3 = cachep->nodelists[numa_mem_id()];
+		if (!list_empty(&l3->slabs_free) && force_refill) {
+			struct slab *slabp = virt_to_slab(objp);
+			ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+			clear_obj_pfmemalloc(&objp);
+			check_ac_pfmemalloc(cachep, ac);
+			return objp;
+		}
+
+		/* No !PFMEMALLOC objects available */
+		ac->avail++;
+		objp = NULL;
+	}
+
+	return objp;
+}
+
+static void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+								void *objp)
+{
+	if (unlikely(pfmemalloc_active)) {
+		/* Some pfmemalloc slabs exist, check if this is one */
+		struct page *page = virt_to_page(objp);
+		if (PageSlabPfmemalloc(page))
+			set_obj_pfmemalloc(&objp);
+	}
+
+	ac->entry[ac->avail++] = objp;
+}
+
 /*
  * Transfer objects in one arraycache to another.
  * Locking must be handled by the caller.
@@ -1127,7 +1252,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 			STATS_INC_ACOVERFLOW(cachep);
 			__drain_alien_cache(cachep, alien, nodeid);
 		}
-		alien->entry[alien->avail++] = objp;
+		ac_put_obj(cachep, alien, objp);
 		spin_unlock(&alien->lock);
 	} else {
 		spin_lock(&(cachep->nodelists[nodeid])->list_lock);
@@ -1809,6 +1934,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		return NULL;
 	}
 
+	/* Record if ALLOC_PFMEMALLOC was set when allocating the slab */
+	if (unlikely(page->pfmemalloc))
+		pfmemalloc_active = true;
+
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -1816,9 +1945,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	else
 		add_zone_page_state(page_zone(page),
 			NR_SLAB_UNRECLAIMABLE, nr_pages);
-	for (i = 0; i < nr_pages; i++)
+	for (i = 0; i < nr_pages; i++) {
 		__SetPageSlab(page + i);
 
+		if (page->pfmemalloc)
+			SetPageSlabPfmemalloc(page + i);
+	}
+
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
 
@@ -1851,6 +1984,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	while (i--) {
 		BUG_ON(!PageSlab(page));
 		__ClearPageSlab(page);
+		__ClearPageSlabPfmemalloc(page);
 		page++;
 	}
 	if (current->reclaim_state)
@@ -3120,16 +3254,19 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
+							bool force_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
 	struct array_cache *ac;
 	int node;
 
-retry:
 	check_irq_off();
 	node = numa_mem_id();
+	if (unlikely(force_refill))
+		goto force_grow;
+retry:
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3179,8 +3316,8 @@ retry:
 			STATS_INC_ACTIVE(cachep);
 			STATS_SET_HIGH(cachep);
 
-			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
-							    node);
+			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+									node));
 		}
 		check_slabp(cachep, slabp);
 
@@ -3199,18 +3336,22 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || force_refill))
 			return NULL;
 
 		if (!ac->avail)		/* objects refilled by interrupt? */
 			goto retry;
 	}
 	ac->touched = 1;
-	return ac->entry[--ac->avail];
+
+	return ac_get_obj(cachep, ac, flags, force_refill);
 }
 
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
@@ -3292,23 +3433,35 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
 	struct array_cache *ac;
+	bool force_refill = false;
 
 	check_irq_off();
 
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
-		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
-		objp = ac->entry[--ac->avail];
-	} else {
-		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = ac_get_obj(cachep, ac, flags, false);
+
 		/*
-		 * the 'ac' may be updated by cache_alloc_refill(),
-		 * and kmemleak_erase() requires its correct value.
+		 * Allow for the possibility all avail objects are not allowed
+		 * by the current flags
 		 */
-		ac = cpu_cache_get(cachep);
+		if (objp) {
+			STATS_INC_ALLOCHIT(cachep);
+			goto out;
+		}
+		force_refill = true;
 	}
+
+	STATS_INC_ALLOCMISS(cachep);
+	objp = cache_alloc_refill(cachep, flags, force_refill);
+	/*
+	 * the 'ac' may be updated by cache_alloc_refill(),
+	 * and kmemleak_erase() requires its correct value.
+	 */
+	ac = cpu_cache_get(cachep);
+
+out:
 	/*
 	 * To avoid a false negative, if an object that is in one of the
 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
@@ -3630,9 +3783,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 	struct kmem_list3 *l3;
 
 	for (i = 0; i < nr_objects; i++) {
-		void *objp = objpp[i];
+		void *objp;
 		struct slab *slabp;
 
+		clear_obj_pfmemalloc(&objpp[i]);
+		objp = objpp[i];
+
 		slabp = virt_to_slab(objp);
 		l3 = cachep->nodelists[node];
 		list_del(&slabp->list);
@@ -3750,7 +3906,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
 		cache_flusharray(cachep, ac);
 	}
 
-	ac->entry[ac->avail++] = objp;
+	ac_put_obj(cachep, ac, objp);
 }
 
 /**
diff --git a/mm/slub.c b/mm/slub.c
index 8c691fa..43738c9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -33,6 +33,8 @@
 
 #include <trace/events/kmem.h>
 
+#include "internal.h"
+
 /*
  * Lock order:
  *   1. slub_lock (Global Semaphore)
@@ -1370,6 +1372,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	inc_slabs_node(s, page_to_nid(page), page->objects);
 	page->slab = s;
 	__SetPageSlab(page);
+	if (page->pfmemalloc)
+		SetPageSlabPfmemalloc(page);
 
 	start = page_address(page);
 
@@ -1414,6 +1418,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		-pages);
 
 	__ClearPageSlab(page);
+	__ClearPageSlabPfmemalloc(page);
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
@@ -2156,6 +2161,14 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 	return object;
 }
 
+static inline bool pfmemalloc_match(struct kmem_cache_cpu *c, gfp_t gfpflags)
+{
+	if (unlikely(PageSlabPfmemalloc(c->page)))
+		return gfp_pfmemalloc_allowed(gfpflags);
+
+	return true;
+}
+
 /*
  * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
  * or deactivate the page.
@@ -2228,6 +2241,16 @@ redo:
 		goto new_slab;
 	}
 
+	/*
+	 * By rights, we should be searching for a slab page that was
+	 * PFMEMALLOC but right now, we are losing the pfmemalloc
+	 * information when the page leaves the per-cpu allocator
+	 */
+	if (unlikely(!pfmemalloc_match(c, gfpflags))) {
+		deactivate_slab(s, c);
+		goto new_slab;
+	}
+
 	/* must check again c->freelist in case of cpu migration or IRQ */
 	object = c->freelist;
 	if (object)
@@ -2332,8 +2355,8 @@ redo:
 	barrier();
 
 	object = c->freelist;
-	if (unlikely(!object || !node_match(c, node)))
-
+	if (unlikely(!object || !node_match(c, node) ||
+					!pfmemalloc_match(c, gfpflags)))
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-- 
1.7.9.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox