Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH 2/2] netdev/fec: fix ifconfig eth0 down hang issue
From: David Miller @ 2010-05-28  7:48 UTC (permalink / raw)
  To: bryan.wu; +Cc: afleming, s.hauer, gerg, amit.kucheria, netdev, linux-kernel
In-Reply-To: <4BFF5412.1030303@canonical.com>

From: Bryan Wu <bryan.wu@canonical.com>
Date: Fri, 28 May 2010 13:26:42 +0800

> Since this patch is for another bug and doesn't depend on my first one patch.
> Could you please review this?

Resubmit it as a formal, new, seperate patch submission and it will
get looked at.

^ permalink raw reply

* [PATCH net-next]atl1c: Add AR8151 v2 support and change L0s/L1 routine
From: jie.yang @ 2010-05-28  7:40 UTC (permalink / raw)
  To: davem; +Cc: Luis.Rodriguez, netdev, linux-kernel, Jie.Yang

From: Jie.Yang@atheros.com

Add AR8151 v2.0 Gigabit 1000 support
Change jumbo frame size to 6K
Update L0s/L1 rountine
Update atl1c_suspend routine.

Signed-off-by: Jie.Yang@atheros.com
---
 drivers/net/atl1c/atl1c.h      |    9 +-
 drivers/net/atl1c/atl1c_hw.c   |  107 +++++++++++--
 drivers/net/atl1c/atl1c_hw.h   |   49 ++++++-
 drivers/net/atl1c/atl1c_main.c |  348 +++++++++++++++++++++++-----------------
 4 files changed, 345 insertions(+), 168 deletions(-)

diff --git a/drivers/net/atl1c/atl1c.h b/drivers/net/atl1c/atl1c.h
index 84ae905..52abbbd 100644
--- a/drivers/net/atl1c/atl1c.h
+++ b/drivers/net/atl1c/atl1c.h
@@ -73,7 +73,8 @@
 #define FULL_DUPLEX        2
 
 #define AT_RX_BUF_SIZE		(ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN)
-#define MAX_JUMBO_FRAME_SIZE 	(9*1024)
+#define MAX_JUMBO_FRAME_SIZE	(6*1024)
+#define MAX_TSO_FRAME_SIZE      (7*1024)
 #define MAX_TX_OFFLOAD_THRESH	(9*1024)
 
 #define AT_MAX_RECEIVE_QUEUE    4
@@ -87,10 +88,11 @@
 #define AT_MAX_INT_WORK		5
 #define AT_TWSI_EEPROM_TIMEOUT 	100
 #define AT_HW_MAX_IDLE_DELAY 	10
-#define AT_SUSPEND_LINK_TIMEOUT 28
+#define AT_SUSPEND_LINK_TIMEOUT 100
 
 #define AT_ASPM_L0S_TIMER	6
 #define AT_ASPM_L1_TIMER	12
+#define AT_LCKDET_TIMER		12
 
 #define ATL1C_PCIE_L0S_L1_DISABLE 	0x01
 #define ATL1C_PCIE_PHY_RESET		0x02
@@ -316,6 +318,7 @@ enum atl1c_nic_type {
 	athr_l2c_b,
 	athr_l2c_b2,
 	athr_l1d,
+	athr_l1d_2,
 };
 
 enum atl1c_trans_queue {
@@ -392,6 +395,8 @@ struct atl1c_hw {
 	u16 subsystem_id;
 	u16 subsystem_vendor_id;
 	u8 revision_id;
+	u16 phy_id1;
+	u16 phy_id2;
 
 	u32 intr_mask;
 	u8 dmaw_dly_cnt;
diff --git a/drivers/net/atl1c/atl1c_hw.c b/drivers/net/atl1c/atl1c_hw.c
index f1389d6..d8501f0 100644
--- a/drivers/net/atl1c/atl1c_hw.c
+++ b/drivers/net/atl1c/atl1c_hw.c
@@ -37,6 +37,9 @@ int atl1c_check_eeprom_exist(struct atl1c_hw *hw)
 	if (data & TWSI_DEBUG_DEV_EXIST)
 		return 1;
 
+	AT_READ_REG(hw, REG_MASTER_CTRL, &data);
+	if (data & MASTER_CTRL_OTP_SEL)
+		return 1;
 	return 0;
 }
 
@@ -69,6 +72,8 @@ static int atl1c_get_permanent_address(struct atl1c_hw *hw)
 	u32 i;
 	u32 otp_ctrl_data;
 	u32 twsi_ctrl_data;
+	u32 ltssm_ctrl_data;
+	u32 wol_data;
 	u8  eth_addr[ETH_ALEN];
 	u16 phy_data;
 	bool raise_vol = false;
@@ -104,6 +109,15 @@ static int atl1c_get_permanent_address(struct atl1c_hw *hw)
 			udelay(20);
 			raise_vol = true;
 		}
+		/* close open bit of ReadOnly*/
+		AT_READ_REG(hw, REG_LTSSM_ID_CTRL, &ltssm_ctrl_data);
+		ltssm_ctrl_data &= ~LTSSM_ID_EN_WRO;
+		AT_WRITE_REG(hw, REG_LTSSM_ID_CTRL, ltssm_ctrl_data);
+
+		/* clear any WOL settings */
+		AT_WRITE_REG(hw, REG_WOL_CTRL, 0);
+		AT_READ_REG(hw, REG_WOL_CTRL, &wol_data);
+
 
 		AT_READ_REG(hw, REG_TWSI_CTRL, &twsi_ctrl_data);
 		twsi_ctrl_data |= TWSI_CTRL_SW_LDSTART;
@@ -119,17 +133,15 @@ static int atl1c_get_permanent_address(struct atl1c_hw *hw)
 	}
 	/* Disable OTP_CLK */
 	if ((hw->nic_type == athr_l1c || hw->nic_type == athr_l2c)) {
-		if (otp_ctrl_data & OTP_CTRL_CLK_EN) {
-			otp_ctrl_data &= ~OTP_CTRL_CLK_EN;
-			AT_WRITE_REG(hw, REG_OTP_CTRL, otp_ctrl_data);
-			AT_WRITE_FLUSH(hw);
-			msleep(1);
-		}
+		otp_ctrl_data &= ~OTP_CTRL_CLK_EN;
+		AT_WRITE_REG(hw, REG_OTP_CTRL, otp_ctrl_data);
+		msleep(1);
 	}
 	if (raise_vol) {
 		if (hw->nic_type == athr_l2c_b ||
 		    hw->nic_type == athr_l2c_b2 ||
-		    hw->nic_type == athr_l1d) {
+		    hw->nic_type == athr_l1d ||
+		    hw->nic_type == athr_l1d_2) {
 			atl1c_write_phy_reg(hw, MII_DBG_ADDR, 0x00);
 			if (atl1c_read_phy_reg(hw, MII_DBG_DATA, &phy_data))
 				goto out;
@@ -456,14 +468,22 @@ int atl1c_phy_reset(struct atl1c_hw *hw)
 
 	if (hw->nic_type == athr_l2c_b ||
 	    hw->nic_type == athr_l2c_b2 ||
-	    hw->nic_type == athr_l1d) {
+	    hw->nic_type == athr_l1d ||
+	    hw->nic_type == athr_l1d_2) {
 		atl1c_write_phy_reg(hw, MII_DBG_ADDR, 0x3B);
 		atl1c_read_phy_reg(hw, MII_DBG_DATA, &phy_data);
 		atl1c_write_phy_reg(hw, MII_DBG_DATA, phy_data & 0xFFF7);
 		msleep(20);
 	}
-
-	/*Enable PHY LinkChange Interrupt */
+	if (hw->nic_type == athr_l1d) {
+		atl1c_write_phy_reg(hw, MII_DBG_ADDR, 0x29);
+		atl1c_write_phy_reg(hw, MII_DBG_DATA, 0x929D);
+	}
+	if (hw->nic_type == athr_l1c || hw->nic_type == athr_l2c_b2
+		|| hw->nic_type == athr_l2c || hw->nic_type == athr_l2c) {
+		atl1c_write_phy_reg(hw, MII_DBG_ADDR, 0x29);
+		atl1c_write_phy_reg(hw, MII_DBG_DATA, 0xB6DD);
+	}
 	err = atl1c_write_phy_reg(hw, MII_IER, mii_ier_data);
 	if (err) {
 		if (netif_msg_hw(adapter))
@@ -482,12 +502,10 @@ int atl1c_phy_init(struct atl1c_hw *hw)
 	struct pci_dev *pdev = adapter->pdev;
 	int ret_val;
 	u16 mii_bmcr_data = BMCR_RESET;
-	u16 phy_id1, phy_id2;
 
-	if ((atl1c_read_phy_reg(hw, MII_PHYSID1, &phy_id1) != 0) ||
-		(atl1c_read_phy_reg(hw, MII_PHYSID2, &phy_id2) != 0)) {
-			if (netif_msg_link(adapter))
-				dev_err(&pdev->dev, "Error get phy ID\n");
+	if ((atl1c_read_phy_reg(hw, MII_PHYSID1, &hw->phy_id1) != 0) ||
+		(atl1c_read_phy_reg(hw, MII_PHYSID2, &hw->phy_id2) != 0)) {
+		dev_err(&pdev->dev, "Error get phy ID\n");
 		return -1;
 	}
 	switch (hw->media_type) {
@@ -572,6 +590,65 @@ int atl1c_get_speed_and_duplex(struct atl1c_hw *hw, u16 *speed, u16 *duplex)
 	return 0;
 }
 
+int atl1c_phy_power_saving(struct atl1c_hw *hw)
+{
+	struct atl1c_adapter *adapter = (struct atl1c_adapter *)hw->adapter;
+	struct pci_dev *pdev = adapter->pdev;
+	int ret = 0;
+	u16 autoneg_advertised = ADVERTISED_10baseT_Half;
+	u16 save_autoneg_advertised;
+	u16 phy_data;
+	u16 mii_lpa_data;
+	u16 speed = SPEED_0;
+	u16 duplex = FULL_DUPLEX;
+	int i;
+
+	atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
+	atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
+	if (phy_data & BMSR_LSTATUS) {
+		atl1c_read_phy_reg(hw, MII_LPA, &mii_lpa_data);
+		if (mii_lpa_data & LPA_10FULL)
+			autoneg_advertised = ADVERTISED_10baseT_Full;
+		else if (mii_lpa_data & LPA_10HALF)
+			autoneg_advertised = ADVERTISED_10baseT_Half;
+		else if (mii_lpa_data & LPA_100HALF)
+			autoneg_advertised = ADVERTISED_100baseT_Half;
+		else if (mii_lpa_data & LPA_100FULL)
+			autoneg_advertised = ADVERTISED_100baseT_Full;
+
+		save_autoneg_advertised = hw->autoneg_advertised;
+		hw->phy_configured = false;
+		hw->autoneg_advertised = autoneg_advertised;
+		if (atl1c_restart_autoneg(hw) != 0) {
+			dev_dbg(&pdev->dev, "phy autoneg failed\n");
+			ret = -1;
+		}
+		hw->autoneg_advertised = save_autoneg_advertised;
+
+		if (mii_lpa_data) {
+			for (i = 0; i < AT_SUSPEND_LINK_TIMEOUT; i++) {
+				mdelay(100);
+				atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
+				atl1c_read_phy_reg(hw, MII_BMSR, &phy_data);
+				if (phy_data & BMSR_LSTATUS) {
+					if (atl1c_get_speed_and_duplex(hw, &speed,
+									&duplex) != 0)
+						dev_dbg(&pdev->dev,
+							"get speed and duplex failed\n");
+					break;
+				}
+			}
+		}
+	} else {
+		speed = SPEED_10;
+		duplex = HALF_DUPLEX;
+	}
+	adapter->link_speed = speed;
+	adapter->link_duplex = duplex;
+
+	return ret;
+}
+
 int atl1c_restart_autoneg(struct atl1c_hw *hw)
 {
 	int err = 0;
diff --git a/drivers/net/atl1c/atl1c_hw.h b/drivers/net/atl1c/atl1c_hw.h
index 1eeb3ed..3dd6759 100644
--- a/drivers/net/atl1c/atl1c_hw.h
+++ b/drivers/net/atl1c/atl1c_hw.h
@@ -42,7 +42,7 @@ bool atl1c_read_eeprom(struct atl1c_hw *hw, u32 offset, u32 *p_value);
 int atl1c_phy_init(struct atl1c_hw *hw);
 int atl1c_check_eeprom_exist(struct atl1c_hw *hw);
 int atl1c_restart_autoneg(struct atl1c_hw *hw);
-
+int atl1c_phy_power_saving(struct atl1c_hw *hw);
 /* register definition */
 #define REG_DEVICE_CAP              	0x5C
 #define DEVICE_CAP_MAX_PAYLOAD_MASK     0x7
@@ -120,6 +120,12 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw);
 #define REG_PCIE_PHYMISC	    	0x1000
 #define PCIE_PHYMISC_FORCE_RCV_DET	0x4
 
+#define REG_PCIE_PHYMISC2		0x1004
+#define PCIE_PHYMISC2_SERDES_CDR_MASK	0x3
+#define PCIE_PHYMISC2_SERDES_CDR_SHIFT	16
+#define PCIE_PHYMISC2_SERDES_TH_MASK	0x3
+#define PCIE_PHYMISC2_SERDES_TH_SHIFT	18
+
 #define REG_TWSI_DEBUG			0x1108
 #define TWSI_DEBUG_DEV_EXIST		0x20000000
 
@@ -150,24 +156,28 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw);
 #define PM_CTRL_ASPM_L0S_EN		0x00001000
 #define PM_CTRL_CLK_SWH_L1		0x00002000
 #define PM_CTRL_CLK_PWM_VER1_1		0x00004000
-#define PM_CTRL_PCIE_RECV		0x00008000
+#define PM_CTRL_RCVR_WT_TIMER		0x00008000
 #define PM_CTRL_L1_ENTRY_TIMER_MASK	0xF
 #define PM_CTRL_L1_ENTRY_TIMER_SHIFT	16
 #define PM_CTRL_PM_REQ_TIMER_MASK	0xF
 #define PM_CTRL_PM_REQ_TIMER_SHIFT	20
-#define PM_CTRL_LCKDET_TIMER_MASK	0x3F
+#define PM_CTRL_LCKDET_TIMER_MASK	0xF
 #define PM_CTRL_LCKDET_TIMER_SHIFT	24
 #define PM_CTRL_EN_BUFS_RX_L0S		0x10000000
 #define PM_CTRL_SA_DLY_EN		0x20000000
 #define PM_CTRL_MAC_ASPM_CHK		0x40000000
 #define PM_CTRL_HOTRST			0x80000000
 
+#define REG_LTSSM_ID_CTRL		0x12FC
+#define LTSSM_ID_EN_WRO			0x1000
 /* Selene Master Control Register */
 #define REG_MASTER_CTRL			0x1400
 #define MASTER_CTRL_SOFT_RST            0x1
 #define MASTER_CTRL_TEST_MODE_MASK	0x3
 #define MASTER_CTRL_TEST_MODE_SHIFT	2
 #define MASTER_CTRL_BERT_START		0x10
+#define MASTER_CTRL_OOB_DIS_OFF		0x40
+#define MASTER_CTRL_SA_TIMER_EN		0x80
 #define MASTER_CTRL_MTIMER_EN           0x100
 #define MASTER_CTRL_MANUAL_INT          0x200
 #define MASTER_CTRL_TX_ITIMER_EN	0x400
@@ -220,6 +230,12 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw);
 		GPHY_CTRL_PWDOWN_HW	|\
 		GPHY_CTRL_PHY_IDDQ)
 
+#define GPHY_CTRL_POWER_SAVING (	\
+		GPHY_CTRL_SEL_ANA_RST	|\
+		GPHY_CTRL_HIB_EN	|\
+		GPHY_CTRL_HIB_PULSE	|\
+		GPHY_CTRL_PWDOWN_HW	|\
+		GPHY_CTRL_PHY_IDDQ)
 /* Block IDLE Status Register */
 #define REG_IDLE_STATUS  		0x1410
 #define IDLE_STATUS_MASK		0x00FF
@@ -287,6 +303,14 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw);
 #define SERDES_LOCK_DETECT          	0x1  /* SerDes lock detected. This signal
 					      * comes from Analog SerDes */
 #define SERDES_LOCK_DETECT_EN       	0x2  /* 1: Enable SerDes Lock detect function */
+#define SERDES_LOCK_STS_SELFB_PLL_SHIFT 0xE
+#define SERDES_LOCK_STS_SELFB_PLL_MASK  0x3
+#define SERDES_OVCLK_18_25		0x0
+#define SERDES_OVCLK_12_18		0x1
+#define SERDES_OVCLK_0_4		0x2
+#define SERDES_OVCLK_4_12		0x3
+#define SERDES_MAC_CLK_SLOWDOWN		0x20000
+#define SERDES_PYH_CLK_SLOWDOWN		0x40000
 
 /* MAC Control Register  */
 #define REG_MAC_CTRL         		0x1480
@@ -693,6 +717,21 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw);
 #define REG_MAC_TX_STATUS_BIN 		0x1760
 #define REG_MAC_TX_STATUS_END 		0x17c0
 
+#define REG_CLK_GATING_CTRL		0x1814
+#define CLK_GATING_DMAW_EN		0x0001
+#define CLK_GATING_DMAR_EN		0x0002
+#define CLK_GATING_TXQ_EN		0x0004
+#define CLK_GATING_RXQ_EN		0x0008
+#define CLK_GATING_TXMAC_EN		0x0010
+#define CLK_GATING_RXMAC_EN		0x0020
+
+#define CLK_GATING_EN_ALL	(CLK_GATING_DMAW_EN |\
+				 CLK_GATING_DMAR_EN |\
+				 CLK_GATING_TXQ_EN  |\
+				 CLK_GATING_RXQ_EN  |\
+				 CLK_GATING_TXMAC_EN|\
+				 CLK_GATING_RXMAC_EN)
+
 /* DEBUG ADDR */
 #define REG_DEBUG_DATA0 		0x1900
 #define REG_DEBUG_DATA1 		0x1904
@@ -734,6 +773,10 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw);
 
 #define MII_PHYSID1			0x02
 #define MII_PHYSID2			0x03
+#define L1D_MPW_PHYID1			0xD01C  /* V7 */
+#define L1D_MPW_PHYID2			0xD01D  /* V1-V6 */
+#define L1D_MPW_PHYID3			0xD01E  /* V8 */
+
 
 /* Autoneg Advertisement Register */
 #define MII_ADVERTISE			0x04
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index 1c3c046..c7b8ef5 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -21,7 +21,7 @@
 
 #include "atl1c.h"
 
-#define ATL1C_DRV_VERSION "1.0.0.2-NAPI"
+#define ATL1C_DRV_VERSION "1.0.1.0-NAPI"
 char atl1c_driver_name[] = "atl1c";
 char atl1c_driver_version[] = ATL1C_DRV_VERSION;
 #define PCI_DEVICE_ID_ATTANSIC_L2C      0x1062
@@ -29,7 +29,7 @@ char atl1c_driver_version[] = ATL1C_DRV_VERSION;
 #define PCI_DEVICE_ID_ATHEROS_L2C_B	0x2060 /* AR8152 v1.1 Fast 10/100 */
 #define PCI_DEVICE_ID_ATHEROS_L2C_B2	0x2062 /* AR8152 v2.0 Fast 10/100 */
 #define PCI_DEVICE_ID_ATHEROS_L1D	0x1073 /* AR8151 v1.0 Gigabit 1000 */
-
+#define PCI_DEVICE_ID_ATHEROS_L1D_2_0	0x1083 /* AR8151 v2.0 Gigabit 1000 */
 #define L2CB_V10			0xc0
 #define L2CB_V11			0xc1
 
@@ -97,7 +97,28 @@ static const u16 atl1c_rrd_addr_lo_regs[AT_MAX_RECEIVE_QUEUE] =
 
 static const u32 atl1c_default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
 	NETIF_MSG_LINK | NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP;
+static void atl1c_pcie_patch(struct atl1c_hw *hw)
+{
+	u32 data;
 
+	AT_READ_REG(hw, REG_PCIE_PHYMISC, &data);
+	data |= PCIE_PHYMISC_FORCE_RCV_DET;
+	AT_WRITE_REG(hw, REG_PCIE_PHYMISC, data);
+
+	if (hw->nic_type == athr_l2c_b && hw->revision_id == L2CB_V10) {
+		AT_READ_REG(hw, REG_PCIE_PHYMISC2, &data);
+
+		data &= ~(PCIE_PHYMISC2_SERDES_CDR_MASK <<
+			PCIE_PHYMISC2_SERDES_CDR_SHIFT);
+		data |= 3 << PCIE_PHYMISC2_SERDES_CDR_SHIFT;
+		data &= ~(PCIE_PHYMISC2_SERDES_TH_MASK <<
+			PCIE_PHYMISC2_SERDES_TH_SHIFT);
+		data |= 3 << PCIE_PHYMISC2_SERDES_TH_SHIFT;
+		AT_WRITE_REG(hw, REG_PCIE_PHYMISC2, data);
+	}
+}
+
+/* FIXME: no need any more ? */
 /*
  * atl1c_init_pcie - init PCIE module
  */
@@ -127,6 +148,11 @@ static void atl1c_reset_pcie(struct atl1c_hw *hw, u32 flag)
 	data &= ~PCIE_UC_SERVRITY_FCP;
 	AT_WRITE_REG(hw, REG_PCIE_UC_SEVERITY, data);
 
+	AT_READ_REG(hw, REG_LTSSM_ID_CTRL, &data);
+	data &= ~LTSSM_ID_EN_WRO;
+	AT_WRITE_REG(hw, REG_LTSSM_ID_CTRL, data);
+
+	atl1c_pcie_patch(hw);
 	if (flag & ATL1C_PCIE_L0S_L1_DISABLE)
 		atl1c_disable_l0s_l1(hw);
 	if (flag & ATL1C_PCIE_PHY_RESET)
@@ -135,7 +161,7 @@ static void atl1c_reset_pcie(struct atl1c_hw *hw, u32 flag)
 		AT_WRITE_REG(hw, REG_GPHY_CTRL,
 			GPHY_CTRL_DEFAULT | GPHY_CTRL_EXT_RESET);
 
-	msleep(1);
+	msleep(5);
 }
 
 /*
@@ -159,6 +185,7 @@ static inline void atl1c_irq_disable(struct atl1c_adapter *adapter)
 {
 	atomic_inc(&adapter->irq_sem);
 	AT_WRITE_REG(&adapter->hw, REG_IMR, 0);
+	AT_WRITE_REG(&adapter->hw, REG_ISR, ISR_DIS_INT);
 	AT_WRITE_FLUSH(&adapter->hw);
 	synchronize_irq(adapter->pdev->irq);
 }
@@ -231,15 +258,15 @@ static void atl1c_check_link_status(struct atl1c_adapter *adapter)
 
 	if ((phy_data & BMSR_LSTATUS) == 0) {
 		/* link down */
-		if (netif_carrier_ok(netdev)) {
-			hw->hibernate = true;
-			if (atl1c_stop_mac(hw) != 0)
-				if (netif_msg_hw(adapter))
-					dev_warn(&pdev->dev,
-						"stop mac failed\n");
-			atl1c_set_aspm(hw, false);
-		}
+		hw->hibernate = true;
+		if (atl1c_stop_mac(hw) != 0)
+			if (netif_msg_hw(adapter))
+				dev_warn(&pdev->dev, "stop mac failed\n");
+		atl1c_set_aspm(hw, false);
 		netif_carrier_off(netdev);
+		netif_stop_queue(netdev);
+		atl1c_phy_reset(hw);
+		atl1c_phy_init(&adapter->hw);
 	} else {
 		/* Link Up */
 		hw->hibernate = false;
@@ -308,6 +335,7 @@ static void atl1c_common_task(struct work_struct *work)
 	netdev = adapter->netdev;
 
 	if (adapter->work_event & ATL1C_WORK_EVENT_RESET) {
+		adapter->work_event &= ~ATL1C_WORK_EVENT_RESET;
 		netif_device_detach(netdev);
 		atl1c_down(adapter);
 		atl1c_up(adapter);
@@ -315,8 +343,11 @@ static void atl1c_common_task(struct work_struct *work)
 		return;
 	}
 
-	if (adapter->work_event & ATL1C_WORK_EVENT_LINK_CHANGE)
+	if (adapter->work_event & ATL1C_WORK_EVENT_LINK_CHANGE) {
+		adapter->work_event &= ~ATL1C_WORK_EVENT_LINK_CHANGE;
 		atl1c_check_link_status(adapter);
+	}
+	return;
 }
 
 
@@ -476,6 +507,13 @@ static int atl1c_change_mtu(struct net_device *netdev, int new_mtu)
 		netdev->mtu = new_mtu;
 		adapter->hw.max_frame_size = new_mtu;
 		atl1c_set_rxbufsize(adapter, netdev);
+		if (new_mtu > MAX_TSO_FRAME_SIZE) {
+			adapter->netdev->features &= ~NETIF_F_TSO;
+			adapter->netdev->features &= ~NETIF_F_TSO6;
+		} else {
+			adapter->netdev->features |= NETIF_F_TSO;
+			adapter->netdev->features |= NETIF_F_TSO6;
+		}
 		atl1c_down(adapter);
 		atl1c_up(adapter);
 		clear_bit(__AT_RESETTING, &adapter->flags);
@@ -613,6 +651,9 @@ static void atl1c_set_mac_type(struct atl1c_hw *hw)
 	case PCI_DEVICE_ID_ATHEROS_L1D:
 		hw->nic_type = athr_l1d;
 		break;
+	case PCI_DEVICE_ID_ATHEROS_L1D_2_0:
+		hw->nic_type = athr_l1d_2;
+		break;
 	default:
 		break;
 	}
@@ -627,9 +668,7 @@ static int atl1c_setup_mac_funcs(struct atl1c_hw *hw)
 	AT_READ_REG(hw, REG_PHY_STATUS, &phy_status_data);
 	AT_READ_REG(hw, REG_LINK_CTRL, &link_ctrl_data);
 
-	hw->ctrl_flags = ATL1C_INTR_CLEAR_ON_READ |
-			 ATL1C_INTR_MODRT_ENABLE  |
-			 ATL1C_RX_IPV6_CHKSUM	  |
+	hw->ctrl_flags = ATL1C_INTR_MODRT_ENABLE  |
 			 ATL1C_TXQ_MODE_ENHANCE;
 	if (link_ctrl_data & LINK_CTRL_L0S_EN)
 		hw->ctrl_flags |= ATL1C_ASPM_L0S_SUPPORT;
@@ -637,12 +676,12 @@ static int atl1c_setup_mac_funcs(struct atl1c_hw *hw)
 		hw->ctrl_flags |= ATL1C_ASPM_L1_SUPPORT;
 	if (link_ctrl_data & LINK_CTRL_EXT_SYNC)
 		hw->ctrl_flags |= ATL1C_LINK_EXT_SYNC;
+	hw->ctrl_flags |= ATL1C_ASPM_CTRL_MON;
 
 	if (hw->nic_type == athr_l1c ||
-	    hw->nic_type == athr_l1d) {
-		hw->ctrl_flags |= ATL1C_ASPM_CTRL_MON;
+	    hw->nic_type == athr_l1d ||
+	    hw->nic_type == athr_l1d_2)
 		hw->link_cap_flags |= ATL1C_LINK_CAP_1000M;
-	}
 	return 0;
 }
 /*
@@ -657,6 +696,8 @@ static int __devinit atl1c_sw_init(struct atl1c_adapter *adapter)
 {
 	struct atl1c_hw *hw   = &adapter->hw;
 	struct pci_dev	*pdev = adapter->pdev;
+	u32 revision;
+
 
 	adapter->wol = 0;
 	adapter->link_speed = SPEED_0;
@@ -669,7 +710,8 @@ static int __devinit atl1c_sw_init(struct atl1c_adapter *adapter)
 	hw->device_id = pdev->device;
 	hw->subsystem_vendor_id = pdev->subsystem_vendor;
 	hw->subsystem_id = pdev->subsystem_device;
-
+	AT_READ_REG(hw, PCI_CLASS_REVISION, &revision);
+	hw->revision_id = revision & 0xFF;
 	/* before link up, we assume hibernate is true */
 	hw->hibernate = true;
 	hw->media_type = MEDIA_TYPE_AUTO_SENSOR;
@@ -974,6 +1016,7 @@ static void atl1c_configure_des_ring(struct atl1c_adapter *adapter)
 	struct atl1c_cmb *cmb = (struct atl1c_cmb *) &adapter->cmb;
 	struct atl1c_smb *smb = (struct atl1c_smb *) &adapter->smb;
 	int i;
+	u32 data;
 
 	/* TPD */
 	AT_WRITE_REG(hw, REG_TX_BASE_ADDR_HI,
@@ -1017,6 +1060,23 @@ static void atl1c_configure_des_ring(struct atl1c_adapter *adapter)
 			(u32)((smb->dma & AT_DMA_HI_ADDR_MASK) >> 32));
 	AT_WRITE_REG(hw, REG_SMB_BASE_ADDR_LO,
 			(u32)(smb->dma & AT_DMA_LO_ADDR_MASK));
+	if (hw->nic_type == athr_l2c_b) {
+		AT_WRITE_REG(hw, REG_SRAM_RXF_LEN, 0x02a0L);
+		AT_WRITE_REG(hw, REG_SRAM_TXF_LEN, 0x0100L);
+		AT_WRITE_REG(hw, REG_SRAM_RXF_ADDR, 0x029f0000L);
+		AT_WRITE_REG(hw, REG_SRAM_RFD0_INFO, 0x02bf02a0L);
+		AT_WRITE_REG(hw, REG_SRAM_TXF_ADDR, 0x03bf02c0L);
+		AT_WRITE_REG(hw, REG_SRAM_TRD_ADDR, 0x03df03c0L);
+		AT_WRITE_REG(hw, REG_TXF_WATER_MARK, 0);	/* TX watermark, to enter l1 state.*/
+		AT_WRITE_REG(hw, REG_RXD_DMA_CTRL, 0);		/* RXD threshold.*/
+	}
+	if (hw->nic_type == athr_l2c_b || hw->nic_type == athr_l1d_2) {
+			/* Power Saving for L2c_B */
+		AT_READ_REG(hw, REG_SERDES_LOCK, &data);
+		data |= SERDES_MAC_CLK_SLOWDOWN;
+		data |= SERDES_PYH_CLK_SLOWDOWN;
+		AT_WRITE_REG(hw, REG_SERDES_LOCK, data);
+	}
 	/* Load all of base address above */
 	AT_WRITE_REG(hw, REG_LOAD_PTR, 1);
 }
@@ -1029,6 +1089,7 @@ static void atl1c_configure_tx(struct atl1c_adapter *adapter)
 	u16 tx_offload_thresh;
 	u32 txq_ctrl_data;
 	u32 extra_size = 0;     /* Jumbo frame threshold in QWORD unit */
+	u32 max_pay_load_data;
 
 	extra_size = ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN;
 	tx_offload_thresh = MAX_TX_OFFLOAD_THRESH;
@@ -1046,8 +1107,11 @@ static void atl1c_configure_tx(struct atl1c_adapter *adapter)
 			TXQ_NUM_TPD_BURST_SHIFT;
 	if (hw->ctrl_flags & ATL1C_TXQ_MODE_ENHANCE)
 		txq_ctrl_data |= TXQ_CTRL_ENH_MODE;
-	txq_ctrl_data |= (atl1c_pay_load_size[hw->dmar_block] &
+	max_pay_load_data = (atl1c_pay_load_size[hw->dmar_block] &
 			TXQ_TXF_BURST_NUM_MASK) << TXQ_TXF_BURST_NUM_SHIFT;
+	if (hw->nic_type == athr_l2c_b || hw->nic_type == athr_l2c_b2)
+		max_pay_load_data >>= 1;
+	txq_ctrl_data |= max_pay_load_data;
 
 	AT_WRITE_REG(hw, REG_TXQ_CTRL, txq_ctrl_data);
 }
@@ -1078,7 +1142,7 @@ static void atl1c_configure_rx(struct atl1c_adapter *adapter)
 	rxq_ctrl_data |= (hw->rss_hash_bits & RSS_HASH_BITS_MASK) <<
 			RSS_HASH_BITS_SHIFT;
 	if (hw->ctrl_flags & ATL1C_ASPM_CTRL_MON)
-		rxq_ctrl_data |= (ASPM_THRUPUT_LIMIT_100M &
+		rxq_ctrl_data |= (ASPM_THRUPUT_LIMIT_1M &
 			ASPM_THRUPUT_LIMIT_MASK) << ASPM_THRUPUT_LIMIT_SHIFT;
 
 	AT_WRITE_REG(hw, REG_RXQ_CTRL, rxq_ctrl_data);
@@ -1198,21 +1262,23 @@ static int atl1c_reset_mac(struct atl1c_hw *hw)
 {
 	struct atl1c_adapter *adapter = (struct atl1c_adapter *)hw->adapter;
 	struct pci_dev *pdev = adapter->pdev;
-	int ret;
+	u32 master_ctrl_data = 0;
 
 	AT_WRITE_REG(hw, REG_IMR, 0);
 	AT_WRITE_REG(hw, REG_ISR, ISR_DIS_INT);
 
-	ret = atl1c_stop_mac(hw);
-	if (ret)
-		return ret;
+	atl1c_stop_mac(hw);
 	/*
 	 * Issue Soft Reset to the MAC.  This will reset the chip's
 	 * transmit, receive, DMA.  It will not effect
 	 * the current PCI configuration.  The global reset bit is self-
 	 * clearing, and should clear within a microsecond.
 	 */
-	AT_WRITE_REGW(hw, REG_MASTER_CTRL, MASTER_CTRL_SOFT_RST);
+	AT_READ_REG(hw, REG_MASTER_CTRL, &master_ctrl_data);
+	master_ctrl_data |= MASTER_CTRL_OOB_DIS_OFF;
+	AT_WRITE_REGW(hw, REG_MASTER_CTRL, ((master_ctrl_data | MASTER_CTRL_SOFT_RST)
+			& 0xFFFF));
+
 	AT_WRITE_FLUSH(hw);
 	msleep(10);
 	/* Wait at least 10ms for All module to be Idle */
@@ -1253,42 +1319,39 @@ static void atl1c_set_aspm(struct atl1c_hw *hw, bool linkup)
 {
 	u32 pm_ctrl_data;
 	u32 link_ctrl_data;
+	u32 link_l1_timer = 0xF;
 
 	AT_READ_REG(hw, REG_PM_CTRL, &pm_ctrl_data);
 	AT_READ_REG(hw, REG_LINK_CTRL, &link_ctrl_data);
-	pm_ctrl_data &= ~PM_CTRL_SERDES_PD_EX_L1;
 
+	pm_ctrl_data &= ~PM_CTRL_SERDES_PD_EX_L1;
 	pm_ctrl_data &=  ~(PM_CTRL_L1_ENTRY_TIMER_MASK <<
 			PM_CTRL_L1_ENTRY_TIMER_SHIFT);
 	pm_ctrl_data &= ~(PM_CTRL_LCKDET_TIMER_MASK <<
-			  PM_CTRL_LCKDET_TIMER_SHIFT);
-
-	pm_ctrl_data |= PM_CTRL_MAC_ASPM_CHK;
-	pm_ctrl_data &= ~PM_CTRL_ASPM_L1_EN;
-	pm_ctrl_data |= PM_CTRL_RBER_EN;
-	pm_ctrl_data |= PM_CTRL_SDES_EN;
+			PM_CTRL_LCKDET_TIMER_SHIFT);
+	pm_ctrl_data |= AT_LCKDET_TIMER	<< PM_CTRL_LCKDET_TIMER_SHIFT;
 
-	if (hw->nic_type == athr_l2c_b ||
-	    hw->nic_type == athr_l1d ||
-	    hw->nic_type == athr_l2c_b2) {
+	if (hw->nic_type == athr_l2c_b || hw->nic_type == athr_l1d ||
+		hw->nic_type == athr_l2c_b2 || hw->nic_type == athr_l1d_2) {
 		link_ctrl_data &= ~LINK_CTRL_EXT_SYNC;
 		if (!(hw->ctrl_flags & ATL1C_APS_MODE_ENABLE)) {
-			if (hw->nic_type == athr_l2c_b &&
-			    hw->revision_id == L2CB_V10)
+			if (hw->nic_type == athr_l2c_b && hw->revision_id == L2CB_V10)
 				link_ctrl_data |= LINK_CTRL_EXT_SYNC;
 		}
 
 		AT_WRITE_REG(hw, REG_LINK_CTRL, link_ctrl_data);
 
-		pm_ctrl_data |= PM_CTRL_PCIE_RECV;
-		pm_ctrl_data |= AT_ASPM_L1_TIMER << PM_CTRL_PM_REQ_TIMER_SHIFT;
-		pm_ctrl_data &= ~PM_CTRL_EN_BUFS_RX_L0S;
+		pm_ctrl_data |= PM_CTRL_RCVR_WT_TIMER;
+		pm_ctrl_data &= ~(PM_CTRL_PM_REQ_TIMER_MASK <<
+			PM_CTRL_PM_REQ_TIMER_SHIFT);
+		pm_ctrl_data |= AT_ASPM_L1_TIMER <<
+			PM_CTRL_PM_REQ_TIMER_SHIFT;
 		pm_ctrl_data &= ~PM_CTRL_SA_DLY_EN;
 		pm_ctrl_data &= ~PM_CTRL_HOTRST;
 		pm_ctrl_data |= 1 << PM_CTRL_L1_ENTRY_TIMER_SHIFT;
 		pm_ctrl_data |= PM_CTRL_SERDES_PD_EX_L1;
 	}
-
+	pm_ctrl_data |= PM_CTRL_MAC_ASPM_CHK;
 	if (linkup) {
 		pm_ctrl_data &= ~PM_CTRL_ASPM_L1_EN;
 		pm_ctrl_data &= ~PM_CTRL_ASPM_L0S_EN;
@@ -1297,27 +1360,26 @@ static void atl1c_set_aspm(struct atl1c_hw *hw, bool linkup)
 		if (hw->ctrl_flags & ATL1C_ASPM_L0S_SUPPORT)
 			pm_ctrl_data |= PM_CTRL_ASPM_L0S_EN;
 
-		if (hw->nic_type == athr_l2c_b ||
-		    hw->nic_type == athr_l1d ||
-		    hw->nic_type == athr_l2c_b2) {
+		if (hw->nic_type == athr_l2c_b || hw->nic_type == athr_l1d ||
+			hw->nic_type == athr_l2c_b2 || hw->nic_type == athr_l1d_2) {
 			if (hw->nic_type == athr_l2c_b)
 				if (!(hw->ctrl_flags & ATL1C_APS_MODE_ENABLE))
-					pm_ctrl_data &= PM_CTRL_ASPM_L0S_EN;
+					pm_ctrl_data &= ~PM_CTRL_ASPM_L0S_EN;
 			pm_ctrl_data &= ~PM_CTRL_SERDES_L1_EN;
 			pm_ctrl_data &= ~PM_CTRL_SERDES_PLL_L1_EN;
 			pm_ctrl_data &= ~PM_CTRL_SERDES_BUDS_RX_L1_EN;
 			pm_ctrl_data |= PM_CTRL_CLK_SWH_L1;
-			if (hw->adapter->link_speed == SPEED_100 ||
-			    hw->adapter->link_speed == SPEED_1000) {
-				pm_ctrl_data &=
-					~(PM_CTRL_L1_ENTRY_TIMER_MASK <<
-					  PM_CTRL_L1_ENTRY_TIMER_SHIFT);
-				if (hw->nic_type == athr_l1d)
-					pm_ctrl_data |= 0xF <<
-						PM_CTRL_L1_ENTRY_TIMER_SHIFT;
-				else
-					pm_ctrl_data |= 7 <<
-						PM_CTRL_L1_ENTRY_TIMER_SHIFT;
+		if (hw->adapter->link_speed == SPEED_100 ||
+				hw->adapter->link_speed == SPEED_1000) {
+				pm_ctrl_data &=  ~(PM_CTRL_L1_ENTRY_TIMER_MASK <<
+					PM_CTRL_L1_ENTRY_TIMER_SHIFT);
+				if (hw->nic_type == athr_l2c_b)
+					link_l1_timer = 7;
+				else if (hw->nic_type == athr_l2c_b2 ||
+					hw->nic_type == athr_l1d_2)
+					link_l1_timer = 4;
+				pm_ctrl_data |= link_l1_timer <<
+					PM_CTRL_L1_ENTRY_TIMER_SHIFT;
 			}
 		} else {
 			pm_ctrl_data |= PM_CTRL_SERDES_L1_EN;
@@ -1326,24 +1388,12 @@ static void atl1c_set_aspm(struct atl1c_hw *hw, bool linkup)
 			pm_ctrl_data &= ~PM_CTRL_CLK_SWH_L1;
 			pm_ctrl_data &= ~PM_CTRL_ASPM_L0S_EN;
 			pm_ctrl_data &= ~PM_CTRL_ASPM_L1_EN;
-		}
-		atl1c_write_phy_reg(hw, MII_DBG_ADDR, 0x29);
-		if (hw->adapter->link_speed == SPEED_10)
-			if (hw->nic_type == athr_l1d)
-				atl1c_write_phy_reg(hw, MII_DBG_ADDR, 0xB69D);
-			else
-				atl1c_write_phy_reg(hw, MII_DBG_DATA, 0xB6DD);
-		else if (hw->adapter->link_speed == SPEED_100)
-			atl1c_write_phy_reg(hw, MII_DBG_DATA, 0xB2DD);
-		else
-			atl1c_write_phy_reg(hw, MII_DBG_DATA, 0x96DD);
 
+		}
 	} else {
-		pm_ctrl_data &= ~PM_CTRL_SERDES_BUDS_RX_L1_EN;
 		pm_ctrl_data &= ~PM_CTRL_SERDES_L1_EN;
 		pm_ctrl_data &= ~PM_CTRL_ASPM_L0S_EN;
 		pm_ctrl_data &= ~PM_CTRL_SERDES_PLL_L1_EN;
-
 		pm_ctrl_data |= PM_CTRL_CLK_SWH_L1;
 
 		if (hw->ctrl_flags & ATL1C_ASPM_L1_SUPPORT)
@@ -1351,8 +1401,9 @@ static void atl1c_set_aspm(struct atl1c_hw *hw, bool linkup)
 		else
 			pm_ctrl_data &= ~PM_CTRL_ASPM_L1_EN;
 	}
-
 	AT_WRITE_REG(hw, REG_PM_CTRL, pm_ctrl_data);
+
+	return;
 }
 
 static void atl1c_setup_mac_ctrl(struct atl1c_adapter *adapter)
@@ -1391,7 +1442,8 @@ static void atl1c_setup_mac_ctrl(struct atl1c_adapter *adapter)
 		mac_ctrl_data |= MAC_CTRL_MC_ALL_EN;
 
 	mac_ctrl_data |= MAC_CTRL_SINGLE_PAUSE_EN;
-	if (hw->nic_type == athr_l1d || hw->nic_type == athr_l2c_b2) {
+	if (hw->nic_type == athr_l1d || hw->nic_type == athr_l2c_b2 ||
+	    hw->nic_type == athr_l1d_2) {
 		mac_ctrl_data |= MAC_CTRL_SPEED_MODE_SW;
 		mac_ctrl_data |= MAC_CTRL_HASH_ALG_CRC32;
 	}
@@ -1409,6 +1461,7 @@ static int atl1c_configure(struct atl1c_adapter *adapter)
 	struct atl1c_hw *hw = &adapter->hw;
 	u32 master_ctrl_data = 0;
 	u32 intr_modrt_data;
+	u32 data;
 
 	/* clear interrupt status */
 	AT_WRITE_REG(hw, REG_ISR, 0xFFFFFFFF);
@@ -1418,6 +1471,15 @@ static int atl1c_configure(struct atl1c_adapter *adapter)
 	 * HW will enable self to assert interrupt event to system after
 	 * waiting x-time for software to notify it accept interrupt.
 	 */
+
+	data = CLK_GATING_EN_ALL;
+	if (hw->ctrl_flags & ATL1C_CLK_GATING_EN) {
+		if (hw->nic_type == athr_l2c_b)
+			data &= ~CLK_GATING_RXMAC_EN;
+	} else
+		data = 0;
+	AT_WRITE_REG(hw, REG_CLK_GATING_CTRL, data);
+
 	AT_WRITE_REG(hw, REG_INT_RETRIG_TIMER,
 		hw->ict & INT_RETRIG_TIMER_MASK);
 
@@ -1436,6 +1498,7 @@ static int atl1c_configure(struct atl1c_adapter *adapter)
 	if (hw->ctrl_flags & ATL1C_INTR_CLEAR_ON_READ)
 		master_ctrl_data |= MASTER_CTRL_INT_RDCLR;
 
+	master_ctrl_data |= MASTER_CTRL_SA_TIMER_EN;
 	AT_WRITE_REG(hw, REG_MASTER_CTRL, master_ctrl_data);
 
 	if (hw->ctrl_flags & ATL1C_CMB_ENABLE) {
@@ -1624,11 +1687,9 @@ static irqreturn_t atl1c_intr(int irq, void *data)
 					"atl1c hardware error (status = 0x%x)\n",
 					status & ISR_ERROR);
 			/* reset MAC */
-			hw->intr_mask &= ~ISR_ERROR;
-			AT_WRITE_REG(hw, REG_IMR, hw->intr_mask);
 			adapter->work_event |= ATL1C_WORK_EVENT_RESET;
 			schedule_work(&adapter->common_task);
-			break;
+			return IRQ_HANDLED;
 		}
 
 		if (status & ISR_OVER)
@@ -2303,7 +2364,6 @@ void atl1c_down(struct atl1c_adapter *adapter)
 	napi_disable(&adapter->napi);
 	atl1c_irq_disable(adapter);
 	atl1c_free_irq(adapter);
-	AT_WRITE_REG(&adapter->hw, REG_ISR, ISR_DIS_INT);
 	/* reset MAC to disable all RX/TX */
 	atl1c_reset_mac(&adapter->hw);
 	msleep(1);
@@ -2387,79 +2447,68 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct atl1c_hw *hw = &adapter->hw;
-	u32 ctrl;
-	u32 mac_ctrl_data;
-	u32 master_ctrl_data;
+	u32 mac_ctrl_data = 0;
+	u32 master_ctrl_data = 0;
 	u32 wol_ctrl_data = 0;
-	u16 mii_bmsr_data;
-	u16 save_autoneg_advertised;
-	u16 mii_intr_status_data;
+	u16 mii_intr_status_data = 0;
 	u32 wufc = adapter->wol;
-	u32 i;
 	int retval = 0;
 
+	atl1c_disable_l0s_l1(hw);
 	if (netif_running(netdev)) {
 		WARN_ON(test_bit(__AT_RESETTING, &adapter->flags));
 		atl1c_down(adapter);
 	}
 	netif_device_detach(netdev);
-	atl1c_disable_l0s_l1(hw);
 	retval = pci_save_state(pdev);
 	if (retval)
 		return retval;
+
+	if (wufc)
+		if (atl1c_phy_power_saving(hw) != 0)
+			dev_dbg(&pdev->dev, "phy power saving failed");
+
+	AT_READ_REG(hw, REG_MASTER_CTRL, &master_ctrl_data);
+	AT_READ_REG(hw, REG_MAC_CTRL, &mac_ctrl_data);
+
+	master_ctrl_data &= ~MASTER_CTRL_CLK_SEL_DIS;
+	mac_ctrl_data &= ~(MAC_CTRL_PRMLEN_MASK << MAC_CTRL_PRMLEN_SHIFT);
+	mac_ctrl_data |= (((u32)adapter->hw.preamble_len &
+			MAC_CTRL_PRMLEN_MASK) <<
+			MAC_CTRL_PRMLEN_SHIFT);
+	mac_ctrl_data &= ~(MAC_CTRL_SPEED_MASK << MAC_CTRL_SPEED_SHIFT);
+	mac_ctrl_data &= ~MAC_CTRL_DUPLX;
+
 	if (wufc) {
-		AT_READ_REG(hw, REG_MASTER_CTRL, &master_ctrl_data);
-		master_ctrl_data &= ~MASTER_CTRL_CLK_SEL_DIS;
-
-		/* get link status */
-		atl1c_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data);
-		atl1c_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data);
-		save_autoneg_advertised = hw->autoneg_advertised;
-		hw->autoneg_advertised = ADVERTISED_10baseT_Half;
-		if (atl1c_restart_autoneg(hw) != 0)
-			if (netif_msg_link(adapter))
-				dev_warn(&pdev->dev, "phy autoneg failed\n");
-		hw->phy_configured = false; /* re-init PHY when resume */
-		hw->autoneg_advertised = save_autoneg_advertised;
+		mac_ctrl_data |= MAC_CTRL_RX_EN;
+		if (adapter->link_speed == SPEED_1000 ||
+			adapter->link_speed == SPEED_0) {
+			mac_ctrl_data |= atl1c_mac_speed_1000 <<
+					MAC_CTRL_SPEED_SHIFT;
+			mac_ctrl_data |= MAC_CTRL_DUPLX;
+		} else
+			mac_ctrl_data |= atl1c_mac_speed_10_100 <<
+					MAC_CTRL_SPEED_SHIFT;
+
+		if (adapter->link_duplex == DUPLEX_FULL)
+			mac_ctrl_data |= MAC_CTRL_DUPLX;
+
 		/* turn on magic packet wol */
 		if (wufc & AT_WUFC_MAG)
-			wol_ctrl_data = WOL_MAGIC_EN | WOL_MAGIC_PME_EN;
+			wol_ctrl_data |= WOL_MAGIC_EN | WOL_MAGIC_PME_EN;
 
 		if (wufc & AT_WUFC_LNKC) {
-			for (i = 0; i < AT_SUSPEND_LINK_TIMEOUT; i++) {
-				msleep(100);
-				atl1c_read_phy_reg(hw, MII_BMSR,
-					(u16 *)&mii_bmsr_data);
-				if (mii_bmsr_data & BMSR_LSTATUS)
-					break;
-			}
-			if ((mii_bmsr_data & BMSR_LSTATUS) == 0)
-				if (netif_msg_link(adapter))
-					dev_warn(&pdev->dev,
-						"%s: Link may change"
-						"when suspend\n",
-						atl1c_driver_name);
 			wol_ctrl_data |=  WOL_LINK_CHG_EN | WOL_LINK_CHG_PME_EN;
 			/* only link up can wake up */
 			if (atl1c_write_phy_reg(hw, MII_IER, IER_LINK_UP) != 0) {
-				if (netif_msg_link(adapter))
-					dev_err(&pdev->dev,
-						"%s: read write phy "
-						"register failed.\n",
-						atl1c_driver_name);
-				goto wol_dis;
+				dev_dbg(&pdev->dev, "%s: read write phy "
+						  "register failed.\n",
+						  atl1c_driver_name);
 			}
 		}
 		/* clear phy interrupt */
 		atl1c_read_phy_reg(hw, MII_ISR, &mii_intr_status_data);
 		/* Config MAC Ctrl register */
-		mac_ctrl_data = MAC_CTRL_RX_EN;
-		/* set to 10/100M halt duplex */
-		mac_ctrl_data |= atl1c_mac_speed_10_100 << MAC_CTRL_SPEED_SHIFT;
-		mac_ctrl_data |= (((u32)adapter->hw.preamble_len &
-				 MAC_CTRL_PRMLEN_MASK) <<
-				 MAC_CTRL_PRMLEN_SHIFT);
-
 		if (adapter->vlgrp)
 			mac_ctrl_data |= MAC_CTRL_RMV_VLAN;
 
@@ -2467,37 +2516,30 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
 		if (wufc & AT_WUFC_MAG)
 			mac_ctrl_data |= MAC_CTRL_BC_EN;
 
-		if (netif_msg_hw(adapter))
-			dev_dbg(&pdev->dev,
-				"%s: suspend MAC=0x%x\n",
-				atl1c_driver_name, mac_ctrl_data);
+		dev_dbg(&pdev->dev,
+			"%s: suspend MAC=0x%x\n",
+			atl1c_driver_name, mac_ctrl_data);
 		AT_WRITE_REG(hw, REG_MASTER_CTRL, master_ctrl_data);
 		AT_WRITE_REG(hw, REG_WOL_CTRL, wol_ctrl_data);
 		AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data);
 
 		/* pcie patch */
-		AT_READ_REG(hw, REG_PCIE_PHYMISC, &ctrl);
-		ctrl |= PCIE_PHYMISC_FORCE_RCV_DET;
-		AT_WRITE_REG(hw, REG_PCIE_PHYMISC, ctrl);
+		device_set_wakeup_enable(&pdev->dev, 1);
 
-		pci_enable_wake(pdev, pci_choose_state(pdev, state), 1);
-		goto suspend_exit;
+		AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_DEFAULT |
+			GPHY_CTRL_EXT_RESET);
+		pci_prepare_to_sleep(pdev);
+	} else {
+		AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_POWER_SAVING);
+		master_ctrl_data |= MASTER_CTRL_CLK_SEL_DIS;
+		mac_ctrl_data |= atl1c_mac_speed_10_100 << MAC_CTRL_SPEED_SHIFT;
+		mac_ctrl_data |= MAC_CTRL_DUPLX;
+		AT_WRITE_REG(hw, REG_MASTER_CTRL, master_ctrl_data);
+		AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data);
+		AT_WRITE_REG(hw, REG_WOL_CTRL, 0);
+		hw->phy_configured = false; /* re-init PHY when resume */
+		pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
 	}
-wol_dis:
-
-	/* WOL disabled */
-	AT_WRITE_REG(hw, REG_WOL_CTRL, 0);
-
-	/* pcie patch */
-	AT_READ_REG(hw, REG_PCIE_PHYMISC, &ctrl);
-	ctrl |= PCIE_PHYMISC_FORCE_RCV_DET;
-	AT_WRITE_REG(hw, REG_PCIE_PHYMISC, ctrl);
-
-	atl1c_phy_disable(hw);
-	hw->phy_configured = false; /* re-init PHY when resume */
-
-	pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
-suspend_exit:
 
 	pci_disable_device(pdev);
 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
@@ -2516,9 +2558,19 @@ static int atl1c_resume(struct pci_dev *pdev)
 	pci_enable_wake(pdev, PCI_D3cold, 0);
 
 	AT_WRITE_REG(&adapter->hw, REG_WOL_CTRL, 0);
+	atl1c_reset_pcie(&adapter->hw, ATL1C_PCIE_L0S_L1_DISABLE |
+			ATL1C_PCIE_PHY_RESET);
 
 	atl1c_phy_reset(&adapter->hw);
 	atl1c_reset_mac(&adapter->hw);
+	atl1c_phy_init(&adapter->hw);
+
+#if 0
+	AT_READ_REG(&adapter->hw, REG_PM_CTRLSTAT, &pm_data);
+	pm_data &= ~PM_CTRLSTAT_PME_EN;
+	AT_WRITE_REG(&adapter->hw, REG_PM_CTRLSTAT, pm_data);
+#endif
+
 	netif_device_attach(netdev);
 	if (netif_running(netdev))
 		atl1c_up(adapter);

^ permalink raw reply related

* Re: [patch 3/3] rds/iw_rdma: remove unneeded kfree()
From: Andrew Grover @ 2010-05-28  7:38 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Andy Grover, David S. Miller, rds-devel, netdev, kernel-janitors
In-Reply-To: <20100527091128.GT22515@bicker>

Thanks Dan, nice catches.

For all three:

Acked-by: Andy Grover <andy.grover@oracle.com>

-- Andy

On Thu, May 27, 2010 at 2:11 AM, Dan Carpenter <error27@gmail.com> wrote:
> The "dma_pages" variable is always NULL at this point so the kfree()
> isn't needed.
>
> Signed-off-by: Dan Carpenter <error27@gmail.com>
>
> diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
> index 48d5073..742ab6a 100644
> --- a/net/rds/iw_rdma.c
> +++ b/net/rds/iw_rdma.c
> @@ -334,7 +334,6 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
>  out_unmap:
>        ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
>        sg->dma_len = 0;
> -       kfree(dma_pages);
>        return ERR_PTR(ret);
>  }
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V4
From: Jiri Pirko @ 2010-05-28  7:33 UTC (permalink / raw)
  To: netdev; +Cc: davem, shemminger, kaber, eric.dumazet
In-Reply-To: <1275030163.2650.3.camel@edumazet-laptop>

changelog:
v3->v4
	- moved rx_handlers in net_device structure to be near to other data
	  used in rx path
v2->v3
	- used GPL-ed exports
v1->v2
	- writers are locked by rtnl_lock (removed unnecessary spinlock)
	- using call_rcu in unregister
	- nicer macvlan_port_create cleanup
	- struct rx_hanler is made const in funtion parameters

What this patch does is it removes two receive frame hooks (for bridge and for
macvlan) from __netif_receive_skb. These are replaced them with a general
list of rx_handlers which is iterated thru instead.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 drivers/net/macvlan.c      |   27 ++++++--
 include/linux/if_bridge.h  |    2 -
 include/linux/if_macvlan.h |    4 -
 include/linux/netdevice.h  |   18 +++++
 net/bridge/br.c            |    2 -
 net/bridge/br_if.c         |   14 ++++
 net/bridge/br_input.c      |   12 +++-
 net/bridge/br_private.h    |    3 +-
 net/core/dev.c             |  160 ++++++++++++++++++++++++++------------------
 9 files changed, 160 insertions(+), 82 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4c..f6b7924 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -511,10 +512,16 @@ static void macvlan_setup(struct net_device *dev)
 	dev->tx_queue_len	= 0;
 }
 
+static const struct netdev_rx_handler macvlan_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_MACVLAN,
+	.callback	= macvlan_handle_frame,
+};
+
 static int macvlan_port_create(struct net_device *dev)
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +535,26 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
+
+	err = netdev_rx_handler_register(dev, &macvlan_rx_handler);
+	if (err)
+		goto cleanup;
+
 	return 0;
+
+cleanup:
+	rcu_assign_pointer(dev->macvlan_port, NULL);
+	synchronize_rcu();
+	kfree(port);
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev, &macvlan_rx_handler);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +787,12 @@ static int __init macvlan_init_module(void)
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +800,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e8..0d241a5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047a..c26a0e4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a1bff65..8c8bfe3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -254,6 +254,16 @@ struct netdev_hw_addr_list {
 #define netdev_for_each_mc_addr(ha, dev) \
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
+
+struct netdev_rx_handler {
+	struct list_head	list;
+	unsigned int		order;
+#define NETDEV_RX_HANDLER_ORDER_BRIDGE	1
+#define NETDEV_RX_HANDLER_ORDER_MACVLAN	2
+	struct sk_buff		*(*callback)(struct sk_buff *skb);
+	struct rcu_head		rcu;
+};
+
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
@@ -958,6 +968,9 @@ struct net_device {
 
 	struct netdev_queue	rx_queue;
 
+	/* receive handlers (hooks) list */
+	struct list_head	rx_handlers;
+
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 
 	/* Number of TX queues allocated at alloc_netdev_mq() time  */
@@ -1685,6 +1698,11 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      const struct netdev_rx_handler *rh);
+extern void netdev_rx_handler_unregister(struct net_device *dev,
+					 const struct netdev_rx_handler *rh);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b5..c8436fa 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e..45270e5 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -119,6 +119,11 @@ static void destroy_nbp_rcu(struct rcu_head *head)
 	destroy_nbp(p);
 }
 
+static const struct netdev_rx_handler br_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_BRIDGE,
+	.callback	= br_handle_frame,
+};
+
 /* Delete port(interface) from bridge is done in two steps.
  * via RCU. First step, marks device as down. That deletes
  * all the timers and stops new packets from flowing through.
@@ -147,6 +152,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev, &br_rx_handler);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +435,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, &br_rx_handler);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +462,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	rcu_assign_pointer(dev->br_port, NULL);
+	synchronize_rcu();
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700..99647d8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest)
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74b..c83519b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c82065..d2ab71a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2585,70 +2585,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2744,6 +2688,85 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+static bool rx_handlers_equal(const struct netdev_rx_handler *rh1,
+			      const struct netdev_rx_handler *rh2)
+{
+	return (rh1->order == rh2->order) && (rh1->callback == rh2->callback);
+}
+
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rh: receive handler to register
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       const struct netdev_rx_handler *rh)
+{
+	struct list_head *iter, *add_after;
+	struct netdev_rx_handler *rh1;
+	int err = 0;
+
+	ASSERT_RTNL();
+	add_after = &dev->rx_handlers;
+	list_for_each(iter, &dev->rx_handlers) {
+		rh1 = list_entry(iter, struct netdev_rx_handler, list);
+		if (rx_handlers_equal(rh, rh1))
+			return -EEXIST;
+		if (rh1->order > rh->order)
+			break;
+		add_after = iter;
+	}
+	rh1 = kzalloc(sizeof(*rh), GFP_KERNEL);
+	if (!rh1)
+		return -ENOMEM;
+
+	rh1->order = rh->order;
+	rh1->callback = rh->callback;
+	list_add_rcu(&rh1->list, add_after);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+static void rx_handler_rcu_free(struct rcu_head *head)
+{
+	struct netdev_rx_handler *rh;
+
+	rh = container_of(head, struct netdev_rx_handler, rcu);
+	kfree(rh);
+}
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *	@rh: receive handler to unregister
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev,
+				  const struct netdev_rx_handler *rh)
+{
+	struct netdev_rx_handler *rh1;
+
+	ASSERT_RTNL();
+	list_for_each_entry(rh1, &dev->rx_handlers, list) {
+		if (rx_handlers_equal(rh, rh1)) {
+			list_del_rcu(&rh1->list);
+			call_rcu(&rh1->rcu, rx_handler_rcu_free);
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2796,6 +2819,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	struct netdev_rx_handler *rh;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2859,12 +2883,18 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/*
+	 * Go through various rx handlers, like bridge, macvlan etc.
+	 */
+	list_for_each_entry_rcu(rh, &skb->dev->rx_handlers, list) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rh->callback(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
@@ -5371,6 +5401,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev_mc_init(dev);
 	dev_uc_init(dev);
 
+	INIT_LIST_HEAD(&dev->rx_handlers);
+
 	dev_net_set(dev, &init_net);
 
 	dev->_tx = tx;
-- 
1.6.6.1


^ permalink raw reply related

* Re: [Patch] fix packet loss and massive ping spikes with PPP  multi-link
From: walter harms @ 2010-05-28  7:28 UTC (permalink / raw)
  To: Richard Hartmann; +Cc: linux-kernel, netdev, linux-ppp, erik_list
In-Reply-To: <AANLkTikeld4fbr9r1Sq0jhfkWxf0ZlwOsufUIRvb1rFH@mail.gmail.com>



Richard Hartmann schrieb:
> ==It seems LKML & netdev were dropped from the To list, re-adding==
> 
> Hi Walter,
> 
> 
>>  if (ppp->rrsched % ppp->n_channels == i)
>>
>> since both do not change in that while() loop you can calculate in advance
>> perhaps ppp->rrsched %= ppp->n_channels before the while ?
>> (that would reduce my bad feels about variables that only increments also :)
> 
> rrsched and i do change when appropriate. As they are used as a cheap
> way to get round robin, rrsched is not even initialized. One can argue
> that this should be done, but as it literally does not matter where the
> value starts counting....
> 

yep,
the problem is that you will trigger a warning "variable uninitialised".
And as programmer you are trained to spot such kind of code.
in short you violated "the rule of least surprise", simply set it to 99
and add a comment that the value does not matter because it is actualy a
random seed.
Basicly the same reason for the ppp->rrsched %= ppp->n_channels outside
the loop. 1. people/compiler are happy because they see the variable
is used. 2. no need to recalculate the if in a loop (never trust optimisers).

/*
perhaps rr_chanel is a better name ? round robin channel
that would requiere the changes but explain what it actualy is
*/



> 
>> btw: you are doing  after loop() if(pch->chan == NULL) continue;
>> that means the else in the if below  if (pch->chan) should never be reached.
>> Or is it likely that some channel will be dropped (?) ?
> 
> Channels could be dropped and we need to guard against that.
> 

please add a comment about that. i can garantee you someone will spot it
and remove either the pch->chan == NULL or the else.



just my 2 cents,
wh


> 
>> btw: this is intentional ? looks strange
>>
>>        if(ppp_ml_noexplode) {
>> +       }
>> +       else {
> 
> Leftover from various printks for debugging reasons.
> 
> 
> Thanks for your feedback,
> Richard
> 

^ permalink raw reply

* [net-2.6 PATCH 1/2] netlink: bug fix: don't overrun skbs on vf_port dump
From: Scott Feldman @ 2010-05-28  7:15 UTC (permalink / raw)
  To: davem; +Cc: chrisw, netdev, kaber, arnd

From: Scott Feldman <scofeldm@cisco.com>

Noticed by Patrick McHardy: was continuing to fill skb after a
nla_put_failure, ignoring the size calculated by upper layer.  Now,
return -EMSGSIZE on any overruns, but also allow netdev to
fail ndo_get_vf_port with error other than -EMSGSIZE, thus unwinding
nest.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
---
 net/core/rtnetlink.c |   15 +++++++++------
 1 files changed, 9 insertions(+), 6 deletions(-)


diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7ab86f3..7331bb2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -722,14 +722,13 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
 
 	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
 		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
-		if (!vf_port) {
-			nla_nest_cancel(skb, vf_ports);
-			return -EMSGSIZE;
-		}
+		if (!vf_port)
+			goto nla_put_failure;
 		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
 		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err == -EMSGSIZE)
+			goto nla_put_failure;
 		if (err) {
-nla_put_failure:
 			nla_nest_cancel(skb, vf_port);
 			continue;
 		}
@@ -739,6 +738,10 @@ nla_put_failure:
 	nla_nest_end(skb, vf_ports);
 
 	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, vf_ports);
+	return -EMSGSIZE;
 }
 
 static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
@@ -753,7 +756,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
 	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
 	if (err) {
 		nla_nest_cancel(skb, port_self);
-		return err;
+		return (err == -EMSGSIZE) ? err : 0;
 	}
 
 	nla_nest_end(skb, port_self);


^ permalink raw reply related

* [net-2.6 PATCH 2/2] netlink: bug fix: wrong size was calculated for vfinfo list blob
From: Scott Feldman @ 2010-05-28  7:15 UTC (permalink / raw)
  To: davem; +Cc: chrisw, netdev, kaber, arnd
In-Reply-To: <20100528071546.4058.1332.stgit@localhost.localdomain>

From: Scott Feldman <scofeldm@cisco.com>

The wrong size was being calculated for vfinfo.  In one case, it was over-
calculating using nlmsg_total_size on attrs, in another case, it was
under-calculating by assuming ifla_vf_* structs are packed together, but
each struct is it's own attr w/ hdr (and padding).

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
---
 net/core/rtnetlink.c |   11 ++++++-----
 1 files changed, 6 insertions(+), 5 deletions(-)


diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7331bb2..1a2af24 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -650,11 +650,12 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
 	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
 
 		int num_vfs = dev_num_vf(dev->dev.parent);
-		size_t size = nlmsg_total_size(sizeof(struct nlattr));
-		size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
-		size += num_vfs * (sizeof(struct ifla_vf_mac) +
-				  sizeof(struct ifla_vf_vlan) +
-				  sizeof(struct ifla_vf_tx_rate));
+		size_t size = nla_total_size(sizeof(struct nlattr));
+		size += nla_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs *
+			(nla_total_size(sizeof(struct ifla_vf_mac)) +
+			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
 		return size;
 	} else
 		return 0;


^ permalink raw reply related

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V3
From: Eric Dumazet @ 2010-05-28  7:02 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, shemminger, kaber
In-Reply-To: <20100528061241.GC2823@psychotron.redhat.com>

Le vendredi 28 mai 2010 à 08:12 +0200, Jiri Pirko a écrit :

> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index a1bff65..f54b97d 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -254,6 +254,16 @@ struct netdev_hw_addr_list {
>  #define netdev_for_each_mc_addr(ha, dev) \
>  	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
>  
> +
> +struct netdev_rx_handler {
> +	struct list_head	list;
> +	unsigned int		order;
> +#define NETDEV_RX_HANDLER_ORDER_BRIDGE	1
> +#define NETDEV_RX_HANDLER_ORDER_MACVLAN	2
> +	struct sk_buff		*(*callback)(struct sk_buff *skb);
> +	struct rcu_head		rcu;
> +};
> +
>  struct hh_cache {
>  	struct hh_cache *hh_next;	/* Next entry			     */
>  	atomic_t	hh_refcnt;	/* number of users                   */
> @@ -1031,6 +1041,9 @@ struct net_device {
>  	/* GARP */
>  	struct garp_port	*garp_port;
>  
> +	/* receive handlers (hooks) list */
> +	struct list_head	rx_handlers;
> +
>  	/* class/net/name entry */
>  	struct device		dev;
>  	/* space for optional device, statistics, and wireless sysfs groups */
> @@ -1685,6 +1698,11 @@ static inline void napi_free_frags(struct napi_struct *napi)
>  	napi->skb = NULL;
>  }
>  

Please chose another place to hold rx_handlers, to keep rx path memory
needs to minimum cache lines. Somewhere in the following section :

/*
 * Cache line mostly used on receive path (including eth_type_trans())
 */
        unsigned long           last_rx;        /* Time of last Rx      */
        /* Interface address info used in eth_type_trans() */
        unsigned char           *dev_addr;      /* hw address, (before bcast
                                                   because most packets are
                                                   unicast) */

        struct netdev_hw_addr_list      dev_addrs; /* list of device
                                                      hw addresses */

        unsigned char           broadcast[MAX_ADDR_LEN];        /* hw bcast add */

#ifdef CONFIG_RPS
        struct kset             *queues_kset;

        struct netdev_rx_queue  *_rx;

        /* Number of RX queues allocated at alloc_netdev_mq() time  */
        unsigned int            num_rx_queues;
#endif

        struct netdev_queue     rx_queue;


and before the :

	struct netdev_queue     *_tx ____cacheline_aligned_in_smp;




^ permalink raw reply

* Re: fn_trie_lookup and fn_hash_lookup
From: ratheesh k @ 2010-05-28  6:13 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, linux-net
In-Reply-To: <20100527204055.406343eb@nehalam>

On Fri, May 28, 2010 at 9:10 AM, Stephen Hemminger
<shemminger@vyatta.com> wrote:
> On Fri, 28 May 2010 08:53:11 +0530
> ratheesh k <ratheesh.ksz@gmail.com> wrote:
>
>> hi ,
>>
>> I was looking into 2.6.18 kernel . I can see two functions
>> fn_trie_lookup , fn_hash_lookup
>>
>> ipv4/fib_trie.c:      tb->tb_lookup = fn_trie_lookup;
>> ipv4/fib_hash.c:      tb->tb_lookup = fn_hash_lookup;
>>
>> What is the differnce between these two functions ?
>
> They are the two possible FIB algorithms configurable.
>
> net/ipv4/Kconfig:
>
> choice
>        prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
>        depends on IP_ADVANCED_ROUTER
>        default ASK_IP_FIB_HASH
>
> config ASK_IP_FIB_HASH
>        bool "FIB_HASH"
>        ---help---
>          Current FIB is very proven and good enough for most users.
>
> config IP_FIB_TRIE
>        bool "FIB_TRIE"
>        ---help---
>          Use new experimental LC-trie as FIB lookup algorithm.
>          This improves lookup performance if you have a large
>          number of routes.
>
>          LC-trie is a longest matching prefix lookup algorithm which
>          performs better than FIB_HASH for large routing tables.
>          But, it consumes more memory and is more complex.
>
>          LC-trie is described in:
>
>          IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
>          IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
>          June 1999
>
>          An experimental study of compression methods for dynamic tries
>          Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
>          http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
>
>
> Also see Documentation/networking/fib_trie.txt
>
>
>

Thanks ..

^ permalink raw reply

* [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V3
From: Jiri Pirko @ 2010-05-28  6:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, shemminger, kaber, eric.dumazet
In-Reply-To: <20100528055154.GB2823@psychotron.redhat.com>

changelog:
v2->v3
	- used GPL-ed exports
v1->v2
	- writers are locked by rtnl_lock (removed unnecessary spinlock)
	- using call_rcu in unregister
	- nicer macvlan_port_create cleanup
	- struct rx_hanler is made const in funtion parameters

What this patch does is it removes two receive frame hooks (for bridge and for
macvlan) from __netif_receive_skb. These are replaced them with a general
list of rx_handlers which is iterated thru instead.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
 drivers/net/macvlan.c      |   27 ++++++--
 include/linux/if_bridge.h  |    2 -
 include/linux/if_macvlan.h |    4 -
 include/linux/netdevice.h  |   18 +++++
 net/bridge/br.c            |    2 -
 net/bridge/br_if.c         |   14 ++++
 net/bridge/br_input.c      |   12 +++-
 net/bridge/br_private.h    |    3 +-
 net/core/dev.c             |  160 ++++++++++++++++++++++++++------------------
 9 files changed, 160 insertions(+), 82 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4c..f6b7924 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -511,10 +512,16 @@ static void macvlan_setup(struct net_device *dev)
 	dev->tx_queue_len	= 0;
 }
 
+static const struct netdev_rx_handler macvlan_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_MACVLAN,
+	.callback	= macvlan_handle_frame,
+};
+
 static int macvlan_port_create(struct net_device *dev)
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +535,26 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
+
+	err = netdev_rx_handler_register(dev, &macvlan_rx_handler);
+	if (err)
+		goto cleanup;
+
 	return 0;
+
+cleanup:
+	rcu_assign_pointer(dev->macvlan_port, NULL);
+	synchronize_rcu();
+	kfree(port);
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev, &macvlan_rx_handler);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +787,12 @@ static int __init macvlan_init_module(void)
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +800,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e8..0d241a5 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047a..c26a0e4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a1bff65..f54b97d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -254,6 +254,16 @@ struct netdev_hw_addr_list {
 #define netdev_for_each_mc_addr(ha, dev) \
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
+
+struct netdev_rx_handler {
+	struct list_head	list;
+	unsigned int		order;
+#define NETDEV_RX_HANDLER_ORDER_BRIDGE	1
+#define NETDEV_RX_HANDLER_ORDER_MACVLAN	2
+	struct sk_buff		*(*callback)(struct sk_buff *skb);
+	struct rcu_head		rcu;
+};
+
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
@@ -1031,6 +1041,9 @@ struct net_device {
 	/* GARP */
 	struct garp_port	*garp_port;
 
+	/* receive handlers (hooks) list */
+	struct list_head	rx_handlers;
+
 	/* class/net/name entry */
 	struct device		dev;
 	/* space for optional device, statistics, and wireless sysfs groups */
@@ -1685,6 +1698,11 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      const struct netdev_rx_handler *rh);
+extern void netdev_rx_handler_unregister(struct net_device *dev,
+					 const struct netdev_rx_handler *rh);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b5..c8436fa 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e..45270e5 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -119,6 +119,11 @@ static void destroy_nbp_rcu(struct rcu_head *head)
 	destroy_nbp(p);
 }
 
+static const struct netdev_rx_handler br_rx_handler = {
+	.order		= NETDEV_RX_HANDLER_ORDER_BRIDGE,
+	.callback	= br_handle_frame,
+};
+
 /* Delete port(interface) from bridge is done in two steps.
  * via RCU. First step, marks device as down. That deletes
  * all the timers and stops new packets from flowing through.
@@ -147,6 +152,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev, &br_rx_handler);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +435,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, &br_rx_handler);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +462,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	rcu_assign_pointer(dev->br_port, NULL);
+	synchronize_rcu();
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700..99647d8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest)
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74b..c83519b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c82065..d2ab71a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2585,70 +2585,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2744,6 +2688,85 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+static bool rx_handlers_equal(const struct netdev_rx_handler *rh1,
+			      const struct netdev_rx_handler *rh2)
+{
+	return (rh1->order == rh2->order) && (rh1->callback == rh2->callback);
+}
+
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rh: receive handler to register
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       const struct netdev_rx_handler *rh)
+{
+	struct list_head *iter, *add_after;
+	struct netdev_rx_handler *rh1;
+	int err = 0;
+
+	ASSERT_RTNL();
+	add_after = &dev->rx_handlers;
+	list_for_each(iter, &dev->rx_handlers) {
+		rh1 = list_entry(iter, struct netdev_rx_handler, list);
+		if (rx_handlers_equal(rh, rh1))
+			return -EEXIST;
+		if (rh1->order > rh->order)
+			break;
+		add_after = iter;
+	}
+	rh1 = kzalloc(sizeof(*rh), GFP_KERNEL);
+	if (!rh1)
+		return -ENOMEM;
+
+	rh1->order = rh->order;
+	rh1->callback = rh->callback;
+	list_add_rcu(&rh1->list, add_after);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+static void rx_handler_rcu_free(struct rcu_head *head)
+{
+	struct netdev_rx_handler *rh;
+
+	rh = container_of(head, struct netdev_rx_handler, rcu);
+	kfree(rh);
+}
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *	@rh: receive handler to unregister
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev,
+				  const struct netdev_rx_handler *rh)
+{
+	struct netdev_rx_handler *rh1;
+
+	ASSERT_RTNL();
+	list_for_each_entry(rh1, &dev->rx_handlers, list) {
+		if (rx_handlers_equal(rh, rh1)) {
+			list_del_rcu(&rh1->list);
+			call_rcu(&rh1->rcu, rx_handler_rcu_free);
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2796,6 +2819,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	struct netdev_rx_handler *rh;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2859,12 +2883,18 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/*
+	 * Go through various rx handlers, like bridge, macvlan etc.
+	 */
+	list_for_each_entry_rcu(rh, &skb->dev->rx_handlers, list) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rh->callback(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
@@ -5371,6 +5401,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev_mc_init(dev);
 	dev_uc_init(dev);
 
+	INIT_LIST_HEAD(&dev->rx_handlers);
+
 	dev_net_set(dev, &init_net);
 
 	dev->_tx = tx;
-- 
1.6.6.1


^ permalink raw reply related

* Re: [PATCH net-next-2.6] net: replace hooks in __netif_receive_skb V2
From: Jiri Pirko @ 2010-05-28  5:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, davem, kaber, eric.dumazet
In-Reply-To: <20100527130822.02cb1661@nehalam>

Thu, May 27, 2010 at 10:08:22PM CEST, shemminger@vyatta.com wrote:
>On Thu, 27 May 2010 20:08:24 +0200
>Jiri Pirko <jpirko@redhat.com> wrote:
>
>> changelog:
>> v1->v2
>> 	- writers are locked by rtnl_lock (removed unnecessary spinlock)
>> 	- using call_rcu in unregister
>> 	- nicer macvlan_port_create cleanup
>> 	- struct rx_hanler is made const in funtion parameters
>> 
>> What this patch does is it removes two receive frame hooks (for bridge and for
>> macvlan) from __netif_receive_skb. These are replaced them with a general
>> list of rx_handlers which is iterated thru instead.
>> 
>> Then a network driver (of virtual netdev like macvlan or bridge) can register
>> an rx_handler for needed net device.
>> 
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>
>I wonder if we really need the chaining.  What about allowing only one
>hook per device (and return -EBUSY)?  That also gets rid of the allocation,
>and the extra overhead.

Hmm, that's a good question. I thought about it already. But I'm also wondering
if there is a possible scenario, when the chaining can be necessary. Also I do
not see any -significant- downside of having multiple handlers in a list, so I
would probably go this way now. Opinions?

>
>The handler hook, has to be export GPL, since we really don't want more
>network stack abuse by 3rd parties.

Noted, will be in the next patch version.

Jirka

^ permalink raw reply

* Re: [PATCH 2/2] netdev/fec: fix ifconfig eth0 down hang issue
From: Bryan Wu @ 2010-05-28  5:26 UTC (permalink / raw)
  To: afleming, davem
  Cc: Sascha Hauer, Greg Ungerer, Amit Kucheria, netdev, linux-kernel
In-Reply-To: <1273199239-11057-3-git-send-email-bryan.wu@canonical.com>

Since this patch is for another bug and doesn't depend on my first one patch.
Could you please review this?

Thanks,
-Bryan

On 05/07/2010 10:27 AM, Bryan Wu wrote:
> BugLink: http://bugs.launchpad.net/bugs/559065
> 
> In fec open/close function, we need to use phy_connect and phy_disconnect
> operation before we start/stop phy. Otherwise it will cause system hang.
> 
> Only call fec_enet_mii_probe() in open function, because the first open
> action will cause NULL pointer error.
> 
> Signed-off-by: Bryan Wu <bryan.wu@canonical.com>
> ---
>  drivers/net/fec.c |   28 ++++++++++++++++------------
>  1 files changed, 16 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/fec.c b/drivers/net/fec.c
> index 9c58f6b..af4243f 100644
> --- a/drivers/net/fec.c
> +++ b/drivers/net/fec.c
> @@ -678,6 +678,8 @@ static int fec_enet_mii_probe(struct net_device *dev)
>  	struct phy_device *phy_dev = NULL;
>  	int phy_addr;
>  
> +	fep->phy_dev = NULL;
> +
>  	/* find the first phy */
>  	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) {
>  		if (fep->mii_bus->phy_map[phy_addr]) {
> @@ -708,6 +710,11 @@ static int fec_enet_mii_probe(struct net_device *dev)
>  	fep->link = 0;
>  	fep->full_duplex = 0;
>  
> +	printk(KERN_INFO "%s: Freescale FEC PHY driver [%s] "
> +		"(mii_bus:phy_addr=%s, irq=%d)\n", dev->name,
> +		fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev),
> +		fep->phy_dev->irq);
> +
>  	return 0;
>  }
>  
> @@ -753,13 +760,8 @@ static int fec_enet_mii_init(struct platform_device *pdev)
>  	if (mdiobus_register(fep->mii_bus))
>  		goto err_out_free_mdio_irq;
>  
> -	if (fec_enet_mii_probe(dev) != 0)
> -		goto err_out_unregister_bus;
> -
>  	return 0;
>  
> -err_out_unregister_bus:
> -	mdiobus_unregister(fep->mii_bus);
>  err_out_free_mdio_irq:
>  	kfree(fep->mii_bus->irq);
>  err_out_free_mdiobus:
> @@ -912,7 +914,12 @@ fec_enet_open(struct net_device *dev)
>  	if (ret)
>  		return ret;
>  
> -	/* schedule a link state check */
> +	/* Probe and connect to PHY when open the interface */
> +	ret = fec_enet_mii_probe(dev);
> +	if (ret) {
> +		fec_enet_free_buffers(dev);
> +		return ret;
> +	}
>  	phy_start(fep->phy_dev);
>  	netif_start_queue(dev);
>  	fep->opened = 1;
> @@ -926,10 +933,12 @@ fec_enet_close(struct net_device *dev)
>  
>  	/* Don't know what to do yet. */
>  	fep->opened = 0;
> -	phy_stop(fep->phy_dev);
>  	netif_stop_queue(dev);
>  	fec_stop(dev);
>  
> +	if (fep->phy_dev)
> +		phy_disconnect(fep->phy_dev);
> +
>          fec_enet_free_buffers(dev);
>  
>  	return 0;
> @@ -1293,11 +1302,6 @@ fec_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto failed_register;
>  
> -	printk(KERN_INFO "%s: Freescale FEC PHY driver [%s] "
> -		"(mii_bus:phy_addr=%s, irq=%d)\n", ndev->name,
> -		fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev),
> -		fep->phy_dev->irq);
> -
>  	return 0;
>  
>  failed_register:

^ permalink raw reply

* Re: [RFC] netfilter: WIP: Xtables idletimer target implementation
From: Luciano Coelho @ 2010-05-28  5:25 UTC (permalink / raw)
  To: ext Jan Engelhardt
  Cc: netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
	kaber@trash.net, Timo Teras
In-Reply-To: <alpine.LSU.2.01.1005280057190.14077@obet.zrqbmnf.qr>

Hi Jan,

Thanks a lot for your comments!

On Fri, 2010-05-28 at 01:17 +0200, ext Jan Engelhardt wrote:
> On Thursday 2010-05-27 22:54, Luciano Coelho wrote:
> >There are still some issues to be resolved:
> >
> >How to treat several rules for the same interface?
> >
> >We need a key for the timer list.  I'm using the targinfo pointer for that,
> >but this looks shaky, because there is no assurance that this pointer will
> >live for the entire lifetime of the rule.
> 
> Well xt_quota for example has its targinfo around at all times,
> as have other modules.

Great, so this will work.  I had checked the x_tables code and it seemed
that the lifetime of targinfo was sufficient, but I was not sure this
would be safe in the future.  Now, if this changes in the future, my
module won't be the only one to break ;)


> >This is now an x_tables target, so other protocols need to be implemented.
> 
> Huh? You're not looking at packets, so why does it need proto-specific
> stuff?

I need to associate the timers with specific interfaces, because it's
the idle time of the interfaces that the userspace in interested in.  I
didn't find any other way to associate the timers with them, except by
looking at the iniface and outiface values in ipt_ip (and eventually,
with IPv6 properly implemented, in ip6t_ip6).  This is what Patrick
suggested when he checked my previous patch [1] and triggered me to do a
major rework on my module ;)

Do you have any other suggestion on how I can associate the rules to
specific interfaces?


> >+static
> >+struct utimer_t *__utimer_find_by_info(const struct xt_idletimer_info *info)
> >+{
> >+	struct utimer_t *entry;
> >+
> >+	list_for_each_entry(entry, &active_utimer_head, entry) {
> >+		if (info == entry->info) {
> >+			return entry;
> >+		}
> >+	}
> >+
> >+	return NULL;
> >+}
> 
> Can do with less braces.

Sure.  These remained there after I removed some traces.  I'll clean
this up.


> >+static void utimer_expired(unsigned long data)
> >+{
> >+	struct utimer_t *timer = (struct utimer_t *) data;
> >+
> >+	pr_debug("xt_idletimer: timer '%s' ns %p expired\n",
> >+		 timer->name, timer->net);
> >+
> >+	schedule_work(&timer->work);
> >+}
> 
> You don't need xt_idletimer, because pr_debug already prints
> that (with #define pr_fmt(fmt) KBUILD_MODNAME ": " as many
> other modules do)

Ok, I'll change it.  Thanks for pointing out.


> >+
> >+static struct utimer_t *utimer_create(const char *name,
> >+				      struct net *net,
> >+				      const struct xt_idletimer_info *info)
> >+{
> >+	struct utimer_t *timer;
> >+
> >+	timer = kmalloc(sizeof(struct utimer_t), GFP_ATOMIC);
> >+	if (timer == NULL)
> >+		return NULL;
> 
> This is called from user context, so GFP_KERNEL will perfectly suffice.

Yup, will change.


> >+static int xt_idletimer_checkentry(const struct xt_tgchk_param *par)
> >+{
> >+	const struct xt_idletimer_info *info = par->targinfo;
> >+	const struct ipt_entry *entryinfo = par->entryinfo;
> >+	const struct ipt_ip *ip = &entryinfo->ip;
> 
> I'm not sure spying on ipt_ip is a long-term viable solution.

Do you have any other suggestions on how I could get an interface
associated with the rule? I thought about having the userspace pass the
interface as an option to the rule (like I already do for the timeout
value), but that looked ugly to me, since the interface can already be
defined as part of the ruleset.


> >+	pr_debug("xt_idletimer: checkentry targinfo %p\n", par->targinfo);
> >+
> >+	if (info->timeout == 0) {
> >+		pr_debug("xt_idletimer: timeout value is zero\n");
> >+		return -EINVAL;
> >+	}
> >+
> >+	/* FIXME: implement support for other protocol families */
> >+	switch (par->family) {
> >+	case NFPROTO_IPV4   :
> >+		pr_debug("xt_idletimer: NFPROTO_IPV4\n");
> >+		break;
> >+
> >+	default:
> >+		pr_debug("xt_idletimer: Unsupported protocol family family!\n");
> >+		return -EINVAL;
> >+	}
> >+
> >+	if (strlen(ip->iniface) == 0 && strlen(ip->outiface) == 0) {
> >+		pr_debug("xt_idletimer: in or out interface must "
> >+			 "be specified\n");
> >+		return -EINVAL;
> >+	}
> >+
> >+	if (utimer_create(ip->iniface, par->net, info) == NULL) {
> >+		pr_debug("xt_idletimer: failed to create timer\n");
> >+		return -ENOMEM;
> >+	}
> 
> What about outiface?
> What blows up when iniface is empty?

Ooops! I've redone this part of the code so many times and in this
version I completely forgot to include the outiface.  I'll fix it.


> >+static void xt_idletimer_destroy(const struct xt_tgdtor_param *par)
> >+{
> >+	const struct xt_idletimer_info *info = par->targinfo;
> >+
> >+	pr_debug("xt_idletimer: destroy targinfo %p\n", par->targinfo);
> >+
> >+	utimer_delete(info);
> >+}
> >+
> >+static int __init init(void)
> >+{
> >+	int ret;
> >+
> >+	ret = utimer_init();
> >+	if (ret)
> >+		return ret;
> >+
> >+	ret =  xt_register_target(&xt_idletimer);
> >+	if (ret < 0) {
> >+		utimer_fini();
> >+		return ret;
> >+	}
> >+
> >+	return 0;
> >+}
> >+
> >+static void __exit fini(void)
> >+{
> >+	xt_unregister_target(&xt_idletimer);
> >+	utimer_fini();
> >+}
> >+
> >+module_init(init);
> >+module_exit(fini);
> 
> Call it just exit?

Yes.


> Also give the functions better names (see other modules), that is going
> to be unrecognizable in stacktraces otherwise.

I agree.  These names are coming from the original code.  I thought
about changing them to something clearer, but didn't bother to do it
yet, because I was focusing on the actual functionality.  I'll change
the names.

Again, thanks for your comments.  I'll rework and submit v2 soon.

Ah, and please excuse my lameness of mistyping the netdev email address
when I submitted the patch.  I fixed it now.

[1]
http://article.gmane.org/gmane.comp.security.firewalls.netfilter.devel/33934


-- 
Cheers,
Luca.


^ permalink raw reply

* Re: [PATCH 1/2] netdev/fec: fix performance impact from mdio poll operation
From: Bryan Wu @ 2010-05-28  5:23 UTC (permalink / raw)
  To: David Miller; +Cc: s.hauer, gerg, amit.kucheria, netdev, linux-kernel
In-Reply-To: <20100516.002818.133869940.davem@davemloft.net>

On 05/16/2010 03:28 PM, David Miller wrote:
> From: Bryan Wu <bryan.wu@canonical.com>
> Date: Fri,  7 May 2010 10:27:18 +0800
> 
>> BugLink: http://bugs.launchpad.net/bugs/546649
>> BugLink: http://bugs.launchpad.net/bugs/457878
>>
>> After introducing phylib supporting, users experienced performace drop. That is
>> because of the mdio polling operation of phylib. Use msleep to replace the busy
>> waiting cpu_relax() and remove the warning message.
>>
>> Signed-off-by: Bryan Wu <bryan.wu@canonical.com>
>> Acked-by: Andy Whitcroft <apw@canonical.com>
> 
> As you've already been told, making these MDIO interfaces fail silently
> is not acceptable.
> 
> Please fix this bug properly and resubmit this patch series.
> 

So sorry for the delay. My board's broken for several weeks. After I got a new,
I will try to fix this and resubmit this patch.

Thanks
-- 
Bryan Wu <bryan.wu@canonical.com>
Kernel Developer    +86.138-1617-6545 Mobile
Ubuntu Kernel Team | Hardware Enablement Team
Canonical Ltd.      www.canonical.com
Ubuntu - Linux for human beings | www.ubuntu.com

^ permalink raw reply

* Re: fn_trie_lookup and fn_hash_lookup
From: Stephen Hemminger @ 2010-05-28  3:40 UTC (permalink / raw)
  To: ratheesh k; +Cc: netdev, linux-net
In-Reply-To: <AANLkTikmIzPaOKdlXtCwosfJXzaKY1vxib_MdqI0vN_5@mail.gmail.com>

On Fri, 28 May 2010 08:53:11 +0530
ratheesh k <ratheesh.ksz@gmail.com> wrote:

> hi ,
> 
> I was looking into 2.6.18 kernel . I can see two functions
> fn_trie_lookup , fn_hash_lookup
> 
> ipv4/fib_trie.c:	tb->tb_lookup = fn_trie_lookup;
> ipv4/fib_hash.c:	tb->tb_lookup = fn_hash_lookup;
> 
> What is the differnce between these two functions ?

They are the two possible FIB algorithms configurable.

net/ipv4/Kconfig:

choice
	prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
	depends on IP_ADVANCED_ROUTER
	default ASK_IP_FIB_HASH

config ASK_IP_FIB_HASH
	bool "FIB_HASH"
	---help---
	  Current FIB is very proven and good enough for most users.

config IP_FIB_TRIE
	bool "FIB_TRIE"
	---help---
	  Use new experimental LC-trie as FIB lookup algorithm.
	  This improves lookup performance if you have a large
	  number of routes.

	  LC-trie is a longest matching prefix lookup algorithm which
	  performs better than FIB_HASH for large routing tables.
	  But, it consumes more memory and is more complex.

	  LC-trie is described in:

	  IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
	  IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
	  June 1999

	  An experimental study of compression methods for dynamic tries
	  Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
	  http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/


Also see Documentation/networking/fib_trie.txt



^ permalink raw reply

* Re: [net-next-2.6 V9 PATCH 1/2] Add netlink support for virtual port management (was iovnl)
From: Scott Feldman @ 2010-05-28  3:24 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: davem, netdev, chrisw, arnd
In-Reply-To: <4BF54616.1020508@trash.net>

On 5/20/10 7:24 AM, "Patrick McHardy" <kaber@trash.net> wrote:

> Scott Feldman wrote:
>> +static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
>> +{
>> + struct nlattr *vf_ports;
>> + struct nlattr *vf_port;
>> + int vf;
>> + int err;
>> +
>> + vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
>> + if (!vf_ports)
>> +  return -EMSGSIZE;
>> +
>> + for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
>> +  vf_port = nla_nest_start(skb, IFLA_VF_PORT);
>> +  if (!vf_port) {
>> +   nla_nest_cancel(skb, vf_ports);
>> +   return -EMSGSIZE;
>> +  }
>> +  NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
>> +  err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
>> +  if (err) {
>> +nla_put_failure:
>> +   nla_nest_cancel(skb, vf_port);
>> +   continue;
> 
> Are you sure you want to continue here? During a full dump
> this means that the last VF port fitting in the skb will most
> likely be incomplete since the higher layer code won't
> notice that the size was exceeded and the entry should be
> dumped into another skb.

Drats, that's a bug.  I'll get that fixed.  Thanks Patrick for catching it.

-scott


^ permalink raw reply

* fn_trie_lookup and fn_hash_lookup
From: ratheesh k @ 2010-05-28  3:23 UTC (permalink / raw)
  To: netdev, linux-net

hi ,

I was looking into 2.6.18 kernel . I can see two functions
fn_trie_lookup , fn_hash_lookup

ipv4/fib_trie.c:	tb->tb_lookup = fn_trie_lookup;
ipv4/fib_hash.c:	tb->tb_lookup = fn_hash_lookup;

What is the differnce between these two functions ?


Thanks,
Ratheesh

^ permalink raw reply

* Re: [v5 Patch 1/3] netpoll: add generic support for bridge and bonding devices
From: Cong Wang @ 2010-05-28  2:47 UTC (permalink / raw)
  To: Flavio Leitner
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller
In-Reply-To: <20100527180545.GA2345@sysclose.org>

On 05/28/10 02:05, Flavio Leitner wrote:
>
> Hi guys!
>
> I finally could test this to see if an old problem reported on bugzilla[1] was
> fixed now, but unfortunately it is still there.
>
> The ticket is private I guess, but basically the problem happens when bonding
> driver tries to print something after it had taken the write_lock (monitor
> functions, enslave/de-enslave), so the printk() will pass through netpoll, then
> on bonding again which no matter what mode you use, it will try to read_lock()
> the lock again. The result is a deadlock and the entire system hangs.


This is true, I already fixed some similar issues.

>
> I manage to get a fresh backtrace with mode 1, see below:
>
>
> [   93.167079] Call Trace:
> [   93.167079]  [<ffffffff81034cf9>] warn_slowpath_common+0x77/0x8f
> [   93.167079]  [<ffffffff81034d5e>] warn_slowpath_fmt+0x3c/0x3e
> [   93.167079]  [<ffffffff81366aef>] ? _raw_read_trylock+0x11/0x4b
> [   93.167079]  [<ffffffffa02a2c42>] ? bond_start_xmit+0x12b/0x401 [bonding]
> ->  read_lock fails
> [   93.167079]  [<ffffffffa02a2c9f>] bond_start_xmit+0x188/0x401 [bonding]
> [   93.167079]  [<ffffffff81055b37>] ? trace_hardirqs_off+0xd/0xf
> [   93.167079]  [<ffffffff812dfdb9>] netpoll_send_skb+0xbd/0x1f3
> [   93.167079]  [<ffffffff812e00ed>] netpoll_send_udp+0x1fe/0x20d
> [   93.167079]  [<ffffffffa02c017c>] write_msg+0x89/0xcd [netconsole]
> [   93.167079]  [<ffffffff81034e65>] __call_console_drivers+0x67/0x79
> [   93.167079]  [<ffffffff81034ed0>] _call_console_drivers+0x59/0x5d
> [   93.167079]  [<ffffffff810352d3>] release_console_sem+0x121/0x1d7
> [   93.167079]  [<ffffffff8103590a>] vprintk+0x35d/0x393
> [   93.167079]  [<ffffffff8103f947>] ? add_timer+0x17/0x19
> [   93.167079]  [<ffffffff81046ddf>] ? queue_delayed_work_on+0xa2/0xa9
> [   93.167079]  [<ffffffff81363bb8>] printk+0x3c/0x44
> [   93.167079]  [<ffffffffa02a3b17>] bond_select_active_slave+0x105/0x109 [bonding]
> ->  write_locked
> [   93.167079]  [<ffffffffa02a4798>] bond_mii_monitor+0x479/0x4ed [bonding]
> [   93.167079]  [<ffffffff81046009>] worker_thread+0x1ef/0x2e2
>
> In this case, the message should be
>      "bonding: bond0: making interface eth0 the new active one"


Hmm, you triggered a warning here, let me check the source code
and try to reproduce it here.

>
> I did the following patch to discard the packet if it was IN_NETPOLL
> and the read_lock() fails, so I could go ahead testing it:
>
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 5e12462..a3b8bad 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -4258,8 +4258,19 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
>   	struct bonding *bond = netdev_priv(bond_dev);
>   	int res = 1;
>
> -	read_lock(&bond->lock);
> -	read_lock(&bond->curr_slave_lock);
> +	if (read_trylock(&bond->lock) == 0&&
> +		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> +			dev_kfree_skb(skb);
> +			return NETDEV_TX_OK;
> +	}
> +
> +	if (read_trylock(&bond->curr_slave_lock) == 0&&
> +		(bond_dev->flags&  IFF_IN_NETPOLL)) {
> +			read_unlock(&bond->lock);
> +			dev_kfree_skb(skb);
> +			return NETDEV_TX_OK;
> +	}
> +			
>
>   	if (!BOND_IS_OK(bond))
>   		goto out;
>


This looks like a workaround, not a fix. :)

>
> and I found another problem.  The function netpoll_send_skb() checks
> if the npinfo's queue length is zero and if it's not, it will queue
> the packet to make sure it's in order and then schedule the thread
> to run. Later, the thread wakes up running queue_process() which disables
> interrupts before calling ndo_start_xmit().  However, dev_queue_xmit()
> uses rcu_*_bh() and before return, it will enable the interrupts again,
> spitting this:
>
> ------------[ cut here ]------------
> WARNING: at kernel/softirq.c:143 local_bh_enable+0x3c/0x86()
> Hardware name: Precision WorkStation 490
> Modules linked in: netconsole bonding sunrpc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6]
> Pid: 17, comm: events/2 Not tainted 2.6.34-04700-gd938a70 #21
> Call Trace:
>   [<ffffffff810381d6>] warn_slowpath_common+0x77/0x8f
>   [<ffffffff810381fd>] warn_slowpath_null+0xf/0x11
>   [<ffffffff8103d691>] local_bh_enable+0x3c/0x86
>   [<ffffffff812e4d85>] dev_queue_xmit+0x462/0x493
>   [<ffffffffa018805f>] bond_dev_queue_xmit+0x1bd/0x1e3 [bonding]
>   [<ffffffffa01881dd>] bond_start_xmit+0x158/0x37b [bonding]
> ->  interrupts disabled
>   [<ffffffff812f3fca>] queue_process+0x9d/0xf9
>   [<ffffffff8104d022>] worker_thread+0x19d/0x224
>   [<ffffffff812f3f2d>] ? queue_process+0x0/0xf9
>   [<ffffffff81050819>] ? autoremove_wake_function+0x0/0x34
>   [<ffffffff8104ce85>] ? worker_thread+0x0/0x224
>   [<ffffffff8105040b>] kthread+0x7a/0x82
>   [<ffffffff810036d4>] kernel_thread_helper+0x4/0x10
>   [<ffffffff81050391>] ? kthread+0x0/0x82
>   [<ffffffff810036d0>] ? kernel_thread_helper+0x0/0x10
> ---[ end trace 74e3904503fdb632 ]---
>
> kernel/softirq.c:
> 141 static inline void _local_bh_enable_ip(unsigned long ip)
> 142 {
> 143         WARN_ON_ONCE(in_irq() || irqs_disabled());
> 144 #ifdef CONFIG_TRACE_IRQFLAGS
> 145         local_irq_disable();
> 146 #endif
> 147         /*
> 148          * Are softirqs going to be turned on now:
> 149          */
>
>

I am wondering if this was caused by the previous issue.


> The git is updated up to:
>    d938a70 be2net: increase POST timeout for EEH recovery
>
> Two slave interfaces, bonding mode 1, netconsole over bond0.
>
> [1] https://bugzilla.redhat.com/show_bug.cgi?id=248374#c5

How did you reproduce this?
I will check that BZ to see if I can find how to reproduce this.

Thanks.


^ permalink raw reply

* RE: issue about virtio-net driver to suppoprt vhost mergeable buffer with zero-copy to support PS mode
From: Xin, Xiaohui @ 2010-05-28  1:13 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, Herbert Xu
In-Reply-To: <20100527082025.GA5579@redhat.com>

Michael,
What's you have suggested could avoid to taint the guest virtio-net driver. Really thanks!
For the two alternative, the first will do more trick with driver or net-core stuff. So currently, I prefer to try the second one. Anyway, let me have a good think of it. Thanks!

Thanks
Xiaohui

>-----Original Message-----
>From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On Behalf Of
>Michael S. Tsirkin
>Sent: Thursday, May 27, 2010 4:20 PM
>To: Xin, Xiaohui
>Cc: netdev@vger.kernel.org; kvm@vger.kernel.org; linux-kernel@vger.kernel.org; Herbert
>Xu
>Subject: Re: issue about virtio-net driver to suppoprt vhost mergeable buffer with zero-copy
>to support PS mode
>
>On Thu, May 27, 2010 at 09:21:02AM +0800, Xin, Xiaohui wrote:
>> Michael,
>> I'm now looking into the vhost mergeable buffer, and I tried to use it to support PS mode
>with zero-copy. And I found an issue there that I have to modify the guest virito-net driver.
>>
>> When guest virtio-net driver submits mergeable buffers, it submits multiple pages outside.
>In zero-copy case, vhost cannot know which page is used to put header, and which page is
>used to put payload. Then vhost can only reserves 12 bytes for each page. That means, the
>page_offset of the payload DMAed into the guest buffer is always 12 bytes. But guest
>virtio-net driver always use offset 0 to put the data (See receive_mergeable()). That's where
>the zero-copy use mergeable buffer must modify.
>>
>> Have I missed something here? And how do you think about it?
>>
>> Thanks
>> Xiaohui
>
>Maybe you can teach the hardware skip the first 12 bytes: qemu will
>call an ioctl telling hardware what the virtio header size is.
>This is how we plan to do it for tap.
>
>Alternatively, buffers can be used in any order.
>So we can have hardware use N buffers for the packet, and then
>have vhost put the header in buffer N+1.
>
>--
>MST
>--
>To unsubscribe from this list: send the line "unsubscribe kvm" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] cnic: Fix context memory init. on 5709.
From: David Miller @ 2010-05-27 23:31 UTC (permalink / raw)
  To: mchan; +Cc: netdev
In-Reply-To: <1274991858-6201-1-git-send-email-mchan@broadcom.com>

From: "Michael Chan" <mchan@broadcom.com>
Date: Thu, 27 May 2010 13:24:18 -0700

> We need to zero context memory on 5709 in the function cnic_init_context().
> Without this, iscsid restart on 5709 will not work because of stale data.
> TX context blocks should not be initialized by cnic_init_context() because
> of the special remapping on 5709.
> 
> Update version to 2.1.2.
> 
> Signed-off-by: Michael Chan <mchan@broadcom.com>

Applied, thanks Michael.

^ permalink raw reply

* Re: [PATCH 6/11] drivers/net: Eliminate a NULL pointer dereference
From: David Miller @ 2010-05-27 23:30 UTC (permalink / raw)
  To: julia; +Cc: netdev, linux-kernel, kernel-janitors
In-Reply-To: <Pine.LNX.4.64.1005271433180.5422@ask.diku.dk>

From: Julia Lawall <julia@diku.dk>
Date: Thu, 27 May 2010 14:33:32 +0200 (CEST)

> At the point of the print, dev is NULL.
 ...
> Signed-off-by: Julia Lawall <julia@diku.dk>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH 4/11] drivers/net/hamradio: Eliminate a NULL pointer dereference
From: David Miller @ 2010-05-27 23:29 UTC (permalink / raw)
  To: julia; +Cc: jpr, linux-hams, netdev, linux-kernel, kernel-janitors
In-Reply-To: <Pine.LNX.4.64.1005271432450.5422@ask.diku.dk>

From: Julia Lawall <julia@diku.dk>
Date: Thu, 27 May 2010 14:33:00 +0200 (CEST)

> At the point of the print, dev is NULL.
> 
> A simplified version of the semantic match that finds this problem is as
> follows: (http://coccinelle.lip6.fr/)
 ...
> Signed-off-by: Julia Lawall <julia@diku.dk>

Applied.

^ permalink raw reply

* Re: [PATCH net-2.6] be2net: Patch removes redundant while statement in loop.
From: David Miller @ 2010-05-27 23:28 UTC (permalink / raw)
  To: sarveshwarb; +Cc: netdev
In-Reply-To: <20100527093208.GA13740@serverengines.com>

From: Sarveshwar Bandi <sarveshwarb@serverengines.com>
Date: Thu, 27 May 2010 15:02:19 +0530

> Signed-off-by: Sarveshwar Bandi <sarveshwarb@serverengines.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH]: bnx2: Fix uninitialized variable warning in bnx2_disable_forced_2g5()
From: David Miller @ 2010-05-27 23:26 UTC (permalink / raw)
  To: prarit; +Cc: netdev, mchan
In-Reply-To: <20100527220856.25418.86593.sendpatchset@prarit.bos.redhat.com>

From: Prarit Bhargava <prarit@redhat.com>
Date: Thu, 27 May 2010 18:13:01 -0400

> Fix warning:
> 
> drivers/net/bnx2.c: In function 'bnx2_disable_forced_2g5':
> drivers/net/bnx2.c:1489: warning: 'bmcr' may be used uninitialized in this function
> 
> Signed-off-by: Prarit Bhargava <prarit@redhat.com>

This is another bad patch, you're hiding a bug by making this warning
go away.

If bnx2_read_phy() in fact does not initialize "bmcr" we might
want to do something about that instead of just blindly using
whatever garbage is in that local variable.

Please stop making these patches without applying at least some
intelligence to the analysis of why these warnings are being
printed at all.

None of these cases you're posting patches for are situations where
the compiler simply can't see the control flow properly, they are real
issues.

Thanks.

^ permalink raw reply

* Re: [PATCH]: niu: fix uninitialized variable warning
From: David Miller @ 2010-05-27 23:23 UTC (permalink / raw)
  To: prarit; +Cc: netdev
In-Reply-To: <20100527183100.24251.12228.sendpatchset@prarit.bos.redhat.com>

From: Prarit Bhargava <prarit@redhat.com>
Date: Thu, 27 May 2010 14:35:03 -0400

> Fixes warning:
> 
> drivers/net/niu.c: In function ‘niu_process_rx_pkt’:
> drivers/net/niu.c:3457: error: ‘link’ may be used uninitialized in this function
> 
> Signed-off-by: Prarit Bhargava <prarit@redhat.com>

These blind uninitialized_var() annotations are almost always the wrong
thing to do.  And that is the case here.

The code that looks up the page here can return NULL if there is some
error and the page hashes aren't setup properly, and then the callers
go and blindly assume that 'page' is non-NULL without any error
checking.

Applying this patch does more harm than good because it hides this
issue.  So I'm definitely not applying this patch.


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox