* [net-next 06/12] e1000e: enable EEE by default
From: Jeff Kirsher @ 2013-03-28 9:00 UTC (permalink / raw)
To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1364461215-7793-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Bruce Allan <bruce.w.allan@intel.com>
Now that IEEE802.3az-2010 Energy Efficient Ethernet has been approved as
standard (September 2010) and the driver can enable and disable it via
ethtool, enable the feature by default on parts which support it.
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/e1000e/ich8lan.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 174507c..56c4935 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1034,10 +1034,6 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
(er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA;
- /* Disable EEE by default until IEEE802.3az spec is finalized */
- if (adapter->flags2 & FLAG2_HAS_EEE)
- adapter->hw.dev_spec.ich8lan.eee_disable = true;
-
return 0;
}
--
1.7.11.7
^ permalink raw reply related
* [net-next 05/12] e1000e: EEE capability advertisement not set/disabled as required
From: Jeff Kirsher @ 2013-03-28 9:00 UTC (permalink / raw)
To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1364461215-7793-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Bruce Allan <bruce.w.allan@intel.com>
Devices supported by the driver which support EEE (currently 82579, I217
and I218) are advertising EEE capabilities during auto-negotiation even
when EEE has been disabled. In addition to not acting as expected, this
also caused the EEE status reported by 'ethtool --show-eee' to be wrong
when two of these devices are connected back-to-back and EEE is disabled
on one. In addition to fixing this issue, the ability for the user to
specify which speeds (100 or 1000 full-duplex) to advertise EEE support
has been added.
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/e1000e/e1000.h | 3 ++
drivers/net/ethernet/intel/e1000e/ethtool.c | 63 +++++++++--------------------
drivers/net/ethernet/intel/e1000e/ich8lan.c | 55 ++++++++++++-------------
drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 +-
drivers/net/ethernet/intel/e1000e/netdev.c | 36 +++++++++++++++++
5 files changed, 88 insertions(+), 72 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index e371f77..82f1c84 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -46,6 +46,7 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/ptp_classify.h>
#include <linux/mii.h>
+#include <linux/mdio.h>
#include "hw.h"
struct e1000_info;
@@ -350,6 +351,8 @@ struct e1000_adapter {
struct timecounter tc;
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_clock_info;
+
+ u16 eee_advert;
};
struct e1000_info {
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index e835e7b..7c8ca65 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -35,7 +35,6 @@
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/vmalloc.h>
-#include <linux/mdio.h>
#include <linux/pm_runtime.h>
#include "e1000.h"
@@ -2076,23 +2075,20 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
- u16 cap_addr, adv_addr, lpa_addr, pcs_stat_addr, phy_data, lpi_ctrl;
- u32 status, ret_val;
+ u16 cap_addr, lpa_addr, pcs_stat_addr, phy_data;
+ u32 ret_val;
- if (!(adapter->flags & FLAG_IS_ICH) ||
- !(adapter->flags2 & FLAG2_HAS_EEE))
+ if (!(adapter->flags2 & FLAG2_HAS_EEE))
return -EOPNOTSUPP;
switch (hw->phy.type) {
case e1000_phy_82579:
cap_addr = I82579_EEE_CAPABILITY;
- adv_addr = I82579_EEE_ADVERTISEMENT;
lpa_addr = I82579_EEE_LP_ABILITY;
pcs_stat_addr = I82579_EEE_PCS_STATUS;
break;
case e1000_phy_i217:
cap_addr = I217_EEE_CAPABILITY;
- adv_addr = I217_EEE_ADVERTISEMENT;
lpa_addr = I217_EEE_LP_ABILITY;
pcs_stat_addr = I217_EEE_PCS_STATUS;
break;
@@ -2111,10 +2107,7 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
edata->supported = mmd_eee_cap_to_ethtool_sup_t(phy_data);
/* EEE Advertised */
- ret_val = e1000_read_emi_reg_locked(hw, adv_addr, &phy_data);
- if (ret_val)
- goto release;
- edata->advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+ edata->advertised = mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
/* EEE Link Partner Advertised */
ret_val = e1000_read_emi_reg_locked(hw, lpa_addr, &phy_data);
@@ -2132,25 +2125,11 @@ release:
if (ret_val)
return -ENODATA;
- e1e_rphy(hw, I82579_LPI_CTRL, &lpi_ctrl);
- status = er32(STATUS);
-
/* Result of the EEE auto negotiation - there is no register that
* has the status of the EEE negotiation so do a best-guess based
- * on whether both Tx and Rx LPI indications have been received or
- * base it on the link speed, the EEE advertised speeds on both ends
- * and the speeds on which EEE is enabled locally.
+ * on whether Tx or Rx LPI indications have been received.
*/
- if (((phy_data & E1000_EEE_TX_LPI_RCVD) &&
- (phy_data & E1000_EEE_RX_LPI_RCVD)) ||
- ((status & E1000_STATUS_SPEED_100) &&
- (edata->advertised & ADVERTISED_100baseT_Full) &&
- (edata->lp_advertised & ADVERTISED_100baseT_Full) &&
- (lpi_ctrl & I82579_LPI_CTRL_100_ENABLE)) ||
- ((status & E1000_STATUS_SPEED_1000) &&
- (edata->advertised & ADVERTISED_1000baseT_Full) &&
- (edata->lp_advertised & ADVERTISED_1000baseT_Full) &&
- (lpi_ctrl & I82579_LPI_CTRL_1000_ENABLE)))
+ if (phy_data & (E1000_EEE_TX_LPI_RCVD | E1000_EEE_RX_LPI_RCVD))
edata->eee_active = true;
edata->eee_enabled = !hw->dev_spec.ich8lan.eee_disable;
@@ -2167,19 +2146,10 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
struct ethtool_eee eee_curr;
s32 ret_val;
- if (!(adapter->flags & FLAG_IS_ICH) ||
- !(adapter->flags2 & FLAG2_HAS_EEE))
- return -EOPNOTSUPP;
-
ret_val = e1000e_get_eee(netdev, &eee_curr);
if (ret_val)
return ret_val;
- if (eee_curr.advertised != edata->advertised) {
- e_err("Setting EEE advertisement is not supported\n");
- return -EINVAL;
- }
-
if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) {
e_err("Setting EEE tx-lpi is not supported\n");
return -EINVAL;
@@ -2190,16 +2160,21 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
return -EINVAL;
}
- if (hw->dev_spec.ich8lan.eee_disable != !edata->eee_enabled) {
- hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled;
-
- /* reset the link */
- if (netif_running(netdev))
- e1000e_reinit_locked(adapter);
- else
- e1000e_reset(adapter);
+ if (edata->advertised & ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) {
+ e_err("EEE advertisement supports only 100TX and/or 1000T full-duplex\n");
+ return -EINVAL;
}
+ adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+
+ hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled;
+
+ /* reset the link */
+ if (netif_running(netdev))
+ e1000e_reinit_locked(adapter);
+ else
+ e1000e_reset(adapter);
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 3b1c1a7..174507c 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -695,7 +695,7 @@ s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data)
*
* Assumes the SW/FW/HW Semaphore is already acquired.
**/
-static s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data)
+s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data)
{
return __e1000_access_emi_reg_locked(hw, addr, &data, false);
}
@@ -712,11 +712,22 @@ static s32 e1000_set_eee_pchlan(struct e1000_hw *hw)
{
struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
s32 ret_val;
- u16 lpi_ctrl;
+ u16 lpa, pcs_status, adv, adv_addr, lpi_ctrl, data;
- if ((hw->phy.type != e1000_phy_82579) &&
- (hw->phy.type != e1000_phy_i217))
+ switch (hw->phy.type) {
+ case e1000_phy_82579:
+ lpa = I82579_EEE_LP_ABILITY;
+ pcs_status = I82579_EEE_PCS_STATUS;
+ adv_addr = I82579_EEE_ADVERTISEMENT;
+ break;
+ case e1000_phy_i217:
+ lpa = I217_EEE_LP_ABILITY;
+ pcs_status = I217_EEE_PCS_STATUS;
+ adv_addr = I217_EEE_ADVERTISEMENT;
+ break;
+ default:
return 0;
+ }
ret_val = hw->phy.ops.acquire(hw);
if (ret_val)
@@ -731,34 +742,24 @@ static s32 e1000_set_eee_pchlan(struct e1000_hw *hw)
/* Enable EEE if not disabled by user */
if (!dev_spec->eee_disable) {
- u16 lpa, pcs_status, data;
-
/* Save off link partner's EEE ability */
- switch (hw->phy.type) {
- case e1000_phy_82579:
- lpa = I82579_EEE_LP_ABILITY;
- pcs_status = I82579_EEE_PCS_STATUS;
- break;
- case e1000_phy_i217:
- lpa = I217_EEE_LP_ABILITY;
- pcs_status = I217_EEE_PCS_STATUS;
- break;
- default:
- ret_val = -E1000_ERR_PHY;
- goto release;
- }
ret_val = e1000_read_emi_reg_locked(hw, lpa,
&dev_spec->eee_lp_ability);
if (ret_val)
goto release;
+ /* Read EEE advertisement */
+ ret_val = e1000_read_emi_reg_locked(hw, adv_addr, &adv);
+ if (ret_val)
+ goto release;
+
/* Enable EEE only for speeds in which the link partner is
- * EEE capable.
+ * EEE capable and for which we advertise EEE.
*/
- if (dev_spec->eee_lp_ability & I82579_EEE_1000_SUPPORTED)
+ if (adv & dev_spec->eee_lp_ability & I82579_EEE_1000_SUPPORTED)
lpi_ctrl |= I82579_LPI_CTRL_1000_ENABLE;
- if (dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) {
+ if (adv & dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) {
e1e_rphy_locked(hw, MII_LPA, &data);
if (data & LPA_100FULL)
lpi_ctrl |= I82579_LPI_CTRL_100_ENABLE;
@@ -770,13 +771,13 @@ static s32 e1000_set_eee_pchlan(struct e1000_hw *hw)
dev_spec->eee_lp_ability &=
~I82579_EEE_100_SUPPORTED;
}
-
- /* R/Clr IEEE MMD 3.1 bits 11:10 - Tx/Rx LPI Received */
- ret_val = e1000_read_emi_reg_locked(hw, pcs_status, &data);
- if (ret_val)
- goto release;
}
+ /* R/Clr IEEE MMD 3.1 bits 11:10 - Tx/Rx LPI Received */
+ ret_val = e1000_read_emi_reg_locked(hw, pcs_status, &data);
+ if (ret_val)
+ goto release;
+
ret_val = e1e_wphy_locked(hw, I82579_LPI_CTRL, lpi_ctrl);
release:
hw->phy.ops.release(hw);
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 00ba6c9..21d21b9 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -212,7 +212,7 @@
#define I82577_MSE_THRESHOLD 0x0887 /* 82577 Mean Square Error Threshold */
#define I82579_MSE_LINK_DOWN 0x2411 /* MSE count before dropping link */
#define I82579_RX_CONFIG 0x3412 /* Receive configuration */
-#define I82579_EEE_PCS_STATUS 0x182D /* IEEE MMD Register 3.1 >> 8 */
+#define I82579_EEE_PCS_STATUS 0x182E /* IEEE MMD Register 3.1 >> 8 */
#define I82579_EEE_CAPABILITY 0x0410 /* IEEE MMD Register 3.20 */
#define I82579_EEE_ADVERTISEMENT 0x040E /* IEEE MMD Register 7.60 */
#define I82579_EEE_LP_ABILITY 0x040F /* IEEE MMD Register 7.61 */
@@ -268,4 +268,5 @@ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable);
void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw);
s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable);
s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data);
+s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data);
#endif /* _E1000E_ICH8LAN_H_ */
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 142ca39..0459fe3 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3875,6 +3875,38 @@ void e1000e_reset(struct e1000_adapter *adapter)
/* initialize systim and reset the ns time counter */
e1000e_config_hwtstamp(adapter);
+ /* Set EEE advertisement as appropriate */
+ if (adapter->flags2 & FLAG2_HAS_EEE) {
+ s32 ret_val;
+ u16 adv_addr;
+
+ switch (hw->phy.type) {
+ case e1000_phy_82579:
+ adv_addr = I82579_EEE_ADVERTISEMENT;
+ break;
+ case e1000_phy_i217:
+ adv_addr = I217_EEE_ADVERTISEMENT;
+ break;
+ default:
+ dev_err(&adapter->pdev->dev,
+ "Invalid PHY type setting EEE advertisement\n");
+ return;
+ }
+
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val) {
+ dev_err(&adapter->pdev->dev,
+ "EEE advertisement - unable to acquire PHY\n");
+ return;
+ }
+
+ e1000_write_emi_reg_locked(hw, adv_addr,
+ hw->dev_spec.ich8lan.eee_disable ?
+ 0 : adapter->eee_advert);
+
+ hw->phy.ops.release(hw);
+ }
+
if (!netif_running(adapter->netdev) &&
!test_bit(__E1000_TESTING, &adapter->state)) {
e1000_power_down_phy(adapter);
@@ -6540,6 +6572,10 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_flashmap;
}
+ /* Set default EEE advertisement */
+ if (adapter->flags2 & FLAG2_HAS_EEE)
+ adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T;
+
/* construct the net_device struct */
netdev->netdev_ops = &e1000e_netdev_ops;
e1000e_set_ethtool_ops(netdev);
--
1.7.11.7
^ permalink raw reply related
* [net-next 04/12] e1000e: long access timeouts when I217/I218 MAC and PHY are out of sync
From: Jeff Kirsher @ 2013-03-28 9:00 UTC (permalink / raw)
To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1364461215-7793-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Bruce Allan <bruce.w.allan@intel.com>
When the MAC and PHY are in two different modes (different power levels
and interconnect speeds), it could take a long time before a PHY register
access timed out using the existing MAC-PHY interconnect configuration
coded into the driver for ICH- and PCH-based LOMs. Introduce an I217/I218-
specific .setup_physical_interface operation which does not override the
interconnect configuration in the NVM.
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/e1000e/ich8lan.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 4f2f0f6..3b1c1a7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -142,6 +142,7 @@ static void e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index);
static void e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index);
static s32 e1000_k1_workaround_lv(struct e1000_hw *hw);
static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate);
+static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw);
static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg)
{
@@ -636,6 +637,8 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
if (mac->type == e1000_pch_lpt) {
mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES;
mac->ops.rar_set = e1000_rar_set_pch_lpt;
+ mac->ops.setup_physical_interface =
+ e1000_setup_copper_link_pch_lpt;
}
/* Enable PCS Lock-loss workaround for ICH8 */
@@ -3788,7 +3791,6 @@ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw)
break;
case e1000_phy_82577:
case e1000_phy_82579:
- case e1000_phy_i217:
ret_val = e1000_copper_link_setup_82577(hw);
if (ret_val)
return ret_val;
@@ -3824,6 +3826,31 @@ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw)
}
/**
+ * e1000_setup_copper_link_pch_lpt - Configure MAC/PHY interface
+ * @hw: pointer to the HW structure
+ *
+ * Calls the PHY specific link setup function and then calls the
+ * generic setup_copper_link to finish configuring the link for
+ * Lynxpoint PCH devices
+ **/
+static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw)
+{
+ u32 ctrl;
+ s32 ret_val;
+
+ ctrl = er32(CTRL);
+ ctrl |= E1000_CTRL_SLU;
+ ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+ ew32(CTRL, ctrl);
+
+ ret_val = e1000_copper_link_setup_82577(hw);
+ if (ret_val)
+ return ret_val;
+
+ return e1000e_setup_copper_link(hw);
+}
+
+/**
* e1000_get_link_up_info_ich8lan - Get current link speed and duplex
* @hw: pointer to the HW structure
* @speed: pointer to store current link speed
--
1.7.11.7
^ permalink raw reply related
* [net-next 03/12] e1000e: fix LED blink logic for designs with LEDs driven by cathode
From: Jeff Kirsher @ 2013-03-28 9:00 UTC (permalink / raw)
To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1364461215-7793-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Bruce Allan <bruce.w.allan@intel.com>
When the LEDs are driven by cathode, the bit logic is reversed. Use the
LED Invert bit to invert the logic. Cleanup use of a magic number and
change the for loop increment to reduce the number of shifts.
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/e1000e/mac.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index b25cc43..2480c10 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -1600,15 +1600,28 @@ s32 e1000e_blink_led_generic(struct e1000_hw *hw)
ledctl_blink = E1000_LEDCTL_LED0_BLINK |
(E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT);
} else {
- /* set the blink bit for each LED that's "on" (0x0E)
- * in ledctl_mode2
+ /* Set the blink bit for each LED that's "on" (0x0E)
+ * (or "off" if inverted) in ledctl_mode2. The blink
+ * logic in hardware only works when mode is set to "on"
+ * so it must be changed accordingly when the mode is
+ * "off" and inverted.
*/
ledctl_blink = hw->mac.ledctl_mode2;
- for (i = 0; i < 4; i++)
- if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) ==
- E1000_LEDCTL_MODE_LED_ON)
- ledctl_blink |= (E1000_LEDCTL_LED0_BLINK <<
- (i * 8));
+ for (i = 0; i < 32; i += 8) {
+ u32 mode = (hw->mac.ledctl_mode2 >> i) &
+ E1000_LEDCTL_LED0_MODE_MASK;
+ u32 led_default = hw->mac.ledctl_default >> i;
+
+ if ((!(led_default & E1000_LEDCTL_LED0_IVRT) &&
+ (mode == E1000_LEDCTL_MODE_LED_ON)) ||
+ ((led_default & E1000_LEDCTL_LED0_IVRT) &&
+ (mode == E1000_LEDCTL_MODE_LED_OFF))) {
+ ledctl_blink &=
+ ~(E1000_LEDCTL_LED0_MODE_MASK << i);
+ ledctl_blink |= (E1000_LEDCTL_LED0_BLINK |
+ E1000_LEDCTL_MODE_LED_ON) << i;
+ }
+ }
}
ew32(LEDCTL, ledctl_blink);
--
1.7.11.7
^ permalink raw reply related
* [net-next 02/12] e1000e: slow performance between two 82579 connected via 10Mbit hub
From: Jeff Kirsher @ 2013-03-28 9:00 UTC (permalink / raw)
To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1364461215-7793-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Bruce Allan <bruce.w.allan@intel.com>
Two 82579 LOMs connected via a 10Mb hub experience extraordinarily low
performance. This is because 82579 is excessively aggressive on transmit
at 10Mb half-duplex and will not provide sufficient time for the link
partner to transmit. When the link partner is also 82579, the result is a
lot of collisions (and corresponding re-transmits) that cause the poor
performance. To work-around this issue, significantly increase the IPG in
the MAC to allow enough gap for the link partner to transmit and reduce the
Rx latency in the analog PHY to 0 to reduce the number of collisions.
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/e1000e/defines.h | 1 +
drivers/net/ethernet/intel/e1000e/ich8lan.c | 28 ++++++++++++++++++++++++++++
drivers/net/ethernet/intel/e1000e/ich8lan.h | 1 +
3 files changed, 30 insertions(+)
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index a6aeb19..351c94a 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -236,6 +236,7 @@
#define E1000_STATUS_FUNC_SHIFT 2
#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */
#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */
+#define E1000_STATUS_SPEED_MASK 0x000000C0
#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */
#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */
#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 1cdec5f..4f2f0f6 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -871,6 +871,34 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
return ret_val;
}
+ /* When connected at 10Mbps half-duplex, 82579 parts are excessively
+ * aggressive resulting in many collisions. To avoid this, increase
+ * the IPG and reduce Rx latency in the PHY.
+ */
+ if ((hw->mac.type == e1000_pch2lan) && link) {
+ u32 reg;
+ reg = er32(STATUS);
+ if (!(reg & (E1000_STATUS_FD | E1000_STATUS_SPEED_MASK))) {
+ reg = er32(TIPG);
+ reg &= ~E1000_TIPG_IPGT_MASK;
+ reg |= 0xFF;
+ ew32(TIPG, reg);
+
+ /* Reduce Rx latency in analog PHY */
+ ret_val = hw->phy.ops.acquire(hw);
+ if (ret_val)
+ return ret_val;
+
+ ret_val =
+ e1000_write_emi_reg_locked(hw, I82579_RX_CONFIG, 0);
+
+ hw->phy.ops.release(hw);
+
+ if (ret_val)
+ return ret_val;
+ }
+ }
+
/* Work-around I218 hang issue */
if ((hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
(hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_V)) {
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 8bf4655..00ba6c9 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -211,6 +211,7 @@
#define I82579_MSE_THRESHOLD 0x084F /* 82579 Mean Square Error Threshold */
#define I82577_MSE_THRESHOLD 0x0887 /* 82577 Mean Square Error Threshold */
#define I82579_MSE_LINK_DOWN 0x2411 /* MSE count before dropping link */
+#define I82579_RX_CONFIG 0x3412 /* Receive configuration */
#define I82579_EEE_PCS_STATUS 0x182D /* IEEE MMD Register 3.1 >> 8 */
#define I82579_EEE_CAPABILITY 0x0410 /* IEEE MMD Register 3.20 */
#define I82579_EEE_ADVERTISEMENT 0x040E /* IEEE MMD Register 7.60 */
--
1.7.11.7
^ permalink raw reply related
* [net-next 01/12] e1000e: additional error handling on PHY register accesses
From: Jeff Kirsher @ 2013-03-28 9:00 UTC (permalink / raw)
To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1364461215-7793-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Bruce Allan <bruce.w.allan@intel.com>
PHY reads/writes via the MDIC register could potentially return results
from a previous PHY register access. If that happens, the offset in the
returned results will be that of the previous access and if that is
different from the expected offset, log a debug message and error out.
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/e1000e/defines.h | 1 +
drivers/net/ethernet/intel/e1000e/phy.c | 12 ++++++++++++
2 files changed, 13 insertions(+)
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index b7c664f..a6aeb19 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -788,6 +788,7 @@
GG82563_REG(194, 18) /* Inband Control */
/* MDI Control */
+#define E1000_MDIC_REG_MASK 0x001F0000
#define E1000_MDIC_REG_SHIFT 16
#define E1000_MDIC_PHY_SHIFT 21
#define E1000_MDIC_OP_WRITE 0x04000000
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 60dbf02..cbb310f 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -178,6 +178,12 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
e_dbg("MDI Error\n");
return -E1000_ERR_PHY;
}
+ if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+ e_dbg("MDI Read offset error - requested %d, returned %d\n",
+ offset,
+ (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+ return -E1000_ERR_PHY;
+ }
*data = (u16)mdic;
/* Allow some time after each MDIC transaction to avoid
@@ -236,6 +242,12 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
e_dbg("MDI Error\n");
return -E1000_ERR_PHY;
}
+ if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+ e_dbg("MDI Write offset error - requested %d, returned %d\n",
+ offset,
+ (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+ return -E1000_ERR_PHY;
+ }
/* Allow some time after each MDIC transaction to avoid
* reading duplicate data in the next MDIC transaction.
--
1.7.11.7
^ permalink raw reply related
* [net-next 00/12][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2013-03-28 9:00 UTC (permalink / raw)
To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann
This series contains updates to e1000e, ixgbe and ixgbevf.
Majority of the changes are against e1000e (from Bruce Allan).
Bruce adds additional error handling on PHY register access, as
well as improve slow performance on 82579 when connected to a
10Mbit hub. In addition, fixes LED blink logic for cathode
LED design. Most notable is added EEE support which is enabled
by default and the added support for LTR on I217/I218.
The ixgbe and ixgbevf from Greg Rose changes the VM so that if a user
does not assign a MAC address, the MAC address is set to all zeros
instead of a random MAC address. This ensures that we always know when
we have a random address and udev won't get upset about it.
The following are changes since commit 8b49a4c75965ed157e21450d23dcadd6b27c1aa3:
bnx2x: fix compilation without CONFIG_BNX2X_SRIOV
and are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next master
Bruce Allan (10):
e1000e: additional error handling on PHY register accesses
e1000e: slow performance between two 82579 connected via 10Mbit hub
e1000e: fix LED blink logic for designs with LEDs driven by cathode
e1000e: long access timeouts when I217/I218 MAC and PHY are out of
sync
e1000e: EEE capability advertisement not set/disabled as required
e1000e: enable EEE by default
e1000e: add support for LTR on I217/I218
e1000e: cleanup unused defines
e1000e: increase driver version number
e1000e: fix scheduling while atomic bugs
Greg Rose (2):
ixgbe: Don't give VFs random MAC addresses
ixgbevf: Adjust to handle unassigned MAC address from PF
drivers/net/ethernet/intel/e1000e/defines.h | 2 +
drivers/net/ethernet/intel/e1000e/e1000.h | 3 +
drivers/net/ethernet/intel/e1000e/ethtool.c | 63 ++-----
drivers/net/ethernet/intel/e1000e/ich8lan.c | 213 ++++++++++++++++++----
drivers/net/ethernet/intel/e1000e/ich8lan.h | 11 +-
drivers/net/ethernet/intel/e1000e/mac.c | 27 ++-
drivers/net/ethernet/intel/e1000e/netdev.c | 38 +++-
drivers/net/ethernet/intel/e1000e/phy.c | 20 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 23 ++-
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 21 ++-
drivers/net/ethernet/intel/ixgbevf/vf.c | 7 +-
11 files changed, 315 insertions(+), 113 deletions(-)
--
1.7.11.7
^ permalink raw reply
* Re: [PATCH net-next 00/19] IPVS optimizations, part 2
From: Julian Anastasov @ 2013-03-28 8:57 UTC (permalink / raw)
To: Simon Horman; +Cc: lvs-devel, netdev
In-Reply-To: <20130328054127.GC10171@verge.net.au>
Hello,
On Thu, 28 Mar 2013, Simon Horman wrote:
> On Fri, Mar 22, 2013 at 11:46:35AM +0200, Julian Anastasov wrote:
> > This is the second patchset with IPVS optimizations.
> > Now we convert the schedulers, dests and services to RCU.
> >
> > All patches are for net-next based on the first
> > patchset v3. The idea is after discussion and review Simon to
> > apply the patchset after a week or so to ipvs-next tree.
> >
> > The changes in this patchset eliminate global locks
> > from packet processing by using RCU. There are more details
> > in the patches.
> >
> > After this patchset the situation is as follows:
> >
> > - dests:
> > - lookups under RCU lock allow ip_vs_dest_hold, used
> > for binding dest to conn or to select dest by scheduler
> > - dests are freed by dest_trash code long after grace period
> >
> > - services:
> > - no global read_lock
> > - lookups under RCU lock allow scheduler to select
> > dests under RCU lock
> > - grace period implemented with IP_VS_WAIT_WHILE is
> > gone allowing scheduler's dest selection and scheduler
> > reconfiguration to run in parallel
> >
> > - schedulers:
> > - schedule method runs under RCU lock, needs _rcu
> > if using svc->destinations, needs _bh suffix to locks
> > because it can be called in LOCAL_OUT hook
> > - when dest is added, the add_dest method is called
> > instead of update_service
> > - when dest is deleted, the del_dest method is called
> > instead of update_service
> > - when dest is updated, the upd_dest method is called
> > instead of update_service
> > - scheduler can hold dests in its state long after they are
> > unlinked from svc, even without providing del_dest handler.
> > But such dests must not be returned by the
> > schedule method (needs IP_VS_DEST_F_AVAILABLE check)
> > - sched_data must be freed after grace period and
> > module exit should be delayed with synchronize_rcu
> > to wait all RCU read-side critical sections to
> > complete
> >
> > - BH:
> > - we do not disable BHs in LOCAL_OUT and sync code anymore
> > - _bh suffixes are added to all places that need them,
> > except timer handlers
>
> Hi Julian,
>
> what is the status of this series?
>
> N.B: THis series is different to "[PATCH 00/15 v3] IPVS optimizations (repost)"
Yes, I posted 2 parts with optimizations. Part 1 is
at v3 while part 2 is at its first version. Both series
are ready for applying. BTW, Hans plans more tests in
the next days, may be we will have some numbers for
comparison.
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply
* Re: Difference between Net and Net-Next
From: Yi Li @ 2013-03-28 8:06 UTC (permalink / raw)
To: Richard Cochran; +Cc: netdev
In-Reply-To: <20130328062814.GC2511@netboy.at.omicron.at>
于 2013年03月28日 14:28, Richard Cochran 写道:
> On Wed, Mar 27, 2013 at 05:02:28PM +0000, Jim Baxter wrote:
>> How do you decide which changes should go into "net" and which ones go into "net-
>> next"?
>>
>> At a guess I would think bug fixes are in "net" and feature changes into
>> "net-next" but it is not obvious from the commit log.
> I'll go out on a limb and try to explain this:
>
> net-next - changes for the next merge window
> net - bug fixes and cleanups for the current release candidate
>
> HTR (hope thats right),
> Richard
>
net-next - new features added in development and *experimental*
net - features(thought as steady and come frome net-next tree) and bug
fixes, can be merged into linux.git(which is the tree below) by Linus.
linux - the mainline release.
That's my opinion.
regards,
Yi
^ permalink raw reply
* [PATCH 11/11] batman-adv: use seq_puts instead of seq_printf when the format is constant
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
As reported by checkpatch, seq_puts has to be preferred with
respect to seq_printf when the format is a constant string
(no va_args)
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
net/batman-adv/gateway_client.c | 2 +-
net/batman-adv/main.c | 2 +-
net/batman-adv/network-coding.c | 8 ++++----
net/batman-adv/originator.c | 4 ++--
net/batman-adv/vis.c | 4 ++--
5 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 34f99a4..f105219 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -500,7 +500,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
rcu_read_unlock();
if (gw_count == 0)
- seq_printf(seq, "No gateways in range ...\n");
+ seq_puts(seq, "No gateways in range ...\n");
out:
if (primary_if)
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 62b1f89..6277735 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -419,7 +419,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
{
struct batadv_algo_ops *bat_algo_ops;
- seq_printf(seq, "Available routing algorithms:\n");
+ seq_puts(seq, "Available routing algorithms:\n");
hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
seq_printf(seq, "%s\n", bat_algo_ops->name);
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 086c107..6b9a544 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1760,23 +1760,23 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
seq_printf(seq, "Node: %pM\n", orig_node->orig);
- seq_printf(seq, " Ingoing: ");
+ seq_puts(seq, " Ingoing: ");
/* For each in_nc_node to this orig_node */
list_for_each_entry_rcu(nc_node,
&orig_node->in_coding_list,
list)
seq_printf(seq, "%pM ",
nc_node->addr);
- seq_printf(seq, "\n");
+ seq_puts(seq, "\n");
- seq_printf(seq, " Outgoing: ");
+ seq_puts(seq, " Outgoing: ");
/* For out_nc_node to this orig_node */
list_for_each_entry_rcu(nc_node,
&orig_node->out_coding_list,
list)
seq_printf(seq, "%pM ",
nc_node->addr);
- seq_printf(seq, "\n\n");
+ seq_puts(seq, "\n\n");
}
rcu_read_unlock();
}
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 585e684..2f34525 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -465,7 +465,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
neigh_node_tmp->tq_avg);
}
- seq_printf(seq, "\n");
+ seq_puts(seq, "\n");
batman_count++;
next:
@@ -475,7 +475,7 @@ next:
}
if (batman_count == 0)
- seq_printf(seq, "No batman nodes in range ...\n");
+ seq_puts(seq, "No batman nodes in range ...\n");
out:
if (primary_if)
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index c053244..962ccf3 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -149,7 +149,7 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq,
hlist_for_each_entry(entry, if_list, list) {
if (entry->primary)
- seq_printf(seq, "PRIMARY, ");
+ seq_puts(seq, "PRIMARY, ");
else
seq_printf(seq, "SEC %pM, ", entry->addr);
}
@@ -207,7 +207,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq,
if (batadv_compare_eth(entry->addr, packet->vis_orig))
batadv_vis_data_read_prim_sec(seq, list);
- seq_printf(seq, "\n");
+ seq_puts(seq, "\n");
}
}
--
1.8.1.5
^ permalink raw reply related
* [PATCH 10/11] batman-adv: update Makefile copyright years
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Simon Wunderlich
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 4b8f192..acbac2a 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
#
--
1.8.1.5
^ permalink raw reply related
* [PATCH 09/11] batman-adv: Start new development cycle
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Simon Wunderlich, Antonio Quartulli
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
From: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/main.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index a9e12e6..f90f5bc 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.1.0"
+#define BATADV_SOURCE_VERSION "2013.2.0"
#endif
/* B.A.T.M.A.N. parameters */
--
1.8.1.5
^ permalink raw reply related
* [PATCH 08/11] batman-adv: free an hard-interface before adding it
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
When adding a new hard interface (e.g. wlan0) to a soft interface (e.g. bat0)
and the former is already enslaved in another virtual interface (e.g. a software
bridge) batman-adv has to free it first and then continue with the adding
mechanism.
In this way the behaviour becomes consistent with what "ip link set master"
does. At the moment batman-adv enslaves the hard interface without checking for
the master device, possibly causing strange behaviours which are never wanted by
the users.
Reported-by: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
net/batman-adv/hard-interface.c | 39 +++++++++++++++++++++++++++++++++------
1 file changed, 33 insertions(+), 6 deletions(-)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index f33ced8..522243a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -307,11 +307,35 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
batadv_update_min_mtu(hard_iface->soft_iface);
}
+/**
+ * batadv_master_del_slave - remove hard_iface from the current master interface
+ * @slave: the interface enslaved in another master
+ * @master: the master from which slave has to be removed
+ *
+ * Invoke ndo_del_slave on master passing slave as argument. In this way slave
+ * is free'd and master can correctly change its internal state.
+ * Return 0 on success, a negative value representing the error otherwise
+ */
+static int batadv_master_del_slave(struct batadv_hard_iface *slave,
+ struct net_device *master)
+{
+ int ret;
+
+ if (!master)
+ return 0;
+
+ ret = -EBUSY;
+ if (master->netdev_ops->ndo_del_slave)
+ ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev);
+
+ return ret;
+}
+
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
const char *iface_name)
{
struct batadv_priv *bat_priv;
- struct net_device *soft_iface;
+ struct net_device *soft_iface, *master;
__be16 ethertype = __constant_htons(ETH_P_BATMAN);
int ret;
@@ -321,11 +345,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
if (!atomic_inc_not_zero(&hard_iface->refcount))
goto out;
- /* hard-interface is part of a bridge */
- if (hard_iface->net_dev->priv_flags & IFF_BRIDGE_PORT)
- pr_err("You are about to enable batman-adv on '%s' which already is part of a bridge. Unless you know exactly what you are doing this is probably wrong and won't work the way you think it would.\n",
- hard_iface->net_dev->name);
-
soft_iface = dev_get_by_name(&init_net, iface_name);
if (!soft_iface) {
@@ -347,6 +366,14 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
goto err_dev;
}
+ /* check if the interface is enslaved in another virtual one and
+ * in that case unlink it first
+ */
+ master = netdev_master_upper_dev_get(hard_iface->net_dev);
+ ret = batadv_master_del_slave(hard_iface, master);
+ if (ret)
+ goto err_dev;
+
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
--
1.8.1.5
^ permalink raw reply related
* [PATCH 07/11] batman-adv: Allow to modify slaves of soft-interfaces through rntl_link
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Marek Lindner,
Antonio Quartulli
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
From: Sven Eckelmann <sven@narfation.org>
The sysfs configuration interface of batman-adv to add/remove slaves of an
soft-iface is not deadlock free and doesn't follow the currently common way to
modify slaves of an interface.
An additional configuration interface though rtnl_link is introduced which
provides easy device adding/removing with tools like "ip":
$ ip link set dev eth0 master bat0
$ ip link set dev eth0 nomaster
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/hard-interface.c | 12 +++++++--
net/batman-adv/soft-interface.c | 56 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 6c32607..f33ced8 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -350,9 +350,13 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
+ ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface);
+ if (ret)
+ goto err_dev;
+
ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface);
if (ret < 0)
- goto err_dev;
+ goto err_upper;
hard_iface->if_num = bat_priv->num_ifaces;
bat_priv->num_ifaces++;
@@ -362,7 +366,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
bat_priv->num_ifaces--;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
- goto err_dev;
+ goto err_upper;
}
hard_iface->batman_adv_ptype.type = ethertype;
@@ -401,7 +405,10 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
out:
return 0;
+err_upper:
+ netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface);
err_dev:
+ hard_iface->soft_iface = NULL;
dev_put(soft_iface);
err:
batadv_hardif_free_ref(hard_iface);
@@ -450,6 +457,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
batadv_softif_destroy_sysfs(hard_iface->soft_iface);
+ netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
hard_iface->soft_iface = NULL;
batadv_hardif_free_ref(hard_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 545c863..403b8c4 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -509,6 +509,58 @@ free_bat_counters:
return ret;
}
+/**
+ * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
+ * @dev: batadv_soft_interface used as master interface
+ * @slave_dev: net_device which should become the slave interface
+ *
+ * Return 0 if successful or error otherwise.
+ */
+static int batadv_softif_slave_add(struct net_device *dev,
+ struct net_device *slave_dev)
+{
+ struct batadv_hard_iface *hard_iface;
+ int ret = -EINVAL;
+
+ hard_iface = batadv_hardif_get_by_netdev(slave_dev);
+ if (!hard_iface || hard_iface->soft_iface != NULL)
+ goto out;
+
+ ret = batadv_hardif_enable_interface(hard_iface, dev->name);
+
+out:
+ if (hard_iface)
+ batadv_hardif_free_ref(hard_iface);
+ return ret;
+}
+
+/**
+ * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface
+ * @dev: batadv_soft_interface used as master interface
+ * @slave_dev: net_device which should be removed from the master interface
+ *
+ * Return 0 if successful or error otherwise.
+ */
+static int batadv_softif_slave_del(struct net_device *dev,
+ struct net_device *slave_dev)
+{
+ struct batadv_hard_iface *hard_iface;
+ int ret = -EINVAL;
+
+ hard_iface = batadv_hardif_get_by_netdev(slave_dev);
+
+ if (!hard_iface || hard_iface->soft_iface != dev)
+ goto out;
+
+ batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP);
+ ret = 0;
+
+out:
+ if (hard_iface)
+ batadv_hardif_free_ref(hard_iface);
+ return ret;
+}
+
static const struct net_device_ops batadv_netdev_ops = {
.ndo_init = batadv_softif_init_late,
.ndo_open = batadv_interface_open,
@@ -517,7 +569,9 @@ static const struct net_device_ops batadv_netdev_ops = {
.ndo_set_mac_address = batadv_interface_set_mac_addr,
.ndo_change_mtu = batadv_interface_change_mtu,
.ndo_start_xmit = batadv_interface_tx,
- .ndo_validate_addr = eth_validate_addr
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_add_slave = batadv_softif_slave_add,
+ .ndo_del_slave = batadv_softif_slave_del,
};
/**
--
1.8.1.5
^ permalink raw reply related
* [PATCH 06/11] batman-adv: Allow to use rntl_link for device creation/deletion
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Marek Lindner,
Antonio Quartulli
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
From: Sven Eckelmann <sven@narfation.org>
The sysfs configuration interface of batman-adv to add/remove soft-interfaces
is not deadlock free and doesn't follow the currently common way to create new
virtual interfaces.
An additional interface though rtnl_link is introduced which provides easy device
creation/deletion with tools like "ip":
$ ip link add dev bat0 type batadv
$ ip link del dev bat0
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/main.c | 2 ++
net/batman-adv/main.h | 1 +
net/batman-adv/soft-interface.c | 29 +++++++++++++++++++++++++++++
net/batman-adv/soft-interface.h | 1 +
4 files changed, 33 insertions(+)
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 0495a7d..62b1f89 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -71,6 +71,7 @@ static int __init batadv_init(void)
batadv_debugfs_init();
register_netdevice_notifier(&batadv_hard_if_notifier);
+ rtnl_link_register(&batadv_link_ops);
pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n",
BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
@@ -81,6 +82,7 @@ static int __init batadv_init(void)
static void __exit batadv_exit(void)
{
batadv_debugfs_destroy();
+ rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
batadv_hardif_remove_interfaces();
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 0afd4ee..a9e12e6 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -152,6 +152,7 @@ enum batadv_uev_type {
#include <linux/percpu.h>
#include <linux/slab.h>
#include <net/sock.h> /* struct sock */
+#include <net/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/seq_file.h>
#include "types.h"
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index bc77a5be..545c863 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -570,6 +570,8 @@ struct net_device *batadv_softif_create(const char *name)
if (!soft_iface)
return NULL;
+ soft_iface->rtnl_link_ops = &batadv_link_ops;
+
ret = register_netdevice(soft_iface);
if (ret < 0) {
pr_err("Unable to register the batman interface '%s': %i\n",
@@ -592,6 +594,26 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
}
+/**
+ * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink
+ * @soft_iface: the to-be-removed batman-adv interface
+ * @head: list pointer
+ */
+static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
+ struct list_head *head)
+{
+ struct batadv_hard_iface *hard_iface;
+
+ list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->soft_iface == soft_iface)
+ batadv_hardif_disable_interface(hard_iface,
+ BATADV_IF_CLEANUP_KEEP);
+ }
+
+ batadv_sysfs_del_meshif(soft_iface);
+ unregister_netdevice_queue(soft_iface, head);
+}
+
int batadv_softif_is_valid(const struct net_device *net_dev)
{
if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
@@ -600,6 +622,13 @@ int batadv_softif_is_valid(const struct net_device *net_dev)
return 0;
}
+struct rtnl_link_ops batadv_link_ops __read_mostly = {
+ .kind = "batadv",
+ .priv_size = sizeof(struct batadv_priv),
+ .setup = batadv_softif_init_early,
+ .dellink = batadv_softif_destroy_netlink,
+};
+
/* ethtool */
static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 49bc66b..2f2472c 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -27,5 +27,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
struct net_device *batadv_softif_create(const char *name);
void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
int batadv_softif_is_valid(const struct net_device *net_dev);
+extern struct rtnl_link_ops batadv_link_ops;
#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
--
1.8.1.5
^ permalink raw reply related
* [PATCH 05/11] batman-adv: rename batadv_softif_destroy to reflect sysfs use case
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Marek Lindner, Sven Eckelmann,
Antonio Quartulli
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
From: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
CC: Sven Eckelmann <sven@narfation.org>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/hard-interface.c | 2 +-
net/batman-adv/soft-interface.c | 6 +++++-
net/batman-adv/soft-interface.h | 2 +-
3 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 74e3ec2f..6c32607 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -448,7 +448,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
/* nobody uses this interface anymore */
if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
- batadv_softif_destroy(hard_iface->soft_iface);
+ batadv_softif_destroy_sysfs(hard_iface->soft_iface);
hard_iface->soft_iface = NULL;
batadv_hardif_free_ref(hard_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index e889bfb..bc77a5be 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -581,7 +581,11 @@ struct net_device *batadv_softif_create(const char *name)
return soft_iface;
}
-void batadv_softif_destroy(struct net_device *soft_iface)
+/**
+ * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs
+ * @soft_iface: the to-be-removed batman-adv interface
+ */
+void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 43182e5..49bc66b 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -25,7 +25,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
struct sk_buff *skb, struct batadv_hard_iface *recv_if,
int hdr_size, struct batadv_orig_node *orig_node);
struct net_device *batadv_softif_create(const char *name);
-void batadv_softif_destroy(struct net_device *soft_iface);
+void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
int batadv_softif_is_valid(const struct net_device *net_dev);
#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
--
1.8.1.5
^ permalink raw reply related
* [PATCH 04/11] batman-adv: Don't always delete softif when last slave was removed
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Marek Lindner,
Antonio Quartulli
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
From: Sven Eckelmann <sven@narfation.org>
batman-adv has an unusual way to manage softinterfaces. These will be created
automatically when a user writes to the batman-adv/mesh_iface file in sysfs and
removed when no slave device exists anymore.
This behaviour cannot be changed without breaking compatibility with existing
code. Instead other interfaces should be able to slightly reduce this behaviour
and provide a more common reaction to a removal of a slave interface.
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/hard-interface.c | 8 +++++---
net/batman-adv/hard-interface.h | 13 ++++++++++++-
net/batman-adv/sysfs.c | 6 ++++--
3 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index da000e9..74e3ec2f 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -408,7 +408,8 @@ err:
return ret;
}
-void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
+void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
+ enum batadv_hard_if_cleanup autodel)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
struct batadv_hard_iface *primary_if = NULL;
@@ -446,7 +447,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
dev_put(hard_iface->soft_iface);
/* nobody uses this interface anymore */
- if (!bat_priv->num_ifaces)
+ if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
batadv_softif_destroy(hard_iface->soft_iface);
hard_iface->soft_iface = NULL;
@@ -533,7 +534,8 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
/* first deactivate interface */
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
- batadv_hardif_disable_interface(hard_iface);
+ batadv_hardif_disable_interface(hard_iface,
+ BATADV_IF_CLEANUP_AUTO);
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
return;
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 308437d..4989288 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -29,13 +29,24 @@ enum batadv_hard_if_state {
BATADV_IF_I_WANT_YOU,
};
+/**
+ * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal
+ * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
+ * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
+ */
+enum batadv_hard_if_cleanup {
+ BATADV_IF_CLEANUP_KEEP,
+ BATADV_IF_CLEANUP_AUTO,
+};
+
extern struct notifier_block batadv_hard_if_notifier;
struct batadv_hard_iface*
batadv_hardif_get_by_netdev(const struct net_device *net_dev);
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
const char *iface_name);
-void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface);
+void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
+ enum batadv_hard_if_cleanup autodel);
void batadv_hardif_remove_interfaces(void);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index ce39f62..15a22ef 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -588,13 +588,15 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
}
if (status_tmp == BATADV_IF_NOT_IN_USE) {
- batadv_hardif_disable_interface(hard_iface);
+ batadv_hardif_disable_interface(hard_iface,
+ BATADV_IF_CLEANUP_AUTO);
goto unlock;
}
/* if the interface already is in use */
if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
- batadv_hardif_disable_interface(hard_iface);
+ batadv_hardif_disable_interface(hard_iface,
+ BATADV_IF_CLEANUP_AUTO);
ret = batadv_hardif_enable_interface(hard_iface, buff);
--
1.8.1.5
^ permalink raw reply related
* [PATCH 03/11] batman-adv: Move deinitialization of soft-interface to destructor
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Marek Lindner,
Antonio Quartulli
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
From: Sven Eckelmann <sven@narfation.org>
The deinitialization of the soft-interface created in ndo_init/constructor
should be done in the destructor and not directly before calling
unregister_netdevice
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/soft-interface.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index c5cb0a7..e889bfb 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -417,7 +417,6 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
cleanup_work);
soft_iface = bat_priv->soft_iface;
- batadv_debugfs_del_meshif(soft_iface);
batadv_sysfs_del_meshif(soft_iface);
rtnl_lock();
@@ -522,6 +521,17 @@ static const struct net_device_ops batadv_netdev_ops = {
};
/**
+ * batadv_softif_free - Deconstructor of batadv_soft_interface
+ * @dev: Device to cleanup and remove
+ */
+static void batadv_softif_free(struct net_device *dev)
+{
+ batadv_debugfs_del_meshif(dev);
+ batadv_mesh_free(dev);
+ free_netdev(dev);
+}
+
+/**
* batadv_softif_init_early - early stage initialization of soft interface
* @dev: registered network device to modify
*/
@@ -532,7 +542,7 @@ static void batadv_softif_init_early(struct net_device *dev)
ether_setup(dev);
dev->netdev_ops = &batadv_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = batadv_softif_free;
dev->tx_queue_len = 0;
/* can't call min_mtu, because the needed variables
@@ -575,7 +585,6 @@ void batadv_softif_destroy(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
- batadv_mesh_free(soft_iface);
queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
}
--
1.8.1.5
^ permalink raw reply related
* [PATCH 02/11] batman-adv: Move soft-interface initialization to ndo_init
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Marek Lindner,
Antonio Quartulli
In-Reply-To: <1364457031-3194-1-git-send-email-ordex@autistici.org>
From: Sven Eckelmann <sven@narfation.org>
The initialization of an net_device object should be done in the
init/constructor function and not from the outside after the register_netdevice
was done to avoid race conditions.
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Acked-by: Antonio Quartulli <ordex@autistici.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
net/batman-adv/hard-interface.c | 5 ++
net/batman-adv/soft-interface.c | 169 +++++++++++++++++++---------------------
2 files changed, 86 insertions(+), 88 deletions(-)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 368219e..da000e9 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -563,6 +563,11 @@ static int batadv_hard_if_event(struct notifier_block *this,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_priv *bat_priv;
+ if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
+ batadv_sysfs_add_meshif(net_dev);
+ return NOTIFY_DONE;
+ }
+
hard_iface = batadv_hardif_get_by_netdev(net_dev);
if (!hard_iface && event == NETDEV_REGISTER)
hard_iface = batadv_hardif_add_interface(net_dev);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index f93ae42..c5cb0a7 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -402,55 +402,6 @@ static void batadv_set_lockdep_class(struct net_device *dev)
}
/**
- * batadv_softif_init - Late stage initialization of soft interface
- * @dev: registered network device to modify
- *
- * Returns error code on failures
- */
-static int batadv_softif_init(struct net_device *dev)
-{
- batadv_set_lockdep_class(dev);
-
- return 0;
-}
-
-static const struct net_device_ops batadv_netdev_ops = {
- .ndo_init = batadv_softif_init,
- .ndo_open = batadv_interface_open,
- .ndo_stop = batadv_interface_release,
- .ndo_get_stats = batadv_interface_stats,
- .ndo_set_mac_address = batadv_interface_set_mac_addr,
- .ndo_change_mtu = batadv_interface_change_mtu,
- .ndo_start_xmit = batadv_interface_tx,
- .ndo_validate_addr = eth_validate_addr
-};
-
-static void batadv_interface_setup(struct net_device *dev)
-{
- struct batadv_priv *priv = netdev_priv(dev);
-
- ether_setup(dev);
-
- dev->netdev_ops = &batadv_netdev_ops;
- dev->destructor = free_netdev;
- dev->tx_queue_len = 0;
-
- /* can't call min_mtu, because the needed variables
- * have not been initialized yet
- */
- dev->mtu = ETH_DATA_LEN;
- /* reserve more space in the skbuff for our header */
- dev->hard_header_len = BATADV_HEADER_LEN;
-
- /* generate random address */
- eth_hw_addr_random(dev);
-
- SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
-
- memset(priv, 0, sizeof(*priv));
-}
-
-/**
* batadv_softif_destroy_finish - cleans up the remains of a softif
* @work: work queue item
*
@@ -474,21 +425,22 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
rtnl_unlock();
}
-struct net_device *batadv_softif_create(const char *name)
+/**
+ * batadv_softif_init_late - late stage initialization of soft interface
+ * @dev: registered network device to modify
+ *
+ * Returns error code on failures
+ */
+static int batadv_softif_init_late(struct net_device *dev)
{
- struct net_device *soft_iface;
struct batadv_priv *bat_priv;
int ret;
size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM;
- soft_iface = alloc_netdev(sizeof(*bat_priv), name,
- batadv_interface_setup);
+ batadv_set_lockdep_class(dev);
- if (!soft_iface)
- goto out;
-
- bat_priv = netdev_priv(soft_iface);
- bat_priv->soft_iface = soft_iface;
+ bat_priv = netdev_priv(dev);
+ bat_priv->soft_iface = dev;
INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
/* batadv_interface_stats() needs to be available as soon as
@@ -496,14 +448,7 @@ struct net_device *batadv_softif_create(const char *name)
*/
bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t));
if (!bat_priv->bat_counters)
- goto free_soft_iface;
-
- ret = register_netdevice(soft_iface);
- if (ret < 0) {
- pr_err("Unable to register the batman interface '%s': %i\n",
- name, ret);
- goto free_bat_counters;
- }
+ return -ENOMEM;
atomic_set(&bat_priv->aggregated_ogms, 1);
atomic_set(&bat_priv->bonding, 0);
@@ -541,41 +486,89 @@ struct net_device *batadv_softif_create(const char *name)
bat_priv->primary_if = NULL;
bat_priv->num_ifaces = 0;
- ret = batadv_algo_select(bat_priv, batadv_routing_algo);
- if (ret < 0)
- goto unreg_soft_iface;
-
batadv_nc_init_bat_priv(bat_priv);
- ret = batadv_sysfs_add_meshif(soft_iface);
+ ret = batadv_algo_select(bat_priv, batadv_routing_algo);
if (ret < 0)
- goto unreg_soft_iface;
+ goto free_bat_counters;
- ret = batadv_debugfs_add_meshif(soft_iface);
+ ret = batadv_debugfs_add_meshif(dev);
if (ret < 0)
- goto unreg_sysfs;
+ goto free_bat_counters;
- ret = batadv_mesh_init(soft_iface);
+ ret = batadv_mesh_init(dev);
if (ret < 0)
goto unreg_debugfs;
- return soft_iface;
+ return 0;
unreg_debugfs:
- batadv_debugfs_del_meshif(soft_iface);
-unreg_sysfs:
- batadv_sysfs_del_meshif(soft_iface);
-unreg_soft_iface:
- free_percpu(bat_priv->bat_counters);
- unregister_netdevice(soft_iface);
- return NULL;
-
+ batadv_debugfs_del_meshif(dev);
free_bat_counters:
free_percpu(bat_priv->bat_counters);
-free_soft_iface:
- free_netdev(soft_iface);
-out:
- return NULL;
+
+ return ret;
+}
+
+static const struct net_device_ops batadv_netdev_ops = {
+ .ndo_init = batadv_softif_init_late,
+ .ndo_open = batadv_interface_open,
+ .ndo_stop = batadv_interface_release,
+ .ndo_get_stats = batadv_interface_stats,
+ .ndo_set_mac_address = batadv_interface_set_mac_addr,
+ .ndo_change_mtu = batadv_interface_change_mtu,
+ .ndo_start_xmit = batadv_interface_tx,
+ .ndo_validate_addr = eth_validate_addr
+};
+
+/**
+ * batadv_softif_init_early - early stage initialization of soft interface
+ * @dev: registered network device to modify
+ */
+static void batadv_softif_init_early(struct net_device *dev)
+{
+ struct batadv_priv *priv = netdev_priv(dev);
+
+ ether_setup(dev);
+
+ dev->netdev_ops = &batadv_netdev_ops;
+ dev->destructor = free_netdev;
+ dev->tx_queue_len = 0;
+
+ /* can't call min_mtu, because the needed variables
+ * have not been initialized yet
+ */
+ dev->mtu = ETH_DATA_LEN;
+ /* reserve more space in the skbuff for our header */
+ dev->hard_header_len = BATADV_HEADER_LEN;
+
+ /* generate random address */
+ eth_hw_addr_random(dev);
+
+ SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
+
+ memset(priv, 0, sizeof(*priv));
+}
+
+struct net_device *batadv_softif_create(const char *name)
+{
+ struct net_device *soft_iface;
+ int ret;
+
+ soft_iface = alloc_netdev(sizeof(struct batadv_priv), name,
+ batadv_softif_init_early);
+ if (!soft_iface)
+ return NULL;
+
+ ret = register_netdevice(soft_iface);
+ if (ret < 0) {
+ pr_err("Unable to register the batman interface '%s': %i\n",
+ name, ret);
+ free_netdev(soft_iface);
+ return NULL;
+ }
+
+ return soft_iface;
}
void batadv_softif_destroy(struct net_device *soft_iface)
--
1.8.1.5
^ permalink raw reply related
* pull request: batman-adv 2013-03-28
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem; +Cc: netdev, b.a.t.m.a.n
here you have another batch of patches intended for net-next/linux-3.10.
The first one fixes an endianess bug in the new network coding component,
sent within our last pull request. It was discovered thanks to
Fengguang Wu's automated daily checks on our tree (we upgraded our daily
check to look for endianess bugs as well ;-))
Then you have patches from 2/11 to 8/11 implementing the RTNL API for virtual
interfaces manipulation: now users can use the "ip" tool to create/delete
batman-adv interfaces and to enslave real ones below them. However, to avoid
breaking any user tool, we did not disrupt our sysfs API.
Patch 11/11 is addressing a new checkpatch complaining by substituting seq_printf
with seq_puts when possible (however, checkpatch seems to be broken because it
raises false positives in case of multi-line seq_printf).
The rest is ordinary house keeping.
Please pull or let me know if there is any problem!
Thank you,
Antonio
The following changes since commit 6bdeaba47d87f48a3943b6899d6c6e6f17d52f1d:
6lowpan: use IEEE802154_ADDR_LEN instead of a magic number (2013-03-27 00:52:16 -0400)
are available in the git repository at:
git://git.open-mesh.org/linux-merge.git tags/batman-adv-for-davem
for you to fetch changes up to 0c81465357ffe29da9ff20103afe4a59908e0d30:
batman-adv: use seq_puts instead of seq_printf when the format is constant (2013-03-27 10:29:55 +0100)
----------------------------------------------------------------
Included changes:
- A fix for the network coding component which has been added within the last
pull request (so it is in linux-3.10). The problem has been spotted thanks to
Fengguang Wu's automated daily checks on our tree.
- Implementation of the RTNL API for virtual interface creation/deletion and slave
manipulation
- substitution of seq_printf with seq_puts when possible
- minor cleanups
----------------------------------------------------------------
Antonio Quartulli (3):
batman-adv: free an hard-interface before adding it
batman-adv: update Makefile copyright years
batman-adv: use seq_puts instead of seq_printf when the format is constant
Marek Lindner (1):
batman-adv: rename batadv_softif_destroy to reflect sysfs use case
Martin Hundebøll (1):
batman-adv: Fix endianness errors for network coding
Simon Wunderlich (1):
batman-adv: Start new development cycle
Sven Eckelmann (5):
batman-adv: Move soft-interface initialization to ndo_init
batman-adv: Move deinitialization of soft-interface to destructor
batman-adv: Don't always delete softif when last slave was removed
batman-adv: Allow to use rntl_link for device creation/deletion
batman-adv: Allow to modify slaves of soft-interfaces through rntl_link
net/batman-adv/Makefile | 2 +-
net/batman-adv/gateway_client.c | 2 +-
net/batman-adv/hard-interface.c | 66 ++++++++--
net/batman-adv/hard-interface.h | 13 +-
net/batman-adv/main.c | 4 +-
net/batman-adv/main.h | 3 +-
net/batman-adv/network-coding.c | 10 +-
net/batman-adv/originator.c | 4 +-
net/batman-adv/packet.h | 2 +-
net/batman-adv/soft-interface.c | 269 ++++++++++++++++++++++++++--------------
net/batman-adv/soft-interface.h | 3 +-
net/batman-adv/sysfs.c | 6 +-
net/batman-adv/vis.c | 4 +-
13 files changed, 268 insertions(+), 120 deletions(-)
^ permalink raw reply
* [PATCH 01/11] batman-adv: Fix endianness errors for network coding
From: Antonio Quartulli @ 2013-03-28 7:50 UTC (permalink / raw)
To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Martin Hundebøll,
b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r, Marek Lindner
In-Reply-To: <1364457031-3194-1-git-send-email-ordex-GaUfNO9RBHfsrOwW+9ziJQ@public.gmane.org>
From: Martin Hundebøll <martin-SHBFXCSm21MJGwgDXS7ZQA@public.gmane.org>
Add a htonl() in network_coding.c when reading the sequence number
from received ogm_packet, to avoid wrong byte ordering when comparing
with a host value. This bug was introduced in
3ed7ada3f0bbcd058567bc0a8f9729a73eba7db6 ("batman-adv: network coding -
detect coding nodes and remove these after timeout").
Change the type of coded_packet->coded_len from uint16 to __be16 to
avoid wrong assumptions about endianness in later uses. Introduced in
c3289f3650d34b60296000a629c99f2488f7c3dd ("batman-adv: network coding -
code and transmit packets if possible").
Reported-by: Fengguang Wu <fengguang.wu-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Martin Hundebøll <martin-SHBFXCSm21MJGwgDXS7ZQA@public.gmane.org>
Signed-off-by: Marek Lindner <lindner_marek-LWAfsSFWpa4@public.gmane.org>
Signed-off-by: Antonio Quartulli <ordex-GaUfNO9RBHfsrOwW+9ziJQ@public.gmane.org>
---
net/batman-adv/network-coding.c | 2 +-
net/batman-adv/packet.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 5728079..086c107 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -654,7 +654,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_ogm_packet *ogm_packet)
{
- if (orig_node->last_real_seqno != ogm_packet->seqno)
+ if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno))
return false;
if (orig_node->last_ttl != ogm_packet->header.ttl + 1)
return false;
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index a079958..a51ccfc 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -308,7 +308,7 @@ struct batadv_coded_packet {
uint8_t second_source[ETH_ALEN];
uint8_t second_orig_dest[ETH_ALEN];
__be32 second_crc;
- uint16_t coded_len;
+ __be16 coded_len;
};
#endif /* _NET_BATMAN_ADV_PACKET_H_ */
--
1.8.1.5
^ permalink raw reply related
* Re: Difference between Net and Net-Next
From: Richard Cochran @ 2013-03-28 6:28 UTC (permalink / raw)
To: Jim Baxter; +Cc: netdev
In-Reply-To: <loom.20130327T180118-245@post.gmane.org>
On Wed, Mar 27, 2013 at 05:02:28PM +0000, Jim Baxter wrote:
> How do you decide which changes should go into "net" and which ones go into "net-
> next"?
>
> At a guess I would think bug fixes are in "net" and feature changes into
> "net-next" but it is not obvious from the commit log.
I'll go out on a limb and try to explain this:
net-next - changes for the next merge window
net - bug fixes and cleanups for the current release candidate
HTR (hope thats right),
Richard
^ permalink raw reply
* Re: [PATCH net-next 00/19] IPVS optimizations, part 2
From: Simon Horman @ 2013-03-28 5:41 UTC (permalink / raw)
To: Julian Anastasov; +Cc: lvs-devel, netdev
In-Reply-To: <1363945614-11752-1-git-send-email-ja@ssi.bg>
On Fri, Mar 22, 2013 at 11:46:35AM +0200, Julian Anastasov wrote:
> This is the second patchset with IPVS optimizations.
> Now we convert the schedulers, dests and services to RCU.
>
> All patches are for net-next based on the first
> patchset v3. The idea is after discussion and review Simon to
> apply the patchset after a week or so to ipvs-next tree.
>
> The changes in this patchset eliminate global locks
> from packet processing by using RCU. There are more details
> in the patches.
>
> After this patchset the situation is as follows:
>
> - dests:
> - lookups under RCU lock allow ip_vs_dest_hold, used
> for binding dest to conn or to select dest by scheduler
> - dests are freed by dest_trash code long after grace period
>
> - services:
> - no global read_lock
> - lookups under RCU lock allow scheduler to select
> dests under RCU lock
> - grace period implemented with IP_VS_WAIT_WHILE is
> gone allowing scheduler's dest selection and scheduler
> reconfiguration to run in parallel
>
> - schedulers:
> - schedule method runs under RCU lock, needs _rcu
> if using svc->destinations, needs _bh suffix to locks
> because it can be called in LOCAL_OUT hook
> - when dest is added, the add_dest method is called
> instead of update_service
> - when dest is deleted, the del_dest method is called
> instead of update_service
> - when dest is updated, the upd_dest method is called
> instead of update_service
> - scheduler can hold dests in its state long after they are
> unlinked from svc, even without providing del_dest handler.
> But such dests must not be returned by the
> schedule method (needs IP_VS_DEST_F_AVAILABLE check)
> - sched_data must be freed after grace period and
> module exit should be delayed with synchronize_rcu
> to wait all RCU read-side critical sections to
> complete
>
> - BH:
> - we do not disable BHs in LOCAL_OUT and sync code anymore
> - _bh suffixes are added to all places that need them,
> except timer handlers
Hi Julian,
what is the status of this series?
N.B: THis series is different to "[PATCH 00/15 v3] IPVS optimizations (repost)"
^ permalink raw reply
* [PATCH 15/15] ipvs: avoid kmem_cache_zalloc in ip_vs_conn_new
From: Simon Horman @ 2013-03-28 5:39 UTC (permalink / raw)
To: Pablo Neira Ayuso, David Miller
Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
Julian Anastasov, Simon Horman
In-Reply-To: <1364449184-26672-1-git-send-email-horms@verge.net.au>
From: Julian Anastasov <ja@ssi.bg>
We have many fields to set and few to reset,
use kmem_cache_alloc instead to save some cycles.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 15 +++++++++++++++
net/netfilter/ipvs/ip_vs_conn.c | 24 +++++++++++++++++++-----
2 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7122f7b..6b450c6 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -233,6 +233,21 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
dst->ip = src->ip;
}
+static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
+ const union nf_inet_addr *src)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ dst->in6 = src->in6;
+ return;
+ }
+#endif
+ dst->ip = src->ip;
+ dst->all[1] = 0;
+ dst->all[2] = 0;
+ dst->all[3] = 0;
+}
+
static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
const union nf_inet_addr *b)
{
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 416015b..e3e2b4d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -861,7 +861,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
p->protocol);
- cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+ cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
IP_VS_ERR_RL("%s(): no memory\n", __func__);
return NULL;
@@ -872,13 +872,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
cp->protocol = p->protocol;
- ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
+ ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
- ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr);
+ ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
cp->vport = p->vport;
/* proto should only be IPPROTO_IP if d_addr is a fwmark */
- ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
- &cp->daddr, daddr);
+ ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
+ &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
@@ -887,6 +887,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
cp->pe = p->pe;
cp->pe_data = p->pe_data;
cp->pe_data_len = p->pe_data_len;
+ } else {
+ cp->pe = NULL;
+ cp->pe_data = NULL;
+ cp->pe_data_len = 0;
}
spin_lock_init(&cp->lock);
@@ -897,18 +901,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
*/
atomic_set(&cp->refcnt, 1);
+ cp->control = NULL;
atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0);
+ cp->packet_xmit = NULL;
+ cp->app = NULL;
+ cp->app_data = NULL;
+ /* reset struct ip_vs_seq */
+ cp->in_seq.delta = 0;
+ cp->out_seq.delta = 0;
+
atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt);
/* Bind the connection with a destination server */
+ cp->dest = NULL;
ip_vs_bind_dest(cp, dest);
/* Set its state and timeout */
cp->state = 0;
+ cp->old_state = 0;
cp->timeout = 3*HZ;
cp->sync_endtime = jiffies & ~3UL;
--
1.7.10.4
^ permalink raw reply related
* [PATCH 11/15] ipvs: remove rs_lock by using RCU
From: Simon Horman @ 2013-03-28 5:39 UTC (permalink / raw)
To: Pablo Neira Ayuso, David Miller
Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
Julian Anastasov, Simon Horman
In-Reply-To: <1364449184-26672-1-git-send-email-horms@verge.net.au>
From: Julian Anastasov <ja@ssi.bg>
rs_lock was used to protect rs_table (hash table)
from updaters (under global mutex) and readers (packet handlers).
We can remove rs_lock by using RCU lock for readers. Reclaiming
dest only with kfree_rcu is enough because the readers access
only fields from the ip_vs_dest structure.
Use hlist for rs_table.
As we are now using hlist_del_rcu, introduce in_rs_table
flag as replacement for the list_empty checks which do not
work with RCU. It is needed because only NAT dests are in
the rs_table.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 14 ++++---
net/netfilter/ipvs/ip_vs_core.c | 5 +--
net/netfilter/ipvs/ip_vs_ctl.c | 88 ++++++++++++++-------------------------
3 files changed, 41 insertions(+), 66 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3c91b58..f46db3f 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -738,7 +738,7 @@ struct ip_vs_dest_dst {
*/
struct ip_vs_dest {
struct list_head n_list; /* for the dests in the service */
- struct list_head d_list; /* for table with all the dests */
+ struct hlist_node d_list; /* for table with all the dests */
u16 af; /* address family */
__be16 port; /* port number of the server */
@@ -767,6 +767,9 @@ struct ip_vs_dest {
__be16 vport; /* virtual port number */
union nf_inet_addr vaddr; /* virtual IP address */
__u32 vfwmark; /* firewall mark of service */
+
+ struct rcu_head rcu_head;
+ unsigned int in_rs_table:1; /* we are in rs_table */
};
@@ -897,7 +900,7 @@ struct netns_ipvs {
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
- struct list_head rs_table[IP_VS_RTAB_SIZE];
+ struct hlist_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */
struct list_head app_list;
/* ip_vs_proto */
@@ -933,7 +936,6 @@ struct netns_ipvs {
int num_services; /* no of virtual services */
- rwlock_t rs_lock; /* real services table */
/* Trash for destinations */
struct list_head dest_trash;
/* Service counters */
@@ -1364,9 +1366,9 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
atomic_dec(&svc->usecnt);
}
-extern struct ip_vs_dest *
-ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
- const union nf_inet_addr *daddr, __be16 dport);
+extern bool
+ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport);
extern int ip_vs_use_count_inc(void);
extern void ip_vs_use_count_dec(void);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 58d4114..0680644 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1161,9 +1161,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_lookup_real_service(net, af, iph.protocol,
- &iph.saddr,
- pptr[0])) {
+ if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+ pptr[0])) {
/*
* Notify the real server: there is no
* existing entry if it is not RST
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6b72806..c9eeea7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -508,17 +508,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
& IP_VS_RTAB_MASK;
}
-/*
- * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
- * should be called with locked tables.
- */
-static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
+/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
+static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
unsigned int hash;
- if (!list_empty(&dest->d_list)) {
- return 0;
- }
+ if (dest->in_rs_table)
+ return;
/*
* Hash by proto,addr,port,
@@ -526,60 +522,47 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
*/
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
- list_add(&dest->d_list, &ipvs->rs_table[hash]);
-
- return 1;
+ hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
+ dest->in_rs_table = 1;
}
-/*
- * UNhashes ip_vs_dest from rs_table.
- * should be called with locked tables.
- */
-static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
+/* Unhash ip_vs_dest from rs_table. */
+static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
/*
* Remove it from the rs_table table.
*/
- if (!list_empty(&dest->d_list)) {
- list_del_init(&dest->d_list);
+ if (dest->in_rs_table) {
+ hlist_del_rcu(&dest->d_list);
+ dest->in_rs_table = 0;
}
-
- return 1;
}
-/*
- * Lookup real service by <proto,addr,port> in the real service table.
- */
-struct ip_vs_dest *
-ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
- const union nf_inet_addr *daddr,
- __be16 dport)
+/* Check if real service by <proto,addr,port> is present */
+bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash;
struct ip_vs_dest *dest;
- /*
- * Check for "full" addressed entries
- * Return the first found entry
- */
+ /* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- read_lock(&ipvs->rs_lock);
- list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
- if ((dest->af == af)
- && ip_vs_addr_equal(af, &dest->addr, daddr)
- && (dest->port == dport)
- && ((dest->protocol == protocol) ||
- dest->vfwmark)) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->port == dport &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ (dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- read_unlock(&ipvs->rs_lock);
- return dest;
+ rcu_read_unlock();
+ return true;
}
}
- read_unlock(&ipvs->rs_lock);
+ rcu_read_unlock();
- return NULL;
+ return false;
}
/*
@@ -612,9 +595,6 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* the backup synchronization daemon. It finds the
* destination to be bound to the received connection
* on the backup.
- *
- * ip_vs_lookup_real_service() looked promissing, but
- * seems not working as expected.
*/
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
const union nf_inet_addr *daddr,
@@ -715,7 +695,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
__ip_vs_dst_cache_reset(dest);
__ip_vs_unbind_svc(dest);
free_percpu(dest->stats.cpustats);
- kfree(dest);
+ kfree_rcu(dest, rcu_head);
}
}
@@ -742,7 +722,7 @@ static void ip_vs_trash_cleanup(struct net *net)
__ip_vs_dst_cache_reset(dest);
__ip_vs_unbind_svc(dest);
free_percpu(dest->stats.cpustats);
- kfree(dest);
+ kfree_rcu(dest, rcu_head);
}
}
@@ -807,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
* Put the real service in rs_table if not present.
* For now only for NAT!
*/
- write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_hash(ipvs, dest);
- write_unlock_bh(&ipvs->rs_lock);
}
atomic_set(&dest->conn_flags, conn_flags);
@@ -905,7 +883,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atomic_set(&dest->persistconns, 0);
atomic_set(&dest->refcnt, 1);
- INIT_LIST_HEAD(&dest->d_list);
+ INIT_HLIST_NODE(&dest->d_list);
spin_lock_init(&dest->dst_lock);
spin_lock_init(&dest->stats.lock);
__ip_vs_update_dest(svc, dest, udest, 1);
@@ -1045,9 +1023,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
/*
* Remove it from the d-linked list with the real services.
*/
- write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_unhash(dest);
- write_unlock_bh(&ipvs->rs_lock);
/*
* Decrease the refcnt of the dest, and free the dest
@@ -1067,7 +1043,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
time, so the operation here is OK */
atomic_dec(&dest->svc->refcnt);
free_percpu(dest->stats.cpustats);
- kfree(dest);
+ kfree_rcu(dest, rcu_head);
} else {
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
"dest->refcnt=%d\n",
@@ -3804,11 +3780,9 @@ int __net_init ip_vs_control_net_init(struct net *net)
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
- rwlock_init(&ipvs->rs_lock);
-
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
- INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+ INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
INIT_LIST_HEAD(&ipvs->dest_trash);
atomic_set(&ipvs->ftpsvc_counter, 0);
@@ -3885,7 +3859,7 @@ int __init ip_vs_control_init(void)
EnterFunction(2);
- /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
+ /* Initialize svc_table, ip_vs_svc_fwm_table */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
--
1.7.10.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox