* [net-next.git 1/4 (v4)] phy: add the EEE support and the way to access to the MMD registers.
From: Giuseppe CAVALLARO @ 2012-06-12 12:45 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, rayagond, davem, yuvalmin, Giuseppe Cavallaro
In-Reply-To: <1339505153-26731-1-git-send-email-peppe.cavallaro@st.com>
This patch adds the support for the Energy-Efficient Ethernet (EEE)
to the Physical Abstraction Layer.
To support the EEE we have to access to the MMD registers 3.20 and
7.60/61. So two new functions have been added to read/write the MMD
registers (clause 45).
An Ethernet driver (I tested the stmmac) can invoke the phy_init_eee to properly
check if the EEE is supported by the PHYs and it can also set the clock
stop enable bit in the 3.0 register.
The phy_get_eee_err can be used for reporting the number of time where
the PHY failed to complete its normal wake sequence.
In the end, this patch also adds the EEE ethtool support implementing:
o phy_ethtool_set_eee
o phy_ethtool_get_eee
v1: initial patch
v2: fixed some errors especially on naming convention
v3: renamed again the mmd read/write functions thank to Ben's feedback
v4: moved file to phy.c and added the ethtool support.
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
drivers/net/phy/phy.c | 262 +++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/mdio.h | 21 +++-
include/linux/mii.h | 11 ++
include/linux/phy.h | 5 +
4 files changed, 295 insertions(+), 4 deletions(-)
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 2e1c237..39ae424 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -35,6 +35,7 @@
#include <linux/phy.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
+#include <linux/mdio.h>
#include <linux/atomic.h>
#include <asm/io.h>
@@ -967,3 +968,264 @@ void phy_state_machine(struct work_struct *work)
schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ);
}
+
+static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
+ int addr)
+{
+ /* Write the desired MMD Devad */
+ bus->write(bus, addr, MII_MMD_CTRL, devad);
+
+ /* Write the desired MMD register address */
+ bus->write(bus, addr, MII_MMD_DATA, prtad);
+
+ /* Select the Function : DATA with no post increment */
+ bus->write(bus, addr, MII_MMD_CTRL,
+ (devad | MII_MMD_CTRL_FUNC_DATA_NOINCR));
+}
+
+/**
+ * phy_read_mmd_indirect - reads data from the MMC register (clause 22 to
+ * access to clause 45)
+ * @bus: the target MII bus
+ * @prtad: MMD Address
+ * @devad: MMD DEVAD
+ * @addr: PHY address on the MII bus
+ *
+ * Description: Reads data from the MMD regisetrs of the
+ * phy addr. To read these register we have:
+ * 1) Write reg 13 // DEVAD
+ * 2) Write reg 14 // MMD Address
+ * 3) Write reg 13 // MMD Data Command for MMD DEVAD
+ * 3) Read reg 14 // Read MMD data
+ */
+static int phy_read_mmd_indirect(struct mii_bus *bus, int prtad, int devad,
+ int addr)
+{
+ u32 ret;
+
+ mmd_phy_indirect(bus, prtad, devad, addr);
+
+ /* Read the content of the MMD's selected register */
+ ret = bus->read(bus, addr, MII_MMD_DATA);
+
+ return ret;
+}
+
+/**
+ * phy_write_mmd_indirect - writes data to the MMC register (clause 22 to
+ * access to clause 45)
+ * @bus: the target MII bus
+ * @prtad: MMD Address
+ * @devad: MMD DEVAD
+ * @addr: PHY address on the MII bus
+ * @data: data to write in the MMD register
+ *
+ * Description: Reads data from the MMD regisetrs of the
+ * phy addr. To read these register we have:
+ * 1) Write reg 13 // DEVAD
+ * 2) Write reg 14 // MMD Address
+ * 3) Write reg 13 // MMD Data Command for MMD DEVAD
+ * 3) Write reg 14 // Write MMD data
+ */
+static void phy_write_mmd_indirect(struct mii_bus *bus, int prtad, int devad,
+ int addr, u32 data)
+{
+ mmd_phy_indirect(bus, prtad, devad, addr);
+
+ /* Write the data into MMD's selected register */
+ bus->write(bus, addr, MII_MMD_DATA, data);
+}
+
+/* phy_init_eee
+ * @phydev: target phy_device struct
+ * @clk_stop_enable: PHY may stop the clock during LPI
+ *
+ * Description: it checks if the Energy-Efficient Ethernet (EEE)
+ * is supported by looking at the MMD registers 3.20 and 7.60/61
+ * and it programs the MMD register 3.0 setting the "Clock stop enable"
+ * bit if required.
+ * In fact, the clk_stop_enable can be passed to:
+ * 1 = The PHY may stop the clock during LPI
+ * 0 = Clock not stoppable
+ */
+int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
+{
+ int ret = -EPROTONOSUPPORT;
+
+ /* According to 802.3az,the EEE is supported only in full duplex-mode.
+ * Also EEE feature is active when core is operating with MII, GMII
+ * or RGMII.
+ */
+ if ((phydev->duplex == DUPLEX_FULL) &&
+ ((phydev->interface == PHY_INTERFACE_MODE_MII) ||
+ (phydev->interface == PHY_INTERFACE_MODE_GMII) ||
+ (phydev->interface == PHY_INTERFACE_MODE_RGMII))) {
+ int eee_cap, eee_link;
+
+ /* EEE ability must be supported in both local and remote
+ * PHY devices.
+ */
+ eee_cap = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_LPABLE,
+ MDIO_MMD_AN, phydev->addr);
+ if (eee_cap < 0)
+ return eee_cap;
+
+ eee_link = phy_read_mmd_indirect(phydev->bus, MDIO_PCS_EEE_ABLE,
+ MDIO_MMD_PCS, phydev->addr);
+ if (eee_link < 0)
+ return eee_link;
+
+ if (eee_cap && eee_link) {
+ if (clk_stop_enable) {
+ /* Configure the PHY to stop receiving xMII
+ * clock while it is signaling LPI
+ */
+ int ctrl;
+ ctrl = phy_read_mmd_indirect(phydev->bus,
+ MDIO_CTRL1,
+ MDIO_MMD_PCS,
+ phydev->addr);
+ if (ctrl < 0)
+ return ctrl;
+
+ ctrl |= MDIO_PCS_CTRL1_CLKSTOP_EN;
+ phy_write_mmd_indirect(phydev->bus, MDIO_CTRL1,
+ MDIO_MMD_PCS,
+ phydev->addr, ctrl);
+ }
+
+ ret = 0; /* EEE supported */
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(phy_init_eee);
+
+/* phy_get_eee_err
+ * @phydev: target phy_device struct
+ *
+ * Description: it is to report the number of time where the PHY
+ * failed to complete its normal wake sequence.
+ */
+int phy_get_eee_err(struct phy_device *phydev)
+{
+ return phy_read_mmd_indirect(phydev->bus, MDIO_PCS_EEE_WK_ERR,
+ MDIO_MMD_PCS, phydev->addr);
+
+}
+EXPORT_SYMBOL(phy_get_eee_err);
+
+static int phy_eee_to_adv(int eee_adv)
+{
+ int adv = 0;
+
+ if (eee_adv & MDIO_EEE_100TX)
+ adv |= ADVERTISED_100baseT_Full;
+ if (eee_adv & MDIO_EEE_1000T)
+ adv |= ADVERTISED_1000baseT_Full;
+ if (eee_adv & MDIO_EEE_10GT)
+ adv |= ADVERTISED_10000baseT_Full;
+ if (eee_adv & MDIO_EEE_1000KX)
+ adv |= ADVERTISED_1000baseKX_Full;
+ if (eee_adv & MDIO_EEE_10GKX4)
+ adv |= ADVERTISED_10000baseKX4_Full;
+ if (eee_adv & MDIO_EEE_10GKR)
+ adv |= ADVERTISED_10000baseKR_Full;
+
+ return adv;
+}
+
+static int phy_eee_to_supported(int eee_supported)
+{
+ int supported = 0;
+
+ if (eee_supported & MDIO_EEE_100TX)
+ supported |= SUPPORTED_100baseT_Full;
+ if (eee_supported & MDIO_EEE_1000T)
+ supported |= SUPPORTED_1000baseT_Full;
+ if (eee_supported & MDIO_EEE_10GT)
+ supported |= SUPPORTED_10000baseT_Full;
+ if (eee_supported & MDIO_EEE_1000KX)
+ supported |= SUPPORTED_1000baseKX_Full;
+ if (eee_supported & MDIO_EEE_10GKX4)
+ supported |= SUPPORTED_10000baseKX4_Full;
+ if (eee_supported & MDIO_EEE_10GKR)
+ supported |= SUPPORTED_10000baseKR_Full;
+
+ return supported;
+}
+
+/* phy_ethtool_get_eee
+ * @phydev: target phy_device struct
+ * @data: ethtool_eee data
+ *
+ * Description: it reportes the Supported/Advertisement/LP Advertisement
+ * capabilities.
+ */
+int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data)
+{
+ int val;
+
+ /* Get Supported EEE */
+ val = phy_read_mmd_indirect(phydev->bus, MDIO_PCS_EEE_ABLE,
+ MDIO_MMD_PCS, phydev->addr);
+ if (val < 0)
+ return val;
+ data->supported = phy_eee_to_supported(val);
+
+ /* Get advertisement EEE */
+ val = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_ADV,
+ MDIO_MMD_AN, phydev->addr);
+ if (val < 0)
+ return val;
+ data->advertised = phy_eee_to_adv(val);
+
+ /* Get LP advertisement EEE */
+ val = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_LPABLE,
+ MDIO_MMD_AN, phydev->addr);
+ if (val < 0)
+ return val;
+ data->lp_advertised = phy_eee_to_adv(val);
+
+ return 0;
+}
+EXPORT_SYMBOL(phy_ethtool_get_eee);
+
+static int phy_adv_to_eee(int adv)
+{
+ int reg = 0;
+
+ if (adv & ADVERTISED_100baseT_Full)
+ reg |= MDIO_EEE_100TX;
+ if (adv & ADVERTISED_1000baseT_Full)
+ reg |= MDIO_EEE_1000T;
+ if (adv & ADVERTISED_10000baseT_Full)
+ reg |= MDIO_EEE_10GT;
+ if (adv & ADVERTISED_1000baseKX_Full)
+ reg |= MDIO_EEE_1000KX;
+ if (adv & ADVERTISED_10000baseKX4_Full)
+ reg |= MDIO_EEE_10GKX4;
+ if (adv & ADVERTISED_10000baseKR_Full)
+ reg |= MDIO_EEE_10GKR;
+
+ return reg;
+}
+
+/* phy_ethtool_set_eee
+ * @phydev: target phy_device struct
+ * @data: ethtool_eee data
+ *
+ * Description: it is to program the Advertisement EEE register.
+ */
+int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
+{
+ int val;
+
+ val = phy_adv_to_eee(data->advertised);
+ phy_write_mmd_indirect(phydev->bus, MDIO_AN_EEE_ADV, MDIO_MMD_AN,
+ phydev->addr, val);
+
+ return 0;
+}
+EXPORT_SYMBOL(phy_ethtool_set_eee);
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index dfb9479..4ad8f0e 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -43,7 +43,11 @@
#define MDIO_PKGID2 15
#define MDIO_AN_ADVERTISE 16 /* AN advertising (base page) */
#define MDIO_AN_LPA 19 /* AN LP abilities (base page) */
+#define MDIO_PCS_EEE_ABLE 20 /* EEE Capability register */
+#define MDIO_PCS_EEE_WK_ERR 22 /* EEE wake error counter */
#define MDIO_PHYXS_LNSTAT 24 /* PHY XGXS lane state */
+#define MDIO_AN_EEE_ADV 60 /* EEE advertisement */
+#define MDIO_AN_EEE_LPABLE 61 /* EEE link partner ability */
/* Media-dependent registers. */
#define MDIO_PMA_10GBT_SWAPPOL 130 /* 10GBASE-T pair swap & polarity */
@@ -56,7 +60,6 @@
#define MDIO_PCS_10GBRT_STAT2 33 /* 10GBASE-R/-T PCS status 2 */
#define MDIO_AN_10GBT_CTRL 32 /* 10GBASE-T auto-negotiation control */
#define MDIO_AN_10GBT_STAT 33 /* 10GBASE-T auto-negotiation status */
-#define MDIO_AN_EEE_ADV 60 /* EEE advertisement */
/* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */
#define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */
@@ -82,6 +85,7 @@
#define MDIO_AN_CTRL1_RESTART BMCR_ANRESTART
#define MDIO_AN_CTRL1_ENABLE BMCR_ANENABLE
#define MDIO_AN_CTRL1_XNP 0x2000 /* Enable extended next page */
+#define MDIO_PCS_CTRL1_CLKSTOP_EN 0x400 /* Stop the clock during LPI */
/* 10 Gb/s */
#define MDIO_CTRL1_SPEED10G (MDIO_CTRL1_SPEEDSELEXT | 0x00)
@@ -237,9 +241,18 @@
#define MDIO_AN_10GBT_STAT_MS 0x4000 /* Master/slave config */
#define MDIO_AN_10GBT_STAT_MSFLT 0x8000 /* Master/slave config fault */
-/* AN EEE Advertisement register. */
-#define MDIO_AN_EEE_ADV_100TX 0x0002 /* Advertise 100TX EEE cap */
-#define MDIO_AN_EEE_ADV_1000T 0x0004 /* Advertise 1000T EEE cap */
+/* EEE Supported/Advertisement/LP Advertisement registers.
+ *
+ * EEE capability Register (3.20), Advertisement (7.60) and
+ * Link partner ability (7.61) registers have and can use the same identical
+ * bit masks.
+ */
+#define MDIO_EEE_100TX 0x0002 /* 100TX EEE cap */
+#define MDIO_EEE_1000T 0x0004 /* 1000T EEE cap */
+#define MDIO_EEE_10GT 0x0008 /* 10GT EEE cap */
+#define MDIO_EEE_1000KX 0x0010 /* 1000KX EEE cap */
+#define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */
+#define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */
/* LASI RX_ALARM control/status registers. */
#define MDIO_PMA_LASI_RX_PHYXSLFLT 0x0001 /* PHY XS RX local fault */
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 2783eca..35ddda1 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -21,6 +21,8 @@
#define MII_EXPANSION 0x06 /* Expansion register */
#define MII_CTRL1000 0x09 /* 1000BASE-T control */
#define MII_STAT1000 0x0a /* 1000BASE-T status */
+#define MII_MMD_CTRL 0x0d /* MMD Access Control Register */
+#define MII_MMD_DATA 0x0e /* MMD Access Data Register */
#define MII_ESTATUS 0x0f /* Extended Status */
#define MII_DCOUNTER 0x12 /* Disconnect counter */
#define MII_FCSCOUNTER 0x13 /* False carrier counter */
@@ -141,6 +143,15 @@
#define FLOW_CTRL_TX 0x01
#define FLOW_CTRL_RX 0x02
+/* MMD Access Control register fields */
+#define MII_MMD_CTRL_DEVAD_MASK 0x1f /* Mask MMD DEVAD*/
+#define MII_MMD_CTRL_FUNC_ADDR 0x0000 /* Address */
+#define MII_MMD_CTRL_FUNC_DATA_NOINCR 0x4000 /* no post increment */
+#define MII_MMD_CTRL_FUNC_DATA_INCR_ON_RDWT 0x8000 /* post increment on
+ * reads & writes */
+#define MII_MMD_CTRL_FUNC_DATA_INCR_ON_WT 0xC000 /* post increment on
+ * writes only */
+
/* This structure is used in all SIOCxMIIxxx ioctl calls */
struct mii_ioctl_data {
__u16 phy_id;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index c291cae..97fc4cf 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -532,6 +532,11 @@ int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
int phy_scan_fixups(struct phy_device *phydev);
+int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable);
+int phy_get_eee_err(struct phy_device *phydev);
+int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data);
+int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data);
+
int __init mdio_bus_init(void);
void mdio_bus_exit(void);
--
1.7.4.4
^ permalink raw reply related
* [net-next.git 0/4] EEE for PAL and stmmac
From: Giuseppe CAVALLARO @ 2012-06-12 12:45 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, rayagond, davem, yuvalmin, Giuseppe Cavallaro
These patches add the EEE support in the stmmac device driver
restoring an old work I had done some months ago and not
completed in time.
I've tested all on ST STB with the IC+ 101G PHY device that has
this feature.
The initial EEE support for the stmmac has been written by Rayagond
but I have reworked all his code adding new parts and especially
performing tests on a real hardware. Thx Rayagond!
In these patches, we can see that the stmmac supports the EEE
only if the DMA HW capability register says that this
feature is actually available. In that case, the driver can enter
in the Tx LPI mode by using a timer as recommended by Synopsys.
Note that EEE is supported in new chip generations; in particular
I used the 3.61a.
At any rate, further information about how the driver treats the EEE
can be found in the stmmac.txt file (there is a patch for that).
Another patch is for Physical Abstraction Layer now able to
manage the MMD registers (clause 45); it also provides the ethtool
support to manage supported/advertisement/lp adv features.
Giuseppe Cavallaro (4):
phy: add the EEE support and the way to access to the MMD registers.
stmmac: do not use strict_strtoul but kstrtoul
stmmac: update the driver Documentation and add EEE
stmmac: add the Energy Efficient Ethernet support
Documentation/networking/stmmac.txt | 36 +++-
drivers/net/ethernet/stmicro/stmmac/common.h | 31 +++-
drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 20 ++
.../net/ethernet/stmicro/stmmac/dwmac1000_core.c | 101 +++++++-
.../net/ethernet/stmicro/stmmac/dwmac100_core.c | 4 +-
drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 8 +
.../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 55 ++++
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 192 +++++++++++++--
.../net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +
drivers/net/phy/phy.c | 262 ++++++++++++++++++++
include/linux/mdio.h | 21 ++-
include/linux/mii.h | 11 +
include/linux/phy.h | 5 +
14 files changed, 705 insertions(+), 44 deletions(-)
--
1.7.4.4
^ permalink raw reply
* [net-next.git 4/4 (v4)] stmmac: add the Energy Efficient Ethernet support
From: Giuseppe CAVALLARO @ 2012-06-12 12:45 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, rayagond, davem, yuvalmin, Giuseppe Cavallaro
In-Reply-To: <1339505153-26731-1-git-send-email-peppe.cavallaro@st.com>
This patch adds the Energy Efficient Ethernet support to the stmmac.
Please see the driver's documentation for further details about this support
in the driver.
Thanks also goes to Rayagond Kokatanur for his first implementation.
v1: initial patch
v2: fixed some sparse issues (typos)
v3: erroneously sent the v2 renamed as v3
v4:
o Fixed the return value of the stmmac_eee_init as suggested by D.Miller
o Totally reviewed the ethtool support for EEE
o Added a new internal parameter to tune the SW timer for TX LPI.
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
drivers/net/ethernet/stmicro/stmmac/common.h | 31 ++++-
drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 20 +++
.../net/ethernet/stmicro/stmmac/dwmac1000_core.c | 101 +++++++++++-
.../net/ethernet/stmicro/stmmac/dwmac100_core.c | 4 +-
drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 8 +
.../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 55 +++++++
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 166 +++++++++++++++++++-
.../net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +
9 files changed, 370 insertions(+), 18 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index bcd54d6..e2d0832 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -95,6 +95,16 @@ struct stmmac_extra_stats {
unsigned long poll_n;
unsigned long sched_timer_n;
unsigned long normal_irq_n;
+ unsigned long mmc_tx_irq_n;
+ unsigned long mmc_rx_irq_n;
+ unsigned long mmc_rx_csum_offload_irq_n;
+ /* EEE */
+ unsigned long irq_receive_pmt_irq_n;
+ unsigned long irq_tx_path_in_lpi_mode_n;
+ unsigned long irq_tx_path_exit_lpi_mode_n;
+ unsigned long irq_rx_path_in_lpi_mode_n;
+ unsigned long irq_rx_path_exit_lpi_mode_n;
+ unsigned long phy_eee_wakeup_error_n;
};
/* CSR Frequency Access Defines*/
@@ -162,6 +172,17 @@ enum tx_dma_irq_status {
handle_tx_rx = 3,
};
+enum core_specific_irq_mask {
+ core_mmc_tx_irq = 1,
+ core_mmc_rx_irq = 2,
+ core_mmc_rx_csum_offload_irq = 4,
+ core_irq_receive_pmt_irq = 8,
+ core_irq_tx_path_in_lpi_mode = 16,
+ core_irq_tx_path_exit_lpi_mode = 32,
+ core_irq_rx_path_in_lpi_mode = 64,
+ core_irq_rx_path_exit_lpi_mode = 128,
+};
+
/* DMA HW capabilities */
struct dma_features {
unsigned int mbps_10_100;
@@ -208,6 +229,10 @@ struct dma_features {
#define MAC_ENABLE_TX 0x00000008 /* Transmitter Enable */
#define MAC_RNABLE_RX 0x00000004 /* Receiver Enable */
+/* Default LPI timers */
+#define STMMAC_DEFAULT_LIT_LS_TIMER 0x3E8
+#define STMMAC_DEFAULT_TWT_LS_TIMER 0x0
+
struct stmmac_desc_ops {
/* DMA RX descriptor ring initialization */
void (*init_rx_desc) (struct dma_desc *p, unsigned int ring_size,
@@ -278,7 +303,7 @@ struct stmmac_ops {
/* Dump MAC registers */
void (*dump_regs) (void __iomem *ioaddr);
/* Handle extra events on specific interrupts hw dependent */
- void (*host_irq_status) (void __iomem *ioaddr);
+ int (*host_irq_status) (void __iomem *ioaddr);
/* Multicast filter setting */
void (*set_filter) (struct net_device *dev, int id);
/* Flow control setting */
@@ -291,6 +316,10 @@ struct stmmac_ops {
unsigned int reg_n);
void (*get_umac_addr) (void __iomem *ioaddr, unsigned char *addr,
unsigned int reg_n);
+ void (*set_eee_mode) (void __iomem *ioaddr);
+ void (*reset_eee_mode) (void __iomem *ioaddr);
+ void (*set_eee_timer) (void __iomem *ioaddr, int ls, int tw);
+ void (*set_eee_pls) (void __iomem *ioaddr, int link);
};
struct mac_link {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 23478bf..f90fcb5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -36,6 +36,7 @@
#define GMAC_INT_STATUS 0x00000038 /* interrupt status register */
enum dwmac1000_irq_status {
+ lpiis_irq = 0x400,
time_stamp_irq = 0x0200,
mmc_rx_csum_offload_irq = 0x0080,
mmc_tx_irq = 0x0040,
@@ -60,6 +61,25 @@ enum power_event {
power_down = 0x00000001,
};
+/* Energy Efficient Ethernet (EEE)
+ *
+ * LPI status, timer and control register offset
+ */
+#define LPI_CTRL_STATUS 0x0030
+#define LPI_TIMER_CTRL 0x0034
+
+/* LPI control and status defines */
+#define LPI_CTRL_STATUS_LPITXA 0x00080000 /* Enable LPI TX Automate */
+#define LPI_CTRL_STATUS_PLSEN 0x00040000 /* Enable PHY Link Status */
+#define LPI_CTRL_STATUS_PLS 0x00020000 /* PHY Link Status */
+#define LPI_CTRL_STATUS_LPIEN 0x00010000 /* LPI Enable */
+#define LPI_CTRL_STATUS_RLPIST 0x00000200 /* Receive LPI state */
+#define LPI_CTRL_STATUS_TLPIST 0x00000100 /* Transmit LPI state */
+#define LPI_CTRL_STATUS_RLPIEX 0x00000008 /* Receive LPI Exit */
+#define LPI_CTRL_STATUS_RLPIEN 0x00000004 /* Receive LPI Entry */
+#define LPI_CTRL_STATUS_TLPIEX 0x00000002 /* Transmit LPI Exit */
+#define LPI_CTRL_STATUS_TLPIEN 0x00000001 /* Transmit LPI Entry */
+
/* GMAC HW ADDR regs */
#define GMAC_ADDR_HIGH(reg) (((reg > 15) ? 0x00000800 : 0x00000040) + \
(reg * 8))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index b5e4d02..bfe0226 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -194,26 +194,107 @@ static void dwmac1000_pmt(void __iomem *ioaddr, unsigned long mode)
}
-static void dwmac1000_irq_status(void __iomem *ioaddr)
+static int dwmac1000_irq_status(void __iomem *ioaddr)
{
u32 intr_status = readl(ioaddr + GMAC_INT_STATUS);
+ int status = 0;
/* Not used events (e.g. MMC interrupts) are not handled. */
- if ((intr_status & mmc_tx_irq))
- CHIP_DBG(KERN_DEBUG "GMAC: MMC tx interrupt: 0x%08x\n",
+ if ((intr_status & mmc_tx_irq)) {
+ CHIP_DBG(KERN_INFO "GMAC: MMC tx interrupt: 0x%08x\n",
readl(ioaddr + GMAC_MMC_TX_INTR));
- if (unlikely(intr_status & mmc_rx_irq))
- CHIP_DBG(KERN_DEBUG "GMAC: MMC rx interrupt: 0x%08x\n",
+ status |= core_mmc_tx_irq;
+ }
+ if (unlikely(intr_status & mmc_rx_irq)) {
+ CHIP_DBG(KERN_INFO "GMAC: MMC rx interrupt: 0x%08x\n",
readl(ioaddr + GMAC_MMC_RX_INTR));
- if (unlikely(intr_status & mmc_rx_csum_offload_irq))
- CHIP_DBG(KERN_DEBUG "GMAC: MMC rx csum offload: 0x%08x\n",
+ status |= core_mmc_rx_irq;
+ }
+ if (unlikely(intr_status & mmc_rx_csum_offload_irq)) {
+ CHIP_DBG(KERN_INFO "GMAC: MMC rx csum offload: 0x%08x\n",
readl(ioaddr + GMAC_MMC_RX_CSUM_OFFLOAD));
+ status |= core_mmc_rx_csum_offload_irq;
+ }
if (unlikely(intr_status & pmt_irq)) {
- CHIP_DBG(KERN_DEBUG "GMAC: received Magic frame\n");
+ CHIP_DBG(KERN_INFO "GMAC: received Magic frame\n");
/* clear the PMT bits 5 and 6 by reading the PMT
* status register. */
readl(ioaddr + GMAC_PMT);
+ status |= core_irq_receive_pmt_irq;
}
+ /* MAC trx/rx EEE LPI entry/exit interrupts */
+ if (intr_status & lpiis_irq) {
+ /* Clean LPI interrupt by reading the Reg 12 */
+ u32 lpi_status = readl(ioaddr + LPI_CTRL_STATUS);
+
+ if (lpi_status & LPI_CTRL_STATUS_TLPIEN) {
+ CHIP_DBG(KERN_INFO "GMAC TX entered in LPI\n");
+ status |= core_irq_tx_path_in_lpi_mode;
+ }
+ if (lpi_status & LPI_CTRL_STATUS_TLPIEX) {
+ CHIP_DBG(KERN_INFO "GMAC TX exit from LPI\n");
+ status |= core_irq_tx_path_exit_lpi_mode;
+ }
+ if (lpi_status & LPI_CTRL_STATUS_RLPIEN) {
+ CHIP_DBG(KERN_INFO "GMAC RX entered in LPI\n");
+ status |= core_irq_rx_path_in_lpi_mode;
+ }
+ if (lpi_status & LPI_CTRL_STATUS_RLPIEX) {
+ CHIP_DBG(KERN_INFO "GMAC RX exit from LPI\n");
+ status |= core_irq_rx_path_exit_lpi_mode;
+ }
+ }
+
+ return status;
+}
+
+static void dwmac1000_set_eee_mode(void __iomem *ioaddr)
+{
+ u32 value;
+
+ /* Enable the link status receive on RGMII, SGMII ore SMII
+ * receive path and instruct the transmit to enter in LPI
+ * state. */
+ value = readl(ioaddr + LPI_CTRL_STATUS);
+ value |= LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA;
+ writel(value, ioaddr + LPI_CTRL_STATUS);
+}
+
+static void dwmac1000_reset_eee_mode(void __iomem *ioaddr)
+{
+ u32 value;
+
+ value = readl(ioaddr + LPI_CTRL_STATUS);
+ value &= ~(LPI_CTRL_STATUS_LPIEN | LPI_CTRL_STATUS_LPITXA);
+ writel(value, ioaddr + LPI_CTRL_STATUS);
+}
+
+static void dwmac1000_set_eee_pls(void __iomem *ioaddr, int link)
+{
+ u32 value;
+
+ value = readl(ioaddr + LPI_CTRL_STATUS);
+
+ if (link)
+ value |= LPI_CTRL_STATUS_PLS;
+ else
+ value &= ~LPI_CTRL_STATUS_PLS;
+
+ writel(value, ioaddr + LPI_CTRL_STATUS);
+}
+
+static void dwmac1000_set_eee_timer(void __iomem *ioaddr, int ls, int tw)
+{
+ int value = ((tw & 0xffff)) | ((ls & 0x7ff) << 16);
+
+ /* Program the timers in the LPI timer control register:
+ * LS: minimum time (ms) for which the link
+ * status from PHY should be ok before transmitting
+ * the LPI pattern.
+ * TW: minimum time (us) for which the core waits
+ * after it has stopped transmitting the LPI pattern.
+ */
+ writel(value, ioaddr + LPI_TIMER_CTRL);
}
static const struct stmmac_ops dwmac1000_ops = {
@@ -226,6 +307,10 @@ static const struct stmmac_ops dwmac1000_ops = {
.pmt = dwmac1000_pmt,
.set_umac_addr = dwmac1000_set_umac_addr,
.get_umac_addr = dwmac1000_get_umac_addr,
+ .set_eee_mode = dwmac1000_set_eee_mode,
+ .reset_eee_mode = dwmac1000_reset_eee_mode,
+ .set_eee_timer = dwmac1000_set_eee_timer,
+ .set_eee_pls = dwmac1000_set_eee_pls,
};
struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index 19e0f4e..f83210e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -72,9 +72,9 @@ static int dwmac100_rx_ipc_enable(void __iomem *ioaddr)
return 0;
}
-static void dwmac100_irq_status(void __iomem *ioaddr)
+static int dwmac100_irq_status(void __iomem *ioaddr)
{
- return;
+ return 0;
}
static void dwmac100_set_umac_addr(void __iomem *ioaddr, unsigned char *addr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index 6e0360f..e678ce3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -70,6 +70,7 @@
#define DMA_INTR_DEFAULT_MASK (DMA_INTR_NORMAL | DMA_INTR_ABNORMAL)
/* DMA Status register defines */
+#define DMA_STATUS_GLPII 0x40000000 /* GMAC LPI interrupt */
#define DMA_STATUS_GPI 0x10000000 /* PMT interrupt */
#define DMA_STATUS_GMI 0x08000000 /* MMC interrupt */
#define DMA_STATUS_GLI 0x04000000 /* GMAC Line interface int */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 6d07ba2..777771a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -86,6 +86,12 @@ struct stmmac_priv {
#endif
int clk_csr;
int synopsys_id;
+ struct timer_list eee_ctrl_timer;
+ bool tx_path_in_lpi_mode;
+ int lpi_irq;
+ int eee_enabled;
+ int eee_active;
+ int tx_lpi_timer;
};
extern int phyaddr;
@@ -105,6 +111,8 @@ int stmmac_dvr_remove(struct net_device *ndev);
struct stmmac_priv *stmmac_dvr_probe(struct device *device,
struct plat_stmmacenet_data *plat_dat,
void __iomem *addr);
+void stmmac_disable_eee_mode(struct stmmac_priv *priv);
+bool stmmac_eee_init(struct stmmac_priv *priv);
#ifdef CONFIG_HAVE_CLK
static inline int stmmac_clk_enable(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index ce43184..d6014f5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -93,6 +93,16 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
STMMAC_STAT(poll_n),
STMMAC_STAT(sched_timer_n),
STMMAC_STAT(normal_irq_n),
+ STMMAC_STAT(normal_irq_n),
+ STMMAC_STAT(mmc_tx_irq_n),
+ STMMAC_STAT(mmc_rx_irq_n),
+ STMMAC_STAT(mmc_rx_csum_offload_irq_n),
+ STMMAC_STAT(irq_receive_pmt_irq_n),
+ STMMAC_STAT(irq_tx_path_in_lpi_mode_n),
+ STMMAC_STAT(irq_tx_path_exit_lpi_mode_n),
+ STMMAC_STAT(irq_rx_path_in_lpi_mode_n),
+ STMMAC_STAT(irq_rx_path_exit_lpi_mode_n),
+ STMMAC_STAT(phy_eee_wakeup_error_n),
};
#define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
@@ -366,6 +376,11 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
(*(u32 *)p);
}
}
+ if (priv->eee_enabled) {
+ int val = phy_get_eee_err(priv->phydev);
+ if (val)
+ priv->xstats.phy_eee_wakeup_error_n = val;
+ }
}
for (i = 0; i < STMMAC_STATS_LEN; i++) {
char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset;
@@ -464,6 +479,44 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
return 0;
}
+static int stmmac_ethtool_op_get_eee(struct net_device *dev,
+ struct ethtool_eee *edata)
+{
+ struct stmmac_priv *priv = netdev_priv(dev);
+
+ if (!priv->dma_cap.eee)
+ return -EOPNOTSUPP;
+
+ edata->eee_enabled = priv->eee_enabled;
+ edata->eee_active = priv->eee_active;
+ edata->tx_lpi_timer = priv->tx_lpi_timer;
+
+ return phy_ethtool_get_eee(priv->phydev, edata);
+}
+
+static int stmmac_ethtool_op_set_eee(struct net_device *dev,
+ struct ethtool_eee *edata)
+{
+ struct stmmac_priv *priv = netdev_priv(dev);
+
+ priv->eee_enabled = edata->eee_enabled;
+
+ if (!priv->eee_enabled)
+ stmmac_disable_eee_mode(priv);
+ else {
+ /* We are asking for enabling the EEE but it is safe
+ * to verify all by invoking the eee_init function.
+ * In case of failure it will return an error.
+ */
+ priv->tx_lpi_timer = edata->tx_lpi_timer;
+ priv->eee_enabled = stmmac_eee_init(priv);
+ if (!priv->eee_enabled)
+ return -EPERM;
+ }
+
+ return phy_ethtool_set_eee(priv->phydev, edata);
+}
+
static const struct ethtool_ops stmmac_ethtool_ops = {
.begin = stmmac_check_if_running,
.get_drvinfo = stmmac_ethtool_getdrvinfo,
@@ -480,6 +533,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
.get_strings = stmmac_get_strings,
.get_wol = stmmac_get_wol,
.set_wol = stmmac_set_wol,
+ .get_eee = stmmac_ethtool_op_get_eee,
+ .set_eee = stmmac_ethtool_op_set_eee,
.get_sset_count = stmmac_get_sset_count,
.get_ts_info = ethtool_op_get_ts_info,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e33abf5..05e1af7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -134,6 +134,12 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
NETIF_MSG_IFDOWN | NETIF_MSG_TIMER);
+#define STMMAC_DEFAULT_LPI_TIMER 1000
+static int eee_timer = STMMAC_DEFAULT_LPI_TIMER;
+module_param(eee_timer, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
+#define STMMAC_LPI_TIMER(x) (jiffies + msecs_to_jiffies(x))
+
static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
#ifdef CONFIG_STMMAC_DEBUG_FS
@@ -162,6 +168,8 @@ static void stmmac_verify_args(void)
flow_ctrl = FLOW_OFF;
if (unlikely((pause < 0) || (pause > 0xffff)))
pause = PAUSE_TIME;
+ if (eee_timer < 0)
+ eee_timer = STMMAC_DEFAULT_LPI_TIMER;
}
static void stmmac_clk_csr_set(struct stmmac_priv *priv)
@@ -230,6 +238,85 @@ static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv)
phydev->speed);
}
+static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
+{
+ /* Check and enter in LPI mode */
+ if ((priv->dirty_tx == priv->cur_tx) &&
+ (priv->tx_path_in_lpi_mode == false))
+ priv->hw->mac->set_eee_mode(priv->ioaddr);
+}
+
+void stmmac_disable_eee_mode(struct stmmac_priv *priv)
+{
+ /* Exit and disable EEE in case of we are are in LPI state. */
+ priv->hw->mac->reset_eee_mode(priv->ioaddr);
+ del_timer_sync(&priv->eee_ctrl_timer);
+ priv->tx_path_in_lpi_mode = false;
+}
+
+/**
+ * stmmac_eee_ctrl_timer
+ * @arg : data hook
+ * Description:
+ * If there is no data transfer and if we are not in LPI state,
+ * then MAC Transmitter can be moved to LPI state.
+ */
+static void stmmac_eee_ctrl_timer(unsigned long arg)
+{
+ struct stmmac_priv *priv = (struct stmmac_priv *)arg;
+
+ stmmac_enable_eee_mode(priv);
+ mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_TIMER(eee_timer));
+}
+
+/**
+ * stmmac_eee_init
+ * @priv: private device pointer
+ * Description:
+ * If the EEE support has been enabled while configuring the driver,
+ * if the GMAC actually supports the EEE (from the HW cap reg) and the
+ * phy can also manage EEE, so enable the LPI state and start the timer
+ * to verify if the tx path can enter in LPI state.
+ */
+bool stmmac_eee_init(struct stmmac_priv *priv)
+{
+ bool ret = false;
+
+ /* MAC core supports the EEE feature. */
+ if (priv->dma_cap.eee) {
+ /* Check if the PHY supports EEE */
+ if (phy_init_eee(priv->phydev, 1))
+ goto out;
+
+ priv->eee_active = 1;
+ init_timer(&priv->eee_ctrl_timer);
+ priv->eee_ctrl_timer.function = stmmac_eee_ctrl_timer;
+ priv->eee_ctrl_timer.data = (unsigned long)priv;
+ priv->eee_ctrl_timer.expires = STMMAC_LPI_TIMER(eee_timer);
+ add_timer(&priv->eee_ctrl_timer);
+
+ priv->hw->mac->set_eee_timer(priv->ioaddr,
+ STMMAC_DEFAULT_LIT_LS_TIMER,
+ priv->tx_lpi_timer);
+
+ pr_info("stmmac: Energy-Efficient Ethernet initialized\n");
+
+ ret = true;
+ }
+out:
+ return ret;
+}
+
+static void stmmac_eee_adjust(struct stmmac_priv *priv)
+{
+ /* When the EEE has been already initialised we have to
+ * modify the PLS bit in the LPI ctrl & status reg according
+ * to the PHY link status. For this reason.
+ */
+ if (priv->eee_enabled)
+ priv->hw->mac->set_eee_pls(priv->ioaddr, priv->phydev->link);
+}
+
/**
* stmmac_adjust_link
* @dev: net device structure
@@ -250,6 +337,7 @@ static void stmmac_adjust_link(struct net_device *dev)
phydev->addr, phydev->link);
spin_lock_irqsave(&priv->lock, flags);
+
if (phydev->link) {
u32 ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
@@ -316,6 +404,8 @@ static void stmmac_adjust_link(struct net_device *dev)
if (new_state && netif_msg_link(priv))
phy_print_status(phydev);
+ stmmac_eee_adjust(priv);
+
spin_unlock_irqrestore(&priv->lock, flags);
DBG(probe, DEBUG, "stmmac_adjust_link: exiting\n");
@@ -333,7 +423,7 @@ static int stmmac_init_phy(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
struct phy_device *phydev;
- char phy_id[MII_BUS_ID_SIZE + 3];
+ char phy_id_fmt[MII_BUS_ID_SIZE + 3];
char bus_id[MII_BUS_ID_SIZE];
int interface = priv->plat->interface;
priv->oldlink = 0;
@@ -347,11 +437,12 @@ static int stmmac_init_phy(struct net_device *dev)
snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x",
priv->plat->bus_id);
- snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id,
+ snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id,
priv->plat->phy_addr);
- pr_debug("stmmac_init_phy: trying to attach to %s\n", phy_id);
+ pr_debug("stmmac_init_phy: trying to attach to %s\n", phy_id_fmt);
- phydev = phy_connect(dev, phy_id, &stmmac_adjust_link, 0, interface);
+ phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, 0,
+ interface);
if (IS_ERR(phydev)) {
pr_err("%s: Could not attach to PHY\n", dev->name);
@@ -690,6 +781,11 @@ static void stmmac_tx(struct stmmac_priv *priv)
}
netif_tx_unlock(priv->dev);
}
+
+ if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
+ stmmac_enable_eee_mode(priv);
+ mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_TIMER(eee_timer));
+ }
spin_unlock(&priv->tx_lock);
}
@@ -1028,6 +1124,17 @@ static int stmmac_open(struct net_device *dev)
}
}
+ /* Request the IRQ lines */
+ if (priv->lpi_irq != -ENXIO) {
+ ret = request_irq(priv->lpi_irq, stmmac_interrupt, IRQF_SHARED,
+ dev->name, dev);
+ if (unlikely(ret < 0)) {
+ pr_err("%s: ERROR: allocating the LPI IRQ %d (%d)\n",
+ __func__, priv->lpi_irq, ret);
+ goto open_error_lpiirq;
+ }
+ }
+
/* Enable the MAC Rx/Tx */
stmmac_set_mac(priv->ioaddr, true);
@@ -1063,12 +1170,19 @@ static int stmmac_open(struct net_device *dev)
if (priv->phydev)
phy_start(priv->phydev);
+ priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS_TIMER;
+ priv->eee_enabled = stmmac_eee_init(priv);
+
napi_enable(&priv->napi);
skb_queue_head_init(&priv->rx_recycle);
netif_start_queue(dev);
return 0;
+open_error_lpiirq:
+ if (priv->wol_irq != dev->irq)
+ free_irq(priv->wol_irq, dev);
+
open_error_wolirq:
free_irq(dev->irq, dev);
@@ -1094,6 +1208,9 @@ static int stmmac_release(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
+ if (priv->eee_enabled)
+ del_timer_sync(&priv->eee_ctrl_timer);
+
/* Stop and disconnect the PHY */
if (priv->phydev) {
phy_stop(priv->phydev);
@@ -1116,6 +1233,8 @@ static int stmmac_release(struct net_device *dev)
free_irq(dev->irq, dev);
if (priv->wol_irq != dev->irq)
free_irq(priv->wol_irq, dev);
+ if (priv->lpi_irq != -ENXIO)
+ free_irq(priv->lpi_irq, dev);
/* Stop TX/RX DMA and clear the descriptors */
priv->hw->dma->stop_tx(priv->ioaddr);
@@ -1165,6 +1284,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
spin_lock(&priv->tx_lock);
+ if (priv->tx_path_in_lpi_mode)
+ stmmac_disable_eee_mode(priv);
+
entry = priv->cur_tx % txsize;
#ifdef STMMAC_XMIT_DEBUG
@@ -1541,10 +1663,37 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
return IRQ_NONE;
}
- if (priv->plat->has_gmac)
- /* To handle GMAC own interrupts */
- priv->hw->mac->host_irq_status((void __iomem *) dev->base_addr);
+ /* To handle GMAC own interrupts */
+ if (priv->plat->has_gmac) {
+ int status = priv->hw->mac->host_irq_status((void __iomem *)
+ dev->base_addr);
+ if (unlikely(status)) {
+ if (status & core_mmc_tx_irq)
+ priv->xstats.mmc_tx_irq_n++;
+ if (status & core_mmc_rx_irq)
+ priv->xstats.mmc_rx_irq_n++;
+ if (status & core_mmc_rx_csum_offload_irq)
+ priv->xstats.mmc_rx_csum_offload_irq_n++;
+ if (status & core_irq_receive_pmt_irq)
+ priv->xstats.irq_receive_pmt_irq_n++;
+
+ /* For LPI we need to save the tx status */
+ if (status & core_irq_tx_path_in_lpi_mode) {
+ priv->xstats.irq_tx_path_in_lpi_mode_n++;
+ priv->tx_path_in_lpi_mode = true;
+ }
+ if (status & core_irq_tx_path_exit_lpi_mode) {
+ priv->xstats.irq_tx_path_exit_lpi_mode_n++;
+ priv->tx_path_in_lpi_mode = false;
+ }
+ if (status & core_irq_rx_path_in_lpi_mode)
+ priv->xstats.irq_rx_path_in_lpi_mode_n++;
+ if (status & core_irq_rx_path_exit_lpi_mode)
+ priv->xstats.irq_rx_path_exit_lpi_mode_n++;
+ }
+ }
+ /* To handle DMA interrupts */
stmmac_dma_interrupt(priv);
return IRQ_HANDLED;
@@ -2153,6 +2302,9 @@ static int __init stmmac_cmdline_opt(char *str)
} else if (!strncmp(opt, "pause:", 6)) {
if (kstrtoul(opt + 6, 0, (unsigned long *)&pause))
goto err;
+ } else if (!strncmp(opt, "eee_timer:", 6)) {
+ if (kstrtoul(opt + 10, 0, (unsigned long *)&eee_timer))
+ goto err;
#ifdef CONFIG_STMMAC_TIMER
} else if (!strncmp(opt, "tmrate:", 7)) {
if (kstrtoul(opt + 7, 0, (unsigned long *)&tmrate))
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 20eb502..7d36163 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -156,6 +156,8 @@ static int stmmac_pltfr_probe(struct platform_device *pdev)
if (priv->wol_irq == -ENXIO)
priv->wol_irq = priv->dev->irq;
+ priv->lpi_irq = platform_get_irq_byname(pdev, "eth_lpi");
+
platform_set_drvdata(pdev, priv->dev);
pr_debug("STMMAC platform driver registration completed");
--
1.7.4.4
^ permalink raw reply related
* [net-next.git 3/4] stmmac: update the driver Documentation and add EEE
From: Giuseppe CAVALLARO @ 2012-06-12 12:45 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, rayagond, davem, yuvalmin, Giuseppe Cavallaro
In-Reply-To: <1339505153-26731-1-git-send-email-peppe.cavallaro@st.com>
This patch updates the stmmac's documentation adding
some missing files in the section used to describe the
internal driver's structure.
Also the patch adds a new section to describe the EEE support.
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
Documentation/networking/stmmac.txt | 36 +++++++++++++++++++++++++++++-----
1 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 5cb9a19..c676b9c 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -257,9 +257,11 @@ reset procedure etc).
o Makefile
o stmmac_main.c: main network device driver;
o stmmac_mdio.c: mdio functions;
+ o stmmac_pci: PCI driver;
+ o stmmac_platform.c: platform driver
o stmmac_ethtool.c: ethtool support;
o stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts
- Only tested on ST40 platforms based.
+ (only tested on ST40 platforms based);
o stmmac.h: private driver structure;
o common.h: common definitions and VFTs;
o descs.h: descriptor structure definitions;
@@ -269,9 +271,11 @@ reset procedure etc).
o dwmac100_core: MAC 100 core and dma code;
o dwmac100_dma.c: dma funtions for the MAC chip;
o dwmac1000.h: specific header file for the MAC;
- o dwmac_lib.c: generic DMA functions shared among chips
- o enh_desc.c: functions for handling enhanced descriptors
- o norm_desc.c: functions for handling normal descriptors
+ o dwmac_lib.c: generic DMA functions shared among chips;
+ o enh_desc.c: functions for handling enhanced descriptors;
+ o norm_desc.c: functions for handling normal descriptors;
+ o chain_mode.c/ring_mode.c:: functions to manage RING/CHAINED modes;
+ o mmc_core.c/mmc.h: Management MAC Counters;
5) Debug Information
@@ -304,7 +308,27 @@ All these are only useful during the developing stage
and should never enabled inside the code for general usage.
In fact, these can generate an huge amount of debug messages.
-6) TODO:
+6) Energy Efficient Ethernet
+
+Energy Efficient Ethernet(EEE) enables IEEE 802.3 MAC sublayer along
+with a family of Physical layer to operate in the Low power Idle(LPI)
+mode. The EEE mode supports the IEEE 802.3 MAC operation at 100Mbps,
+1000Mbps & 10Gbps.
+
+The LPI mode allows power saving by switching off parts of the
+communication device functionality when there is no data to be
+transmitted & received. The system on both the side of the link can
+disable some functionalities & save power during the period of low-link
+utilization. The MAC controls whether the system should enter or exit
+the LPI mode & communicate this to PHY.
+
+As soon as the interface is opened, the driver verifies if the EEE can
+be supported. This is done by looking at both the DMA HW capability
+register and the PHY devices MCD registers.
+To enter in Tx LPI mode the driver needs to have a software timer
+that enable and disable the LPI mode when there is nothing to be
+transmitted.
+
+7) TODO:
o XGMAC is not supported.
- o Add the EEE - Energy Efficient Ethernet
o Add the PTP - precision time protocol
--
1.7.4.4
^ permalink raw reply related
* Re: [PATCH] netpoll: Add support for hardware checksumming on egress
From: Eric Dumazet @ 2012-06-12 12:43 UTC (permalink / raw)
To: Bogdan Hamciuc; +Cc: davem, netdev
In-Reply-To: <1339496765-3093-2-git-send-email-bogdan.hamciuc@freescale.com>
On Tue, 2012-06-12 at 13:26 +0300, Bogdan Hamciuc wrote:
> Netpoll used to compute its own csum; but if the device supports, we
> should let it do the checksum itself.
>
> Signed-off-by: Bogdan Hamciuc <bogdan.hamciuc@freescale.com>
> ---
> net/core/netpoll.c | 14 ++++++++++----
> 1 files changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/net/core/netpoll.c b/net/core/netpoll.c
> index 9a08068..f5d00b4 100644
> --- a/net/core/netpoll.c
> +++ b/net/core/netpoll.c
> @@ -385,13 +385,19 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
> udph->source = htons(np->local_port);
> udph->dest = htons(np->remote_port);
> udph->len = htons(udp_len);
> - udph->check = 0;
> - udph->check = csum_tcpudp_magic(np->local_ip,
> +
> + /* Only querying the IPv4 csumming capabilities */
> + if (np->dev->features & NETIF_F_IP_CSUM)
> + skb->ip_summed = CHECKSUM_PARTIAL;
> + else {
> + skb->ip_summed = CHECKSUM_NONE;
> + udph->check = csum_tcpudp_magic(np->local_ip,
> np->remote_ip,
> udp_len, IPPROTO_UDP,
> csum_partial(udph, udp_len, 0));
> - if (udph->check == 0)
> - udph->check = CSUM_MANGLED_0;
> + if (udph->check == 0)
> + udph->check = CSUM_MANGLED_0;
> + }
>
> skb_push(skb, sizeof(*iph));
> skb_reset_network_header(skb);
Hi Bogdan
I cant see how this can possibly work ?
Which NIC was able to send a good UDP frame after this patch ?
^ permalink raw reply
* Re: [RFC PATCH net-next] hp100: delete VG/AnyLAN hp100
From: Pavel Machek @ 2012-06-12 12:36 UTC (permalink / raw)
To: Joe Perches
Cc: Paul Gortmaker, linux-kernel, JBottomley, David S. Miller, netdev
In-Reply-To: <1337292547.8872.20.camel@joe2Laptop>
Hi!
On Thu 2012-05-17 15:09:07, Joe Perches wrote:
> On Thu, 2012-05-17 at 17:20 -0400, Paul Gortmaker wrote:
> > [Re: [PATCH 2/5] drivers/net: delete all code/drivers depending on CONFIG_MCA
>
> If we're removing really old and unused stuff,
> how about the VG/AnyLAN driver too?
hp100... I believe it has PCI variants and I have that
somewhere. Please don't kill it.
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
^ permalink raw reply
* Re: [PATCHv2 net-next] ipv4: Add interface option to enable routing of 127.0.0.0/8
From: Neil Horman @ 2012-06-12 12:32 UTC (permalink / raw)
To: David Miller, netdev
In-Reply-To: <20120612113115.GI28598@canuck.infradead.org>
On Tue, Jun 12, 2012 at 07:31:15AM -0400, Thomas Graf wrote:
> On Tue, Jun 12, 2012 at 07:14:44AM -0400, Neil Horman wrote:
> > Just out of curiosity, would it be more efficient to implement this by
> > optionally adding a prohibit route to the local table for 127.0.0.0/8 to every
> > interface that was brought up, based on weather or not that interfaces
> > route_localnet bool was true or not? It would save the additional checks in the
> > routing path I think. Not sure how much a savings that is, but I thought I
> > would ask.
>
> It's not that simple because we also use the local table for source
> address selection and local address verification. So we would have to
> exclude/include such routes conditionally based on some route lookup
> purpose indicator. Such a prohibit route would have to be valid only
> in the output context.
ah, understood, so that doesn't really save us anything, it just moves the point
at which we do the check.
Acked-by: Neil Horman <nhorman@tuxdriver.com>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply
* Re: [PATCH] netpoll: Fix skb tail pointer in netpoll_send_udp()
From: Eric Dumazet @ 2012-06-12 12:22 UTC (permalink / raw)
To: Bogdan Hamciuc; +Cc: davem, netdev
In-Reply-To: <1339496765-3093-1-git-send-email-bogdan.hamciuc@freescale.com>
On Tue, 2012-06-12 at 13:26 +0300, Bogdan Hamciuc wrote:
> As skb->tail wasn't updated after skb_copy_to_linear_data(), subsequent
> calls to skb_realloc_headroom() (as made by an ethernet driver's
> ndo_start_xmit routine) would only effectively copy the packet headers,
> leaving garbage in the payload.
>
> In the process, removed some unnecessary code.
>
> Signed-off-by: Bogdan Hamciuc <bogdan.hamciuc@freescale.com>
> ---
> net/core/netpoll.c | 8 ++++----
> 1 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/net/core/netpoll.c b/net/core/netpoll.c
> index 3d84fb9..9a08068 100644
> --- a/net/core/netpoll.c
> +++ b/net/core/netpoll.c
> @@ -362,22 +362,22 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev);
>
> void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
> {
> - int total_len, eth_len, ip_len, udp_len;
> + int total_len, ip_len, udp_len;
> struct sk_buff *skb;
> struct udphdr *udph;
> struct iphdr *iph;
> struct ethhdr *eth;
>
> udp_len = len + sizeof(*udph);
> - ip_len = eth_len = udp_len + sizeof(*iph);
> - total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
> + ip_len = udp_len + sizeof(*iph);
> + total_len = ip_len + ETH_HLEN + NET_IP_ALIGN;
>
> skb = find_skb(np, total_len, total_len - len);
> if (!skb)
> return;
>
> skb_copy_to_linear_data(skb, msg, len);
> - skb->len += len;
> + skb_put(skb, len);
>
> skb_push(skb, sizeof(*udph));
> skb_reset_transport_header(skb);
Hmm, real question is why skb_realloc_headroom() is even necessary...
I suspect we need to reserve more bytes.
total_len = ip_len + ETH_HLEN + NET_IP_ALIGN + NET_SKB_PAD;
or something like that ?
Which driver triggers the bug ?
^ permalink raw reply
* Hello
From: Mrs Anna Kennedy @ 2012-06-12 11:39 UTC (permalink / raw)
I have an urgent proposal for you kindly get to me asap.
^ permalink raw reply
* Re: [PATCH 2/5] ipv4: Kill ip_rt_frag_needed().
From: Steffen Klassert @ 2012-06-12 11:44 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <20120611.160258.866525532025442350.davem@davemloft.net>
On Mon, Jun 11, 2012 at 04:02:58PM -0700, David Miller wrote:
>
> Here below is the kind of patch I was suggesting we make. I did a
> simple test to make sure the update MTU code path is taken in
> raw_err().
I can confirm that your patch restores the old behaviour of ping.
>
> But I'm having second thoughts about whether any of this is a good
> idea.
>
> UDP works by notifying userspace of PMTU events. And this is
> mandatory, if we're setting DF we have to get the user to decrease the
> size of it's datagram writes below the reported PMTU value.
>
> As a consequence I believe RAW sockets should also work via
> notifications.
>
> And therefore it can be argued that in neither case should we update
> the routing cache PMTU information.
Should be ok as long as all userspace applications that use UDP or
RAW sockets handle pmtu event notifications properly.
ping might be a special case, but now the behaviour of a big
sized ping (say 1400 byte on a network that has a router with
mtu 1300 along the path) with IP_PMTUDISC_WANT might depend on
whether the cached pmtu informations are updated by a recent
tcp connection.
If we had no tcp connection before, we see the behaviour that
I described in my first mail. All packets have the DF bit set.
If a tcp connection updated the cached pmtu informations recently,
the packets don't have the DF bit set. They are fragmented according
the cached pmtu informations instead.
Other applications that do not care for pmtu event notifications
might be in a similar situation. So perhaps we need the kind of
patch you are suggested.
^ permalink raw reply
* Re: [PATCHv2 net-next] ipv4: Add interface option to enable routing of 127.0.0.0/8
From: Thomas Graf @ 2012-06-12 11:31 UTC (permalink / raw)
To: Neil Horman; +Cc: David Miller, netdev
In-Reply-To: <20120612111444.GA15984@hmsreliant.think-freely.org>
On Tue, Jun 12, 2012 at 07:14:44AM -0400, Neil Horman wrote:
> Just out of curiosity, would it be more efficient to implement this by
> optionally adding a prohibit route to the local table for 127.0.0.0/8 to every
> interface that was brought up, based on weather or not that interfaces
> route_localnet bool was true or not? It would save the additional checks in the
> routing path I think. Not sure how much a savings that is, but I thought I
> would ask.
It's not that simple because we also use the local table for source
address selection and local address verification. So we would have to
exclude/include such routes conditionally based on some route lookup
purpose indicator. Such a prohibit route would have to be valid only
in the output context.
^ permalink raw reply
* Re: [PATCHv2 net-next] ipv4: Add interface option to enable routing of 127.0.0.0/8
From: Neil Horman @ 2012-06-12 11:14 UTC (permalink / raw)
To: David Miller, netdev
In-Reply-To: <20120612104401.GH28598@canuck.infradead.org>
On Tue, Jun 12, 2012 at 06:44:01AM -0400, Thomas Graf wrote:
> Routing of 127/8 is tradtionally forbidden, we consider
> packets from that address block martian when routing and do
> not process corresponding ARP requests.
>
> This is a sane default but renders a huge address space
> practically unuseable.
>
> The RFC states that no address within the 127/8 block should
> ever appear on any network anywhere but it does not forbid
> the use of such addresses outside of the loopback device in
> particular. For example to address a pool of virtual guests
> behind a load balancer.
>
> This patch adds a new interface option 'route_localnet'
> enabling routing of the 127/8 address block and processing
> of ARP requests on a specific interface.
>
> Note that for the feature to work, the default local route
> covering 127/8 dev lo needs to be removed.
>
> Example:
> $ sysctl -w net.ipv4.conf.eth0.route_localnet=1
> $ ip route del 127.0.0.0/8 dev lo table local
> $ ip addr add 127.1.0.1/16 dev eth0
> $ ip route flush cache
>
> V2: Fix invalid check to auto flush cache (thanks davem)
>
> Signed-off-by: Thomas Graf <tgraf@suug.ch>
Just out of curiosity, would it be more efficient to implement this by
optionally adding a prohibit route to the local table for 127.0.0.0/8 to every
interface that was brought up, based on weather or not that interfaces
route_localnet bool was true or not? It would save the additional checks in the
routing path I think. Not sure how much a savings that is, but I thought I
would ask.
Regards
Neil
^ permalink raw reply
* Re: net/netfilter/nf_conntrack_proto_tcp.c:1606:9: error: ‘struct nf_proto_net’ has no member named ‘user’
From: Gao feng @ 2012-06-12 11:03 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: David Miller, wfg, netdev
In-Reply-To: <20120612092940.GB30080@1984>
于 2012年06月12日 17:29, Pablo Neira Ayuso 写道:
>> nf_proto_net.users has different meaning when SYSCTL enabled or disabled.
>>
>> when SYSCTL enabled,it means if both tcpv4 and tcpv6 register the sysctl,
>> it is increased when register sysctl success and decreased when unregister sysctl.
>> we can regard it as the refcnt of ctl_table.
>>
>> when SYSCTL disabled,it just used to identify if the proto's pernet data
>> has been initialized.
>
> We have to use two different counters for this. The conditional
> meaning of that variable is really confusing.
>
Hi David & Pablo
Please have a look at this patch and tell me if it's OK.
it base on Pable's patch.
diff --git a/include/net/netfilter/nf_conntrack_tcp.h b/include/net/netfilter/nf_conntrack_tcp.h
index 8d16ebe..0945446 100644
--- a/include/net/netfilter/nf_conntrack_tcp.h
+++ b/include/net/netfilter/nf_conntrack_tcp.h
@@ -1,8 +1,16 @@
#ifndef _NF_CONNTRACK_TCP_H_
#define _NF_CONNTRACK_TCP_H_
-int nf_ct_tcp_kmemdup_sysctl_table(struct nf_proto_net *pn);
-int nf_ct_tcp_compat_kmemdup_sysctl_table(struct nf_proto_net *pn);
-void nf_ct_tcp_compat_kfree_sysctl_table(struct nf_proto_net *pn);
+#ifdef CONFIG_SYSCTL
+int nf_ct_tcpv4_init_sysctl(struct nf_proto_net *pn);
+int nf_ct_tcpv6_init_sysctl(struct nf_proto_net *pn);
+#else
+int nf_ct_tcpv4_init_sysctl(struct nf_proto_net *pn)
+{
+ pn->users++;
+ return 0;
+}
+#define nf_ct_tcpv6_init_sysctl nf_ct_tcpv4_init_sysctl
+#endif
#endif
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index cdf8b93..367153a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1368,12 +1368,11 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
static int tcpv4_init_net(struct net *net)
{
- int i;
- int ret = 0;
struct nf_tcp_net *tn = tcp_pernet(net);
struct nf_proto_net *pn = (struct nf_proto_net *)tn;
- if (!pn->users++) {
+ if (!pn->users) {
+ int i;
for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
tn->timeouts[i] = tcp_timeouts[i];
@@ -1382,25 +1381,16 @@ static int tcpv4_init_net(struct net *net)
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
}
- ret = nf_ct_tcp_compat_kmemdup_sysctl_table(pn);
- if (ret < 0)
- return ret;
-
- ret = nf_ct_tcp_kmemdup_sysctl_table(pn);
- if (ret < 0) {
- kfree(pn->ctl_compat_table);
- pn->ctl_compat_table = NULL;
- }
- return ret;
+ return nf_ct_tcpv4_init_sysctl(pn);
}
static int tcpv6_init_net(struct net *net)
{
- int i;
struct nf_tcp_net *tn = tcp_pernet(net);
struct nf_proto_net *pn = (struct nf_proto_net *)tn;
- if (!pn->users++) {
+ if (!pn->users) {
+ int i;
for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
tn->timeouts[i] = tcp_timeouts[i];
tn->tcp_loose = nf_ct_tcp_loose;
@@ -1408,7 +1398,7 @@ static int tcpv6_init_net(struct net *net)
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
}
- return nf_ct_tcp_kmemdup_sysctl_table(pn);
+ return nf_ct_tcpv6_init_sysctl(pn);
}
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
diff --git a/net/netfilter/nf_conntrack_proto_tcp_sysctl.c b/net/netfilter/nf_conntrack_proto_tcp_sysctl.c
index b9e027f..f038de4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp_sysctl.c
+++ b/net/netfilter/nf_conntrack_proto_tcp_sysctl.c
@@ -182,7 +182,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
};
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
-int nf_ct_tcp_kmemdup_sysctl_table(struct nf_proto_net *pn)
+static int nf_ct_tcp_kmemdup_sysctl_table(struct nf_proto_net *pn)
{
struct nf_tcp_net *tn = (struct nf_tcp_net *)pn;
@@ -211,7 +211,7 @@ int nf_ct_tcp_kmemdup_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-int nf_ct_tcp_compat_kmemdup_sysctl_table(struct nf_proto_net *pn)
+static int nf_ct_tcp_compat_kmemdup_sysctl_table(struct nf_proto_net *pn)
{
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
struct nf_tcp_net *tn = (struct nf_tcp_net *)pn;
@@ -245,3 +245,23 @@ void nf_ct_tcp_compat_kfree_sysctl_table(struct nf_proto_net *pn)
pn->ctl_compat_table = NULL;
#endif
}
+
+int nf_ct_tcpv4_init_sysctl(struct nf_proto_net *pn)
+{
+ int ret;
+
+ ret = nf_ct_tcp_compat_kmemdup_sysctl_table(pn);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_ct_tcp_kmemdup_sysctl_table(pn);
+ if (ret < 0)
+ nf_ct_tcp_compat_kfree_sysctl_table(pn);
+
+ return ret;
+}
+
+int nf_ct_tcpv6_init_sysctl(struct nf_proto_net *pn)
+{
+ return nf_ct_tcp_compat_kmemdup_sysctl_table(pn);
+}
^ permalink raw reply related
* Re: [patch] can: c_can: precedence error in c_can_chip_config()
From: Oliver Hartkopp @ 2012-06-12 11:01 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: Dan Carpenter, Wolfgang Grandegger, AnilKumar Ch, David S. Miller,
Jiri Kosina, linux-can, netdev, kernel-janitors
In-Reply-To: <4FD70DD9.4010009@pengutronix.de>
On 12.06.2012 11:37, Marc Kleine-Budde wrote:
> On 06/11/2012 07:42 PM, Oliver Hartkopp wrote:
>> On 10.06.2012 19:52, Marc Kleine-Budde wrote:
>>
>>> On 06/09/2012 05:56 PM, Dan Carpenter wrote:
>>>> (CAN_CTRLMODE_LISTENONLY & CAN_CTRLMODE_LOOPBACK) is (0x02 & 0x01) which
>>>> is zero so the condition is never true. The intent here was to test
>>>> that both flags were set.
>>>>
>>>> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
>>>> ---
>>>> This is a static checker fix. I'm not super familiar with the c_can
>>>> code.
>>>
>>> Good catch. Applied to can-next.
>>>
>>> Marc
>>>
>>
>>
>> Shouldn't this fix go through the net-tree and stable instead of net-next?
>
> Can I add your Acked-by ... when adding to net?
Yes you can :-)
Oliver
^ permalink raw reply
* [PATCHv2 net-next] ipv4: Add interface option to enable routing of 127.0.0.0/8
From: Thomas Graf @ 2012-06-12 10:44 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <20120611.165740.419299184892679723.davem@davemloft.net>
Routing of 127/8 is tradtionally forbidden, we consider
packets from that address block martian when routing and do
not process corresponding ARP requests.
This is a sane default but renders a huge address space
practically unuseable.
The RFC states that no address within the 127/8 block should
ever appear on any network anywhere but it does not forbid
the use of such addresses outside of the loopback device in
particular. For example to address a pool of virtual guests
behind a load balancer.
This patch adds a new interface option 'route_localnet'
enabling routing of the 127/8 address block and processing
of ARP requests on a specific interface.
Note that for the feature to work, the default local route
covering 127/8 dev lo needs to be removed.
Example:
$ sysctl -w net.ipv4.conf.eth0.route_localnet=1
$ ip route del 127.0.0.0/8 dev lo table local
$ ip addr add 127.1.0.1/16 dev eth0
$ ip route flush cache
V2: Fix invalid check to auto flush cache (thanks davem)
Signed-off-by: Thomas Graf <tgraf@suug.ch>
---
Documentation/networking/ip-sysctl.txt | 5 +++++
include/linux/inetdevice.h | 2 ++
net/ipv4/arp.c | 3 ++-
net/ipv4/devinet.c | 5 ++++-
net/ipv4/route.c | 30 +++++++++++++++++++++---------
5 files changed, 34 insertions(+), 11 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 6f896b9..99d0e05 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -862,6 +862,11 @@ accept_local - BOOLEAN
local interfaces over the wire and have them accepted properly.
default FALSE
+route_localnet - BOOLEAN
+ Do not consider loopback addresses as martian source or destination
+ while routing. This enables the use of 127/8 for local routing purposes.
+ default FALSE
+
rp_filter - INTEGER
0 - No source validation.
1 - Strict mode as defined in RFC3704 Strict Reverse Path
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 597f4a9..67f9dda 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -38,6 +38,7 @@ enum
IPV4_DEVCONF_ACCEPT_LOCAL,
IPV4_DEVCONF_SRC_VMARK,
IPV4_DEVCONF_PROXY_ARP_PVLAN,
+ IPV4_DEVCONF_ROUTE_LOCALNET,
__IPV4_DEVCONF_MAX
};
@@ -131,6 +132,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
#define IN_DEV_PROMOTE_SECONDARIES(in_dev) \
IN_DEV_ORCONF((in_dev), \
PROMOTE_SECONDARIES)
+#define IN_DEV_ROUTE_LOCALNET(in_dev) IN_DEV_ORCONF(in_dev, ROUTE_LOCALNET)
#define IN_DEV_RX_REDIRECTS(in_dev) \
((IN_DEV_FORWARD(in_dev) && \
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index cda37be..2e560f0 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -790,7 +790,8 @@ static int arp_process(struct sk_buff *skb)
* Check for bad requests for 127.x.x.x and requests for multicast
* addresses. If this is one such, delete it.
*/
- if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+ if (ipv4_is_multicast(tip) ||
+ (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
goto out;
/*
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 10e15a1..44bf82e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1500,7 +1500,8 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
if (cnf == net->ipv4.devconf_dflt)
devinet_copy_dflt_conf(net, i);
- if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
+ if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
+ i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
if ((new_value == 0) && (old_value != 0))
rt_cache_flush(net, 0);
}
@@ -1617,6 +1618,8 @@ static struct devinet_sysctl_table {
"force_igmp_version"),
DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
"promote_secondaries"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
+ "route_localnet"),
},
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 842510d..655506a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1960,9 +1960,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
return -EINVAL;
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
+ skb->protocol != htons(ETH_P_IP))
goto e_inval;
+ if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
+ if (ipv4_is_loopback(saddr))
+ goto e_inval;
+
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr))
goto e_inval;
@@ -2203,8 +2207,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
by fib_lookup.
*/
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- ipv4_is_loopback(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
@@ -2216,9 +2219,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_zeronet(saddr))
goto martian_source;
- if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
+ if (ipv4_is_zeronet(daddr))
goto martian_destination;
+ if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
+ if (ipv4_is_loopback(daddr))
+ goto martian_destination;
+
+ if (ipv4_is_loopback(saddr))
+ goto martian_source;
+ }
+
/*
* Now we are ready to route packet.
*/
@@ -2457,9 +2468,14 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
u16 type = res->type;
struct rtable *rth;
- if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+ in_dev = __in_dev_get_rcu(dev_out);
+ if (!in_dev)
return ERR_PTR(-EINVAL);
+ if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
+ if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+ return ERR_PTR(-EINVAL);
+
if (ipv4_is_lbcast(fl4->daddr))
type = RTN_BROADCAST;
else if (ipv4_is_multicast(fl4->daddr))
@@ -2470,10 +2486,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
if (dev_out->flags & IFF_LOOPBACK)
flags |= RTCF_LOCAL;
- in_dev = __in_dev_get_rcu(dev_out);
- if (!in_dev)
- return ERR_PTR(-EINVAL);
-
if (type == RTN_BROADCAST) {
flags |= RTCF_BROADCAST | RTCF_LOCAL;
fi = NULL;
^ permalink raw reply related
* [PATCH] netpoll: Add support for hardware checksumming on egress
From: Bogdan Hamciuc @ 2012-06-12 10:26 UTC (permalink / raw)
To: davem; +Cc: netdev, Bogdan Hamciuc
In-Reply-To: <1339496765-3093-1-git-send-email-bogdan.hamciuc@freescale.com>
Netpoll used to compute its own csum; but if the device supports, we
should let it do the checksum itself.
Signed-off-by: Bogdan Hamciuc <bogdan.hamciuc@freescale.com>
---
net/core/netpoll.c | 14 ++++++++++----
1 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9a08068..f5d00b4 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -385,13 +385,19 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
udph->source = htons(np->local_port);
udph->dest = htons(np->remote_port);
udph->len = htons(udp_len);
- udph->check = 0;
- udph->check = csum_tcpudp_magic(np->local_ip,
+
+ /* Only querying the IPv4 csumming capabilities */
+ if (np->dev->features & NETIF_F_IP_CSUM)
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ else {
+ skb->ip_summed = CHECKSUM_NONE;
+ udph->check = csum_tcpudp_magic(np->local_ip,
np->remote_ip,
udp_len, IPPROTO_UDP,
csum_partial(udph, udp_len, 0));
- if (udph->check == 0)
- udph->check = CSUM_MANGLED_0;
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+ }
skb_push(skb, sizeof(*iph));
skb_reset_network_header(skb);
--
1.5.6.3
^ permalink raw reply related
* [PATCH] netpoll: Fix skb tail pointer in netpoll_send_udp()
From: Bogdan Hamciuc @ 2012-06-12 10:26 UTC (permalink / raw)
To: davem; +Cc: netdev, Bogdan Hamciuc
As skb->tail wasn't updated after skb_copy_to_linear_data(), subsequent
calls to skb_realloc_headroom() (as made by an ethernet driver's
ndo_start_xmit routine) would only effectively copy the packet headers,
leaving garbage in the payload.
In the process, removed some unnecessary code.
Signed-off-by: Bogdan Hamciuc <bogdan.hamciuc@freescale.com>
---
net/core/netpoll.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3d84fb9..9a08068 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -362,22 +362,22 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev);
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
- int total_len, eth_len, ip_len, udp_len;
+ int total_len, ip_len, udp_len;
struct sk_buff *skb;
struct udphdr *udph;
struct iphdr *iph;
struct ethhdr *eth;
udp_len = len + sizeof(*udph);
- ip_len = eth_len = udp_len + sizeof(*iph);
- total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
+ ip_len = udp_len + sizeof(*iph);
+ total_len = ip_len + ETH_HLEN + NET_IP_ALIGN;
skb = find_skb(np, total_len, total_len - len);
if (!skb)
return;
skb_copy_to_linear_data(skb, msg, len);
- skb->len += len;
+ skb_put(skb, len);
skb_push(skb, sizeof(*udph));
skb_reset_transport_header(skb);
--
1.5.6.3
^ permalink raw reply related
* Re: [PATCH] usbnet: Activate the halt interrupt endpoint to fix endless "XactErr" error
From: Huajun Li @ 2012-06-12 10:09 UTC (permalink / raw)
To: Bjørn Mork
Cc: David Miller, Ming Lei, Alan Stern,
linux-usb-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <87sje1podo.fsf-lbf33ChDnrE/G1V5fR+Y7Q@public.gmane.org>
On Tue, Jun 12, 2012 at 4:47 AM, Bjørn Mork <bjorn-yOkvZcmFvRU@public.gmane.org> wrote:
> Huajun Li <huajun.li.lee-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:
>
>> diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
>> index 76f4396..c0bcb61 100644
>> --- a/include/linux/usb/usbnet.h
>> +++ b/include/linux/usb/usbnet.h
>> @@ -62,13 +62,14 @@ struct usbnet {
>> unsigned long flags;
>> # define EVENT_TX_HALT 0
>> # define EVENT_RX_HALT 1
>> -# define EVENT_RX_MEMORY 2
>> -# define EVENT_STS_SPLIT 3
>> -# define EVENT_LINK_RESET 4
>> -# define EVENT_RX_PAUSED 5
>> -# define EVENT_DEV_WAKING 6
>> -# define EVENT_DEV_ASLEEP 7
>> -# define EVENT_DEV_OPEN 8
>> +# define EVENT_STS_HALT 2
>> +# define EVENT_RX_MEMORY 3
>> +# define EVENT_STS_SPLIT 4
>> +# define EVENT_LINK_RESET 5
>> +# define EVENT_RX_PAUSED 6
>> +# define EVENT_DEV_WAKING 7
>> +# define EVENT_DEV_ASLEEP 8
>> +# define EVENT_DEV_OPEN 9
>> };
>
> Why do you renumber all of these instead of adding the new
> EVENT_STS_HALT to the end of the list?
>
Thanks for your comments.
I think it's nice to sort these mask codes by their purposes.
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [patch] can: c_can: precedence error in c_can_chip_config()
From: Marc Kleine-Budde @ 2012-06-12 9:37 UTC (permalink / raw)
To: Oliver Hartkopp
Cc: Dan Carpenter, Wolfgang Grandegger, AnilKumar Ch, David S. Miller,
Jiri Kosina, linux-can, netdev, kernel-janitors
In-Reply-To: <4FD62E21.3020209@hartkopp.net>
[-- Attachment #1: Type: text/plain, Size: 973 bytes --]
On 06/11/2012 07:42 PM, Oliver Hartkopp wrote:
> On 10.06.2012 19:52, Marc Kleine-Budde wrote:
>
>> On 06/09/2012 05:56 PM, Dan Carpenter wrote:
>>> (CAN_CTRLMODE_LISTENONLY & CAN_CTRLMODE_LOOPBACK) is (0x02 & 0x01) which
>>> is zero so the condition is never true. The intent here was to test
>>> that both flags were set.
>>>
>>> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
>>> ---
>>> This is a static checker fix. I'm not super familiar with the c_can
>>> code.
>>
>> Good catch. Applied to can-next.
>>
>> Marc
>>
>
>
> Shouldn't this fix go through the net-tree and stable instead of net-next?
Can I add your Acked-by ... when adding to net?
Marc
--
Pengutronix e.K. | Marc Kleine-Budde |
Industrial Linux Solutions | Phone: +49-231-2826-924 |
Vertretung West/Dortmund | Fax: +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686 | http://www.pengutronix.de |
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]
^ permalink raw reply
* Re: net/netfilter/nf_conntrack_proto_tcp.c:1606:9: error: ‘struct nf_proto_net’ has no member named ‘user’
From: Pablo Neira Ayuso @ 2012-06-12 9:29 UTC (permalink / raw)
To: Gao feng; +Cc: David Miller, wfg, netdev
In-Reply-To: <4FD69F5E.3060900@cn.fujitsu.com>
On Tue, Jun 12, 2012 at 09:46:06AM +0800, Gao feng wrote:
> 于 2012年06月12日 08:26, Pablo Neira Ayuso 写道:
> > Hi again David,
> >
> > On Mon, Jun 11, 2012 at 03:23:44PM -0700, David Miller wrote:
> >> From: Pablo Neira Ayuso <pablo@netfilter.org>
> >> Date: Tue, 12 Jun 2012 00:15:21 +0200
> >>
> >>> Could you please apply the following patch to net-next to resolve
> >>> this? Thanks.
> >>
> >> Applied, but you have to be kidding me with those ifdefs.
> >>
> >> This is exactly the same kind of thing Gao suggested for
> >> the inetpeer code recently and which I flat out rejected.
> >>
> >> You can't pepper foo.c files with ifdefs all over the place.
> >
> > Would you be OK if I send you patches to move all sysctl part of
> > nf_conntrack_proto_*.c to nf_conntrack_proto_*_sysctl.c
> >
> > I can also do the same for nf_conntrack_proto.c.
> >
> > This means more files under the net/netfilter directory, but less
> > ifdef kludges in the code.
> >
> > Please, have a look at the patch enclosed to this email in case you
> > want to see how it would look like in the end with my proposal.
>
> I am sorry for all the trouble aroused by my negligence.
>
> > static int tcpv4_init_net(struct net *net)
> > {
> > int i;
> > @@ -1600,11 +1373,7 @@ static int tcpv4_init_net(struct net *net)
> > struct nf_tcp_net *tn = tcp_pernet(net);
> > struct nf_proto_net *pn = (struct nf_proto_net *)tn;
> >
> > -#ifdef CONFIG_SYSCTL
> > - if (!pn->ctl_table) {
> > -#else
> > if (!pn->users++) {
>
> nf_proto_net.users has different meaning when SYSCTL enabled or disabled.
>
> when SYSCTL enabled,it means if both tcpv4 and tcpv6 register the sysctl,
> it is increased when register sysctl success and decreased when unregister sysctl.
> we can regard it as the refcnt of ctl_table.
>
> when SYSCTL disabled,it just used to identify if the proto's pernet data
> has been initialized.
We have to use two different counters for this. The conditional
meaning of that variable is really confusing.
^ permalink raw reply
* Re: Difficulties to get 1Gbps on be2net ethernet card
From: Jean-Michel Hautbois @ 2012-06-12 9:10 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Sathya.Perla, netdev
In-Reply-To: <1339491979.22704.23.camel@edumazet-glaptop>
2012/6/12 Eric Dumazet <eric.dumazet@gmail.com>:
> On Tue, 2012-06-12 at 11:01 +0200, Jean-Michel Hautbois wrote:
>
>> Can I do that using netperf ?
>
>
> Sure, you could use netperf -t UDP_RR
>
>
It sends, but no change on TX...
^ permalink raw reply
* Re: Possible deadlock in ipv6?
From: Eric Dumazet @ 2012-06-12 9:09 UTC (permalink / raw)
To: David Miller; +Cc: vdavydov, netdev
In-Reply-To: <20120611.235453.953830769326224643.davem@davemloft.net>
On Mon, 2012-06-11 at 23:54 -0700, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Wed, 06 Jun 2012 17:58:34 +0200
>
> > And it seems this neigh_down() can be removed, its called later
> > (after dev->ip6_ptr is cleared)
>
> It is unclear whether we need to do the the neigh_down() in both
> the 'how' and '!how' cases. If so then we can't make this change.
>
Hmm...
Is it expected we send traffic on device dismantle ?
If no, we could do :
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d81d026..16e0ddb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -681,8 +681,6 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
while ((n = *np) != NULL) {
if (!dev || n->dev == dev) {
*np = n->next;
- if (tbl->pdestructor)
- tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
release_net(pneigh_net(n));
^ permalink raw reply related
* Re: Difficulties to get 1Gbps on be2net ethernet card
From: Eric Dumazet @ 2012-06-12 9:06 UTC (permalink / raw)
To: Jean-Michel Hautbois; +Cc: Sathya.Perla, netdev
In-Reply-To: <CAL8zT=hKmnAG7kZE18noFNEksibvJpR_6Q9z59nGLhqbs5oxuw@mail.gmail.com>
On Tue, 2012-06-12 at 11:01 +0200, Jean-Michel Hautbois wrote:
> Can I do that using netperf ?
Sure, you could use netperf -t UDP_RR
^ permalink raw reply
* Re: Difficulties to get 1Gbps on be2net ethernet card
From: Jean-Michel Hautbois @ 2012-06-12 9:01 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Sathya.Perla, netdev
In-Reply-To: <1339491345.22704.22.camel@edumazet-glaptop>
2012/6/12 Eric Dumazet <eric.dumazet@gmail.com>:
> On Tue, 2012-06-12 at 10:24 +0200, Jean-Michel Hautbois wrote:
>> 2012/6/8 Jean-Michel Hautbois <jhautbois@gmail.com>:
>> > 2012/6/8 Eric Dumazet <eric.dumazet@gmail.com>:
>> >> On Fri, 2012-06-08 at 10:14 +0200, Jean-Michel Hautbois wrote:
>> >>> 2012/6/8 Eric Dumazet <eric.dumazet@gmail.com>:
>> >>> > On Thu, 2012-06-07 at 14:54 +0200, Jean-Michel Hautbois wrote:
>> >>> >
>> >>> >> eth1 Link encap:Ethernet HWaddr 68:b5:99:b9:8d:d4
>> >>> >> UP BROADCAST RUNNING SLAVE MULTICAST MTU:4096 Metric:1
>> >>> >> RX packets:0 errors:0 dropped:0 overruns:0 frame:0
>> >>> >> TX packets:15215387 errors:0 dropped:0 overruns:0 carrier:0
>> >>> >> collisions:0 txqueuelen:1000
>> >>> >> RX bytes:0 (0.0 B) TX bytes:61476524359 (57.2 GiB)
>> >>> >
>> >>> >> qdisc mq 0: dev eth1 root
>> >>> >> Sent 61476524359 bytes 15215387 pkt (dropped 45683472, overlimits 0
>> >>> >> requeues 17480)
>> >>> >
>> >>> > OK, and "tc -s -d cl show dev eth1"
>> >>> >
>> >>> > (How many queues are really used)
>> >>> >
>> >>> >
>> >>> >
>> >>>
>> >>> tc -s -d cl show dev eth1
>> >>> class mq :1 root
>> >>> Sent 9798071746 bytes 2425410 pkt (dropped 3442405, overlimits 0 requeues 2747)
>> >>> backlog 0b 0p requeues 2747
>> >>> class mq :2 root
>> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>> >>> backlog 0b 0p requeues 0
>> >>> class mq :3 root
>> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>> >>> backlog 0b 0p requeues 0
>> >>> class mq :4 root
>> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>> >>> backlog 0b 0p requeues 0
>> >>> class mq :5 root
>> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>> >>> backlog 0b 0p requeues 0
>> >>> class mq :6 root
>> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>> >>> backlog 0b 0p requeues 0
>> >>> class mq :7 root
>> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>> >>> backlog 0b 0p requeues 0
>> >>> class mq :8 root
>> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>> >>> backlog 0b 0p requeues 0
>> >>
>> >>
>> >> Do you have the same distribution on old kernels as well ?
>> >> (ie only queue 0 is used)
>> >>
>> >>
>> >>
>> >
>> > On the old kernel, there is nothing returned by this command.
>> >
>> > JM
>>
>> I used perf in order to get more information.
>
> What happens if you force some traffic in the other way (say 50.000
> (small) packets per second in RX) while doing your tests ?
>
Can I do that using netperf ?
^ permalink raw reply
* Re: Difficulties to get 1Gbps on be2net ethernet card
From: Eric Dumazet @ 2012-06-12 8:55 UTC (permalink / raw)
To: Jean-Michel Hautbois; +Cc: Sathya.Perla, netdev
In-Reply-To: <CAL8zT=gNtcdyyVcPt5hB6jyF1btzQArEuZgVKWkb0Wd=a4LcVA@mail.gmail.com>
On Tue, 2012-06-12 at 10:24 +0200, Jean-Michel Hautbois wrote:
> 2012/6/8 Jean-Michel Hautbois <jhautbois@gmail.com>:
> > 2012/6/8 Eric Dumazet <eric.dumazet@gmail.com>:
> >> On Fri, 2012-06-08 at 10:14 +0200, Jean-Michel Hautbois wrote:
> >>> 2012/6/8 Eric Dumazet <eric.dumazet@gmail.com>:
> >>> > On Thu, 2012-06-07 at 14:54 +0200, Jean-Michel Hautbois wrote:
> >>> >
> >>> >> eth1 Link encap:Ethernet HWaddr 68:b5:99:b9:8d:d4
> >>> >> UP BROADCAST RUNNING SLAVE MULTICAST MTU:4096 Metric:1
> >>> >> RX packets:0 errors:0 dropped:0 overruns:0 frame:0
> >>> >> TX packets:15215387 errors:0 dropped:0 overruns:0 carrier:0
> >>> >> collisions:0 txqueuelen:1000
> >>> >> RX bytes:0 (0.0 B) TX bytes:61476524359 (57.2 GiB)
> >>> >
> >>> >> qdisc mq 0: dev eth1 root
> >>> >> Sent 61476524359 bytes 15215387 pkt (dropped 45683472, overlimits 0
> >>> >> requeues 17480)
> >>> >
> >>> > OK, and "tc -s -d cl show dev eth1"
> >>> >
> >>> > (How many queues are really used)
> >>> >
> >>> >
> >>> >
> >>>
> >>> tc -s -d cl show dev eth1
> >>> class mq :1 root
> >>> Sent 9798071746 bytes 2425410 pkt (dropped 3442405, overlimits 0 requeues 2747)
> >>> backlog 0b 0p requeues 2747
> >>> class mq :2 root
> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> >>> backlog 0b 0p requeues 0
> >>> class mq :3 root
> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> >>> backlog 0b 0p requeues 0
> >>> class mq :4 root
> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> >>> backlog 0b 0p requeues 0
> >>> class mq :5 root
> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> >>> backlog 0b 0p requeues 0
> >>> class mq :6 root
> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> >>> backlog 0b 0p requeues 0
> >>> class mq :7 root
> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> >>> backlog 0b 0p requeues 0
> >>> class mq :8 root
> >>> Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
> >>> backlog 0b 0p requeues 0
> >>
> >>
> >> Do you have the same distribution on old kernels as well ?
> >> (ie only queue 0 is used)
> >>
> >>
> >>
> >
> > On the old kernel, there is nothing returned by this command.
> >
> > JM
>
> I used perf in order to get more information.
What happens if you force some traffic in the other way (say 50.000
(small) packets per second in RX) while doing your tests ?
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox