* [PATCH 14/14] net: dsa: Provide additional RMON statistics
From: Guenter Roeck @ 2014-10-23 4:03 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>
Display sw_in_discards, sw_in_filtered, and sw_out_filtered for chips
supported by the mv88e6123_61_65 and mv88e6352 drivers.
These counters are provided in port registers, not the normal status registers.
Mark them by adding 0x100 to the register offset, and add special handling
code for them to mv88e6xxx_get_ethtool_stats.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
drivers/net/dsa/mv88e6123_61_65.c | 3 +++
drivers/net/dsa/mv88e6352.c | 3 +++
drivers/net/dsa/mv88e6xxx.c | 26 +++++++++++++++++++++-----
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c
index a3aeba4..18c026f 100644
--- a/drivers/net/dsa/mv88e6123_61_65.c
+++ b/drivers/net/dsa/mv88e6123_61_65.c
@@ -430,6 +430,9 @@ static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = {
{ "hist_256_511bytes", 4, 0x0b, },
{ "hist_512_1023bytes", 4, 0x0c, },
{ "hist_1024_max_bytes", 4, 0x0d, },
+ { "sw_in_discards", 4, 0x110, },
+ { "sw_in_filtered", 2, 0x112, },
+ { "sw_out_filtered", 2, 0x113, },
};
static void
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 2f31e28..85dabd9 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -536,6 +536,9 @@ static struct mv88e6xxx_hw_stat mv88e6352_hw_stats[] = {
{ "hist_256_511bytes", 4, 0x0b, },
{ "hist_512_1023bytes", 4, 0x0c, },
{ "hist_1024_max_bytes", 4, 0x0d, },
+ { "sw_in_discards", 4, 0x110, },
+ { "sw_in_filtered", 2, 0x112, },
+ { "sw_out_filtered", 2, 0x113, },
};
static int mv88e6352_get_eeprom_len(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index c071fde..da558d8 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -485,17 +485,33 @@ void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
for (i = 0; i < nr_stats; i++) {
struct mv88e6xxx_hw_stat *s = stats + i;
u32 low;
- u32 high;
-
+ u32 high = 0;
+
+ if (s->reg >= 0x100) {
+ int ret;
+
+ ret = mv88e6xxx_reg_read(ds, REG_PORT(port),
+ s->reg - 0x100);
+ if (ret < 0)
+ goto error;
+ low = ret;
+ if (s->sizeof_stat == 4) {
+ ret = mv88e6xxx_reg_read(ds, REG_PORT(port),
+ s->reg - 0x100 + 1);
+ if (ret < 0)
+ goto error;
+ high = ret;
+ }
+ data[i] = (((u64)high) << 16) | low;
+ continue;
+ }
mv88e6xxx_stats_read(ds, s->reg, &low);
if (s->sizeof_stat == 8)
mv88e6xxx_stats_read(ds, s->reg + 1, &high);
- else
- high = 0;
data[i] = (((u64)high) << 32) | low;
}
-
+error:
mutex_unlock(&ps->stats_mutex);
}
--
1.9.1
^ permalink raw reply related
* [PATCH 12/14] net: dsa/mv88e6123_61_65: Add support for reading switch registers
From: Guenter Roeck @ 2014-10-23 4:03 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>
The infrastructure can now report switch registers to ethtool.
Add support for it to the mv88e6123_61_65 driver.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
drivers/net/dsa/mv88e6123_61_65.c | 2 ++
drivers/net/dsa/mv88e6xxx.c | 24 ++++++++++++++++++++++++
drivers/net/dsa/mv88e6xxx.h | 3 +++
3 files changed, 29 insertions(+)
diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c
index 17dc60e..a3aeba4 100644
--- a/drivers/net/dsa/mv88e6123_61_65.c
+++ b/drivers/net/dsa/mv88e6123_61_65.c
@@ -465,6 +465,8 @@ struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
.get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats,
.get_sset_count = mv88e6123_61_65_get_sset_count,
.get_temp = mv88e6123_61_65_get_temp,
+ .get_regs_len = mv88e6xxx_get_regs_len,
+ .get_regs = mv88e6xxx_get_regs,
};
MODULE_ALIAS("platform:mv88e6123");
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 8e1090b..c071fde 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -499,6 +499,30 @@ void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
mutex_unlock(&ps->stats_mutex);
}
+int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port)
+{
+ return 32 * sizeof(u16);
+}
+
+void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
+ struct ethtool_regs *regs, void *_p)
+{
+ u16 *p = _p;
+ int i;
+
+ regs->version = 0;
+
+ memset(p, 0xff, 32 * sizeof(u16));
+
+ for (i = 0; i < 32; i++) {
+ int ret;
+
+ ret = mv88e6xxx_reg_read(ds, REG_PORT(port), i);
+ if (ret >= 0)
+ p[i] = ret;
+ }
+}
+
static int __init mv88e6xxx_init(void)
{
#if IS_ENABLED(CONFIG_NET_DSA_MV88E6131)
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index d4d53ae..8c75702 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -78,6 +78,9 @@ void mv88e6xxx_get_strings(struct dsa_switch *ds,
void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
int nr_stats, struct mv88e6xxx_hw_stat *stats,
int port, uint64_t *data);
+int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port);
+void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
+ struct ethtool_regs *regs, void *_p);
extern struct dsa_switch_driver mv88e6131_switch_driver;
extern struct dsa_switch_driver mv88e6123_61_65_switch_driver;
--
1.9.1
^ permalink raw reply related
* [PATCH 11/14] net: dsa: Add support for reading switch registers with ethtool
From: Guenter Roeck @ 2014-10-23 4:03 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>
Add support for reading switch registers with 'ethtool -d'.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
include/net/dsa.h | 7 +++++++
net/dsa/slave.c | 22 ++++++++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 73146b7..edc5e71 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -255,6 +255,13 @@ struct dsa_switch_driver {
struct ethtool_eeprom *eeprom, u8 *data);
int (*set_eeprom)(struct dsa_switch *ds,
struct ethtool_eeprom *eeprom, u8 *data);
+
+ /*
+ * Register access.
+ */
+ int (*get_regs_len)(struct dsa_switch *ds, int port);
+ void (*get_regs)(struct dsa_switch *ds, int port,
+ struct ethtool_regs *regs, void *p);
};
void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index a54ee43..e988d07 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -249,6 +249,26 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
}
+static int dsa_slave_get_regs_len(struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->get_regs_len != NULL)
+ return ds->drv->get_regs_len(ds, p->port);
+
+ return -EOPNOTSUPP;
+}
+
+static void
+dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ ds->drv->get_regs(ds, p->port, regs, _p);
+}
+
static int dsa_slave_nway_reset(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -420,6 +440,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_settings = dsa_slave_get_settings,
.set_settings = dsa_slave_set_settings,
.get_drvinfo = dsa_slave_get_drvinfo,
+ .get_regs_len = dsa_slave_get_regs_len,
+ .get_regs = dsa_slave_get_regs,
.nway_reset = dsa_slave_nway_reset,
.get_link = dsa_slave_get_link,
.get_eeprom_len = dsa_slave_get_eeprom_len,
--
1.9.1
^ permalink raw reply related
* [PATCH 10/14] net: dsa/mv88e6352: Implement EEPROM access functions
From: Guenter Roeck @ 2014-10-23 4:03 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>
MV88E6352 supports read and write access to its configuration EEPROM.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
drivers/net/dsa/mv88e6352.c | 228 +++++++++++++++++++++++++++++++++++++++++++-
drivers/net/dsa/mv88e6xxx.h | 5 +
2 files changed, 230 insertions(+), 3 deletions(-)
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index aff6695..9dddcba 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -22,18 +22,18 @@
#include <net/dsa.h>
#include "mv88e6xxx.h"
-static int mv88e6352_phy_wait(struct dsa_switch *ds)
+static int mv88e6352_wait(struct dsa_switch *ds, int reg, u16 mask)
{
unsigned long timeout = jiffies + HZ / 10;
while (time_before(jiffies, timeout)) {
int ret;
- ret = REG_READ(REG_GLOBAL2, 0x18);
+ ret = REG_READ(REG_GLOBAL2, reg);
if (ret < 0)
return ret;
- if (!(ret & 0x8000))
+ if (!(ret & mask))
return 0;
usleep_range(1000, 2000);
@@ -41,6 +41,21 @@ static int mv88e6352_phy_wait(struct dsa_switch *ds)
return -ETIMEDOUT;
}
+static inline int mv88e6352_phy_wait(struct dsa_switch *ds)
+{
+ return mv88e6352_wait(ds, 0x18, 0x8000);
+}
+
+static inline int mv88e6352_eeprom_load_wait(struct dsa_switch *ds)
+{
+ return mv88e6352_wait(ds, 0x14, 0x0800);
+}
+
+static inline int mv88e6352_eeprom_busy_wait(struct dsa_switch *ds)
+{
+ return mv88e6352_wait(ds, 0x14, 0x8000);
+}
+
static int __mv88e6352_phy_read(struct dsa_switch *ds, int addr, int regnum)
{
int ret;
@@ -426,6 +441,7 @@ static int mv88e6352_setup(struct dsa_switch *ds)
mutex_init(&ps->smi_mutex);
mutex_init(&ps->stats_mutex);
mutex_init(&ps->phy_mutex);
+ mutex_init(&ps->eeprom_mutex);
ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
@@ -522,6 +538,209 @@ static struct mv88e6xxx_hw_stat mv88e6352_hw_stats[] = {
{ "hist_1024_max_bytes", 4, 0x0d, },
};
+static int mv88e6352_get_eeprom_len(struct dsa_switch *ds)
+{
+ return 0x200;
+}
+
+static int mv88e6352_read_eeprom_word(struct dsa_switch *ds, int addr)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
+ mutex_lock(&ps->eeprom_mutex);
+
+ ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, 0x14,
+ 0xc000 | (addr & 0xff));
+ if (ret < 0)
+ goto error;
+
+ ret = mv88e6352_eeprom_busy_wait(ds);
+ if (ret < 0)
+ goto error;
+
+ ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2, 0x15);
+error:
+ mutex_unlock(&ps->eeprom_mutex);
+ return ret;
+}
+
+static int mv88e6352_get_eeprom(struct dsa_switch *ds,
+ struct ethtool_eeprom *eeprom, u8 *data)
+{
+ int offset;
+ int len;
+ int ret;
+
+ offset = eeprom->offset;
+ len = eeprom->len;
+ eeprom->len = 0;
+
+ eeprom->magic = 0xc3ec4951;
+
+ ret = mv88e6352_eeprom_load_wait(ds);
+ if (ret < 0)
+ return ret;
+
+ if (offset & 1) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ *data++ = (word >> 8) & 0xff;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ while (len >= 2) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ *data++ = word & 0xff;
+ *data++ = (word >> 8) & 0xff;
+
+ offset += 2;
+ len -= 2;
+ eeprom->len += 2;
+ }
+
+ if (len) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ *data++ = word & 0xff;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ return 0;
+}
+
+static int mv88e6352_eeprom_is_readonly(struct dsa_switch *ds)
+{
+ int ret;
+
+ ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2, 0x14);
+ if (ret < 0)
+ return ret;
+
+ if (!(ret & 0x0400))
+ return -EROFS;
+
+ return 0;
+}
+
+static int mv88e6352_write_eeprom_word(struct dsa_switch *ds, int addr,
+ u16 data)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
+ mutex_lock(&ps->eeprom_mutex);
+
+ ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, 0x15, data);
+ if (ret < 0)
+ goto error;
+
+ ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, 0x14,
+ 0xb000 | (addr & 0xff));
+ if (ret < 0)
+ goto error;
+
+ ret = mv88e6352_eeprom_busy_wait(ds);
+error:
+ mutex_unlock(&ps->eeprom_mutex);
+ return ret;
+}
+
+static int mv88e6352_set_eeprom(struct dsa_switch *ds,
+ struct ethtool_eeprom *eeprom, u8 *data)
+{
+ int offset;
+ int ret;
+ int len;
+
+ if (eeprom->magic != 0xc3ec4951)
+ return -EINVAL;
+
+ ret = mv88e6352_eeprom_is_readonly(ds);
+ if (ret)
+ return ret;
+
+ offset = eeprom->offset;
+ len = eeprom->len;
+ eeprom->len = 0;
+
+ ret = mv88e6352_eeprom_load_wait(ds);
+ if (ret < 0)
+ return ret;
+
+ if (offset & 1) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ word = (*data++ << 8) | (word & 0xff);
+
+ ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word);
+ if (ret < 0)
+ return ret;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ while (len >= 2) {
+ int word;
+
+ word = *data++;
+ word |= *data++ << 8;
+
+ ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word);
+ if (ret < 0)
+ return ret;
+
+ offset += 2;
+ len -= 2;
+ eeprom->len += 2;
+ }
+
+ if (len) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ word = (word & 0xff00) | *data++;
+
+ ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word);
+ if (ret < 0)
+ return ret;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ return 0;
+}
+
static void
mv88e6352_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
{
@@ -557,6 +776,9 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.get_temp_limit = mv88e6352_get_temp_limit,
.set_temp_limit = mv88e6352_set_temp_limit,
.get_temp_alarm = mv88e6352_get_temp_alarm,
+ .get_eeprom_len = mv88e6352_get_eeprom_len,
+ .get_eeprom = mv88e6352_get_eeprom,
+ .set_eeprom = mv88e6352_set_eeprom,
};
MODULE_ALIAS("platform:mv88e6352");
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 77beff5..d4d53ae 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -43,6 +43,11 @@ struct mv88e6xxx_priv_state {
*/
struct mutex phy_mutex;
+ /* This mutex serializes eeprom access for chips with
+ * eeprom support.
+ */
+ struct mutex eeprom_mutex;
+
int id; /* switch product id */
};
--
1.9.1
^ permalink raw reply related
* [PATCH 05/14] net: dsa/mv88e6352: Add support for MV88E6176
From: Guenter Roeck @ 2014-10-23 4:03 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>
MV88E6176 is mostly compatible with MV88E6352 and is documented
in the same functional specification. Add support for it.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
drivers/net/dsa/Kconfig | 5 +++--
drivers/net/dsa/mv88e6352.c | 2 ++
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 0987c33..2d1a55e 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -46,12 +46,13 @@ config NET_DSA_MV88E6171
chip.
config NET_DSA_MV88E6352
- tristate "Marvell 88E6352 ethernet switch chip support"
+ tristate "Marvell 88E6176/88E6352 ethernet switch chip support"
select NET_DSA
select NET_DSA_MV88E6XXX
select NET_DSA_TAG_EDSA
---help---
- This enables support for the Marvell 88E6352 ethernet switch chip.
+ This enables support for the Marvell 88E6176 and 88E6352 ethernet
+ switch chips.
config NET_DSA_BCM_SF2
tristate "Broadcom Starfighter 2 Ethernet switch support"
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 43a5826..f17364f 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -73,6 +73,8 @@ static char *mv88e6352_probe(struct device *host_dev, int sw_addr)
ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
if (ret >= 0) {
+ if ((ret & 0xfff0) == 0x1760)
+ return "Marvell 88E6176";
if (ret == 0x3521)
return "Marvell 88E6352 (A0)";
if (ret == 0x3522)
--
1.9.1
^ permalink raw reply related
* [PATCH 04/14] net: dsa: Add support for Marvell 88E6352
From: Guenter Roeck @ 2014-10-23 4:03 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>
Marvell 88E6352 is mostly compatible with MV88E6123/61/65,
but requires indirect PHY access. Also, its configuration
registers are a bit different.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
MAINTAINERS | 5 +
drivers/net/dsa/Kconfig | 8 +
drivers/net/dsa/Makefile | 3 +
drivers/net/dsa/mv88e6352.c | 464 ++++++++++++++++++++++++++++++++++++++++++++
drivers/net/dsa/mv88e6xxx.c | 3 +
drivers/net/dsa/mv88e6xxx.h | 7 +
6 files changed, 490 insertions(+)
create mode 100644 drivers/net/dsa/mv88e6352.c
diff --git a/MAINTAINERS b/MAINTAINERS
index a20df9b..e48a05b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5845,6 +5845,11 @@ M: Russell King <rmk+kernel@arm.linux.org.uk>
S: Maintained
F: drivers/gpu/drm/armada/
+MARVELL 88E6352 DSA support
+M: Guenter Roeck <linux@roeck-us.net>
+S: Maintained
+F: drivers/net/dsa/mv88e6352.c
+
MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2)
M: Mirko Lindner <mlindner@marvell.com>
M: Stephen Hemminger <stephen@networkplumber.org>
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 9234d80..0987c33 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -45,6 +45,14 @@ config NET_DSA_MV88E6171
This enables support for the Marvell 88E6171 ethernet switch
chip.
+config NET_DSA_MV88E6352
+ tristate "Marvell 88E6352 ethernet switch chip support"
+ select NET_DSA
+ select NET_DSA_MV88E6XXX
+ select NET_DSA_TAG_EDSA
+ ---help---
+ This enables support for the Marvell 88E6352 ethernet switch chip.
+
config NET_DSA_BCM_SF2
tristate "Broadcom Starfighter 2 Ethernet switch support"
depends on HAS_IOMEM
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index 23a90de..e2d51c4 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -7,6 +7,9 @@ endif
ifdef CONFIG_NET_DSA_MV88E6131
mv88e6xxx_drv-y += mv88e6131.o
endif
+ifdef CONFIG_NET_DSA_MV88E6352
+mv88e6xxx_drv-y += mv88e6352.o
+endif
ifdef CONFIG_NET_DSA_MV88E6171
mv88e6xxx_drv-y += mv88e6171.o
endif
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
new file mode 100644
index 0000000..43a5826
--- /dev/null
+++ b/drivers/net/dsa/mv88e6352.c
@@ -0,0 +1,464 @@
+/*
+ * net/dsa/mv88e6352.c - Marvell 88e6352 switch chip support
+ *
+ * Copyright (c) 2014 Guenter Roeck
+ *
+ * Derived from mv88e6123_61_65.c
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/phy.h>
+#include <net/dsa.h>
+#include "mv88e6xxx.h"
+
+static int mv88e6352_phy_wait(struct dsa_switch *ds)
+{
+ unsigned long timeout = jiffies + HZ / 10;
+
+ while (time_before(jiffies, timeout)) {
+ int ret;
+
+ ret = REG_READ(REG_GLOBAL2, 0x18);
+ if (ret < 0)
+ return ret;
+
+ if (!(ret & 0x8000))
+ return 0;
+
+ usleep_range(1000, 2000);
+ }
+ return -ETIMEDOUT;
+}
+
+static int __mv88e6352_phy_read(struct dsa_switch *ds, int addr, int regnum)
+{
+ int ret;
+
+ REG_WRITE(REG_GLOBAL2, 0x18, 0x9800 | (addr << 5) | regnum);
+
+ ret = mv88e6352_phy_wait(ds);
+ if (ret < 0)
+ return ret;
+
+ return REG_READ(REG_GLOBAL2, 0x19);
+}
+
+static int __mv88e6352_phy_write(struct dsa_switch *ds, int addr, int regnum,
+ u16 val)
+{
+ REG_WRITE(REG_GLOBAL2, 0x19, val);
+ REG_WRITE(REG_GLOBAL2, 0x18, 0x9400 | (addr << 5) | regnum);
+
+ return mv88e6352_phy_wait(ds);
+}
+
+static char *mv88e6352_probe(struct device *host_dev, int sw_addr)
+{
+ struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev);
+ int ret;
+
+ if (bus == NULL)
+ return NULL;
+
+ ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
+ if (ret >= 0) {
+ if (ret == 0x3521)
+ return "Marvell 88E6352 (A0)";
+ if (ret == 0x3522)
+ return "Marvell 88E6352 (A1)";
+ if ((ret & 0xfff0) == 0x3520)
+ return "Marvell 88E6352";
+ }
+
+ return NULL;
+}
+
+static int mv88e6352_switch_reset(struct dsa_switch *ds)
+{
+ unsigned long timeout;
+ int ret;
+ int i;
+
+ /* Set all ports to the disabled state. */
+ for (i = 0; i < 7; i++) {
+ ret = REG_READ(REG_PORT(i), 0x04);
+ REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
+ }
+
+ /* Wait for transmit queues to drain. */
+ usleep_range(2000, 4000);
+
+ /* Reset the switch. Keep PPU active (bit 14, undocumented).
+ * The PPU needs to be active to support indirect phy register
+ * accesses through global registers 0x18 and 0x19.
+ */
+ REG_WRITE(REG_GLOBAL, 0x04, 0xc000);
+
+ /* Wait up to one second for reset to complete. */
+ timeout = jiffies + 1 * HZ;
+ while (time_before(jiffies, timeout)) {
+ ret = REG_READ(REG_GLOBAL, 0x00);
+ if ((ret & 0x8800) == 0x8800)
+ break;
+ usleep_range(1000, 2000);
+ }
+ if (time_after(jiffies, timeout))
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int mv88e6352_setup_global(struct dsa_switch *ds)
+{
+ int ret;
+ int i;
+
+ /* Discard packets with excessive collisions,
+ * mask all interrupt sources, enable PPU (bit 14, undocumented).
+ */
+ REG_WRITE(REG_GLOBAL, 0x04, 0x6000);
+
+ /* Set the default address aging time to 5 minutes, and
+ * enable address learn messages to be sent to all message
+ * ports.
+ */
+ REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
+
+ /* Configure the priority mapping registers. */
+ ret = mv88e6xxx_config_prio(ds);
+ if (ret < 0)
+ return ret;
+
+ /* Configure the upstream port, and configure the upstream
+ * port as the port to which ingress and egress monitor frames
+ * are to be sent.
+ */
+ REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110));
+
+ /* Disable remote management for now, and set the switch's
+ * DSA device number.
+ */
+ REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f);
+
+ /* Send all frames with destination addresses matching
+ * 01:80:c2:00:00:2x to the CPU port.
+ */
+ REG_WRITE(REG_GLOBAL2, 0x02, 0xffff);
+
+ /* Send all frames with destination addresses matching
+ * 01:80:c2:00:00:0x to the CPU port.
+ */
+ REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
+
+ /* Disable the loopback filter, disable flow control
+ * messages, disable flood broadcast override, disable
+ * removing of provider tags, disable ATU age violation
+ * interrupts, disable tag flow control, force flow
+ * control priority to the highest, and send all special
+ * multicast frames to the CPU at the highest priority.
+ */
+ REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
+
+ /* Program the DSA routing table. */
+ for (i = 0; i < 32; i++) {
+ int nexthop = 0x1f;
+
+ if (i != ds->index && i < ds->dst->pd->nr_chips)
+ nexthop = ds->pd->rtable[i] & 0x1f;
+
+ REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
+ }
+
+ /* Clear all trunk masks. */
+ for (i = 0; i < 8; i++)
+ REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7f);
+
+ /* Clear all trunk mappings. */
+ for (i = 0; i < 16; i++)
+ REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
+
+ /* Disable ingress rate limiting by resetting all ingress
+ * rate limit registers to their initial state.
+ */
+ for (i = 0; i < 7; i++)
+ REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8));
+
+ /* Initialise cross-chip port VLAN table to reset defaults. */
+ REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000);
+
+ /* Clear the priority override table. */
+ for (i = 0; i < 16; i++)
+ REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8));
+
+ /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */
+
+ return 0;
+}
+
+static int mv88e6352_setup_port(struct dsa_switch *ds, int p)
+{
+ int addr = REG_PORT(p);
+ u16 val;
+
+ /* MAC Forcing register: don't force link, speed, duplex
+ * or flow control state to any particular values on physical
+ * ports, but force the CPU port and all DSA ports to 1000 Mb/s
+ * full duplex.
+ */
+ if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
+ REG_WRITE(addr, 0x01, 0x003e);
+ else
+ REG_WRITE(addr, 0x01, 0x0003);
+
+ /* Do not limit the period of time that this port can be
+ * paused for by the remote end or the period of time that
+ * this port can pause the remote end.
+ */
+ REG_WRITE(addr, 0x02, 0x0000);
+
+ /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
+ * disable Header mode, enable IGMP/MLD snooping, disable VLAN
+ * tunneling, determine priority by looking at 802.1p and IP
+ * priority fields (IP prio has precedence), and set STP state
+ * to Forwarding.
+ *
+ * If this is the CPU link, use DSA or EDSA tagging depending
+ * on which tagging mode was configured.
+ *
+ * If this is a link to another switch, use DSA tagging mode.
+ *
+ * If this is the upstream port for this switch, enable
+ * forwarding of unknown unicasts and multicasts.
+ */
+ val = 0x0433;
+ if (dsa_is_cpu_port(ds, p)) {
+ if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA)
+ val |= 0x3300;
+ else
+ val |= 0x0100;
+ }
+ if (ds->dsa_port_mask & (1 << p))
+ val |= 0x0100;
+ if (p == dsa_upstream_port(ds))
+ val |= 0x000c;
+ REG_WRITE(addr, 0x04, val);
+
+ /* Port Control 1: disable trunking. Also, if this is the
+ * CPU port, enable learn messages to be sent to this port.
+ */
+ REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
+
+ /* Port based VLAN map: give each port its own address
+ * database, allow the CPU port to talk to each of the 'real'
+ * ports, and allow each of the 'real' ports to only talk to
+ * the upstream port.
+ */
+ val = (p & 0xf) << 12;
+ if (dsa_is_cpu_port(ds, p))
+ val |= ds->phys_port_mask;
+ else
+ val |= 1 << dsa_upstream_port(ds);
+ REG_WRITE(addr, 0x06, val);
+
+ /* Default VLAN ID and priority: don't set a default VLAN
+ * ID, and set the default packet priority to zero.
+ */
+ REG_WRITE(addr, 0x07, 0x0000);
+
+ /* Port Control 2: don't force a good FCS, set the maximum
+ * frame size to 10240 bytes, don't let the switch add or
+ * strip 802.1q tags, don't discard tagged or untagged frames
+ * on this port, do a destination address lookup on all
+ * received packets as usual, disable ARP mirroring and don't
+ * send a copy of all transmitted/received frames on this port
+ * to the CPU.
+ */
+ REG_WRITE(addr, 0x08, 0x2080);
+
+ /* Egress rate control: disable egress rate control. */
+ REG_WRITE(addr, 0x09, 0x0001);
+
+ /* Egress rate control 2: disable egress rate control. */
+ REG_WRITE(addr, 0x0a, 0x0000);
+
+ /* Port Association Vector: when learning source addresses
+ * of packets, add the address to the address database using
+ * a port bitmap that has only the bit for this port set and
+ * the other bits clear.
+ */
+ REG_WRITE(addr, 0x0b, 1 << p);
+
+ /* Port ATU control: disable limiting the number of address
+ * database entries that this port is allowed to use.
+ */
+ REG_WRITE(addr, 0x0c, 0x0000);
+
+ /* Priority Override: disable DA, SA and VTU priority override. */
+ REG_WRITE(addr, 0x0d, 0x0000);
+
+ /* Port Ethertype: use the Ethertype DSA Ethertype value. */
+ REG_WRITE(addr, 0x0f, ETH_P_EDSA);
+
+ /* Tag Remap: use an identity 802.1p prio -> switch prio
+ * mapping.
+ */
+ REG_WRITE(addr, 0x18, 0x3210);
+
+ /* Tag Remap 2: use an identity 802.1p prio -> switch prio
+ * mapping.
+ */
+ REG_WRITE(addr, 0x19, 0x7654);
+
+ return 0;
+}
+
+static int mv88e6352_setup(struct dsa_switch *ds)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+ int i;
+
+ mutex_init(&ps->smi_mutex);
+ mutex_init(&ps->stats_mutex);
+ mutex_init(&ps->phy_mutex);
+
+ ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
+
+ ret = mv88e6352_switch_reset(ds);
+ if (ret < 0)
+ return ret;
+
+ /* @@@ initialise vtu and atu */
+
+ ret = mv88e6352_setup_global(ds);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < 7; i++) {
+ ret = mv88e6352_setup_port(ds, i);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int mv88e6352_port_to_phy_addr(int port)
+{
+ if (port >= 0 && port <= 4)
+ return port;
+ return -EINVAL;
+}
+
+static int
+mv88e6352_phy_read(struct dsa_switch *ds, int port, int regnum)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int addr = mv88e6352_port_to_phy_addr(port);
+ int ret;
+
+ if (addr < 0)
+ return addr;
+
+ mutex_lock(&ps->phy_mutex);
+ ret = __mv88e6352_phy_read(ds, addr, regnum);
+ mutex_unlock(&ps->phy_mutex);
+
+ return ret;
+}
+
+static int
+mv88e6352_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int addr = mv88e6352_port_to_phy_addr(port);
+ int ret;
+
+ if (addr < 0)
+ return addr;
+
+ mutex_lock(&ps->phy_mutex);
+ ret = __mv88e6352_phy_write(ds, addr, regnum, val);
+ mutex_unlock(&ps->phy_mutex);
+
+ return ret;
+}
+
+static struct mv88e6xxx_hw_stat mv88e6352_hw_stats[] = {
+ { "in_good_octets", 8, 0x00, },
+ { "in_bad_octets", 4, 0x02, },
+ { "in_unicast", 4, 0x04, },
+ { "in_broadcasts", 4, 0x06, },
+ { "in_multicasts", 4, 0x07, },
+ { "in_pause", 4, 0x16, },
+ { "in_undersize", 4, 0x18, },
+ { "in_fragments", 4, 0x19, },
+ { "in_oversize", 4, 0x1a, },
+ { "in_jabber", 4, 0x1b, },
+ { "in_rx_error", 4, 0x1c, },
+ { "in_fcs_error", 4, 0x1d, },
+ { "out_octets", 8, 0x0e, },
+ { "out_unicast", 4, 0x10, },
+ { "out_broadcasts", 4, 0x13, },
+ { "out_multicasts", 4, 0x12, },
+ { "out_pause", 4, 0x15, },
+ { "excessive", 4, 0x11, },
+ { "collisions", 4, 0x1e, },
+ { "deferred", 4, 0x05, },
+ { "single", 4, 0x14, },
+ { "multiple", 4, 0x17, },
+ { "out_fcs_error", 4, 0x03, },
+ { "late", 4, 0x1f, },
+ { "hist_64bytes", 4, 0x08, },
+ { "hist_65_127bytes", 4, 0x09, },
+ { "hist_128_255bytes", 4, 0x0a, },
+ { "hist_256_511bytes", 4, 0x0b, },
+ { "hist_512_1023bytes", 4, 0x0c, },
+ { "hist_1024_max_bytes", 4, 0x0d, },
+};
+
+static void
+mv88e6352_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+{
+ mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6352_hw_stats),
+ mv88e6352_hw_stats, port, data);
+}
+
+static void
+mv88e6352_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
+{
+ mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6352_hw_stats),
+ mv88e6352_hw_stats, port, data);
+}
+
+static int mv88e6352_get_sset_count(struct dsa_switch *ds)
+{
+ return ARRAY_SIZE(mv88e6352_hw_stats);
+}
+
+struct dsa_switch_driver mv88e6352_switch_driver = {
+ .tag_protocol = DSA_TAG_PROTO_EDSA,
+ .priv_size = sizeof(struct mv88e6xxx_priv_state),
+ .probe = mv88e6352_probe,
+ .setup = mv88e6352_setup,
+ .set_addr = mv88e6xxx_set_addr_indirect,
+ .phy_read = mv88e6352_phy_read,
+ .phy_write = mv88e6352_phy_write,
+ .poll_link = mv88e6xxx_poll_link,
+ .get_strings = mv88e6352_get_strings,
+ .get_ethtool_stats = mv88e6352_get_ethtool_stats,
+ .get_sset_count = mv88e6352_get_sset_count,
+};
+
+MODULE_ALIAS("platform:mv88e6352");
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index a6c90cf..8e1090b 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -507,6 +507,9 @@ static int __init mv88e6xxx_init(void)
#if IS_ENABLED(CONFIG_NET_DSA_MV88E6123_61_65)
register_switch_driver(&mv88e6123_61_65_switch_driver);
#endif
+#if IS_ENABLED(CONFIG_NET_DSA_MV88E6352)
+ register_switch_driver(&mv88e6352_switch_driver);
+#endif
#if IS_ENABLED(CONFIG_NET_DSA_MV88E6171)
register_switch_driver(&mv88e6171_switch_driver);
#endif
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 5e5145a..77beff5 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -37,6 +37,12 @@ struct mv88e6xxx_priv_state {
*/
struct mutex stats_mutex;
+ /* This mutex serializes phy access for chips with
+ * indirect phy addressing. It is unused for chips
+ * with direct phy access.
+ */
+ struct mutex phy_mutex;
+
int id; /* switch product id */
};
@@ -70,6 +76,7 @@ void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
extern struct dsa_switch_driver mv88e6131_switch_driver;
extern struct dsa_switch_driver mv88e6123_61_65_switch_driver;
+extern struct dsa_switch_driver mv88e6352_switch_driver;
extern struct dsa_switch_driver mv88e6171_switch_driver;
#define REG_READ(addr, reg) \
--
1.9.1
^ permalink raw reply related
* Re: irq disable in __netdev_alloc_frag() ?
From: Eric Dumazet @ 2014-10-23 3:56 UTC (permalink / raw)
To: Alexander Duyck; +Cc: Alexei Starovoitov, Eric Dumazet, Network Development
In-Reply-To: <1414036276.2094.18.camel@edumazet-glaptop2.roam.corp.google.com>
On Wed, 2014-10-22 at 20:51 -0700, Eric Dumazet wrote:
> On Wed, 2014-10-22 at 20:19 -0700, Alexander Duyck wrote:
>
> > Couldn't __netdev_alloc_frag() be forked into two functions, one that is
> > only called from inside the NAPI context and one that is called for all
> > other contexts? It would mean having to double the number of pages
> > being held per CPU, but I would think something like that would be doable.
>
> Possibly, but this looks like code bloat for me.
>
> On my hosts, this hard irq masking is pure noise.
>
> What CPU are you using Alexander ?
Sorry, the question was for Alexei ;)
^ permalink raw reply
* Re: irq disable in __netdev_alloc_frag() ?
From: Eric Dumazet @ 2014-10-23 3:51 UTC (permalink / raw)
To: Alexander Duyck; +Cc: Alexei Starovoitov, Eric Dumazet, Network Development
In-Reply-To: <544873DF.1040403@gmail.com>
On Wed, 2014-10-22 at 20:19 -0700, Alexander Duyck wrote:
> Couldn't __netdev_alloc_frag() be forked into two functions, one that is
> only called from inside the NAPI context and one that is called for all
> other contexts? It would mean having to double the number of pages
> being held per CPU, but I would think something like that would be doable.
Possibly, but this looks like code bloat for me.
On my hosts, this hard irq masking is pure noise.
What CPU are you using Alexander ?
Same could be done with some kmem_cache_alloc() : SLAB uses hard irq
masking while some caches are never used from hard irq context.
^ permalink raw reply
* Re: irq disable in __netdev_alloc_frag() ?
From: Eric Dumazet @ 2014-10-23 3:48 UTC (permalink / raw)
To: Alexei Starovoitov; +Cc: Eric Dumazet, Network Development
In-Reply-To: <CAMEtUuxo7TT+9S1KSHxyTKR+pD6HWJKaTOO=A_VGsZVji3YU2w@mail.gmail.com>
On Wed, 2014-10-22 at 19:22 -0700, Alexei Starovoitov wrote:
> yes. I was thinking, since dev is already passed
> into __netdev_alloc_skb(), we can check whether
> dev registered with napi via dev->napi_list and if so,
> tell inner __netdev_alloc_frag() to skip irq disabling...
>
This does not matter. The problem is not _this_ device, the problem is
that another device might trigger a hard irq, and this hard irq could
mess your data.
^ permalink raw reply
* Re: [PATCH net] macvlan: fix a race on port dismantle and possible skb leaks
From: Herbert Xu @ 2014-10-23 3:28 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1414032226.2094.14.camel@edumazet-glaptop2.roam.corp.google.com>
On Wed, Oct 22, 2014 at 07:43:46PM -0700, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> We need to cancel the work queue after rcu grace period,
> otherwise it can be rescheduled by incoming packets.
>
> We need to purge queue if some skbs are still in it.
>
> We can use __skb_queue_head_init() variant in
> macvlan_process_broadcast()
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Fixes: 412ca1550cbec ("macvlan: Move broadcasts into a work queue")
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
Good catch! Your fix looks good to me.
Thanks,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [net] i40e: _MASK vs _SHIFT typo in i40e_handle_mdd_event()
From: Joe Perches @ 2014-10-23 3:22 UTC (permalink / raw)
To: Jeff Kirsher; +Cc: davem, Dan Carpenter, netdev, nhorman, sassmann, jogreene
In-Reply-To: <1414033589-7544-1-git-send-email-jeffrey.t.kirsher@intel.com>
On Wed, 2014-10-22 at 20:06 -0700, Jeff Kirsher wrote:
> We accidentally mask by the _SHIFT variable. It means that "event" is
> always zero.
[]
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
[]
> @@ -6151,7 +6151,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
> I40E_GL_MDET_TX_PF_NUM_SHIFT;
> u8 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >>
> I40E_GL_MDET_TX_VF_NUM_SHIFT;
> - u8 event = (reg & I40E_GL_MDET_TX_EVENT_SHIFT) >>
> + u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >>
> I40E_GL_MDET_TX_EVENT_SHIFT;
> u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >>
> I40E_GL_MDET_TX_QUEUE_SHIFT;
> @@ -6165,7 +6165,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
> if (reg & I40E_GL_MDET_RX_VALID_MASK) {
> u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >>
> I40E_GL_MDET_RX_FUNCTION_SHIFT;
> - u8 event = (reg & I40E_GL_MDET_RX_EVENT_SHIFT) >>
> + u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >>
> I40E_GL_MDET_RX_EVENT_SHIFT;
> u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >>
> I40E_GL_MDET_RX_QUEUE_SHIFT;
It might be useful to have a macro for that.
Something like:
#define GET_REG_VAL(reg, type) \
((reg & type##_MASK) >> type##_SHIFT)
so these could become:
u8 vf_num = GET_REG_VAL(reg, I40E_GL_MDET_TX_VF_NUM);
u8 event = GET_REG_VAL(reg, I40E_GL_MDET_TX_EVENT);
etc...
^ permalink raw reply
* Re: irq disable in __netdev_alloc_frag() ?
From: Alexander Duyck @ 2014-10-23 3:19 UTC (permalink / raw)
To: Eric Dumazet, Alexei Starovoitov; +Cc: Eric Dumazet, Network Development
In-Reply-To: <1414029160.2094.8.camel@edumazet-glaptop2.roam.corp.google.com>
On 10/22/2014 06:52 PM, Eric Dumazet wrote:
> On Wed, 2014-10-22 at 17:15 -0700, Alexei Starovoitov wrote:
>> Hi Eric,
>>
>> in the commit 6f532612cc24 ("net: introduce netdev_alloc_frag()")
>> you mentioned that the reason to disable interrupts
>> in __netdev_alloc_frag() is:
>> "- Must be IRQ safe (non NAPI drivers can use it)"
>>
>> Is there a way to do this conditionally?
>>
>> Without it I see 10% performance gain for my RX tests
>> (from 6.9Mpps to 7.7Mpps) and __netdev_alloc_frag()
>> itself goes from 6.6% to 2.1%
>> (popf seems to be quite costly)
> Well, your driver is probably a NAPI one, so you need to
> mask irqs, or to remove all non NAPI drivers from linux.
>
> __netdev_alloc_frag() (__netdev_alloc_skb()) is used by all.
>
> Problem is __netdev_alloc_frag() is generally deep inside caller
> chain, so using a private pool might have quite an overhead.
>
> Same could be said for skb_queue_head() /skb_queue_tail() /
> sock_queue_rcv_skb() :
> Many callers don't need to block irq.
Couldn't __netdev_alloc_frag() be forked into two functions, one that is
only called from inside the NAPI context and one that is called for all
other contexts? It would mean having to double the number of pages
being held per CPU, but I would think something like that would be doable.
Thanks,
Alex
^ permalink raw reply
* Possible wireless issue introduced in next-20140930
From: Murilo Opsfelder Araujo @ 2014-10-23 3:17 UTC (permalink / raw)
To: linux-kernel, netdev, linux-wireless; +Cc: Larry Finger, John W. Linville
Hello, everyone.
With next-20140930 my laptop does not work, i.e. after I enter my login
and password in KDM, the entire system becomes unresponsive and I need
to reset it in order to reboot (it does not even show the KDE splash
screen).
It was working pretty fine with next-20140926.
I've also tested with next-20141022 and v3.18-rc1 and no luck.
git bisect pointed me to the commit below [1]. My wireless card is a
RTL8191SEvA [2].
I need your help to troubleshoot this.
Thanks in advance.
[1]
commit 38506ecefab911785d5e1aa5889f6eeb462e0954
Author: Larry Finger <Larry.Finger@lwfinger.net>
Date: Mon Sep 22 09:39:19 2014 -0500
rtlwifi: rtl_pci: Start modification for new drivers
Future patches will move the drivers for RTL8192EE and RTL8821AE
from staging to the regular wireless tree. Here, the necessary features
are added to the PCI driver. Other files are touched due to changes
in the various data structs.
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
[2]
$ lspci -vvv
02:00.0 Network controller: Realtek Semiconductor Co., Ltd. RTL8191SEvA
Wireless LAN Controller (rev 10)
Subsystem: Hewlett-Packard Company Device 1467
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
<TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 0, Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 16
Region 0: I/O ports at 3000 [size=256]
Region 1: Memory at d3400000 (32-bit, non-prefetchable) [size=16K]
Capabilities: <access denied>
Kernel driver in use: rtl8192se
--
Murilo
^ permalink raw reply
* [net] i40e: _MASK vs _SHIFT typo in i40e_handle_mdd_event()
From: Jeff Kirsher @ 2014-10-23 3:06 UTC (permalink / raw)
To: davem; +Cc: Dan Carpenter, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
From: Dan Carpenter <dan.carpenter@oracle.com>
We accidentally mask by the _SHIFT variable. It means that "event" is
always zero.
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Tested-by: Jim Young <jamesx.m.young@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ed5f1c1..c3a7f4a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6151,7 +6151,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
I40E_GL_MDET_TX_PF_NUM_SHIFT;
u8 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >>
I40E_GL_MDET_TX_VF_NUM_SHIFT;
- u8 event = (reg & I40E_GL_MDET_TX_EVENT_SHIFT) >>
+ u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >>
I40E_GL_MDET_TX_EVENT_SHIFT;
u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >>
I40E_GL_MDET_TX_QUEUE_SHIFT;
@@ -6165,7 +6165,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
if (reg & I40E_GL_MDET_RX_VALID_MASK) {
u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >>
I40E_GL_MDET_RX_FUNCTION_SHIFT;
- u8 event = (reg & I40E_GL_MDET_RX_EVENT_SHIFT) >>
+ u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >>
I40E_GL_MDET_RX_EVENT_SHIFT;
u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >>
I40E_GL_MDET_RX_QUEUE_SHIFT;
--
1.9.3
^ permalink raw reply related
* [PATCH net] macvlan: fix a race on port dismantle and possible skb leaks
From: Eric Dumazet @ 2014-10-23 2:43 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Herbert Xu
From: Eric Dumazet <edumazet@google.com>
We need to cancel the work queue after rcu grace period,
otherwise it can be rescheduled by incoming packets.
We need to purge queue if some skbs are still in it.
We can use __skb_queue_head_init() variant in
macvlan_process_broadcast()
Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 412ca1550cbec ("macvlan: Move broadcasts into a work queue")
Cc: Herbert Xu <herbert@gondor.apana.org.au>
---
drivers/net/macvlan.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 29b3bb410781..98c2732755ea 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -272,7 +272,7 @@ static void macvlan_process_broadcast(struct work_struct *w)
struct sk_buff *skb;
struct sk_buff_head list;
- skb_queue_head_init(&list);
+ __skb_queue_head_init(&list);
spin_lock_bh(&port->bc_queue.lock);
skb_queue_splice_tail_init(&port->bc_queue, &list);
@@ -1082,9 +1082,15 @@ static void macvlan_port_destroy(struct net_device *dev)
{
struct macvlan_port *port = macvlan_port_get_rtnl(dev);
- cancel_work_sync(&port->bc_work);
dev->priv_flags &= ~IFF_MACVLAN_PORT;
netdev_rx_handler_unregister(dev);
+
+ /* After this point, no packet can schedule bc_work anymore,
+ * but we need to cancel it and purge left skbs if any.
+ */
+ cancel_work_sync(&port->bc_work);
+ __skb_queue_purge(&port->bc_queue);
+
kfree_rcu(port, rcu);
}
^ permalink raw reply related
* Re: irq disable in __netdev_alloc_frag() ?
From: Alexei Starovoitov @ 2014-10-23 2:22 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Eric Dumazet, Network Development
In-Reply-To: <1414029160.2094.8.camel@edumazet-glaptop2.roam.corp.google.com>
On Wed, Oct 22, 2014 at 6:52 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> On Wed, 2014-10-22 at 17:15 -0700, Alexei Starovoitov wrote:
>> Hi Eric,
>>
>> in the commit 6f532612cc24 ("net: introduce netdev_alloc_frag()")
>> you mentioned that the reason to disable interrupts
>> in __netdev_alloc_frag() is:
>> "- Must be IRQ safe (non NAPI drivers can use it)"
>>
>> Is there a way to do this conditionally?
>>
>> Without it I see 10% performance gain for my RX tests
>> (from 6.9Mpps to 7.7Mpps) and __netdev_alloc_frag()
>> itself goes from 6.6% to 2.1%
>> (popf seems to be quite costly)
>
> Well, your driver is probably a NAPI one, so you need to
> mask irqs, or to remove all non NAPI drivers from linux.
yeah, the 10G+ nics I care about are all napi :)
> __netdev_alloc_frag() (__netdev_alloc_skb()) is used by all.
>
> Problem is __netdev_alloc_frag() is generally deep inside caller
> chain, so using a private pool might have quite an overhead.
yes. I was thinking, since dev is already passed
into __netdev_alloc_skb(), we can check whether
dev registered with napi via dev->napi_list and if so,
tell inner __netdev_alloc_frag() to skip irq disabling...
don't know about skb_queue_head() and friends.
I'm only looking at pure rx now. One challenge at a time.
^ permalink raw reply
* [PATCH net-next v2 6/6] ethernet: samsung: sxgbe: remove unnecessary check
From: Varka Bhadram @ 2014-10-23 1:58 UTC (permalink / raw)
To: netdev; +Cc: davem, Varka Bhadram
In-Reply-To: <1414029531-5067-1-git-send-email-varkab@cdac.in>
devm_ioremap_resource checks platform_get_resource() return value.
We can remove the duplicate check here.
Signed-off-by: Varka Bhadram <varkab@cdac.in>
---
.../net/ethernet/samsung/sxgbe/sxgbe_platform.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index b147d46..7fd6e27 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
@@ -90,9 +90,6 @@ static int sxgbe_platform_probe(struct platform_device *pdev)
/* Get memory resource */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- goto err_out;
-
addr = devm_ioremap_resource(dev, res);
if (IS_ERR(addr))
return PTR_ERR(addr);
--
1.7.9.5
^ permalink raw reply related
* [PATCH net-next v2 5/6] ethernet: renesas: remove unnecessary check
From: Varka Bhadram @ 2014-10-23 1:58 UTC (permalink / raw)
To: netdev; +Cc: davem, Varka Bhadram
In-Reply-To: <1414029531-5067-1-git-send-email-varkab@cdac.in>
devm_ioremap_resource checks platform_get_resource() return value.
We can remove the duplicate check here.
Signed-off-by: Varka Bhadram <varkab@cdac.in>
---
drivers/net/ethernet/renesas/sh_eth.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 60e9c2c..ffb49f3 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2769,10 +2769,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
/* get base addr */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (unlikely(res == NULL)) {
- dev_err(&pdev->dev, "invalid resource\n");
- return -EINVAL;
- }
ndev = alloc_etherdev(sizeof(struct sh_eth_private));
if (!ndev)
@@ -2781,8 +2777,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
pm_runtime_enable(&pdev->dev);
pm_runtime_get_sync(&pdev->dev);
- /* The sh Ether-specific entries in the device structure. */
- ndev->base_addr = res->start;
devno = pdev->id;
if (devno < 0)
devno = 0;
@@ -2806,6 +2800,9 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
goto out_release;
}
+ /* The sh Ether-specific entries in the device structure. */
+ ndev->base_addr = res->start;
+
spin_lock_init(&mdp->lock);
mdp->pdev = pdev;
--
1.7.9.5
^ permalink raw reply related
* [PATCH net-next v2 4/6] ethernet: marvell: remove unnecessary check
From: Varka Bhadram @ 2014-10-23 1:58 UTC (permalink / raw)
To: netdev; +Cc: davem, Varka Bhadram
In-Reply-To: <1414029531-5067-1-git-send-email-varkab@cdac.in>
devm_ioremap_resource checks platform_get_resource() return value.
We can remove the duplicate check here.
Signed-off-by: Varka Bhadram <varkab@cdac.in>
---
drivers/net/ethernet/marvell/pxa168_eth.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index c3b209c..a378c92 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1505,16 +1505,14 @@ static int pxa168_eth_probe(struct platform_device *pdev)
pep = netdev_priv(dev);
pep->dev = dev;
pep->clk = clk;
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (res == NULL) {
- err = -ENODEV;
- goto err_netdev;
- }
pep->base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(pep->base)) {
err = -ENOMEM;
goto err_netdev;
}
+
res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
BUG_ON(!res);
dev->irq = res->start;
--
1.7.9.5
^ permalink raw reply related
* [PATCH net-next v2 3/6] ethernet: apm: xgene: remove unnecessary check
From: Varka Bhadram @ 2014-10-23 1:58 UTC (permalink / raw)
To: netdev; +Cc: davem, Varka Bhadram
In-Reply-To: <1414029531-5067-1-git-send-email-varkab@cdac.in>
devm_ioremap_resource checks platform_get_resource() return value.
We can remove the duplicate check here.
Signed-off-by: Varka Bhadram <varkab@cdac.in>
---
drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 3c208cc..f226594 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -761,10 +761,6 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
ndev = pdata->ndev;
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "enet_csr");
- if (!res) {
- dev_err(dev, "Resource enet_csr not defined\n");
- return -ENODEV;
- }
pdata->base_addr = devm_ioremap_resource(dev, res);
if (IS_ERR(pdata->base_addr)) {
dev_err(dev, "Unable to retrieve ENET Port CSR region\n");
@@ -772,10 +768,6 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
}
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ring_csr");
- if (!res) {
- dev_err(dev, "Resource ring_csr not defined\n");
- return -ENODEV;
- }
pdata->ring_csr_addr = devm_ioremap_resource(dev, res);
if (IS_ERR(pdata->ring_csr_addr)) {
dev_err(dev, "Unable to retrieve ENET Ring CSR region\n");
@@ -783,10 +775,6 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
}
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ring_cmd");
- if (!res) {
- dev_err(dev, "Resource ring_cmd not defined\n");
- return -ENODEV;
- }
pdata->ring_cmd_addr = devm_ioremap_resource(dev, res);
if (IS_ERR(pdata->ring_cmd_addr)) {
dev_err(dev, "Unable to retrieve ENET Ring command region\n");
--
1.7.9.5
^ permalink raw reply related
* [PATCH net-next v2 2/6] ethernet: wiznet: remove unnecessary check
From: Varka Bhadram @ 2014-10-23 1:58 UTC (permalink / raw)
To: netdev; +Cc: davem, Varka Bhadram
In-Reply-To: <1414029531-5067-1-git-send-email-varkab@cdac.in>
devm_ioremap_resource checks platform_get_resource() return value.
We can remove the duplicate check here.
Signed-off-by: Varka Bhadram <varkab@cdac.in>
---
drivers/net/ethernet/wiznet/w5300.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index f961f14..7974b7d 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -558,14 +558,12 @@ static int w5300_hw_probe(struct platform_device *pdev)
}
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!mem)
- return -ENXIO;
- mem_size = resource_size(mem);
-
priv->base = devm_ioremap_resource(&pdev->dev, mem);
if (IS_ERR(priv->base))
return PTR_ERR(priv->base);
+ mem_size = resource_size(mem);
+
spin_lock_init(&priv->reg_lock);
priv->indirect = mem_size < W5300_BUS_DIRECT_SIZE;
if (priv->indirect) {
--
1.7.9.5
^ permalink raw reply related
* [PATCH net-next v2 1/6] ethernet: wiznet: remove unnecessary check
From: Varka Bhadram @ 2014-10-23 1:58 UTC (permalink / raw)
To: netdev; +Cc: davem, Varka Bhadram
In-Reply-To: <1414029531-5067-1-git-send-email-varkab@cdac.in>
devm_ioremap_resource checks platform_get_resource() return value.
We can remove the duplicate check here.
Signed-off-by: Varka Bhadram <varkab@cdac.in>
---
drivers/net/ethernet/wiznet/w5100.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index 0f56b1c..70a930a 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -638,14 +638,12 @@ static int w5100_hw_probe(struct platform_device *pdev)
}
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!mem)
- return -ENXIO;
- mem_size = resource_size(mem);
-
priv->base = devm_ioremap_resource(&pdev->dev, mem);
if (IS_ERR(priv->base))
return PTR_ERR(priv->base);
+ mem_size = resource_size(mem);
+
spin_lock_init(&priv->reg_lock);
priv->indirect = mem_size < W5100_BUS_DIRECT_SIZE;
if (priv->indirect) {
--
1.7.9.5
^ permalink raw reply related
* [PATCH net-next v2 0/6] cleanup on resource check
From: Varka Bhadram @ 2014-10-23 1:58 UTC (permalink / raw)
To: netdev; +Cc: davem, Varka Bhadram
This series removes the duplicate sanity checks on the resource
returned by platform_get_resource(). The resource is already
validated by devm_ioremap_resource().
changes since v1:
- remove NULL dereference on resource_size()
Varka Bhadram (6):
ethernet: wiznet: remove unnecessary check
ethernet: wiznet: remove unnecessary check
ethernet: apm: xgene: remove unnecessary check
ethernet: marvell: remove unnecessary check
ethernet: renesas: remove unnecessary check
ethernet: samsung: sxgbe: remove unnecessary check
drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 12 ------------
drivers/net/ethernet/marvell/pxa168_eth.c | 6 ++----
drivers/net/ethernet/renesas/sh_eth.c | 9 +++------
.../net/ethernet/samsung/sxgbe/sxgbe_platform.c | 3 ---
drivers/net/ethernet/wiznet/w5100.c | 6 ++----
drivers/net/ethernet/wiznet/w5300.c | 6 ++----
6 files changed, 9 insertions(+), 33 deletions(-)
--
1.7.9.5
^ permalink raw reply
* Re: irq disable in __netdev_alloc_frag() ?
From: Eric Dumazet @ 2014-10-23 1:52 UTC (permalink / raw)
To: Alexei Starovoitov; +Cc: Eric Dumazet, Network Development
In-Reply-To: <CAMEtUuwsUqd-U8ZSEXCB+a7cvLpbuRjPU2m0Ux84q6cxoWSx+g@mail.gmail.com>
On Wed, 2014-10-22 at 17:15 -0700, Alexei Starovoitov wrote:
> Hi Eric,
>
> in the commit 6f532612cc24 ("net: introduce netdev_alloc_frag()")
> you mentioned that the reason to disable interrupts
> in __netdev_alloc_frag() is:
> "- Must be IRQ safe (non NAPI drivers can use it)"
>
> Is there a way to do this conditionally?
>
> Without it I see 10% performance gain for my RX tests
> (from 6.9Mpps to 7.7Mpps) and __netdev_alloc_frag()
> itself goes from 6.6% to 2.1%
> (popf seems to be quite costly)
Well, your driver is probably a NAPI one, so you need to
mask irqs, or to remove all non NAPI drivers from linux.
__netdev_alloc_frag() (__netdev_alloc_skb()) is used by all.
Problem is __netdev_alloc_frag() is generally deep inside caller
chain, so using a private pool might have quite an overhead.
Same could be said for skb_queue_head() /skb_queue_tail() /
sock_queue_rcv_skb() :
Many callers don't need to block irq.
^ permalink raw reply
* Re: [RFC] tcp md5 use of alloc_percpu
From: Eric Dumazet @ 2014-10-23 1:47 UTC (permalink / raw)
To: Crestez Dan Leonard; +Cc: Jonathan Toppins, netdev
In-Reply-To: <54485337.5040108@gmail.com>
On Thu, 2014-10-23 at 04:00 +0300, Crestez Dan Leonard wrote:
> On 10/23/2014 02:38 AM, Jonathan Toppins wrote:
> > On 10/22/14, 2:55 PM, Crestez Dan Leonard wrote:
> >> sg_init_one does virt_addr on the pointer which assumes it is directly accessible. But the tcp_md5sig_pool pointer comes from alloc_percpu which can return memory from the vmalloc area after the pcpu_first_chunk is exhausted. This looks wrong to me. I am getting crashes on mips and I believe this to be the cause.
> >
> > Thinking about this more if the issue really is sg_init_one assumes a
> > directly accessible memory region, can we just modify the zone
> > allocation to GFP_DMA using alloc_percpu_gfp()? Does this satisfy the
> > assumptions made by sg_init_one?
> I don't think that alloc_percpu_gfp can be used that way. Looking at the
> code it only checks for GFP_KERNEL and behaves "atomically" if it is not
> present. This means that it fails rather than vmalloc a new percpu_chunk.
>
> The problem is not that the memory is not allocated with GFP_DMA but
> rather that the memory is allocated with vmalloc.
Could you try the following patch ?
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1bec4e76d88c..d253ad8ced64 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2868,30 +2868,29 @@ EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
+static bool tcp_md5sig_pool_populated = false;
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
+static void tcp_free_md5sig_pool(void)
{
int cpu;
for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
+ struct crypto_hash *hash;
+
+ hash = per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm;
- if (p->md5_desc.tfm)
- crypto_free_hash(p->md5_desc.tfm);
+ if (hash) {
+ crypto_free_hash(hash);
+ per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = NULL;
+ }
}
- free_percpu(pool);
}
static void __tcp_alloc_md5sig_pool(void)
{
int cpu;
- struct tcp_md5sig_pool __percpu *pool;
-
- pool = alloc_percpu(struct tcp_md5sig_pool);
- if (!pool)
- return;
for_each_possible_cpu(cpu) {
struct crypto_hash *hash;
@@ -2900,29 +2899,29 @@ static void __tcp_alloc_md5sig_pool(void)
if (IS_ERR_OR_NULL(hash))
goto out_free;
- per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
+ per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
}
- /* before setting tcp_md5sig_pool, we must commit all writes
- * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+ /* before setting tcp_md5sig_pool_populated, we must commit all writes
+ * to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool = pool;
+ tcp_md5sig_pool_populated = true;
return;
out_free:
- __tcp_free_md5sig_pool(pool);
+ tcp_free_md5sig_pool();
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool)) {
+ if (unlikely(!tcp_md5sig_pool_populated)) {
mutex_lock(&tcp_md5sig_mutex);
- if (!tcp_md5sig_pool)
+ if (!tcp_md5sig_pool_populated)
__tcp_alloc_md5sig_pool();
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool != NULL;
+ return tcp_md5sig_pool_populated;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -2936,13 +2935,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
*/
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
- struct tcp_md5sig_pool __percpu *p;
-
local_bh_disable();
- p = ACCESS_ONCE(tcp_md5sig_pool);
- if (p)
- return raw_cpu_ptr(p);
+ if (tcp_md5sig_pool_populated) {
+ /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool */
+ smp_rmb();
+ return this_cpu_ptr(&tcp_md5sig_pool);
+ }
local_bh_enable();
return NULL;
}
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox