* [PATCH v2 11/15] net: dsa/mv88e6352: Implement EEPROM access functions
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414342365-27191-1-git-send-email-linux@roeck-us.net>
MV88E6352 supports read and write access to its configuration eeprom.
There is no means to detect if an EEPROM is connected to the switch.
Also, the switch supports EEPROMs with different sizes, but can not detect
or report the type or size of connected EEPROMs. Therefore, do not implement
the get_eeprom_len callback but depend on platform or devicetree data to
provide information about EEPROM presence and size.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2:
- EEPROM length must now be provided through platform or devicetree data
drivers/net/dsa/mv88e6352.c | 222 +++++++++++++++++++++++++++++++++++++++++++-
drivers/net/dsa/mv88e6xxx.h | 5 +
2 files changed, 224 insertions(+), 3 deletions(-)
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 744e6fa..8a956f9 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -22,18 +22,18 @@
#include <net/dsa.h>
#include "mv88e6xxx.h"
-static int mv88e6352_phy_wait(struct dsa_switch *ds)
+static int mv88e6352_wait(struct dsa_switch *ds, int reg, u16 mask)
{
unsigned long timeout = jiffies + HZ / 10;
while (time_before(jiffies, timeout)) {
int ret;
- ret = REG_READ(REG_GLOBAL2, 0x18);
+ ret = REG_READ(REG_GLOBAL2, reg);
if (ret < 0)
return ret;
- if (!(ret & 0x8000))
+ if (!(ret & mask))
return 0;
usleep_range(1000, 2000);
@@ -41,6 +41,21 @@ static int mv88e6352_phy_wait(struct dsa_switch *ds)
return -ETIMEDOUT;
}
+static inline int mv88e6352_phy_wait(struct dsa_switch *ds)
+{
+ return mv88e6352_wait(ds, 0x18, 0x8000);
+}
+
+static inline int mv88e6352_eeprom_load_wait(struct dsa_switch *ds)
+{
+ return mv88e6352_wait(ds, 0x14, 0x0800);
+}
+
+static inline int mv88e6352_eeprom_busy_wait(struct dsa_switch *ds)
+{
+ return mv88e6352_wait(ds, 0x14, 0x8000);
+}
+
static int __mv88e6352_phy_read(struct dsa_switch *ds, int addr, int regnum)
{
int ret;
@@ -429,6 +444,7 @@ static int mv88e6352_setup(struct dsa_switch *ds)
mutex_init(&ps->smi_mutex);
mutex_init(&ps->stats_mutex);
mutex_init(&ps->phy_mutex);
+ mutex_init(&ps->eeprom_mutex);
ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
@@ -525,6 +541,204 @@ static struct mv88e6xxx_hw_stat mv88e6352_hw_stats[] = {
{ "hist_1024_max_bytes", 4, 0x0d, },
};
+static int mv88e6352_read_eeprom_word(struct dsa_switch *ds, int addr)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
+ mutex_lock(&ps->eeprom_mutex);
+
+ ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, 0x14,
+ 0xc000 | (addr & 0xff));
+ if (ret < 0)
+ goto error;
+
+ ret = mv88e6352_eeprom_busy_wait(ds);
+ if (ret < 0)
+ goto error;
+
+ ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2, 0x15);
+error:
+ mutex_unlock(&ps->eeprom_mutex);
+ return ret;
+}
+
+static int mv88e6352_get_eeprom(struct dsa_switch *ds,
+ struct ethtool_eeprom *eeprom, u8 *data)
+{
+ int offset;
+ int len;
+ int ret;
+
+ offset = eeprom->offset;
+ len = eeprom->len;
+ eeprom->len = 0;
+
+ eeprom->magic = 0xc3ec4951;
+
+ ret = mv88e6352_eeprom_load_wait(ds);
+ if (ret < 0)
+ return ret;
+
+ if (offset & 1) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ *data++ = (word >> 8) & 0xff;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ while (len >= 2) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ *data++ = word & 0xff;
+ *data++ = (word >> 8) & 0xff;
+
+ offset += 2;
+ len -= 2;
+ eeprom->len += 2;
+ }
+
+ if (len) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ *data++ = word & 0xff;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ return 0;
+}
+
+static int mv88e6352_eeprom_is_readonly(struct dsa_switch *ds)
+{
+ int ret;
+
+ ret = mv88e6xxx_reg_read(ds, REG_GLOBAL2, 0x14);
+ if (ret < 0)
+ return ret;
+
+ if (!(ret & 0x0400))
+ return -EROFS;
+
+ return 0;
+}
+
+static int mv88e6352_write_eeprom_word(struct dsa_switch *ds, int addr,
+ u16 data)
+{
+ struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+ int ret;
+
+ mutex_lock(&ps->eeprom_mutex);
+
+ ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, 0x15, data);
+ if (ret < 0)
+ goto error;
+
+ ret = mv88e6xxx_reg_write(ds, REG_GLOBAL2, 0x14,
+ 0xb000 | (addr & 0xff));
+ if (ret < 0)
+ goto error;
+
+ ret = mv88e6352_eeprom_busy_wait(ds);
+error:
+ mutex_unlock(&ps->eeprom_mutex);
+ return ret;
+}
+
+static int mv88e6352_set_eeprom(struct dsa_switch *ds,
+ struct ethtool_eeprom *eeprom, u8 *data)
+{
+ int offset;
+ int ret;
+ int len;
+
+ if (eeprom->magic != 0xc3ec4951)
+ return -EINVAL;
+
+ ret = mv88e6352_eeprom_is_readonly(ds);
+ if (ret)
+ return ret;
+
+ offset = eeprom->offset;
+ len = eeprom->len;
+ eeprom->len = 0;
+
+ ret = mv88e6352_eeprom_load_wait(ds);
+ if (ret < 0)
+ return ret;
+
+ if (offset & 1) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ word = (*data++ << 8) | (word & 0xff);
+
+ ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word);
+ if (ret < 0)
+ return ret;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ while (len >= 2) {
+ int word;
+
+ word = *data++;
+ word |= *data++ << 8;
+
+ ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word);
+ if (ret < 0)
+ return ret;
+
+ offset += 2;
+ len -= 2;
+ eeprom->len += 2;
+ }
+
+ if (len) {
+ int word;
+
+ word = mv88e6352_read_eeprom_word(ds, offset >> 1);
+ if (word < 0)
+ return word;
+
+ word = (word & 0xff00) | *data++;
+
+ ret = mv88e6352_write_eeprom_word(ds, offset >> 1, word);
+ if (ret < 0)
+ return ret;
+
+ offset++;
+ len--;
+ eeprom->len++;
+ }
+
+ return 0;
+}
+
static void
mv88e6352_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
{
@@ -562,6 +776,8 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.set_temp_limit = mv88e6352_set_temp_limit,
.get_temp_alarm = mv88e6352_get_temp_alarm,
#endif
+ .get_eeprom = mv88e6352_get_eeprom,
+ .set_eeprom = mv88e6352_set_eeprom,
};
MODULE_ALIAS("platform:mv88e6352");
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index c0ce133..29feed0 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -43,6 +43,11 @@ struct mv88e6xxx_priv_state {
*/
struct mutex phy_mutex;
+ /* This mutex serializes eeprom access for chips with
+ * eeprom support.
+ */
+ struct mutex eeprom_mutex;
+
int id; /* switch product id */
};
--
1.9.1
^ permalink raw reply related
* [PATCH v2 12/15] net: dsa: Add support for reading switch registers with ethtool
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414342365-27191-1-git-send-email-linux@roeck-us.net>
Add support for reading switch registers with 'ethtool -d'.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2:
- Do not compare new function pointers against NULL
- Check if get_regs is set before calling it
include/net/dsa.h | 7 +++++++
net/dsa/slave.c | 23 +++++++++++++++++++++++
2 files changed, 30 insertions(+)
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 37856a2..ed3c34b 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -268,6 +268,13 @@ struct dsa_switch_driver {
struct ethtool_eeprom *eeprom, u8 *data);
int (*set_eeprom)(struct dsa_switch *ds,
struct ethtool_eeprom *eeprom, u8 *data);
+
+ /*
+ * Register access.
+ */
+ int (*get_regs_len)(struct dsa_switch *ds, int port);
+ void (*get_regs)(struct dsa_switch *ds, int port,
+ struct ethtool_regs *regs, void *p);
};
void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ff2fbe7..474f296 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -249,6 +249,27 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
}
+static int dsa_slave_get_regs_len(struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->get_regs_len)
+ return ds->drv->get_regs_len(ds, p->port);
+
+ return -EOPNOTSUPP;
+}
+
+static void
+dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->get_regs)
+ ds->drv->get_regs(ds, p->port, regs, _p);
+}
+
static int dsa_slave_nway_reset(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -423,6 +444,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_settings = dsa_slave_get_settings,
.set_settings = dsa_slave_set_settings,
.get_drvinfo = dsa_slave_get_drvinfo,
+ .get_regs_len = dsa_slave_get_regs_len,
+ .get_regs = dsa_slave_get_regs,
.nway_reset = dsa_slave_nway_reset,
.get_link = dsa_slave_get_link,
.get_eeprom_len = dsa_slave_get_eeprom_len,
--
1.9.1
^ permalink raw reply related
* [PATCH v2 13/15] net: dsa/mv88e6123_61_65: Add support for reading switch registers
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414342365-27191-1-git-send-email-linux@roeck-us.net>
The infrastructure can now report switch registers to ethtool.
Add support for it to the mv88e6123_61_65 driver.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2:
- No change
drivers/net/dsa/mv88e6123_61_65.c | 2 ++
drivers/net/dsa/mv88e6xxx.c | 24 ++++++++++++++++++++++++
drivers/net/dsa/mv88e6xxx.h | 3 +++
3 files changed, 29 insertions(+)
diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c
index 9f43c9b..e2d2e37 100644
--- a/drivers/net/dsa/mv88e6123_61_65.c
+++ b/drivers/net/dsa/mv88e6123_61_65.c
@@ -470,6 +470,8 @@ struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
#ifdef CONFIG_NET_DSA_HWMON
.get_temp = mv88e6123_61_65_get_temp,
#endif
+ .get_regs_len = mv88e6xxx_get_regs_len,
+ .get_regs = mv88e6xxx_get_regs,
};
MODULE_ALIAS("platform:mv88e6123");
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 8e1090b..c071fde 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -499,6 +499,30 @@ void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
mutex_unlock(&ps->stats_mutex);
}
+int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port)
+{
+ return 32 * sizeof(u16);
+}
+
+void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
+ struct ethtool_regs *regs, void *_p)
+{
+ u16 *p = _p;
+ int i;
+
+ regs->version = 0;
+
+ memset(p, 0xff, 32 * sizeof(u16));
+
+ for (i = 0; i < 32; i++) {
+ int ret;
+
+ ret = mv88e6xxx_reg_read(ds, REG_PORT(port), i);
+ if (ret >= 0)
+ p[i] = ret;
+ }
+}
+
static int __init mv88e6xxx_init(void)
{
#if IS_ENABLED(CONFIG_NET_DSA_MV88E6131)
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 29feed0..a0780b0 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -78,6 +78,9 @@ void mv88e6xxx_get_strings(struct dsa_switch *ds,
void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
int nr_stats, struct mv88e6xxx_hw_stat *stats,
int port, uint64_t *data);
+int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port);
+void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
+ struct ethtool_regs *regs, void *_p);
extern struct dsa_switch_driver mv88e6131_switch_driver;
extern struct dsa_switch_driver mv88e6123_61_65_switch_driver;
--
1.9.1
^ permalink raw reply related
* [PATCH v2 15/15] net: dsa: Provide additional RMON statistics
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414342365-27191-1-git-send-email-linux@roeck-us.net>
Display sw_in_discards, sw_in_filtered, and sw_out_filtered for chips
supported by mv88e6123_61_65 and mv88e6352 drivers.
The variables are provided in port registers, not the normal status registers.
Mark by adding 0x100 to the register offset and add special handling code
to mv88e6xxx_get_ethtool_stats.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2:
- No change
drivers/net/dsa/mv88e6123_61_65.c | 3 +++
drivers/net/dsa/mv88e6352.c | 3 +++
drivers/net/dsa/mv88e6xxx.c | 26 +++++++++++++++++++++-----
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c
index e2d2e37..9a3f9e0 100644
--- a/drivers/net/dsa/mv88e6123_61_65.c
+++ b/drivers/net/dsa/mv88e6123_61_65.c
@@ -433,6 +433,9 @@ static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = {
{ "hist_256_511bytes", 4, 0x0b, },
{ "hist_512_1023bytes", 4, 0x0c, },
{ "hist_1024_max_bytes", 4, 0x0d, },
+ { "sw_in_discards", 4, 0x110, },
+ { "sw_in_filtered", 2, 0x112, },
+ { "sw_out_filtered", 2, 0x113, },
};
static void
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index d5bbe49..258d9ef 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -539,6 +539,9 @@ static struct mv88e6xxx_hw_stat mv88e6352_hw_stats[] = {
{ "hist_256_511bytes", 4, 0x0b, },
{ "hist_512_1023bytes", 4, 0x0c, },
{ "hist_1024_max_bytes", 4, 0x0d, },
+ { "sw_in_discards", 4, 0x110, },
+ { "sw_in_filtered", 2, 0x112, },
+ { "sw_out_filtered", 2, 0x113, },
};
static int mv88e6352_read_eeprom_word(struct dsa_switch *ds, int addr)
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index c071fde..da558d8 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -485,17 +485,33 @@ void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
for (i = 0; i < nr_stats; i++) {
struct mv88e6xxx_hw_stat *s = stats + i;
u32 low;
- u32 high;
-
+ u32 high = 0;
+
+ if (s->reg >= 0x100) {
+ int ret;
+
+ ret = mv88e6xxx_reg_read(ds, REG_PORT(port),
+ s->reg - 0x100);
+ if (ret < 0)
+ goto error;
+ low = ret;
+ if (s->sizeof_stat == 4) {
+ ret = mv88e6xxx_reg_read(ds, REG_PORT(port),
+ s->reg - 0x100 + 1);
+ if (ret < 0)
+ goto error;
+ high = ret;
+ }
+ data[i] = (((u64)high) << 16) | low;
+ continue;
+ }
mv88e6xxx_stats_read(ds, s->reg, &low);
if (s->sizeof_stat == 8)
mv88e6xxx_stats_read(ds, s->reg + 1, &high);
- else
- high = 0;
data[i] = (((u64)high) << 32) | low;
}
-
+error:
mutex_unlock(&ps->stats_mutex);
}
--
1.9.1
^ permalink raw reply related
* [PATCH v2 00/15] net: dsa: Fixes and enhancements
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
Patch 01/15 addresses an annoying and unhelpful log message.
Patches 02/15 and 03/15 are minor enhancements, adding support for
known switch revisions.
Patches 04/15 and 05/15 add support for MV88E6352 and MV88E6176.
Patch 06/15 adds support for hardware monitoring, specifically for
reporting the chip temperature, to the dsa subsystem.
Patches 07/15 and 08/15 implement hardware monitoring for MV88E6352,
MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
Patch 09/15 and 10/15 add support for EEPROM access to the DSA subsystem.
Patch 11/15 implements EEPROM access for MV88E6352 and MV88E6176.
Patch 12/15 adds support for reading switch registers to the DSA
subsystem.
Patches 13/15 amd 14/15 implement support for reading switch registers
to the drivers for MV88E6352, MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
Patch 15/15 adds support for reading additional RMON registers to the drivers
for MV88E6352, MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
The series resides and was tested on top of v3.18-rc1 in a system
with MV88E6352. Testing in systems with 88E6131, 88E6060 and MV88E6165
was done earlier (I don't have access to those systems right now).
The series applies cleanly to net-next as of today (10/26).
v2:
- Made reporting chip temperatures through the hwmon subsystem optional
with new Kconfig option
- Changed the hwmon chip name to <network device name>_dsa<index>
- Made EEPROM presence and size configurable through platform and devicetree
data
- Various minor changes and fixes (see individual patches for details)
----------------------------------------------------------------
Guenter Roeck (15):
net: dsa: Don't set skb->protocol on outgoing tagged packets
net: dsa: Report known silicon revisions for Marvell 88E6060
net: dsa: Report known silicon revisions for Marvell 88E6131
net: dsa: Add support for Marvell 88E6352
net: dsa/mv88e6352: Add support for MV88E6176
net: dsa: Add support for reporting switch chip temperatures
net: dsa/mv88e6352: Report chip temperature
net: dsa/mv88e6123_61_65: Report chip temperature
net: dsa: Add support for switch EEPROM access
dsa: Add new optional devicetree property to describe EEPROM size
net: dsa/mv88e6352: Implement EEPROM access functions
net: dsa: Add support for reading switch registers with ethtool
net: dsa/mv88e6123_61_65: Add support for reading switch registers
net: dsa/mv88e6352: Add support for reading switch registers
net: dsa: Provide additional RMON statistics
Documentation/devicetree/bindings/net/dsa/dsa.txt | 6 +-
MAINTAINERS | 5 +
drivers/net/dsa/Kconfig | 9 +
drivers/net/dsa/Makefile | 3 +
drivers/net/dsa/mv88e6060.c | 5 +-
drivers/net/dsa/mv88e6123_61_65.c | 73 +-
drivers/net/dsa/mv88e6131.c | 12 +-
drivers/net/dsa/mv88e6352.c | 788 ++++++++++++++++++++++
drivers/net/dsa/mv88e6xxx.c | 53 +-
drivers/net/dsa/mv88e6xxx.h | 15 +
include/net/dsa.h | 33 +
net/dsa/Kconfig | 11 +
net/dsa/dsa.c | 135 ++++
net/dsa/slave.c | 64 ++
net/dsa/tag_dsa.c | 2 -
net/dsa/tag_edsa.c | 2 -
net/dsa/tag_trailer.c | 2 -
17 files changed, 1199 insertions(+), 19 deletions(-)
create mode 100644 drivers/net/dsa/mv88e6352.c
^ permalink raw reply
* [PATCH v2 01/15] net: dsa: Don't set skb->protocol on outgoing tagged packets
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414342365-27191-1-git-send-email-linux@roeck-us.net>
Setting skb->protocol to a private protocol type may result in warning
messages such as
e1000e 0000:00:19.0 em1: checksum_partial proto=dada!
This happens if the L3 protocol is IP or IPv6 and skb->ip_summed is set
to CHECKSUM_PARTIAL. Looking through the code, it appears that changing
skb->protocol for transmitted packets is not necessary and may actually
be harmful. Drop it.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2:
- No change
net/dsa/tag_dsa.c | 2 --
net/dsa/tag_edsa.c | 2 --
net/dsa/tag_trailer.c | 2 --
3 files changed, 6 deletions(-)
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index ce90c8b..2dab270 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -63,8 +63,6 @@ static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
dsa_header[3] = 0x00;
}
- skb->protocol = htons(ETH_P_DSA);
-
skb->dev = p->parent->dst->master_netdev;
dev_queue_xmit(skb);
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 94fcce7..9aeda59 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -76,8 +76,6 @@ static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[7] = 0x00;
}
- skb->protocol = htons(ETH_P_EDSA);
-
skb->dev = p->parent->dst->master_netdev;
dev_queue_xmit(skb);
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 115fdca..e268f9d 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -57,8 +57,6 @@ static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
trailer[2] = 0x10;
trailer[3] = 0x00;
- nskb->protocol = htons(ETH_P_TRAILER);
-
nskb->dev = p->parent->dst->master_netdev;
dev_queue_xmit(nskb);
--
1.9.1
^ permalink raw reply related
* [PATCH v2 02/15] net: dsa: Report known silicon revisions for Marvell 88E6060
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414342365-27191-1-git-send-email-linux@roeck-us.net>
Report known silicon revisions when probing Marvell 88E6060 switches.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2:
- No change
drivers/net/dsa/mv88e6060.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index 05b0ca3..c29aebe 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -69,8 +69,11 @@ static char *mv88e6060_probe(struct device *host_dev, int sw_addr)
ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
if (ret >= 0) {
- ret &= 0xfff0;
if (ret == 0x0600)
+ return "Marvell 88E6060 (A0)";
+ if (ret == 0x0601 || ret == 0x0602)
+ return "Marvell 88E6060 (B0)";
+ if ((ret & 0xfff0) == 0x0600)
return "Marvell 88E6060";
}
--
1.9.1
^ permalink raw reply related
* [PATCH v2 03/15] net: dsa: Report known silicon revisions for Marvell 88E6131
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414342365-27191-1-git-send-email-linux@roeck-us.net>
Report known silicon revisions when probing Marvell 88E6131 switches.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2:
- No change
drivers/net/dsa/mv88e6131.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c
index 244c735..1230f52 100644
--- a/drivers/net/dsa/mv88e6131.c
+++ b/drivers/net/dsa/mv88e6131.c
@@ -21,6 +21,7 @@
#define ID_6085 0x04a0
#define ID_6095 0x0950
#define ID_6131 0x1060
+#define ID_6131_B2 0x1066
static char *mv88e6131_probe(struct device *host_dev, int sw_addr)
{
@@ -32,12 +33,15 @@ static char *mv88e6131_probe(struct device *host_dev, int sw_addr)
ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
if (ret >= 0) {
- ret &= 0xfff0;
- if (ret == ID_6085)
+ int ret_masked = ret & 0xfff0;
+
+ if (ret_masked == ID_6085)
return "Marvell 88E6085";
- if (ret == ID_6095)
+ if (ret_masked == ID_6095)
return "Marvell 88E6095/88E6095F";
- if (ret == ID_6131)
+ if (ret == ID_6131_B2)
+ return "Marvell 88E6131 (B2)";
+ if (ret_masked == ID_6131)
return "Marvell 88E6131";
}
--
1.9.1
^ permalink raw reply related
* [PATCH v2 14/15] net: dsa/mv88e6352: Add support for reading switch registers
From: Guenter Roeck @ 2014-10-26 16:52 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
Guenter Roeck
In-Reply-To: <1414342365-27191-1-git-send-email-linux@roeck-us.net>
Report switch register values to ethtool.
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
v2:
- No change
drivers/net/dsa/mv88e6352.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 8a956f9..d5bbe49 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -778,6 +778,8 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
#endif
.get_eeprom = mv88e6352_get_eeprom,
.set_eeprom = mv88e6352_set_eeprom,
+ .get_regs_len = mv88e6xxx_get_regs_len,
+ .get_regs = mv88e6xxx_get_regs,
};
MODULE_ALIAS("platform:mv88e6352");
--
1.9.1
^ permalink raw reply related
* [PATCH] PPC: bpf_jit_comp: add SKF_AD_PKTTYPE instruction
From: Denis Kirjanov @ 2014-10-26 19:23 UTC (permalink / raw)
To: netdev; +Cc: linuxppc-dev, Denis Kirjanov, Matt Evans
Cc: Matt Evans <matt@ozlabs.org>
Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
---
arch/powerpc/include/asm/ppc-opcode.h | 1 +
arch/powerpc/net/bpf_jit.h | 7 +++++++
arch/powerpc/net/bpf_jit_comp.c | 5 +++++
3 files changed, 13 insertions(+)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 6f85362..1a52877 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -204,6 +204,7 @@
#define PPC_INST_ERATSX_DOT 0x7c000127
/* Misc instructions for BPF compiler */
+#define PPC_INST_LBZ 0x88000000
#define PPC_INST_LD 0xe8000000
#define PPC_INST_LHZ 0xa0000000
#define PPC_INST_LHBRX 0x7c00062c
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 9aee27c..c406aa9 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -87,6 +87,9 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \
___PPC_RA(base) | ((i) & 0xfffc))
+
+#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \
+ ___PPC_RA(base) | IMM_L(i))
#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \
___PPC_RA(base) | IMM_L(i))
#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \
@@ -96,6 +99,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
#define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \
___PPC_RA(base) | ___PPC_RB(b))
/* Convenience helpers for the above with 'far' offsets: */
+#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i); \
+ else { PPC_ADDIS(r, base, IMM_HA(i)); \
+ PPC_LBZ(r, r, IMM_L(i)); } } while(0)
+
#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \
else { PPC_ADDIS(r, base, IMM_HA(i)); \
PPC_LD(r, r, IMM_L(i)); } } while(0)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index cbae2df..d110e28 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -407,6 +407,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
queue_mapping));
break;
+ case BPF_ANC | SKF_AD_PKTTYPE:
+ PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET());
+ PPC_ANDI(r_A, r_A, PKT_TYPE_MAX);
+ PPC_SRWI(r_A, r_A, 5);
+ break;
case BPF_ANC | SKF_AD_CPU:
#ifdef CONFIG_SMP
/*
--
2.1.0
^ permalink raw reply related
* Re: some failures with vxlan offloads..
From: Or Gerlitz @ 2014-10-26 22:23 UTC (permalink / raw)
To: Tom Herbert
Cc: Or Gerlitz, netdev@vger.kernel.org, John Fastabend, Jeff Kirsher
In-Reply-To: <CA+mtBx-Jp5Kps5WycYwN0PuyXLDtGM0s8USxdQoSLPRDVS19xw@mail.gmail.com>
On Sun, Oct 26, 2014 at 5:29 PM, Tom Herbert <therbert@google.com> wrote:
> On Sun, Oct 26, 2014 at 6:36 AM, Or Gerlitz <ogerlitz@mellanox.com> wrote:
>> In the cases where it breaks I can see
>> UDP: bad checksum. From 192.168.31.18:54748 to 192.168.31.17:4789
>> ulen 726
>> prints from __udp4_lib_rcv() in the kernel log of the node where offloads
>> are OFF, where the bad packet is sent from the host where offloading is
>> enabled. I guess the packet is just dropped:
> Can you determine what the TSO HW engine is setting in UDP checksum
> field? tcpdump -vv might be able to show this. The symptoms seem to
> indicate that it may not be zero.
Thanks for the quick response. I'll check what is placed in the UDP
checksum field for packets that went through the offloading HW and let
you know.
BTW, if following the direction you proposed, I wonder why this works
(e.g the kernel doesn't drops the encapsulated TCP packets) when both
sides are offloaded?
Tomorrow (Monday) I am OOO so will be able to do these further tests Tuesday.
Or.
^ permalink raw reply
* Re: [PATCH] ipv6: notify userspace when we added or changed an ipv6 token
From: Lubomir Rintel @ 2014-10-26 22:28 UTC (permalink / raw)
To: Daniel Borkmann
Cc: netdev, linux-kernel, David S. Miller, Hannes Frederic Sowa
In-Reply-To: <543B9F5A.8070909@redhat.com>
On Mon, 2014-10-13 at 11:46 +0200, Daniel Borkmann wrote:
> On 10/10/2014 04:08 PM, Lubomir Rintel wrote:
> > NetworkManager might want to know that it changed when the router advertisement
> > arrives.
> >
> > Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
> > Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
> > Cc: Daniel Borkmann <dborkman@redhat.com>
> > ---
> > net/ipv6/addrconf.c | 1 +
> > 1 file changed, 1 insertion(+)
> >
> > diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> > index 3e118df..3d11390 100644
> > --- a/net/ipv6/addrconf.c
> > +++ b/net/ipv6/addrconf.c
> > @@ -4528,6 +4528,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
> > }
> >
> > write_unlock_bh(&idev->lock);
> > + netdev_state_change(dev);
>
> I'm wondering why netdev_state_change()? You are probably
> only after the netlink notification that is being invoked,
> i.e. rtmsg_ifinfo(RTM_NEWLINK, ...), and don't strictly want
> to call the device notifier chain.
Correct. I'll change it to just rtmsg_ifinfo().
> Perhaps it might be better to define a new RTM_SETTOKEN, and
> just call inet6_ifinfo_notify(RTM_SETTOKEN, idev) as this is
> only idev specific anyway?
I'm not really sure as that would require more userspace changes than
necessary.
> > addrconf_verify_rtnl();
> > return 0;
> > }
> >
^ permalink raw reply
* [PATCH v2] ipv6: notify userspace when we added or changed an ipv6 token
From: Lubomir Rintel @ 2014-10-26 22:41 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Lubomir Rintel, Hannes Frederic Sowa,
Daniel Borkmann
In-Reply-To: <1412950112-15593-1-git-send-email-lkundrak@v3.sk>
NetworkManager might want to know that it changed when the router advertisement
arrives.
Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Daniel Borkmann <dborkman@redhat.com>
---
Changes since v1:
- Do not call device notifier chain with netdev_state_change()
net/ipv6/addrconf.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3e118df..f6f92f5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4528,6 +4528,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
}
write_unlock_bh(&idev->lock);
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
addrconf_verify_rtnl();
return 0;
}
--
1.9.3
^ permalink raw reply related
* Re: [PATCH v2] ipv6: notify userspace when we added or changed an ipv6 token
From: Daniel Borkmann @ 2014-10-26 23:22 UTC (permalink / raw)
To: Lubomir Rintel; +Cc: netdev, David S. Miller, Hannes Frederic Sowa
In-Reply-To: <1414363283-31410-1-git-send-email-lkundrak@v3.sk>
On 10/26/2014 11:41 PM, Lubomir Rintel wrote:
> NetworkManager might want to know that it changed when the router advertisement
> arrives.
>
> Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Cc: Daniel Borkmann <dborkman@redhat.com>
The reason why I asked regarding the rtmsg_ifinfo() vs inet6_ifinfo_notify()
in v1 is actually that this is an idev-only specific action. By using
inet6_ifinfo_notify() for notification, the kernel would actually need to do
much less work: the notification only needs inet6_fill_ifinfo() as that would
contain the new token, while the rtmsg_ifinfo() is rather dev-centric and
fills out attributes about the _whole_ device (which surely includes the IPv6
idev attributes, but also a lot more, which might actually be unnecessary
here), see also:
$ git grep -n rtmsg_ifinfo net/ipv6/
$ git grep -n inet6_ifinfo_notify net/ipv6/
net/ipv6/addrconf.c:2919: inet6_ifinfo_notify(RTM_NEWLINK, idev);
net/ipv6/addrconf.c:4650:void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
net/ipv6/ndisc.c:1239: inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
net/ipv6/ndisc.c:1256: inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
net/ipv6/ndisc.c:1712: inet6_ifinfo_notify(RTM_NEWLINK, idev);
Maybe I'm missing something, so can you elaborate why it's _absolutely not_
possible to use inet6_ifinfo_notify()?
> ---
> Changes since v1:
> - Do not call device notifier chain with netdev_state_change()
>
> net/ipv6/addrconf.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 3e118df..f6f92f5 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -4528,6 +4528,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
> }
>
> write_unlock_bh(&idev->lock);
> + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
> addrconf_verify_rtnl();
> return 0;
> }
>
^ permalink raw reply
* Re: Fw: [Bug 86851] New: Reproducible panic on heavy UDP traffic
From: Nikolay Aleksandrov @ 2014-10-26 23:28 UTC (permalink / raw)
To: Florian Westphal, Stephen Hemminger; +Cc: netdev
In-Reply-To: <20141025214448.GB28407@breakpoint.cc>
On 10/25/2014 11:44 PM, Florian Westphal wrote:
> Stephen Hemminger <stephen@networkplumber.org> wrote:
>
> [ CC Nik ]
>
>> Date: Fri, 24 Oct 2014 11:34:08 -0700
>> From: "bugzilla-daemon@bugzilla.kernel.org" <bugzilla-daemon@bugzilla.kernel.org>
>> To: "stephen@networkplumber.org" <stephen@networkplumber.org>
>> Subject: [Bug 86851] New: Reproducible panic on heavy UDP traffic
>>
>>
>> https://bugzilla.kernel.org/show_bug.cgi?id=86851
>>
>> Bug ID: 86851
>> Summary: Reproducible panic on heavy UDP traffic
>> Product: Networking
>> Version: 2.5
>> Kernel Version: 3.18-rc1
>> Hardware: x86-64
>> OS: Linux
>> Tree: Mainline
>> Status: NEW
>> Severity: normal
>> Priority: P1
>> Component: IPV4
>> Assignee: shemminger@linux-foundation.org
>> Reporter: chutzpah@gentoo.org
>> Regression: No
>>
>> Created attachment 154861
>> --> https://bugzilla.kernel.org/attachment.cgi?id=154861&action=edit
>> Panic message captured over serial console
>
> general protection fault: 0000 [#1] SMP
> Modules linked in: nfs [..]
> CPU: 7 PID: 257 Comm: kworker/7:1 Tainted: G W 3.18.0-rc1-base-7+ #2
>
> asked reporter to check if there is a warning before the oops.
>
> Hardware name: Intel Corporation S2600GZ/S2600GZ, BIOS SE5C600.86B.02.03.0003.041920141333 04/19/2014
> Workqueue: events inet_frag_worker
> task: ffff882fd32e70e0 ti: ffff882fd0adc000 task.ti: ffff882fd0adc000
> RIP: 0010:[<ffffffff81592ab4>] [<ffffffff81592ab4>] inet_evict_bucket+0xf4/0x180
> RSP: 0018:ffff882fd0adfd58 EFLAGS: 00010286
> RAX: ffff8817c7230701 RBX: dead000000100100 RCX: 0000000180300013
>
> Hello LIST_POISON!
>
> RDX: 0000000180300014 RSI: 0000000000000001 RDI: dead0000001000c0
> RBP: 0000000000000002 R08: 0000000000000202 R09: ffff88303fc39ab0
> R10: ffffffff81592ac0 R11: ffffea005f1c8c00 R12: ffffffff81aa2820
> R13: ffff882fd0adfd70 R14: ffff8817c72307e0 R15: 0000000000000000
> FS: 0000000000000000(0000) GS:ffff88303fc20000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> device rack0a left promiscuous mode
> CR2: 00007f054c7ba034 CR3: 0000002fc4986000 CR4: 00000000001407e0
> Stack:
> ffffffff81aa3298 ffffffff81aa3290 ffff8817d0820a08 0000000000000000
> 0000000000000000 00000000000000a8 0000000000000008 ffff88303fc32780
> ffffffff81aa6820 0000000000000059[ 2415.026338] device rack1a left promiscuous mode
>
> 0000000000000000 ffffffff81592ba2
> Call Trace:
> [<ffffffff81592ba2>] ? inet_frag_worker+0x62/0x210
> [<ffffffff8112c312>] ? process_one_work+0x132/0x360
> [..]
> crash is in hlist_for_each_entry_safe() at the end of inet_evict_bucket(), looks like
> we encounter an already-list_del'd element while iterating.
>
> Will look at this tomorrow.
>
Thanks for CCing me.
I'll dig in the code tomorrow but my first thought when I saw this was
could it be possible that we have a race condition between
ip_frag_queue() and inet_frag_evict(), more precisely between the
ipq_kill() calls from ip_frag_queue and inet_frag_evict since the frag
could be found before we have entered the evictor which then can add it to
its expire list but the ipq_kill() from ip_frag_queue() can do a list_del
after we release the chain lock in the evictor so we may end up like this ?
^ permalink raw reply
* Re: [PATCH] netlink: don't copy over empty attribute data
From: Sasha Levin @ 2014-10-26 23:32 UTC (permalink / raw)
To: David Miller; +Cc: a.ryabinin, pablo, mschmidt, akpm, linux-kernel, netdev
In-Reply-To: <20141022.021508.2011745433893496421.davem@davemloft.net>
On 10/22/2014 02:15 AM, David Miller wrote:
> From: Sasha Levin <sasha.levin@oracle.com>
> Date: Tue, 21 Oct 2014 22:19:36 -0400
>
>> On 10/21/2014 09:39 PM, David Miller wrote:
>>> From: Sasha Levin <sasha.levin@oracle.com>
>>> Date: Tue, 21 Oct 2014 16:51:09 -0400
>>>
>>>>> netlink uses empty data to seperate different levels. However, we still
>>>>> try to copy that data from a NULL ptr using memcpy, which is an undefined
>>>>> behaviour.
>>>>>
>>>>> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
>>> This isn't a POSIX C library, this it the Linux kernel, and as such
>>> we can make sure none of our memcpy() implementations try to access
>>> any bytes if the given length is NULL.
>>
>> We can make *our* implementations work around that undefined behaviour if we
>> want, but right now our implementations is to call GCC's builtin memcpy(),
>> which follows the standards and doesn't allow you to call it with NULL 'from'
>> ptr.
>>
>> The fact that it doesn't die and behaves properly is just "luck".
>
> If GCC's internal memcpy() starts accessing past 'len', I'm going
> to report the bug rather than code around it.
How so? GCC states clearly that you should *never* pass a NULL pointer there:
"The pointers passed to memmove (and similar functions in <string.h>) must
be non-null even when nbytes==0" (https://gcc.gnu.org/gcc-4.9/porting_to.html).
Even if it doesn't dereference it, it can break somehow in a subtle way. Leaving
the kernel code assuming that gcc (or any other compiler) would always behave
the same in a situation that shouldn't occur.
Thanks,
Sasha
^ permalink raw reply
* [PATCH net] net: phy: Add SGMII Configuration for Marvell 88E1145 Initialization
From: Vince Bridgers @ 2014-10-26 19:22 UTC (permalink / raw)
To: f.fainelli, netdev; +Cc: vbridger, vbridger
Marvell phy 88E1145 configuration & initialization was missing a case
for initializing SGMII mode. This patch adds that case.
Signed-off-by: Vince Bridgers <vbridger@opensource.altera.com>
---
drivers/net/phy/marvell.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index bd37e45..225c033 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -50,10 +50,15 @@
#define MII_M1011_PHY_SCR 0x10
#define MII_M1011_PHY_SCR_AUTO_CROSS 0x0060
+#define MII_M1145_PHY_EXT_SR 0x1b
#define MII_M1145_PHY_EXT_CR 0x14
#define MII_M1145_RGMII_RX_DELAY 0x0080
#define MII_M1145_RGMII_TX_DELAY 0x0002
+#define MII_M1145_HWCFG_MODE_SGMII_NO_CLK 0x4
+#define MII_M1145_HWCFG_MODE_MASK 0xf
+#define MII_M1145_HWCFG_FIBER_COPPER_AUTO 0x8000
+
#define MII_M1111_PHY_LED_CONTROL 0x18
#define MII_M1111_PHY_LED_DIRECT 0x4100
#define MII_M1111_PHY_LED_COMBINE 0x411c
@@ -676,6 +681,20 @@ static int m88e1145_config_init(struct phy_device *phydev)
}
}
+ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+ int temp = phy_read(phydev, MII_M1145_PHY_EXT_SR);
+ if (temp < 0)
+ return temp;
+
+ temp &= ~MII_M1145_HWCFG_MODE_MASK;
+ temp |= MII_M1145_HWCFG_MODE_SGMII_NO_CLK;
+ temp |= MII_M1145_HWCFG_FIBER_COPPER_AUTO;
+
+ err = phy_write(phydev, MII_M1145_PHY_EXT_SR, temp);
+ if (err < 0)
+ return err;
+ }
+
err = marvell_of_reg_init(phydev);
if (err < 0)
return err;
--
1.9.1
^ permalink raw reply related
* Re: Fw: [Bug 86851] New: Reproducible panic on heavy UDP traffic
From: Eric Dumazet @ 2014-10-27 0:47 UTC (permalink / raw)
To: Nikolay Aleksandrov; +Cc: Florian Westphal, Stephen Hemminger, netdev
In-Reply-To: <544D8386.9030609@redhat.com>
On Mon, 2014-10-27 at 00:28 +0100, Nikolay Aleksandrov wrote:
>
> Thanks for CCing me.
> I'll dig in the code tomorrow but my first thought when I saw this was
> could it be possible that we have a race condition between
> ip_frag_queue() and inet_frag_evict(), more precisely between the
> ipq_kill() calls from ip_frag_queue and inet_frag_evict since the frag
> could be found before we have entered the evictor which then can add it to
> its expire list but the ipq_kill() from ip_frag_queue() can do a list_del
> after we release the chain lock in the evictor so we may end up like this ?
Yes, either we use hlist_del_init() but loose poison aid, or test if
frag was evicted :
Not sure about refcount.
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 9eb89f3f0ee4..894ec30c5896 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -285,7 +285,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
struct inet_frag_bucket *hb;
hb = get_frag_bucket_locked(fq, f);
- hlist_del(&fq->list);
+ if (!(fq->flags & INET_FRAG_EVICTED))
+ hlist_del(&fq->list);
spin_unlock(&hb->chain_lock);
}
^ permalink raw reply related
* Re: [QA-TCP] How to send tcp small packages immediately?
From: Zhangjie (HZ) @ 2014-10-27 1:08 UTC (permalink / raw)
To: Rick Jones, kvm, Jason Wang, Michael S. Tsirkin, linux-kernel,
netdev, liuyongan, qinchuanyu
In-Reply-To: <544A6E12.2000007@hp.com>
Thanks!
On 2014/10/24 23:19, Rick Jones wrote:
> On 10/24/2014 12:41 AM, Zhangjie (HZ) wrote:
>> Hi,
>>
>> I use netperf to test the performance of small tcp package, with TCP_NODELAY set :
>>
>> netperf -H 129.9.7.164 -l 100 -- -m 512 -D
>>
>> Among the packages I got by tcpdump, there is not only small packages, also lost of
>> big ones (skb->len=65160).
>>
>> IP 129.9.7.186.60840 > 129.9.7.164.34607: tcp 65160
>> IP 129.9.7.164.34607 > 129.9.7.186.60840: tcp 0
>> IP 129.9.7.164.34607 > 129.9.7.186.60840: tcp 0
>> IP 129.9.7.164.34607 > 129.9.7.186.60840: tcp 0
>> IP 129.9.7.186.60840 > 129.9.7.164.34607: tcp 65160
>> IP 129.9.7.164.34607 > 129.9.7.186.60840: tcp 0
>> IP 129.9.7.164.34607 > 129.9.7.186.60840: tcp 0
>> IP 129.9.7.164.34607 > 129.9.7.186.60840: tcp 0
>> IP 129.9.7.186.60840 > 129.9.7.164.34607: tcp 80
>> IP 129.9.7.186.60840 > 129.9.7.164.34607: tcp 512
>> IP 129.9.7.186.60840 > 129.9.7.164.34607: tcp 512
>>
>> SO, how to test small tcp packages? Including TCP_NODELAY, What else should be set?
>
> Well, I don't think there is anything else you can set. Even with TCP_NODELAY set, segment size with TCP will still be controlled by factors such as congestion window.
>
> I am ass-u-me-ing your packet trace is at the sender. I suppose if your sender were fast enough compared to the path that might combine with congestion window to result in the very large segments.
>
> Not to say there cannot be a bug somewhere with TSO overriding TCP_NODELAY, but in broad terms, even TCP_NODELAY does not guarantee small TCP segments. That has been something of a bane on my attempts to use TCP for aggregate small-packet performance measurements via netperf for quite some time.
>
> And since you seem to have included a virtualization mailing list I would also ass-u-me that virtualization is involved somehow. Knuth only knows how that will affect the timing of events, which will be very much involved in matters of congestion window and such. I suppose it is even possible that if the packet trace is on a VM receiver that some delays in getting the VM running could mean that GRO would end-up making large segments being pushed up the stack.
>
> happy benchmarking,
Yes. Using netperf to send tcp packages frome physical nic has the same problems.
Thanks for your explanation!
>
> rick jones
> .
>
--
Best Wishes!
Zhang Jie
^ permalink raw reply
* Re: some failures with vxlan offloads..
From: Tom Herbert @ 2014-10-27 1:23 UTC (permalink / raw)
To: Or Gerlitz
Cc: Or Gerlitz, netdev@vger.kernel.org, John Fastabend, Jeff Kirsher
In-Reply-To: <CAJ3xEMi6UuCT+6iriX02qjS6trhEaoYh39UBL+p_i995MvZc2Q@mail.gmail.com>
On Sun, Oct 26, 2014 at 3:23 PM, Or Gerlitz <gerlitz.or@gmail.com> wrote:
> On Sun, Oct 26, 2014 at 5:29 PM, Tom Herbert <therbert@google.com> wrote:
>> On Sun, Oct 26, 2014 at 6:36 AM, Or Gerlitz <ogerlitz@mellanox.com> wrote:
>
>>> In the cases where it breaks I can see
>>> UDP: bad checksum. From 192.168.31.18:54748 to 192.168.31.17:4789
>>> ulen 726
>>> prints from __udp4_lib_rcv() in the kernel log of the node where offloads
>>> are OFF, where the bad packet is sent from the host where offloading is
>>> enabled. I guess the packet is just dropped:
>
>> Can you determine what the TSO HW engine is setting in UDP checksum
>> field? tcpdump -vv might be able to show this. The symptoms seem to
>> indicate that it may not be zero.
>
> Thanks for the quick response. I'll check what is placed in the UDP
> checksum field for packets that went through the offloading HW and let
> you know.
>
> BTW, if following the direction you proposed, I wonder why this works
> (e.g the kernel doesn't drops the encapsulated TCP packets) when both
> sides are offloaded?
>
I'm just speculating, but the device may be returning checksum
unnecessary for the UDP checksum without actually checking it.
Technically, VXLAN RFC7348 allows an implementation to ignore the UDP
checksum, although this clearly violates RFC1122 UDP checksum
requirements. In the stack we now checksum all non-zero checksums
including UDP checksum in VXLAN if it's not marked
checksum-unnecessary.
Tom
> Tomorrow (Monday) I am OOO so will be able to do these further tests Tuesday.
>
> Or.
^ permalink raw reply
* Re: [PATCH] netlink: don't copy over empty attribute data
From: David Miller @ 2014-10-27 2:03 UTC (permalink / raw)
To: sasha.levin; +Cc: a.ryabinin, pablo, mschmidt, akpm, linux-kernel, netdev
In-Reply-To: <544D849A.4040304@oracle.com>
From: Sasha Levin <sasha.levin@oracle.com>
Date: Sun, 26 Oct 2014 19:32:42 -0400
> How so? GCC states clearly that you should *never* pass a NULL
> pointer there:
>
> "The pointers passed to memmove (and similar functions in <string.h>) must
> be non-null even when nbytes==0" (https://gcc.gnu.org/gcc-4.9/porting_to.html).
>
> Even if it doesn't dereference it, it can break somehow in a subtle way. Leaving
> the kernel code assuming that gcc (or any other compiler) would always behave
> the same in a situation that shouldn't occur.
Show me a legal way in which one could legally dereference the pointer
when length is zero, and I'll entertain this patch.
^ permalink raw reply
* [PATCH] skbuff.h: fix kernel-doc warning for headers_end
From: Randy Dunlap @ 2014-10-27 2:14 UTC (permalink / raw)
To: netdev@vger.kernel.org, David Miller
From: Randy Dunlap <rdunlap@infradead.org>
Fix kernel-doc warning in <linux/skbuff.h> by making both headers_start
and headers_end private fields.
Warning(..//include/linux/skbuff.h:654): No description found for parameter 'headers_end[0]'
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
---
include/linux/skbuff.h | 4 ++++
1 file changed, 4 insertions(+)
Or I can document those two fields if you would rather not have them
marked as private...
--- lnx-318-rc2.orig/include/linux/skbuff.h
+++ lnx-318-rc2/include/linux/skbuff.h
@@ -557,7 +557,9 @@ struct sk_buff {
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
+ /* private: */
__u32 headers_start[0];
+ /* public: */
/* if you move pkt_type around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
@@ -642,7 +644,9 @@ struct sk_buff {
__u16 network_header;
__u16 mac_header;
+ /* private: */
__u32 headers_end[0];
+ /* public: */
/* These elements must be at the end, see alloc_skb() for details. */
sk_buff_data_t tail;
^ permalink raw reply
* Re: [PATCH v2 09/15] net: dsa: Add support for switch EEPROM access
From: Andrew Lunn @ 2014-10-27 2:41 UTC (permalink / raw)
To: Guenter Roeck
Cc: netdev, David S. Miller, Florian Fainelli, Andrew Lunn,
linux-kernel
In-Reply-To: <1414342365-27191-10-git-send-email-linux@roeck-us.net>
On Sun, Oct 26, 2014 at 09:52:39AM -0700, Guenter Roeck wrote:
> On some chips it is possible to access the switch eeprom.
> Add infrastructure support for it.
>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
> v2:
> - Add support for configuring switch EEPROM size through platform data
> or devicetree data
> - Do not compare new pointers against NULL
> - Check if get_eeprom pointer is set before calling it
>
> include/net/dsa.h | 10 ++++++++++
> net/dsa/dsa.c | 4 ++++
> net/dsa/slave.c | 41 +++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 55 insertions(+)
>
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 55e75e7..37856a2 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -38,6 +38,9 @@ struct dsa_chip_data {
> struct device *host_dev;
> int sw_addr;
>
> + /* set to size of eeprom if supported by the switch */
> + int eeprom_len;
> +
> /* Device tree node pointer for this specific switch chip
> * used during switch setup in case additional properties
> * and resources needs to be used
> @@ -258,6 +261,13 @@ struct dsa_switch_driver {
> int (*set_temp_limit)(struct dsa_switch *ds, int temp);
> int (*get_temp_alarm)(struct dsa_switch *ds, bool *alarm);
> #endif
> +
> + /* EEPROM access */
> + int (*get_eeprom_len)(struct dsa_switch *ds);
> + int (*get_eeprom)(struct dsa_switch *ds,
> + struct ethtool_eeprom *eeprom, u8 *data);
> + int (*set_eeprom)(struct dsa_switch *ds,
> + struct ethtool_eeprom *eeprom, u8 *data);
> };
>
> void register_switch_driver(struct dsa_switch_driver *type);
> diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
> index 5edbbca..1c1b925 100644
> --- a/net/dsa/dsa.c
> +++ b/net/dsa/dsa.c
> @@ -575,6 +575,7 @@ static int dsa_of_probe(struct platform_device *pdev)
> const char *port_name;
> int chip_index, port_index;
> const unsigned int *sw_addr, *port_reg;
> + u32 eeprom_length;
> int ret;
>
> mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
> @@ -603,6 +604,9 @@ static int dsa_of_probe(struct platform_device *pdev)
> if (pd->nr_chips > DSA_MAX_SWITCHES)
> pd->nr_chips = DSA_MAX_SWITCHES;
>
> + if (!of_property_read_u32(np, "eeprom-length", &eeprom_length))
> + pd->eeprom_length = eeprom_length;
> +
Hi Guenter
I would of expected a property to indicate the eeprom is present, not
a length value. The switch determines the length, or at least the
minimum length. So the switch driver can return the length, if the
eeprom is indicated to be present.
Also, all device tree properties need to be documented. I've not
looked through all the patches, so maybe it is in a separate patch?
> +static int dsa_slave_get_eeprom_len(struct net_device *dev)
> +{
> + struct dsa_slave_priv *p = netdev_priv(dev);
> + struct dsa_switch *ds = p->parent;
> +
> + if (ds->pd->eeprom_len)
> + return ds->pd->eeprom_len;
> +
> + if (ds->drv->get_eeprom_len)
> + return ds->drv->get_eeprom_len(ds);
> +
> + return 0;
> +}
This also does not look right. The default is that there is no
property in DT, meaning the EEPROM is not present. So
ds->pd->eeprom_len is zero. So the first if is not taken. It then
calls into the driver which is return a length, independent of if it
is populated or not.
If we are going with your suggested binding, no value in DT, or a
value of 0 mean no EEPROM. If there is a value in DT
dsa_slave_get_eeprom_len() should return that value. Asking the driver
is redundant, you can remove the get_eeprom_len() call.
However, if DT just indicates the eeprom is populated or not, you
should call the driver get_eeprom_len() is the eeprom property is
present.
> +static int dsa_slave_get_eeprom(struct net_device *dev,
> + struct ethtool_eeprom *eeprom, u8 *data)
> +{
> + struct dsa_slave_priv *p = netdev_priv(dev);
> + struct dsa_switch *ds = p->parent;
> +
> + if (ds->drv->get_eeprom)
> + return ds->drv->get_eeprom(ds, eeprom, data);
This should be conditional on the length/present.
> +
> + return -EOPNOTSUPP;
> +}
> +
> +static int dsa_slave_set_eeprom(struct net_device *dev,
> + struct ethtool_eeprom *eeprom, u8 *data)
> +{
> + struct dsa_slave_priv *p = netdev_priv(dev);
> + struct dsa_switch *ds = p->parent;
> +
> + if (ds->drv->set_eeprom)
> + return ds->drv->set_eeprom(ds, eeprom, data);
Same again.
Andrew
> + return -EOPNOTSUPP;
> +}
> +
> static void dsa_slave_get_strings(struct net_device *dev,
> uint32_t stringset, uint8_t *data)
> {
> @@ -387,6 +425,9 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
> .get_drvinfo = dsa_slave_get_drvinfo,
> .nway_reset = dsa_slave_nway_reset,
> .get_link = dsa_slave_get_link,
> + .get_eeprom_len = dsa_slave_get_eeprom_len,
> + .get_eeprom = dsa_slave_get_eeprom,
> + .set_eeprom = dsa_slave_set_eeprom,
> .get_strings = dsa_slave_get_strings,
> .get_ethtool_stats = dsa_slave_get_ethtool_stats,
> .get_sset_count = dsa_slave_get_sset_count,
> --
> 1.9.1
>
^ permalink raw reply
* Re: [PATHC] net: napi_reuse_skb() should check pfmemalloc
From: David Miller @ 2014-10-27 2:47 UTC (permalink / raw)
To: eric.dumazet
Cc: klamm, jeffrey.t.kirsher, jesse.brandeburg, bruce.w.allan,
carolyn.wyborny, donald.c.skidmore, gregory.v.rose,
peter.p.waskiewicz.jr, alexander.h.duyck, john.ronciak,
tushar.n.dave, sassmann, gregkh, e1000-devel, netdev,
linux-kernel
In-Reply-To: <1414071030.2094.46.camel@edumazet-glaptop2.roam.corp.google.com>
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 23 Oct 2014 06:30:30 -0700
> From: Eric Dumazet <edumazet@google.com>
>
> Do not reuse skb if it was pfmemalloc tainted, otherwise
> future frame might be dropped anyway.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
Applied, thanks Eric.
^ permalink raw reply
* Re: IPv6 UFO for VMs
From: Ben Hutchings @ 2014-10-27 3:21 UTC (permalink / raw)
To: Hannes Frederic Sowa; +Cc: netdev, virtualization
In-Reply-To: <1413970541.2337.16.camel@localhost>
[-- Attachment #1: Type: text/plain, Size: 2173 bytes --]
On Wed, 2014-10-22 at 11:35 +0200, Hannes Frederic Sowa wrote:
> On Mi, 2014-10-22 at 00:44 +0100, Ben Hutchings wrote:
> > There are several ways that VMs can take advantage of UFO and get the
> > host to do fragmentation for them:
> >
> > drivers/net/macvtap.c: gso_type = SKB_GSO_UDP;
> > drivers/net/tun.c: skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
> > drivers/net/virtio_net.c: skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
> >
> > Our implementation of UFO for IPv6 does:
> >
> > fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
> > fptr->nexthdr = nexthdr;
> > fptr->reserved = 0;
> > fptr->identification = skb_shinfo(skb)->ip6_frag_id;
> >
> > which assumes ip6_frag_id has been set. That's only true if the local
> > stack constructed the skb; otherwise it appears we get zero.
> >
> > This seems to be a regression as a result of:
> >
> > commit 916e4cf46d0204806c062c8c6c4d1f633852c5b6
> > Author: Hannes Frederic Sowa <hannes@stressinduktion.org>
> > Date: Fri Feb 21 02:55:35 2014 +0100
> >
> > ipv6: reuse ip6_frag_id from ip6_ufo_append_data
> >
> > However, that change seems reasonable - we *shouldn't* be choosing IDs
> > for any other stack. Any paravirt net driver that can use IPv6 UFO
> > needs to have some way of passing a fragmentation ID to put in
> > skb_shared_info::ip6_frag_id.
>
> Do we really gain a lot of performance by enabling UFO on those devices
> or would it make sense to just drop support? It only helps fragmenting
> large UDP packets, so I don't think it is worth it.
It's not been important enough for anyone to bother implementing it in
hardware/firmware aside from Neterion.
I'll shortly post patches to disable it.
Ben.
> Otherwise I agree with Ben, we need to pass a fragmentation id from the
> host over to the system segmenting the gso frame. Fragmentation ids must
> be generated by the end system.
>
> Hmm...
--
Ben Hutchings
Theory and practice are closer in theory than in practice.
- John Levine, moderator of comp.compilers
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 811 bytes --]
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox