Netdev List
 help / color / mirror / Atom feed
* [PATCH 06/14] net: dsa: Add support for hardware monitoring
From: Guenter Roeck @ 2014-10-23  4:03 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
	Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>

Some Marvell switches provide chip temperature data.
Add support for reporting it to the dsa infrastructure.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/net/dsa.h |   6 +++
 net/dsa/dsa.c     | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index b765592..d8b3057 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -242,6 +242,12 @@ struct dsa_switch_driver {
 			   struct ethtool_eee *e);
 	int	(*get_eee)(struct dsa_switch *ds, int port,
 			   struct ethtool_eee *e);
+
+	/* Hardware monitoring */
+	int	(*get_temp)(struct dsa_switch *ds, int *temp);
+	int	(*get_temp_limit)(struct dsa_switch *ds, int *temp);
+	int	(*set_temp_limit)(struct dsa_switch *ds, int temp);
+	int	(*get_temp_alarm)(struct dsa_switch *ds, bool *alarm);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 22f34cf..6567c8e 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -9,6 +9,8 @@
  * (at your option) any later version.
  */
 
+#include <linux/device.h>
+#include <linux/hwmon.h>
 #include <linux/list.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -17,6 +19,7 @@
 #include <linux/of.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
+#include <linux/sysfs.h>
 #include "dsa_priv.h"
 
 char dsa_driver_version[] = "0.1";
@@ -71,6 +74,104 @@ dsa_switch_probe(struct device *host_dev, int sw_addr, char **_name)
 	return ret;
 }
 
+/* hwmon support ************************************************************/
+
+#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
+
+static ssize_t temp1_input_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct dsa_switch *ds = dev_get_drvdata(dev);
+	int temp, ret;
+
+	ret = ds->drv->get_temp(ds, &temp);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d\n", temp * 1000);
+}
+static DEVICE_ATTR_RO(temp1_input);
+
+static ssize_t temp1_max_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct dsa_switch *ds = dev_get_drvdata(dev);
+	int temp, ret;
+
+	ret = ds->drv->get_temp_limit(ds, &temp);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d\n", temp * 1000);
+}
+
+static ssize_t temp1_max_store(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t count)
+{
+	struct dsa_switch *ds = dev_get_drvdata(dev);
+	int temp, ret;
+
+	ret = kstrtoint(buf, 0, &temp);
+	if (ret < 0)
+		return ret;
+
+	ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000));
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR(temp1_max, S_IRUGO, temp1_max_show, temp1_max_store);
+
+static ssize_t temp1_max_alarm_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct dsa_switch *ds = dev_get_drvdata(dev);
+	bool alarm;
+	int ret;
+
+	ret = ds->drv->get_temp_alarm(ds, &alarm);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d\n", alarm);
+}
+static DEVICE_ATTR_RO(temp1_max_alarm);
+
+static struct attribute *dsa_hwmon_attrs[] = {
+	&dev_attr_temp1_input.attr,	/* 0 */
+	&dev_attr_temp1_max.attr,	/* 1 */
+	&dev_attr_temp1_max_alarm.attr,	/* 2 */
+	NULL
+};
+
+static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj,
+				       struct attribute *attr, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct dsa_switch *ds = dev_get_drvdata(dev);
+	struct dsa_switch_driver *drv = ds->drv;
+	umode_t mode = attr->mode;
+
+	if (index == 1) {
+		if (!drv->get_temp_limit)
+			mode = 0;
+		else if (drv->set_temp_limit)
+			mode |= S_IWUSR;
+	} else if (index == 2 && !drv->get_temp_alarm) {
+		mode = 0;
+	}
+	return mode;
+}
+
+static const struct attribute_group dsa_hwmon_group = {
+	.attrs = dsa_hwmon_attrs,
+	.is_visible = dsa_hwmon_attrs_visible,
+};
+__ATTRIBUTE_GROUPS(dsa_hwmon);
+
+#endif /* CONFIG_HWMON */
 
 /* basic switch operations **************************************************/
 static struct dsa_switch *
@@ -225,6 +326,16 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
 		ds->ports[i] = slave_dev;
 	}
 
+#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
+	/* If the switch provides a temperature sensor,
+	 * register with hardware monitoring subsystem.
+	 * Treat registration error as non-fatal and ignore it.
+	 */
+	if (drv->get_temp)
+		devm_hwmon_device_register_with_groups(parent, "dsa", ds,
+						       dsa_hwmon_groups);
+#endif
+
 	return ds;
 
 out_free:
-- 
1.9.1

^ permalink raw reply related

* [PATCH 07/14] net: dsa/mv88e6352: Report chip temperature
From: Guenter Roeck @ 2014-10-23  4:03 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
	Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>

MV88E6352 supports reading the chip temperature from two PHY registers,
6:26 and 6:27. Report it using the more accurate register 6:27.
Also report temperature limit and alarm.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/net/dsa/mv88e6352.c | 96 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index f17364f..aff6695 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -63,6 +63,41 @@ static int __mv88e6352_phy_write(struct dsa_switch *ds, int addr, int regnum,
 	return mv88e6352_phy_wait(ds);
 }
 
+static int mv88e6352_phy_page_read(struct dsa_switch *ds,
+				   int port, int page, int reg)
+{
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+	int ret;
+
+	mutex_lock(&ps->phy_mutex);
+	ret = __mv88e6352_phy_write(ds, port, 0x16, page);
+	if (ret < 0)
+		goto error;
+	ret = __mv88e6352_phy_read(ds, port, reg);
+error:
+	__mv88e6352_phy_write(ds, port, 0x16, 0x0);
+	mutex_unlock(&ps->phy_mutex);
+	return ret;
+}
+
+static int mv88e6352_phy_page_write(struct dsa_switch *ds,
+				    int port, int page, int reg, int val)
+{
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+	int ret;
+
+	mutex_lock(&ps->phy_mutex);
+	ret = __mv88e6352_phy_write(ds, port, 0x16, page);
+	if (ret < 0)
+		goto error;
+
+	ret = __mv88e6352_phy_write(ds, port, reg, val);
+error:
+	__mv88e6352_phy_write(ds, port, 0x16, 0x0);
+	mutex_unlock(&ps->phy_mutex);
+	return ret;
+}
+
 static char *mv88e6352_probe(struct device *host_dev, int sw_addr)
 {
 	struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev);
@@ -325,6 +360,63 @@ static int mv88e6352_setup_port(struct dsa_switch *ds, int p)
 	return 0;
 }
 
+static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp)
+{
+	int ret;
+
+	*temp = 0;
+
+	ret = mv88e6352_phy_page_read(ds, 0, 6, 27);
+	if (ret < 0)
+		return ret;
+
+	*temp = (ret & 0xff) - 25;
+
+	return 0;
+}
+
+static int mv88e6352_get_temp_limit(struct dsa_switch *ds, int *temp)
+{
+	int ret;
+
+	*temp = 0;
+
+	ret = mv88e6352_phy_page_read(ds, 0, 6, 26);
+	if (ret < 0)
+		return ret;
+
+	*temp = (((ret >> 8) & 0x1f) * 5) - 25;
+
+	return 0;
+}
+
+static int mv88e6352_set_temp_limit(struct dsa_switch *ds, int temp)
+{
+	int ret;
+
+	ret = mv88e6352_phy_page_read(ds, 0, 6, 26);
+	if (ret < 0)
+		return ret;
+	temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f);
+	return mv88e6352_phy_page_write(ds, 0, 6, 26,
+					(ret & 0xe0ff) | (temp << 8));
+}
+
+static int mv88e6352_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
+{
+	int ret;
+
+	*alarm = false;
+
+	ret = mv88e6352_phy_page_read(ds, 0, 6, 26);
+	if (ret < 0)
+		return ret;
+
+	*alarm = !!(ret & 0x40);
+
+	return 0;
+}
+
 static int mv88e6352_setup(struct dsa_switch *ds)
 {
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -461,6 +553,10 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
 	.get_strings		= mv88e6352_get_strings,
 	.get_ethtool_stats	= mv88e6352_get_ethtool_stats,
 	.get_sset_count		= mv88e6352_get_sset_count,
+	.get_temp		= mv88e6352_get_temp,
+	.get_temp_limit		= mv88e6352_get_temp_limit,
+	.set_temp_limit		= mv88e6352_set_temp_limit,
+	.get_temp_alarm		= mv88e6352_get_temp_alarm,
 };
 
 MODULE_ALIAS("platform:mv88e6352");
-- 
1.9.1

^ permalink raw reply related

* [PATCH 08/14] net: dsa/mv88e6123_61_65: Report chip temperature
From: Guenter Roeck @ 2014-10-23  4:03 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
	Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>

MV88E6123 and compatible chips support reading the chip temperature
from PHY register 6:26.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/net/dsa/mv88e6123_61_65.c | 63 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c
index a332c53..17dc60e 100644
--- a/drivers/net/dsa/mv88e6123_61_65.c
+++ b/drivers/net/dsa/mv88e6123_61_65.c
@@ -291,6 +291,51 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
 	return 0;
 }
 
+static int  mv88e6123_61_65_get_temp(struct dsa_switch *ds, int *temp)
+{
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+	int ret;
+	int val;
+
+	*temp = 0;
+
+	mutex_lock(&ps->phy_mutex);
+
+	ret = mv88e6xxx_phy_write(ds, 0x0, 0x16, 0x6);
+	if (ret < 0)
+		goto error;
+
+	/* Enable temperature sensor */
+	ret = mv88e6xxx_phy_read(ds, 0x0, 0x1a);
+	if (ret < 0)
+		goto error;
+
+	ret = mv88e6xxx_phy_write(ds, 0x0, 0x1a, ret | (1 << 5));
+	if (ret < 0)
+		goto error;
+
+	/* Wait for temperature to stabilize */
+	usleep_range(10000, 12000);
+
+	val = mv88e6xxx_phy_read(ds, 0x0, 0x1a);
+	if (val < 0) {
+		ret = val;
+		goto error;
+	}
+
+	/* Disable temperature sensor */
+	ret = mv88e6xxx_phy_write(ds, 0x0, 0x1a, ret & ~(1 << 5));
+	if (ret < 0)
+		goto error;
+
+	*temp = ((val & 0x1f) - 5) * 5;
+
+error:
+	mv88e6xxx_phy_write(ds, 0x0, 0x16, 0x0);
+	mutex_unlock(&ps->phy_mutex);
+	return ret;
+}
+
 static int mv88e6123_61_65_setup(struct dsa_switch *ds)
 {
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -299,6 +344,7 @@ static int mv88e6123_61_65_setup(struct dsa_switch *ds)
 
 	mutex_init(&ps->smi_mutex);
 	mutex_init(&ps->stats_mutex);
+	mutex_init(&ps->phy_mutex);
 
 	ret = mv88e6123_61_65_switch_reset(ds);
 	if (ret < 0)
@@ -329,16 +375,28 @@ static int mv88e6123_61_65_port_to_phy_addr(int port)
 static int
 mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum)
 {
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int addr = mv88e6123_61_65_port_to_phy_addr(port);
-	return mv88e6xxx_phy_read(ds, addr, regnum);
+	int ret;
+
+	mutex_lock(&ps->phy_mutex);
+	ret = mv88e6xxx_phy_read(ds, addr, regnum);
+	mutex_unlock(&ps->phy_mutex);
+	return ret;
 }
 
 static int
 mv88e6123_61_65_phy_write(struct dsa_switch *ds,
 			      int port, int regnum, u16 val)
 {
+	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 	int addr = mv88e6123_61_65_port_to_phy_addr(port);
-	return mv88e6xxx_phy_write(ds, addr, regnum, val);
+	int ret;
+
+	mutex_lock(&ps->phy_mutex);
+	ret = mv88e6xxx_phy_write(ds, addr, regnum, val);
+	mutex_unlock(&ps->phy_mutex);
+	return ret;
 }
 
 static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = {
@@ -406,6 +464,7 @@ struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
 	.get_strings		= mv88e6123_61_65_get_strings,
 	.get_ethtool_stats	= mv88e6123_61_65_get_ethtool_stats,
 	.get_sset_count		= mv88e6123_61_65_get_sset_count,
+	.get_temp		= mv88e6123_61_65_get_temp,
 };
 
 MODULE_ALIAS("platform:mv88e6123");
-- 
1.9.1

^ permalink raw reply related

* [PATCH 09/14] net: dsa: Add support for switch EEPROM access
From: Guenter Roeck @ 2014-10-23  4:03 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
	Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>

On some chips it is possible to access the switch eeprom.
Add infrastructure support for it.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/net/dsa.h |  7 +++++++
 net/dsa/slave.c   | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index d8b3057..73146b7 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -248,6 +248,13 @@ struct dsa_switch_driver {
 	int	(*get_temp_limit)(struct dsa_switch *ds, int *temp);
 	int	(*set_temp_limit)(struct dsa_switch *ds, int temp);
 	int	(*get_temp_alarm)(struct dsa_switch *ds, bool *alarm);
+
+	/* EEPROM access */
+	int	(*get_eeprom_len)(struct dsa_switch *ds);
+	int	(*get_eeprom)(struct dsa_switch *ds,
+			      struct ethtool_eeprom *eeprom, u8 *data);
+	int	(*set_eeprom)(struct dsa_switch *ds,
+			      struct ethtool_eeprom *eeprom, u8 *data);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6d18174..a54ee43 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -271,6 +271,41 @@ static u32 dsa_slave_get_link(struct net_device *dev)
 	return -EOPNOTSUPP;
 }
 
+static int dsa_slave_get_eeprom_len(struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	if (ds->drv->get_eeprom_len != NULL)
+		return ds->drv->get_eeprom_len(ds);
+
+	return 0;
+}
+
+static int dsa_slave_get_eeprom(struct net_device *dev,
+				struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	if (ds->drv->get_eeprom != NULL)
+		return ds->drv->get_eeprom(ds, eeprom, data);
+
+	return -EOPNOTSUPP;
+}
+
+static int dsa_slave_set_eeprom(struct net_device *dev,
+				struct ethtool_eeprom *eeprom, u8 *data)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	if (ds->drv->set_eeprom != NULL)
+		return ds->drv->set_eeprom(ds, eeprom, data);
+
+	return -EOPNOTSUPP;
+}
+
 static void dsa_slave_get_strings(struct net_device *dev,
 				  uint32_t stringset, uint8_t *data)
 {
@@ -387,6 +422,9 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_drvinfo		= dsa_slave_get_drvinfo,
 	.nway_reset		= dsa_slave_nway_reset,
 	.get_link		= dsa_slave_get_link,
+	.get_eeprom_len		= dsa_slave_get_eeprom_len,
+	.get_eeprom		= dsa_slave_get_eeprom,
+	.set_eeprom		= dsa_slave_set_eeprom,
 	.get_strings		= dsa_slave_get_strings,
 	.get_ethtool_stats	= dsa_slave_get_ethtool_stats,
 	.get_sset_count		= dsa_slave_get_sset_count,
-- 
1.9.1

^ permalink raw reply related

* [PATCH 13/14] net: dsa/mv88e6352: Add support for reading switch registers
From: Guenter Roeck @ 2014-10-23  4:03 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Florian Fainelli, Andrew Lunn, linux-kernel,
	Guenter Roeck
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>

Report switch register values to ethtool.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/net/dsa/mv88e6352.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 9dddcba..2f31e28 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -779,6 +779,8 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
 	.get_eeprom_len		= mv88e6352_get_eeprom_len,
 	.get_eeprom		= mv88e6352_get_eeprom,
 	.set_eeprom		= mv88e6352_set_eeprom,
+	.get_regs_len		= mv88e6xxx_get_regs_len,
+	.get_regs		= mv88e6xxx_get_regs,
 };
 
 MODULE_ALIAS("platform:mv88e6352");
-- 
1.9.1

^ permalink raw reply related

* Re: [net] i40e: _MASK vs _SHIFT typo in i40e_handle_mdd_event()
From: Joe Perches @ 2014-10-23  4:05 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: davem, Dan Carpenter, netdev, nhorman, sassmann, jogreene
In-Reply-To: <1414034544.15751.2.camel@perches.com>

On Wed, 2014-10-22 at 20:22 -0700, Joe Perches wrote:
> On Wed, 2014-10-22 at 20:06 -0700, Jeff Kirsher wrote:
> > We accidentally mask by the _SHIFT variable.  It means that "event" is
> > always zero.
> []
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> []
> > @@ -6151,7 +6151,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
> >  				I40E_GL_MDET_TX_PF_NUM_SHIFT;
> >  		u8 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >>
> >  				I40E_GL_MDET_TX_VF_NUM_SHIFT;
> > -		u8 event = (reg & I40E_GL_MDET_TX_EVENT_SHIFT) >>
> > +		u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >>
> >  				I40E_GL_MDET_TX_EVENT_SHIFT;
> >  		u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >>
> >  				I40E_GL_MDET_TX_QUEUE_SHIFT;
> > @@ -6165,7 +6165,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
> >  	if (reg & I40E_GL_MDET_RX_VALID_MASK) {
> >  		u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >>
> >  				I40E_GL_MDET_RX_FUNCTION_SHIFT;
> > -		u8 event = (reg & I40E_GL_MDET_RX_EVENT_SHIFT) >>
> > +		u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >>
> >  				I40E_GL_MDET_RX_EVENT_SHIFT;
> >  		u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >>
> >  				I40E_GL_MDET_RX_QUEUE_SHIFT;
> 
> It might be useful to have a macro for that.
> Something like:
> 
> #define GET_REG_VAL(reg, type)			\
> 	((reg & type##_MASK) >> type##_SHIFT)
> 
> so these could become:
> 
> 	u8 vf_num = GET_REG_VAL(reg, I40E_GL_MDET_TX_VF_NUM);
> 	u8 event = GET_REG_VAL(reg, I40E_GL_MDET_TX_EVENT);
> 
> etc...

btw:  Here's a defect:

Using the macro would also prevent this sort of precedence defect

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
[]
@@ -809,8 +809,8 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw)
         * The grst delay value is in 100ms units, and we'll wait a
         * couple counts longer to be sure we don't just miss the end.
         */
-       grst_del = rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK
-                       >> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT;

bit shift has higher precedence than bitwise &

^ permalink raw reply

* Re: irq disable in __netdev_alloc_frag() ?
From: Alexei Starovoitov @ 2014-10-23  4:29 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Alexander Duyck, Eric Dumazet, Network Development
In-Reply-To: <1414036570.2094.19.camel@edumazet-glaptop2.roam.corp.google.com>

On Wed, Oct 22, 2014 at 8:56 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Wed, 2014-10-22 at 20:51 -0700, Eric Dumazet wrote:
>> On Wed, 2014-10-22 at 20:19 -0700, Alexander Duyck wrote:
>>
>> > Couldn't __netdev_alloc_frag() be forked into two functions, one that is
>> > only called from inside the NAPI context and one that is called for all
>> > other contexts?  It would mean having to double the number of pages
>> > being held per CPU, but I would think something like that would be doable.
>>
>> Possibly, but this looks like code bloat for me.
>>
>> On my hosts, this hard irq masking is pure noise.
>>
>> What CPU are you using Alexander ?
>
> Sorry, the question was for Alexei ;)

my numbers were from just released Xeon E5-1630 v3
which is haswell-ep
I think Alexander's idea is worth pursuing.
Code-wise the bloat is minimal: one extra netdev_alloc_cache
and few 'if' which one to choose.
Most of the time only one of them is used, since
systems with napi and not-napi nics are very rare.
I think it should help virtualization case as well,
since virtio_net is napi already.

^ permalink raw reply

* Re: [PATCH 06/14] net: dsa: Add support for hardware monitoring
From: Florian Fainelli @ 2014-10-23  4:37 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: netdev, David S. Miller, Andrew Lunn,
	linux-kernel@vger.kernel.org
In-Reply-To: <1414037002-25528-7-git-send-email-linux@roeck-us.net>

2014-10-22 21:03 GMT-07:00 Guenter Roeck <linux@roeck-us.net>:
> Some Marvell switches provide chip temperature data.
> Add support for reporting it to the dsa infrastructure.
>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
[snip]

> +/* hwmon support ************************************************************/
> +
> +#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))

IS_ENABLED(CONFIG_HWMON)?

> +
> +static ssize_t temp1_input_show(struct device *dev,
> +                               struct device_attribute *attr, char *buf)
> +{
> +       struct dsa_switch *ds = dev_get_drvdata(dev);
> +       int temp, ret;
> +
> +       ret = ds->drv->get_temp(ds, &temp);
> +       if (ret < 0)
> +               return ret;
> +
> +       return sprintf(buf, "%d\n", temp * 1000);
> +}
> +static DEVICE_ATTR_RO(temp1_input);

You probably want the number of temperature sensors to come from the
switch driver, and support arbitrary number of temperature sensors?
--
Florian

^ permalink raw reply

* Re: [PATCH 11/14] net: dsa: Add support for reading switch registers with ethtool
From: Florian Fainelli @ 2014-10-23  4:40 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: netdev, David S. Miller, Andrew Lunn,
	linux-kernel@vger.kernel.org
In-Reply-To: <1414037002-25528-12-git-send-email-linux@roeck-us.net>

2014-10-22 21:03 GMT-07:00 Guenter Roeck <linux@roeck-us.net>:
> Add support for reading switch registers with 'ethtool -d'.
>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---

[snip]

>
> +static int dsa_slave_get_regs_len(struct net_device *dev)
> +{
> +       struct dsa_slave_priv *p = netdev_priv(dev);
> +       struct dsa_switch *ds = p->parent;
> +
> +       if (ds->drv->get_regs_len != NULL)
> +               return ds->drv->get_regs_len(ds, p->port);

Most of the checks in this file are just:

if (ds->drv->callback)
     return ds->drv->callback(...)

> +
> +       return -EOPNOTSUPP;
> +}
> +
> +static void
> +dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
> +{
> +       struct dsa_slave_priv *p = netdev_priv(dev);
> +       struct dsa_switch *ds = p->parent;
> +
> +       ds->drv->get_regs(ds, p->port, regs, _p);

We need to check that the driver does implement this callback here as well.
-- 
Florian

^ permalink raw reply

* Re: [RFC] tcp md5 use of alloc_percpu
From: David Ahern @ 2014-10-23  4:40 UTC (permalink / raw)
  To: Crestez Dan Leonard, netdev
In-Reply-To: <5447FDB2.2010906@gmail.com>

On 10/22/14, 12:55 PM, Crestez Dan Leonard wrote:
> Hello,
>
> It seems that the TCP MD5 feature allocates a percpu struct tcp_md5sig_pool and uses part of that memory for a scratch buffer to do crypto on. Here is the relevant code:

This is a forward port of a local change to address the problem (local 
kernel version is 3.4 so perhaps my quick bump to top of tree is off but 
it shows the general idea). Been on my to-do list to figure out why this 
is needed, but it seems related to your problem:

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1bec4e76d88c..833a676bd4b0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2941,7 +2941,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
     local_bh_disable();
     p = ACCESS_ONCE(tcp_md5sig_pool);
     if (p)
-       return raw_cpu_ptr(p);
+       return __va(per_cpu_ptr_to_phys(raw_cpu_ptr(p)));

     local_bh_enable();
     return NULL;

David

^ permalink raw reply related

* Re: [PATCH 00/14] net: dsa: Fixes and enhancements
From: Florian Fainelli @ 2014-10-23  4:45 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: netdev, David S. Miller, Andrew Lunn,
	linux-kernel@vger.kernel.org
In-Reply-To: <1414037002-25528-1-git-send-email-linux@roeck-us.net>

2014-10-22 21:03 GMT-07:00 Guenter Roeck <linux@roeck-us.net>:
> Patch 01/14 addresses an annoying and unhelpful log message.
>
> Patches 02/14 and 03/14 are minor enhancements, adding support for
> known switch revisions.
>
> Patches 04/14 and 05/14 add support for MV88E6352 and MV88E6176.
>
> Patch 06/14 adds support for hardware monitoring, specifically for
> reporting the chip temperature, to the dsa subsystem.
>
> Patches 07/14 and 08/14 implement hardware monitoring for MV88E6352,
> MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
>
> Patch 09/14 adds support for EEPROM access to the DSA subsystem.
>
> Patch 10/14 implements EEPROM access for MV88E6352 and MV88E6176.
>
> Patch 11/14 adds support for reading switch registers to the DSA
> subsystem.
>
> Patches 12/14 amd 13/14 implement support for reading switch registers
> to the drivers for MV88E6352, MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
>
> Patch 14/14 adds support for reading additional RMON registers to the drivers
> for  MV88E6352, MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
>
> The series resides and was tested on top of v3.18-rc1 in a system
> with MV88E6352. Testing in systems with 88E6131, 88E6060 and MV88E6165
> was done earlier (I don't have access to those systems right now).
> The series applies cleanly to net-next as of today (10/22).

Besides the two nitpicking comments, this looks really good to me, thanks!

>
> ----------------------------------------------------------------
> Guenter Roeck (14):
>       net: dsa: Don't set skb->protocol on outgoing tagged packets
>       net: dsa: Report known silicon revisions for Marvell 88E6060
>       net: dsa: Report known silicon revisions for Marvell 88E6131
>       net: dsa: Add support for Marvell 88E6352
>       net: dsa/mv88e6352: Add support for MV88E6176
>       net: dsa: Add support for hardware monitoring
>       net: dsa/mv88e6352: Report chip temperature
>       net: dsa/mv88e6123_61_65: Report chip temperature
>       net: dsa: Add support for switch EEPROM access
>       net: dsa/mv88e6352: Implement EEPROM access functions
>       net: dsa: Add support for reading switch registers with ethtool
>       net: dsa/mv88e6123_61_65: Add support for reading switch registers
>       net: dsa/mv88e6352: Add support for reading switch registers
>       net: dsa: Provide additional RMON statistics
>
>  MAINTAINERS                       |   5 +
>  drivers/net/dsa/Kconfig           |   9 +
>  drivers/net/dsa/Makefile          |   3 +
>  drivers/net/dsa/mv88e6060.c       |   5 +-
>  drivers/net/dsa/mv88e6123_61_65.c |  68 +++-
>  drivers/net/dsa/mv88e6131.c       |  12 +-
>  drivers/net/dsa/mv88e6352.c       | 789 ++++++++++++++++++++++++++++++++++++++
>  drivers/net/dsa/mv88e6xxx.c       |  53 ++-
>  drivers/net/dsa/mv88e6xxx.h       |  15 +
>  include/net/dsa.h                 |  20 +
>  net/dsa/dsa.c                     | 111 ++++++
>  net/dsa/slave.c                   |  60 +++
>  net/dsa/tag_dsa.c                 |   2 -
>  net/dsa/tag_edsa.c                |   2 -
>  net/dsa/tag_trailer.c             |   2 -
>  15 files changed, 1138 insertions(+), 18 deletions(-)
>  create mode 100644 drivers/net/dsa/mv88e6352.c



-- 
Florian

^ permalink raw reply

* Re: [PATCH 06/14] net: dsa: Add support for hardware monitoring
From: Guenter Roeck @ 2014-10-23  5:06 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: netdev, David S. Miller, Andrew Lunn,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAGVrzcYjPJWai9bxkh+WRrEEAzBKBd-LQm7zDeeouxngr_v9SQ@mail.gmail.com>

On 10/22/2014 09:37 PM, Florian Fainelli wrote:
> 2014-10-22 21:03 GMT-07:00 Guenter Roeck <linux@roeck-us.net>:
>> Some Marvell switches provide chip temperature data.
>> Add support for reporting it to the dsa infrastructure.
>>
>> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
>> ---
> [snip]
>
>> +/* hwmon support ************************************************************/
>> +
>> +#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
>
> IS_ENABLED(CONFIG_HWMON)?
>

Hi Florian,

unfortunately, that won't work; I had it initially and got a nice error message
from Fengguang's build test bot.

Problem is that hwmon can be built as module and the dsa subsystem can be built
into the kernel. In that case, hwmon functionality in the driver must be disabled.
The above condition covers that case. The nouveau graphics driver has the same
problem and uses the same conditional to solve it.

An alternative would be to select HWMON in the NET_DSA Kconfig entry; the
Broadcom Tigon3 driver does that. I just don't know if that is a good idea.
I am open to suggestions.

>> +
>> +static ssize_t temp1_input_show(struct device *dev,
>> +                               struct device_attribute *attr, char *buf)
>> +{
>> +       struct dsa_switch *ds = dev_get_drvdata(dev);
>> +       int temp, ret;
>> +
>> +       ret = ds->drv->get_temp(ds, &temp);
>> +       if (ret < 0)
>> +               return ret;
>> +
>> +       return sprintf(buf, "%d\n", temp * 1000);
>> +}
>> +static DEVICE_ATTR_RO(temp1_input);
>
> You probably want the number of temperature sensors to come from the
> switch driver, and support arbitrary number of temperature sensors?

In that case we would need the number of sensors, pass a sensor index,
and create a dynamic number of attributes. The code would get much more
complex without real benefit unless there is such a chip - and if there is,
we can still change the code at that time. I would prefer to keep it simple
for now.

Thanks,
Guenter

^ permalink raw reply

* Re: irq disable in __netdev_alloc_frag() ?
From: Eric Dumazet @ 2014-10-23  5:14 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: Alexander Duyck, Eric Dumazet, Network Development
In-Reply-To: <CAMEtUuytvhHJEGCmwkgH9EesCJdDhOn=5AujFymWPxg7g+E3og@mail.gmail.com>

On Wed, 2014-10-22 at 21:29 -0700, Alexei Starovoitov wrote:

> my numbers were from just released Xeon E5-1630 v3
> which is haswell-ep
> I think Alexander's idea is worth pursuing.
> Code-wise the bloat is minimal: one extra netdev_alloc_cache
> and few 'if' which one to choose.


> Most of the time only one of them is used, since
> systems with napi and not-napi nics are very rare.


> I think it should help virtualization case as well,
> since virtio_net is napi already.

Really, I doubt this will make any difference in real workloads
where bottleneck is memory bandwidth, not the time taken by
pushf/cli/popf.

What difference do you have on a real workload like : netperf TCP_RR or
UDP_RR ?

^ permalink raw reply

* Re: [PATCH 11/14] net: dsa: Add support for reading switch registers with ethtool
From: Guenter Roeck @ 2014-10-23  5:21 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: netdev, David S. Miller, Andrew Lunn,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAGVrzcbfkSK58iCPouMc4g8MBrAPBemi8QyP6_33xtYiRWFCHg@mail.gmail.com>

On 10/22/2014 09:40 PM, Florian Fainelli wrote:
> 2014-10-22 21:03 GMT-07:00 Guenter Roeck <linux@roeck-us.net>:
>> Add support for reading switch registers with 'ethtool -d'.
>>
>> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
>> ---
>
> [snip]
>
>>
>> +static int dsa_slave_get_regs_len(struct net_device *dev)
>> +{
>> +       struct dsa_slave_priv *p = netdev_priv(dev);
>> +       struct dsa_switch *ds = p->parent;
>> +
>> +       if (ds->drv->get_regs_len != NULL)
>> +               return ds->drv->get_regs_len(ds, p->port);
>
> Most of the checks in this file are just:
>
> if (ds->drv->callback)
>       return ds->drv->callback(...)
>
Hi Florian,

To be fair, that wasn't the case when I wrote the code ;-).
No problem, I'll do the same.

>> +
>> +       return -EOPNOTSUPP;
>> +}
>> +
>> +static void
>> +dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
>> +{
>> +       struct dsa_slave_priv *p = netdev_priv(dev);
>> +       struct dsa_switch *ds = p->parent;
>> +
>> +       ds->drv->get_regs(ds, p->port, regs, _p);
>
> We need to check that the driver does implement this callback here as well.
>

Obviously, and embarrassing ;-).

Thanks a lot for the review!
Guenter

^ permalink raw reply

* Re: [PATCH 00/14] net: dsa: Fixes and enhancements
From: Guenter Roeck @ 2014-10-23  5:22 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: netdev, David S. Miller, Andrew Lunn,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAGVrzcbohXf09pvrzayZEfcpjr8U-h-hyuYMnU0yTBJny0U27A@mail.gmail.com>

On 10/22/2014 09:45 PM, Florian Fainelli wrote:
> 2014-10-22 21:03 GMT-07:00 Guenter Roeck <linux@roeck-us.net>:
>> Patch 01/14 addresses an annoying and unhelpful log message.
>>
>> Patches 02/14 and 03/14 are minor enhancements, adding support for
>> known switch revisions.
>>
>> Patches 04/14 and 05/14 add support for MV88E6352 and MV88E6176.
>>
>> Patch 06/14 adds support for hardware monitoring, specifically for
>> reporting the chip temperature, to the dsa subsystem.
>>
>> Patches 07/14 and 08/14 implement hardware monitoring for MV88E6352,
>> MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
>>
>> Patch 09/14 adds support for EEPROM access to the DSA subsystem.
>>
>> Patch 10/14 implements EEPROM access for MV88E6352 and MV88E6176.
>>
>> Patch 11/14 adds support for reading switch registers to the DSA
>> subsystem.
>>
>> Patches 12/14 amd 13/14 implement support for reading switch registers
>> to the drivers for MV88E6352, MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
>>
>> Patch 14/14 adds support for reading additional RMON registers to the drivers
>> for  MV88E6352, MV88E6176, MV88E6123, MV88E6161, and MV88E6165.
>>
>> The series resides and was tested on top of v3.18-rc1 in a system
>> with MV88E6352. Testing in systems with 88E6131, 88E6060 and MV88E6165
>> was done earlier (I don't have access to those systems right now).
>> The series applies cleanly to net-next as of today (10/22).
>
> Besides the two nitpicking comments, this looks really good to me, thanks!
>
Thanks a lot for the quick review!

I'll wait a couple of days for additional feedback before I resubmit.

Guenter

^ permalink raw reply

* Re: Possible wireless issue introduced in next-20140930
From: Mike Galbraith @ 2014-10-23  5:23 UTC (permalink / raw)
  To: Murilo Opsfelder Araujo
  Cc: linux-kernel, netdev, linux-wireless, Larry Finger,
	John W. Linville
In-Reply-To: <54487346.4080604@gmail.com>

On Thu, 2014-10-23 at 01:17 -0200, Murilo Opsfelder Araujo wrote: 
> Hello, everyone.
> 
> With next-20140930 my laptop does not work, i.e. after I enter my login 
> and password in KDM, the entire system becomes unresponsive and I need 
> to reset it in order to reboot (it does not even show the KDE splash 
> screen).
> 
> It was working pretty fine with next-20140926.
> 
> I've also tested with next-20141022 and v3.18-rc1 and no luck.
> 
> git bisect pointed me to the commit below [1].  My wireless card is a 
> RTL8191SEvA [2].

Mine is RTL8191SEvB.

I was going to bisect RTL8191SE regression when I got a chance, but you
beat me to it.

> commit 38506ecefab911785d5e1aa5889f6eeb462e0954
> Author: Larry Finger <Larry.Finger@lwfinger.net>
> Date:   Mon Sep 22 09:39:19 2014 -0500
> 
>      rtlwifi: rtl_pci: Start modification for new drivers

Did you confirm that bisection result, ie revert it at HEAD of whichever
tree you bisected?

The revert (master) removed some warnings on the way to kaboom here, but
the drivers is still toxic.  My log follows in case it's the same thing.
I can't go hunting atm, maybe during the weekend if the problem hasn't
evaporate by then.

Master:
[   28.028817] cfg80211: Calling CRDA to update world regulatory domain
[   28.102230] cfg80211: World regulatory domain updated:
[   28.103799] cfg80211:  DFS Master region: unset
[   28.103838] cfg80211:   (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)
[   28.107012] cfg80211:   (2402000 KHz - 2472000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)
[   28.108663] cfg80211:   (2457000 KHz - 2482000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)
[   28.110251] cfg80211:   (2474000 KHz - 2494000 KHz @ 20000 KHz), (300 mBi, 2000 mBm), (N/A)
[   28.111837] cfg80211:   (5170000 KHz - 5250000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)
[   28.113422] cfg80211:   (5735000 KHz - 5835000 KHz @ 40000 KHz), (300 mBi, 2000 mBm), (N/A)
[   29.577727] rtl8192se 0000:08:00.0: enabling device (0000 -> 0003)
[   29.618681] rtl8192se: FW Power Save off (module option)
[   29.620355] rtl8192se: Driver for Realtek RTL8192SE/RTL8191SE
[   29.620355] Loading firmware rtlwifi/rtl8192sefw.bin
[   29.648444] ------------[ cut here ]------------
[   29.650012] WARNING: CPU: 1 PID: 59 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x80()
[   29.651602] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:1c.5/0000:08:00.0/ieee80211/phy0'
[   29.653278] Modules linked in: rtl8192se(+) rtl_pci rtlwifi mac80211 cfg80211 btusb bluetooth coretemp iTCO_wdt iTCO_vendor_support serio_raw microcode toshiba_acpi sparse_keymap snd_hda_codec_hdmi snd_hda_codec_conexant rfkill snd_hda_codec_generic pcspkr uvcvideo videobuf2_core v4l2_common videodev videobuf2_vmalloc videobuf2_memops joydev i2c_i801 wmi lpc_ich mfd_core acpi_cpufreq toshiba_haps snd_hda_intel snd_hda_controller snd_hda_codec snd_hwdep toshiba_bluetooth snd_pcm snd_seq snd_timer snd_seq_device snd battery ac soundcore sg autofs4 i915 drm_kms_helper drm i2c_algo_bit thermal video processor thermal_sys button scsi_dh_rdac scsi_dh_alua scsi_dh_emc scsi_dh_hp_sw scsi_dh netconsole atl1c
[   29.655223] CPU: 1 PID: 59 Comm: kworker/1:4 Not tainted 3.18.0-master #48
[   29.655225] Hardware name: TOSHIBA ��������������������������������/��������������������������������, BIOS V1.70    09/29/2009
[   29.655231] Workqueue: events request_firmware_work_func
[   29.655235]  0000000000000009 ffff8800379bba98 ffffffff8158f1d4 000000000000c5c5
[   29.655238]  ffff8800379bbae8 ffff8800379bbad8 ffffffff8104c801 0000000000003664
[   29.655241]  ffff880037b0c000 ffff880136392dc0 ffff8800371feeb0 ffff88013b027098
[   29.655242] Call Trace:
[   29.655248]  [<ffffffff8158f1d4>] dump_stack+0x46/0x58
[   29.655253]  [<ffffffff8104c801>] warn_slowpath_common+0x81/0xa0
[   29.655256]  [<ffffffff8104c866>] warn_slowpath_fmt+0x46/0x50
[   29.655261]  [<ffffffff811d1108>] ? kernfs_path+0x48/0x60
[   29.655263]  [<ffffffff811d4588>] sysfs_warn_dup+0x68/0x80
[   29.655266]  [<ffffffff811d462e>] sysfs_create_dir_ns+0x8e/0xa0
[   29.655270]  [<ffffffff8128e149>] kobject_add_internal+0xc9/0x400
[   29.655273]  [<ffffffff8128e8b0>] kobject_add+0x60/0xb0
[   29.655276]  [<ffffffff81593b16>] ? mutex_lock+0x16/0x37
[   29.655281]  [<ffffffff81388624>] device_add+0x104/0x600
[   29.655285]  [<ffffffff8114c3ce>] ? lazy_max_pages+0x1e/0x30
[   29.655312]  [<ffffffffa0401d0d>] wiphy_register+0x3fd/0x710 [cfg80211]
[   29.655315]  [<ffffffff8114dd92>] ? __vunmap+0xc2/0x110
[   29.655346]  [<ffffffffa04a5cfc>] ? ieee80211_register_hw+0x1ec/0x9a0 [mac80211]
[   29.655361]  [<ffffffffa04a5e78>] ieee80211_register_hw+0x368/0x9a0 [mac80211]
[   29.655369]  [<ffffffffa049126b>] rtl92se_fw_cb+0xab/0x1d0 [rtl8192se]
[   29.655373]  [<ffffffff8139b650>] request_firmware_work_func+0x30/0x60
[   29.655378]  [<ffffffff810627ed>] process_one_work+0x14d/0x3d0
[   29.655381]  [<ffffffff81062b91>] worker_thread+0x121/0x480
[   29.655384]  [<ffffffff81062a70>] ? process_one_work+0x3d0/0x3d0
[   29.655387]  [<ffffffff810672c9>] kthread+0xc9/0xe0
[   29.655390]  [<ffffffff81067200>] ? __kthread_parkme+0x80/0x80
[   29.655394]  [<ffffffff81595b6c>] ret_from_fork+0x7c/0xb0
[   29.655397]  [<ffffffff81067200>] ? __kthread_parkme+0x80/0x80
[   29.655399] ---[ end trace 3935ea42665e29bc ]---
[   29.655401] ------------[ cut here ]------------
[   29.655404] WARNING: CPU: 1 PID: 59 at lib/kobject.c:240 kobject_add_internal+0x294/0x400()
[   29.655406] kobject_add_internal failed for phy0 with -EEXIST, don't try to register things with the same name in the same directory.
[   29.655442] Modules linked in: rtl8192se(+) rtl_pci rtlwifi mac80211 cfg80211 btusb bluetooth coretemp iTCO_wdt iTCO_vendor_support serio_raw microcode toshiba_acpi sparse_keymap snd_hda_codec_hdmi snd_hda_codec_conexant rfkill snd_hda_codec_generic pcspkr uvcvideo videobuf2_core v4l2_common videodev videobuf2_vmalloc videobuf2_memops joydev i2c_i801 wmi lpc_ich mfd_core acpi_cpufreq toshiba_haps snd_hda_intel snd_hda_controller snd_hda_codec snd_hwdep toshiba_bluetooth snd_pcm snd_seq snd_timer snd_seq_device snd battery ac soundcore sg autofs4 i915 drm_kms_helper drm i2c_algo_bit thermal video processor thermal_sys button scsi_dh_rdac scsi_dh_alua scsi_dh_emc scsi_dh_hp_sw scsi_dh netconsole atl1c
[   29.655444] CPU: 1 PID: 59 Comm: kworker/1:4 Tainted: G        W      3.18.0-master #48
[   29.655446] Hardware name: TOSHIBA ��������������������������������/��������������������������������, BIOS V1.70    09/29/2009
[   29.655449] Workqueue: events request_firmware_work_func
[   29.655452]  0000000000000009 ffff8800379bbaf8 ffffffff8158f1d4 0000000000004e4e
[   29.655455]  ffff8800379bbb48 ffff8800379bbb38 ffffffff8104c801 ffff8800379bbb38
[   29.655458]  ffff880036470350 00000000ffffffef ffff8800367d72c0 ffff88013b027098
[   29.655459] Call Trace:
[   29.655462]  [<ffffffff8158f1d4>] dump_stack+0x46/0x58
[   29.655465]  [<ffffffff8104c801>] warn_slowpath_common+0x81/0xa0
[   29.655468]  [<ffffffff8104c866>] warn_slowpath_fmt+0x46/0x50
[   29.655471]  [<ffffffff8128e314>] kobject_add_internal+0x294/0x400
[   29.655474]  [<ffffffff8128e8b0>] kobject_add+0x60/0xb0
[   29.655477]  [<ffffffff81593b16>] ? mutex_lock+0x16/0x37
[   29.655480]  [<ffffffff81388624>] device_add+0x104/0x600
[   29.655483]  [<ffffffff8114c3ce>] ? lazy_max_pages+0x1e/0x30
[   29.655496]  [<ffffffffa0401d0d>] wiphy_register+0x3fd/0x710 [cfg80211]
[   29.655498]  [<ffffffff8114dd92>] ? __vunmap+0xc2/0x110
[   29.655512]  [<ffffffffa04a5cfc>] ? ieee80211_register_hw+0x1ec/0x9a0 [mac80211]
[   29.655526]  [<ffffffffa04a5e78>] ieee80211_register_hw+0x368/0x9a0 [mac80211]
[   29.655533]  [<ffffffffa049126b>] rtl92se_fw_cb+0xab/0x1d0 [rtl8192se]
[   29.655536]  [<ffffffff8139b650>] request_firmware_work_func+0x30/0x60
[   29.655540]  [<ffffffff810627ed>] process_one_work+0x14d/0x3d0
[   29.655543]  [<ffffffff81062b91>] worker_thread+0x121/0x480
[   29.655546]  [<ffffffff81062a70>] ? process_one_work+0x3d0/0x3d0
[   29.655548]  [<ffffffff810672c9>] kthread+0xc9/0xe0
[   29.655551]  [<ffffffff81067200>] ? __kthread_parkme+0x80/0x80
[   29.655555]  [<ffffffff81595b6c>] ret_from_fork+0x7c/0xb0
[   29.655557]  [<ffffffff81067200>] ? __kthread_parkme+0x80/0x80
[   29.655559] ---[ end trace 3935ea42665e29bd ]---
[   29.655563] rtl8192se:rtl92se_fw_cb():<0-0> Can't register mac80211 hw
[   29.818806] ieee80211 phy0: Selected rate control algorithm 'rtl_rc'
[   30.265418] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.267617] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.269767] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.271919] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.274034] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.276117] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process

... more

[   30.684030] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.684408] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.684776] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.685139] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.685535] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.685901] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.686290] rtl8192se:rtl92se_get_desc(): ERR rxdesc :4 not process
[   30.686710] BUG: unable to handle kernel NULL pointer dereference at           (null)
[   30.687145] IP: [<          (null)>]           (null)
[   30.687592] PGD 137165067 PUD 135e2e067 PMD 0 
[   30.688094] Oops: 0010 [#1] SMP 
[   30.688601] Modules linked in: arc4 rtl8192se rtl_pci rtlwifi mac80211 cfg80211 btusb bluetooth[    0.000000] Initializing cgroup subsys cpuset
[    0.000000] Initializing cgroup subsys cpu
[    0.000000] Initializing cgroup subsys cpuacct
(kdump tried to do its thing, but fscking systemd busted it, so, no oops/crash)

Master with 38506ece reverted, kdump disabled:
[   19.383131] rtl8192se 0000:08:00.0: enabling device (0000 -> 0003)
[   19.424672] rtl8192se: FW Power Save off (module option)
[   19.427269] rtl8192se: Driver for Realtek RTL8192SE/RTL8191SE
[   19.427269] Loading firmware rtlwifi/rtl8192sefw.bin
[   19.525633] EXT4-fs (sda1): mounted filesystem with ordered data mode. Opts: acl
[   19.528188] ieee80211 phy0: Selected rate control algorithm 'rtl_rc'
[   25.515995] BUG: unable to handle kernel NULL pointer dereference at           (null)
[   25.516020] IP: [<          (null)>]           (null)
[   25.516020] PGD 362a6067 PUD 36d39067 PMD 0 
[   25.516020] Oops: 0010 [#1] SMP 
[   25.516020] Modules linked in: arc4 rtl8192se rtl_pci rtlwifi uvcvideo coretemp videobuf2_core mac80211 iTCO_wdt iTCO_vendor_support cfg80211 v4l2_common btusb videodev lpc_ich bluetooth mfd_core microcode toshiba_acpi sparse_keymap rfkill videobuf2_vmalloc videobuf2_memops serio_raw i2c_i801 joydev snd_hda_codec_hdmi snd_hda_codec_conexant snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hwdep toshiba_haps wmi snd_pcm toshiba_bluetooth snd_seq snd_timer snd_seq_device snd battery[   25.526648] IPv6: ADDRCONF(NETDEV_UP): wlan0: link is not ready

[   25.526604]  soundcore ac acpi_cpufreq sg autofs4 i915 drm_kms_helper drm i2c_algo_bit thermal video processor thermal_sys button scsi_dh_rdac scsi_dh_alua scsi_dh_emc scsi_dh_hp_sw scsi_dh netconsole atl1c
[   25.526604] CPU: 1 PID: 21 Comm: kworker/1:1 Not tainted 3.18.0-master #49
[   25.526604] Hardware name: TOSHIBA ��������������������������������/��������������������������������, BIOS V1.70    09/29/2009
[   25.526604] Workqueue: rtl92s_pci rtl_watchdog_wq_callback [rtlwifi]
[   25.526604] task: ffff88013f2e2550 ti: ffff88013f2e4000 task.ti: ffff88013f2e4000
[   25.526604] RIP: 0010:[<0000000000000000>]  [<          (null)>]           (null)
[   25.526604] RSP: 0018:ffff88013f2e7d80  EFLAGS: 00010293
[   25.526604] RAX: ffffffffa05363c0 RBX: ffff8800364019c0 RCX: 0000000000000000
[   25.526604] RDX: 0000000000000001 RSI: 000000000000005d RDI: ffff880036400620
[   25.526604] RBP: ffff88013f2e7df8 R08: ffffffff81658a40 R09: ffffc90022306aa0
[   25.526604] R10: 000000000000037e R11: 0000000000aaaaaa R12: ffff880036400620
[   25.526604] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[   25.526604] FS:  0000000000000000(0000) GS:ffff88013fd00000(0000) knlGS:0000000000000000
[   25.526604] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   25.526604] CR2: 0000000000000000 CR3: 00000000365e2000 CR4: 00000000000407e0
[   25.526604] Stack:
[   25.526604]  ffffffffa04ff51e ffff880037bdbc00 0000000000000002 ffff880037b8e400
[   25.526604]  0000000037bdbe10 0000000000000000 0000000000000000 0000000000000000
[   25.526604]  0000000000000000 ffff88013f2e7e38 ffff880036401be0 ffff88013f239700
[   25.526604] Call Trace:
[   25.526604]  [<ffffffffa04ff51e>] ? rtl_watchdog_wq_callback+0xfe/0x420 [rtlwifi]
[   25.526604]  [<ffffffff810627ed>] process_one_work+0x14d/0x3d0
[   25.526604]  [<ffffffff81062b91>] worker_thread+0x121/0x480
[   25.526604]  [<ffffffff81062a70>] ? process_one_work+0x3d0/0x3d0
[   25.526604]  [<ffffffff810672c9>] kthread+0xc9/0xe0
[   25.526604]  [<ffffffff81067200>] ? __kthread_parkme+0x80/0x80
[   25.526604]  [<ffffffff81595b6c>] ret_from_fork+0x7c/0xb0
[   25.526604]  [<ffffffff81067200>] ? __kthread_parkme+0x80/0x80
[   25.526604] Code:  Bad RIP value.
[   25.526604] RIP  [<          (null)>]           (null)
[   25.526604]  RSP <ffff88013f2e7d80>
[   25.526604] CR2: 0000000000000000
[   25.526604] ---[ end trace fc110bb3836ecc51 ]---
[   25.607061] BUG: unable to handle kernel paging request at ffffffffffffffd8
[   25.609239] IP: [<ffffffff81067a71>] kthread_data+0x11/0x20
[   25.609239] PGD 1a16067 PUD 1a18067 PMD 0 
[   25.609239] Oops: 0000 [#2] SMP 
[   25.609239] Modules linked in: arc4 rtl8192se rtl_pci rtlwifi uvcvideo coretemp videobuf2_core mac80211 iTCO_wdt iTCO_vendor_support cfg80211 v4l2_common btusb videodev lpc_ich bluetooth mfd_core microcode toshiba_acpi sparse_keymap rfkill videobuf2_vmalloc videobuf2_memops serio_raw i2c_i801 joydev snd_hda_codec_hdmi snd_hda_codec_conexant snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hwdep toshiba_haps wmi snd_pcm toshiba_bluetooth snd_seq snd_timer snd_seq_device snd battery soundcore ac acpi_cpufreq sg autofs4 i915 drm_kms_helper drm i2c_algo_bit thermal video processor thermal_sys button scsi_dh_rdac scsi_dh_alua scsi_dh_emc scsi_dh_hp_sw scsi_dh netconsole atl1c
[   25.609239] CPU: 1 PID: 21 Comm: kworker/1:1 Tainted: G      D        3.18.0-master #49
[   25.609239] Hardware name: TOSHIBA ��������������������������������/��������������������������������, BIOS V1.70    09/29/2009
[   25.609239] task: ffff88013f2e2550 ti: ffff88013f2e4000 task.ti: ffff88013f2e4000
[   25.609239] RIP: 0010:[<ffffffff81067a71>]  [<ffffffff81067a71>] kthread_data+0x11/0x20
[   25.609239] RSP: 0018:ffff88013f2e7990  EFLAGS: 00010092
[   25.609239] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 000000019c731d81
[   25.609239] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff88013f2e2550
[   25.609239] RBP: ffff88013f2e79a8 R08: ffff88013f1f8990 R09: 00000000000003ac
[   25.609239] R10: 000000000000bc00 R11: 000000000000b407 R12: ffff88013fd12b00
[   25.609239] R13: 0000000000000001 R14: 0000000000000000 R15: ffff88013f2e2550
[   25.609239] FS:  0000000000000000(0000) GS:ffff88013fd00000(0000) knlGS:0000000000000000
[   25.609239] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   25.609239] CR2: 0000000000000028 CR3: 0000000037075000 CR4: 00000000000407e0
[   25.609239] Stack:
[   25.609239]  ffffffff81063225 ffff88013f2e79a8 ffff88013f2e2550 ffff88013f2e7a18
[   25.609239]  ffffffff81591aa9 ffff88013f2e2550 0000000000012b00 ffff88013f2e7fd8
[   25.609239]  0000000000012b00 ffff88013f2e7a08 ffff88013f2e2550 ffff88013f2e2550
[   25.609239] Call Trace:
[   25.609239]  [<ffffffff81063225>] ? wq_worker_sleeping+0x15/0xa0
[   25.609239]  [<ffffffff81591aa9>] __schedule+0x549/0x810
[   25.609239]  [<ffffffff81591d99>] schedule+0x29/0x70
[   25.609239]  [<ffffffff8104dfa2>] do_exit+0x6a2/0x9e0
[   25.609239]  [<ffffffff8100645e>] oops_end+0x8e/0xd0
[   25.609239]  [<ffffffff8158a27a>] no_context+0x248/0x298
[   25.609239]  [<ffffffff8158a337>] __bad_area_nosemaphore+0x6d/0x1c6
[   25.609239]  [<ffffffff8158a4a3>] bad_area_nosemaphore+0x13/0x15
[   25.609239]  [<ffffffff8103d67c>] __do_page_fault+0x9c/0x530
[   25.609239]  [<ffffffff812ef124>] ? soft_cursor+0x1b4/0x250
[   25.609239]  [<ffffffff8103db1c>] do_page_fault+0xc/0x10
[   25.609239]  [<ffffffff81597722>] page_fault+0x22/0x30
[   25.609239]  [<ffffffffa04ff51e>] ? rtl_watchdog_wq_callback+0xfe/0x420 [rtlwifi]
[   25.609239]  [<ffffffff810627ed>] process_one_work+0x14d/0x3d0
[   25.609239]  [<ffffffff81062b91>] worker_thread+0x121/0x480
[   25.609239]  [<ffffffff81062a70>] ? process_one_work+0x3d0/0x3d0
[   25.609239]  [<ffffffff810672c9>] kthread+0xc9/0xe0
[   25.609239]  [<ffffffff81067200>] ? __kthread_parkme+0x80/0x80
[   25.609239]  [<ffffffff81595b6c>] ret_from_fork+0x7c/0xb0
[   25.609239]  [<ffffffff81067200>] ? __kthread_parkme+0x80/0x80
[   25.609239] Code: 48 89 e5 5d 48 8b 40 c8 48 c1 e8 02 83 e0 01 c3 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 48 8b 87 60 04 00 00 55 48 89 e5 5d <48> 8b 40 d8 c3 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 
[   25.609239] RIP  [<ffffffff81067a71>] kthread_data+0x11/0x20
[   25.609239]  RSP <ffff88013f2e7990>
[   25.609239] CR2: ffffffffffffffd8
[   25.609239] ---[ end trace fc110bb3836ecc52 ]---
[   25.609239] Fixing recursive fault but reboot is needed!
[   25.609239] Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 1
[   25.609239] Shutting down cpus with NMI
[   25.609239] Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff)
[   25.609239] drm_kms_helper: panic occurred, switching back to text console
[   25.609239] Rebooting in 60 seconds..

^ permalink raw reply

* Re: [RFC] tcp md5 use of alloc_percpu
From: Eric Dumazet @ 2014-10-23  5:23 UTC (permalink / raw)
  To: David Ahern; +Cc: Crestez Dan Leonard, netdev
In-Reply-To: <544886C4.4020702@gmail.com>

On Wed, 2014-10-22 at 22:40 -0600, David Ahern wrote:
> On 10/22/14, 12:55 PM, Crestez Dan Leonard wrote:
> > Hello,
> >
> > It seems that the TCP MD5 feature allocates a percpu struct tcp_md5sig_pool and uses part of that memory for a scratch buffer to do crypto on. Here is the relevant code:
> 
> This is a forward port of a local change to address the problem (local 
> kernel version is 3.4 so perhaps my quick bump to top of tree is off but 
> it shows the general idea). Been on my to-do list to figure out why this 
> is needed, but it seems related to your problem:
> 
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 1bec4e76d88c..833a676bd4b0 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2941,7 +2941,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
>      local_bh_disable();
>      p = ACCESS_ONCE(tcp_md5sig_pool);
>      if (p)
> -       return raw_cpu_ptr(p);
> +       return __va(per_cpu_ptr_to_phys(raw_cpu_ptr(p)));
> 
>      local_bh_enable();
>      return NULL;

per_cpu_ptr_to_phys() can be pretty expensive and should not be called
in fast path.

^ permalink raw reply

* Re: [RFC] tcp md5 use of alloc_percpu
From: Eric Dumazet @ 2014-10-23  5:38 UTC (permalink / raw)
  To: David Ahern; +Cc: Crestez Dan Leonard, netdev
In-Reply-To: <1414041833.2094.28.camel@edumazet-glaptop2.roam.corp.google.com>

On Wed, 2014-10-22 at 22:23 -0700, Eric Dumazet wrote:
> On Wed, 2014-10-22 at 22:40 -0600, David Ahern wrote:
> > On 10/22/14, 12:55 PM, Crestez Dan Leonard wrote:
> > > Hello,
> > >
> > > It seems that the TCP MD5 feature allocates a percpu struct tcp_md5sig_pool and uses part of that memory for a scratch buffer to do crypto on. Here is the relevant code:
> > 
> > This is a forward port of a local change to address the problem (local 
> > kernel version is 3.4 so perhaps my quick bump to top of tree is off but 
> > it shows the general idea). Been on my to-do list to figure out why this 
> > is needed, but it seems related to your problem:
> > 
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index 1bec4e76d88c..833a676bd4b0 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -2941,7 +2941,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
> >      local_bh_disable();
> >      p = ACCESS_ONCE(tcp_md5sig_pool);
> >      if (p)
> > -       return raw_cpu_ptr(p);
> > +       return __va(per_cpu_ptr_to_phys(raw_cpu_ptr(p)));
> > 
> >      local_bh_enable();
> >      return NULL;
> 
> per_cpu_ptr_to_phys() can be pretty expensive and should not be called
> in fast path.
> 

My updated patch would be :

 net/ipv4/tcp.c |   66 +++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 38 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1bec4e76d88c..af4dc16b61f6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2868,61 +2868,51 @@ EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
 #ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_PER_CPU(struct tcp_md5sig_pool, *tcp_md5sig_pool);
 static DEFINE_MUTEX(tcp_md5sig_mutex);
-
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
-
-		if (p->md5_desc.tfm)
-			crypto_free_hash(p->md5_desc.tfm);
-	}
-	free_percpu(pool);
-}
+static bool tcp_md5sig_pool_populated = false;
 
 static void __tcp_alloc_md5sig_pool(void)
 {
 	int cpu;
-	struct tcp_md5sig_pool __percpu *pool;
-
-	pool = alloc_percpu(struct tcp_md5sig_pool);
-	if (!pool)
-		return;
 
 	for_each_possible_cpu(cpu) {
+		struct tcp_md5sig_pool *pool;
 		struct crypto_hash *hash;
 
-		hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR_OR_NULL(hash))
-			goto out_free;
-
-		per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
+		pool = per_cpu(tcp_md5sig_pool, cpu);
+		if (!pool) {
+			pool = kzalloc_node(sizeof(*pool), GFP_KERNEL,
+					    cpu_to_node(cpu));
+			if (!pool)
+				return;
+			per_cpu(tcp_md5sig_pool, cpu) = pool;
+		}
+		if (!pool->md5_desc.tfm) {
+			hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+			if (IS_ERR_OR_NULL(hash))
+				return;
+			pool->md5_desc.tfm = hash;
+		}
 	}
-	/* before setting tcp_md5sig_pool, we must commit all writes
-	 * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+	/* before setting tcp_md5sig_pool_populated, we must commit all writes
+	 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
 	 */
 	smp_wmb();
-	tcp_md5sig_pool = pool;
-	return;
-out_free:
-	__tcp_free_md5sig_pool(pool);
+	tcp_md5sig_pool_populated = true;
 }
 
 bool tcp_alloc_md5sig_pool(void)
 {
-	if (unlikely(!tcp_md5sig_pool)) {
+	if (unlikely(!tcp_md5sig_pool_populated)) {
 		mutex_lock(&tcp_md5sig_mutex);
 
-		if (!tcp_md5sig_pool)
+		if (!tcp_md5sig_pool_populated)
 			__tcp_alloc_md5sig_pool();
 
 		mutex_unlock(&tcp_md5sig_mutex);
 	}
-	return tcp_md5sig_pool != NULL;
+	return tcp_md5sig_pool_populated;
 }
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
@@ -2936,13 +2926,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
  */
 struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
 {
-	struct tcp_md5sig_pool __percpu *p;
-
 	local_bh_disable();
-	p = ACCESS_ONCE(tcp_md5sig_pool);
-	if (p)
-		return raw_cpu_ptr(p);
 
+	if (tcp_md5sig_pool_populated) {
+		/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool */
+		smp_rmb();
+		return this_cpu_read(tcp_md5sig_pool);
+	}
 	local_bh_enable();
 	return NULL;
 }

^ permalink raw reply related

* Re: localed stuck in recent 3.18 git in copy_net_ns?
From: Yanko Kaneti @ 2014-10-23  6:09 UTC (permalink / raw)
  To: paulmck
  Cc: Josh Boyer, Eric W. Biederman, Cong Wang, Kevin Fenzi, netdev,
	Linux-Kernel@Vger. Kernel. Org
In-Reply-To: <20141022232421.GN4977@linux.vnet.ibm.com>

On Wed, 2014-10-22 at 16:24 -0700, Paul E. McKenney wrote:
> On Thu, Oct 23, 2014 at 01:40:32AM +0300, Yanko Kaneti wrote:
> > On Wed-10/22/14-2014 15:33, Josh Boyer wrote:
> > > On Wed, Oct 22, 2014 at 2:55 PM, Paul E. McKenney
> > > <paulmck@linux.vnet.ibm.com> wrote:
> 
> [ . . . ]
> 
> > > > Don't get me wrong -- the fact that this kthread appears to 
> > > > have
> > > > blocked within rcu_barrier() for 120 seconds means that 
> > > > something is
> > > > most definitely wrong here.  I am surprised that there are no 
> > > > RCU CPU
> > > > stall warnings, but perhaps the blockage is in the callback 
> > > > execution
> > > > rather than grace-period completion.  Or something is 
> > > > preventing this
> > > > kthread from starting up after the wake-up callback executes.  
> > > > Or...
> > > > 
> > > > Is this thing reproducible?
> > > 
> > > I've added Yanko on CC, who reported the backtrace above and can
> > > recreate it reliably.  Apparently reverting the RCU merge commit
> > > (d6dd50e) and rebuilding the latest after that does not show the
> > > issue.  I'll let Yanko explain more and answer any questions you 
> > > have.
> > 
> > - It is reproducible
> > - I've done another build here to double check and its definitely 
> > the rcu merge
> >   that's causing it.
> > 
> > Don't think I'll be able to dig deeper, but I can do testing if 
> > needed.
> 
> Please!  Does the following patch help?

Nope, doesn't seem to make a difference to the modprobe ppp_generic 
test


INFO: task kworker/u16:6:101 blocked for more than 120 seconds.
      Not tainted 3.18.0-0.rc1.git2.3.fc22.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
kworker/u16:6   D ffff88022067cec0 11680   101      2 0x00000000
Workqueue: netns cleanup_net
 ffff8802206939e8 0000000000000096 ffff88022067cec0 00000000001d5f00
 ffff880220693fd8 00000000001d5f00 ffff880223263480 ffff88022067cec0
 ffffffff82c51d60 7fffffffffffffff ffffffff81ee2698 ffffffff81ee2690
Call Trace:
 [<ffffffff8185e289>] schedule+0x29/0x70
 [<ffffffff818634ac>] schedule_timeout+0x26c/0x410
 [<ffffffff81028c4a>] ? native_sched_clock+0x2a/0xa0
 [<ffffffff81107afc>] ? mark_held_locks+0x7c/0xb0
 [<ffffffff81864530>] ? _raw_spin_unlock_irq+0x30/0x50
 [<ffffffff81107c8d>] ? trace_hardirqs_on_caller+0x15d/0x200
 [<ffffffff8185fcbc>] wait_for_completion+0x10c/0x150
 [<ffffffff810e5430>] ? wake_up_state+0x20/0x20
 [<ffffffff8112a799>] _rcu_barrier+0x159/0x200
 [<ffffffff8112a895>] rcu_barrier+0x15/0x20
 [<ffffffff81718f0f>] netdev_run_todo+0x6f/0x310
 [<ffffffff8170dad5>] ? rollback_registered_many+0x265/0x2e0
 [<ffffffff81725f7e>] rtnl_unlock+0xe/0x10
 [<ffffffff8170f936>] default_device_exit_batch+0x156/0x180
 [<ffffffff810fd8f0>] ? abort_exclusive_wait+0xb0/0xb0
 [<ffffffff817079e3>] ops_exit_list.isra.1+0x53/0x60
 [<ffffffff81708590>] cleanup_net+0x100/0x1f0
 [<ffffffff810ccff8>] process_one_work+0x218/0x850
 [<ffffffff810ccf5f>] ? process_one_work+0x17f/0x850
 [<ffffffff810cd717>] ? worker_thread+0xe7/0x4a0
 [<ffffffff810cd69b>] worker_thread+0x6b/0x4a0
 [<ffffffff810cd630>] ? process_one_work+0x850/0x850
 [<ffffffff810d39eb>] kthread+0x10b/0x130
 [<ffffffff81028cc9>] ? sched_clock+0x9/0x10
 [<ffffffff810d38e0>] ? kthread_create_on_node+0x250/0x250
 [<ffffffff8186527c>] ret_from_fork+0x7c/0xb0
 [<ffffffff810d38e0>] ? kthread_create_on_node+0x250/0x250
4 locks held by kworker/u16:6/101:
 #0:  ("%s""netns"){.+.+.+}, at: [<ffffffff810ccf5f>] process_one_work+0x17f/0x850
 #1:  (net_cleanup_work){+.+.+.}, at: [<ffffffff810ccf5f>] process_one_work+0x17f/0x850
 #2:  (net_mutex){+.+.+.}, at: [<ffffffff8170851c>] cleanup_net+0x8c/0x1f0
 #3:  (rcu_sched_state.barrier_mutex){+.+...}, at: [<ffffffff8112a675>] _rcu_barrier+0x35/0x200
INFO: task modprobe:1139 blocked for more than 120 seconds.
      Not tainted 3.18.0-0.rc1.git2.3.fc22.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
modprobe        D ffff880213ac1a40 13112  1139   1138 0x00000080
 ffff880036ab3be8 0000000000000096 ffff880213ac1a40 00000000001d5f00
 ffff880036ab3fd8 00000000001d5f00 ffff880223264ec0 ffff880213ac1a40
 ffff880213ac1a40 ffffffff81f8fb48 0000000000000246 ffff880213ac1a40
Call Trace:
 [<ffffffff8185e831>] schedule_preempt_disabled+0x31/0x80
 [<ffffffff81860083>] mutex_lock_nested+0x183/0x440
 [<ffffffff817083af>] ? register_pernet_subsys+0x1f/0x50
 [<ffffffff817083af>] ? register_pernet_subsys+0x1f/0x50
 [<ffffffffa06f3000>] ? 0xffffffffa06f3000
 [<ffffffff817083af>] register_pernet_subsys+0x1f/0x50
 [<ffffffffa06f3048>] br_init+0x48/0xd3 [bridge]
 [<ffffffff81002148>] do_one_initcall+0xd8/0x210
 [<ffffffff81153c52>] load_module+0x20c2/0x2870
 [<ffffffff8114ec30>] ? store_uevent+0x70/0x70
 [<ffffffff8110ac76>] ? lock_release_non_nested+0x3c6/0x3d0
 [<ffffffff811544e7>] SyS_init_module+0xe7/0x140
 [<ffffffff81865329>] system_call_fastpath+0x12/0x17
1 lock held by modprobe/1139:
 #0:  (net_mutex){+.+.+.}, at: [<ffffffff817083af>] 
register_pernet_subsys+0x1f/0x50
INFO: task modprobe:1209 blocked for more than 120 seconds.
      Not tainted 3.18.0-0.rc1.git2.3.fc22.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
modprobe        D ffff8800c5324ec0 13368  1209   1151 0x00000080
 ffff88020d14bbe8 0000000000000096 ffff8800c5324ec0 00000000001d5f00
 ffff88020d14bfd8 00000000001d5f00 ffff880223280000 ffff8800c5324ec0
 ffff8800c5324ec0 ffffffff81f8fb48 0000000000000246 ffff8800c5324ec0
Call Trace:
 [<ffffffff8185e831>] schedule_preempt_disabled+0x31/0x80
 [<ffffffff81860083>] mutex_lock_nested+0x183/0x440
 [<ffffffff817083fd>] ? register_pernet_device+0x1d/0x70
 [<ffffffff817083fd>] ? register_pernet_device+0x1d/0x70
 [<ffffffffa070f000>] ? 0xffffffffa070f000
 [<ffffffff817083fd>] register_pernet_device+0x1d/0x70
 [<ffffffffa070f020>] ppp_init+0x20/0x1000 [ppp_generic]
 [<ffffffff81002148>] do_one_initcall+0xd8/0x210
 [<ffffffff81153c52>] load_module+0x20c2/0x2870
 [<ffffffff8114ec30>] ? store_uevent+0x70/0x70
 [<ffffffff8110ac76>] ? lock_release_non_nested+0x3c6/0x3d0
 [<ffffffff811544e7>] SyS_init_module+0xe7/0x140
 [<ffffffff81865329>] system_call_fastpath+0x12/0x17
1 lock held by modprobe/1209:
 #0:  (net_mutex){+.+.+.}, at: [<ffffffff817083fd>] register_pernet_device+0x1d/0x70


>                 Thanx, Paul
> 
> ---------------------------------------------------------------------
> ---
> 
> rcu: More on deadlock between CPU hotplug and expedited grace periods
> 
> Commit dd56af42bd82 (rcu: Eliminate deadlock between CPU hotplug and
> expedited grace periods) was incomplete.  Although it did eliminate
> deadlocks involving synchronize_sched_expedited()'s acquisition of
> cpu_hotplug.lock via get_online_cpus(), it did nothing about the 
> similar
> deadlock involving acquisition of this same lock via 
> put_online_cpus().
> This deadlock became apparent with testing involving hibernation.
> 
> This commit therefore changes put_online_cpus() acquisition of this 
> lock
> to be conditional, and increments a new cpu_hotplug.puts_pending 
> field
> in case of acquisition failure.  Then cpu_hotplug_begin() checks for 
> this
> new field being non-zero, and applies any changes to 
> cpu_hotplug.refcount.
> 
> Reported-by: Jiri Kosina <jkosina@suse.cz>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> Tested-by: Jiri Kosina <jkosina@suse.cz>
> 
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 356450f09c1f..90a3d017b90c 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -64,6 +64,8 @@ static struct {
>         * an ongoing cpu hotplug operation.
>         */
>         int refcount;
> +       /* And allows lockless put_online_cpus(). */
> +       atomic_t puts_pending;
> 
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>         struct lockdep_map dep_map;
> @@ -113,7 +115,11 @@ void put_online_cpus(void)
>  {
>         if (cpu_hotplug.active_writer == current)
>         return;
> -       mutex_lock(&cpu_hotplug.lock);
> +       if (!mutex_trylock(&cpu_hotplug.lock)) {
> +       atomic_inc(&cpu_hotplug.puts_pending);
> +       cpuhp_lock_release();
> +       return;
> +       }
> 
>         if (WARN_ON(!cpu_hotplug.refcount))
>         cpu_hotplug.refcount++; /* try to fix things up */
> @@ -155,6 +161,12 @@ void cpu_hotplug_begin(void)
>         cpuhp_lock_acquire();
>         for (;;) {
>         mutex_lock(&cpu_hotplug.lock);
> +       if (atomic_read(&cpu_hotplug.puts_pending)) {
> +       int delta;
> +
> +       delta = atomic_xchg(&cpu_hotplug.puts_pending, 0);
> +       cpu_hotplug.refcount -= delta;
> +       }
>         if (likely(!cpu_hotplug.refcount))
>         break;
>         __set_current_state(TASK_UNINTERRUPTIBLE);
> 
> 

^ permalink raw reply

* Re: irq disable in __netdev_alloc_frag() ?
From: Alexei Starovoitov @ 2014-10-23  6:12 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Alexander Duyck, Eric Dumazet, Network Development
In-Reply-To: <1414041244.2094.26.camel@edumazet-glaptop2.roam.corp.google.com>

On Wed, Oct 22, 2014 at 10:14 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> Really, I doubt this will make any difference in real workloads
> where bottleneck is memory bandwidth, not the time taken by
> pushf/cli/popf.
>
> What difference do you have on a real workload like : netperf TCP_RR or
> UDP_RR ?

there are different workloads. l2/l3 switching is very real
as well. For tcp_rr on the host the difference will be
minimal, since the time is spent elsewhere. For cases
that don't use tcp stack, but do pure l2/l3 processing
all improvements will add up. imo the kernel should be
optimized for all use cases when possible.

^ permalink raw reply

* Re: [RFC] tcp md5 use of alloc_percpu
From: Jonathan Toppins @ 2014-10-23  6:58 UTC (permalink / raw)
  To: Eric Dumazet, David Ahern; +Cc: Crestez Dan Leonard, netdev
In-Reply-To: <1414042688.2094.30.camel@edumazet-glaptop2.roam.corp.google.com>

On 10/23/14, 1:38 AM, Eric Dumazet wrote:
> On Wed, 2014-10-22 at 22:23 -0700, Eric Dumazet wrote:
>> On Wed, 2014-10-22 at 22:40 -0600, David Ahern wrote:
>>> On 10/22/14, 12:55 PM, Crestez Dan Leonard wrote:
>>>> Hello,
>>>>
>>>> It seems that the TCP MD5 feature allocates a percpu struct tcp_md5sig_pool and uses part of that memory for a scratch buffer to do crypto on. Here is the relevant code:
>>>
>>> This is a forward port of a local change to address the problem (local 
>>> kernel version is 3.4 so perhaps my quick bump to top of tree is off but 
>>> it shows the general idea). Been on my to-do list to figure out why this 
>>> is needed, but it seems related to your problem:
>>>
>>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>>> index 1bec4e76d88c..833a676bd4b0 100644
>>> --- a/net/ipv4/tcp.c
>>> +++ b/net/ipv4/tcp.c
>>> @@ -2941,7 +2941,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
>>>      local_bh_disable();
>>>      p = ACCESS_ONCE(tcp_md5sig_pool);
>>>      if (p)
>>> -       return raw_cpu_ptr(p);
>>> +       return __va(per_cpu_ptr_to_phys(raw_cpu_ptr(p)));
>>>
>>>      local_bh_enable();
>>>      return NULL;
>>
>> per_cpu_ptr_to_phys() can be pretty expensive and should not be called
>> in fast path.
>>
> 
> My updated patch would be :
> 
>  net/ipv4/tcp.c |   66 +++++++++++++++++++----------------------------
>  1 file changed, 28 insertions(+), 38 deletions(-)
> 
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 1bec4e76d88c..af4dc16b61f6 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2868,61 +2868,51 @@ EXPORT_SYMBOL(compat_tcp_getsockopt);
>  #endif
>  
>  #ifdef CONFIG_TCP_MD5SIG
> -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
> +static DEFINE_PER_CPU(struct tcp_md5sig_pool, *tcp_md5sig_pool);
>  static DEFINE_MUTEX(tcp_md5sig_mutex);
> -
> -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
> -{
> -	int cpu;
> -
> -	for_each_possible_cpu(cpu) {
> -		struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
> -
> -		if (p->md5_desc.tfm)
> -			crypto_free_hash(p->md5_desc.tfm);
> -	}
> -	free_percpu(pool);
> -}
> +static bool tcp_md5sig_pool_populated = false;
>  
>  static void __tcp_alloc_md5sig_pool(void)
>  {
>  	int cpu;
> -	struct tcp_md5sig_pool __percpu *pool;
> -
> -	pool = alloc_percpu(struct tcp_md5sig_pool);
> -	if (!pool)
> -		return;
>  
>  	for_each_possible_cpu(cpu) {
> +		struct tcp_md5sig_pool *pool;
>  		struct crypto_hash *hash;
>  
> -		hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
> -		if (IS_ERR_OR_NULL(hash))
> -			goto out_free;
> -
> -		per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
> +		pool = per_cpu(tcp_md5sig_pool, cpu);
> +		if (!pool) {
> +			pool = kzalloc_node(sizeof(*pool), GFP_KERNEL,
GFP_DMA | GFP_KERNEL
This memory will possibly be used in a DMA correct? (thinking crypto
hardware offload)
> +					    cpu_to_node(cpu));
> +			if (!pool)
> +				return;
> +			per_cpu(tcp_md5sig_pool, cpu) = pool;
> +		}
> +		if (!pool->md5_desc.tfm) {
> +			hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
> +			if (IS_ERR_OR_NULL(hash))
> +				return;
> +			pool->md5_desc.tfm = hash;
> +		}
>  	}
> -	/* before setting tcp_md5sig_pool, we must commit all writes
> -	 * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
> +	/* before setting tcp_md5sig_pool_populated, we must commit all writes
> +	 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
>  	 */
>  	smp_wmb();
> -	tcp_md5sig_pool = pool;
> -	return;
> -out_free:
> -	__tcp_free_md5sig_pool(pool);
> +	tcp_md5sig_pool_populated = true;
>  }
>  
>  bool tcp_alloc_md5sig_pool(void)
>  {
> -	if (unlikely(!tcp_md5sig_pool)) {
> +	if (unlikely(!tcp_md5sig_pool_populated)) {
>  		mutex_lock(&tcp_md5sig_mutex);
>  
> -		if (!tcp_md5sig_pool)
> +		if (!tcp_md5sig_pool_populated)
>  			__tcp_alloc_md5sig_pool();
>  
>  		mutex_unlock(&tcp_md5sig_mutex);
>  	}
> -	return tcp_md5sig_pool != NULL;
> +	return tcp_md5sig_pool_populated;
>  }
>  EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
>  
> @@ -2936,13 +2926,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
>   */
>  struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
>  {
> -	struct tcp_md5sig_pool __percpu *p;
> -
>  	local_bh_disable();
> -	p = ACCESS_ONCE(tcp_md5sig_pool);
> -	if (p)
> -		return raw_cpu_ptr(p);
>  
> +	if (tcp_md5sig_pool_populated) {
> +		/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool */
> +		smp_rmb();
> +		return this_cpu_read(tcp_md5sig_pool);
> +	}
>  	local_bh_enable();
>  	return NULL;
>  }
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [PATCH v2] drivers: net: xgene: Rewrite loop in xgene_enet_ecc_init()
From: Geert Uytterhoeven @ 2014-10-23  7:05 UTC (permalink / raw)
  To: David Miller
  Cc: Iyappan Subramanian, kchudgar, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20141022.161257.290251379136182774.davem@davemloft.net>

On Wed, Oct 22, 2014 at 10:12 PM, David Miller <davem@davemloft.net> wrote:
> From: Geert Uytterhoeven <geert@linux-m68k.org>
> Date: Wed, 22 Oct 2014 21:50:06 +0200
>
>> On Wed, Oct 22, 2014 at 9:34 PM, David Miller <davem@davemloft.net> wrote:
>>> From: Geert Uytterhoeven <geert@linux-m68k.org>
>>> Date: Wed, 22 Oct 2014 09:39:41 +0200
>>>
>>>> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c: In function ‘xgene_enet_ecc_init’:
>>>> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c:126: warning: ‘data’ may be used uninitialized in this function
>>>>
>>>> Depending on the arbitrary value on the stack, the loop may terminate
>>>> too early, and cause a bogus -ENODEV failure.
>>>>
>>>> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
>>>> ---
>>>> v2: Rewrite the loop instead of pre-initializing data.
>>>
>>> I hate to be a pest, but like the other patch of your's I think
>>> a do { } while() works best here because the intent is clearly
>>> to run the loop at least once, right?
>>
>> I wanted to avoid checking for "data != ~0U" twice: once to abort the loop,
>> and once to check if a timeout happened.
>
> Hmmm:
>
>         do {
>                 usleep_range(...);
>                 data = ...();
>                 if (data == ~0)
>                         return 0;
>         } while (++i < 10);
>
>         netdev_err(...);
>         return -ENODEV;
>
> Why would you have to check data twice?



-- 
Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH v2] drivers: net: xgene: Rewrite loop in xgene_enet_ecc_init()
From: Geert Uytterhoeven @ 2014-10-23  7:06 UTC (permalink / raw)
  To: David Miller
  Cc: Iyappan Subramanian, kchudgar, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20141022.161257.290251379136182774.davem@davemloft.net>

On Wed, Oct 22, 2014 at 10:12 PM, David Miller <davem@davemloft.net> wrote:
> From: Geert Uytterhoeven <geert@linux-m68k.org>
> Date: Wed, 22 Oct 2014 21:50:06 +0200
>
>> On Wed, Oct 22, 2014 at 9:34 PM, David Miller <davem@davemloft.net> wrote:
>>> From: Geert Uytterhoeven <geert@linux-m68k.org>
>>> Date: Wed, 22 Oct 2014 09:39:41 +0200
>>>
>>>> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c: In function ‘xgene_enet_ecc_init’:
>>>> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c:126: warning: ‘data’ may be used uninitialized in this function
>>>>
>>>> Depending on the arbitrary value on the stack, the loop may terminate
>>>> too early, and cause a bogus -ENODEV failure.
>>>>
>>>> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
>>>> ---
>>>> v2: Rewrite the loop instead of pre-initializing data.
>>>
>>> I hate to be a pest, but like the other patch of your's I think
>>> a do { } while() works best here because the intent is clearly
>>> to run the loop at least once, right?
>>
>> I wanted to avoid checking for "data != ~0U" twice: once to abort the loop,
>> and once to check if a timeout happened.
>
> Hmmm:
>
>         do {
>                 usleep_range(...);
>                 data = ...();
>                 if (data == ~0)
>                         return 0;
>         } while (++i < 10);
>
>         netdev_err(...);
>         return -ENODEV;
>
> Why would you have to check data twice?

Yes, that would work to.

Feel free to do s/for (i = 0; i < 10; i++)/do/ and s/}/} while (++i < 10);/

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH] igb: don't reuse pages with pfmemalloc flag
From: Roman Gushchin @ 2014-10-23  7:52 UTC (permalink / raw)
  To: Jeff Kirsher
  Cc: alexander.h.duyck@intel.com, netdev@vger.kernel.org,
	sassmann@kpanic.de, e1000-devel@lists.sourceforge.net,
	peter.p.waskiewicz.jr@intel.com, bruce.w.allan@intel.com,
	jesse.brandeburg@intel.com, davem@davemloft.net,
	john.ronciak@intel.com, gregkh@linuxfoundation.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <1414002630.2179.0.camel@jtkirshe-mobl>

Thank you!

Probably we should add it to stable trees too?

--
Regards,
Roman

22.10.2014, 22:30, "Jeff Kirsher" <jeffrey.t.kirsher@intel.com>:
> On Wed, 2014-10-22 at 17:50 +0400, Roman Gushchin wrote:
>>  Incoming packet is dropped silently by sk_filter(), if the skb was
>>  allocated from pfmemalloc reserves and the corresponding socket is
>>  not marked with the SOCK_MEMALLOC flag.
>>
>>  Igb driver allocates pages for DMA with __skb_alloc_page(), which
>>  calls alloc_pages_node() with the __GFP_MEMALLOC flag. So, in case
>>  of OOM condition, igb can get pages with pfmemalloc flag set.
>>
>>  If an incoming packet hits the pfmemalloc page and is large enough
>>  (small packets are copying into the memory, allocated with
>>  netdev_alloc_skb_ip_align(), so they are not affected), it will be
>>  dropped.
>>
>>  This behavior is ok under high memory pressure, but the problem is
>>  that the igb driver reuses these mapped pages. So, packets are still
>>  dropping even if all memory issues are gone and there is a plenty
>>  of free memory.
>>
>>  In my case, some TCP sessions hang on a small percentage (< 0.1%)
>>  of machines days after OOMs.
>>
>>  Fix this by avoiding reuse of such pages.
>>
>>  Signed-off-by: Roman Gushchin <klamm@yandex-team.ru>
>>  ---
>>   drivers/net/ethernet/intel/igb/igb_main.c | 6 +++++-
>>   1 file changed, 5 insertions(+), 1 deletion(-)
>
> Thanks Roman, I have added you patch to my queue.

------------------------------------------------------------------------------
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* [PATCH net-next 2/3] gianfar: Fix the way the local advertising flow options are determined
From: Matei Pavaluca @ 2014-10-23  7:57 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Claudiu Manoil, Pavaluca Matei
In-Reply-To: <1414051077-4330-1-git-send-email-matei.pavaluca@freescale.com>

From: Pavaluca Matei <matei.pavaluca@freescale.com>

Local flow control options needed in order to resolve the negotiation
are incorrectly calculated.

Previously 'mii_advertise_flowctrl' was called to determine the local advertising
options, but these were determined based on FLOW_CTRL_RX/TX flags which are
never set through ethtool.
The patch simply translates from ethtool flow options to mii flow options.

Signed-off-by: Pavaluca Matei <matei.pavaluca@freescale.com>
---
 drivers/net/ethernet/freescale/gianfar.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 2485b74..329efca 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -3373,7 +3373,11 @@ static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv)
 		if (phydev->asym_pause)
 			rmt_adv |= LPA_PAUSE_ASYM;
 
-		lcl_adv = mii_advertise_flowctrl(phydev->advertising);
+		lcl_adv = 0;
+		if (phydev->advertising & ADVERTISED_Pause)
+			lcl_adv |= ADVERTISE_PAUSE_CAP;
+		if (phydev->advertising & ADVERTISED_Asym_Pause)
+			lcl_adv |= ADVERTISE_PAUSE_ASYM;
 
 		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
 		if (flowctrl & FLOW_CTRL_TX)
-- 
1.7.11.7

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox