Netdev List
 help / color / mirror / Atom feed
* [PATCH 2/2] net: dsa: introduce MICREL KSZ8893MQL/BL ethernet switch chip support
From: Mike Frysinger @ 2010-07-21 13:37 UTC (permalink / raw)
  To: netdev, David S. Miller
  Cc: uclinux-dist-devel, Karl Beldan, Lennert Buytenhek, Graf Yang,
	Bryan Wu
In-Reply-To: <1279719442-10174-1-git-send-email-vapier@gentoo.org>

From: Graf Yang <graf.yang@analog.com>

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 net/dsa/Kconfig    |    7 +
 net/dsa/Makefile   |    1 +
 net/dsa/ksz8893m.c |  359 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/dsa/ksz8893m.h |  223 ++++++++++++++++++++++++++++++++
 4 files changed, 590 insertions(+), 0 deletions(-)
 create mode 100644 net/dsa/ksz8893m.c
 create mode 100644 net/dsa/ksz8893m.h

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index ee8d705..4a87436 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -60,4 +60,11 @@ config NET_DSA_MV88E6123_61_65
 	  This enables support for the Marvell 88E6123/6161/6165
 	  ethernet switch chips.
 
+config NET_DSA_KSZ8893M
+	bool "MICREL KSZ8893MQL/BL ethernet switch chip support"
+	select NET_DSA_TAG_STPID
+	---help---
+	  This enables support for the Micrel KSZ8893MQL/BL
+	  ethernet switch chips.
+
 endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 4881577..c4295e3 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o
 obj-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o
+obj-$(CONFIG_NET_DSA_KSZ8893M) += ksz8893m.o
 
 # the core
 obj-$(CONFIG_NET_DSA) += dsa.o slave.o
diff --git a/net/dsa/ksz8893m.c b/net/dsa/ksz8893m.c
new file mode 100644
index 0000000..98cce04
--- /dev/null
+++ b/net/dsa/ksz8893m.c
@@ -0,0 +1,359 @@
+/*
+ * Integrated 3-Port 10/100 Managed Switch with PHYs
+ *
+ * - KSZ8893M support
+ *
+ * Copyright 2008-2010 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#define pr_fmt(fmt) "ksz8893m: " fmt
+
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+#include <linux/spi/spi.h>
+#include "dsa_priv.h"
+#include "ksz8893m.h"
+
+#define  BUF_LEN 6
+
+static struct _spi_switch {
+	struct spi_transfer xfer;
+	struct spi_device *dev;
+} sw;
+
+static int ksz8893m_read(unsigned char *din, unsigned char reg, int len)
+{
+	int i, ret;
+	struct spi_message message;
+	unsigned char dout[BUF_LEN];
+	struct spi_transfer *t = &sw.xfer;
+
+	t->len = len;
+	t->tx_buf = dout;
+	t->rx_buf = din;
+	dout[0] = SPI_READ;
+	dout[1] = reg;
+	for (i = 2; i < len; i++)
+		dout[i] = 0;
+
+	spi_message_init(&message);
+	spi_message_add_tail(t, &message);
+	ret = spi_sync(sw.dev, &message);
+	if (!ret)
+		return message.status;
+
+	pr_err("read reg%d failed, ret=%d\n", reg, ret);
+	return ret;
+}
+
+static int ksz8893m_write(unsigned char *dout, unsigned char reg, int len)
+{
+	int ret;
+	struct spi_message message;
+	unsigned char din[BUF_LEN];
+	struct spi_transfer *t = &sw.xfer;
+
+	t->len = len;
+	t->tx_buf = dout;
+	t->rx_buf = din;
+	dout[0] = SPI_WRITE;
+	dout[1] = reg;
+
+	spi_message_init(&message);
+	spi_message_add_tail(t, &message);
+	ret = spi_sync(sw.dev, &message);
+	if (!ret)
+		return message.status;
+
+	pr_err("write reg%d failed, ret=%d\n", reg, ret);
+	return ret;
+}
+
+static char *ksz8893m_probe(struct mii_bus *bus, int sw_addr)
+{
+	int ret, phyid_low, phyid_high;
+	unsigned char din[BUF_LEN];
+
+	phyid_high = mdiobus_read(bus, KSZ8893M_CPU_PORT, MII_PHYSID1);
+	phyid_low = mdiobus_read(bus, KSZ8893M_CPU_PORT, MII_PHYSID2);
+	if (phyid_high != PHYID_HIGH || phyid_low != PHYID_LOW)
+		return NULL;
+
+	ret = ksz8893m_read(din, ChipID0, 3);
+
+	if (!ret && FAMILY_ID == din[2])
+		return "KSZ8893M";
+
+	return NULL;
+}
+
+static int ksz8893m_switch_reset(struct dsa_switch *ds)
+{
+	return 0;
+}
+
+static int ksz8893m_setup_global(struct dsa_switch *ds)
+{
+	int ret;
+	unsigned char dout[BUF_LEN];
+	unsigned char din[BUF_LEN];
+
+	/* Set VLAN VID of port1 */
+	ret = ksz8893m_read(din, Port1Control3, 3);
+	if (ret)
+		return ret;
+	din[2] &= 0xf0;
+	dout[2] = (DEFAULT_PORT_VID & 0xfff) >> 8 | din[2];
+	dout[3] = DEFAULT_PORT_VID & 0xff;
+	ret = ksz8893m_write(dout, Port1Control3, 4);
+	if (ret)
+		return ret;
+
+	/* Set VLAN VID of port2 */
+	ret = ksz8893m_read(din, Port2Control3, 3);
+	if (ret)
+		return ret;
+	din[2] &= 0xf0;
+	dout[2] = (DEFAULT_PORT_VID & 0xfff) >> 8 | din[2];
+	dout[3] = DEFAULT_PORT_VID & 0xff;
+	ret = ksz8893m_write(dout, Port2Control3, 4);
+	if (ret)
+		return ret;
+
+	/* Set VLAN VID of port3 */
+	ret = ksz8893m_read(din, Port3Control3, 3);
+	if (ret)
+		return ret;
+	din[2] &= 0xf0;
+	dout[2] = (DEFAULT_PORT_VID & 0xfff) >> 8 | din[2];
+	dout[3] = DEFAULT_PORT_VID & 0xff;
+	ret = ksz8893m_write(dout, Port3Control3, 4);
+	if (ret)
+		return ret;
+
+	/* Insert VLAN tag that egress Port3 */
+	ret = ksz8893m_read(din, Port3Control0, 3);
+	if (ret)
+		return ret;
+	dout[2] = TAG_INSERTION | din[2];
+	ret = ksz8893m_write(dout, Port3Control0, 3);
+	if (ret)
+		return ret;
+
+	/* Enable STPID Mode */
+	ret = ksz8893m_read(din, GlobalControl9, 3);
+	if (ret)
+		return ret;
+	dout[2] = SPECIAL_TPID_MODE | din[2];
+	ret = ksz8893m_write(dout, GlobalControl9, 3);
+	if (ret)
+		return ret;
+
+	/* Start switch */
+	dout[2] = START_SWITCH;
+	ret = ksz8893m_write(dout, ChipID1_StartSwitch, 3);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int ksz8893m_setup_port(struct dsa_switch *ds, int p)
+{
+	int val, ret;
+	val = mdiobus_read(ds->master_mii_bus, p, MII_BMCR);
+	if (val < 0)
+		return val;
+	val |= AN_ENABLE | FORCE_100 | FORCE_FULL_DUPLEX;
+	val &= ~(POWER_DOWN | DISABLE_MDIX | DIS_FAR_END_FAULT |\
+			DISABLE_TRANSMIT | DISABLE_LED);
+	ret = mdiobus_write(ds->master_mii_bus, p, MII_BMCR, val);
+	if (ret < 0)
+		return ret;
+
+	val = mdiobus_read(ds->master_mii_bus, p, MII_ADVERTISE);
+	if (val < 0)
+		return val;
+	val |= ADV_10_HALF | ADV_10_FULL | ADV_100_HALF | ADV_100_FULL;
+	ret = mdiobus_write(ds->master_mii_bus, p, MII_ADVERTISE, val);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static int ksz8893m_setup(struct dsa_switch *ds)
+{
+	int i;
+	int ret;
+
+	ret = ksz8893m_switch_reset(ds);
+	if (ret < 0)
+		return ret;
+
+	ret = ksz8893m_setup_global(ds);
+	if (ret < 0)
+		return ret;
+
+	for (i = 1; i < KSZ8893M_PORT_NUM; i++) {
+		ret = ksz8893m_setup_port(ds, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int ksz8893m_set_addr(struct dsa_switch *ds, u8 *addr)
+{
+	return 0;
+}
+
+static int ksz8893m_port_to_phy_addr(int port)
+{
+	if (port >= 1 && port <= KSZ8893M_PORT_NUM)
+		return port;
+
+	pr_warning("use default phy addr 3\n");
+	return 3;
+}
+
+static int
+ksz8893m_phy_read(struct dsa_switch *ds, int port, int regnum)
+{
+	int phy_addr = ksz8893m_port_to_phy_addr(port);
+	return mdiobus_read(ds->master_mii_bus, phy_addr, regnum);
+}
+
+static int
+ksz8893m_phy_write(struct dsa_switch *ds,
+			      int port, int regnum, u16 val)
+{
+	int phy_addr = ksz8893m_port_to_phy_addr(port);
+	return mdiobus_write(ds->master_mii_bus, phy_addr, regnum, val);
+}
+
+static void ksz8893m_poll_link(struct dsa_switch *ds)
+{
+	int i;
+
+	for (i = 1; i < KSZ8893M_PORT_NUM; i++) {
+		struct net_device *dev;
+		int val;
+		int link;
+		int speed;
+		int duplex;
+		int anc;
+
+		dev = ds->ports[i];
+		if (dev == NULL)
+			continue;
+
+		link = 0;
+		if (dev->flags & IFF_UP) {
+			val = mdiobus_read(ds->master_mii_bus, i, MII_BMSR);
+			if (val < 0)
+				continue;
+
+			link = !!(val & LINK_STATUS);
+			anc = !!(val & AN_COMPLETE);
+		}
+
+		if (!link) {
+			if (netif_carrier_ok(dev)) {
+				printk(KERN_INFO "%s: link down\n", dev->name);
+				netif_carrier_off(dev);
+			}
+			continue;
+		}
+
+		speed = 10;
+		duplex = 0;
+		val = mdiobus_read(ds->master_mii_bus, i, MII_BMSR);
+		if (val < 0)
+			continue;
+		val &= HALF_10_CAPABLE | FULL_10_CAPABLE |\
+		       HALF_100_CAPABLE | FULL_100_CAPABLE;
+		if (val & FULL_100_CAPABLE) {
+			speed = 100;
+			duplex = 1;
+		} else if (val & HALF_100_CAPABLE) {
+			speed = 100;
+			duplex = 0;
+		} else if (val & FULL_10_CAPABLE) {
+			speed = 10;
+			duplex = 1;
+		}
+
+		if (!netif_carrier_ok(dev)) {
+			printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex\n",
+					dev->name, speed, duplex ? "full" : "half");
+			netif_carrier_on(dev);
+		}
+	}
+}
+
+static int __devinit spi_switch_probe(struct spi_device *spi)
+{
+	if (sw.dev) {
+		pr_err("only one instance supported at a time\n");
+		return 1;
+	}
+	memset(&sw.xfer, 0, sizeof(sw.xfer));
+	sw.dev = spi;
+	return 0;
+}
+
+static int __devexit spi_switch_remove(struct spi_device *spi)
+{
+	sw.dev = NULL;
+	return 0;
+}
+
+static struct spi_driver spi_switch_driver = {
+	.driver = {
+		.name	= "ksz8893m",
+		.bus	= &spi_bus_type,
+		.owner	= THIS_MODULE,
+	},
+	.probe	= spi_switch_probe,
+	.remove	= __devexit_p(spi_switch_remove),
+};
+
+static struct dsa_switch_driver ksz8893m_switch_driver = {
+	.tag_protocol		= __constant_htons(ETH_P_STPID),
+	.probe			= ksz8893m_probe,
+	.setup			= ksz8893m_setup,
+	.set_addr		= ksz8893m_set_addr,
+	.phy_read		= ksz8893m_phy_read,
+	.phy_write		= ksz8893m_phy_write,
+	.poll_link		= ksz8893m_poll_link,
+};
+
+static int __init ksz8893m_init(void)
+{
+	int ret;
+
+	ret = spi_register_driver(&spi_switch_driver);
+	if (ret) {
+		pr_err("can't register driver\n");
+		return ret;
+	}
+
+	register_switch_driver(&ksz8893m_switch_driver);
+	return 0;
+}
+module_init(ksz8893m_init);
+
+static void __exit ksz8893m_cleanup(void)
+{
+	spi_unregister_driver(&spi_switch_driver);
+	unregister_switch_driver(&ksz8893m_switch_driver);
+}
+module_exit(ksz8893m_cleanup);
+
+MODULE_AUTHOR("Graf Yang <graf.yang@analog.com>");
+MODULE_DESCRIPTION("KSZ8893M driver for DSA");
+MODULE_LICENSE("GPL");
diff --git a/net/dsa/ksz8893m.h b/net/dsa/ksz8893m.h
new file mode 100644
index 0000000..84f44e9
--- /dev/null
+++ b/net/dsa/ksz8893m.h
@@ -0,0 +1,223 @@
+/*
+ * Integrated 3-Port 10/100 Managed Switch with PHYs
+ *
+ * - KSZ8893M support
+ *
+ * Copyright 2008-2010 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __KSZ8893M_H__
+#define __KSZ8893M_H__
+
+#include <linux/netdevice.h>
+
+#define KSZ8893M_PORT_NUM 3
+#define KSZ8893M_CPU_PORT 3
+
+#define DEFAULT_PORT_VID  0
+
+#define SPI_READ          3
+#define SPI_WRITE         2
+
+/* PHYID High */
+#define PHYID_HIGH        0x22
+/* PHYID Low */
+#define PHYID_LOW         0x1430
+
+/* MII Basic Control */
+#define SOFT_RESET        0x8000
+#define LOOPBACK          0x4000
+#define FORCE_100         0x2000
+#define AN_ENABLE         0x1000
+#define POWER_DOWN        0x0800
+#define ISOLATE           0x0400
+#define RESTART_AN        0x0200
+#define FORCE_FULL_DUPLEX 0x0100
+#define COLLISION_TEST    0x0080
+/* Bit Reserved */
+#define HP_MDIX           0x0020
+#define Force_MDI         0x0010
+#define DISABLE_MDIX      0x0008
+#define DIS_FAR_END_FAULT 0x0004
+#define DISABLE_TRANSMIT  0x0002
+#define DISABLE_LED       0x0001
+
+/* MII Basic Status */
+#define T4_CAPABLE        0x8000
+#define FULL_100_CAPABLE  0x4000
+#define HALF_100_CAPABLE  0x2000
+#define FULL_10_CAPABLE   0x1000
+#define HALF_10_CAPABLE   0x0800
+/* 4 Bits Reserved */
+#define PREAMBLE_SUPPRESS 0x0040
+#define AN_COMPLETE       0x0020
+#define FAR_END_FAULT     0x0010
+#define AN_CAPABLE        0x0008
+#define LINK_STATUS       0x0004
+#define JABBER_TEST       0x0002
+#define EXTENDED_CAPABLE  0x0001
+
+/* Auto-Negotiation Advertisement Ability */
+#define NEXT_PAGE         0x8000
+/* Bit Reserved */
+#define REMOTE_FAULT      0x2000
+/* 2 Bits Reserved */
+#define PAUSE             0x0400
+/* Bit Reserved */
+#define ADV_100_FULL      0x0100
+#define ADV_100_HALF      0x0080
+#define ADV_10_FULL       0x0040
+#define ADV_10_HALF       0x0020
+#define SELECTOR_FIELD    0x001F
+
+/* Auto-Negotiation Link Partner Ability */
+#define NEXT_PAGE         0x8000
+#define LP_ACK            0x4000
+#define REMOTE_FAULT      0x2000
+/* 2 Bits Reserved */
+#define PAUSE             0x0400
+/* Bit Reserved */
+#define ADV_100_FULL      0x0100
+#define ADV_100_HALF      0x0080
+#define ADV_10_FULL       0x0040
+#define ADV_10_HALF       0x0020
+/* 5 Bits Reserved */
+
+/* LinkMD Control/Status */
+#define VCT_ENABLE        0x8000
+#define VCT_RESULT        0x6000
+#define VCT_10M_SHORT     0x1000
+/* 3 Bits Reserved */
+#define VCT_FAULT_COUNT   0x01FF
+
+/* PHY Special Control/Status */
+/* 10 Bits Reserved */
+#define POLRVS            0x0020
+#define MDI_X_STATUS      0x0010
+#define FORCE_LNK         0x0008
+#define PWRSAVE           0x0004
+#define REMOTE_LOOPBACK   0x0002
+/* Bit Reserved */
+
+
+#define FAMILY_ID         0x88
+#define START_SWITCH      0x01
+#define TAG_INSERTION     0x04
+#define SPECIAL_TPID_MODE 0x01
+
+
+enum switch_phy_reg {
+	/* Global Registers: 0-15 */
+	ChipID0 = 0,
+	ChipID1_StartSwitch,
+	GlobalControl0,
+	GlobalControl1,
+	GlobalControl2, /* 4 */
+	GlobalControl3,
+	GlobalControl4,
+	GlobalControl5,
+	GlobalControl6, /* 8 */
+	GlobalControl7,
+	GlobalControl8,
+	GlobalControl9,
+	GlobalControl10, /* 12 */
+	GlobalControl11,
+	GlobalControl12,
+	GlobalControl13,
+	/* Port Registers: 16-95 */
+	Port1Control0 = 16,
+	Port1Control1,
+	Port1Control2,
+	Port1Control3,
+	Port1Control4, /* 20 */
+	Port1Control5,
+	Port1Control6,
+	Port1Control7,
+	Port1Control8, /* 24 */
+	Port1Control9,
+	Port1PHYSpecialControl_Status,
+	Port1LinkMDResult,
+	Port1Control12, /* 28 */
+	Port1Control13,
+	Port1Status0,
+	Port1Status1,
+	Port2Control0, /* 32 */
+	Port2Control1,
+	Port2Control2,
+	Port2Control3,
+	Port2Control4, /* 36 */
+	Port2Control5,
+	Port2Control6,
+	Port2Control7,
+	Port2Control8, /* 40 */
+	Port2Control9,
+	Port2PHYSpecialControl_Status,
+	Port2LinkMDResult,
+	Port2Control12, /* 44 */
+	Port2Control13,
+	Port2Status0,
+	Port2Status1,
+	Port3Control0, /* 48 */
+	Port3Control1,
+	Port3Control2,
+	Port3Control3,
+	Port3Control4, /* 52 */
+	Port3Control5,
+	Port3Control6,
+	Port3Control7,
+	Port3Control8, /* 56 */
+	Port3Control9,
+	Reservednotappliedtoport3, /* 58-62 */
+	Port3Status1 = 63,
+	/* Advanced Control Registers: 96-141 */
+	TOSPriorityControlRegister0 = 96,
+	TOSPriorityControlRegister1,
+	TOSPriorityControlRegister2,
+	TOSPriorityControlRegister3,
+	TOSPriorityControlRegister4, /* 100 */
+	TOSPriorityControlRegister5,
+	TOSPriorityControlRegister6,
+	TOSPriorityControlRegister7,
+	TOSPriorityControlRegister8, /* 104 */
+	TOSPriorityControlRegister9,
+	TOSPriorityControlRegister10,
+	TOSPriorityControlRegister11,
+	TOSPriorityControlRegister12, /* 108 */
+	TOSPriorityControlRegister13,
+	TOSPriorityControlRegister14,
+	TOSPriorityControlRegister15,
+	MACAddressRegister0 = 112,
+	MACAddressRegister1,
+	MACAddressRegister2,
+	MACAddressRegister3,
+	MACAddressRegister4,
+	MACAddressRegister5,
+	UserDefinedRegister1 = 118,
+	UserDefinedRegister2,
+	UserDefinedRegister3,
+	IndirectAccessControl0 = 121,
+	IndirectAccessControl1,
+	IndirectDataRegister8 = 123,
+	IndirectDataRegister7,
+	IndirectDataRegister6,
+	IndirectDataRegister5,
+	IndirectDataRegister4,
+	IndirectDataRegister3,
+	IndirectDataRegister2,
+	IndirectDataRegister1,
+	IndirectDataRegister0,
+	DigitalTestingStatus0 = 132,
+	DigitalTestingControl0,
+	AnalogTestingControl0,
+	AnalogTestingControl1,
+	AnalogTestingControl2,
+	AnalogTestingControl3,
+	AnalogTestingStatus,
+	AnalogTestingControl4,
+	QMDebug1,
+	QMDebug2,
+};
+
+#endif
-- 
1.7.1.1


^ permalink raw reply related

* [PATCH 1/2] net: dsa: introduce STPID switch tagging handling code
From: Mike Frysinger @ 2010-07-21 13:37 UTC (permalink / raw)
  To: netdev, David S. Miller
  Cc: uclinux-dist-devel, Karl Beldan, Lennert Buytenhek, Graf Yang,
	Bryan Wu

From: Graf Yang <graf.yang@analog.com>

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 include/linux/if_ether.h  |    1 +
 include/linux/netdevice.h |   10 +++
 include/net/dsa.h         |    2 +-
 net/dsa/Kconfig           |    3 +
 net/dsa/Makefile          |    1 +
 net/dsa/dsa.c             |    6 ++
 net/dsa/dsa_priv.h        |    2 +
 net/dsa/slave.c           |   18 ++++++
 net/dsa/tag_stpid.c       |  147 +++++++++++++++++++++++++++++++++++++++++++++
 net/ethernet/eth.c        |    2 +
 10 files changed, 191 insertions(+), 1 deletions(-)
 create mode 100644 net/dsa/tag_stpid.c

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index c831467..cb9e2be 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -107,6 +107,7 @@
 #define ETH_P_ARCNET	0x001A		/* 1A for ArcNet :-)            */
 #define ETH_P_DSA	0x001B		/* Distributed Switch Arch.	*/
 #define ETH_P_TRAILER	0x001C		/* Trailer switch tagging	*/
+#define ETH_P_STPID	0x001D		/* STPID switch tagging		*/
 #define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
 #define ETH_P_IEEE802154 0x00F6		/* IEEE802.15.4 frame		*/
 #define ETH_P_CAIF	0x00F7		/* ST-Ericsson CAIF protocol	*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b626289..a13dca4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1140,6 +1140,16 @@ static inline bool netdev_uses_trailer_tags(struct net_device *dev)
 	return 0;
 }
 
+static inline bool netdev_uses_stpid_tags(struct net_device *dev)
+{
+#ifdef CONFIG_NET_DSA_TAG_STPID
+	if (dev->dsa_ptr != NULL)
+		return dsa_uses_stpid_tags(dev->dsa_ptr);
+#endif
+
+	return 0;
+}
+
 /**
  *	netdev_priv - access network device private data
  *	@dev: network device
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 839f768..21a5e2e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -56,6 +56,6 @@ struct dsa_platform_data {
 
 extern bool dsa_uses_dsa_tags(void *dsa_ptr);
 extern bool dsa_uses_trailer_tags(void *dsa_ptr);
-
+extern bool dsa_uses_stpid_tags(void *dsa_ptr);
 
 #endif
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 1120178..ee8d705 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -23,6 +23,9 @@ config NET_DSA_TAG_TRAILER
 	bool
 	default n
 
+config NET_DSA_TAG_STPID
+	bool
+	default n
 
 # switch drivers
 config NET_DSA_MV88E6XXX
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 2374faf..4881577 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
 obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
 obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
+obj-$(CONFIG_NET_DSA_TAG_STPID) += tag_stpid.o
 
 # switch drivers
 obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6112a12..8cb4dfa 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -220,6 +220,12 @@ bool dsa_uses_trailer_tags(void *dsa_ptr)
 	return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
 }
 
+bool dsa_uses_stpid_tags(void *dsa_ptr)
+{
+	struct dsa_switch_tree *dst = dsa_ptr;
+
+	return !!(dst->tag_protocol == htons(ETH_P_STPID));
+}
 
 /* link polling *************************************************************/
 static void dsa_link_poll_work(struct work_struct *ugly)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 4b0ea05..be76ded 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -177,5 +177,7 @@ netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev);
 /* tag_trailer.c */
 netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev);
 
+/* tag_stpid.c */
+int stpid_xmit(struct sk_buff *skb, struct net_device *dev);
 
 #endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 64ca2a6..a1b30a7 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -333,6 +333,19 @@ static const struct net_device_ops trailer_netdev_ops = {
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 };
 #endif
+#ifdef CONFIG_NET_DSA_TAG_STPID
+static const struct net_device_ops stpid_netdev_ops = {
+	.ndo_init		= dsa_slave_init,
+	.ndo_open		= dsa_slave_open,
+	.ndo_stop		= dsa_slave_close,
+	.ndo_start_xmit		= stpid_xmit,
+	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
+	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
+	.ndo_set_multicast_list = dsa_slave_set_rx_mode,
+	.ndo_set_mac_address	= dsa_slave_set_mac_address,
+	.ndo_do_ioctl		= dsa_slave_ioctl,
+};
+#endif
 
 /* slave device setup *******************************************************/
 struct net_device *
@@ -370,6 +383,11 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 		slave_dev->netdev_ops = &trailer_netdev_ops;
 		break;
 #endif
+#ifdef CONFIG_NET_DSA_TAG_STPID
+	case htons(ETH_P_STPID):
+		slave_dev->netdev_ops = &stpid_netdev_ops;
+		break;
+#endif
 	default:
 		BUG();
 	}
diff --git a/net/dsa/tag_stpid.c b/net/dsa/tag_stpid.c
new file mode 100644
index 0000000..b5d9829
--- /dev/null
+++ b/net/dsa/tag_stpid.c
@@ -0,0 +1,147 @@
+/*
+ * net/dsa/tag_stpid.c - special tag identifier,
+ * 0x810 + 4 bit "port mask" + 3 bit 8021p + 1 bit CFI + 12 bit VLAN ID
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include "dsa_priv.h"
+
+#define ETH_P_8021QH      (ETH_P_8021Q >> 8)
+#define ETH_P_8021QL      (ETH_P_8021Q & 0xFF)
+#define STPID_HLEN        4
+
+#define ZERO_VID          0
+#define RESERVED_VID      0xFFF
+#define STPID_VID         ZERO_VID
+
+int stpid_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	u8 *dsa_header;
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	/*
+	 * For 802.1Q frames, convert to STPID tagged frames,
+	 * do nothing for common frames.
+	 */
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		if (skb_cow_head(skb, 0) < 0)
+			goto out_free;
+
+		dsa_header = skb->data + 2 * ETH_ALEN;
+		dsa_header[1] = p->port & 0x03;
+	}
+
+	skb->protocol = htons(ETH_P_STPID);
+
+	skb->dev = p->parent->dst->master_netdev;
+	dev_queue_xmit(skb);
+
+	return NETDEV_TX_OK;
+
+out_free:
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int stpid_rcv(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct dsa_switch_tree *dst = dev->dsa_ptr;
+	struct dsa_switch *ds = dst->ds[0];
+	u8 *dsa_header;
+	int source_port;
+	int vid;
+
+	if (unlikely(ds == NULL))
+		goto out_drop;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		goto out;
+
+	/* The ether_head has been pulled by master driver */
+	dsa_header = skb->data - 2;
+
+	vid = ((dsa_header[2] & 0x0f)<<8 | dsa_header[3]);
+
+	source_port = dsa_header[1] & 0x03;
+	if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
+		goto out_drop;
+
+	if (((dsa_header[0] & ETH_P_8021QH) == ETH_P_8021QH) &&
+			(vid != STPID_VID)) {
+		u8 new_header[STPID_HLEN];
+
+		/* Convert STPID tag to 802.1q tag */
+		new_header[0] = ETH_P_8021QH;
+		new_header[1] = ETH_P_8021QL;
+
+		if (skb->ip_summed == CHECKSUM_COMPLETE) {
+			__wsum c = skb->csum;
+			c = csum_add(c, csum_partial(new_header, 2, 0));
+			c = csum_sub(c, csum_partial(dsa_header, 2, 0));
+			skb->csum = c;
+		}
+		memcpy(dsa_header, new_header, STPID_HLEN / 2);
+
+	} else if ((dsa_header[0] & ETH_P_8021QH) &&
+			(vid == STPID_VID)) {
+
+		if (unlikely(!pskb_may_pull(skb, STPID_HLEN)))
+			goto out_drop;
+
+		/* Remove STPID tag and update checksum. */
+		if (skb->ip_summed == CHECKSUM_COMPLETE) {
+			__wsum c = skb->csum;
+			c = csum_sub(c, csum_partial(dsa_header, STPID_HLEN, 0));
+			skb->csum = c;
+		}
+		memmove(skb->data - ETH_HLEN + STPID_HLEN,
+				skb->data - ETH_HLEN, 2 * ETH_ALEN);
+		skb_pull(skb, STPID_HLEN);
+	}
+
+	skb->dev = ds->ports[source_port];
+	skb_push(skb, ETH_HLEN);
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->dev->last_rx = jiffies;
+	skb->dev->stats.rx_packets++;
+	skb->dev->stats.rx_bytes += skb->len;
+	netif_receive_skb(skb);
+
+	return 0;
+
+out_drop:
+	kfree_skb(skb);
+out:
+	return 0;
+}
+
+static struct packet_type stpid_packet_type = {
+	.type	= __constant_htons(ETH_P_STPID),
+	.func	= stpid_rcv,
+};
+
+static int __init stpid_init_module(void)
+{
+	dev_add_pack(&stpid_packet_type);
+	return 0;
+}
+module_init(stpid_init_module);
+
+static void __exit stpid_cleanup_module(void)
+{
+	dev_remove_pack(&stpid_packet_type);
+}
+module_exit(stpid_cleanup_module);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 215c839..964f9d2 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -194,6 +194,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 		return htons(ETH_P_DSA);
 	if (netdev_uses_trailer_tags(dev))
 		return htons(ETH_P_TRAILER);
+	if (netdev_uses_stpid_tags(dev))
+		return htons(ETH_P_STPID);
 
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
-- 
1.7.1.1


^ permalink raw reply related

* Re: [patch v2.6 4/4] libxt_ipvs: user-space lib for netfilter matcher xt_ipvs
From: Jan Engelhardt @ 2010-07-21 13:28 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, linux-kernel, netfilter, netfilter-devel,
	Malcolm Turnbull, Wensong Zhang, Julius Volz, Patrick McHardy,
	David S. Miller, Hannes Eder
In-Reply-To: <20100721132159.GN6418@verge.net.au>


On Wednesday 2010-07-21 15:21, Simon Horman wrote:
>> On Wednesday 2010-07-21 03:21, Simon Horman wrote:
>> >> +
>> >> +#define XT_IPVS_IPVS_PROPERTY	(1 << 0) /* all other options imply this one */
>> >> +#define XT_IPVS_PROTO		(1 << 1)
>> >> +#define XT_IPVS_VADDR		(1 << 2)
>> >> +#define XT_IPVS_VPORT		(1 << 3)
>> >> +#define XT_IPVS_DIR		(1 << 4)
>> >> +#define XT_IPVS_METHOD		(1 << 5)
>> >> +#define XT_IPVS_VPORTCTL	(1 << 6)
>> >> +#define XT_IPVS_MASK		((1 << 7) - 1)
>> >> +#define XT_IPVS_ONCE_MASK	(XT_IPVS_MASK & ~XT_IPVS_IPVS_PROPERTY)
>> 
>> Can't these just be an enum?
>
>More than one option can be used at once - they form a mini bitmap -
>so no, I don't think we can use an enum.

An enum does not dictate that you cannot combine values of it with itself.

	enum { A = 1 << 0, B = 1 << 0, };
	unsigned int flags = A | B;

is perfectly fine, which is what other modules do.

^ permalink raw reply

* Re: [patch v2.6 4/4] libxt_ipvs: user-space lib for netfilter matcher xt_ipvs
From: Simon Horman @ 2010-07-21 13:21 UTC (permalink / raw)
  To: Jan Engelhardt
  Cc: lvs-devel, netdev, linux-kernel, netfilter, netfilter-devel,
	Malcolm Turnbull, Wensong Zhang, Julius Volz, Patrick McHardy,
	David S. Miller, Hannes Eder
In-Reply-To: <alpine.LSU.2.01.1007210834570.10568@obet.zrqbmnf.qr>

On Wed, Jul 21, 2010 at 08:35:19AM +0200, Jan Engelhardt wrote:
> 
> On Wednesday 2010-07-21 03:21, Simon Horman wrote:
> >> +
> >> +#define XT_IPVS_IPVS_PROPERTY	(1 << 0) /* all other options imply this one */
> >> +#define XT_IPVS_PROTO		(1 << 1)
> >> +#define XT_IPVS_VADDR		(1 << 2)
> >> +#define XT_IPVS_VPORT		(1 << 3)
> >> +#define XT_IPVS_DIR		(1 << 4)
> >> +#define XT_IPVS_METHOD		(1 << 5)
> >> +#define XT_IPVS_VPORTCTL	(1 << 6)
> >> +#define XT_IPVS_MASK		((1 << 7) - 1)
> >> +#define XT_IPVS_ONCE_MASK	(XT_IPVS_MASK & ~XT_IPVS_IPVS_PROPERTY)
> 
> Can't these just be an enum?

More than one option can be used at once - they form a mini bitmap -
so no, I don't think we can use an enum.


^ permalink raw reply

* Re: [RFC PATCH v3 1/5] irq: add tracepoint to softirq_raise
From: KOSAKI Motohiro @ 2010-07-21 13:01 UTC (permalink / raw)
  To: Neil Horman
  Cc: kosaki.motohiro, Koki Sanagi, netdev, linux-kernel, davem,
	kaneshige.kenji, izumi.taku, laijs, scott.a.mcmillan, rostedt,
	eric.dumazet, fweisbec, mathieu.desnoyers
In-Reply-To: <20100721111419.GB21259@hmsreliant.think-freely.org>

> > >>  #endif /*  _TRACE_IRQ_H */
> > >> diff --git a/kernel/softirq.c b/kernel/softirq.c
> > >> index 825e112..6790599 100644
> > >> --- a/kernel/softirq.c
> > >> +++ b/kernel/softirq.c
> > >> @@ -215,9 +215,9 @@ restart:
> > >>  			int prev_count = preempt_count();
> > >>  			kstat_incr_softirqs_this_cpu(h - softirq_vec);
> > >>  
> > >> -			trace_softirq_entry(h, softirq_vec);
> > >> +			trace_softirq_entry(h - softirq_vec);
> > >>  			h->action(h);
> > >> -			trace_softirq_exit(h, softirq_vec);
> > >> +			trace_softirq_exit(h - softirq_vec);
> > > 
> > > You're loosing information here by reducing the numbers of parameters in this
> > > tracepoint.  How many other tracepoint scripts rely on having both pointers
> > > handy?  Why not just do the pointer math inside your tracehook instead?
> > 
> > In __raise_softirq_irqoff macro there is no method to refer softirq_vec, so it
> > can't use softirq DECLARE_EVENT_CLASS as is.
> > Currently,  there is no script using softirq_entry or softirq_exit.
> > 
> That shouldn't matter, just pass in NULL for softirq_vec in
> __raise_softirq_irqoff as the second argument to the trace function.  You may
> need to fix up the class definition so that the assignment or printk doesn't try
> to dereference that pointer when its NULL, but thats easy enough, and it avoids
> breaking any other perf scripts floating out there.

please see 5 lines above. we already have 'h - softirq_vec' calculation in
this function. so, Sanagi-san's change don't makes any overhead.

So, if the overhead is zero, I'd prefer simplest tracepoint definition :)




^ permalink raw reply

* Re: netfilter/iptables stopped logging 2.6.35-rc
From: auto401300 @ 2010-07-21 12:51 UTC (permalink / raw)
  To: maciej.rutecki; +Cc: netdev, linux-kernel



On Tue, 20 Jul 2010 15:51:03 +0300 Maciej Rutecki 
<maciej.rutecki@gmail.com> wrote:
>On sobota, 17 lipca 2010 o 09:20:36 auto401300@hushmail.com wrote:
>> Hi!
>> 
>> Has something broken with netfilter/iptables logging in 2.6.35-
>rc,
>> or is there something new I should set in .config since .34?
>> 
>> 
>> I just verified that if I boot .34 and ping the pc it does 
>logging:
>> 
>> Jul 17 09:42:49 xxxxx kernel: Linux version 2.6.34-ab 
>(root@xxxxx)
>> (gcc version 4.4.4 (Debian 4.4.4-1) ) #1 SMP PREEMPT Mon May 17
>> 09:15
>> 
>> :15 EEST 2010
>> 
>> ....
>> Jul 17 09:44:52 xxxxx kernel: DENY  in: IN=eth0 OUT= MAC=xxxxx
>> SRC=xxxxx DST=xxxxx LEN=60 TOS=0x00 PREC=0x00 TTL=127 ID=38945
>> PROTO=ICMP TYPE=8 CODE=0 ID=512 SEQ=256
>> 
>> 
>> but if I boot .35-rc4 and ping:
>> 
>> Jul 17 09:48:08 xxxxx kernel: Linux version 2.6.35-rc4-aa
>> (root@xxxxx) (gcc version 4.4.4 (Debian 4.4.4-6) ) #1 SMP 
>PREEMPT
>> Mon Jul 5 15:22:02 EEST 2010
>> ....
>> nothing from iptables in log
>> 
>> 
>> userspace is same, only booted different kernel versions
>
>I created a Bugzilla entry at 
>https://bugzilla.kernel.org/show_bug.cgi?id=16423
>for your bug report, please add your address to the CC list in 
>there, thanks!
>
>-- 
>Maciej Rutecki
>http://www.maciek.unixy.pl




Verified now with debian kernel-package 12.036

2.6.34 does netfilter/iptables logging
2.6.35-rc5 does not
both compiled with same toolchain



thanks. 

^ permalink raw reply

* Re: [PATCH] fec: use interrupt for MDIO completion indication
From: Wolfram Sang @ 2010-07-21 12:51 UTC (permalink / raw)
  To: Baruch Siach
  Cc: Bryan Wu, netdev, linux-arm-kernel, Sascha Hauer, Greg Ungerer
In-Reply-To: <20100715040956.GA7690@jasper.tkos.co.il>

[-- Attachment #1: Type: text/plain, Size: 2886 bytes --]


> > Thanks for this patch, we tested on our i.MX51 board with Ubuntu. It works 
> > fine.
> > 
> > Wolfram, you can pick up this, too. -;)
> 
> Dave has already applied this patch to his net-next tree.

Bryan, thanks for letting me know, I missed this one.

However, have you guys ever tried pulling the cable off/on or restarting
the interface with 'ifconfig down/up'? This always caused a stalled PHY
for me. This patch helps:

==========================

From: Wolfram Sang <w.sang@pengutronix.de>
Subject: [PATCH] net/fec: restore interrupt mask after software-reset in fec_stop()

After the change from mdio polling to irq, it became necessary to
restore the interrupt mask after resetting the chip in fec_stop().
Otherwise, with all irqs disabled, no communication with the PHY will be
possible after e.g. un-/replugging the cable and the device gets
stalled.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/net/fec.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 391a553..768b840 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -118,6 +118,8 @@ static unsigned char	fec_mac_default[] = {
 #define FEC_ENET_MII	((uint)0x00800000)	/* MII interrupt */
 #define FEC_ENET_EBERR	((uint)0x00400000)	/* SDMA bus error */
 
+#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
+
 /* The FEC stores dest/src/type, data, and checksum for receive packets.
  */
 #define PKT_MAXBUF_SIZE		1518
@@ -1213,8 +1215,7 @@ fec_restart(struct net_device *dev, int duplex)
 	writel(0, fep->hwp + FEC_R_DES_ACTIVE);
 
 	/* Enable interrupts we wish to service */
-	writel(FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII,
-			fep->hwp + FEC_IMASK);
+	writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 }
 
 static void
@@ -1233,8 +1234,8 @@ fec_stop(struct net_device *dev)
 	/* Whack a reset.  We should wait for this. */
 	writel(1, fep->hwp + FEC_ECNTRL);
 	udelay(10);
-
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
+	writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 }
 
 static int __devinit
-- 
1.7.1

==========================

BUT, while it helps and may possibly be a quick fix for 2.6.35,
resetting the chip in fec_stop() looks like a wrong thing to do for me.
In the long run, it probably is better to make sure the chip is set up
correctly during initialization, so the reset in fec_stop() is not
needed at all. I had a quick shot at this, but seem to have missed
something as it didn't work. As I will be away from the computers for
two weeks in about 24 hours, I at least wanted to bring up the issue.

Regards,

   Wolfram

-- 
Pengutronix e.K.                           | Wolfram Sang                |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply related

* [PATCH] au1000_eth: get ethernet address from platform_data
From: Manuel Lauss @ 2010-07-21 12:30 UTC (permalink / raw)
  To: Linux-MIPS; +Cc: netdev, Manuel Lauss

au1000_eth uses firmware calls to get a valid MAC address, and changes
it depending on platform device id.  This patch moves this logic out
of the driver into the platform device registration part, where boards
with supported chips can use whatever firmware interface they need;
the default implementation maintains compatibility with existing,
YAMON-based firmware.

Tested-by: Wolfgang Grandegger <wg@denx.de>
Acked-by: Florian Fainelli <florian@openwrt.org>
Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
---
This patch depends on another patch to the MIPS tree to
apply cleanly, so I'd prefer if it went in through it as well.

 arch/mips/alchemy/common/platform.c            |   15 ++++++++++-
 arch/mips/include/asm/mach-au1x00/au1xxx_eth.h |    1 +
 drivers/net/au1000_eth.c                       |   31 +++++------------------
 3 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index f9e5622..e9c354f 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
 #include <linux/platform_device.h>
 #include <linux/serial_8250.h>
 #include <linux/init.h>
@@ -21,6 +22,8 @@
 #include <asm/mach-au1x00/au1100_mmc.h>
 #include <asm/mach-au1x00/au1xxx_eth.h>
 
+#include <prom.h>
+
 #define PORT(_base, _irq)					\
 	{							\
 		.mapbase	= _base,			\
@@ -436,17 +439,27 @@ static int __init au1xxx_platform_init(void)
 {
 	unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
 	int err, i;
+	unsigned char ethaddr[6];
 
 	/* Fill up uartclk. */
 	for (i = 0; au1x00_uart_data[i].flags; i++)
 		au1x00_uart_data[i].uartclk = uartclk;
 
+	/* use firmware-provided mac addr if available and necessary */
+	i = prom_get_ethernet_addr(ethaddr);
+	if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac))
+		memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6);
+
 	err = platform_add_devices(au1xxx_platform_devices,
 				   ARRAY_SIZE(au1xxx_platform_devices));
 #ifndef CONFIG_SOC_AU1100
+	ethaddr[5] += 1;	/* next addr for 2nd MAC */
+	if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac))
+		memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6);
+
 	/* Register second MAC if enabled in pinfunc */
 	if (!err && !(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2))
-		platform_device_register(&au1xxx_eth1_device);
+		err = platform_device_register(&au1xxx_eth1_device);
 #endif
 
 	return err;
diff --git a/arch/mips/include/asm/mach-au1x00/au1xxx_eth.h b/arch/mips/include/asm/mach-au1x00/au1xxx_eth.h
index bae9b75..49dc8d9 100644
--- a/arch/mips/include/asm/mach-au1x00/au1xxx_eth.h
+++ b/arch/mips/include/asm/mach-au1x00/au1xxx_eth.h
@@ -9,6 +9,7 @@ struct au1000_eth_platform_data {
 	int phy_addr;
 	int phy_busid;
 	int phy_irq;
+	char mac[6];
 };
 
 void __init au1xxx_override_eth_cfg(unsigned port,
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index ece6128..17e7e27 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -104,14 +104,6 @@ MODULE_VERSION(DRV_VERSION);
  * complete immediately.
  */
 
-/* These addresses are only used if yamon doesn't tell us what
- * the mac address is, and the mac address is not passed on the
- * command line.
- */
-static unsigned char au1000_mac_addr[6] __devinitdata = {
-	0x00, 0x50, 0xc2, 0x0c, 0x30, 0x00
-};
-
 struct au1000_private *au_macs[NUM_ETH_INTERFACES];
 
 /*
@@ -1002,7 +994,6 @@ static int __devinit au1000_probe(struct platform_device *pdev)
 	db_dest_t *pDB, *pDBfree;
 	int irq, i, err = 0;
 	struct resource *base, *macen;
-	char ethaddr[6];
 
 	base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!base) {
@@ -1079,24 +1070,13 @@ static int __devinit au1000_probe(struct platform_device *pdev)
 	}
 	aup->mac_id = pdev->id;
 
-	if (pdev->id == 0) {
-		if (prom_get_ethernet_addr(ethaddr) == 0)
-			memcpy(au1000_mac_addr, ethaddr, sizeof(au1000_mac_addr));
-		else {
-			netdev_info(dev, "No MAC address found\n");
-				/* Use the hard coded MAC addresses */
-		}
-
+	if (pdev->id == 0)
 		au1000_setup_hw_rings(aup, MAC0_RX_DMA_ADDR, MAC0_TX_DMA_ADDR);
-	} else if (pdev->id == 1)
+	else if (pdev->id == 1)
 		au1000_setup_hw_rings(aup, MAC1_RX_DMA_ADDR, MAC1_TX_DMA_ADDR);
 
-	/*
-	 * Assign to the Ethernet ports two consecutive MAC addresses
-	 * to match those that are printed on their stickers
-	 */
-	memcpy(dev->dev_addr, au1000_mac_addr, sizeof(au1000_mac_addr));
-	dev->dev_addr[5] += pdev->id;
+	/* set a random MAC now in case platform_data doesn't provide one */
+	random_ether_addr(dev->dev_addr);
 
 	*aup->enable = 0;
 	aup->mac_enabled = 0;
@@ -1106,6 +1086,9 @@ static int __devinit au1000_probe(struct platform_device *pdev)
 		dev_info(&pdev->dev, "no platform_data passed, PHY search on MAC0\n");
 		aup->phy1_search_mac0 = 1;
 	} else {
+		if (is_valid_ether_addr(pd->mac))
+			memcpy(dev->dev_addr, pd->mac, 6);
+
 		aup->phy_static_config = pd->phy_static_config;
 		aup->phy_search_highest_addr = pd->phy_search_highest_addr;
 		aup->phy1_search_mac0 = pd->phy1_search_mac0;
-- 
1.7.1.1


^ permalink raw reply related

* [PATCH] 3c59x: handle pci_iomap() errors
From: Kulikov Vasiliy @ 2010-07-21 12:00 UTC (permalink / raw)
  To: kernel-janitors
  Cc: Steffen Klassert, David S. Miller, Eric Dumazet, Ben Hutchings,
	Alexey Dobriyan, Joe Perches, netdev

pci_iomap() can fail, handle this case and return -ENOMEM from probe
function.

Signed-off-by: Kulikov Vasiliy <segooon@gmail.com>
---
 drivers/net/3c59x.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 069a03f..9b137e1 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -1020,6 +1020,11 @@ static int __devinit vortex_init_one(struct pci_dev *pdev,
 	ioaddr = pci_iomap(pdev, pci_bar, 0);
 	if (!ioaddr) /* If mapping fails, fall-back to BAR 0... */
 		ioaddr = pci_iomap(pdev, 0, 0);
+	if (!ioaddr) {
+		pci_disable_device(pdev);
+		rc = -ENOMEM;
+		goto out;
+	}
 
 	rc = vortex_probe1(&pdev->dev, ioaddr, pdev->irq,
 			   ent->driver_data, unit);
-- 
1.7.0.4


^ permalink raw reply related

* Re: [RFC PATCH v3 2/5] napi: convert trace_napi_poll to TRACE_EVENT
From: Neil Horman @ 2010-07-21 11:24 UTC (permalink / raw)
  To: Koki Sanagi
  Cc: netdev, linux-kernel, davem, kaneshige.kenji, izumi.taku,
	kosaki.motohiro, laijs, scott.a.mcmillan, rostedt, eric.dumazet,
	fweisbec, mathieu.desnoyers
In-Reply-To: <4C469AFF.1090204@jp.fujitsu.com>

On Wed, Jul 21, 2010 at 04:00:15PM +0900, Koki Sanagi wrote:
> (2010/07/20 20:09), Neil Horman wrote:
> > On Tue, Jul 20, 2010 at 09:46:51AM +0900, Koki Sanagi wrote:
> >> From: Neil Horman <nhorman@tuxdriver.com>
> >>
> >> This patch converts trace_napi_poll from DECLARE_EVENT to TRACE_EVENT to improve
> >> the usability of napi_poll tracepoint.
> >>
> >>           <idle>-0     [001] 241302.750777: napi_poll: napi poll on napi struct f6acc480 for device eth3
> >>           <idle>-0     [000] 241302.852389: napi_poll: napi poll on napi struct f5d0d70c for device eth1
> >>
> >> An original patch is below.
> >> http://marc.info/?l=linux-kernel&m=126021713809450&w=2
> >> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> >>
> >> And add a fix by Steven Rostedt.
> >> http://marc.info/?l=linux-kernel&m=126150506519173&w=2
> >>
> >> Signed-off-by: Koki Sanagi <sanagi.koki@jp.fujitsu.com>
> >> ---
> >>  include/trace/events/napi.h |   25 +++++++++++++++++++++++--
> >>  1 files changed, 23 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
> >> index 188deca..8fe1e93 100644
> >> --- a/include/trace/events/napi.h
> >> +++ b/include/trace/events/napi.h
> >> @@ -6,10 +6,31 @@
> >>  
> >>  #include <linux/netdevice.h>
> >>  #include <linux/tracepoint.h>
> >> +#include <linux/ftrace.h>
> >> +
> >> +#define NO_DEV "(no_device)"
> >> +
> >> +TRACE_EVENT(napi_poll,
> >>  
> >> -DECLARE_TRACE(napi_poll,
> >>  	TP_PROTO(struct napi_struct *napi),
> >> -	TP_ARGS(napi));
> >> +
> >> +	TP_ARGS(napi),
> >> +
> >> +	TP_STRUCT__entry(
> >> +		__field(	struct napi_struct *,	napi)
> >> +		__string(	dev_name, napi->dev ? napi->dev->name : NO_DEV)
> >> +	),
> >> +
> >> +	TP_fast_assign(
> >> +		__entry->napi = napi;
> >> +		__assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
> >> +	),
> >> +
> >> +	TP_printk("napi poll on napi struct %p for device %s",
> >> +		__entry->napi, __get_str(dev_name))
> >> +);
> >> +
> >> +#undef NO_DEV
> >>  
> >>  #endif /* _TRACE_NAPI_H_ */
> >>  
> >>
> > NAK, This change will create a build break in the drop monitor code.  You'll
> > need to fix that up if you want to make this change.
> > Neil
> > 
> I built a kernel with CONFIG_NET_DROP_MONITOR=y.
> But build break did not occur.
> 
My fault, sorry.  I misunderstood what the macro change was doing here.  It
should be fine.

Thanks
Neil

> Thanks,
> Koki Sanagi.
> 
> > --
> > To unsubscribe from this list: send the line "unsubscribe netdev" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> > 
> 
> 
> 

^ permalink raw reply

* Re: [RFC PATCH v3 1/5] irq: add tracepoint to softirq_raise
From: Neil Horman @ 2010-07-21 11:14 UTC (permalink / raw)
  To: Koki Sanagi
  Cc: netdev, linux-kernel, davem, kaneshige.kenji, izumi.taku,
	kosaki.motohiro, laijs, scott.a.mcmillan, rostedt, eric.dumazet,
	fweisbec, mathieu.desnoyers
In-Reply-To: <4C469A41.4020807@jp.fujitsu.com>

On Wed, Jul 21, 2010 at 03:57:05PM +0900, Koki Sanagi wrote:
> (2010/07/20 20:04), Neil Horman wrote:
> > On Tue, Jul 20, 2010 at 09:45:31AM +0900, Koki Sanagi wrote:
> >> From: Lai Jiangshan <laijs@cn.fujitsu.com>
> >>
> >> Add a tracepoint for tracing when softirq action is raised.
> >>
> >> It and the existed tracepoints complete softirq's tracepoints:
> >> softirq_raise, softirq_entry and softirq_exit.
> >>
> >> And when this tracepoint is used in combination with
> >> the softirq_entry tracepoint we can determine
> >> the softirq raise latency.
> >>
> >> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> >> Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> >> Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
> >>
> >> [ factorize softirq events with DECLARE_EVENT_CLASS ]
> >> Signed-off-by: Koki Sanagi <sanagi.koki@jp.fujitsu.com>
> >> ---
> >>  include/linux/interrupt.h  |    8 +++++-
> >>  include/trace/events/irq.h |   57 ++++++++++++++++++++++++++-----------------
> >>  kernel/softirq.c           |    4 +-
> >>  3 files changed, 43 insertions(+), 26 deletions(-)
> >>
> >> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
> >> index c233113..1cb5726 100644
> >> --- a/include/linux/interrupt.h
> >> +++ b/include/linux/interrupt.h
> >> @@ -18,6 +18,7 @@
> >>  #include <asm/atomic.h>
> >>  #include <asm/ptrace.h>
> >>  #include <asm/system.h>
> >> +#include <trace/events/irq.h>
> >>  
> >>  /*
> >>   * These correspond to the IORESOURCE_IRQ_* defines in
> >> @@ -402,7 +403,12 @@ asmlinkage void do_softirq(void);
> >>  asmlinkage void __do_softirq(void);
> >>  extern void open_softirq(int nr, void (*action)(struct softirq_action *));
> >>  extern void softirq_init(void);
> >> -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
> >> +static inline void __raise_softirq_irqoff(unsigned int nr)
> >> +{
> >> +	trace_softirq_raise(nr);
> >> +	or_softirq_pending(1UL << nr);
> >> +}
> >> +
> > We already have tracepoints in irq_enter and irq_exit.  If the goal here is to
> > detect latency during packet processing, cant the delta in time between those
> > two points be used to determine interrupt handling latency?
> 
> Certainly, the time between irq_entry and irq_exit is not directly related to
> latency during packet processing. But it's indirectly related it.
> Because softirq_entry isn't passed until irq exits and softirq_entry time is
> related to packet processing latency. So I show it as a reference.
> 
Its not directly related no, but look at it, the amount of processing between
irq_exit and softirq_entry is minimal.  The information you are trying to
extract by computing the delta from irq_entry to softirq_entry is almost exactly
the same as that from irq_entry to irq_exit.  For that matter, since you're
trying to guage lantency for packet processing, I expect you could get the same
delta by measuring irq_entry to napi_poll tracepoint time, and save the hassle
of needing to filter on softirq processing that doesn't relate to packet
processing.

> > 
> > 
> >>  extern void raise_softirq_irqoff(unsigned int nr);
> >>  extern void raise_softirq(unsigned int nr);
> >>  extern void wakeup_softirqd(void);
> >> diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
> >> index 0e4cfb6..717744c 100644
> >> --- a/include/trace/events/irq.h
> >> +++ b/include/trace/events/irq.h
> >> @@ -5,7 +5,9 @@
> >>  #define _TRACE_IRQ_H
> >>  
> >>  #include <linux/tracepoint.h>
> >> -#include <linux/interrupt.h>
> >> +
> >> +struct irqaction;
> >> +struct softirq_action;
> >>  
> >>  #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
> >>  #define show_softirq_name(val)				\
> >> @@ -84,56 +86,65 @@ TRACE_EVENT(irq_handler_exit,
> >>  
> >>  DECLARE_EVENT_CLASS(softirq,
> >>  
> >> -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
> >> +	TP_PROTO(unsigned int nr),
> >>  
> >> -	TP_ARGS(h, vec),
> >> +	TP_ARGS(nr),
> >>  
> >>  	TP_STRUCT__entry(
> >> -		__field(	int,	vec			)
> >> +		__field(	unsigned int,	vec	)
> >>  	),
> >>  
> >>  	TP_fast_assign(
> >> -		__entry->vec = (int)(h - vec);
> >> +		__entry->vec	= nr;
> >>  	),
> >>  
> >>  	TP_printk("vec=%d [action=%s]", __entry->vec,
> >> -		  show_softirq_name(__entry->vec))
> >> +		show_softirq_name(__entry->vec))
> >> +);
> >> +
> >> +/**
> >> + * softirq_raise - called immediately when a softirq is raised
> >> + * @nr: softirq vector number
> >> + *
> >> + * Tracepoint for tracing when softirq action is raised.
> >> + * Also, when used in combination with the softirq_entry tracepoint
> >> + * we can determine the softirq raise latency.
> >> + */
> >> +DEFINE_EVENT(softirq, softirq_raise,
> >> +
> >> +	TP_PROTO(unsigned int nr),
> >> +
> >> +	TP_ARGS(nr)
> >>  );
> >>  
> >>  /**
> >>   * softirq_entry - called immediately before the softirq handler
> >> - * @h: pointer to struct softirq_action
> >> - * @vec: pointer to first struct softirq_action in softirq_vec array
> >> + * @nr: softirq vector number
> >>   *
> >> - * The @h parameter, contains a pointer to the struct softirq_action
> >> - * which has a pointer to the action handler that is called. By subtracting
> >> - * the @vec pointer from the @h pointer, we can determine the softirq
> >> - * number. Also, when used in combination with the softirq_exit tracepoint
> >> + * Tracepoint for tracing when softirq action starts.
> >> + * Also, when used in combination with the softirq_exit tracepoint
> >>   * we can determine the softirq latency.
> >>   */
> >>  DEFINE_EVENT(softirq, softirq_entry,
> >>  
> >> -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
> >> +	TP_PROTO(unsigned int nr),
> >>  
> >> -	TP_ARGS(h, vec)
> >> +	TP_ARGS(nr)
> >>  );
> >>  
> >>  /**
> >>   * softirq_exit - called immediately after the softirq handler returns
> >> - * @h: pointer to struct softirq_action
> >> - * @vec: pointer to first struct softirq_action in softirq_vec array
> >> + * @nr: softirq vector number
> >>   *
> >> - * The @h parameter contains a pointer to the struct softirq_action
> >> - * that has handled the softirq. By subtracting the @vec pointer from
> >> - * the @h pointer, we can determine the softirq number. Also, when used in
> >> - * combination with the softirq_entry tracepoint we can determine the softirq
> >> - * latency.
> >> + * Tracepoint for tracing when softirq action ends.
> >> + * Also, when used in combination with the softirq_entry tracepoint
> >> + * we can determine the softirq latency.
> >>   */
> >>  DEFINE_EVENT(softirq, softirq_exit,
> >>  
> >> -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
> >> +	TP_PROTO(unsigned int nr),
> >>  
> >> -	TP_ARGS(h, vec)
> >> +	TP_ARGS(nr)
> >>  );
> >>  
> >>  #endif /*  _TRACE_IRQ_H */
> >> diff --git a/kernel/softirq.c b/kernel/softirq.c
> >> index 825e112..6790599 100644
> >> --- a/kernel/softirq.c
> >> +++ b/kernel/softirq.c
> >> @@ -215,9 +215,9 @@ restart:
> >>  			int prev_count = preempt_count();
> >>  			kstat_incr_softirqs_this_cpu(h - softirq_vec);
> >>  
> >> -			trace_softirq_entry(h, softirq_vec);
> >> +			trace_softirq_entry(h - softirq_vec);
> >>  			h->action(h);
> >> -			trace_softirq_exit(h, softirq_vec);
> >> +			trace_softirq_exit(h - softirq_vec);
> > 
> > You're loosing information here by reducing the numbers of parameters in this
> > tracepoint.  How many other tracepoint scripts rely on having both pointers
> > handy?  Why not just do the pointer math inside your tracehook instead?
> 
> In __raise_softirq_irqoff macro there is no method to refer softirq_vec, so it
> can't use softirq DECLARE_EVENT_CLASS as is.
> Currently,  there is no script using softirq_entry or softirq_exit.
> 
That shouldn't matter, just pass in NULL for softirq_vec in
__raise_softirq_irqoff as the second argument to the trace function.  You may
need to fix up the class definition so that the assignment or printk doesn't try
to dereference that pointer when its NULL, but thats easy enough, and it avoids
breaking any other perf scripts floating out there.
Neil

> Thanks,
> Koki Sanagi.
> 
> > 
> >>  			if (unlikely(prev_count != preempt_count())) {
> >>  				printk(KERN_ERR "huh, entered softirq %td %s %p"
> >>  				       "with preempt_count %08x,"
> >>
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe netdev" in
> >> the body of a message to majordomo@vger.kernel.org
> >> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>
> > 
> > 
> 
> 
> 

^ permalink raw reply

* Re: [RFC PATCH v3 4/5] skb: add tracepoints to freeing skb
From: Neil Horman @ 2010-07-21 10:56 UTC (permalink / raw)
  To: Koki Sanagi
  Cc: netdev, linux-kernel, davem, kaneshige.kenji, izumi.taku,
	kosaki.motohiro, laijs, scott.a.mcmillan, rostedt, eric.dumazet,
	fweisbec, mathieu.desnoyers
In-Reply-To: <4C469BA1.7050900@jp.fujitsu.com>

On Wed, Jul 21, 2010 at 04:02:57PM +0900, Koki Sanagi wrote:
> (2010/07/20 20:50), Neil Horman wrote:
> > On Tue, Jul 20, 2010 at 09:49:10AM +0900, Koki Sanagi wrote:
> >> [RFC PATCH v3 4/5] skb: add tracepoints to freeing skb
> >> This patch adds tracepoint to consume_skb, dev_kfree_skb_irq and
> >> skb_free_datagram_locked. Combinating with tracepoint on dev_hard_start_xmit,
> >> we can check how long it takes to free transmited packets. And using it, we can
> >> calculate how many packets driver had at that time. It is useful when a drop of
> >> transmited packet is a problem.
> >>
> >>           <idle>-0     [001] 241409.218333: consume_skb: skbaddr=dd6b2fb8
> >>           <idle>-0     [001] 241409.490555: dev_kfree_skb_irq: skbaddr=f5e29840
> >>
> >>         udp-recv-302   [001] 515031.206008: skb_free_datagram_locked: skbaddr=f5b1d900
> >>
> >>
> >> Signed-off-by: Koki Sanagi <sanagi.koki@jp.fujitsu.com>
> >> ---
> >>  include/trace/events/skb.h |   42 ++++++++++++++++++++++++++++++++++++++++++
> >>  net/core/datagram.c        |    1 +
> >>  net/core/dev.c             |    2 ++
> >>  net/core/skbuff.c          |    1 +
> >>  4 files changed, 46 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
> >> index 4b2be6d..84c9041 100644
> >> --- a/include/trace/events/skb.h
> >> +++ b/include/trace/events/skb.h
> >> @@ -35,6 +35,48 @@ TRACE_EVENT(kfree_skb,
> >>  		__entry->skbaddr, __entry->protocol, __entry->location)
> >>  );
> >>  
> >> +DECLARE_EVENT_CLASS(free_skb,
> >> +
> >> +	TP_PROTO(struct sk_buff *skb),
> >> +
> >> +	TP_ARGS(skb),
> >> +
> >> +	TP_STRUCT__entry(
> >> +		__field(	void *,	skbaddr	)
> >> +	),
> >> +
> >> +	TP_fast_assign(
> >> +		__entry->skbaddr = skb;
> >> +	),
> >> +
> >> +	TP_printk("skbaddr=%p", __entry->skbaddr)
> >> +
> >> +);
> >> +
> >> +DEFINE_EVENT(free_skb, consume_skb,
> >> +
> >> +	TP_PROTO(struct sk_buff *skb),
> >> +
> >> +	TP_ARGS(skb)
> >> +
> >> +);
> >> +
> >> +DEFINE_EVENT(free_skb, dev_kfree_skb_irq,
> >> +
> >> +	TP_PROTO(struct sk_buff *skb),
> >> +
> >> +	TP_ARGS(skb)
> >> +
> >> +);
> >> +
> >> +DEFINE_EVENT(free_skb, skb_free_datagram_locked,
> >> +
> >> +	TP_PROTO(struct sk_buff *skb),
> >> +
> >> +	TP_ARGS(skb)
> >> +
> >> +);
> >> +
> > 
> > Why create these last two tracepoints at all?  dev_kfree_skb_irq will eventually
> > pass through kfree_skb anyway, getting picked up by the tracepoint there, the
> > while the latter won't (since it uses __kfree_skb instead), I think that could
> > be fixed up by add a call to trace_kfree_skb there directly, saving you two
> > tracepoints.
> > 
> > Neil
> > 
> I think dev_kfree_skb_irq isn't chased by trace_kfree_skb or trace_consume_skb
> completely. Because net_tx_action frees skb by __kfree_skb. So it is better to
> add trace_kfree_skb before it. skb_free_datagram_locked is same.
> 
It isn't, you're right, but that was the point I made above.  Those missed areas
could be easily handled by adding calls to trace_kfree_skb which already exists,
to the missed areas.  Then you don't need to create those new tracepoints.  The
way your doing this, if someone wants to trace all skb frees in debugfs, they
would have to enable three tracepoints, not just one.  Not that thats the point
of your patch, but its something to consider, and it simplifies your code.
Neil

> Thanks,
> Koki Sanagi.
> >>
> > 
> > 
> 
> 
> 

^ permalink raw reply

* [PATCH iproute2] ip: add RTA_MARK support
From: Eric Dumazet @ 2010-07-21  9:42 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, David Miller

Adds support for RTA_MARK rt attribute added in linux-2.6.36

$ ip route get ADDR mark 4
192.168.20.110 dev eth1  src 192.168.20.108  mark 4
    cache  mtu 1500 advmss 1460 hoplimit 64

$ ip route get 192.168.20.108 from ADDR iif STRING mark 256
local 192.168.20.108 from 192.168.20.110 dev lo  src 192.168.20.108  mark 0x100
    cache <local,src-direct>  iif eth1

$ ip route list cache [ADDR] mark NUMBER

Hexadecimal output if mark >= 16
null marks are not displayed.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/rtnetlink.h |    1 +
 ip/iproute.c              |   30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 0d8ef9e..a3ccbe4 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -282,6 +282,7 @@ enum rtattr_type_t {
 	RTA_SESSION, /* no longer used */
 	RTA_MP_ALGO, /* no longer used */
 	RTA_TABLE,
+	RTA_MARK,
 	__RTA_MAX
 };
 
diff --git a/ip/iproute.c b/ip/iproute.c
index 8252e18..cbb89c5 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -55,6 +55,7 @@ static void usage(void)
 	fprintf(stderr, "Usage: ip route { list | flush } SELECTOR\n");
 	fprintf(stderr, "       ip route get ADDRESS [ from ADDRESS iif STRING ]\n");
 	fprintf(stderr, "                            [ oif STRING ]  [ tos TOS ]\n");
+	fprintf(stderr, "                            [ mark NUMBER ]\n");
 	fprintf(stderr, "       ip route { add | del | change | append | replace | monitor } ROUTE\n");
 	fprintf(stderr, "SELECTOR := [ root PREFIX ] [ match PREFIX ] [ exact PREFIX ]\n");
 	fprintf(stderr, "            [ table TABLE_ID ] [ proto RTPROTO ]\n");
@@ -96,6 +97,7 @@ static struct
 	int tos, tosmask;
 	int iif, iifmask;
 	int oif, oifmask;
+	int mark, markmask;
 	int realm, realmmask;
 	inet_prefix rprefsrc;
 	inet_prefix rvia;
@@ -273,6 +275,13 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 		if ((oif^filter.oif)&filter.oifmask)
 			return 0;
 	}
+	if (filter.markmask) {
+		int mark = 0;
+		if (tb[RTA_MARK])
+			mark = *(int *)RTA_DATA(tb[RTA_MARK]);
+		if ((mark ^ filter.mark) & filter.markmask)
+			return 0;
+	}
 	if (filter.flushb &&
 	    r->rtm_family == AF_INET6 &&
 	    r->rtm_dst_len == 0 &&
@@ -384,6 +393,15 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 		fprintf(fp, "pervasive ");
 	if (r->rtm_flags & RTM_F_NOTIFY)
 		fprintf(fp, "notify ");
+	if (tb[RTA_MARK]) {
+		unsigned int mark = *(unsigned int*)RTA_DATA(tb[RTA_MARK]);
+		if (mark) {
+			if (mark >= 16)
+				fprintf(fp, " mark 0x%x", mark);
+			else
+				fprintf(fp, " mark %u", mark);
+		}
+	}
 
 	if (tb[RTA_FLOW] && filter.realmmask != ~0U) {
 		__u32 to = *(__u32*)RTA_DATA(tb[RTA_FLOW]);
@@ -1047,6 +1065,7 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
 	int do_ipv6 = preferred_family;
 	char *id = NULL;
 	char *od = NULL;
+	unsigned int mark = 0;
 
 	iproute_reset_filter();
 	filter.tb = RT_TABLE_MAIN;
@@ -1119,6 +1138,10 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
 		} else if (strcmp(*argv, "iif") == 0) {
 			NEXT_ARG();
 			id = *argv;
+		} else if (strcmp(*argv, "mark") == 0) {
+			NEXT_ARG();
+			get_unsigned(&mark, *argv, 0);
+			filter.markmask = -1;
 		} else if (strcmp(*argv, "via") == 0) {
 			NEXT_ARG();
 			get_prefix(&filter.rvia, *argv, do_ipv6);
@@ -1200,6 +1223,7 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
 			filter.oifmask = -1;
 		}
 	}
+	filter.mark = mark;
 
 	if (flush) {
 		int round = 0;
@@ -1289,6 +1313,7 @@ int iproute_get(int argc, char **argv)
 	char  *odev = NULL;
 	int connected = 0;
 	int from_ok = 0;
+	unsigned int mark = 0;
 
 	memset(&req, 0, sizeof(req));
 
@@ -1329,6 +1354,9 @@ int iproute_get(int argc, char **argv)
 		} else if (matches(*argv, "iif") == 0) {
 			NEXT_ARG();
 			idev = *argv;
+		} else if (matches(*argv, "mark") == 0) {
+			NEXT_ARG();
+			get_unsigned(&mark, *argv, 0);
 		} else if (matches(*argv, "oif") == 0 ||
 			   strcmp(*argv, "dev") == 0) {
 			NEXT_ARG();
@@ -1379,6 +1407,8 @@ int iproute_get(int argc, char **argv)
 			addattr32(&req.n, sizeof(req), RTA_OIF, idx);
 		}
 	}
+	if (mark)
+		addattr32(&req.n, sizeof(req), RTA_MARK, mark);
 
 	if (req.r.rtm_family == AF_UNSPEC)
 		req.r.rtm_family = AF_INET;



^ permalink raw reply related

* Re: [PATCH net-next-2.6] netlink: netlink_recvmsg() fix
From: Eric Dumazet @ 2010-07-21  8:43 UTC (permalink / raw)
  To: Johannes Berg; +Cc: David Miller, netdev
In-Reply-To: <1279700420.2452.15.camel@edumazet-laptop>

Le mercredi 21 juillet 2010 à 10:20 +0200, Eric Dumazet a écrit :

> Nothing can touch this skb at the same time but us and our friends
> (consumers that did a skb_recv_datagram( MSG_PEEK ) operation).
> 
> Oh well, I see skb_unshare() tests skb_cloned(). This is not what we
> want.
> 

Here is V3 of this patch.

Thanks

[PATCH net-next-2.6 v3] netlink: netlink_recvmsg() fix

commit 1dacc76d0014 
(net/compat/wext: send different messages to compat tasks)
introduced a race condition on netlink, in case MSG_PEEK is used.

An skb given by skb_recv_datagram() might be shared, we must copy it
before any modification, or risk fatal corruption.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/netlink/af_netlink.c |   22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7aeaa83..0c86657 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1405,7 +1405,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	struct netlink_sock *nlk = nlk_sk(sk);
 	int noblock = flags&MSG_DONTWAIT;
 	size_t copied;
-	struct sk_buff *skb, *frag __maybe_unused = NULL;
+	struct sk_buff *skb;
 	int err;
 
 	if (flags&MSG_OOB)
@@ -1440,7 +1440,21 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 			kfree_skb(skb);
 			skb = compskb;
 		} else {
-			frag = skb_shinfo(skb)->frag_list;
+			/*
+			 * Before setting frag_list to NULL, we must get a
+			 * private copy of skb if shared (because of MSG_PEEK)
+			 */
+			if (skb_shared(skb) {
+				struct sk_buff *nskb;
+
+				nskb = pskb_copy(skb, GFP_KERNEL);
+				kfree_skb(skb);
+				skb = nskb;
+				err = -ENOMEM;
+				if (!skb)
+					goto out;
+			}
+			kfree_skb(skb_shinfo(skb)->frag_list);
 			skb_shinfo(skb)->frag_list = NULL;
 		}
 	}
@@ -1477,10 +1491,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
 
-#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
-	skb_shinfo(skb)->frag_list = frag;
-#endif
-
 	skb_free_datagram(sk, skb);
 
 	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)



^ permalink raw reply related

* Re: [PATCH net-next-2.6] netlink: netlink_recvmsg() fix
From: Eric Dumazet @ 2010-07-21  8:20 UTC (permalink / raw)
  To: Johannes Berg; +Cc: David Miller, netdev
In-Reply-To: <1279699529.3707.5.camel@jlt3.sipsolutions.net>

Le mercredi 21 juillet 2010 à 10:05 +0200, Johannes Berg a écrit :
> On Tue, 2010-07-20 at 17:20 +0200, Eric Dumazet wrote:
> 
> > [PATCH net-next-2.6 v2] netlink: netlink_recvmsg() fix
> > 
> > commit 1dacc76d0014 
> > (net/compat/wext: send different messages to compat tasks)
> > introduced a race condition on netlink, in case MSG_PEEK is used.
> > 
> > An skb given by skb_recv_datagram() might be shared, we must copy it
> > before any modification, or risk fatal corruption.
> 
> Makes sense to me, seeing that if you MSG_PEEK it just increases
> skb->users. But nothing could touch the other skb at the same time?
> Although I guess with netlink multicast we have a similar situation.

Nothing can touch this skb at the same time but us and our friends
(consumers that did a skb_recv_datagram( MSG_PEEK ) operation).

Oh well, I see skb_unshare() tests skb_cloned(). This is not what we
want.

We probably wants something like :

if (skb_shared(skb)) {
	nsbk = skb_copy(skb, GFP_KERNEL);
	...
}




^ permalink raw reply

* Re: [PATCH net-next] sysfs: add entry to indicate network interfaces with random MAC address
From: Stefan Assmann @ 2010-07-21  8:10 UTC (permalink / raw)
  To: David Miller
  Cc: abadea, bhutchings, netdev, linux-kernel, gospo, gregory.v.rose,
	alexander.h.duyck, leedom, harald
In-Reply-To: <20100720.131839.134093789.davem@davemloft.net>

On 20.07.2010 22:18, David Miller wrote:
> From: Stefan Assmann <sassmann@redhat.com>
> Date: Tue, 20 Jul 2010 14:17:30 +0200
> 
>> On 20.07.2010 13:58, Alex Badea wrote:
>>> Hi,
>>>
>>> On 07/20/2010 02:47 PM, Stefan Assmann wrote:
>>>>> What about devices that 'steal' MAC addresses from slave devices?
>>>
>>> Might I suggest an attribute such as "address_type", which could report
>>> "permanent", "random", "stolen", or something else in the future?
>>
>> In case there's demand for such a multi-purpose attribute I see no
>> reason to object. More thoughts on this?
> 
> I think this is a great idea.
> 
> Now it makes sense to use a new 'u8' in struct netdevice or similar to
> store this, since we'll have more than a boolean here.
> 

I put Alex' idea into code for further discussion, keeping the names
mentioned here until we agree on the scope of this attribute. When we
have settled I'll post a patch with proper patch description.

  Stefan
---
 include/linux/etherdevice.h |   14 ++++++++++++++
 include/linux/netdevice.h   |    6 ++++++
 net/core/net-sysfs.c        |    2 ++
 3 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 3d7a668..f15cac8 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -127,6 +127,20 @@ static inline void random_ether_addr(u8 *addr)
 }

 /**
+ * dev_hw_addr_random - Create random MAC and set device flag
+ * @dev: pointer to net_device structure
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Generate random MAC to be used by a device and set addr_type
+ * so the state can be read by sysfs and be used by udev.
+ */
+static inline void dev_hw_addr_random(struct net_device *dev, u8 *hwaddr)
+{
+	dev->addr_type |= NET_ADDR_RANDOM;
+	random_ether_addr(hwaddr);
+}
+
+/**
  * compare_ether_addr - Compare two Ethernet addresses
  * @addr1: Pointer to a six-byte array containing the Ethernet address
  * @addr2: Pointer other six-byte array containing the Ethernet address
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b626289..2718895 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -66,6 +66,11 @@ struct wireless_dev;
 #define HAVE_FREE_NETDEV		/* free_netdev() */
 #define HAVE_NETDEV_PRIV		/* netdev_priv() */

+/* hardware address types */
+#define NET_ADDR_PERM		0	/* address is permanent (default) */
+#define NET_ADDR_RANDOM		1	/* address is generated randomly */
+#define NET_ADDR_STOLEN		2	/* address is stolen from other device */
+
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
 #define NET_RX_DROP		1	/* packet dropped */
@@ -920,6 +925,7 @@ struct net_device {
 	/* Interface address info. */
 	unsigned char		perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
 	unsigned char		addr_len;	/* hardware address length	*/
+	unsigned char		addr_type;	/* hardware address type	*/
 	unsigned short          dev_id;		/* for shared network cards */

 	spinlock_t		addr_list_lock;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index d2b5965..052ab14 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -96,6 +96,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,

 NETDEVICE_SHOW(dev_id, fmt_hex);
 NETDEVICE_SHOW(addr_len, fmt_dec);
+NETDEVICE_SHOW(addr_type, fmt_dec);
 NETDEVICE_SHOW(iflink, fmt_dec);
 NETDEVICE_SHOW(ifindex, fmt_dec);
 NETDEVICE_SHOW(features, fmt_long_hex);
@@ -296,6 +297,7 @@ static ssize_t show_ifalias(struct device *dev,

 static struct device_attribute net_class_attributes[] = {
 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
+	__ATTR(addr_type, S_IRUGO, show_addr_type, NULL),
 	__ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
 	__ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
 	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
-- 
1.7.1


^ permalink raw reply related

* Re: [PATCH net-next-2.6] netlink: netlink_recvmsg() fix
From: Johannes Berg @ 2010-07-21  8:05 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1279639232.2498.82.camel@edumazet-laptop>

On Tue, 2010-07-20 at 17:20 +0200, Eric Dumazet wrote:

> [PATCH net-next-2.6 v2] netlink: netlink_recvmsg() fix
> 
> commit 1dacc76d0014 
> (net/compat/wext: send different messages to compat tasks)
> introduced a race condition on netlink, in case MSG_PEEK is used.
> 
> An skb given by skb_recv_datagram() might be shared, we must copy it
> before any modification, or risk fatal corruption.

Makes sense to me, seeing that if you MSG_PEEK it just increases
skb->users. But nothing could touch the other skb at the same time?
Although I guess with netlink multicast we have a similar situation.

johannes

> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  net/netlink/af_netlink.c |   13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 7aeaa83..1537fa5 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -1405,7 +1405,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
>  	struct netlink_sock *nlk = nlk_sk(sk);
>  	int noblock = flags&MSG_DONTWAIT;
>  	size_t copied;
> -	struct sk_buff *skb, *frag __maybe_unused = NULL;
> +	struct sk_buff *skb;
>  	int err;
>  
>  	if (flags&MSG_OOB)
> @@ -1440,7 +1440,12 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
>  			kfree_skb(skb);
>  			skb = compskb;
>  		} else {
> -			frag = skb_shinfo(skb)->frag_list;
> +			skb = skb_unshare(skb, GFP_KERNEL);
> +			if (!skb) {
> +				err = -ENOMEM;
> +				goto out;
> +			}
> +			kfree_skb(skb_shinfo(skb)->frag_list);
>  			skb_shinfo(skb)->frag_list = NULL;
>  		}
>  	}
> @@ -1477,10 +1482,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
>  	if (flags & MSG_TRUNC)
>  		copied = skb->len;
>  
> -#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
> -	skb_shinfo(skb)->frag_list = frag;
> -#endif
> -
>  	skb_free_datagram(sk, skb);
>  
>  	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
> 
> 
> 



^ permalink raw reply

* [PATCH net-next-2.6] net: RTA_MARK addition
From: Eric Dumazet @ 2010-07-21  8:03 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Add a new rt attribute, RTA_MARK, and use it in
rt_fill_info()/inet_rtm_getroute() to support following commands :

ip route get 192.168.20.110 mark NUMBER
ip route get 192.168.20.108 from 192.168.20.110 iif eth1 mark NUMBER
ip route list cache [192.168.20.110] mark NUMBER

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/rtnetlink.h |    1 +
 net/ipv4/route.c          |    7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index fbc8cb0..58d4449 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -282,6 +282,7 @@ enum rtattr_type_t {
 	RTA_SESSION, /* no longer used */
 	RTA_MP_ALGO, /* no longer used */
 	RTA_TABLE,
+	RTA_MARK,
 	__RTA_MAX
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 562ce92..3f56b6e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2878,6 +2878,9 @@ static int rt_fill_info(struct net *net,
 	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
+	if (rt->fl.mark)
+		NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark);
+
 	error = rt->dst.error;
 	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
@@ -2933,6 +2936,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	__be32 src = 0;
 	u32 iif;
 	int err;
+	int mark;
 	struct sk_buff *skb;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
@@ -2960,6 +2964,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
 	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
 	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
+	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
 
 	if (iif) {
 		struct net_device *dev;
@@ -2972,6 +2977,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 
 		skb->protocol	= htons(ETH_P_IP);
 		skb->dev	= dev;
+		skb->mark	= mark;
 		local_bh_disable();
 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
 		local_bh_enable();
@@ -2989,6 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 				},
 			},
 			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
+			.mark = mark,
 		};
 		err = ip_route_output_key(net, &rt, &fl);
 	}



^ permalink raw reply related

* CONNMARK getsockopt/setsockopt functionality
From: Daniel Wagner @ 2010-07-21  7:20 UTC (permalink / raw)
  To: netdev

Hi,

There used to be a way to mark packets in userspace by invoking the
setsocketopt with SO_CONNMARK as it was proposed by Krisztian [1].
This code was superseeded by Jan's work [2]. Now I wonder how I get
the same result out of the new code. Something with conditions? 

thanks,
daniel

[1] http://lists.netfilter.org/pipermail/netfilter-devel/2004-May/015385.html
[2] http://www.kerneltrap.com/mailarchive/git-commits-head/2009/9/14/4291

^ permalink raw reply

* [PATCH] Re: mmotm 2010-07-19 - e1000e vs. pm_qos_update_request issues
From: Florian Mickler @ 2010-07-21  7:12 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Valdis.Kletnieks, Rafael J. Wysocki, mark gross, e1000-devel,
	netdev, linux-kernel, James Bottomley, Thomas Gleixner,
	David S. Miller
In-Reply-To: <20100720140751.71ee83a8.akpm@linux-foundation.org>

On Tue, 20 Jul 2010 14:07:51 -0700
Andrew Morton <akpm@linux-foundation.org> wrote:

> On Tue, 20 Jul 2010 16:35:25 -0400
> Valdis.Kletnieks@vt.edu wrote:
> 
> > On Mon, 19 Jul 2010 16:38:09 PDT, akpm@linux-foundation.org said:
> > > The mm-of-the-moment snapshot 2010-07-19-16-37 has been uploaded to
> > > 
> > >    http://userweb.kernel.org/~akpm/mmotm/
> > 
> > Throws a warning at boot:
> > 
> > [    1.786060] WARNING: at kernel/pm_qos_params.c:264 pm_qos_update_request+0x28/0x54()
> > [    1.786088] Hardware name: Latitude E6500
> > [    1.787045] pm_qos_update_request() called for unknown object
> > [    1.787966] Modules linked in:
> > [    1.788940] Pid: 1, comm: swapper Not tainted 2.6.35-rc5-mmotm0719 #1
> > [    1.790035] Call Trace:
> > [    1.791121]  [<ffffffff81037335>] warn_slowpath_common+0x80/0x98
> > [    1.792205]  [<ffffffff810373e1>] warn_slowpath_fmt+0x41/0x43
> > [    1.793279]  [<ffffffff81057c14>] pm_qos_update_request+0x28/0x54
> > [    1.794347]  [<ffffffff8134889e>] e1000_configure+0x421/0x459
> > [    1.795393]  [<ffffffff8134afbd>] e1000_open+0xbd/0x37c
> > [    1.796436]  [<ffffffff8105743a>] ? raw_notifier_call_chain+0xf/0x11
> > [    1.797491]  [<ffffffff8145f948>] __dev_open+0xae/0xe2
> > [    1.798547]  [<ffffffff8145f997>] dev_open+0x1b/0x49
> > [    1.799612]  [<ffffffff8146e36e>] netpoll_setup+0x84/0x259
> > [    1.800685]  [<ffffffff81b5037c>] init_netconsole+0xbc/0x21f
> > [    1.801744]  [<ffffffff81b5026c>] ? sir_wq_init+0x0/0x35
> > [    1.802793]  [<ffffffff81b502c0>] ? init_netconsole+0x0/0x21f
> > [    1.803845]  [<ffffffff810002ff>] do_one_initcall+0x7a/0x12f
> > [    1.804885]  [<ffffffff81b2ccae>] kernel_init+0x138/0x1c2
> > [    1.805915]  [<ffffffff81003554>] kernel_thread_helper+0x4/0x10
> > [    1.806937]  [<ffffffff81590e00>] ? restore_args+0x0/0x30
> > [    1.807955]  [<ffffffff81b2cb76>] ? kernel_init+0x0/0x1c2
> > [    1.808958]  [<ffffffff81003550>] ? kernel_thread_helper+0x0/0x10
> > [    1.809958] ---[ end trace 84b562a00a60539e ]---
> > 
> > Looks like a repeat of something I reported against -mmotm 2010-05-11, though a
> > WARNING rather than an outright crash - the traceback is pretty much identical.
> >  I have *no* idea why -rc3-mmotm0701 doesn't whinge similarly.
> > 
> 
> I don't recall you reporting that, sorry.
> 
> The warning was added by
> 
> : commit 82f682514a5df89ffb3890627eebf0897b7a84ec
> : Author:     James Bottomley <James.Bottomley@suse.de>
> : AuthorDate: Mon Jul 5 22:53:06 2010 +0200
> : Commit:     Rafael J. Wysocki <rjw@sisk.pl>
> : CommitDate: Mon Jul 19 02:00:34 2010 +0200
> : 
> :     pm_qos: Get rid of the allocation in pm_qos_add_request()
> 
> 
> It's a pretty crappy warning too.  Neither the warning nor the code
> comments provide developers with any hint as to what they have done
> wrong, nor what they must do to fix things.  And the patch changelog
> doesn't mention the new warnings *at all*.
> 
> So one must assume that the people who stuck this thing in the tree
> have volunteered to fix e1000e.  Let's cc 'em.
> 

e1000 calls update_request before registering said request with pm_qos.
This was silently ignored before but now emits a warning. The warning
is sound, because it means, that the constraint-request didn't take
effect.

The right thing is probably to register the request before
calling update_request. 

Attached patch moves the registering from e1000_up to e1000_open and
the unregistering from e1000_down to e1000_close. 
It is only compile-tested as I don't have the hardware.

Cheers,
Flo

p.s.: sorry if this get's mangled or is wrongly formatted, i'm just using
 the "insert file" option of my mailclient and crossing my fingers...


>From 693c71b911ff0845c872261d5704a1d40960722d Mon Sep 17 00:00:00 2001
From: Florian Mickler <florian@mickler.org>
Date: Wed, 21 Jul 2010 08:44:21 +0200
Subject: [PATCH] e1000e: register pm_qos request on hardware activation

The pm_qos_add_request call has to register the pm_qos request with the pm_qos
susbsystem before first use of the pm_qos request via
pm_qos_update_request.

As pm_qos changed to use plists there is no benefit in registering and
unregistering the pm_qos request on ifup/ifdown and thus we move the
registering into e1000_open and the unregistering in e1000_close.

This fixes the following warning:

[    1.786060] WARNING: at kernel/pm_qos_params.c:264
pm_qos_update_request+0x28/0x54()
[    1.786088] Hardware name: Latitude E6500
[    1.787045] pm_qos_update_request() called for unknown object
[    1.787966] Modules linked in:
[    1.788940] Pid: 1, comm: swapper Not tainted 2.6.35-rc5-mmotm0719 #1
[    1.790035] Call Trace:
[    1.791121]  [<ffffffff81037335>] warn_slowpath_common+0x80/0x98
[    1.792205]  [<ffffffff810373e1>] warn_slowpath_fmt+0x41/0x43
[    1.793279]  [<ffffffff81057c14>] pm_qos_update_request+0x28/0x54
[    1.794347]  [<ffffffff8134889e>] e1000_configure+0x421/0x459
[    1.795393]  [<ffffffff8134afbd>] e1000_open+0xbd/0x37c
[    1.796436]  [<ffffffff8105743a>] ? raw_notifier_call_chain+0xf/0x11
[    1.797491]  [<ffffffff8145f948>] __dev_open+0xae/0xe2
[    1.798547]  [<ffffffff8145f997>] dev_open+0x1b/0x49
[    1.799612]  [<ffffffff8146e36e>] netpoll_setup+0x84/0x259
[    1.800685]  [<ffffffff81b5037c>] init_netconsole+0xbc/0x21f
[    1.801744]  [<ffffffff81b5026c>] ? sir_wq_init+0x0/0x35
[    1.802793]  [<ffffffff81b502c0>] ? init_netconsole+0x0/0x21f
[    1.803845]  [<ffffffff810002ff>] do_one_initcall+0x7a/0x12f
[    1.804885]  [<ffffffff81b2ccae>] kernel_init+0x138/0x1c2
[    1.805915]  [<ffffffff81003554>] kernel_thread_helper+0x4/0x10
[    1.806937]  [<ffffffff81590e00>] ? restore_args+0x0/0x30
[    1.807955]  [<ffffffff81b2cb76>] ? kernel_init+0x0/0x1c2
[    1.808958]  [<ffffffff81003550>] ? kernel_thread_helper+0x0/0x10
[    1.809958] ---[ end trace 84b562a00a60539e ]---

Signed-off-by: Florian Mickler <florian@mickler.org>
---
 drivers/net/e1000e/netdev.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 8ba366a..1bd9054 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -3218,12 +3218,6 @@ int e1000e_up(struct e1000_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 
-	/* DMA latency requirement to workaround early-receive/jumbo issue */
-	if (adapter->flags & FLAG_HAS_ERT)
-		pm_qos_add_request(&adapter->netdev->pm_qos_req,
-				   PM_QOS_CPU_DMA_LATENCY,
-				   PM_QOS_DEFAULT_VALUE);
-
 	/* hardware has been reset, we need to reload some things */
 	e1000_configure(adapter);
 
@@ -3287,9 +3281,6 @@ void e1000e_down(struct e1000_adapter *adapter)
 	e1000_clean_tx_ring(adapter);
 	e1000_clean_rx_ring(adapter);
 
-	if (adapter->flags & FLAG_HAS_ERT)
-		pm_qos_remove_request(&adapter->netdev->pm_qos_req);
-
 	/*
 	 * TODO: for power management, we could drop the link and
 	 * pci_disable_device here.
@@ -3524,6 +3515,12 @@ static int e1000_open(struct net_device *netdev)
 	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
 		e1000_update_mng_vlan(adapter);
 
+	/* DMA latency requirement to workaround early-receive/jumbo issue */
+	if (adapter->flags & FLAG_HAS_ERT)
+		pm_qos_add_request(&adapter->netdev->pm_qos_req,
+				   PM_QOS_CPU_DMA_LATENCY,
+				   PM_QOS_DEFAULT_VALUE);
+
 	/*
 	 * before we allocate an interrupt, we must be ready to handle it.
 	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
@@ -3628,6 +3625,9 @@ static int e1000_close(struct net_device *netdev)
 	if (adapter->flags & FLAG_HAS_AMT)
 		e1000_release_hw_control(adapter);
 
+	if (adapter->flags & FLAG_HAS_ERT)
+		pm_qos_remove_request(&adapter->netdev->pm_qos_req);
+
 	pm_runtime_put_sync(&pdev->dev);
 
 	return 0;
-- 
1.7.1.1


^ permalink raw reply related

* Re: [RFC PATCH] dst: check if dst is freed in dst_check()
From: Nicolas Dichtel @ 2010-07-21  7:03 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1279679288.2492.15.camel@edumazet-laptop>

Hi Eric,

I was thinking to use this function in sctp, but I misread xfrm part.
Sorry for the noise.


Regards,
Nicolas

Le 21.07.2010 04:28, Eric Dumazet a écrit :
> Le mardi 20 juillet 2010 à 11:49 +0200, Nicolas Dichtel a écrit :
>> Hi,
>>
>> I probably missed something, but I cannot find where obsolete field is checked 
>> when dst_check() is called. If dst->obsolete is > 1, dst cannot be used!
>>
>> Attached is a proposal to fix this issue.
>>
>>
> 
>> diff --git a/include/net/dst.h b/include/net/dst.h
>> index 81d1413..7bf4f9a 100644
>> --- a/include/net/dst.h
>> +++ b/include/net/dst.h
>> @@ -319,6 +319,8 @@ static inline int dst_input(struct sk_buff *skb)
>>  
>>  static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
>>  {
>> +	if (dst->obsolete > 1)
>> +		return NULL;
>>  	if (dst->obsolete)
>>  		dst = dst->ops->check(dst, cookie);
>>  	return dst;
> 
> I believe this is not needed and redundant.
> 
> In what case do you think this matters ?
> 
> To my knowledge dst_check() is only used by net/xfrm/xfrm_policy.c
> 
> And xfrm_dst_check() does the necessary checks.
> 
> static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
> {
>         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
>          * to "-1" to force all XFRM destinations to get validated by
>          * dst_ops->check on every use.  We do this because when a
>          * normal route referenced by an XFRM dst is obsoleted we do
>          * not go looking around for all parent referencing XFRM dsts
>          * so that we can invalidate them.  It is just too much work.
>          * Instead we make the checks here on every use.  For example:
>          *
>          *      XFRM dst A --> IPv4 dst X
>          *
>          * X is the "xdst->route" of A (X is also the "dst->path" of A
>          * in this example).  If X is marked obsolete, "A" will not
>          * notice.  That's what we are validating here via the
>          * stale_bundle() check.
>          *
>          * When a policy's bundle is pruned, we dst_free() the XFRM
>          * dst which causes it's ->obsolete field to be set to a
>          * positive non-zero integer.  If an XFRM dst has been pruned
>          * like this, we want to force a new route lookup.
>          */
>         if (dst->obsolete < 0 && !stale_bundle(dst))
>                 return dst;
> 
>         return NULL;
> }

^ permalink raw reply

* Re: [RFC PATCH v3 4/5] skb: add tracepoints to freeing skb
From: Koki Sanagi @ 2010-07-21  7:02 UTC (permalink / raw)
  To: Neil Horman
  Cc: netdev, linux-kernel, davem, kaneshige.kenji, izumi.taku,
	kosaki.motohiro, laijs, scott.a.mcmillan, rostedt, eric.dumazet,
	fweisbec, mathieu.desnoyers
In-Reply-To: <20100720115044.GD1995@hmsreliant.think-freely.org>

(2010/07/20 20:50), Neil Horman wrote:
> On Tue, Jul 20, 2010 at 09:49:10AM +0900, Koki Sanagi wrote:
>> [RFC PATCH v3 4/5] skb: add tracepoints to freeing skb
>> This patch adds tracepoint to consume_skb, dev_kfree_skb_irq and
>> skb_free_datagram_locked. Combinating with tracepoint on dev_hard_start_xmit,
>> we can check how long it takes to free transmited packets. And using it, we can
>> calculate how many packets driver had at that time. It is useful when a drop of
>> transmited packet is a problem.
>>
>>           <idle>-0     [001] 241409.218333: consume_skb: skbaddr=dd6b2fb8
>>           <idle>-0     [001] 241409.490555: dev_kfree_skb_irq: skbaddr=f5e29840
>>
>>         udp-recv-302   [001] 515031.206008: skb_free_datagram_locked: skbaddr=f5b1d900
>>
>>
>> Signed-off-by: Koki Sanagi <sanagi.koki@jp.fujitsu.com>
>> ---
>>  include/trace/events/skb.h |   42 ++++++++++++++++++++++++++++++++++++++++++
>>  net/core/datagram.c        |    1 +
>>  net/core/dev.c             |    2 ++
>>  net/core/skbuff.c          |    1 +
>>  4 files changed, 46 insertions(+), 0 deletions(-)
>>
>> diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
>> index 4b2be6d..84c9041 100644
>> --- a/include/trace/events/skb.h
>> +++ b/include/trace/events/skb.h
>> @@ -35,6 +35,48 @@ TRACE_EVENT(kfree_skb,
>>  		__entry->skbaddr, __entry->protocol, __entry->location)
>>  );
>>  
>> +DECLARE_EVENT_CLASS(free_skb,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(	void *,	skbaddr	)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->skbaddr = skb;
>> +	),
>> +
>> +	TP_printk("skbaddr=%p", __entry->skbaddr)
>> +
>> +);
>> +
>> +DEFINE_EVENT(free_skb, consume_skb,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb)
>> +
>> +);
>> +
>> +DEFINE_EVENT(free_skb, dev_kfree_skb_irq,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb)
>> +
>> +);
>> +
>> +DEFINE_EVENT(free_skb, skb_free_datagram_locked,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb)
>> +
>> +);
>> +
> 
> Why create these last two tracepoints at all?  dev_kfree_skb_irq will eventually
> pass through kfree_skb anyway, getting picked up by the tracepoint there, the
> while the latter won't (since it uses __kfree_skb instead), I think that could
> be fixed up by add a call to trace_kfree_skb there directly, saving you two
> tracepoints.
> 
> Neil
> 
I think dev_kfree_skb_irq isn't chased by trace_kfree_skb or trace_consume_skb
completely. Because net_tx_action frees skb by __kfree_skb. So it is better to
add trace_kfree_skb before it. skb_free_datagram_locked is same.

Thanks,
Koki Sanagi.
>>
> 
> 



^ permalink raw reply

* Re: [RFC PATCH v3 3/5] netdev: add tracepoints to netdev layer
From: Koki Sanagi @ 2010-07-21  7:01 UTC (permalink / raw)
  To: Neil Horman
  Cc: netdev, linux-kernel, davem, kaneshige.kenji, izumi.taku,
	kosaki.motohiro, laijs, scott.a.mcmillan, rostedt, eric.dumazet,
	fweisbec, mathieu.desnoyers
In-Reply-To: <20100720114152.GC1995@hmsreliant.think-freely.org>

(2010/07/20 20:41), Neil Horman wrote:
> On Tue, Jul 20, 2010 at 09:47:59AM +0900, Koki Sanagi wrote:
>> This patch adds tracepoint to dev_queue_xmit, dev_hard_start_xmit and
>> netif_receive_skb. These tracepoints help you to monitor network driver's
>> input/output.
>>
>>             sshd-4445  [001] 241367.066046: net_dev_queue: dev=eth3 skbaddr=dd6b2538 len=114
>>             sshd-4445  [001] 241367.066047: net_dev_xmit: dev=eth3 skbaddr=dd6b2538 len=114 rc=0
>>           <idle>-0     [001] 241367.067472: net_dev_receive: dev=eth3 skbaddr=f5e59000 len=52
>>
>> Signed-off-by: Koki Sanagi <sanagi.koki@jp.fujitsu.com>
>> ---
>>  include/trace/events/net.h |   75 ++++++++++++++++++++++++++++++++++++++++++++
>>  net/core/dev.c             |    5 +++
>>  net/core/net-traces.c      |    1 +
>>  3 files changed, 81 insertions(+), 0 deletions(-)
>>
>> diff --git a/include/trace/events/net.h b/include/trace/events/net.h
>> new file mode 100644
>> index 0000000..8a21361
>> --- /dev/null
>> +++ b/include/trace/events/net.h
>> @@ -0,0 +1,75 @@
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM net
>> +
>> +#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_NET_H
>> +
>> +#include <linux/skbuff.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/ip.h>
>> +#include <linux/tracepoint.h>
>> +
>> +TRACE_EVENT(net_dev_xmit,
>> +
>> +	TP_PROTO(struct sk_buff *skb,
>> +		 int rc),
>> +
>> +	TP_ARGS(skb, rc),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(	void *,		skbaddr		)
>> +		__field(	unsigned int,	len		)
>> +		__field(	int,		rc		)
>> +		__string(	name,		skb->dev->name	)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->skbaddr = skb;
>> +		__entry->len = skb->len;
>> +		__entry->rc = rc;
>> +		__assign_str(name, skb->dev->name);
>> +	),
>> +
>> +	TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
>> +		__get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
>> +);
>> +
>> +DECLARE_EVENT_CLASS(net_dev_template,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(	void *,		skbaddr		)
>> +		__field(	unsigned int,	len		)
>> +		__string(	name,		skb->dev->name	)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->skbaddr = skb;
>> +		__entry->len = skb->len;
>> +		__assign_str(name, skb->dev->name);
>> +	),
>> +
>> +	TP_printk("dev=%s skbaddr=%p len=%u",
>> +		__get_str(name), __entry->skbaddr, __entry->len)
>> +)
>> +
>> +DEFINE_EVENT(net_dev_template, net_dev_queue,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb)
>> +);
>> +
>> +DEFINE_EVENT(net_dev_template, net_dev_receive,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb)
>> +);
>> +#endif /* _TRACE_NET_H */
>> +
>> +/* This part must be outside protection */
>> +#include <trace/define_trace.h>
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index 93b8929..4acfec6 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -130,6 +130,7 @@
>>  #include <linux/jhash.h>
>>  #include <linux/random.h>
>>  #include <trace/events/napi.h>
>> +#include <trace/events/net.h>
>>  #include <linux/pci.h>
>>  
>>  #include "net-sysfs.h"
>> @@ -1955,6 +1956,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
>>  		}
>>  
>>  		rc = ops->ndo_start_xmit(skb, dev);
>> +		trace_net_dev_xmit(skb, rc);
>>  		if (rc == NETDEV_TX_OK)
>>  			txq_trans_update(txq);
>>  		return rc;
>> @@ -1975,6 +1977,7 @@ gso:
>>  			skb_dst_drop(nskb);
>>  
>>  		rc = ops->ndo_start_xmit(nskb, dev);
>> +		trace_net_dev_xmit(nskb, rc);
>>  		if (unlikely(rc != NETDEV_TX_OK)) {
>>  			if (rc & ~NETDEV_TX_MASK)
>>  				goto out_kfree_gso_skb;
>> @@ -2165,6 +2168,7 @@ int dev_queue_xmit(struct sk_buff *skb)
>>  #ifdef CONFIG_NET_CLS_ACT
>>  	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
>>  #endif
>> +	trace_net_dev_queue(skb);
>>  	if (q->enqueue) {
>>  		rc = __dev_xmit_skb(skb, q, dev, txq);
>>  		goto out;
>> @@ -2939,6 +2943,7 @@ int netif_receive_skb(struct sk_buff *skb)
>>  	if (netdev_tstamp_prequeue)
>>  		net_timestamp_check(skb);
>>  
>> +	trace_net_dev_receive(skb);
> 
> I imagine for completeness you'll want to make a call to this in netif_rx and
> netif_rx_ni as well.
> 
It might be better to add tracepoint to netif_rx to determine a time between
netif_rx and netif_receive_skb.

Thanks,
Koki Sanagi.

> 
> 
> 



^ permalink raw reply

* Re: [RFC PATCH v3 2/5] napi: convert trace_napi_poll to TRACE_EVENT
From: Koki Sanagi @ 2010-07-21  7:00 UTC (permalink / raw)
  To: Neil Horman
  Cc: netdev, linux-kernel, davem, kaneshige.kenji, izumi.taku,
	kosaki.motohiro, laijs, scott.a.mcmillan, rostedt, eric.dumazet,
	fweisbec, mathieu.desnoyers
In-Reply-To: <20100720110952.GB1995@hmsreliant.think-freely.org>

(2010/07/20 20:09), Neil Horman wrote:
> On Tue, Jul 20, 2010 at 09:46:51AM +0900, Koki Sanagi wrote:
>> From: Neil Horman <nhorman@tuxdriver.com>
>>
>> This patch converts trace_napi_poll from DECLARE_EVENT to TRACE_EVENT to improve
>> the usability of napi_poll tracepoint.
>>
>>           <idle>-0     [001] 241302.750777: napi_poll: napi poll on napi struct f6acc480 for device eth3
>>           <idle>-0     [000] 241302.852389: napi_poll: napi poll on napi struct f5d0d70c for device eth1
>>
>> An original patch is below.
>> http://marc.info/?l=linux-kernel&m=126021713809450&w=2
>> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>>
>> And add a fix by Steven Rostedt.
>> http://marc.info/?l=linux-kernel&m=126150506519173&w=2
>>
>> Signed-off-by: Koki Sanagi <sanagi.koki@jp.fujitsu.com>
>> ---
>>  include/trace/events/napi.h |   25 +++++++++++++++++++++++--
>>  1 files changed, 23 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
>> index 188deca..8fe1e93 100644
>> --- a/include/trace/events/napi.h
>> +++ b/include/trace/events/napi.h
>> @@ -6,10 +6,31 @@
>>  
>>  #include <linux/netdevice.h>
>>  #include <linux/tracepoint.h>
>> +#include <linux/ftrace.h>
>> +
>> +#define NO_DEV "(no_device)"
>> +
>> +TRACE_EVENT(napi_poll,
>>  
>> -DECLARE_TRACE(napi_poll,
>>  	TP_PROTO(struct napi_struct *napi),
>> -	TP_ARGS(napi));
>> +
>> +	TP_ARGS(napi),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(	struct napi_struct *,	napi)
>> +		__string(	dev_name, napi->dev ? napi->dev->name : NO_DEV)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->napi = napi;
>> +		__assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
>> +	),
>> +
>> +	TP_printk("napi poll on napi struct %p for device %s",
>> +		__entry->napi, __get_str(dev_name))
>> +);
>> +
>> +#undef NO_DEV
>>  
>>  #endif /* _TRACE_NAPI_H_ */
>>  
>>
> NAK, This change will create a build break in the drop monitor code.  You'll
> need to fix that up if you want to make this change.
> Neil
> 
I built a kernel with CONFIG_NET_DROP_MONITOR=y.
But build break did not occur.

Thanks,
Koki Sanagi.

> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 



^ permalink raw reply

* Re: [RFC PATCH v3 1/5] irq: add tracepoint to softirq_raise
From: Koki Sanagi @ 2010-07-21  6:57 UTC (permalink / raw)
  To: Neil Horman
  Cc: netdev, linux-kernel, davem, kaneshige.kenji, izumi.taku,
	kosaki.motohiro, laijs, scott.a.mcmillan, rostedt, eric.dumazet,
	fweisbec, mathieu.desnoyers
In-Reply-To: <20100720110439.GA1995@hmsreliant.think-freely.org>

(2010/07/20 20:04), Neil Horman wrote:
> On Tue, Jul 20, 2010 at 09:45:31AM +0900, Koki Sanagi wrote:
>> From: Lai Jiangshan <laijs@cn.fujitsu.com>
>>
>> Add a tracepoint for tracing when softirq action is raised.
>>
>> It and the existed tracepoints complete softirq's tracepoints:
>> softirq_raise, softirq_entry and softirq_exit.
>>
>> And when this tracepoint is used in combination with
>> the softirq_entry tracepoint we can determine
>> the softirq raise latency.
>>
>> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
>> Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
>> Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
>>
>> [ factorize softirq events with DECLARE_EVENT_CLASS ]
>> Signed-off-by: Koki Sanagi <sanagi.koki@jp.fujitsu.com>
>> ---
>>  include/linux/interrupt.h  |    8 +++++-
>>  include/trace/events/irq.h |   57 ++++++++++++++++++++++++++-----------------
>>  kernel/softirq.c           |    4 +-
>>  3 files changed, 43 insertions(+), 26 deletions(-)
>>
>> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
>> index c233113..1cb5726 100644
>> --- a/include/linux/interrupt.h
>> +++ b/include/linux/interrupt.h
>> @@ -18,6 +18,7 @@
>>  #include <asm/atomic.h>
>>  #include <asm/ptrace.h>
>>  #include <asm/system.h>
>> +#include <trace/events/irq.h>
>>  
>>  /*
>>   * These correspond to the IORESOURCE_IRQ_* defines in
>> @@ -402,7 +403,12 @@ asmlinkage void do_softirq(void);
>>  asmlinkage void __do_softirq(void);
>>  extern void open_softirq(int nr, void (*action)(struct softirq_action *));
>>  extern void softirq_init(void);
>> -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
>> +static inline void __raise_softirq_irqoff(unsigned int nr)
>> +{
>> +	trace_softirq_raise(nr);
>> +	or_softirq_pending(1UL << nr);
>> +}
>> +
> We already have tracepoints in irq_enter and irq_exit.  If the goal here is to
> detect latency during packet processing, cant the delta in time between those
> two points be used to determine interrupt handling latency?

Certainly, the time between irq_entry and irq_exit is not directly related to
latency during packet processing. But it's indirectly related it.
Because softirq_entry isn't passed until irq exits and softirq_entry time is
related to packet processing latency. So I show it as a reference.

> 
> 
>>  extern void raise_softirq_irqoff(unsigned int nr);
>>  extern void raise_softirq(unsigned int nr);
>>  extern void wakeup_softirqd(void);
>> diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
>> index 0e4cfb6..717744c 100644
>> --- a/include/trace/events/irq.h
>> +++ b/include/trace/events/irq.h
>> @@ -5,7 +5,9 @@
>>  #define _TRACE_IRQ_H
>>  
>>  #include <linux/tracepoint.h>
>> -#include <linux/interrupt.h>
>> +
>> +struct irqaction;
>> +struct softirq_action;
>>  
>>  #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
>>  #define show_softirq_name(val)				\
>> @@ -84,56 +86,65 @@ TRACE_EVENT(irq_handler_exit,
>>  
>>  DECLARE_EVENT_CLASS(softirq,
>>  
>> -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
>> +	TP_PROTO(unsigned int nr),
>>  
>> -	TP_ARGS(h, vec),
>> +	TP_ARGS(nr),
>>  
>>  	TP_STRUCT__entry(
>> -		__field(	int,	vec			)
>> +		__field(	unsigned int,	vec	)
>>  	),
>>  
>>  	TP_fast_assign(
>> -		__entry->vec = (int)(h - vec);
>> +		__entry->vec	= nr;
>>  	),
>>  
>>  	TP_printk("vec=%d [action=%s]", __entry->vec,
>> -		  show_softirq_name(__entry->vec))
>> +		show_softirq_name(__entry->vec))
>> +);
>> +
>> +/**
>> + * softirq_raise - called immediately when a softirq is raised
>> + * @nr: softirq vector number
>> + *
>> + * Tracepoint for tracing when softirq action is raised.
>> + * Also, when used in combination with the softirq_entry tracepoint
>> + * we can determine the softirq raise latency.
>> + */
>> +DEFINE_EVENT(softirq, softirq_raise,
>> +
>> +	TP_PROTO(unsigned int nr),
>> +
>> +	TP_ARGS(nr)
>>  );
>>  
>>  /**
>>   * softirq_entry - called immediately before the softirq handler
>> - * @h: pointer to struct softirq_action
>> - * @vec: pointer to first struct softirq_action in softirq_vec array
>> + * @nr: softirq vector number
>>   *
>> - * The @h parameter, contains a pointer to the struct softirq_action
>> - * which has a pointer to the action handler that is called. By subtracting
>> - * the @vec pointer from the @h pointer, we can determine the softirq
>> - * number. Also, when used in combination with the softirq_exit tracepoint
>> + * Tracepoint for tracing when softirq action starts.
>> + * Also, when used in combination with the softirq_exit tracepoint
>>   * we can determine the softirq latency.
>>   */
>>  DEFINE_EVENT(softirq, softirq_entry,
>>  
>> -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
>> +	TP_PROTO(unsigned int nr),
>>  
>> -	TP_ARGS(h, vec)
>> +	TP_ARGS(nr)
>>  );
>>  
>>  /**
>>   * softirq_exit - called immediately after the softirq handler returns
>> - * @h: pointer to struct softirq_action
>> - * @vec: pointer to first struct softirq_action in softirq_vec array
>> + * @nr: softirq vector number
>>   *
>> - * The @h parameter contains a pointer to the struct softirq_action
>> - * that has handled the softirq. By subtracting the @vec pointer from
>> - * the @h pointer, we can determine the softirq number. Also, when used in
>> - * combination with the softirq_entry tracepoint we can determine the softirq
>> - * latency.
>> + * Tracepoint for tracing when softirq action ends.
>> + * Also, when used in combination with the softirq_entry tracepoint
>> + * we can determine the softirq latency.
>>   */
>>  DEFINE_EVENT(softirq, softirq_exit,
>>  
>> -	TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
>> +	TP_PROTO(unsigned int nr),
>>  
>> -	TP_ARGS(h, vec)
>> +	TP_ARGS(nr)
>>  );
>>  
>>  #endif /*  _TRACE_IRQ_H */
>> diff --git a/kernel/softirq.c b/kernel/softirq.c
>> index 825e112..6790599 100644
>> --- a/kernel/softirq.c
>> +++ b/kernel/softirq.c
>> @@ -215,9 +215,9 @@ restart:
>>  			int prev_count = preempt_count();
>>  			kstat_incr_softirqs_this_cpu(h - softirq_vec);
>>  
>> -			trace_softirq_entry(h, softirq_vec);
>> +			trace_softirq_entry(h - softirq_vec);
>>  			h->action(h);
>> -			trace_softirq_exit(h, softirq_vec);
>> +			trace_softirq_exit(h - softirq_vec);
> 
> You're loosing information here by reducing the numbers of parameters in this
> tracepoint.  How many other tracepoint scripts rely on having both pointers
> handy?  Why not just do the pointer math inside your tracehook instead?

In __raise_softirq_irqoff macro there is no method to refer softirq_vec, so it
can't use softirq DECLARE_EVENT_CLASS as is.
Currently,  there is no script using softirq_entry or softirq_exit.

Thanks,
Koki Sanagi.

> 
>>  			if (unlikely(prev_count != preempt_count())) {
>>  				printk(KERN_ERR "huh, entered softirq %td %s %p"
>>  				       "with preempt_count %08x,"
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> 
> 



^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox