Netdev List

Netdev List
 help / color / mirror / Atom feed

* [patch 07/13] KSZ8851-SNL: Add ethtool support for EEPROM
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support, Simtec Linux Team
In-Reply-To: <20100429231621.015936077@fluff.org.uk>

[-- Attachment #1: ks8851-add-eeprom-ethtool.patch --]
[-- Type: text/plain, Size: 6382 bytes --]

Add ethtool EEPROM read/write support for the KS8851 driver.

Depends on eeprom_93cx6 driver getting EEPROM write support.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Simtec Linux Team <linux@simtec.co.uk>

---
 drivers/net/Kconfig  |    1 
 drivers/net/ks8851.c |  149 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/net/ks8851.h |    1 
 3 files changed, 151 insertions(+)

Index: b/drivers/net/ks8851.c
===================================================================
--- a/drivers/net/ks8851.c	2010-04-29 01:01:31.118391091 +0900
+++ b/drivers/net/ks8851.c	2010-04-29 01:22:42.667052653 +0900
@@ -19,6 +19,7 @@
 #include <linux/cache.h>
 #include <linux/crc32.h>
 #include <linux/mii.h>
+#include <linux/eeprom_93cx6.h>
 
 #include <linux/spi/spi.h>
 
@@ -78,6 +79,7 @@ union ks8851_tx_hdr {
  * @rc_ier: Cached copy of KS_IER.
  * @rc_ccr: Cached copy of KS_CCR.
  * @rc_rxqcr: Cached copy of KS_RXQCR.
+ * @eeprom: 93CX6 EEPROM state for accessing on-board EEPROM.
  *
  * The @lock ensures that the chip is protected when certain operations are
  * in progress. When the read or write packet transfer is in progress, most
@@ -125,6 +127,8 @@ struct ks8851_net {
 	struct spi_message	spi_msg2;
 	struct spi_transfer	spi_xfer1;
 	struct spi_transfer	spi_xfer2[2];
+
+	struct eeprom_93cx6	eeprom;
 };
 
 static int msg_enable;
@@ -1149,6 +1153,141 @@ static int ks8851_nway_reset(struct net_
 	return mii_nway_restart(&ks->mii);
 }
 
+/* EEPROM support */
+
+static void ks8851_eeprom_regread(struct eeprom_93cx6 *ee)
+{
+	struct ks8851_net *ks = ee->data;
+	unsigned val;
+
+	val = ks8851_rdreg16(ks, KS_EEPCR);
+
+	ee->reg_data_out = (val & EEPCR_EESB) ? 1 : 0;
+	ee->reg_data_clock = (val & EEPCR_EESCK) ? 1 : 0;
+	ee->reg_chip_select = (val & EEPCR_EECS) ? 1 : 0;
+}
+
+static void ks8851_eeprom_regwrite(struct eeprom_93cx6 *ee)
+{
+	struct ks8851_net *ks = ee->data;
+	unsigned val = EEPCR_EESA;	/* default - eeprom access on */
+
+	if (ee->drive_data)
+		val |= EEPRC_EESRW;
+	if (ee->reg_data_in)
+		val |= EEPCR_EEDO;
+	if (ee->reg_data_clock)
+		val |= EEPCR_EESCK;
+	if (ee->reg_chip_select)
+		val |= EEPCR_EECS;
+
+	printk(KERN_INFO "%s: wr %04x\n", __func__, val);
+	ks8851_wrreg16(ks, KS_EEPCR, val);
+}
+
+/**
+ * ks8851_eeprom_claim - claim device EEPROM and activate the interface
+ * @ks: The network device state.
+ *
+ * Check for the presence of an EEPROM, and then activate software access
+ * to the device.
+ */
+static int ks8851_eeprom_claim(struct ks8851_net *ks)
+{
+	if (!(ks->rc_ccr & CCR_EEPROM))
+		return -ENOENT;
+
+	/* start with clock low, cs high */
+	ks8851_wrreg16(ks, KS_EEPCR, EEPCR_EESA | EEPCR_EECS);
+	return 0;
+}
+
+/**
+ * ks8851_eeprom_release - release the EEPROM interface
+ * @ks: The device state
+ *
+ * Release the software access to the device EEPROM
+ */
+static void ks8851_eeprom_release(struct ks8851_net *ks)
+{
+	unsigned val = ks8851_rdreg16(ks,KS_EEPCR);
+
+	ks8851_wrreg16(ks, KS_EEPCR, val & ~EEPCR_EESA);
+}
+
+#define KS_EEPROM_MAGIC (0x00008851)
+
+static int ks8851_set_eeprom(struct net_device *dev,
+			     struct ethtool_eeprom *ee, u8 *data)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	int offset = ee->offset;
+	int len = ee->len;
+	u16 tmp;
+
+	/* currently only support byte writing */
+	if (len != 1)
+		return -EINVAL;
+
+	if (ee->magic != KS_EEPROM_MAGIC)
+		return -EINVAL;
+
+	if (ks8851_eeprom_claim(ks))
+		return -ENOENT;
+
+	eeprom_93cx6_wren(&ks->eeprom, true);
+
+	/* ethtool currently only supports writing bytes, which means
+	 * we have to read/modify/write our 16bit EEPROMs */
+
+	eeprom_93cx6_read(&ks->eeprom, offset/2, &tmp);
+
+	if (offset & 1) {
+		tmp &= 0xff;
+		tmp |= *data << 8;
+	} else {
+		tmp &= 0xff00;
+		tmp |= *data;
+	}
+
+	eeprom_93cx6_write(&ks->eeprom, offset/2, tmp);
+	eeprom_93cx6_wren(&ks->eeprom, false);
+
+	ks8851_eeprom_release(ks);
+
+	return 0;
+}
+
+static int ks8851_get_eeprom(struct net_device *dev,
+			     struct ethtool_eeprom *ee, u8 *data)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	int offset = ee->offset;
+	int len = ee->len;
+
+	/* must be 2 byte aligned */
+	if (len & 1 || offset & 1)
+		return -EINVAL;
+
+	if (ks8851_eeprom_claim(ks))
+		return -ENOENT;
+
+	ee->magic = KS_EEPROM_MAGIC;
+
+	eeprom_93cx6_multiread(&ks->eeprom, offset/2, (__le16 *)data, len/2);
+	ks8851_eeprom_release(ks);
+
+	return 0;
+}
+
+static int ks8851_get_eeprom_len(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+
+	/* currently, we assume it is an 93C46 attached, so return 128 */
+	return ks->rc_ccr & CCR_EEPROM ? 128 : 0;
+}
+
 static const struct ethtool_ops ks8851_ethtool_ops = {
 	.get_drvinfo	= ks8851_get_drvinfo,
 	.get_msglevel	= ks8851_get_msglevel,
@@ -1157,6 +1296,9 @@ static const struct ethtool_ops ks8851_e
 	.set_settings	= ks8851_set_settings,
 	.get_link	= ks8851_get_link,
 	.nway_reset	= ks8851_nway_reset,
+	.get_eeprom_len = ks8851_get_eeprom_len,
+	.get_eeprom	= ks8851_get_eeprom,
+	.set_eeprom	= ks8851_set_eeprom,
 };
 
 /* MII interface controls */
@@ -1305,6 +1447,13 @@ static int __devinit ks8851_probe(struct
 	spi_message_add_tail(&ks->spi_xfer2[0], &ks->spi_msg2);
 	spi_message_add_tail(&ks->spi_xfer2[1], &ks->spi_msg2);
 
+	/* setup EEPROM state */
+
+	ks->eeprom.data = ks;
+	ks->eeprom.width = PCI_EEPROM_WIDTH_93C46;
+	ks->eeprom.register_read = ks8851_eeprom_regread;
+	ks->eeprom.register_write = ks8851_eeprom_regwrite;
+
 	/* setup mii state */
 	ks->mii.dev		= ndev;
 	ks->mii.phy_id		= 1,
Index: b/drivers/net/Kconfig
===================================================================
--- a/drivers/net/Kconfig	2010-04-28 23:24:20.657052849 +0900
+++ b/drivers/net/Kconfig	2010-04-29 01:22:42.667052653 +0900
@@ -1766,6 +1766,7 @@ config KS8851
        depends on SPI
        select MII
 	select CRC32
+	select EEPROM_93CX6
        help
          SPI driver for Micrel KS8851 SPI attached network chip.
 
Index: b/drivers/net/ks8851.h
===================================================================
--- a/drivers/net/ks8851.h	2010-04-29 01:00:35.029526937 +0900
+++ b/drivers/net/ks8851.h	2010-04-29 01:22:42.667052653 +0900
@@ -25,6 +25,7 @@
 #define OBCR_ODS_16mA				(1 << 6)
 
 #define KS_EEPCR				0x22
+#define EEPRC_EESRW				(1 << 5)
 #define EEPCR_EESA				(1 << 4)
 #define EEPCR_EESB				(1 << 3)
 #define EEPCR_EEDO				(1 << 2)


^ permalink raw reply

* [patch 08/13] KSZ8851-SNL: Fix EEPROM access problem
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support, Tristram Ha
In-Reply-To: <20100429231621.015936077@fluff.org.uk>

[-- Attachment #1: thirdparty/micrel/ksz8851-fix-eeprom.patch --]
[-- Type: text/plain, Size: 1304 bytes --]

From: Tristram Ha <Tristram.Ha@micrel.com>

Accessing the EEPROM when the device is receiving sometimes hangs the
system as the hardware is not locked down.

Signed-off-by: Tristram Ha <Tristram.Ha@micrel.com>
[ben@simtec.co.uk: fix description text]
Signed-off-by: Ben Dooks <ben@simtec.co.uk>

---
---
 drivers/net/ks8851.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

Index: b/drivers/net/ks8851.c
===================================================================
--- a/drivers/net/ks8851.c	2010-04-29 01:22:42.667052653 +0900
+++ b/drivers/net/ks8851.c	2010-04-29 01:22:47.779528264 +0900
@@ -1181,7 +1181,6 @@ static void ks8851_eeprom_regwrite(struc
 	if (ee->reg_chip_select)
 		val |= EEPCR_EECS;
 
-	printk(KERN_INFO "%s: wr %04x\n", __func__, val);
 	ks8851_wrreg16(ks, KS_EEPCR, val);
 }
 
@@ -1197,6 +1196,8 @@ static int ks8851_eeprom_claim(struct ks
 	if (!(ks->rc_ccr & CCR_EEPROM))
 		return -ENOENT;
 
+	mutex_lock(&ks->lock);
+
 	/* start with clock low, cs high */
 	ks8851_wrreg16(ks, KS_EEPCR, EEPCR_EESA | EEPCR_EECS);
 	return 0;
@@ -1213,6 +1214,7 @@ static void ks8851_eeprom_release(struct
 	unsigned val = ks8851_rdreg16(ks,KS_EEPCR);
 
 	ks8851_wrreg16(ks, KS_EEPCR, val & ~EEPCR_EESA);
+	mutex_unlock(&ks->lock);
 }
 
 #define KS_EEPROM_MAGIC (0x00008851)


^ permalink raw reply

* [patch 09/13] KSZ8851-SNL: Add debugfs export for driver state
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support
In-Reply-To: <20100429231621.015936077@fluff.org.uk>

[-- Attachment #1: ks8851-add-debugfs.patch --]
[-- Type: text/plain, Size: 3868 bytes --]

Add the ability to export the state of each network chip via debugfs
to show the cached register state and some of the network device state
information.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>

---
 drivers/net/ks8851.c |   94 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

Index: b/drivers/net/ks8851.c
===================================================================
--- a/drivers/net/ks8851.c	2010-04-29 01:22:47.779528264 +0900
+++ b/drivers/net/ks8851.c	2010-04-29 01:25:54.217027162 +0900
@@ -21,6 +21,9 @@
 #include <linux/mii.h>
 #include <linux/eeprom_93cx6.h>
 
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
 #include <linux/spi/spi.h>
 
 #include "ks8851.h"
@@ -128,6 +131,9 @@ struct ks8851_net {
 	struct spi_transfer	spi_xfer1;
 	struct spi_transfer	spi_xfer2[2];
 
+	struct dentry		*debug_root;
+	struct dentry		*debug_file;
+
 	struct eeprom_93cx6	eeprom;
 };
 
@@ -1411,6 +1417,91 @@ static int ks8851_read_selftest(struct k
 	return 0;
 }
 
+/* debugfs code */
+static int state_show(struct seq_file *seq, void *v)
+{
+	struct ks8851_net *ks = seq->private;
+	struct net_device *ndev = ks->netdev;
+
+	seq_printf(seq, "Register cache:\n");
+	seq_printf(seq, "IEQ\t 0x%04x\n", ks->rc_ier);
+	seq_printf(seq, "RXQCR\t 0x%04x\n", ks->rc_rxqcr);
+	seq_printf(seq, "CCR\t 0x%04x\n", ks->rc_ccr);
+	seq_printf(seq, "RXCR1\t 0x%04x\n", ks->rxctrl.rxcr1);
+	seq_printf(seq, "RXCR2\t 0x%04x\n", ks->rxctrl.rxcr2);
+	seq_printf(seq, "MCHASH\t 0=0x%04x, 1=%04x, 2=0x%04x, 3=0x%04x\n",
+		   ks->rxctrl.mchash[0], ks->rxctrl.mchash[1],
+		   ks->rxctrl.mchash[2], ks->rxctrl.mchash[3]);
+
+	seq_printf(seq, "\n");
+
+	seq_printf(seq, "tx_space = 0x%04x\n", ks->tx_space);
+	seq_printf(seq, "tx fid\t= 0x%02x\n", ks->fid);
+
+	seq_printf(seq, "\n");
+
+	if (ndev->flags & IFF_MULTICAST) {
+		struct dev_mc_list *mcptr = ndev->mc_list;
+		int i;
+
+		seq_printf(seq, "MC list is %d entries\n", ndev->mc_count);
+
+		for (i = 0; i < ndev->mc_count; i++) {
+			seq_printf(seq, "\t%d: %pM\n", i, mcptr->dmi_addr);
+			mcptr = mcptr->next;
+		}
+	} else
+		seq_printf(seq, "No multicast list set\n");
+
+	return 0;
+}
+
+static int state_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, state_show, inode->i_private);
+}
+
+static const struct file_operations state_fops = {
+	.owner	= THIS_MODULE,
+	.open	= state_open,
+	.read	= seq_read,
+	.llseek	= seq_lseek,
+	.release = single_release,
+};
+
+/**
+ * ks8851_create_debugfs - create debugfs directory and files
+ * @ks: The driver state
+ *
+ * Create the debugfs entries for the specific device.
+ */
+static void __devinit ks8851_create_debugfs(struct ks8851_net *ks)
+{
+	struct dentry *root;
+	char root_name[32];
+
+	snprintf(root_name, sizeof(root_name), "ks8851_%s",
+		 dev_name(&ks->spidev->dev));
+
+	root = debugfs_create_dir(root_name, NULL);
+	if (IS_ERR(root)) {
+		ks_err(ks, "cannot create debugfs root\n");
+		return;
+	}
+
+	ks->debug_root = root;
+	ks->debug_file = debugfs_create_file("state", 0444, root,
+					     ks, &state_fops);
+	if (IS_ERR(ks->debug_file))
+		ks_err(ks, "cannot create debugfs state file\n");
+}
+
+static void __devexit ks8851_delete_debugfs(struct ks8851_net *ks)
+{
+	debugfs_remove(ks->debug_file);
+	debugfs_remove(ks->debug_root);
+}
+
 /* driver bus management functions */
 
 static int __devinit ks8851_probe(struct spi_device *spi)
@@ -1518,6 +1609,8 @@ static int __devinit ks8851_probe(struct
 		 ndev->dev_addr, ndev->irq,
 		 ks->rc_ccr & CCR_EEPROM ? "has" : "no");
 
+	ks8851_create_debugfs(ks);
+
 	return 0;
 
 
@@ -1537,6 +1630,7 @@ static int __devexit ks8851_remove(struc
 	if (netif_msg_drv(priv))
 		dev_info(&spi->dev, "remove");
 
+	ks8851_delete_debugfs(priv);
 	unregister_netdev(priv->netdev);
 	free_irq(spi->irq, priv);
 	free_netdev(priv->netdev);


^ permalink raw reply

* [patch 00/13] Micrel KSZ8851 updates and fixes
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support

Please consider this set for the next merge window.

Included the 93CX6 driver, as I didn't find anywhere better to send it.

-- 
Ben (ben@fluff.org, http://www.fluff.org/)

  'a smiley only costs 4 bytes'

^ permalink raw reply

* [patch 02/13] KSZ8851-SNL: Fix receive interrupt problem.
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support, Tristram Ha
In-Reply-To: <20100429231621.015936077@fluff.org.uk>

[-- Attachment #1: thirdparty/micrel/ks8851-rx-fixeml.txt --]
[-- Type: text/plain, Size: 1438 bytes --]

From: Tristram Ha <Tristram.Ha@micrel.com>

This fixes a receive problem of the ks8851 snl network driver.

Under heavy TCP traffic the device will stop operating correctly. First
the receive interrupt is not triggered anymore.  After that, the driver
cannot retrieve the correct packets from the device.  A workaround for
this problem is to disable the transmit done interrupt.

Signed-off-by: Tristram Ha <Tristram.Ha@micrel.com>
[ben@simtec.co.uk: cleaned up header]
Signed-off-by: Ben Dooks <ben@simtec.co.uk>

---
---
 drivers/net/ks8851.c |   10 ++++++++++
 1 file changed, 10 insertions(+)

Index: b/drivers/net/ks8851.c
===================================================================
--- a/drivers/net/ks8851.c	2010-04-28 23:24:20.737026841 +0900
+++ b/drivers/net/ks8851.c	2010-04-29 00:51:12.489525834 +0900
@@ -553,6 +553,13 @@ static void ks8851_irq_work(struct work_
 
 	mutex_lock(&ks->lock);
 
+	/*
+	 * Turn off hardware interrupt during receive processing.  This fixes
+	 * the receive problem under heavy TCP traffic while transmit done
+	 * is enabled.
+	 */
+	ks8851_wrreg16(ks, KS_IER, 0);
+
 	status = ks8851_rdreg16(ks, KS_ISR);
 
 	if (netif_msg_intr(ks))
@@ -610,6 +617,9 @@ static void ks8851_irq_work(struct work_
 		ks8851_wrreg16(ks, KS_RXCR1, rxc->rxcr1);
 	}
 
+	/* Re-enable hardware interrupt. */
+	ks8851_wrreg16(ks, KS_IER, ks->rc_ier);
+
 	mutex_unlock(&ks->lock);
 
 	enable_irq(ks->netdev->irq);


^ permalink raw reply

* [patch 06/13] KSZ8851-SNL: Fix MAC address change problem
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support, Tristram Ha
In-Reply-To: <20100429231621.015936077@fluff.org.uk>

[-- Attachment #1: thirdparty/micrel/ksz8851-fix-mac.patch --]
[-- Type: text/plain, Size: 2946 bytes --]

From: Tristram Ha <Tristram.Ha@micrel.com>

When the device is off it is in power saving mode. Changing the MAC address
in that situation will result in the device not communicating as the first
write to the MAC address register is not executed.

Signed-off-by: Tristram Ha <Tristram.Ha@micrel.com>
[ben@simtec.co.uk: cleaned up header]
Signed-off-by: Ben Dooks <ben@simtec.co.uk>

---
---
 drivers/net/ks8851.c |   30 ++++++++++++++++++++++++++++--
 drivers/net/ks8851.h |    2 +-
 2 files changed, 29 insertions(+), 3 deletions(-)

Index: b/drivers/net/ks8851.c
===================================================================
--- a/drivers/net/ks8851.c	2010-04-29 01:00:15.519525666 +0900
+++ b/drivers/net/ks8851.c	2010-04-29 01:01:31.118391091 +0900
@@ -345,6 +345,26 @@ static void ks8851_soft_reset(struct ks8
 }
 
 /**
+ * ks8851_set_powermode - set power mode of the device
+ * @ks: The device state
+ * @pwrmode: The power mode value to write to KS_PMECR.
+ *
+ * Change the power mode of the chip.
+ */
+static void ks8851_set_powermode(struct ks8851_net *ks, unsigned pwrmode)
+{
+	unsigned pmecr;
+
+	netif_dbg(ks, hw, ks->netdev, "setting power mode %d\n", pwrmode);
+
+	pmecr = ks8851_rdreg16(ks, KS_PMECR);
+	pmecr &= ~PMECR_PM_MASK;
+	pmecr |= pwrmode;
+
+	ks8851_wrreg16(ks, KS_PMECR, pmecr);
+}
+
+/**
  * ks8851_write_mac_addr - write mac address to device registers
  * @dev: The network device
  *
@@ -360,8 +380,15 @@ static int ks8851_write_mac_addr(struct 
 
 	mutex_lock(&ks->lock);
 
+	/*
+	 * Wake up chip in case it was powered off when stopped; otherwise,
+	 * the first write to the MAC address does not take effect.
+	 */
+	ks8851_set_powermode(ks, PMECR_PM_NORMAL);
 	for (i = 0; i < ETH_ALEN; i++)
 		ks8851_wrreg8(ks, KS_MAR(i), dev->dev_addr[i]);
+	if (!netif_running(dev))
+		ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN);
 
 	mutex_unlock(&ks->lock);
 
@@ -1260,7 +1287,6 @@ static int __devinit ks8851_probe(struct
 
 	ks->netdev = ndev;
 	ks->spidev = spi;
-	ks->tx_space = 6144;
 
 	mutex_init(&ks->lock);
 	spin_lock_init(&ks->statelock);
@@ -1318,10 +1344,10 @@ static int __devinit ks8851_probe(struct
 
   	/* cache the contents of the CCR register for EEPROM, etc. */
   	ks->rc_ccr = ks8851_rdreg16(ks, KS_CCR);
+	ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
 
 	ks8851_read_selftest(ks);
 	ks8851_init_mac(ks);
-	ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
 
 	ret = request_irq(spi->irq, ks8851_irq, IRQF_TRIGGER_LOW,
 			  ndev->name, ks);
Index: b/drivers/net/ks8851.h
===================================================================
--- a/drivers/net/ks8851.h	2010-04-28 23:24:20.647026644 +0900
+++ b/drivers/net/ks8851.h	2010-04-29 01:00:35.029526937 +0900
@@ -16,7 +16,7 @@
 #define CCR_32PIN				(1 << 0)
 
 /* MAC address registers */
-#define KS_MAR(_m)				0x15 - (_m)
+#define KS_MAR(_m)				(0x15 - (_m))
 #define KS_MARL					0x10
 #define KS_MARM					0x12
 #define KS_MARH					0x14


^ permalink raw reply

* [patch 13/13] KSZ8851-SNL: Add platform data to specify IRQ trigger type.
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support
In-Reply-To: <20100429231621.015936077@fluff.org.uk>

[-- Attachment #1: ks8851-add-platform-data.patch --]
[-- Type: text/plain, Size: 3786 bytes --]

Add platform data to allow the board registering the SPI device to
pass what IRQ trigger type it needs to the driver. The default of
low-level trigger is used if no data is specified, or the field is
zero.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>

---
 drivers/net/ks8851.c   |   27 +++++++++++++++++++++++----
 include/linux/ks8851.h |   23 +++++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)

Index: b/drivers/net/ks8851.c
===================================================================
--- a/drivers/net/ks8851.c	2010-04-29 01:28:32.719525804 +0900
+++ b/drivers/net/ks8851.c	2010-04-29 01:28:37.229527494 +0900
@@ -20,6 +20,7 @@
 #include <linux/crc32.h>
 #include <linux/mii.h>
 #include <linux/eeprom_93cx6.h>
+#include <linux/ks8851.h>
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -70,6 +71,7 @@ struct ks8851_rxctrl {
  * @rc_ccr: Cached copy of KS_CCR.
  * @rc_rxqcr: Cached copy of KS_RXQCR.
  * @eeprom: 93CX6 EEPROM state for accessing on-board EEPROM.
+ * @irq_flags: The IRQ flags passed to request_irq().
  *
  * The @lock ensures that the chip is protected when certain operations are
  * in progress. When the read or write packet transfer is in progress, most
@@ -439,6 +441,15 @@ static void ks8851_init_mac(struct ks885
 }
 
 /**
+ * is_level_irq() - return if the given IRQ flags are level triggered
+ * @flags: The flags passed to request_irq().
+*/
+static bool is_level_irq(unsigned flags)
+{
+	return flags & (IRQF_TRIGGER_LOW | IRQF_TRIGGER_HIGH);
+}
+
+/**
  * ks8851_irq - device interrupt handler
  * @irq: Interrupt number passed from the IRQ hnalder.
  * @pw: The private word passed to register_irq(), our struct ks8851_net.
@@ -450,7 +461,9 @@ static irqreturn_t ks8851_irq(int irq, v
 {
 	struct ks8851_net *ks = pw;
 
-	disable_irq_nosync(irq);
+	if (is_level_irq(ks->irq_flags))
+		disable_irq_nosync(irq);
+
 	schedule_work(&ks->irq_work);
 	return IRQ_HANDLED;
 }
@@ -674,7 +687,8 @@ static void ks8851_irq_work(struct work_
 
 	mutex_unlock(&ks->lock);
 
-	enable_irq(ks->netdev->irq);
+	if (is_level_irq(ks->irq_flags))
+		enable_irq(ks->netdev->irq);
 }
 
 /**
@@ -1493,6 +1507,7 @@ static void __devexit ks8851_delete_debu
 
 static int __devinit ks8851_probe(struct spi_device *spi)
 {
+	struct ks8851_pdata *pd = spi->dev.platform_data;
 	struct net_device *ndev;
 	struct ks8851_net *ks;
 	int ret;
@@ -1578,8 +1593,12 @@ static int __devinit ks8851_probe(struct
 	ks8851_read_selftest(ks);
 	ks8851_init_mac(ks);
 
-	ret = request_irq(spi->irq, ks8851_irq, IRQF_TRIGGER_LOW,
-			  ndev->name, ks);
+	if (pd && pd->irq_flags)
+		ks->irq_flags = pd->irq_flags;
+	else
+		ks->irq_flags = IRQF_TRIGGER_LOW;
+
+	ret = request_irq(spi->irq, ks8851_irq, ks->irq_flags, ndev->name, ks);
 	if (ret < 0) {
 		dev_err(&spi->dev, "failed to get irq\n");
 		goto err_irq;
Index: b/include/linux/ks8851.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ b/include/linux/ks8851.h	2010-04-29 01:28:37.000000000 +0900
@@ -0,0 +1,23 @@
+/* include/linux/ks8851.h
+ *
+ * Platform specific configuration data for KS8851 driver.
+ *
+ * Copyright 2009 Simtec Electronics
+ *	http://www.simtec.co.uk/
+ *	Ben Dooks <ben@simtec.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/**
+ * struct ks8851_pdata - platform specific configuration data
+ * @irq_flags: The IRQ trigger flags to pass to request_irq().
+ *
+ * Platform specific configuration to be passed from board support
+ * registering the spi device to the driver.
+ */
+struct ks8851_pdata {
+	unsigned	irq_flags;
+};


^ permalink raw reply

* [patch 05/13] KSZ8851-SNL: Add support for EEPROM MAC address
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support
In-Reply-To: <20100429231621.015936077@fluff.org.uk>

[-- Attachment #1: ks8851-mac-from-eeprom.patch --]
[-- Type: text/plain, Size: 3404 bytes --]

Add support for reading the MAC address from the system registers if there
is an EEPROM present. This involves caching the KS_CCR register for later
use (will also be useful for ETHTOOL support) and adding a print to say
that there is an EEPROM present.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>

---

---
 drivers/net/ks8851.c |   46 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)

Index: b/drivers/net/ks8851.c
===================================================================
--- a/drivers/net/ks8851.c	2010-04-29 00:51:12.489525834 +0900
+++ b/drivers/net/ks8851.c	2010-04-29 00:52:02.377026762 +0900
@@ -76,6 +76,7 @@ union ks8851_tx_hdr {
  * @msg_enable: The message flags controlling driver output (see ethtool).
  * @fid: Incrementing frame id tag.
  * @rc_ier: Cached copy of KS_IER.
+ * @rc_ccr: Cached copy of KS_CCR.
  * @rc_rxqcr: Cached copy of KS_RXQCR.
  *
  * The @lock ensures that the chip is protected when certain operations are
@@ -107,6 +108,7 @@ struct ks8851_net {
 
 	u16			rc_ier;
 	u16			rc_rxqcr;
+	u16			rc_ccr;
 
 	struct mii_if_info	mii;
 	struct ks8851_rxctrl	rxctrl;
@@ -367,21 +369,47 @@ static int ks8851_write_mac_addr(struct 
 }
 
 /**
+ * ks8851_read_mac_addr - read mac address from device registers
+ * @dev: The network device
+ *
+ * Update our copy of the KS8851 MAC address from the registers of @dev.
+*/
+static void ks8851_read_mac_addr(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	int i;
+
+	mutex_lock(&ks->lock);
+
+	for (i = 0; i < ETH_ALEN; i++)
+		dev->dev_addr[i] = ks8851_rdreg8(ks, KS_MAR(i));
+
+	mutex_unlock(&ks->lock);
+}
+
+/**
  * ks8851_init_mac - initialise the mac address
  * @ks: The device structure
  *
  * Get or create the initial mac address for the device and then set that
- * into the station address register. Currently we assume that the device
- * does not have a valid mac address in it, and so we use random_ether_addr()
+ * into the station address register. If there is an EEPROM present, then
+ * we try that. If no valid mac address is found we use random_ether_addr()
  * to create a new one.
- *
- * In future, the driver should check to see if the device has an EEPROM
- * attached and whether that has a valid ethernet address in it.
  */
 static void ks8851_init_mac(struct ks8851_net *ks)
 {
 	struct net_device *dev = ks->netdev;
 
+	/* first, try reading what we've got already */
+	if (ks->rc_ccr & CCR_EEPROM) {
+		ks8851_read_mac_addr(dev);
+		if (is_valid_ether_addr(dev->dev_addr))
+			return;
+
+		ks_err(ks, "invalid mac address read %pM\n",
+			dev->dev_addr);
+	}
+
 	random_ether_addr(dev->dev_addr);
 	ks8851_write_mac_addr(dev);
 }
@@ -1288,6 +1316,9 @@ static int __devinit ks8851_probe(struct
 		goto err_id;
 	}
 
+  	/* cache the contents of the CCR register for EEPROM, etc. */
+  	ks->rc_ccr = ks8851_rdreg16(ks, KS_CCR);
+
 	ks8851_read_selftest(ks);
 	ks8851_init_mac(ks);
 	ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
@@ -1305,9 +1336,10 @@ static int __devinit ks8851_probe(struct
 		goto err_netdev;
 	}
 
-	dev_info(&spi->dev, "revision %d, MAC %pM, IRQ %d\n",
+	dev_info(&spi->dev, "revision %d, MAC %pM, IRQ %d, %s EEPROM\n",
 		 CIDER_REV_GET(ks8851_rdreg16(ks, KS_CIDER)),
-		 ndev->dev_addr, ndev->irq);
+		 ndev->dev_addr, ndev->irq,
+		 ks->rc_ccr & CCR_EEPROM ? "has" : "no");
 
 	return 0;
 


^ permalink raw reply

* [patch 11/13] KS8851: Update ks8851.h header from ks8851_mll.c
From: Ben Dooks @ 2010-04-29 23:16 UTC (permalink / raw)
  To: netdev; +Cc: tristram.ha, support
In-Reply-To: <20100429231621.015936077@fluff.org.uk>

[-- Attachment #1: ks8851-reduce-header-duplication2.patch --]
[-- Type: text/plain, Size: 4272 bytes --]

Move more useful definitions from ks8851_mll.c into ks8851.h and include
a brief copyright update from the mll source file. Also mop up a few
definitions that got missed in the first patch.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>

---

---
 drivers/net/ks8851.h     |   27 +++++++++++++++++++++++++++
 drivers/net/ks8851_mll.c |   40 +---------------------------------------
 2 files changed, 28 insertions(+), 39 deletions(-)

Index: b/drivers/net/ks8851.h
===================================================================
--- a/drivers/net/ks8851.h	2010-04-20 18:38:38.000000000 +0100
+++ b/drivers/net/ks8851.h	2010-04-20 18:39:02.000000000 +0100
@@ -3,6 +3,9 @@
  * Copyright 2009 Simtec Electronics
  *      Ben Dooks <ben@simtec.co.uk>
  *
+ * portions from drivers/net/ks8851_mll.c,
+ *	Copyright (c) 2009 Micrel Inc.
+ *
  * KS8851 register definitions
  *
  * This program is free software; you can redistribute it and/or modify
@@ -13,6 +16,10 @@
 #define KS_CCR					0x08
 #define CCR_EEPROM				(1 << 9)
 #define CCR_SPI					(1 << 8)
+#define CCR_8BIT				(1 << 7)
+#define CCR_16BIT				(1 << 6)
+#define CCR_32BIT				(1 << 5)
+#define CCR_SHARED				(1 << 4)
 #define CCR_32PIN				(1 << 0)
 
 /* MAC address registers */
@@ -250,7 +257,13 @@
 
 /* Standard MII PHY data */
 #define KS_P1MBCR				0xE4
+#define P1MBCR_FORCE_FDX			(1 << 8)
+
 #define KS_P1MBSR				0xE6
+#define P1MBSR_AN_COMPLETE			(1 << 5)
+#define P1MBSR_AN_CAPABLE			(1 << 3)
+#define P1MBSR_LINK_UP				(1 << 2)
+
 #define KS_PHY1ILR				0xE8
 #define KS_PHY1IHR				0xEA
 #define KS_P1ANAR				0xEC
@@ -285,6 +298,20 @@
 #define P1CR_PNTR_10BT_FDX			(1 << 1)
 #define P1CR_PNTR_10BT_HDX			(1 << 0)
 
+#define KS_P1SR					0xF8
+#define P1SR_HP_MDIX				(1 << 15)
+#define P1SR_REV_POL				(1 << 13)
+#define P1SR_OP_100M				(1 << 10)
+#define P1SR_OP_FDX				(1 << 9)
+#define P1SR_OP_MDI				(1 << 7)
+#define P1SR_AN_DONE				(1 << 6)
+#define P1SR_LINK_GOOD				(1 << 5)
+#define P1SR_PNTR_FLOW				(1 << 4)
+#define P1SR_PNTR_100BT_FDX			(1 << 3)
+#define P1SR_PNTR_100BT_HDX			(1 << 2)
+#define P1SR_PNTR_10BT_FDX			(1 << 1)
+#define P1SR_PNTR_10BT_HDX			(1 << 0)
+
 /* TX Frame control */
 
 #define TXFR_TXIC				(1 << 15)
Index: b/drivers/net/ks8851_mll.c
===================================================================
--- a/drivers/net/ks8851_mll.c	2010-04-20 18:38:42.000000000 +0100
+++ b/drivers/net/ks8851_mll.c	2010-04-20 18:39:02.000000000 +0100
@@ -43,13 +43,6 @@ static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0
 #define TX_BUF_SIZE			2000
 #define RX_BUF_SIZE			2000
 
-#define CCR_8BIT			(1 << 7)
-#define CCR_16BIT			(1 << 6)
-#define CCR_32BIT			(1 << 5)
-#define CCR_SHARED			(1 << 4)
-
-#define OBCR_ODS_16MA			(1 << 6)
-
 #define RXCR1_FILTER_MASK    		(RXCR1_RXINVF | RXCR1_RXAE | \
 					 RXCR1_RXMAFMA | RXCR1_RXPAFMA)
 
@@ -65,41 +58,10 @@ static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0
 
 #define RXQCR_CMD_CNTL                	(RXQCR_RXFCTE|RXQCR_ADRFE)
 
-#define KS_TXFDPR			0x84
-#define TXFDPR_TXFPAI			(1 << 14)
-#define TXFDPR_TXFP_MASK		(0x7ff << 0)
-#define TXFDPR_TXFP_SHIFT		(0)
-
-#define KS_RXFDPR			0x86
-#define RXFDPR_RXFPAI			(1 << 14)
-
 #define RXFCTR_THRESHOLD_MASK     	0x00FF
 
-#define P1MBCR_FORCE_FDX		(1 << 8)
-
-#define P1MBSR_AN_COMPLETE		(1 << 5)
-#define P1MBSR_AN_CAPABLE		(1 << 3)
-#define P1MBSR_LINK_UP			(1 << 2)
-
 /* TX Frame control */
 
-#define TXFR_TXIC			(1 << 15)
-#define TXFR_TXFID_MASK			(0x3f << 0)
-#define TXFR_TXFID_SHIFT		(0)
-
-#define KS_P1SR				0xF8
-#define P1SR_HP_MDIX			(1 << 15)
-#define P1SR_REV_POL			(1 << 13)
-#define P1SR_OP_100M			(1 << 10)
-#define P1SR_OP_FDX			(1 << 9)
-#define P1SR_OP_MDI			(1 << 7)
-#define P1SR_AN_DONE			(1 << 6)
-#define P1SR_LINK_GOOD			(1 << 5)
-#define P1SR_PNTR_FLOW			(1 << 4)
-#define P1SR_PNTR_100BT_FDX		(1 << 3)
-#define P1SR_PNTR_100BT_HDX		(1 << 2)
-#define P1SR_PNTR_10BT_FDX		(1 << 1)
-#define P1SR_PNTR_10BT_HDX		(1 << 0)
 
 #define	ENUM_BUS_NONE			0
 #define	ENUM_BUS_8BIT			1
@@ -1362,7 +1324,7 @@ static int __devinit ks8851_probe(struct
 	memcpy(netdev->dev_addr, ks->mac_addr, 6);
 
 	data = ks_rdreg16(ks, KS_OBCR);
-	ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16MA);
+	ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16mA);
 
 	/**
 	 * If you want to use the default MAC addr,


^ permalink raw reply

* Re: [PATCH linux-next v4 2/2] ixgbe: Example usage of the new IRQ affinity_hint callback
From: Thomas Gleixner @ 2010-04-30 21:47 UTC (permalink / raw)
  To: Peter P Waskiewicz Jr; +Cc: davem, arjan, bhutchings, netdev, linux-kernel
In-Reply-To: <20100430214517.3992.92545.stgit@ppwaskie-hc2.jf.intel.com>

On Fri, 30 Apr 2010, Peter P Waskiewicz Jr wrote:
> +	for (i = 0; i < num_q_vectors; i++) {
> +		struct ixgbe_q_vector *q_vector = adapter->q_vector[i];
> +		/* release the CPU mask memory */
> +		free_cpumask_var(q_vector->affinity_mask);
> +		/* clear the affinity_mask in the IRQ descriptor */
> +		irq_set_affinity_hint(adapter->msix_entries[i].vector, NULL);

Freeing the mask _AFTER_ clearing the hint might be a worthwhile
exercise :)

Thanks,

	tglx

^ permalink raw reply

* [PATCH linux-next v4 2/2] ixgbe: Example usage of the new IRQ affinity_hint callback
From: Peter P Waskiewicz Jr @ 2010-04-30 21:45 UTC (permalink / raw)
  To: tglx, davem, arjan, bhutchings; +Cc: netdev, linux-kernel
In-Reply-To: <20100430214445.3992.41647.stgit@ppwaskie-hc2.jf.intel.com>

This patch uses the new IRQ affinity_hint callback mechanism.
It serves purely as an example of how a low-level driver can
utilize this new interface.

An official ixgbe patch will be pushed through netdev once the
IRQ patches have been accepted and merged.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
---

 drivers/net/ixgbe/ixgbe.h      |    2 ++
 drivers/net/ixgbe/ixgbe_main.c |   20 +++++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 79c35ae..c220b9f 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -32,6 +32,7 @@
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/aer.h>
+#include <linux/cpumask.h>
 
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
@@ -236,6 +237,7 @@ struct ixgbe_q_vector {
 	u8 tx_itr;
 	u8 rx_itr;
 	u32 eitr;
+	cpumask_var_t affinity_mask;
 };
 
 /* Helper macros to switch between ints/sec and what the register uses.
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 1b1419c..c13b932 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1083,6 +1083,16 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
 			q_vector->eitr = adapter->rx_eitr_param;
 
 		ixgbe_write_eitr(q_vector);
+
+		/*
+		 * Allocate the affinity_hint cpumask, assign the mask for
+		 * this vector, and set our affinity_hint for this irq.
+		 */
+		if (!alloc_cpumask_var(&q_vector->affinity_mask, GFP_KERNEL))
+			return;
+		cpumask_set_cpu(v_idx, q_vector->affinity_mask);
+		irq_set_affinity_hint(adapter->msix_entries[v_idx].vector,
+		                      q_vector->affinity_mask);
 	}
 
 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
@@ -3218,7 +3228,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 rxctrl;
 	u32 txdctl;
-	int i, j;
+	int i, j, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
 
 	/* signal that we are down to the interrupt handler */
 	set_bit(__IXGBE_DOWN, &adapter->state);
@@ -3251,6 +3261,14 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 
 	ixgbe_napi_disable_all(adapter);
 
+	for (i = 0; i < num_q_vectors; i++) {
+		struct ixgbe_q_vector *q_vector = adapter->q_vector[i];
+		/* release the CPU mask memory */
+		free_cpumask_var(q_vector->affinity_mask);
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(adapter->msix_entries[i].vector, NULL);
+	}
+
 	clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
 	del_timer_sync(&adapter->sfp_timer);
 	del_timer_sync(&adapter->watchdog_timer);


^ permalink raw reply related

* [PATCH linux-next v4 1/2] irq: Add CPU mask affinity hint
From: Peter P Waskiewicz Jr @ 2010-04-30 21:44 UTC (permalink / raw)
  To: tglx, davem, arjan, bhutchings; +Cc: netdev, linux-kernel

This patch adds a cpumask affinity hint to the irq_desc
structure, along with a registration function and a read-only
proc entry for each interrupt.

This affinity_hint handle for each interrupt can be used by
underlying drivers that need a better mechanism to control
interrupt affinity.  The underlying driver can register a
cpumask for the interrupt, which will allow the driver to
provide the CPU mask for the interrupt to anything that
requests it.  The intent is to extend the userspace daemon,
irqbalance, to help hint to it a preferred CPU mask to balance
the interrupt into.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
---

 include/linux/interrupt.h |    6 ++++++
 include/linux/irq.h       |    1 +
 kernel/irq/manage.c       |   19 +++++++++++++++++++
 kernel/irq/proc.c         |   39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 0 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 75f3f00..4ae3b2d 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -209,6 +209,7 @@ extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
 
+extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -223,6 +224,11 @@ static inline int irq_can_set_affinity(unsigned int irq)
 
 static inline int irq_select_affinity(unsigned int irq)  { return 0; }
 
+static inline int irq_set_affinity_hint(unsigned int irq,
+                                        const struct cpumask *m)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
 
 #ifdef CONFIG_GENERIC_HARDIRQS
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 707ab12..83b16d7 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -206,6 +206,7 @@ struct irq_desc {
 	struct proc_dir_entry	*dir;
 #endif
 	const char		*name;
+	struct cpumask		*affinity_hint;
 } ____cacheline_internodealigned_in_smp;
 
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 704e488..1451c38 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	return 0;
 }
 
+int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+
+	if (!desc)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	desc->affinity_hint = m;
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+
 #ifndef CONFIG_AUTO_IRQ_AFFINITY
 /*
  * Generic version of the affinity autoselector.
@@ -916,6 +932,9 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 			desc->chip->disable(irq);
 	}
 
+	/* make sure affinity_hint is cleaned up */
+	desc->affinity_hint = NULL;
+
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	unregister_handler_proc(irq, action);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7a6eb04..e1e7408 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -32,6 +32,29 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
+{
+	struct irq_desc *desc = irq_to_desc((long)m->private);
+	unsigned long flags;
+	cpumask_var_t mask;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	if (desc->affinity_hint)
+		cpumask_copy(mask, desc->affinity_hint);
+	else
+		cpumask_setall(mask);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	seq_cpumask(m, mask);
+	seq_putc(m, '\n');
+	free_cpumask_var(mask);
+
+	return 0;
+}
+
 #ifndef is_affinity_mask_valid
 #define is_affinity_mask_valid(val) 1
 #endif
@@ -84,6 +107,11 @@ static int irq_affinity_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
 }
 
+static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
+}
+
 static const struct file_operations irq_affinity_proc_fops = {
 	.open		= irq_affinity_proc_open,
 	.read		= seq_read,
@@ -92,6 +120,13 @@ static const struct file_operations irq_affinity_proc_fops = {
 	.write		= irq_affinity_proc_write,
 };
 
+static const struct file_operations irq_affinity_hint_proc_fops = {
+	.open		= irq_affinity_hint_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_cpumask(m, irq_default_affinity);
@@ -231,6 +266,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 	/* create /proc/irq/<irq>/smp_affinity */
 	proc_create_data("smp_affinity", 0600, desc->dir,
 			 &irq_affinity_proc_fops, (void *)(long)irq);
+
+	/* create /proc/irq/<irq>/affinity_hint */
+	proc_create_data("affinity_hint", 0400, desc->dir,
+			 &irq_affinity_hint_proc_fops, (void *)(long)irq);
 #endif
 
 	proc_create_data("spurious", 0444, desc->dir,


^ permalink raw reply related

* possible off by one error in drivers/isdn/hysdn/hysdn_proclog.c
From: d binderman @ 2010-04-30 21:23 UTC (permalink / raw)
  To: isdn; +Cc: netdev



Hello there,

I've just been looking at the Linux kernel linux-2.6.34-rc6.  I notice the
source code in file drivers/isdn/hysdn/hysdn_proclog.c, around line 115 is

        if (!(ib = kmalloc(sizeof(struct log_data) + strlen(cp), GFP_ATOMIC)))
                 return;        /* no memory */
        strcpy(ib->log_start, cp);      /* set output string */


Shouldn't that be

        if (!(ib = kmalloc(sizeof(struct log_data) + strlen(cp) + 1, GFP_ATOMIC)))
                 return;        /* no memory */
        strcpy(ib->log_start, cp);      /* set output string */

+1 for the zero byte ?

Regards

David Binderman

 		 	   		  
_________________________________________________________________
http://clk.atdmt.com/UKM/go/195013117/direct/01/
We want to hear all your funny, exciting and crazy Hotmail stories. Tell us now

^ permalink raw reply

* Re: [PATCH] [RFC] C/R: inet4 and inet6 unicast routes (v2)
From: Dan Smith @ 2010-04-30 21:24 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: containers, Vlad Yasevich, netdev, David Miller
In-Reply-To: <4BDB3F07.2030900@free.fr>

DL> Is it possible to enter the namespace and dump / restore the
DL> routes with NETLINK_ROUTE from userspace ? Or is it something not
DL> possible ?

I'm sure it would be doable.  However, checkpointing the routes that
way would:

(a) Be inconsistent with how we checkpoint all the other resources,
    including the other network resources we handle from the kernel
    with rtnl
(b) Require merging of the data from the resources saved in userspace
    with those saved in kernelspace
(c) Eliminate the ability for an application to easily checkpoint
    itself by making a single syscall
(d) Require this same sort of jumping back and forth between
    namespaces by the userspace task doing the checkpoint/restart

-- 
Dan Smith
IBM Linux Technology Center
email: danms@us.ibm.com

^ permalink raw reply

* Re: [PATCH linux-next v3 1/2] irq: Add CPU mask affinity hint
From: Peter P Waskiewicz Jr @ 2010-04-30 21:18 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Ben Hutchings, davem@davemloft.net, arjan@linux.jf.intel.com,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <alpine.LFD.2.00.1004302311550.2951@localhost.localdomain>

On Fri, 30 Apr 2010, Thomas Gleixner wrote:

> On Fri, 30 Apr 2010, Ben Hutchings wrote:
>> On Fri, 2010-04-30 at 13:23 -0700, Peter P Waskiewicz Jr wrote:
>>> +int irq_register_affinity_hint(unsigned int irq, const struct cpumask *m)
>>> +{
>>> +	struct irq_desc *desc = irq_to_desc(irq);
>>> +	unsigned long flags;
>>> +
>>> +	if (!desc)
>>> +		return -EINVAL;
>>
>> Is it possible for irq_to_desc(irq) to be NULL?  This function already
>> assumes that the caller 'owns' the IRQ.
>
> Oh come on. Driver writers get everything wrong and not checking on an
> invalid irq number is better than crashing :)
>
>>> +static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
>>> +{
>>> +	struct irq_desc *desc = irq_to_desc((long)m->private);
>>> +	unsigned long flags;
>>> +	cpumask_var_t mask;
>>> +	int ret = -EINVAL;
>>
>> I don't think this should be returning -EINVAL if the affinity hint is
>> missing.  That means 'invalid argument', but there is nothing invalid
>> about trying to read() the corresponding file.  The file should simply
>> be empty if there is no hint.  (Actually it might be better if it didn't
>> appear at all, but that would be a pain to implement.)
>
> I agree that -EINVAL is not really a good match.
>
> How about just returning CPU_MASK_ALL if desc->affinity_hint is not
> set ?

That seems reasonable to me.

cheers,
-PJ

^ permalink raw reply

* Re: [PATCH linux-next v3 1/2] irq: Add CPU mask affinity hint
From: Thomas Gleixner @ 2010-04-30 21:17 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Peter P Waskiewicz Jr, davem, arjan, netdev, linux-kernel
In-Reply-To: <1272661345.2110.28.camel@achroite.uk.solarflarecom.com>

On Fri, 30 Apr 2010, Ben Hutchings wrote:
> On Fri, 2010-04-30 at 13:23 -0700, Peter P Waskiewicz Jr wrote:
> > +int irq_register_affinity_hint(unsigned int irq, const struct cpumask *m)
> > +{
> > +	struct irq_desc *desc = irq_to_desc(irq);
> > +	unsigned long flags;
> > +
> > +	if (!desc)
> > +		return -EINVAL;
> 
> Is it possible for irq_to_desc(irq) to be NULL?  This function already
> assumes that the caller 'owns' the IRQ.

Oh come on. Driver writers get everything wrong and not checking on an
invalid irq number is better than crashing :)

> > +static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
> > +{
> > +	struct irq_desc *desc = irq_to_desc((long)m->private);
> > +	unsigned long flags;
> > +	cpumask_var_t mask;
> > +	int ret = -EINVAL;
> 
> I don't think this should be returning -EINVAL if the affinity hint is
> missing.  That means 'invalid argument', but there is nothing invalid
> about trying to read() the corresponding file.  The file should simply
> be empty if there is no hint.  (Actually it might be better if it didn't
> appear at all, but that would be a pain to implement.)

I agree that -EINVAL is not really a good match.

How about just returning CPU_MASK_ALL if desc->affinity_hint is not
set ?

Thanks,

	tglx

^ permalink raw reply

* Re: r8169 INFO: inconsistent lock state
From: Francois Romieu @ 2010-04-30 21:15 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Sergey Senozhatsky, Oleg Nesterov, David Miller, Ingo Molnar,
	Peter Zijlstra, netdev, linux-kernel
In-Reply-To: <1272661149.2230.11.camel@edumazet-laptop>

Eric Dumazet <eric.dumazet@gmail.com> :
[...]
> So we have following illegal chain (process context, not softirq)
> 
> rtl8169_reset_task() -> rtl8169_rx_interrupt() -> netif_receive_skb()
> 
> And normally, commit 630b943c tried to change this chain to :
> 
> rtl8169_reset_task() -> rtl8169_rx_interrupt() -> netif_rx()
> 
> I have no idea why it doesn't work.

630b943c appears to be in net-next.

Oops ?

-- 
Ueimor

^ permalink raw reply

* RE: ixgbe and mac-vlans problem
From: Tantilov, Emil S @ 2010-04-30 21:13 UTC (permalink / raw)
  To: Ben Greear, Arnd Bergmann; +Cc: NetDev, Patrick McHardy
In-Reply-To: <4BDB1CF3.8030906@candelatech.com>

Ben Greear wrote:
> On 04/30/2010 11:00 AM, Arnd Bergmann wrote:
>> On Friday 30 April 2010 00:27:39 Ben Greear wrote:
>>> Basically, we create 50 mac-vlans, with sequential MAC addresses
>>> and sequential IP addresses, and set up ip rules properly.
>>> 
>>> The issue is that only 10 or so of the mac-vlans receive other than
>>> broadcast packets.  The ixgbe NIC doesn't show PROMISC mode.
>> 
>> I just took a brief look at the driver and noticed that 82599 should
>> be able to handle 128 entries before going into promisc mode, while
>> 82598 (the same driver) does 16.
>> 
>> Maybe the logic for >16 entries is wrong, so you could try forcing
>> hw->mac.num_rar_entries to 16 for 82599 as well.
> 
> I think I was actually on an 82598 system when I saw it yesterday,
> but I have seen similar issues on 82599, though I didn't take time
> to debug it fully, so it could have been something else.
> 
> I will double-check the NIC chipset on the system that showed the
> problem yesterday.

I ran a quick test in my setup with 82599 and was able to pass traffic 
on all 50 mac-vlans without issues. This is on net-next.

Thanks,
Emil


^ permalink raw reply

* Re: [PATCH linux-next v3 1/2] irq: Add CPU mask affinity hint
From: Ben Hutchings @ 2010-04-30 21:02 UTC (permalink / raw)
  To: Peter P Waskiewicz Jr; +Cc: tglx, davem, arjan, netdev, linux-kernel
In-Reply-To: <20100430202343.4591.66240.stgit@ppwaskie-hc2.jf.intel.com>

On Fri, 2010-04-30 at 13:23 -0700, Peter P Waskiewicz Jr wrote:
> This patch adds a cpumask affinity hint to the irq_desc
> structure, along with a registration function and a read-only
> proc entry for each interrupt.
> 
> This affinity_hint handle for each interrupt can be used by
> underlying drivers that need a better mechanism to control
> interrupt affinity.  The underlying driver can register a
> cpumask for the interrupt, which will allow the driver to
> provide the CPU mask for the interrupt to anything that
> requests it.  The intent is to extend the userspace daemon,
> irqbalance, to help hint to it a preferred CPU mask to balance
> the interrupt into.
> 
> Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
[...]
> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> index 704e488..1354fc9 100644
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
>  	return 0;
>  }
>  
> +int irq_register_affinity_hint(unsigned int irq, const struct cpumask *m)
> +{
> +	struct irq_desc *desc = irq_to_desc(irq);
> +	unsigned long flags;
> +
> +	if (!desc)
> +		return -EINVAL;

Is it possible for irq_to_desc(irq) to be NULL?  This function already
assumes that the caller 'owns' the IRQ.

> +	raw_spin_lock_irqsave(&desc->lock, flags);
> +	desc->affinity_hint = m;
> +	raw_spin_unlock_irqrestore(&desc->lock, flags);
> +
> +	return 0;
> +}
[...]
> diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
> index 7a6eb04..1aa7939 100644
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -32,6 +32,32 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
>  	return 0;
>  }
>  
> +static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
> +{
> +	struct irq_desc *desc = irq_to_desc((long)m->private);
> +	unsigned long flags;
> +	cpumask_var_t mask;
> +	int ret = -EINVAL;
> +
> +	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	raw_spin_lock_irqsave(&desc->lock, flags);
> +	if (desc->affinity_hint) {
> +		cpumask_copy(mask, desc->affinity_hint);
> +		ret = 0;
> +	}
> +	raw_spin_unlock_irqrestore(&desc->lock, flags);
> +
> +	if (!ret) {
> +		seq_cpumask(m, mask);
> +		seq_putc(m, '\n');
> +	}
> +	free_cpumask_var(mask);
> +
> +	return ret;
> +}
[...]

I don't think this should be returning -EINVAL if the affinity hint is
missing.  That means 'invalid argument', but there is nothing invalid
about trying to read() the corresponding file.  The file should simply
be empty if there is no hint.  (Actually it might be better if it didn't
appear at all, but that would be a pain to implement.)

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: r8169 INFO: inconsistent lock state
From: Eric Dumazet @ 2010-04-30 20:59 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Oleg Nesterov, David Miller, Ingo Molnar, Francois Romieu,
	Peter Zijlstra, netdev, linux-kernel
In-Reply-To: <20100430182012.GA3329@swordfish.minsk.epam.com>

Le vendredi 30 avril 2010 à 21:20 +0300, Sergey Senozhatsky a écrit :
> Hello,
> 
> Yet another one (during resume):
> 
> kernel: [ 1968.334646] 
> kernel: [ 1968.334648] =================================
> kernel: [ 1968.334651] [ INFO: inconsistent lock state ]
> kernel: [ 1968.334654] 2.6.34-rc6-dbg #105
> kernel: [ 1968.334656] ---------------------------------
> kernel: [ 1968.334659] inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
> kernel: [ 1968.334663] events/1/3854 [HC0[0]:SC0[0]:HE1:SE1] takes:
> kernel: [ 1968.334666]  (&(&table->hash[i].lock)->rlock){+.?...}, at: [<c1292ec4>] __udp4_lib_mcast_deliver+0x3c/0x143
> kernel: [ 1968.334678] {IN-SOFTIRQ-W} state was registered at:
> kernel: [ 1968.334681]   [<c104fc8d>] __lock_acquire+0x2ba/0xc01
> kernel: [ 1968.334688]   [<c10509df>] lock_acquire+0x5e/0x75
> kernel: [ 1968.334693]   [<c12c366a>] _raw_spin_lock+0x28/0x58
> kernel: [ 1968.334699]   [<c1292ec4>] __udp4_lib_mcast_deliver+0x3c/0x143
> kernel: [ 1968.334704]   [<c12931a7>] __udp4_lib_rcv+0x1dc/0x3ac
> kernel: [ 1968.334708]   [<c1293389>] udp_rcv+0x12/0x14
> kernel: [ 1968.334713]   [<c127605f>] ip_local_deliver_finish+0xd2/0x137
> kernel: [ 1968.334719]   [<c12760fc>] NF_HOOK.clone.1+0x38/0x3f
> kernel: [ 1968.334724]   [<c1276220>] ip_local_deliver+0x3c/0x42
> kernel: [ 1968.334728]   [<c1275f2e>] ip_rcv_finish+0x25c/0x27e
> kernel: [ 1968.334733]   [<c12760fc>] NF_HOOK.clone.1+0x38/0x3f
> kernel: [ 1968.334737]   [<c12763c9>] ip_rcv+0x1a3/0x1c6
> kernel: [ 1968.334741]   [<c12593d7>] netif_receive_skb+0x38b/0x3ab
> kernel: [ 1968.334747]   [<fd20f911>] rtl8169_rx_interrupt+0x2de/0x3eb [r8169]
> kernel: [ 1968.334756]   [<fd211cde>] rtl8169_poll+0x28/0x15d [r8169]
> kernel: [ 1968.334763]   [<c12596b3>] net_rx_action+0x93/0x181
> kernel: [ 1968.334767]   [<c1032a72>] __do_softirq+0x88/0x10c
> kernel: [ 1968.334773]   [<c1032b25>] do_softirq+0x2f/0x47
> kernel: [ 1968.334778]   [<c1032de2>] irq_exit+0x38/0x75
> kernel: [ 1968.334782]   [<c1004489>] do_IRQ+0x79/0x8d
> kernel: [ 1968.334787]   [<c1002db5>] common_interrupt+0x35/0x3c
> kernel: [ 1968.334791]   [<c1246f43>] cpuidle_idle_call+0x6a/0xa0
> kernel: [ 1968.334799]   [<c100171b>] cpu_idle+0x89/0xbe
> kernel: [ 1968.334802]   [<c12b3d49>] rest_init+0xd1/0xd6
> kernel: [ 1968.334807]   [<c147e7bd>] start_kernel+0x339/0x33e
> kernel: [ 1968.334813]   [<c147e0c9>] i386_start_kernel+0xc9/0xd0
> kernel: [ 1968.334818] irq event stamp: 63
> kernel: [ 1968.334820] hardirqs last  enabled at (63): [<c109d7ff>] kmem_cache_free+0x83/0x8f
> kernel: [ 1968.334828] hardirqs last disabled at (62): [<c109d7a6>] kmem_cache_free+0x2a/0x8f
> kernel: [ 1968.334833] softirqs last  enabled at (60): [<c126400a>] rcu_read_unlock_bh+0x1c/0x1e
> kernel: [ 1968.334839] softirqs last disabled at (58): [<c1263faf>] rcu_read_lock_bh+0x8/0x26
> kernel: [ 1968.334845] 
> kernel: [ 1968.334846] other info that might help us debug this:
> kernel: [ 1968.334849] 5 locks held by events/1/3854:
> kernel: [ 1968.334851]  #0:  (events){+.+.+.}, at: [<c103c8e9>] worker_thread+0x128/0x23c
> kernel: [ 1968.334859]  #1:  ((&(&tp->task)->work)){+.+...}, at: [<c103c8e9>] worker_thread+0x128/0x23c
> kernel: [ 1968.334865]  #2:  (rtnl_mutex){+.+.+.}, at: [<c1262b8f>] rtnl_lock+0xf/0x11
> kernel: [ 1968.334871]  #3:  (rcu_read_lock){.+.+..}, at: [<c125784b>] rcu_read_lock+0x0/0x2b
> kernel: [ 1968.334877]  #4:  (rcu_read_lock){.+.+..}, at: [<c1275c56>] rcu_read_lock+0x0/0x2b
> kernel: [ 1968.334884] 
> kernel: [ 1968.334885] stack backtrace:
> kernel: [ 1968.334888] Pid: 3854, comm: events/1 Not tainted 2.6.34-rc6-dbg #105
> kernel: [ 1968.334891] Call Trace:
> kernel: [ 1968.334895]  [<c12c1906>] ? printk+0xf/0x11
> kernel: [ 1968.334901]  [<c104e7d9>] valid_state+0x133/0x141
> kernel: [ 1968.334906]  [<c104e8b6>] mark_lock+0xcf/0x1bc
> kernel: [ 1968.334911]  [<c104e11f>] ? check_usage_backwards+0x0/0x72
> kernel: [ 1968.334915]  [<c104fcff>] __lock_acquire+0x32c/0xc01
> kernel: [ 1968.334922]  [<c129ee2d>] ? fib_table_lookup+0x81/0x8e
> kernel: [ 1968.334927]  [<c100772e>] ? __cycles_2_ns+0xf/0x3e
> kernel: [ 1968.334932]  [<c12671b6>] ? rcu_read_unlock+0x0/0x38
> kernel: [ 1968.334937]  [<c1007a30>] ? native_sched_clock+0x49/0x4f
> kernel: [ 1968.334943]  [<c10443a9>] ? sched_clock_local+0x11/0x11f
> kernel: [ 1968.334948]  [<c10509df>] lock_acquire+0x5e/0x75
> kernel: [ 1968.334953]  [<c1292ec4>] ? __udp4_lib_mcast_deliver+0x3c/0x143
> kernel: [ 1968.334958]  [<c12c366a>] _raw_spin_lock+0x28/0x58
> kernel: [ 1968.334963]  [<c1292ec4>] ? __udp4_lib_mcast_deliver+0x3c/0x143
> kernel: [ 1968.334967]  [<c1292ec4>] __udp4_lib_mcast_deliver+0x3c/0x143
> kernel: [ 1968.334973]  [<c104463c>] ? sched_clock_cpu+0x121/0x131
> kernel: [ 1968.334978]  [<c12735b5>] ? rcu_read_unlock+0x0/0x38
> kernel: [ 1968.334983]  [<c104463c>] ? sched_clock_cpu+0x121/0x131
> kernel: [ 1968.334988]  [<c10505c5>] ? __lock_acquire+0xbf2/0xc01
> kernel: [ 1968.334994]  [<c12735e2>] ? rcu_read_unlock+0x2d/0x38
> kernel: [ 1968.334998]  [<c1274034>] ? ip_route_input+0x101/0xaf4
> kernel: [ 1968.335003]  [<c12931a7>] __udp4_lib_rcv+0x1dc/0x3ac
> kernel: [ 1968.335008]  [<c1293389>] udp_rcv+0x12/0x14
> kernel: [ 1968.335013]  [<c127605f>] ip_local_deliver_finish+0xd2/0x137
> kernel: [ 1968.335017]  [<c1275f8d>] ? ip_local_deliver_finish+0x0/0x137
> kernel: [ 1968.335022]  [<c12760fc>] NF_HOOK.clone.1+0x38/0x3f
> kernel: [ 1968.335026]  [<c1276220>] ip_local_deliver+0x3c/0x42
> kernel: [ 1968.335031]  [<c1275f8d>] ? ip_local_deliver_finish+0x0/0x137
> kernel: [ 1968.335035]  [<c1275f2e>] ip_rcv_finish+0x25c/0x27e
> kernel: [ 1968.335040]  [<c1275cd2>] ? ip_rcv_finish+0x0/0x27e
> kernel: [ 1968.335044]  [<c12760fc>] NF_HOOK.clone.1+0x38/0x3f
> kernel: [ 1968.335048]  [<c12763c9>] ip_rcv+0x1a3/0x1c6
> kernel: [ 1968.335052]  [<c1275cd2>] ? ip_rcv_finish+0x0/0x27e


> kernel: [ 1968.335057]  [<c12593d7>] netif_receive_skb+0x38b/0x3ab
> kernel: [ 1968.335066]  [<fd20f911>] rtl8169_rx_interrupt+0x2de/0x3eb [r8169]
> kernel: [ 1968.335073]  [<fd20fc9b>] rtl8169_reset_task+0x33/0xe8 [r8169]

So we have following illegal chain (process context, not softirq)

rtl8169_reset_task() -> rtl8169_rx_interrupt() -> netif_receive_skb()

And normally, commit 630b943c tried to change this chain to :

rtl8169_reset_task() -> rtl8169_rx_interrupt() -> netif_rx()

I have no idea why it doesn't work.

commit 630b943c182d1aed69f244405131902fbcba7ec6
Author: Eric Dumazet <eric.dumazet@gmail.com>
Date:   Wed Mar 31 02:08:31 2010 +0000

    r8169: Fix rtl8169_rx_interrupt()
    
    In case a reset is performed, rtl8169_rx_interrupt() is called from
    process context instead of softirq context. Special care must be taken
    to call appropriate network core services (netif_rx() instead of
    netif_receive_skb()). VLAN handling also corrected.
    
    Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
    Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
    Diagnosed-by: Oleg Nesterov <oleg@redhat.com>
    Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 964305c..f7ffa5d 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -1054,14 +1054,14 @@ static void rtl8169_vlan_rx_register(struct net_device *dev,
 }
 
 static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc,
-			       struct sk_buff *skb)
+			       struct sk_buff *skb, int polling)
 {
 	u32 opts2 = le32_to_cpu(desc->opts2);
 	struct vlan_group *vlgrp = tp->vlgrp;
 	int ret;
 
 	if (vlgrp && (opts2 & RxVlanTag)) {
-		vlan_hwaccel_receive_skb(skb, vlgrp, swab16(opts2 & 0xffff));
+		__vlan_hwaccel_rx(skb, vlgrp, swab16(opts2 & 0xffff), polling);
 		ret = 0;
 	} else
 		ret = -1;
@@ -1078,7 +1078,7 @@ static inline u32 rtl8169_tx_vlan_tag(struct rtl8169_private *tp,
 }
 
 static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc,
-			       struct sk_buff *skb)
+			       struct sk_buff *skb, int polling)
 {
 	return -1;
 }
@@ -4467,12 +4467,20 @@ out:
 	return done;
 }
 
+/*
+ * Warning : rtl8169_rx_interrupt() might be called :
+ * 1) from NAPI (softirq) context
+ *	(polling = 1 : we should call netif_receive_skb())
+ * 2) from process context (rtl8169_reset_task())
+ *	(polling = 0 : we must call netif_rx() instead)
+ */
 static int rtl8169_rx_interrupt(struct net_device *dev,
 				struct rtl8169_private *tp,
 				void __iomem *ioaddr, u32 budget)
 {
 	unsigned int cur_rx, rx_left;
 	unsigned int delta, count;
+	int polling = (budget != ~(u32)0) ? 1 : 0;
 
 	cur_rx = tp->cur_rx;
 	rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
@@ -4534,8 +4542,12 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
-			if (rtl8169_rx_vlan_skb(tp, desc, skb) < 0)
-				netif_receive_skb(skb);
+			if (rtl8169_rx_vlan_skb(tp, desc, skb, polling) < 0) {
+				if (likely(polling))
+					netif_receive_skb(skb);
+				else
+					netif_rx(skb);
+			}
 
 			dev->stats.rx_bytes += pkt_size;
 			dev->stats.rx_packets++;



^ permalink raw reply related

* Re: OFT - reserving CPU's for networking
From: Andi Kleen @ 2010-04-30 21:01 UTC (permalink / raw)
  To: David Miller; +Cc: tglx, shemminger, eric.dumazet, netdev, peterz
In-Reply-To: <20100430.115715.216750975.davem@davemloft.net>

> Then we can do cool tricks like having the cpu spin on a mwait() on the
> network device's status descriptor in memory.

When you specify a deep C state in that mwait then it will also have the long 
wakeup latency in the idle case.  When you don't then you just killed higher
Turbo mode on that socket and give away a lot of performance on the other
cores.

So you have to solve the idle state governor issue anyways, and then
you likely don't need it anymore.

Besides it seems to me that dispatching is something the NIC should
just do directly. "RPS only CPU" would be essentially just an 
interrupt mitigation/flow redirection scheme that a lot of NICs
do anyways.

> In any event I agree with you, it's a cool idea at best, and likely
> not really practical.

s/cool//

-Andi

^ permalink raw reply

* Re: [PATCH 0/3] [RFC] ptp: IEEE 1588 clock support
From: Richard Cochran @ 2010-04-29 15:34 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: netdev
In-Reply-To: <4BD97573.5050101@grandegger.com>

On Thu, Apr 29, 2010 at 02:02:59PM +0200, Wolfgang Grandegger wrote:
> 
> I realized two other netdev drivers already supporting PTP timestamping:
> igb and bfin_mac. From the PTP developer point of view, the interface
> looks rather complete to me and it works fine on my MPC8313 setup.

Do you know whether these two also have PTP clocks? If so, is the API
that I suggested going to work for controlling those clocks, too?

> The only thing I stumbled over was that PTP clock registration
> failed when PTP support is statically linked into the kernel.

Okay, will look into that...

Thanks,
Richard


^ permalink raw reply

* Re: [PATCH net-next-2.6] net: speedup udp receive path
From: Eric Dumazet @ 2010-04-30 20:40 UTC (permalink / raw)
  To: hadi
  Cc: Changli Gao, David Miller, therbert, shemminger, netdev,
	Eilon Greenstein, Brian Bloniarz
In-Reply-To: <1272655814.3879.8.camel@bigi>

Le vendredi 30 avril 2010 à 15:30 -0400, jamal a écrit :
> Eric!
> 
> I managed to mod your program to look conceptually similar to mine
> and i reproduced the results with same test kernel from yesterday. 
> So it is likely the issue is in using epoll vs not using any async as
> in your case.
> Results attached as well as modified program.
> 
> Note: the key things to remember:
> rps with this program gets worse over time and different net-next
> kernels since Apr14 (look at graph i supplied). Sorry, I am really
> busy-ed out to dig any further.
> 
> cheers,
> jamal
> 

I am lost.

I used your program, and with RPS off, I can get at most 220.000 pps
with my "old" hardware. I don't understand how you can reach 700.000 pps
with RPS off. Or is it with your Nehalem ?




^ permalink raw reply

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
From: Arnd Bergmann @ 2010-04-29 15:48 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw, Jens Osterkamp
In-Reply-To: <C7FEE68A.2CBEF%scofeldm@cisco.com>

On Thursday 29 April 2010, Scott Feldman wrote:
> On 4/29/10 5:27 AM, "Arnd Bergmann" <arnd@arndb.de> wrote:
> 
> I don't believe those links are available at this time.
> 
> > Is it possible or planned to implement the same protocol in Linux so you
> > can do it with Cisco switches and cheap non-IOV NICs?
> 
> That seems very possible from a technical standpoint.  I don't think the
> port-profile netlink API we're specing out excludes that option.

Ok, good.

> >>    ip port_profile set DEVICE [ base DEVICE ] [ { pre_associate |
> >>                                                   pre_associate_rr } ]
> >>                               { name PORT-PROFILE | vsi MGR:VTID:VER }
> 
> BTW, I was meaning to ask: is there a way to roll the vsi tuple and the
> flags up into a single identifier, say a string like PORT-PROFILE?  I'm
> asking because it seems awkward from an admin's perspective to know how to
> construct a vsi tuple or to know what pre_associate_rr means. I have to
> admit I didn't fully grok what pre_associate_rr means myself.  Even if there
> was a simple local database to map named port-profiles to the underlying
> {vsi tuple, flags}, that would bring us closer to a more consistent user
> interface.  Is this possible?

I think that's technically possible but may not be helpful to make the
user interface easier. Some background on pre-associate:

The purpose of this is to assist guest migration. A single VSI (i.e. guest
network adapter) may only be connected to a single switch port at any
given time. The VSI is identified by its UUID and it has a unique
MAC address.

When migrating a guest to a new hypervisor, we need to ask the switch
to associate that VSI at the destination switch port (which may or may
not be on the same switch as the source port). This operation
may fail for a number of reasons and can take some time. Since we want
migration to always succeed and take as little time as possible, we
do a pre-associate-with-resource-reservation before the migration and
only start the actual guest migration if that completes successfully.

After a successful pre-associate-with-resource-reservation step, we
know that the actual associate step will be both fast and successful.
After it completes, the VSI is known to be on the destination
and all traffic goes there (replacing the gratuitous ARP method we do
today).

I don't think we'd ever do a pre-associate without the
resource-reservation, but the standard defines both. In theory,
we could do a pre-associate at every switch in the data center
in order to find out if it's possible to migrate there.

If you want to have more details, please look at the draft spec at
http://www.ieee802.org/1/files/public/docs2010/bg-joint-evb-0410v1.pdf

> >> 2. Future enic for pass-thru case where base != target.  We get:
> >> 
> >>     ip port_profile set eth1 base eth0 name joes-garage ...
> >> 
> >> And
> >> 
> >>     eth0:ndi_set_port_profile(eth1, ...)
> > 
> > Is eth1 the static device and eth0 the dynamic device in this scenario
> > or the other way round?
> 
> eth0 is the static and eth1 is the dynamic.  So eth0 is the base device.
> (The PF in SR-IOV parlance).

ok.

> > Wouldn't you still require access to both devices from the host root
> > network namespace here or do you just ignore the identifier for the
> > dynamic device here?
> 
> The dynamic device is the one to apply the port-profile to (well, I should
> say to apply to the dynamic device's switch port).  So we need the dynamic
> device identified.

What I mean is: how do you identify it when it belongs to someone else?
Do we always have a proxy netdev for an SR-IOV VF that is assigned to
the guest?

For the separate network namespace case, I guess we could still require
doing it before assigning the device to the guest namespace, but it's
still not ideal.

> >> Does this work?  I want to get agreement before coding up patch attempt #4.
> > 
> > Seems ok for all I can see at this point, other than the complexity
> > that results from doing two network protocols through a single netlink
> > protocol. Maybe Jens and Chris can comment some more on this.
> 
> Ok, thanks Arnd.  I'll start coding this up now, hedging that the design is
> set before hearing back from Jens/Chris.

I believe Chris is the one that was pushing most for having a single interface
for both VDP/LLDPAD and enic.
While I now understand your reasons for doing it in firmware and requiring the
kernel interface in addition to the user interface, my doubts on whether VDP
and your protocol should be part of the same interface are increasing.

While I'm convinced that you can make it work for both now, the alternative
to split the two may turn out to be cleaner. We'd still be able to do
either of the two in kernel or user space. Using iproute2 syntax to describe
this again, it would mean an interface like

   ip iov set  port-profile DEVICE [ base BASE-DEVICE ] name PORT-PROFILE
	                              [ host_uuid HOST_UUID ]
        	                      [ client_name CLIENT_NAME ]
                                      [ client_uuid CLIENT_UUID ]
   ip iov set  vsi { associate | pre-associate | pre-associate-rr } BASE-DEVICE
                                      vsi MGR:VTID:VER
                                      mac LLADDR [ vlan VID ]
                                      client_uuid CLIENT_UUID

   ip iov del  port_profile DEVICE      [ base BASE-DEVICE ]
   ip iov del  vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
				        [ client_uuid CLIENT_UUID ]

   ip iov show port_profile DEVICE      [ base BASE-DEVICE ]
   ip iov show vsi          BASE-DEVICE [ mac LLADDR [ vlan VID ] ]
					[ client_uuid CLIENT_UUID ]

You would obviously only implement the kernel support for the port-profile
stuff as callbacks, because no driver yet does VDP in the kernel, but we should
have a common netlink header that defines both variants.

Chris, any opinion on this interface as opposed to the combined one?
Either one should work, but splitting it seems cleaner to me.

	Arnd

^ permalink raw reply

* Re: [PATCH] [RFC] C/R: inet4 and inet6 unicast routes (v2)
From: Daniel Lezcano @ 2010-04-30 20:35 UTC (permalink / raw)
  To: Dan Smith; +Cc: containers, Vlad Yasevich, netdev, David Miller
In-Reply-To: <1272646855-17327-1-git-send-email-danms@us.ibm.com>

Dan Smith wrote:
> This patch adds support for checkpointing and restoring route information.
> It keeps enough information to restore basic routes at the level of detail
> of /proc/net/route.  It uses RTNETLINK to extract the information during
> checkpoint and also to insert it back during restore.  This gives us a
> nice layer of isolation between us and the various "fib" implementations.
>
> Changes in v2:
>
> This version of the patch actually moves the current task into the
> desired network namespace temporarily, for the purposes of examining and
> restoring the route information.  This is instead of creating a cross-
> namespace socket to do the job, as was done in v1.
>
> This is just an RFC to see if this is an acceptable method.  For a final
> version, adding a helper to nsproxy.c would allow us to create a new
> nsproxy with the desired netns instead of creating one with
> copy_namespaces() just to kill it off and use the target one.
>
> I still think the previous method is cleaner, but this way may violate
> fewer namespace boundaries (I'm still undecided :)
>
> Signed-off-by: Dan Smith <danms@us.ibm.com>
> Cc: David Miller <davem@davemloft.net>
> Cc: Vlad Yasevich <vladislav.yasevich@hp.com>
> Cc: jamal <hadi@cyberus.ca>
> ---
Hi Dan,

Eric did a patchset (as Jamal mentioned it) where you can have a process 
to enter a specific namespace from userspace.

http://git.kernel.org/?p=linux/kernel/git/ebiederm/linux-2.6.33-nsfd-v5.git;a=commit;h=9c2f86a44d9ca93e78fd8e81a4e2a8c2a4cdb054

Is it possible to enter the namespace and dump / restore the routes with 
NETLINK_ROUTE from userspace ? Or is it something not possible ?

Thanks
  -- Daniel



^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox