Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next-2.6 v2 0/2] can: add driver for Softing card
From: Kurt Van Dijck @ 2011-01-04 15:05 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA,
	socketcan-core-0fE9KPoRgkgATYTw5x5z8w

This series will add a driver for Softing PCMCIA CAN card.

This core CAN networking code exists for a few years in the
socketCAN repository.
The updates since the latest socketCAN version:
* PCMCIA interfacing changed
* seperation between the two drivers via a platform:softing device
* added conditional bus-error reporting

About the platform_device ...
Softing Gmbh has PCMCIA & PCI cards. Both share the same
Dual Port RAM (DPRAM) interface. Therefore, the driver is split in 2 stages:

[1/2] softing.ko: Generic platform bus device driver
It expects a platform:softing device with an IO range that contains
the DPRAM, and an IRQ line.

[2/2] softing_cs.ko: PCMCIA driver
This driver will create a platform:softing device on top of the
pcmcia device.

The 2 driver are not linked in a way that softing.ko depends
on softing_cs.ko or vice versa. The reason for doing so is that
the DPRAM interface takes quite some code, and building it directly
on the PCMCIA or PCI device was difficult to follow.
The present design eliminates the need for exotic sysfs API's since
all sysfs attributes know they are attached to a platform_device.

Differences since v1 of this series:
* whitespace issues
* use of time_after() to measure elapsed time.
* don't copy data for RTR frames (see commit since v1)
* use usleep_range(), not schedule().
* threaded irq
* fix iomem access (verify with sparse)
* totally 'endian-safe'
* drop error frame detection again. It's not right that when
  bus 1 enables error frames, bus 2 would get error frames too.
* drop the Kconfig dependency between softingcs.ko & softing.ko

Kurt

^ permalink raw reply

* [PATCH net-next-2.6 v2 1/2] can: add driver for Softing card
From: Kurt Van Dijck @ 2011-01-04 15:07 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA,
	socketcan-core-0fE9KPoRgkgATYTw5x5z8w
In-Reply-To: <20110104150513.GA321-MxZ6Iy/zr/UdbCeoMzGj59i2O/JbrIOy@public.gmane.org>

This patch adds a driver for the platform:softing device.
This will create (up to) 2 CAN network devices from 1
platform:softing device

Signed-off-by: Kurt Van Dijck <kurt.van.dijck-/BeEPy95v10@public.gmane.org>

---
 drivers/net/can/Kconfig                    |    2 +
 drivers/net/can/Makefile                   |    1 +
 drivers/net/can/softing/Kconfig            |   16 +
 drivers/net/can/softing/Makefile           |    5 +
 drivers/net/can/softing/softing.h          |  193 ++++++
 drivers/net/can/softing/softing_fw.c       |  648 ++++++++++++++++++++
 drivers/net/can/softing/softing_main.c     |  903 ++++++++++++++++++++++++++++
 drivers/net/can/softing/softing_platform.h |   38 ++
 8 files changed, 1806 insertions(+), 0 deletions(-)

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index d5a9db6..986195e 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -117,6 +117,8 @@ source "drivers/net/can/sja1000/Kconfig"
 
 source "drivers/net/can/usb/Kconfig"
 
+source "drivers/net/can/softing/Kconfig"
+
 config CAN_DEBUG_DEVICES
 	bool "CAN devices debugging messages"
 	depends on CAN
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 07ca159..53c82a7 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CAN_DEV)		+= can-dev.o
 can-dev-y			:= dev.o
 
 obj-y				+= usb/
+obj-y				+= softing/
 
 obj-$(CONFIG_CAN_SJA1000)	+= sja1000/
 obj-$(CONFIG_CAN_MSCAN)		+= mscan/
diff --git a/drivers/net/can/softing/Kconfig b/drivers/net/can/softing/Kconfig
new file mode 100644
index 0000000..072f337
--- /dev/null
+++ b/drivers/net/can/softing/Kconfig
@@ -0,0 +1,16 @@
+config CAN_SOFTING
+	tristate "Softing Gmbh CAN generic support"
+	depends on CAN_DEV
+	---help---
+	  Support for CAN cards from Softing Gmbh & some cards
+	  from Vector Gmbh.
+	  Softing Gmbh CAN cards come with 1 or 2 physical busses.
+	  Those cards typically use Dual Port RAM to communicate
+	  with the host CPU. The interface is then identical for PCI
+	  and PCMCIA cards. This driver operates on a platform device,
+	  which has been created by softing_cs or softing_pci driver.
+	  Warning:
+	  The API of the card does not allow fine control per bus, but
+	  controls the 2 busses on the card together.
+	  As such, some actions (start/stop/busoff recovery) on 1 bus
+	  must bring down the other bus too temporarily.
diff --git a/drivers/net/can/softing/Makefile b/drivers/net/can/softing/Makefile
new file mode 100644
index 0000000..7878b7b
--- /dev/null
+++ b/drivers/net/can/softing/Makefile
@@ -0,0 +1,5 @@
+
+softing-y := softing_main.o softing_fw.o
+obj-$(CONFIG_CAN_SOFTING)        += softing.o
+
+ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/softing/softing.h b/drivers/net/can/softing/softing.h
new file mode 100644
index 0000000..72d3adb
--- /dev/null
+++ b/drivers/net/can/softing/softing.h
@@ -0,0 +1,193 @@
+/*
+ * softing common interfaces
+ *
+ * by Kurt Van Dijck, 06-2008
+ */
+
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/can.h>
+#include <linux/can/dev.h>
+
+#include "softing_platform.h"
+
+#ifndef CAN_CTRLMODE_BERR_REPORTING
+#define CAN_CTRLMODE_BERR_REPORTING 0
+#endif
+
+struct softing;
+
+struct softing_priv {
+	struct can_priv can;	/* must be the first member! */
+	struct net_device *netdev;
+	struct softing *card;
+	struct {
+		int pending;
+		/* variables wich hold the circular buffer */
+		int echo_put;
+		int echo_get;
+	} tx;
+	struct can_bittiming_const btr_const;
+	int index;
+	u8 output;
+	u16 chip;
+};
+#define netdev2softing(netdev)	((struct softing_priv *)netdev_priv(netdev))
+
+struct softing {
+	const struct softing_platform_data *pdat;
+	struct platform_device *pdev;
+	struct net_device *net[2];
+	spinlock_t spin; /* protect this structure & DPRAM access */
+	ktime_t ts_ref;
+	ktime_t ts_overflow; /* timestamp overflow value, in ktime */
+
+	struct {
+		/* indication of firmware status */
+		int up;
+		/* protection of the 'up' variable */
+		struct mutex lock;
+	} fw;
+	struct {
+		int nr;
+		int requested;
+		int svc_count;
+		unsigned int dpram_position;
+	} irq;
+	struct {
+		int pending;
+		int last_bus;
+		/*
+		 * keep the bus that last tx'd a message,
+		 * in order to let every netdev queue resume
+		 */
+	} tx;
+	__iomem uint8_t *dpram;
+	unsigned long dpram_phys;
+	unsigned long dpram_size;
+	struct {
+		u32  serial, fw, hw, lic;
+		u16  chip[2];
+		u32  freq;
+	} id;
+};
+
+extern int softing_default_output(struct net_device *netdev);
+
+extern ktime_t softing_raw2ktime(struct softing *card, u32 raw);
+
+extern int softing_fct_cmd(struct softing *card, int16_t cmd, uint16_t vector,
+		const char *msg);
+
+extern int softing_bootloader_command(struct softing *card, int16_t cmd,
+		const char *msg);
+
+/* Load firmware after reset */
+extern int softing_load_fw(const char *file, struct softing *card,
+			__iomem uint8_t *virt, unsigned int size, int offset);
+
+/* Load final application firmware after bootloader */
+extern int softing_load_app_fw(const char *file, struct softing *card);
+
+/*
+ * enable or disable irq
+ * only called with fw.lock locked
+ */
+extern int softing_enable_irq(struct softing *card, int enable);
+
+/* start/stop 1 bus on card */
+extern int softing_startstop(struct net_device *netdev, int up);
+
+/* netif_rx() */
+extern int softing_netdev_rx(struct net_device *netdev,
+		const struct can_frame *msg, ktime_t ktime);
+
+/* SOFTING DPRAM mappings */
+#define DPRAM_RX		0x0000
+	#define DPRAM_RX_SIZE	32
+	#define DPRAM_RX_CNT	16
+#define DPRAM_RX_RD		0x0201	/* uint8_t */
+#define DPRAM_RX_WR		0x0205	/* uint8_t */
+#define DPRAM_RX_LOST		0x0207	/* uint8_t */
+
+#define DPRAM_FCT_PARAM		0x0300	/* int16_t [20] */
+#define DPRAM_FCT_RESULT	0x0328	/* int16_t */
+#define DPRAM_FCT_HOST		0x032b	/* uint16_t */
+
+#define DPRAM_INFO_BUSSTATE	0x0331	/* uint16_t */
+#define DPRAM_INFO_BUSSTATE2	0x0335	/* uint16_t */
+#define DPRAM_INFO_ERRSTATE	0x0339	/* uint16_t */
+#define DPRAM_INFO_ERRSTATE2	0x033d	/* uint16_t */
+#define DPRAM_RESET		0x0341	/* uint16_t */
+#define DPRAM_CLR_RECV_FIFO	0x0345	/* uint16_t */
+#define DPRAM_RESET_TIME	0x034d	/* uint16_t */
+#define DPRAM_TIME		0x0350	/* uint64_t */
+#define DPRAM_WR_START		0x0358	/* uint8_t */
+#define DPRAM_WR_END		0x0359	/* uint8_t */
+#define DPRAM_RESET_RX_FIFO	0x0361	/* uint16_t */
+#define DPRAM_RESET_TX_FIFO	0x0364	/* uint8_t */
+#define DPRAM_READ_FIFO_LEVEL	0x0365	/* uint8_t */
+#define DPRAM_RX_FIFO_LEVEL	0x0366	/* uint16_t */
+#define DPRAM_TX_FIFO_LEVEL	0x0366	/* uint16_t */
+
+#define DPRAM_TX		0x0400	/* uint16_t */
+	#define DPRAM_TX_SIZE	16
+	#define DPRAM_TX_CNT	32
+#define DPRAM_TX_RD		0x0601	/* uint8_t */
+#define DPRAM_TX_WR		0x0605	/* uint8_t */
+
+#define DPRAM_COMMAND		0x07e0	/* uint16_t */
+#define DPRAM_RECEIPT		0x07f0	/* uint16_t */
+#define DPRAM_IRQ_TOHOST	0x07fe	/* uint8_t */
+#define DPRAM_IRQ_TOCARD	0x07ff	/* uint8_t */
+
+#define DPRAM_V2_RESET		0x0e00	/* uint8_t */
+#define DPRAM_V2_IRQ_TOHOST	0x0e02	/* uint8_t */
+
+#define TXMAX	(DPRAM_TX_CNT - 1)
+
+/* DPRAM return codes */
+#define RES_NONE	0
+#define RES_OK		1
+#define RES_NOK		2
+#define RES_UNKNOWN	3
+/* DPRAM flags */
+#define CMD_TX		0x01
+#define CMD_ACK		0x02
+#define CMD_XTD		0x04
+#define CMD_RTR		0x08
+#define CMD_ERR		0x10
+#define CMD_BUS2	0x80
+
+/*
+ * some inline DPRAM acces function
+ * to prevent extra dependency between softing & softingcs
+ */
+/* reset DPRAM */
+static inline void softing_set_reset_dpram(struct softing *card)
+{
+	if (card->pdat->generation >= 2) {
+		uint8_t tmp;
+		spin_lock_bh(&card->spin);
+		tmp = ioread8(&card->dpram[DPRAM_V2_RESET]);
+		tmp &= ~1;
+		iowrite8(tmp, &card->dpram[DPRAM_V2_RESET]);
+		spin_unlock_bh(&card->spin);
+	}
+}
+
+static inline void softing_clr_reset_dpram(struct softing *card)
+{
+	if (card->pdat->generation >= 2) {
+		uint8_t tmp;
+		spin_lock_bh(&card->spin);
+		tmp = ioread8(&card->dpram[DPRAM_V2_RESET]);
+		tmp |= 1;
+		iowrite8(tmp, &card->dpram[DPRAM_V2_RESET]);
+		spin_unlock_bh(&card->spin);
+	}
+}
+
diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c
new file mode 100644
index 0000000..03ed853
--- /dev/null
+++ b/drivers/net/can/softing/softing_fw.c
@@ -0,0 +1,648 @@
+/*
+* drivers/net/can/softing/softing_fw.c
+*
+* Copyright (C) 2008-2010
+*
+* - Kurt Van Dijck, EIA Electronics
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the version 2 of the GNU General Public License
+* as published by the Free Software Foundation
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/firmware.h>
+#include <linux/sched.h>
+#include <asm/div64.h>
+
+#include "softing.h"
+
+int softing_fct_cmd(struct softing *card, int16_t cmd, uint16_t vector,
+		const char *msg)
+{
+	int ret;
+	unsigned long stamp;
+
+	if (vector == RES_OK)
+		vector = RES_NONE;
+	iowrite16(cmd, &card->dpram[DPRAM_FCT_PARAM]);
+	iowrite8(vector >> 8, &card->dpram[DPRAM_FCT_HOST + 1]);
+	iowrite8(vector, &card->dpram[DPRAM_FCT_HOST]);
+
+	/* be sure to flush this to the card */
+	wmb();
+	stamp = jiffies + 1 * HZ;
+	/* wait for card */
+	do {
+		/* DPRAM_FCT_HOST is _not_ aligned */
+		ret = ioread8(&card->dpram[DPRAM_FCT_HOST]) +
+			(ioread8(&card->dpram[DPRAM_FCT_HOST + 1]) << 8);
+		/* don't have any cached variables */
+		rmb();
+		if (ret == RES_OK) {
+			/* don't read return-value now */
+			ret = ioread16(&card->dpram[DPRAM_FCT_RESULT]);
+			if (ret)
+				dev_alert(&card->pdev->dev,
+					"%s returned %u\n", msg, ret);
+			return 0;
+		}
+		if (time_after(jiffies, stamp))
+			break;
+		/* process context => relax */
+		usleep_range(500, 10000);
+	} while (!signal_pending(current));
+
+	if (ret == RES_NONE) {
+		dev_alert(&card->pdev->dev,
+			"%s, no response from card on %u/0x%02x\n",
+			msg, cmd, vector);
+		return 1;
+	} else {
+		dev_alert(&card->pdev->dev,
+			"%s, bad response from card on %u/0x%02x, 0x%04x\n",
+			msg, cmd, vector, ret);
+		/* make sure to return something not 0 */
+		return ret ?: 1;
+	}
+}
+
+int softing_bootloader_command(struct softing *card, int16_t cmd,
+		const char *msg)
+{
+	int ret;
+	unsigned long stamp;
+	iowrite16(RES_NONE, &card->dpram[DPRAM_RECEIPT]);
+	iowrite16(cmd, &card->dpram[DPRAM_COMMAND]);
+	/* be sure to flush this to the card */
+	wmb();
+	stamp = jiffies + 3 * HZ;
+	/* wait for card */
+	do {
+		ret = ioread16(&card->dpram[DPRAM_RECEIPT]);
+		/* don't have any cached variables */
+		rmb();
+		if (ret == RES_OK)
+			return 0;
+		if (time_after(jiffies, stamp))
+			break;
+		/* process context => relax */
+		usleep_range(500, 10000);
+	} while (!signal_pending(current));
+
+	switch (ret) {
+	case RES_NONE:
+		dev_alert(&card->pdev->dev, "%s: no response from card\n", msg);
+		break;
+	case RES_NOK:
+		dev_alert(&card->pdev->dev, "%s: response from card nok\n",
+				msg);
+		break;
+	case RES_UNKNOWN:
+		dev_alert(&card->pdev->dev, "%s: command 0x%04x unknown\n",
+			msg, cmd);
+		break;
+	default:
+		dev_alert(&card->pdev->dev, "%s: bad response from card: %i\n",
+			msg, ret);
+		break;
+	}
+	return ret ?: 1;
+}
+
+static int fw_parse(const uint8_t **pmem, uint16_t *ptype, uint32_t *paddr,
+		uint16_t *plen, const uint8_t **pdat)
+{
+	uint16_t checksum[2];
+	const uint8_t *mem;
+	const uint8_t *end;
+
+	mem = *pmem;
+	*ptype = le16_to_cpup((void *)&mem[0]);
+	*paddr = le32_to_cpup((void *)&mem[2]);
+	*plen = le16_to_cpup((void *)&mem[6]);
+	*pdat = &mem[8];
+	/* verify checksum */
+	end = &mem[8 + *plen];
+	checksum[0] = le16_to_cpup((void *)end);
+	for (checksum[1] = 0; mem < end; ++mem)
+		checksum[1] += *mem;
+	if (checksum[0] != checksum[1])
+		return -EINVAL;
+	/* increment */
+	*pmem += 10 + *plen;
+	return 0;
+}
+
+int softing_load_fw(const char *file, struct softing *card,
+			__iomem uint8_t *dpram, unsigned int size, int offset)
+{
+	const struct firmware *fw;
+	int ret, ok = 0;
+	const uint8_t *mem, *end, *dat;
+	uint16_t type, len;
+	uint32_t addr;
+	uint8_t buf[1024];
+
+	ret = request_firmware(&fw, file, &card->pdev->dev);
+	if (ret) {
+		dev_alert(&card->pdev->dev, "request_firmware(%s) got %i\n",
+			file, ret);
+		return ret;
+	}
+	dev_dbg(&card->pdev->dev, "%s, firmware(%s) got %u bytes"
+		", offset %c0x%04x\n",
+		card->pdat->name, file, (unsigned int)fw->size,
+		(offset >= 0) ? '+' : '-', (unsigned int)abs(offset));
+	/* parse the firmware */
+	mem = fw->data;
+	end = &mem[fw->size];
+	/* look for header record */
+	ret = fw_parse(&mem, &type, &addr, &len, &dat);
+	if (ret < 0)
+		goto fw_end;
+	if (type != 0xffff) {
+		dev_alert(&card->pdev->dev, "firware starts with type 0x%04x\n",
+			type);
+		goto fw_end;
+	}
+	if (strncmp("Structured Binary Format, Softing GmbH" , dat, len)) {
+		dev_info(&card->pdev->dev, "firware string '%.*s'\n", len, dat);
+		goto fw_end;
+	}
+	ok |= 1;
+	/* ok, we had a header */
+	while (mem < end) {
+		ret = fw_parse(&mem, &type, &addr, &len, &dat);
+		if (ret)
+			break;
+		if (type == 3) {
+			/* start address */
+			ok |= 2;
+			continue;
+		} else if (type == 1) {
+			/* eof */
+			ok |= 4;
+			goto fw_end;
+		} else if (type != 0) {
+			dev_alert(&card->pdev->dev,
+					"unknown record type 0x%04x\n", type);
+			break;
+		}
+
+		if ((addr + len + offset) > size) {
+			dev_alert(&card->pdev->dev,
+				"firmware out of range (0x%08x / 0x%08x)\n",
+				(addr + len + offset), size);
+			goto fw_end;
+		}
+		memcpy_toio(&dpram[addr + offset], dat, len);
+		/* be sure to flush caches from IO space */
+		mb();
+		if (len > sizeof(buf)) {
+			dev_info(&card->pdev->dev,
+				"record too big for verify (%u)\n", len);
+			continue;
+		}
+		/* verify record data */
+		memcpy_fromio(buf, &dpram[addr + offset], len);
+		if (!memcmp(buf, dat, len))
+			/* is ok */
+			continue;
+		dev_alert(&card->pdev->dev, "0x%08x:0x%03x at 0x%u failed\n",
+				addr, len, addr + offset);
+		goto fw_end;
+	}
+fw_end:
+	release_firmware(fw);
+	if (0x5 == (ok & 0x5))
+		/* got eof & start */
+		return 0;
+	dev_info(&card->pdev->dev, "firmware %s failed\n", file);
+	return ret ?: -EINVAL;
+}
+
+int softing_load_app_fw(const char *file, struct softing *card)
+{
+	const struct firmware *fw;
+	const uint8_t *mem, *end, *dat;
+	int ret, ok = 0, j;
+	uint16_t type, len;
+	uint32_t addr, start_addr = 0;
+	unsigned int sum, rx_sum;
+
+	ret = request_firmware(&fw, file, &card->pdev->dev);
+	if (ret) {
+		dev_alert(&card->pdev->dev, "request_firmware(%s) got %i\n",
+			file, ret);
+		return ret;
+	}
+	dev_dbg(&card->pdev->dev, "firmware(%s) got %lu bytes\n",
+		file, (unsigned long)fw->size);
+	/* parse the firmware */
+	mem = fw->data;
+	end = &mem[fw->size];
+	/* look for header record */
+	ret = fw_parse(&mem, &type, &addr, &len, &dat);
+	if (ret)
+		goto fw_end;
+	if (type != 0xffff) {
+		dev_alert(&card->pdev->dev, "firware starts with type 0x%04x\n",
+			type);
+		goto fw_end;
+	}
+	if (strncmp("Structured Binary Format, Softing GmbH", dat, len)) {
+		dev_alert(&card->pdev->dev, "firware string '%.*s' fault\n",
+				len, dat);
+		goto fw_end;
+	}
+	ok |= 1;
+	/* ok, we had a header */
+	while (mem < end) {
+		ret = fw_parse(&mem, &type, &addr, &len, &dat);
+		if (ret)
+			break;
+
+		if (type == 3) {
+			/* start address */
+			start_addr = addr;
+			ok |= 2;
+			continue;
+		} else if (type == 1) {
+			/* eof */
+			ok |= 4;
+			goto fw_end;
+		} else if (type != 0) {
+			dev_alert(&card->pdev->dev,
+					"unknown record type 0x%04x\n", type);
+			break;
+		}
+
+		/* regualar data */
+		for (sum = 0, j = 0; j < len; ++j)
+			sum += dat[j];
+		/* work in 16bit (target) */
+		sum &= 0xffff;
+
+		memcpy_toio(&card->dpram[card->pdat->app.offs], dat, len);
+		iowrite32(card->pdat->app.offs + card->pdat->app.addr,
+				&card->dpram[DPRAM_COMMAND + 2]);
+		iowrite32(addr, &card->dpram[DPRAM_COMMAND + 6]);
+		iowrite16(len, &card->dpram[DPRAM_COMMAND + 10]);
+		iowrite8(1, &card->dpram[DPRAM_COMMAND + 12]);
+		if (softing_bootloader_command(card, 1, "loading app."))
+			goto fw_end;
+		/* verify checksum */
+		rx_sum = ioread16(&card->dpram[DPRAM_RECEIPT + 2]);
+		if (rx_sum != sum) {
+			dev_alert(&card->pdev->dev, "SRAM seems to be damaged"
+				", wanted 0x%04x, got 0x%04x\n", sum, rx_sum);
+			goto fw_end;
+		}
+	}
+fw_end:
+	release_firmware(fw);
+	if (ok != 7)
+		goto fw_failed;
+	/* got start, start_addr, & eof */
+	iowrite32(start_addr, &card->dpram[DPRAM_COMMAND + 2]);
+	iowrite8(1, &card->dpram[DPRAM_COMMAND + 6]);
+	if (softing_bootloader_command(card, 3, "start app."))
+		goto fw_failed;
+	dev_info(&card->pdev->dev, "firmware %s up\n", file);
+	return 0;
+fw_failed:
+	dev_info(&card->pdev->dev, "firmware %s failed\n", file);
+	return ret ?: -EINVAL;
+}
+
+static int softing_reset_chip(struct softing *card)
+{
+	do {
+		/* reset chip */
+		iowrite8(0, &card->dpram[DPRAM_RESET_RX_FIFO]);
+		iowrite8(0, &card->dpram[DPRAM_RESET_RX_FIFO+1]);
+		iowrite8(1, &card->dpram[DPRAM_RESET]);
+		iowrite8(0, &card->dpram[DPRAM_RESET+1]);
+
+		if (!softing_fct_cmd(card, 0, 0, "reset_chip"))
+			break;
+		if (signal_pending(current))
+			goto failed;
+		/* sync */
+		if (softing_fct_cmd(card, 99, 0x55, "sync-a"))
+			goto failed;
+		if (softing_fct_cmd(card, 99, 0xaa, "sync-a"))
+			goto failed;
+	} while (1);
+	card->tx.pending = 0;
+	return 0;
+failed:
+	return -EIO;
+}
+
+static void softing_initialize_timestamp(struct softing *card)
+{
+	uint64_t ovf;
+
+	card->ts_ref = ktime_get();
+
+	/* 16MHz is the reference */
+	ovf = 0x100000000ULL * 16;
+	do_div(ovf, card->pdat->freq ?: 16);
+
+	card->ts_overflow = ktime_add_us(ktime_set(0, 0), ovf);
+}
+
+ktime_t softing_raw2ktime(struct softing *card, u32 raw)
+{
+	uint64_t rawl;
+	ktime_t now, real_offset;
+	ktime_t target;
+	ktime_t tmp;
+
+	now = ktime_get();
+	real_offset = ktime_sub(ktime_get_real(), now);
+
+	/* find nsec from card */
+	rawl = raw * 16;
+	do_div(rawl, card->pdat->freq ?: 16);
+	target = ktime_add_us(card->ts_ref, rawl);
+	/* test for overflows */
+	tmp = ktime_add(target, card->ts_overflow);
+	while (unlikely(ktime_to_ns(tmp) > ktime_to_ns(now))) {
+		card->ts_ref = ktime_add(card->ts_ref, card->ts_overflow);
+		target = tmp;
+		tmp = ktime_add(target, card->ts_overflow);
+	}
+	return ktime_add(target, real_offset);
+}
+
+static inline int softing_error_reporting(struct net_device *netdev)
+{
+	struct softing_priv *priv = netdev_priv(netdev);
+
+	return (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
+		? 1 : 0;
+}
+
+int softing_startstop(struct net_device *dev, int up)
+{
+	int ret;
+	struct softing *card;
+	struct softing_priv *priv;
+	struct net_device *netdev;
+	int mask_start;
+	int j, error_reporting;
+	struct can_frame msg;
+	const struct can_bittiming *bt;
+
+	priv = netdev_priv(dev);
+	card = priv->card;
+
+	if (!card->fw.up)
+		return -EIO;
+
+	ret = mutex_lock_interruptible(&card->fw.lock);
+	if (ret)
+		return ret;
+
+	mask_start = 0;
+	if (dev && up)
+		/* prepare to start this bus as well */
+		mask_start |= (1 << priv->index);
+	/* bring netdevs down */
+	for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+		netdev = card->net[j];
+		if (!netdev)
+			continue;
+		priv = netdev_priv(netdev);
+
+		if (dev != netdev)
+			netif_stop_queue(netdev);
+
+		if (netif_running(netdev)) {
+			if (dev != netdev)
+				mask_start |= (1 << j);
+			priv->tx.pending = 0;
+			priv->tx.echo_put = 0;
+			priv->tx.echo_get = 0;
+			/* this bus' may just have called open_candev()
+			 * which is rather stupid to call close_candev()
+			 * already
+			 * but we may come here from busoff recovery too
+			 * in which case the echo_skb _needs_ flushing too.
+			 * just be sure to call open_candev() again
+			 */
+			close_candev(netdev);
+		}
+		priv->can.state = CAN_STATE_STOPPED;
+	}
+	card->tx.pending = 0;
+
+	softing_enable_irq(card, 0);
+	ret = softing_reset_chip(card);
+	if (ret)
+		goto failed;
+	if (!mask_start)
+		/* no busses to be brought up */
+		goto card_done;
+
+	if ((mask_start & 1) && (mask_start & 2)
+			&& (softing_error_reporting(card->net[0])
+				!= softing_error_reporting(card->net[1]))) {
+		dev_alert(&card->pdev->dev,
+				"err_reporting flag differs for busses\n");
+		goto invalid;
+	}
+	error_reporting = 0;
+	if (mask_start & 1) {
+		netdev = card->net[0];
+		priv = netdev_priv(netdev);
+		error_reporting += softing_error_reporting(netdev);
+		/* init chip 1 */
+		bt = &priv->can.bittiming;
+		iowrite16(bt->brp, &card->dpram[DPRAM_FCT_PARAM + 2]);
+		iowrite16(bt->sjw, &card->dpram[DPRAM_FCT_PARAM + 4]);
+		iowrite16(bt->phase_seg1 + bt->prop_seg,
+				&card->dpram[DPRAM_FCT_PARAM + 6]);
+		iowrite16(bt->phase_seg2, &card->dpram[DPRAM_FCT_PARAM + 8]);
+		iowrite16((priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) ? 1 : 0,
+				&card->dpram[DPRAM_FCT_PARAM + 10]);
+		if (softing_fct_cmd(card, 1, 0, "initialize_chip[0]"))
+			goto failed;
+		/* set mode */
+		iowrite16(0, &card->dpram[DPRAM_FCT_PARAM + 2]);
+		iowrite16(0, &card->dpram[DPRAM_FCT_PARAM + 4]);
+		if (softing_fct_cmd(card, 3, 0, "set_mode[0]"))
+			goto failed;
+		/* set filter */
+		/* 11bit id & mask */
+		iowrite16(0x0000, &card->dpram[DPRAM_FCT_PARAM + 2]);
+		iowrite16(0x07ff, &card->dpram[DPRAM_FCT_PARAM + 4]);
+		/* 29bit id.lo & mask.lo & id.hi & mask.hi */
+		iowrite16(0x0000, &card->dpram[DPRAM_FCT_PARAM + 6]);
+		iowrite16(0xffff, &card->dpram[DPRAM_FCT_PARAM + 8]);
+		iowrite16(0x0000, &card->dpram[DPRAM_FCT_PARAM + 10]);
+		iowrite16(0x1fff, &card->dpram[DPRAM_FCT_PARAM + 12]);
+		if (softing_fct_cmd(card, 7, 0, "set_filter[0]"))
+			goto failed;
+		/* set output control */
+		iowrite16(priv->output, &card->dpram[DPRAM_FCT_PARAM + 2]);
+		if (softing_fct_cmd(card, 5, 0, "set_output[0]"))
+			goto failed;
+	}
+	if (mask_start & 2) {
+		netdev = card->net[1];
+		priv = netdev_priv(netdev);
+		error_reporting += softing_error_reporting(netdev);
+		/* init chip2 */
+		bt = &priv->can.bittiming;
+		iowrite16(bt->brp, &card->dpram[DPRAM_FCT_PARAM + 2]);
+		iowrite16(bt->sjw, &card->dpram[DPRAM_FCT_PARAM + 4]);
+		iowrite16(bt->phase_seg1 + bt->prop_seg,
+				&card->dpram[DPRAM_FCT_PARAM + 6]);
+		iowrite16(bt->phase_seg2, &card->dpram[DPRAM_FCT_PARAM + 8]);
+		iowrite16((priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) ? 1 : 0,
+				&card->dpram[DPRAM_FCT_PARAM + 10]);
+		if (softing_fct_cmd(card, 2, 0, "initialize_chip[1]"))
+			goto failed;
+		/* set mode2 */
+		iowrite16(0, &card->dpram[DPRAM_FCT_PARAM + 2]);
+		iowrite16(0, &card->dpram[DPRAM_FCT_PARAM + 4]);
+		if (softing_fct_cmd(card, 4, 0, "set_mode[1]"))
+			goto failed;
+		/* set filter2 */
+		/* 11bit id & mask */
+		iowrite16(0x0000, &card->dpram[DPRAM_FCT_PARAM + 2]);
+		iowrite16(0x07ff, &card->dpram[DPRAM_FCT_PARAM + 4]);
+		/* 29bit id.lo & mask.lo & id.hi & mask.hi */
+		iowrite16(0x0000, &card->dpram[DPRAM_FCT_PARAM + 6]);
+		iowrite16(0xffff, &card->dpram[DPRAM_FCT_PARAM + 8]);
+		iowrite16(0x0000, &card->dpram[DPRAM_FCT_PARAM + 10]);
+		iowrite16(0x1fff, &card->dpram[DPRAM_FCT_PARAM + 12]);
+		if (softing_fct_cmd(card, 8, 0, "set_filter[1]"))
+			goto failed;
+		/* set output control2 */
+		iowrite16(priv->output, &card->dpram[DPRAM_FCT_PARAM + 2]);
+		if (softing_fct_cmd(card, 6, 0, "set_output[1]"))
+			goto failed;
+	}
+	/* enable_error_frame */
+	/*
+	if (error_reporting) {
+		if (softing_fct_cmd(card, 51, 0, "enable_error_frame"))
+			goto failed;
+	}
+	*/
+	/* initialize interface */
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 2]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 4]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 6]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 8]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 10]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 12]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 14]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 16]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 18]);
+	iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 20]);
+	if (softing_fct_cmd(card, 17, 0, "initialize_interface"))
+		goto failed;
+	/* enable_fifo */
+	if (softing_fct_cmd(card, 36, 0, "enable_fifo"))
+		goto failed;
+	/* enable fifo tx ack */
+	if (softing_fct_cmd(card, 13, 0, "fifo_tx_ack[0]"))
+		goto failed;
+	/* enable fifo tx ack2 */
+	if (softing_fct_cmd(card, 14, 0, "fifo_tx_ack[1]"))
+		goto failed;
+	/* enable timestamps */
+	/* is default, no code found */
+	/* start_chip */
+	if (softing_fct_cmd(card, 11, 0, "start_chip"))
+		goto failed;
+	iowrite8(0, &card->dpram[DPRAM_INFO_BUSSTATE]);
+	iowrite8(0, &card->dpram[DPRAM_INFO_BUSSTATE2]);
+	dev_info(&card->pdev->dev, "%s up\n", __func__);
+	if (card->pdat->generation < 2) {
+		iowrite8(0, &card->dpram[DPRAM_V2_IRQ_TOHOST]);
+		/* flush the DPRAM caches */
+		wmb();
+	}
+
+	softing_initialize_timestamp(card);
+
+	/*
+	 * do socketcan notifications/status changes
+	 * from here, no errors should occur, or the failed: part
+	 * must be reviewed
+	 */
+	memset(&msg, 0, sizeof(msg));
+	msg.can_id = CAN_ERR_FLAG | CAN_ERR_RESTARTED;
+	msg.can_dlc = CAN_ERR_DLC;
+	for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+		if (!(mask_start & (1 << j)))
+			continue;
+		netdev = card->net[j];
+		if (!netdev)
+			continue;
+		priv = netdev_priv(netdev);
+		priv->can.state = CAN_STATE_ERROR_ACTIVE;
+		open_candev(netdev);
+		if (dev != netdev) {
+			/* notify other busses on the restart */
+			softing_netdev_rx(netdev, &msg, ktime_set(0, 0));
+			++priv->can.can_stats.restarts;
+		}
+		netif_wake_queue(netdev);
+	}
+
+	/* enable interrupts */
+	ret = softing_enable_irq(card, 1);
+	if (ret)
+		goto failed;
+card_done:
+	mutex_unlock(&card->fw.lock);
+	return 0;
+failed:
+	dev_alert(&card->pdev->dev, "firmware failed, going idle\n");
+invalid:
+	softing_enable_irq(card, 0);
+	softing_reset_chip(card);
+	mutex_unlock(&card->fw.lock);
+	/* bring all other interfaces down */
+	for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+		netdev = card->net[j];
+		if (!netdev)
+			continue;
+		dev_close(netdev);
+	}
+	return -EIO;
+}
+
+int softing_default_output(struct net_device *netdev)
+{
+	struct softing_priv *priv = netdev_priv(netdev);
+	struct softing *card = priv->card;
+
+	switch (priv->chip) {
+	case 1000:
+		if (card->pdat->generation < 2)
+			return 0xfb;
+		return 0xfa;
+	case 5:
+		return 0x60;
+	default:
+		return 0x40;
+	}
+}
+
diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
new file mode 100644
index 0000000..4f74075
--- /dev/null
+++ b/drivers/net/can/softing/softing_main.c
@@ -0,0 +1,903 @@
+/*
+* drivers/net/can/softing/softing_main.c
+*
+* Copyright (C) 2008-2010
+*
+* - Kurt Van Dijck, EIA Electronics
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the version 2 of the GNU General Public License
+* as published by the Free Software Foundation
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+
+#include "softing.h"
+
+#define TX_ECHO_SKB_MAX (TXMAX/2)
+
+/*
+ * test is a specific CAN netdev
+ * is online (ie. up 'n running, not sleeping, not busoff
+ */
+static inline int canif_is_active(struct net_device *netdev)
+{
+	struct can_priv *can = netdev_priv(netdev);
+	if (!netif_running(netdev))
+		return 0;
+	return (can->state <= CAN_STATE_ERROR_PASSIVE);
+}
+
+/* trigger the tx queue-ing */
+static netdev_tx_t
+softing_netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct softing_priv *priv = netdev_priv(dev);
+	struct softing *card = priv->card;
+	int ret;
+	uint8_t *ptr;
+	uint8_t fifo_wr, fifo_rd;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	uint8_t buf[DPRAM_TX_SIZE];
+
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
+	spin_lock(&card->spin);
+
+	ret = NETDEV_TX_BUSY;
+	if (!card->fw.up)
+		goto xmit_done;
+	if (card->tx.pending >= TXMAX)
+		goto xmit_done;
+	if (priv->tx.pending >= TX_ECHO_SKB_MAX)
+		goto xmit_done;
+	fifo_wr = ioread8(&card->dpram[DPRAM_TX_WR]);
+	fifo_rd = ioread8(&card->dpram[DPRAM_TX_RD]);
+	if (fifo_wr == fifo_rd)
+		/* fifo full */
+		goto xmit_done;
+	memset(buf, 0, sizeof(buf));
+	ptr = buf;
+	*ptr = CMD_TX;
+	if (cf->can_id & CAN_RTR_FLAG)
+		*ptr |= CMD_RTR;
+	if (cf->can_id & CAN_EFF_FLAG)
+		*ptr |= CMD_XTD;
+	if (priv->index)
+		*ptr |= CMD_BUS2;
+	++ptr;
+	*ptr++ = cf->can_dlc;
+	*ptr++ = (cf->can_id >> 0);
+	*ptr++ = (cf->can_id >> 8);
+	if (cf->can_id & CAN_EFF_FLAG) {
+		*ptr++ = (cf->can_id >> 16);
+		*ptr++ = (cf->can_id >> 24);
+	} else {
+		/* increment 1, not 2 as you might think */
+		ptr += 1;
+	}
+	if (!(cf->can_id & CAN_RTR_FLAG))
+		memcpy(ptr, &cf->data[0], cf->can_dlc);
+	memcpy_toio(&card->dpram[DPRAM_TX + DPRAM_TX_SIZE * fifo_wr],
+			buf, DPRAM_TX_SIZE);
+	if (++fifo_wr >= DPRAM_TX_CNT)
+		fifo_wr = 0;
+	iowrite8(fifo_wr, &card->dpram[DPRAM_TX_WR]);
+	card->tx.last_bus = priv->index;
+	++card->tx.pending;
+	++priv->tx.pending;
+	can_put_echo_skb(skb, dev, priv->tx.echo_put);
+	++priv->tx.echo_put;
+	if (priv->tx.echo_put >= TX_ECHO_SKB_MAX)
+		priv->tx.echo_put = 0;
+	/* can_put_echo_skb() saves the skb, safe to return TX_OK */
+	ret = NETDEV_TX_OK;
+xmit_done:
+	spin_unlock(&card->spin);
+	if (card->tx.pending >= TXMAX) {
+		int j;
+		for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+			if (card->net[j])
+				netif_stop_queue(card->net[j]);
+		}
+	}
+	if (ret != NETDEV_TX_OK)
+		netif_stop_queue(dev);
+
+	return ret;
+}
+
+/*
+ * shortcut for skb delivery
+ */
+int softing_netdev_rx(struct net_device *netdev,
+		const struct can_frame *msg, ktime_t ktime)
+{
+	struct sk_buff *skb;
+	struct can_frame *cf;
+	int ret;
+
+	skb = alloc_can_skb(netdev, &cf);
+	if (!skb)
+		return -ENOMEM;
+	memcpy(cf, msg, sizeof(*msg));
+	skb->tstamp = ktime;
+	ret = netif_rx(skb);
+	if (ret == NET_RX_DROP)
+		++netdev->stats.rx_dropped;
+	return ret;
+}
+
+/*
+ * softing_handle_1
+ * pop 1 entry from the DPRAM queue, and process
+ */
+static int softing_handle_1(struct softing *card)
+{
+	int j;
+	struct net_device *netdev;
+	struct softing_priv *priv;
+	ktime_t ktime;
+	struct can_frame msg;
+
+	int lost_msg;
+	uint8_t fifo_rd, fifo_wr;
+	unsigned int cnt = 0;
+	uint8_t *ptr;
+	u32 tmp;
+	u8 cmd;
+	uint8_t buf[DPRAM_RX_SIZE];
+
+	memset(&msg, 0, sizeof(msg));
+	/* test for lost msgs */
+	lost_msg = ioread8(&card->dpram[DPRAM_RX_LOST]);
+	if (lost_msg) {
+		/* reset condition */
+		iowrite8(0, &card->dpram[DPRAM_RX_LOST]);
+		/* prepare msg */
+		msg.can_id = CAN_ERR_FLAG | CAN_ERR_CRTL;
+		msg.can_dlc = CAN_ERR_DLC;
+		msg.data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+		/*
+		 * service to all busses, we don't know which it was applicable
+		 * but only service busses that are online
+		 */
+		for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+			netdev = card->net[j];
+			if (!netdev)
+				continue;
+			if (!canif_is_active(netdev))
+				/* a dead bus has no overflows */
+				continue;
+			++netdev->stats.rx_over_errors;
+			softing_netdev_rx(netdev, &msg, ktime_set(0, 0));
+		}
+		/* prepare for other use */
+		memset(&msg, 0, sizeof(msg));
+		++cnt;
+	}
+
+	fifo_rd = ioread8(&card->dpram[DPRAM_RX_RD]);
+	fifo_wr = ioread8(&card->dpram[DPRAM_RX_WR]);
+
+	if (++fifo_rd >= DPRAM_RX_CNT)
+		fifo_rd = 0;
+	if (fifo_wr == fifo_rd)
+		return cnt;
+
+	memcpy_fromio(buf, &card->dpram[DPRAM_RX + DPRAM_RX_SIZE*fifo_rd],
+			DPRAM_RX_SIZE);
+	mb();
+	/* trigger dual port RAM */
+	iowrite8(fifo_rd, &card->dpram[DPRAM_RX_RD]);
+
+	ptr = buf;
+	cmd = *ptr++;
+	if (cmd == 0xff) {
+		/* not quite usefull, probably the card has got out */
+		dev_alert(&card->pdev->dev, "got cmd 0x%02x,"
+			" I suspect the card is lost\n", cmd);
+		return 0;
+	}
+	netdev = card->net[0];
+	if (cmd & CMD_BUS2)
+		netdev = card->net[1];
+	priv = netdev_priv(netdev);
+
+	if (cmd & CMD_ERR) {
+		u8 can_state;
+		u8 state;
+		state = *ptr++;
+
+		msg.can_id = CAN_ERR_FLAG;
+		msg.can_dlc = CAN_ERR_DLC;
+
+		if (state & 0x80) {
+			can_state = CAN_STATE_BUS_OFF;
+			msg.can_id |= CAN_ERR_BUSOFF;
+			state = 2;
+		} else if (state & 0x60) {
+			can_state = CAN_STATE_ERROR_PASSIVE;
+			msg.can_id |= CAN_ERR_BUSERROR;
+			msg.data[1] = CAN_ERR_CRTL_TX_PASSIVE;
+			state = 1;
+		} else {
+			can_state = CAN_STATE_ERROR_ACTIVE;
+			state = 0;
+			msg.can_id |= CAN_ERR_BUSERROR;
+		}
+		/* update DPRAM */
+		iowrite8(state, &card->dpram[priv->index ?
+				DPRAM_INFO_BUSSTATE2 : DPRAM_INFO_BUSSTATE]);
+		/* timestamp */
+		tmp = le32_to_cpup((void *)ptr);
+		ptr += 4;
+		ktime = softing_raw2ktime(card, tmp);
+
+		++priv->can.can_stats.bus_error;
+		++netdev->stats.rx_errors;
+		/* update internal status */
+		if (can_state != priv->can.state) {
+			priv->can.state = can_state;
+			if (can_state == CAN_STATE_ERROR_PASSIVE)
+				++priv->can.can_stats.error_passive;
+			if (can_state == CAN_STATE_BUS_OFF) {
+				/* this calls can_close_cleanup() */
+				can_bus_off(netdev);
+				netif_stop_queue(netdev);
+			}
+			/* trigger socketcan */
+			softing_netdev_rx(netdev, &msg, ktime);
+		}
+
+	} else {
+		if (cmd & CMD_RTR)
+			msg.can_id |= CAN_RTR_FLAG;
+		/* acknowledge, was tx msg
+		 * no real tx flag to set
+		if (cmd & CMD_ACK) {
+		}
+		 */
+		msg.can_dlc = get_can_dlc(*ptr++);
+		if (cmd & CMD_XTD) {
+			msg.can_id |= CAN_EFF_FLAG;
+			msg.can_id |= le32_to_cpup((void *)ptr);
+			ptr += 4;
+		} else {
+			msg.can_id |= le16_to_cpup((void *)ptr);
+			ptr += 2;
+		}
+		/* timestamp */
+		tmp = le32_to_cpup((void *)ptr);
+		ptr += 4;
+		ktime = softing_raw2ktime(card, tmp);
+		if (!(msg.can_id & CAN_RTR_FLAG))
+			memcpy(&msg.data[0], ptr, 8);
+		ptr += 8;
+		/* update socket */
+		if (cmd & CMD_ACK) {
+			struct sk_buff *skb;
+			skb = priv->can.echo_skb[priv->tx.echo_get];
+			if (skb)
+				skb->tstamp = ktime;
+			can_get_echo_skb(netdev, priv->tx.echo_get);
+			++priv->tx.echo_get;
+			if (priv->tx.echo_get >= TX_ECHO_SKB_MAX)
+				priv->tx.echo_get = 0;
+			if (priv->tx.pending)
+				--priv->tx.pending;
+			if (card->tx.pending)
+				--card->tx.pending;
+			++netdev->stats.tx_packets;
+			netdev->stats.tx_bytes += msg.can_dlc;
+		} else {
+			++netdev->stats.rx_packets;
+			netdev->stats.rx_bytes += msg.can_dlc;
+			softing_netdev_rx(netdev, &msg, ktime);
+		}
+	}
+	++cnt;
+	return cnt;
+}
+
+/*
+ * real interrupt handler
+ */
+static irqreturn_t softing_irq_thread(int irq, void *dev_id)
+{
+	struct softing *card = (struct softing *)dev_id;
+	struct net_device *netdev;
+	struct softing_priv *priv;
+	int j, offset, work_done;
+
+	work_done = 0;
+	spin_lock_bh(&card->spin);
+	while (softing_handle_1(card) > 0) {
+		++card->irq.svc_count;
+		++work_done;
+	}
+	spin_unlock_bh(&card->spin);
+	/* resume tx queue's */
+	offset = card->tx.last_bus;
+	for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+		if (card->tx.pending >= TXMAX)
+			break;
+		netdev = card->net[(j + offset + 1) % card->pdat->nbus];
+		if (!netdev)
+			continue;
+		priv = netdev_priv(netdev);
+		if (!canif_is_active(netdev))
+			/* it makes no sense to wake dead busses */
+			continue;
+		if (priv->tx.pending >= TX_ECHO_SKB_MAX)
+			continue;
+		++work_done;
+		netif_wake_queue(netdev);
+	}
+	return work_done ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * interrupt routines:
+ * schedule the 'real interrupt handler'
+ */
+static
+irqreturn_t softing_irq_v2(int irq, void *dev_id)
+{
+	struct softing *card = (struct softing *)dev_id;
+	uint8_t ir;
+
+	ir = ioread8(&card->dpram[DPRAM_V2_IRQ_TOHOST]);
+	iowrite8(0, &card->dpram[DPRAM_V2_IRQ_TOHOST]);
+	return (1 == ir) ? IRQ_WAKE_THREAD : IRQ_NONE;
+}
+
+static
+irqreturn_t softing_irq_v1(int irq, void *dev_id)
+{
+	struct softing *card = (struct softing *)dev_id;
+	uint8_t ir;
+
+	ir = ioread8(&card->dpram[DPRAM_IRQ_TOHOST]);
+	iowrite8(0, &card->dpram[DPRAM_IRQ_TOHOST]);
+	return ir ? IRQ_WAKE_THREAD : IRQ_NONE;
+}
+
+/*
+ * netdev/candev inter-operability
+ */
+static int softing_netdev_open(struct net_device *ndev)
+{
+	int ret;
+
+	/* check or determine and set bittime */
+	ret = open_candev(ndev);
+	if (ret)
+		goto failed;
+	ret = softing_startstop(ndev, 1);
+	if (ret)
+		goto failed;
+	return 0;
+failed:
+	return ret;
+}
+
+static int softing_netdev_stop(struct net_device *ndev)
+{
+	int ret;
+
+	netif_stop_queue(ndev);
+
+	/* softing cycle does close_candev() */
+	ret = softing_startstop(ndev, 0);
+	return ret;
+}
+
+static int softing_candev_set_mode(struct net_device *ndev, enum can_mode mode)
+{
+	int ret;
+
+	switch (mode) {
+	case CAN_MODE_START:
+		/* softing_startstop does close_candev() */
+		ret = softing_startstop(ndev, 1);
+		return ret;
+	case CAN_MODE_STOP:
+	case CAN_MODE_SLEEP:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+/*
+ * Softing device management helpers
+ */
+int softing_enable_irq(struct softing *card, int enable)
+{
+	int ret;
+	if (!enable) {
+		if (card->irq.requested && card->irq.nr) {
+			free_irq(card->irq.nr, card);
+			card->irq.requested = 0;
+		}
+		return 0;
+	}
+	if (!card->irq.requested && (card->irq.nr)) {
+		ret = request_threaded_irq(card->irq.nr,
+				(card->pdat->generation >= 2)
+					? softing_irq_v2 : softing_irq_v1,
+				softing_irq_thread, IRQF_SHARED,
+				dev_name(&card->pdev->dev), card);
+		if (ret) {
+			dev_alert(&card->pdev->dev,
+					"request_threaded_irq(%u) failed\n",
+					card->irq.nr);
+			return ret;
+		}
+		card->irq.requested = 1;
+	}
+	return 0;
+}
+
+static void softing_card_shutdown(struct softing *card)
+{
+	int fw_up = 0;
+	dev_dbg(&card->pdev->dev, "%s()\n", __func__);
+	if (mutex_lock_interruptible(&card->fw.lock))
+		/* return -ERESTARTSYS*/;
+	fw_up = card->fw.up;
+	card->fw.up = 0;
+
+	if (card->irq.requested && card->irq.nr) {
+		free_irq(card->irq.nr, card);
+		card->irq.requested = 0;
+	}
+	if (fw_up) {
+		if (card->pdat->enable_irq)
+			card->pdat->enable_irq(card->pdev, 0);
+		softing_set_reset_dpram(card);
+		if (card->pdat->reset)
+			card->pdat->reset(card->pdev, 1);
+	}
+	mutex_unlock(&card->fw.lock);
+}
+
+static int softing_card_boot(struct softing *card)
+{
+	int j;
+	static const uint8_t stream[] = {
+		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, };
+	unsigned char back[sizeof(stream)];
+	dev_dbg(&card->pdev->dev, "%s()\n", __func__);
+
+	if (mutex_lock_interruptible(&card->fw.lock))
+		return -ERESTARTSYS;
+	if (card->fw.up) {
+		mutex_unlock(&card->fw.lock);
+		return 0;
+	}
+	/* reset board */
+	if (card->pdat->enable_irq)
+		card->pdat->enable_irq(card->pdev, 1);
+	/* boot card */
+	softing_set_reset_dpram(card);
+	if (card->pdat->reset)
+		card->pdat->reset(card->pdev, 1);
+	for (j = 0; (j + sizeof(stream)) < card->dpram_size;
+			j += sizeof(stream)) {
+
+		memcpy_toio(&card->dpram[j], stream, sizeof(stream));
+		/* flush IO cache */
+		mb();
+		memcpy_fromio(back, &card->dpram[j], sizeof(stream));
+
+		if (!memcmp(back, stream, sizeof(stream)))
+			continue;
+		/* memory is not equal */
+		dev_alert(&card->pdev->dev, "dpram failed at 0x%04x\n", j);
+		goto open_failed;
+	}
+	wmb();
+	/* load boot firmware */
+	if (softing_load_fw(card->pdat->boot.fw, card, card->dpram,
+				 card->dpram_size,
+				 card->pdat->boot.offs -
+				 card->pdat->boot.addr))
+		goto open_failed;
+	/* load loader firmware */
+	if (softing_load_fw(card->pdat->load.fw, card, card->dpram,
+				 card->dpram_size,
+				 card->pdat->load.offs -
+				 card->pdat->load.addr))
+		goto open_failed;
+
+	if (card->pdat->reset)
+		card->pdat->reset(card->pdev, 0);
+	softing_clr_reset_dpram(card);
+	if (softing_bootloader_command(card, 0, "card boot"))
+		goto open_failed;
+	if (softing_load_app_fw(card->pdat->app.fw, card))
+		goto open_failed;
+	/* reset chip */
+	iowrite8(0, &card->dpram[DPRAM_RESET_RX_FIFO]);
+	iowrite8(0, &card->dpram[DPRAM_RESET_RX_FIFO+1]);
+	iowrite8(1, &card->dpram[DPRAM_RESET]);
+	iowrite8(0, &card->dpram[DPRAM_RESET+1]);
+	/* sync */
+	if (softing_fct_cmd(card, 99, 0x55, "sync-a"))
+		goto open_failed;
+	if (softing_fct_cmd(card, 99, 0xaa, "sync-a"))
+		goto open_failed;
+	/* reset chip */
+	if (softing_fct_cmd(card, 0, 0, "reset_chip"))
+		goto open_failed;
+	/* get_serial */
+	if (softing_fct_cmd(card, 43, 0, "get_serial_number"))
+		goto open_failed;
+	card->id.serial = ioread32(&card->dpram[DPRAM_FCT_PARAM]);
+	/* get_version */
+	if (softing_fct_cmd(card, 12, 0, "get_version"))
+		goto open_failed;
+	card->id.fw = ioread16(&card->dpram[DPRAM_FCT_PARAM + 2]);
+	card->id.hw = ioread16(&card->dpram[DPRAM_FCT_PARAM + 4]);
+	card->id.lic = ioread16(&card->dpram[DPRAM_FCT_PARAM + 6]);
+	card->id.chip[0] = ioread16(&card->dpram[DPRAM_FCT_PARAM + 8]);
+	card->id.chip[1] = ioread16(&card->dpram[DPRAM_FCT_PARAM + 10]);
+
+	dev_info(&card->pdev->dev, "card booted, type %s, "
+			"serial %u, fw %u, hw %u, lic %u, chip (%u,%u)\n",
+		  card->pdat->name, card->id.serial, card->id.fw, card->id.hw,
+		  card->id.lic, card->id.chip[0], card->id.chip[1]);
+
+	card->fw.up = 1;
+	mutex_unlock(&card->fw.lock);
+	return 0;
+open_failed:
+	card->fw.up = 0;
+	if (card->pdat->enable_irq)
+		card->pdat->enable_irq(card->pdev, 0);
+	softing_set_reset_dpram(card);
+	if (card->pdat->reset)
+		card->pdat->reset(card->pdev, 1);
+	mutex_unlock(&card->fw.lock);
+	return -EIO;
+}
+
+/*
+ * netdev sysfs
+ */
+static ssize_t show_channel(struct device *dev
+		, struct device_attribute *attr, char *buf)
+{
+	struct net_device *ndev = to_net_dev(dev);
+	struct softing_priv *priv = netdev2softing(ndev);
+	return sprintf(buf, "%i\n", priv->index);
+}
+
+static ssize_t show_chip(struct device *dev
+		, struct device_attribute *attr, char *buf)
+{
+	struct net_device *ndev = to_net_dev(dev);
+	struct softing_priv *priv = netdev2softing(ndev);
+	return sprintf(buf, "%i\n", priv->chip);
+}
+
+static ssize_t show_output(struct device *dev
+		, struct device_attribute *attr, char *buf)
+{
+	struct net_device *ndev = to_net_dev(dev);
+	struct softing_priv *priv = netdev2softing(ndev);
+	return sprintf(buf, "0x%02x\n", priv->output);
+}
+
+static ssize_t store_output(struct device *dev
+		, struct device_attribute *attr
+		, const char *buf, size_t count)
+{
+	struct net_device *ndev = to_net_dev(dev);
+	struct softing_priv *priv = netdev2softing(ndev);
+	struct softing *card = priv->card;
+	unsigned long val;
+	int ret;
+
+	ret = strict_strtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+	val &= 0xFF;
+
+	ret = mutex_lock_interruptible(&card->fw.lock);
+	if (ret)
+		return -ERESTARTSYS;
+	if (netif_running(ndev)) {
+		mutex_unlock(&card->fw.lock);
+		return -EBUSY;
+	}
+	priv->output = val;
+	mutex_unlock(&card->fw.lock);
+	return count;
+}
+
+static const DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
+static const DEVICE_ATTR(chip, S_IRUGO, show_chip, NULL);
+static const DEVICE_ATTR(output, S_IRUGO | S_IWUSR, show_output, store_output);
+
+static const struct attribute *const netdev_sysfs_attrs[] = {
+	&dev_attr_channel.attr,
+	&dev_attr_chip.attr,
+	&dev_attr_output.attr,
+	NULL,
+};
+static const struct attribute_group netdev_sysfs_group = {
+	.name  = NULL,
+	.attrs = (struct attribute **)netdev_sysfs_attrs,
+};
+
+static const struct net_device_ops softing_netdev_ops = {
+	.ndo_open = softing_netdev_open,
+	.ndo_stop = softing_netdev_stop,
+	.ndo_start_xmit	= softing_netdev_start_xmit,
+};
+
+static const struct can_bittiming_const softing_btr_const = {
+	.tseg1_min = 1,
+	.tseg1_max = 16,
+	.tseg2_min = 1,
+	.tseg2_max = 8,
+	.sjw_max = 4, /* overruled */
+	.brp_min = 1,
+	.brp_max = 32, /* overruled */
+	.brp_inc = 1,
+};
+
+
+static struct net_device *softing_netdev_create(
+		struct softing *card, u16 chip_id)
+{
+	struct net_device *netdev;
+	struct softing_priv *priv;
+
+	netdev = alloc_candev(sizeof(*priv), TX_ECHO_SKB_MAX);
+	if (!netdev) {
+		dev_alert(&card->pdev->dev, "alloc_candev failed\n");
+		return NULL;
+	}
+	priv = netdev_priv(netdev);
+	priv->netdev = netdev;
+	priv->card = card;
+	memcpy(&priv->btr_const, &softing_btr_const, sizeof(priv->btr_const));
+	priv->btr_const.brp_max = card->pdat->max_brp;
+	priv->btr_const.sjw_max = card->pdat->max_sjw;
+	priv->can.bittiming_const = &priv->btr_const;
+	priv->can.clock.freq = 8000000;
+	priv->chip = chip_id;
+	priv->output = softing_default_output(netdev);
+	SET_NETDEV_DEV(netdev, &card->pdev->dev);
+
+	netdev->flags |= IFF_ECHO;
+	netdev->netdev_ops	= &softing_netdev_ops;
+	priv->can.do_set_mode	= softing_candev_set_mode;
+	priv->can.ctrlmode_supported =
+		CAN_CTRLMODE_3_SAMPLES;/* | CAN_CTRLMODE_BERR_REPORTING */;
+
+	return netdev;
+}
+
+static int softing_netdev_register(struct net_device *netdev)
+{
+	int ret;
+
+	/*
+	 * provide bus-specific sysfs attributes _during_ the uevent
+	 */
+	netdev->sysfs_groups[0] = &netdev_sysfs_group;
+	ret = register_candev(netdev);
+	if (ret) {
+		dev_alert(&netdev->dev, "register failed\n");
+		return ret;
+	}
+	return 0;
+}
+
+static void softing_netdev_cleanup(struct net_device *netdev)
+{
+	unregister_candev(netdev);
+	free_candev(netdev);
+}
+
+/*
+ * sysfs for Platform device
+ */
+#define DEV_ATTR_RO(name, member) \
+static ssize_t show_##name(struct device *dev, \
+		struct device_attribute *attr, char *buf) \
+{ \
+	struct softing *card = platform_get_drvdata(to_platform_device(dev)); \
+	return sprintf(buf, "%u\n", card->member); \
+} \
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+DEV_ATTR_RO(serial	, id.serial);
+DEV_ATTR_RO(firmware	, id.fw);
+DEV_ATTR_RO(hardware	, id.hw);
+DEV_ATTR_RO(license	, id.lic);
+DEV_ATTR_RO(freq	, id.freq);
+DEV_ATTR_RO(txpending	, tx.pending);
+
+static struct attribute *softing_pdev_attrs[] = {
+	&dev_attr_serial.attr,
+	&dev_attr_firmware.attr,
+	&dev_attr_hardware.attr,
+	&dev_attr_license.attr,
+	&dev_attr_freq.attr,
+	&dev_attr_txpending.attr,
+	NULL,
+};
+
+static const struct attribute_group softing_pdev_group = {
+	.attrs = softing_pdev_attrs,
+};
+
+/*
+ * platform driver
+ */
+static int softing_pdev_remove(struct platform_device *pdev)
+{
+	struct softing *card = platform_get_drvdata(pdev);
+	int j;
+
+	/* first, disable card*/
+	softing_card_shutdown(card);
+
+	for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+		if (!card->net[j])
+			continue;
+		softing_netdev_cleanup(card->net[j]);
+		card->net[j] = NULL;
+	}
+	sysfs_remove_group(&pdev->dev.kobj, &softing_pdev_group);
+
+	iounmap(card->dpram);
+	kfree(card);
+	return 0;
+}
+
+static int softing_pdev_probe(struct platform_device *pdev)
+{
+	const struct softing_platform_data *pdat = pdev->dev.platform_data;
+	struct softing *card;
+	struct net_device *netdev;
+	struct softing_priv *priv;
+	struct resource *pres;
+	int ret;
+	int j;
+
+	if (!pdat) {
+		dev_warn(&pdev->dev, "no platform data\n");
+		return -EINVAL;
+	}
+	if (pdat->nbus > ARRAY_SIZE(card->net)) {
+		dev_warn(&pdev->dev, "%u nets??\n", pdat->nbus);
+		return -EINVAL;
+	}
+
+	card = kzalloc(sizeof(*card), GFP_KERNEL);
+	if (!card)
+		return -ENOMEM;
+	card->pdat = pdat;
+	card->pdev = pdev;
+	platform_set_drvdata(pdev, card);
+	/* try_module_get(THIS_MODULE); */
+	mutex_init(&card->fw.lock);
+	spin_lock_init(&card->spin);
+
+	ret = -EINVAL;
+	pres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!pres)
+		goto platform_resource_failed;;
+	card->dpram_phys = pres->start;
+	card->dpram_size = pres->end - pres->start + 1;
+	card->dpram = ioremap_nocache(card->dpram_phys, card->dpram_size);
+	if (!card->dpram) {
+		dev_alert(&card->pdev->dev, "dpram ioremap failed\n");
+		goto ioremap_failed;
+	}
+
+	pres = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (pres)
+		card->irq.nr = pres->start;
+
+	/* reset card */
+	ret = -EIO;
+	if (softing_card_boot(card)) {
+		dev_alert(&pdev->dev, "failed to boot\n");
+		goto boot_failed;
+	}
+
+	/* only now, the chip's are known */
+	card->id.freq = card->pdat->freq * 1000000UL;
+
+	ret = sysfs_create_group(&pdev->dev.kobj, &softing_pdev_group);
+	if (ret < 0) {
+		dev_alert(&card->pdev->dev, "sysfs failed\n");
+		goto sysfs_failed;
+	}
+
+	ret = -ENOMEM;
+	for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+		card->net[j] = netdev =
+			softing_netdev_create(card, card->id.chip[j]);
+		if (!netdev) {
+			dev_alert(&pdev->dev, "failed to make can[%i]", j);
+			goto netdev_failed;
+		}
+		priv = netdev_priv(card->net[j]);
+		priv->index = j;
+		ret = softing_netdev_register(netdev);
+		if (ret) {
+			free_candev(netdev);
+			card->net[j] = NULL;
+			dev_alert(&card->pdev->dev,
+				"failed to register can[%i]\n", j);
+			goto netdev_failed;
+		}
+	}
+	dev_info(&card->pdev->dev, "card initialised\n");
+	return 0;
+
+netdev_failed:
+	for (j = 0; j < ARRAY_SIZE(card->net); ++j) {
+		if (!card->net[j])
+			continue;
+		softing_netdev_cleanup(card->net[j]);
+	}
+	sysfs_remove_group(&pdev->dev.kobj, &softing_pdev_group);
+sysfs_failed:
+	softing_card_shutdown(card);
+boot_failed:
+	iounmap(card->dpram);
+ioremap_failed:
+platform_resource_failed:
+	kfree(card);
+	return ret;
+}
+
+static struct platform_driver softing_driver = {
+	.driver = {
+		.name = "softing",
+		.owner = THIS_MODULE,
+	},
+	.probe = softing_pdev_probe,
+	.remove = softing_pdev_remove,
+};
+
+MODULE_ALIAS("platform:softing");
+
+static int __init softing_start(void)
+{
+	return platform_driver_register(&softing_driver);
+}
+
+static void __exit softing_stop(void)
+{
+	platform_driver_unregister(&softing_driver);
+}
+
+module_init(softing_start);
+module_exit(softing_stop);
+
+MODULE_DESCRIPTION("Softing DPRAM CAN driver");
+MODULE_AUTHOR("Kurt Van Dijck <kurt.van.dijck-/BeEPy95v10@public.gmane.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/can/softing/softing_platform.h b/drivers/net/can/softing/softing_platform.h
new file mode 100644
index 0000000..678df36
--- /dev/null
+++ b/drivers/net/can/softing/softing_platform.h
@@ -0,0 +1,38 @@
+
+#include <linux/platform_device.h>
+
+#ifndef _SOFTING_DEVICE_H_
+#define _SOFTING_DEVICE_H_
+
+/* softing firmware directory prefix */
+#define fw_dir "softing-4.6/"
+
+struct softing_platform_data {
+	unsigned int manf;
+	unsigned int prod;
+	/* generation
+	 * 1st with NEC or SJA1000
+	 * 8bit, exclusive interrupt, ...
+	 * 2nd only SJA1000
+	 * 16bit, shared interrupt
+	 */
+	int generation;
+	int nbus; /* # busses on device */
+	unsigned int freq; /* crystal in MHz */
+	unsigned int max_brp;
+	unsigned int max_sjw;
+	unsigned long dpram_size;
+	char name[32];
+	struct {
+		unsigned long offs;
+		unsigned long addr;
+		const char *fw;
+	} boot, load, app;
+	/* reset() function, bring pdev in or out of reset, depending on
+	   value */
+	int (*reset)(struct platform_device *pdev, int value);
+	int (*enable_irq)(struct platform_device *pdev, int value);
+};
+
+#endif
+

^ permalink raw reply related

* Re: [PATCH v2 05/10] net/fec: add dual fec support for mx28
From: Baruch Siach @ 2011-01-04 15:07 UTC (permalink / raw)
  To: Shawn Guo
  Cc: davem, gerg, eric, bryan.wu, r64343, B32542, u.kleine-koenig, lw,
	w.sang, s.hauer, netdev, linux-arm-kernel
In-Reply-To: <20110104141259.GA21274@freescale.com>

Hi Shawn,

On Tue, Jan 04, 2011 at 10:13:09PM +0800, Shawn Guo wrote:
> On Tue, Jan 04, 2011 at 11:59:16AM +0200, Baruch Siach wrote:
> > On Tue, Jan 04, 2011 at 05:24:11PM +0800, Shawn Guo wrote:

[snip]

> > > -#ifdef CONFIG_ARCH_MXC
> > > -#include <mach/hardware.h>
> > 
> > Since you now remove mach/hardware.h for ARCH_MXC, does this build for all 
> > i.MX variants?
> > 
> Did the test build for mx25, mx27, mx3 and mx51.

This is surprising. It means that this include was not needed in the first 
place. git blame says this was added in 196719ec (fec: Add support for 
Freescale MX27) by Sascha.

> > > +#ifdef CONFIG_SOC_IMX28
> > > +/*
> > > + * mx28 does not have MIIGSK registers
> > > + */
> > > +#undef FEC_MIIGSK_ENR
> > > +#include <mach/mxs.h>
> > > +#else
> > > +#define cpu_is_mx28()	(0)
> > > +#endif
> > 
> > This breaks kernels for multiple archs (e.g. i.MX28 and i.MX25). Please use 
> > run-time detection of CPU type, and do the MII/RMII etc. configuration 
> > accordingly.
> > 
> I do not find a good way to detect cpu type.  Neither adding a new
> platform data field nor using __machine_arch_type to enumerate all
> mx28 based machine (though there is only one currently) seems to be
> good for me.

How about:

#ifdef CONFIG_SOC_IMX28
#include <mach/mxs.h>
#else
#define cpu_is_mx28()    (0)
#endif

if (cpu_is_mx28() {
    /* Do i.MX28 stuff */
} else {
    /* Do other i.MX stuff */
}

Note that the '#ifdef FEC_MIIGSK_ENR' section in fec_restart() is there only 
to allow build for M5272 which does not have this define in fec.h. Physically, 
i.MX27 does not have this register either.

> I will try to manipulate some mx28 unique register to identify mx28
> from other i.mx variants.  Hopefully, it will work.
> 
> Thanks for the comments.

baruch

-- 
                                                     ~. .~   Tk Open Systems
=}------------------------------------------------ooO--U--Ooo------------{=
   - baruch@tkos.co.il - tel: +972.2.679.5364, http://www.tkos.co.il -

^ permalink raw reply

* [PATCH net-next-2.6 v2 2/2] can: add driver for Softing card
From: Kurt Van Dijck @ 2011-01-04 15:09 UTC (permalink / raw)
  To: netdev, socketcan-core
In-Reply-To: <20110104150513.GA321@e-circ.dyndns.org>

This patch adds the driver that creates a platform:softing device
from a pcmcia_device
Note: the Kconfig indicates a dependency on the softing.ko driver,
but this is purely to make configuration intuitive. This driver will
work independent, but no CAN network devices appear until softing.ko is
loaded too.

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>

---
 drivers/net/can/softing/Kconfig      |   13 ++
 drivers/net/can/softing/Makefile     |    1 +
 drivers/net/can/softing/softing_cs.c |  361 ++++++++++++++++++++++++++++++++++
 3 files changed, 375 insertions(+), 0 deletions(-)

diff --git a/drivers/net/can/softing/Kconfig b/drivers/net/can/softing/Kconfig
index 072f337..14ebe14 100644
--- a/drivers/net/can/softing/Kconfig
+++ b/drivers/net/can/softing/Kconfig
@@ -14,3 +14,16 @@ config CAN_SOFTING
 	  controls the 2 busses on the card together.
 	  As such, some actions (start/stop/busoff recovery) on 1 bus
 	  must bring down the other bus too temporarily.
+
+config CAN_SOFTING_CS
+	tristate "Softing CAN pcmcia cards"
+	depends on PCMCIA
+	---help---
+	  Support for PCMCIA cards from Softing Gmbh & some cards
+	  from Vector Gmbh.
+	  You need firmware for these, which you can get at
+	  http://developer.berlios.de/projects/socketcan/
+	  This version of the driver is written against
+	  firmware version 4.6 (softing-fw-4.6-binaries.tar.gz)
+	  In order to use the card as CAN device, you need the Softing generic
+	  support too.
diff --git a/drivers/net/can/softing/Makefile b/drivers/net/can/softing/Makefile
index 7878b7b..5f0f527 100644
--- a/drivers/net/can/softing/Makefile
+++ b/drivers/net/can/softing/Makefile
@@ -1,5 +1,6 @@
 
 softing-y := softing_main.o softing_fw.o
 obj-$(CONFIG_CAN_SOFTING)        += softing.o
+obj-$(CONFIG_CAN_SOFTING_CS)     += softing_cs.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/softing/softing_cs.c b/drivers/net/can/softing/softing_cs.c
new file mode 100644
index 0000000..cffd4d1
--- /dev/null
+++ b/drivers/net/can/softing/softing_cs.c
@@ -0,0 +1,361 @@
+/*
+ * drivers/net/can/softing/softing_cs.c
+ *
+ * Copyright (C) 2008-2010
+ *
+ * - Kurt Van Dijck, EIA Electronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <pcmcia/cistpl.h>
+#include <pcmcia/ds.h>
+
+#include "softing_platform.h"
+
+static int softingcs_index;
+static spinlock_t softingcs_index_lock;
+
+static int softingcs_reset(struct platform_device *pdev, int v);
+static int softingcs_enable_irq(struct platform_device *pdev, int v);
+
+/*
+ * platform_data descriptions
+ */
+static const struct softing_platform_data softingcs_platform_data[] = {
+{
+	.name = "CANcard",
+	.manf = 0x0168, .prod = 0x001,
+	.generation = 1,
+	.nbus = 2,
+	.freq = 16, .max_brp = 32, .max_sjw = 4,
+	.dpram_size = 0x0800,
+	.boot = {0x0000, 0x000000, fw_dir "bcard.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cancard.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = softingcs_enable_irq,
+}, {
+	.name = "CANcard-NEC",
+	.manf = 0x0168, .prod = 0x002,
+	.generation = 1,
+	.nbus = 2,
+	.freq = 16, .max_brp = 32, .max_sjw = 4,
+	.dpram_size = 0x0800,
+	.boot = {0x0000, 0x000000, fw_dir "bcard.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cancard.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = softingcs_enable_irq,
+}, {
+	.name = "CANcard-SJA",
+	.manf = 0x0168, .prod = 0x004,
+	.generation = 1,
+	.nbus = 2,
+	.freq = 20, .max_brp = 32, .max_sjw = 4,
+	.dpram_size = 0x0800,
+	.boot = {0x0000, 0x000000, fw_dir "bcard.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cansja.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = softingcs_enable_irq,
+}, {
+	.name = "CANcard-2",
+	.manf = 0x0168, .prod = 0x005,
+	.generation = 2,
+	.nbus = 2,
+	.freq = 24, .max_brp = 64, .max_sjw = 4,
+	.dpram_size = 0x1000,
+	.boot = {0x0000, 0x000000, fw_dir "bcard2.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard2.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cancrd2.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = 0,
+}, {
+	.name = "Vector-CANcard",
+	.manf = 0x0168, .prod = 0x081,
+	.generation = 1,
+	.nbus = 2,
+	.freq = 16, .max_brp = 64, .max_sjw = 4,
+	.dpram_size = 0x0800,
+	.boot = {0x0000, 0x000000, fw_dir "bcard.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cancard.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = softingcs_enable_irq,
+}, {
+	.name = "Vector-CANcard-SJA",
+	.manf = 0x0168, .prod = 0x084,
+	.generation = 1,
+	.nbus = 2,
+	.freq = 20, .max_brp = 32, .max_sjw = 4,
+	.dpram_size = 0x0800,
+	.boot = {0x0000, 0x000000, fw_dir "bcard.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cansja.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = softingcs_enable_irq,
+}, {
+	.name = "Vector-CANcard-2",
+	.manf = 0x0168, .prod = 0x085,
+	.generation = 2,
+	.nbus = 2,
+	.freq = 24, .max_brp = 64, .max_sjw = 4,
+	.dpram_size = 0x1000,
+	.boot = {0x0000, 0x000000, fw_dir "bcard2.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard2.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cancrd2.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = 0,
+}, {
+	.name = "EDICcard-NEC",
+	.manf = 0x0168, .prod = 0x102,
+	.generation = 1,
+	.nbus = 2,
+	.freq = 16, .max_brp = 64, .max_sjw = 4,
+	.dpram_size = 0x0800,
+	.boot = {0x0000, 0x000000, fw_dir "bcard.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cancard.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = softingcs_enable_irq,
+}, {
+	.name = "EDICcard-2",
+	.manf = 0x0168, .prod = 0x105,
+	.generation = 2,
+	.nbus = 2,
+	.freq = 24, .max_brp = 64, .max_sjw = 4,
+	.dpram_size = 0x1000,
+	.boot = {0x0000, 0x000000, fw_dir "bcard2.bin",},
+	.load = {0x0120, 0x00f600, fw_dir "ldcard2.bin",},
+	.app = {0x0010, 0x0d0000, fw_dir "cancrd2.bin",},
+	.reset = softingcs_reset,
+	.enable_irq = 0,
+}, {
+	0, 0,
+},
+};
+
+MODULE_FIRMWARE(fw_dir "bcard.bin");
+MODULE_FIRMWARE(fw_dir "ldcard.bin");
+MODULE_FIRMWARE(fw_dir "cancard.bin");
+MODULE_FIRMWARE(fw_dir "cansja.bin");
+
+MODULE_FIRMWARE(fw_dir "bcard2.bin");
+MODULE_FIRMWARE(fw_dir "ldcard2.bin");
+MODULE_FIRMWARE(fw_dir "cancrd2.bin");
+
+static const struct softing_platform_data *softingcs_find_platform_data(
+		unsigned int manf, unsigned int prod)
+{
+	const struct softing_platform_data *lp;
+
+	for (lp = softingcs_platform_data; lp->manf; ++lp) {
+		if ((lp->manf == manf) && (lp->prod == prod))
+			return lp;
+	}
+	return 0;
+}
+
+/*
+ * platformdata callbacks
+ */
+static int softingcs_reset(struct platform_device *pdev, int v)
+{
+	struct pcmcia_device *pcmcia = to_pcmcia_dev(pdev->dev.parent);
+
+	dev_dbg(&pdev->dev, "pcmcia config [2] %02x\n", v ? 0 : 0x20);
+	return pcmcia_write_config_byte(pcmcia, 2, v ? 0 : 0x20);
+}
+
+static int softingcs_enable_irq(struct platform_device *pdev, int v)
+{
+	struct pcmcia_device *pcmcia = to_pcmcia_dev(pdev->dev.parent);
+
+	dev_dbg(&pdev->dev, "pcmcia config [0] %02x\n", v ? 0x60 : 0);
+	return pcmcia_write_config_byte(pcmcia, 0, v ? 0x60 : 0);
+}
+
+/*
+ * pcmcia check
+ */
+static int softingcs_probe_config(struct pcmcia_device *pcmcia,
+		void *priv_data)
+{
+	struct softing_platform_data *pdat = priv_data;
+	struct resource *pres;
+	int memspeed = 0;
+
+	WARN_ON(!pdat);
+	pres = pcmcia->resource[PCMCIA_IOMEM_0];
+	if (resource_size(pres) < 0x1000)
+		return -ERANGE;
+
+	pres->flags |= WIN_MEMORY_TYPE_CM | WIN_ENABLE;
+	if (pdat->generation < 2) {
+		pres->flags |= WIN_USE_WAIT | WIN_DATA_WIDTH_8;
+		memspeed = 3;
+	} else {
+		pres->flags |= WIN_DATA_WIDTH_16;
+	}
+	return pcmcia_request_window(pcmcia, pres, memspeed);
+}
+
+static void softingcs_remove(struct pcmcia_device *pcmcia)
+{
+	struct platform_device *pdev = pcmcia->priv;
+
+	/* free bits */
+	platform_device_unregister(pdev);
+	/* release pcmcia stuff */
+	pcmcia_disable_device(pcmcia);
+}
+
+/*
+ * platform_device wrapper
+ * pdev->resource has 2 entries: io & irq
+ */
+static void softingcs_pdev_release(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	kfree(pdev);
+}
+
+static int softingcs_probe(struct pcmcia_device *pcmcia)
+{
+	int ret;
+	struct platform_device *pdev;
+	const struct softing_platform_data *pdat;
+	struct resource *pres;
+	struct dev {
+		struct platform_device pdev;
+		struct resource res[2];
+	} *dev;
+
+	/* find matching platform_data */
+	pdat = softingcs_find_platform_data(pcmcia->manf_id, pcmcia->card_id);
+	if (!pdat)
+		return -ENOTTY;
+
+	/* setup pcmcia device */
+	pcmcia->config_flags |= CONF_ENABLE_IRQ | CONF_AUTO_SET_IOMEM |
+		CONF_AUTO_SET_VPP | CONF_AUTO_CHECK_VCC;
+	ret = pcmcia_loop_config(pcmcia, softingcs_probe_config, (void *)pdat);
+	if (ret)
+		goto pcmcia_failed;
+
+	ret = pcmcia_enable_device(pcmcia);
+	if (ret < 0)
+		goto pcmcia_failed;
+
+	pres = pcmcia->resource[PCMCIA_IOMEM_0];
+	if (!pres) {
+		ret = -EBADF;
+		goto pcmcia_bad;
+	}
+
+	/* create softing platform device */
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		ret = -ENOMEM;
+		goto mem_failed;
+	}
+	dev->pdev.resource = dev->res;
+	dev->pdev.num_resources = ARRAY_SIZE(dev->res);
+	dev->pdev.dev.release = softingcs_pdev_release;
+
+	pdev = &dev->pdev;
+	pdev->dev.platform_data = (void *)pdat;
+	pdev->dev.parent = &pcmcia->dev;
+	pcmcia->priv = pdev;
+
+	/* platform device resources */
+	pdev->resource[0].flags = IORESOURCE_MEM;
+	pdev->resource[0].start = pres->start;
+	pdev->resource[0].end = pres->end;
+
+	pdev->resource[1].flags = IORESOURCE_IRQ;
+	pdev->resource[1].start = pcmcia->irq;
+	pdev->resource[1].end = pdev->resource[1].start;
+
+	/* platform device setup */
+	spin_lock(&softingcs_index_lock);
+	pdev->id = softingcs_index++;
+	spin_unlock(&softingcs_index_lock);
+	pdev->name = "softing";
+	dev_set_name(&pdev->dev, "softingcs.%i", pdev->id);
+	ret = platform_device_register(pdev);
+	if (ret < 0)
+		goto platform_failed;
+
+	dev_info(&pcmcia->dev, "created %s\n", dev_name(&pdev->dev));
+	return 0;
+
+platform_failed:
+	kfree(dev);
+mem_failed:
+pcmcia_bad:
+pcmcia_failed:
+	pcmcia_disable_device(pcmcia);
+	pcmcia->priv = 0;
+	return ret ?: -ENODEV;
+}
+
+static /*const*/ struct pcmcia_device_id softingcs_ids[] = {
+	/* softing */
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0001),
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0002),
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0004),
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0005),
+	/* vector, manufacturer? */
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0081),
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0084),
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0085),
+	/* EDIC */
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0102),
+	PCMCIA_DEVICE_MANF_CARD(0x0168, 0x0105),
+	PCMCIA_DEVICE_NULL,
+};
+
+MODULE_DEVICE_TABLE(pcmcia, softingcs_ids);
+
+static struct pcmcia_driver softingcs_driver = {
+	.owner		= THIS_MODULE,
+	.name		= "softingcs",
+	.id_table	= softingcs_ids,
+	.probe		= softingcs_probe,
+	.remove		= softingcs_remove,
+};
+
+static int __init softingcs_start(void)
+{
+	spin_lock_init(&softingcs_index_lock);
+	return pcmcia_register_driver(&softingcs_driver);
+}
+
+static void __exit softingcs_stop(void)
+{
+	pcmcia_unregister_driver(&softingcs_driver);
+}
+
+module_init(softingcs_start);
+module_exit(softingcs_stop);
+
+MODULE_DESCRIPTION("softing CANcard driver"
+		", links PCMCIA card to softing driver");
+MODULE_LICENSE("GPL");
+

^ permalink raw reply related

* Checking for closed TCP connection in kernel space
From: Martin Sustrik @ 2011-01-04 15:16 UTC (permalink / raw)
  To: netdev

Hi all,

I open a TCP socket in the kernel space (sock_create_kern). I connect or 
bind it, I send and receive data. Everything works as expected.

The problem is how to find out that the peer have closed the connection.

In user space you get 0 when calling recv() is such case. However, in 
kernel space kernel_recvmsg seems to return 0 anytime when there are no 
more data available. It doesn't return an error either.

Should I monitor the socket state using sk_state_change callback? But 
the callback seems to work in async manner, i.e. I can accidentally use 
closed socket before I get notified about the state change.

Any ideas?
Martin

^ permalink raw reply

* Re: [PATCH] ipv4/route.c: respect prefsrc for local routes
From: Joe Perches @ 2011-01-04 15:32 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Eric Dumazet, Changli Gao, Joel Sing, netdev
In-Reply-To: <1294150591.3435.0.camel@bwh-desktop>

On Tue, 2011-01-04 at 14:16 +0000, Ben Hutchings wrote:
> On Tue, 2011-01-04 at 09:38 +0100, Eric Dumazet wrote:
> > Le mardi 04 janvier 2011 à 16:07 +0800, Changli Gao a écrit :
> > > On Tue, Jan 4, 2011 at 3:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > > > Le mardi 04 janvier 2011 à 08:24 +0100, Eric Dumazet a écrit :
> > > >> Please use FIB_RES_PREFSRC(res)
> > > > Hmm no, this is not applicable, but this could be shorter :
> > > > fl.fl4_src = res.fi->fib_prefsrc ? : fl.fl4_dst;
> > > I think Joe may object the use of "? :"
> > Ternary operator is standard C idiom, used in networking stuff, for
> > example in FIB_RES_PREFSRC() ;)
> However, the option to omit the second operand is a GNU extension.

I don't object to using GNU extensions.

The ?: extension is used in most every major subsystem.

$ grep -rP --include=*.[ch] "\?\s*:" * | wc -l
515

(with some false positives)


^ permalink raw reply

* Re: [RFC PATCH 0/3] Simplified 16 bit Toeplitz hash algorithm
From: Ben Hutchings @ 2011-01-04 15:43 UTC (permalink / raw)
  To: Tom Herbert; +Cc: Alexander Duyck, David Miller, netdev@vger.kernel.org
In-Reply-To: <AANLkTimFArsv1gF2BJabvbbQujpHdPC=bO204wkMbR_Z@mail.gmail.com>

On Mon, 2011-01-03 at 19:25 -0800, Tom Herbert wrote:
> >> The general idea is to at least keep the traffic local to one TX/RX
> >> queue pair so that if we cannot match the queue pair to the application,
> >> perhaps the application can be affinitized to match up with the queue
> >> pair.  Otherwise we end up with traffic getting routed to one TX queue
> >> on one CPU, and the RX being routed to another queue on perhaps a
> >> different CPU and it becomes quite difficult to match up the queues and
> >> the applications.
> >
> > Right.  That certainly seems like a Good Thing, though I believe it can
> > be implemented generically by recording the RX queue number on the
> > socket:
> >
> > http://article.gmane.org/gmane.linux.network/158477
> >
> I still don't see the value in doing this RX/TX queue pairing (unless
> you're considering the possibility of explicitly binding sockets to
> queue pairs).

Sure, the real value is in getting TX completions to line up with TX
initiation and queue pairing is not a reliable way to do that.

> XPS should be sufficient mechanism to get affinity on
> sending side.
[...]

At least if it's configured properly... or if this is automated using my
irq_cpu_rmap.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [RFC] net_sched: mark packet staying on queue too long
From: Stephen Hemminger @ 2011-01-04 16:05 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jesper Dangaard Brouer, hadi, Jarek Poplawski, David Miller,
	Patrick McHardy, netdev
In-Reply-To: <1294153329.3579.99.camel@edumazet-laptop>

I am playing with Quick Fair Queue (QFQ) from FreeBSD.
The authors started with Linux first and sent me their version,
it needs some work first.

http://info.iet.unipi.it/~luigi/qfq/

http://www.google.com/search?client=ubuntu&channel=fs&q=QFQ+rizzo&ie=utf-8&oe=utf-8

^ permalink raw reply

* [PATCH] r8169: support control of advertising
From: Oliver Neukum @ 2011-01-04 16:15 UTC (permalink / raw)
  To: Francois Romieu, davem, netdev, machen

>From 91e84e6b6ca416e8200f04b60383c398f9b93bd2 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 4 Jan 2011 17:11:29 +0100
Subject: [PATCH] r8169: support control of advertising

This allows "ethtool advertise" to control the speed and duplex
features the device offers the switch.

Signed-off-by: Oliver Neukum <oneukum@suse.de>
---
 drivers/net/r8169.c |   41 +++++++++++++++++++++++++++++++----------
 1 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index e165d96..f2c8583 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -490,7 +490,7 @@ struct rtl8169_private {
 #ifdef CONFIG_R8169_VLAN
 	struct vlan_group *vlgrp;
 #endif
-	int (*set_speed)(struct net_device *, u8 autoneg, u16 speed, u8 duplex);
+	int (*set_speed)(struct net_device *, u8 autoneg, u16 speed, u8 duplex, u32 advertising);
 	int (*get_settings)(struct net_device *, struct ethtool_cmd *);
 	void (*phy_reset_enable)(void __iomem *);
 	void (*hw_start)(struct net_device *);
@@ -879,7 +879,7 @@ static int rtl8169_get_regs_len(struct net_device *dev)
 }
 
 static int rtl8169_set_speed_tbi(struct net_device *dev,
-				 u8 autoneg, u16 speed, u8 duplex)
+				 u8 autoneg, u16 speed, u8 duplex, u32 d)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -902,7 +902,7 @@ static int rtl8169_set_speed_tbi(struct net_device *dev,
 }
 
 static int rtl8169_set_speed_xmii(struct net_device *dev,
-				  u8 autoneg, u16 speed, u8 duplex)
+				  u8 autoneg, u16 speed, u8 duplex, u32 adv)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -912,8 +912,20 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 		int auto_nego;
 
 		auto_nego = mdio_read(ioaddr, MII_ADVERTISE);
-		auto_nego |= (ADVERTISE_10HALF | ADVERTISE_10FULL |
-			      ADVERTISE_100HALF | ADVERTISE_100FULL);
+		auto_nego &= ~(ADVERTISED_10baseT_Half |
+				ADVERTISE_10FULL |
+				ADVERTISE_100HALF |
+				ADVERTISE_100FULL);
+		
+		if (adv & ADVERTISED_10baseT_Half)
+			auto_nego |= ADVERTISE_10HALF;
+		if (adv & ADVERTISED_10baseT_Full)
+			auto_nego |= ADVERTISE_10FULL;
+		if (adv & ADVERTISED_100baseT_Half)
+			auto_nego |= ADVERTISE_100HALF;
+		if (adv & ADVERTISED_100baseT_Full)
+			auto_nego |=  ADVERTISE_100FULL;
+
 		auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
 
 		giga_ctrl = mdio_read(ioaddr, MII_CTRL1000);
@@ -928,7 +940,10 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 		    (tp->mac_version != RTL_GIGA_MAC_VER_14) &&
 		    (tp->mac_version != RTL_GIGA_MAC_VER_15) &&
 		    (tp->mac_version != RTL_GIGA_MAC_VER_16)) {
-			giga_ctrl |= ADVERTISE_1000FULL | ADVERTISE_1000HALF;
+			if (adv & ADVERTISED_1000baseT_Half)
+				giga_ctrl |= ADVERTISE_1000HALF;
+			if (adv &ADVERTISED_1000baseT_Full )
+				giga_ctrl |= ADVERTISE_1000FULL;
 		} else {
 			netif_info(tp, link, dev,
 				   "PHY does not support 1000Mbps\n");
@@ -985,12 +1000,12 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 }
 
 static int rtl8169_set_speed(struct net_device *dev,
-			     u8 autoneg, u16 speed, u8 duplex)
+			     u8 autoneg, u16 speed, u8 duplex, u32 advertising)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	int ret;
 
-	ret = tp->set_speed(dev, autoneg, speed, duplex);
+	ret = tp->set_speed(dev, autoneg, speed, duplex, advertising);
 
 	if (netif_running(dev) && (tp->phy_1000_ctrl_reg & ADVERTISE_1000FULL))
 		mod_timer(&tp->timer, jiffies + RTL8169_PHY_TIMEOUT);
@@ -1005,7 +1020,7 @@ static int rtl8169_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	int ret;
 
 	spin_lock_irqsave(&tp->lock, flags);
-	ret = rtl8169_set_speed(dev, cmd->autoneg, cmd->speed, cmd->duplex);
+	ret = rtl8169_set_speed(dev, cmd->autoneg, cmd->speed, cmd->duplex, cmd->advertising);
 	spin_unlock_irqrestore(&tp->lock, flags);
 
 	return ret;
@@ -2842,7 +2857,13 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 	 * rtl8169_set_speed_xmii takes good care of the Fast Ethernet
 	 * only 8101. Don't panic.
 	 */
-	rtl8169_set_speed(dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL);
+	rtl8169_set_speed(dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL,
+			ADVERTISED_10baseT_Half |
+			ADVERTISED_10baseT_Full |
+			ADVERTISED_100baseT_Half |
+			ADVERTISED_100baseT_Full |
+			ADVERTISED_1000baseT_Half |
+			ADVERTISED_1000baseT_Full );
 
 	if (RTL_R8(PHYstatus) & TBI_Enable)
 		netif_info(tp, link, dev, "TBI auto-negotiating\n");
-- 
1.7.1


^ permalink raw reply related

* Re: [PATCH] r8169: support control of advertising
From: Ben Hutchings @ 2011-01-04 17:20 UTC (permalink / raw)
  To: Oliver Neukum; +Cc: Francois Romieu, davem, netdev, machen
In-Reply-To: <201101041715.09622.oneukum@suse.de>

On Tue, 2011-01-04 at 17:15 +0100, Oliver Neukum wrote:
> From 91e84e6b6ca416e8200f04b60383c398f9b93bd2 Mon Sep 17 00:00:00 2001
> From: Oliver Neukum <oliver@neukum.org>
> Date: Tue, 4 Jan 2011 17:11:29 +0100
> Subject: [PATCH] r8169: support control of advertising
> 
> This allows "ethtool advertise" to control the speed and duplex
> features the device offers the switch.
> 
> Signed-off-by: Oliver Neukum <oneukum@suse.de>
> ---
>  drivers/net/r8169.c |   41 +++++++++++++++++++++++++++++++----------
>  1 files changed, 31 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
> index e165d96..f2c8583 100644
> --- a/drivers/net/r8169.c
> +++ b/drivers/net/r8169.c
[...]
> @@ -912,8 +912,20 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
>  		int auto_nego;
>  
>  		auto_nego = mdio_read(ioaddr, MII_ADVERTISE);
> -		auto_nego |= (ADVERTISE_10HALF | ADVERTISE_10FULL |
> -			      ADVERTISE_100HALF | ADVERTISE_100FULL);
> +		auto_nego &= ~(ADVERTISED_10baseT_Half |

Should be ADVERTISE_10HALF.

> +				ADVERTISE_10FULL |
> +				ADVERTISE_100HALF |
> +				ADVERTISE_100FULL);
> +		
> +		if (adv & ADVERTISED_10baseT_Half)
> +			auto_nego |= ADVERTISE_10HALF;
> +		if (adv & ADVERTISED_10baseT_Full)
> +			auto_nego |= ADVERTISE_10FULL;
> +		if (adv & ADVERTISED_100baseT_Half)
> +			auto_nego |= ADVERTISE_100HALF;
> +		if (adv & ADVERTISED_100baseT_Full)
> +			auto_nego |=  ADVERTISE_100FULL;
> +
>  		auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
[...]

Pause advertising should also be controllable through ethtool, if flow
control can be altered in the MAC.  (It's not clear whether it can.)

This should also check for unsupported advertising flags (e.g.
ADVERTISED_1000baseT_Full when the MAC doesn't support 1000M) and return
-EINVAL if they're set.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [PATCH net-next-2.6 1/2] can: add driver for Softing card
From: David Miller @ 2011-01-04 17:31 UTC (permalink / raw)
  To: mkl; +Cc: kurt.van.dijck, socketcan-core, netdev
In-Reply-To: <4D22FC72.7040706@pengutronix.de>

From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 04 Jan 2011 11:54:42 +0100

> There's ioread() and get_unaligned(). Do we have a combination of this?

No.  And there will never be.

Because it's never well defined whether it is safe to break up an
I/O operation into two sub-operations, nor in what order those
sub-operations can be done in.

For example, just reading one byte of a two-byte register might
clear the status bits in other byte if the two bytes are read in
the wrong order.

On some chips, avoiding losing information might be completely
impossible.

You have to do it by hand in your driver, because only you know
what implementation will work correctly.

^ permalink raw reply

* Re: [PATCH 07/15]drivers:net:wireless:iwlwifi Typo change diable to disable.
From: John W. Linville @ 2011-01-04 17:56 UTC (permalink / raw)
  To: Justin P. Mattock
  Cc: trivial, linux-m68k, linux-kernel, netdev, ivtv-devel,
	linux-media, linux-wireless, linux-scsi, spi-devel-general, devel,
	linux-usb
In-Reply-To: <1293750484-1161-7-git-send-email-justinmattock@gmail.com>

On Thu, Dec 30, 2010 at 03:07:56PM -0800, Justin P. Mattock wrote:
> The below patch fixes a typo "diable" to "disable". Please let me know if this 
> is correct or not.
> 
> Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>
Acked-by: John W. Linville <linville@tuxdriver.com>

-- 
John W. Linville		Someday the world will need a hero, and you
linville@tuxdriver.com			might be all we have.  Be ready.

^ permalink raw reply

* Re: [net-next-2.6 PATCH v4 2/2] net_sched: implement a root container qdisc sch_mclass
From: John Fastabend @ 2011-01-04 18:16 UTC (permalink / raw)
  To: Jarek Poplawski
  Cc: davem@davemloft.net, hadi@cyberus.ca, shemminger@vyatta.com,
	tgraf@infradead.org, eric.dumazet@gmail.com,
	bhutchings@solarflare.com, nhorman@tuxdriver.com,
	netdev@vger.kernel.org
In-Reply-To: <20110104111816.GA8085@ff.dom.local>

On 1/4/2011 3:18 AM, Jarek Poplawski wrote:
> On Mon, Jan 03, 2011 at 07:05:58PM -0800, John Fastabend wrote:
>> This implements a mqprio queueing discipline that by default creates
>> a pfifo_fast qdisc per tx queue and provides the needed configuration
>> interface.
>>
>> Using the mqprio qdisc the number of tcs currently in use along
>> with the range of queues alloted to each class can be configured. By
>> default skbs are mapped to traffic classes using the skb priority.
>> This mapping is configurable.
>>
>> Configurable parameters,
>>
>> struct tc_mclass_qopt {
> 
> Now *_mqprio here and in the subject of the message.
> 
>>         __u8    num_tc;
>>         __u8    prio_tc_map[16];
> 
> s/16/TC_SOMETHING/
> 
>>         __u8    hw;
>>         __u16   count[16];
>>         __u16   offset[16];
>> };
>>
>> Here the count/offset pairing give the queue alignment and the
>> prio_tc_map gives the mapping from skb->priority to tc.
>>
>> The hw bit determines if the hardware should configure the count
>> and offset values. If the hardware bit is set then the operation
>> will fail if the hardware does not implement the ndo_setup_tc
>> operation. This is to avoid undetermined states where the hardware
>> may or may not control the queue mapping. Also minimal bounds
>> checking is done on the count/offset to verify a queue does not
>> exceed num_tx_queues and that queue ranges do not overlap. Otherwise
>> it is left to user policy or hardware configuration to create
>> useful mappings.
>>
>> It is expected that hardware QOS schemes can be implemented by
>> creating appropriate mappings of queues in ndo_tc_setup().
>>
>> One expected use case is drivers will use the ndo_setup_tc to map
>> queue ranges onto 802.1Q traffic classes. This provides a generic
>> mechanism to map network traffic onto these traffic classes and
>> removes the need for lower layer drivers to no specifics about
>> traffic types.
>>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>> ---
>>
>>  include/linux/netdevice.h |    3
>>  include/linux/pkt_sched.h |    9 +
>>  net/sched/Kconfig         |   10 +
>>  net/sched/Makefile        |    1
>>  net/sched/sch_generic.c   |    4 +
>>  net/sched/sch_mqprio.c    |  357 +++++++++++++++++++++++++++++++++++++++++++++
>>  6 files changed, 384 insertions(+), 0 deletions(-)
>>  create mode 100644 net/sched/sch_mqprio.c
>>
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index 073c48d..f90a863 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -764,6 +764,8 @@ struct netdev_tc_txq {
>>   * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
>>   *                     struct nlattr *port[]);
>>   * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
>> + *
>> + * int (*ndo_setup_tc)(struct net_device *dev, int tc);
>>   */
>>  #define HAVE_NET_DEVICE_OPS
>>  struct net_device_ops {
>> @@ -822,6 +824,7 @@ struct net_device_ops {
>>                                                  struct nlattr *port[]);
>>       int                     (*ndo_get_vf_port)(struct net_device *dev,
>>                                                  int vf, struct sk_buff *skb);
>> +     int                     (*ndo_setup_tc)(struct net_device *dev, u8 tc);
>>  #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
>>       int                     (*ndo_fcoe_enable)(struct net_device *dev);
>>       int                     (*ndo_fcoe_disable)(struct net_device *dev);
>> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
>> index 2cfa4bc..8e29fa6 100644
>> --- a/include/linux/pkt_sched.h
>> +++ b/include/linux/pkt_sched.h
>> @@ -481,4 +481,13 @@ struct tc_drr_stats {
>>       __u32   deficit;
>>  };
>>
>> +/* MCLASS */
> 
> /* MQPRIO */
> 
>> +struct tc_mqprio_qopt {
>> +     __u8    num_tc;
>> +     __u8    prio_tc_map[16];
>> +     __u8    hw;
>> +     __u16   count[16];
>> +     __u16   offset[16];
> 
> s/16/TC_SOMETHING/

fixed.

> 
>> +};
>> +
>>  #endif
>> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>> index a36270a..e42853b 100644
>> --- a/net/sched/Kconfig
>> +++ b/net/sched/Kconfig
>> @@ -205,6 +205,16 @@ config NET_SCH_DRR
>>
>>         If unsure, say N.
>>
>> +config NET_SCH_MQPRIO
>> +     tristate "Multi-queue priority scheduler (MQPRIO)"
>> +     help
>> +       Say Y here if you want to use the Multi-queue Priority scheduler.
> 
> We might mention about a proper NIC required.
> 

Probably good to add a note here. Although you could use qdisc without NIC support.

>> +
>> +       To compile this driver as a module, choose M here: the module will
>> +       be called sch_mqprio.
>> +
>> +       If unsure, say N.
>> +
>>  config NET_SCH_INGRESS
>>       tristate "Ingress Qdisc"
>>       depends on NET_CLS_ACT
>> diff --git a/net/sched/Makefile b/net/sched/Makefile
>> index 960f5db..26ce681 100644
>> --- a/net/sched/Makefile
>> +++ b/net/sched/Makefile
>> @@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ)        += sch_multiq.o
>>  obj-$(CONFIG_NET_SCH_ATM)    += sch_atm.o
>>  obj-$(CONFIG_NET_SCH_NETEM)  += sch_netem.o
>>  obj-$(CONFIG_NET_SCH_DRR)    += sch_drr.o
>> +obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
>>  obj-$(CONFIG_NET_CLS_U32)    += cls_u32.o
>>  obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
>>  obj-$(CONFIG_NET_CLS_FW)     += cls_fw.o
>> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
>> index 34dc598..723b278 100644
>> --- a/net/sched/sch_generic.c
>> +++ b/net/sched/sch_generic.c
>> @@ -540,6 +540,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
>>       .dump           =       pfifo_fast_dump,
>>       .owner          =       THIS_MODULE,
>>  };
>> +EXPORT_SYMBOL(pfifo_fast_ops);
>>
>>  struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
>>                         struct Qdisc_ops *ops)
>> @@ -674,6 +675,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
>>
>>       return oqdisc;
>>  }
>> +EXPORT_SYMBOL(dev_graft_qdisc);
>>
>>  static void attach_one_default_qdisc(struct net_device *dev,
>>                                    struct netdev_queue *dev_queue,
>> @@ -761,6 +763,7 @@ void dev_activate(struct net_device *dev)
>>               dev_watchdog_up(dev);
>>       }
>>  }
>> +EXPORT_SYMBOL(dev_activate);
>>
>>  static void dev_deactivate_queue(struct net_device *dev,
>>                                struct netdev_queue *dev_queue,
>> @@ -840,6 +843,7 @@ void dev_deactivate(struct net_device *dev)
>>       list_add(&dev->unreg_list, &single);
>>       dev_deactivate_many(&single);
>>  }
>> +EXPORT_SYMBOL(dev_deactivate);
>>
>>  static void dev_init_scheduler_queue(struct net_device *dev,
>>                                    struct netdev_queue *dev_queue,
>> diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
>> new file mode 100644
>> index 0000000..e9e74c7
>> --- /dev/null
>> +++ b/net/sched/sch_mqprio.c
>> @@ -0,0 +1,357 @@
>> +/*
>> + * net/sched/sch_mqprio.c
>> + *
>> + * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License
>> + * version 2 as published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/types.h>
>> +#include <linux/slab.h>
>> +#include <linux/kernel.h>
>> +#include <linux/string.h>
>> +#include <linux/errno.h>
>> +#include <linux/skbuff.h>
>> +#include <net/netlink.h>
>> +#include <net/pkt_sched.h>
>> +#include <net/sch_generic.h>
>> +
>> +struct mqprio_sched {
>> +     struct Qdisc            **qdiscs;
>> +     int hw_owned;
>> +};
>> +
>> +static void mqprio_destroy(struct Qdisc *sch)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct mqprio_sched *priv = qdisc_priv(sch);
>> +     unsigned int ntx;
>> +
>> +     if (!priv->qdiscs)
>> +             return;
>> +
>> +     for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
>> +             qdisc_destroy(priv->qdiscs[ntx]);
>> +
>> +     if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
>> +             dev->netdev_ops->ndo_setup_tc(dev, 0);
>> +     else
>> +             netdev_set_num_tc(dev, 0);
> 
> I'm not sure why this unsetting is needed in case it was set by a
> driver before mqprio was created.
> 

ndo_setup_tc(dev,x) is called at init to be symetric I think we need to call it here as well. Otherwise we could leave num_tc set unecessarily. Maybe saving the original value and setting it back to the orignal num_tc would be more correct.

>> +
>> +     kfree(priv->qdiscs);
>> +}
>> +
>> +static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
>> +{
>> +     int i, j;
>> +
>> +     /* Verify num_tc is not out of max range */
>> +     if (qopt->num_tc > TC_MAX_QUEUE)
>> +             return -EINVAL;
>> +
>> +     for (i = 0; i < qopt->num_tc; i++) {
>> +             unsigned int last = qopt->offset[i] + qopt->count[i];
>> +             /* Verify the queue offset is in the num tx range */
>> +             if (qopt->offset[i] >= dev->num_tx_queues)
>> +                     return -EINVAL;
>> +             /* Verify the queue count is in tx range being equal to the
>> +              * num_tx_queues indicates the last queue is in use.
>> +              */
>> +             else if (last > dev->num_tx_queues)
>> +                     return -EINVAL;
>> +
>> +             /* Verify that the offset and counts do not overlap */
>> +             for (j = i + 1; j < qopt->num_tc; j++) {
>> +                     if (last > qopt->offset[j])
>> +                             return -EINVAL;
>> +             }
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct mqprio_sched *priv = qdisc_priv(sch);
>> +     struct netdev_queue *dev_queue;
>> +     struct Qdisc *qdisc;
>> +     int i, err = -EOPNOTSUPP;
>> +     struct tc_mqprio_qopt *qopt = NULL;
>> +
>> +     /* Unwind attributes on failure */
>> +     u8 unwnd_tc = dev->num_tc;
>> +     u8 unwnd_map[TC_BITMASK+1];
> 
>         u8 unwnd_map[TC_BITMASK + 1];
> 

fixed

>> +     struct netdev_tc_txq unwnd_txq[TC_MAX_QUEUE];
>> +
>> +     if (sch->parent != TC_H_ROOT)
>> +             return -EOPNOTSUPP;
>> +
>> +     if (!netif_is_multiqueue(dev))
>> +             return -EOPNOTSUPP;
>> +
>> +     if (nla_len(opt) < sizeof(*qopt))
>> +             return -EINVAL;
>> +     qopt = nla_data(opt);
>> +
>> +     memcpy(unwnd_map, dev->prio_tc_map, sizeof(unwnd_map));
>> +     memcpy(unwnd_txq, dev->tc_to_txq, sizeof(unwnd_txq));
>> +
>> +     /* If the mqprio options indicate that hardware should own
>> +      * the queue mapping then run ndo_setup_tc if this can not
>> +      * be done fail immediately.
>> +      */
>> +     if (qopt->hw && dev->netdev_ops->ndo_setup_tc) {
>> +             priv->hw_owned = 1;
>> +             err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
>> +             if (err)
>> +                     return err;
>> +     } else if (!qopt->hw) {
>> +             if (mqprio_parse_opt(dev, qopt))
>> +                     return -EINVAL;
>> +
>> +             if (netdev_set_num_tc(dev, qopt->num_tc))
>> +                     return -EINVAL;
>> +
>> +             for (i = 0; i < qopt->num_tc; i++)
>> +                     netdev_set_tc_queue(dev, i,
>> +                                         qopt->count[i], qopt->offset[i]);
>> +     } else {
>> +             return -EINVAL;
>> +     }
>> +
>> +     /* Always use supplied priority mappings */
>> +     for (i = 0; i < TC_BITMASK+1; i++) {
> 
>         for (i = 0; i < TC_BITMASK + 1; i++) {
> 

fixed.

>> +             if (netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i])) {
>> +                     err = -EINVAL;
>> +                     goto tc_err;
>> +             }
>> +     }
>> +
>> +     /* pre-allocate qdisc, attachment can't fail */
>> +     priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
>> +                            GFP_KERNEL);
>> +     if (priv->qdiscs == NULL) {
>> +             err = -ENOMEM;
>> +             goto tc_err;
>> +     }
>> +
>> +     for (i = 0; i < dev->num_tx_queues; i++) {
>> +             dev_queue = netdev_get_tx_queue(dev, i);
>> +             qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
>> +                                       TC_H_MAKE(TC_H_MAJ(sch->handle),
>> +                                                 TC_H_MIN(i + 1)));
>> +             if (qdisc == NULL) {
>> +                     err = -ENOMEM;
>> +                     goto err;
>> +             }
>> +             qdisc->flags |= TCQ_F_CAN_BYPASS;
>> +             priv->qdiscs[i] = qdisc;
>> +     }
>> +
>> +     sch->flags |= TCQ_F_MQROOT;
>> +     return 0;
>> +
>> +err:
>> +     mqprio_destroy(sch);
>> +tc_err:
>> +     if (priv->hw_owned)
>> +             dev->netdev_ops->ndo_setup_tc(dev, unwnd_tc);
>> +     else
>> +             netdev_set_num_tc(dev, unwnd_tc);
>> +
>> +     memcpy(dev->prio_tc_map, unwnd_map, sizeof(unwnd_map));
>> +     memcpy(dev->tc_to_txq, unwnd_txq, sizeof(unwnd_txq));
>> +
>> +     return err;
>> +}
>> +
>> +static void mqprio_attach(struct Qdisc *sch)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct mqprio_sched *priv = qdisc_priv(sch);
>> +     struct Qdisc *qdisc;
>> +     unsigned int ntx;
>> +
>> +     /* Attach underlying qdisc */
>> +     for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
>> +             qdisc = priv->qdiscs[ntx];
>> +             qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
>> +             if (qdisc)
>> +                     qdisc_destroy(qdisc);
>> +     }
>> +     kfree(priv->qdiscs);
>> +     priv->qdiscs = NULL;
>> +}
>> +
>> +static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
>> +                                          unsigned long cl)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     unsigned long ntx = cl - 1;
>> +
>> +     if (ntx >= dev->num_tx_queues)
>> +             return NULL;
>> +     return netdev_get_tx_queue(dev, ntx);
>> +}
>> +
>> +static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
>> +                 struct Qdisc **old)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
>> +
>> +     if (dev->flags & IFF_UP)
>> +             dev_deactivate(dev);
>> +
>> +     *old = dev_graft_qdisc(dev_queue, new);
>> +
>> +     if (dev->flags & IFF_UP)
>> +             dev_activate(dev);
>> +
>> +     return 0;
>> +}
>> +
>> +static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     struct mqprio_sched *priv = qdisc_priv(sch);
>> +     unsigned char *b = skb_tail_pointer(skb);
>> +     struct tc_mqprio_qopt opt;
>> +     struct Qdisc *qdisc;
>> +     unsigned int i;
>> +
>> +     sch->q.qlen = 0;
>> +     memset(&sch->bstats, 0, sizeof(sch->bstats));
>> +     memset(&sch->qstats, 0, sizeof(sch->qstats));
>> +
>> +     for (i = 0; i < dev->num_tx_queues; i++) {
>> +             qdisc = netdev_get_tx_queue(dev, i)->qdisc;
>> +             spin_lock_bh(qdisc_lock(qdisc));
>> +             sch->q.qlen             += qdisc->q.qlen;
>> +             sch->bstats.bytes       += qdisc->bstats.bytes;
>> +             sch->bstats.packets     += qdisc->bstats.packets;
>> +             sch->qstats.qlen        += qdisc->qstats.qlen;
>> +             sch->qstats.backlog     += qdisc->qstats.backlog;
>> +             sch->qstats.drops       += qdisc->qstats.drops;
>> +             sch->qstats.requeues    += qdisc->qstats.requeues;
>> +             sch->qstats.overlimits  += qdisc->qstats.overlimits;
>> +             spin_unlock_bh(qdisc_lock(qdisc));
>> +     }
>> +
>> +     opt.num_tc = dev->num_tc;
>> +     memcpy(opt.prio_tc_map, dev->prio_tc_map, 16);
> 
> s/16/TC_SOMETHING

fixed

> 
>> +     opt.hw = priv->hw_owned;
>> +
>> +     for (i = 0; i < dev->num_tc; i++) {
>> +             opt.count[i] = dev->tc_to_txq[i].count;
>> +             opt.offset[i] = dev->tc_to_txq[i].offset;
>> +     }
>> +
>> +     NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
>> +
>> +     return skb->len;
>> +nla_put_failure:
>> +     nlmsg_trim(skb, b);
>> +     return -1;
>> +}
>> +
>> +static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
>> +{
>> +     struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
>> +
>> +     return dev_queue->qdisc_sleeping;
>> +}
>> +
>> +static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
>> +{
>> +     unsigned int ntx = TC_H_MIN(classid);
>> +
>> +     if (!mqprio_queue_get(sch, ntx))
>> +             return 0;
>> +     return ntx;
>> +}
>> +
>> +static void mqprio_put(struct Qdisc *sch, unsigned long cl)
>> +{
>> +}
>> +
>> +static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
>> +                      struct sk_buff *skb, struct tcmsg *tcm)
>> +{
>> +     struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
>> +
>> +     tcm->tcm_parent = TC_H_ROOT;
>> +     tcm->tcm_handle |= TC_H_MIN(cl);
>> +     tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
>> +     return 0;
>> +}
>> +
>> +static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
>> +                            struct gnet_dump *d)
>> +{
>> +     struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
>> +
>> +     sch = dev_queue->qdisc_sleeping;
>> +     sch->qstats.qlen = sch->q.qlen;
>> +     if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
>> +         gnet_stats_copy_queue(d, &sch->qstats) < 0)
>> +             return -1;
>> +     return 0;
>> +}
>> +
>> +static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
>> +{
>> +     struct net_device *dev = qdisc_dev(sch);
>> +     unsigned long ntx;
>> +
>> +     if (arg->stop)
>> +             return;
>> +
>> +     arg->count = arg->skip;
>> +     for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
> 
> Did you give up those stats per tc class? You only show the leaf
> classes here, but you could first loop per num_tc (as virtual
> parent classes). So in dump_class_stats you should be able to
> distinguish class 'level' by cl and do the second loop if necessary.
> To show the class hierarchy you change tcm_parent in dump_class
> for 'leaf' classes (like eg in sch_htb/htb_dump_class).
> 

I did give up... but got it working with your hint in v5 thanks.

John.

^ permalink raw reply

* Re: [RFC] net_sched: mark packet staying on queue too long
From: Eric Dumazet @ 2011-01-04 18:20 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Stephen Hemminger, hadi, Jarek Poplawski, David Miller,
	Patrick McHardy, netdev
In-Reply-To: <1294153329.3579.99.camel@edumazet-laptop>

Le mardi 04 janvier 2011 à 16:02 +0100, Eric Dumazet a écrit :

> I'd like to try kind of a SFQRED implementation, ie :
> 
> classify flows, then instead of using plain pfifo queues (currently done
> in SFQ), use N pseudo RED queues.
> 
> RED is a bit complex because it tries to make the probability estimation
> given queue backlog average. It has to use expensive time services (on
> some machines at least, if TSC not available)
> 
> My idea was to take into account the delay packets stay in its queue, so
> that no extra state is needed : Only take a timestamp when packet is
> enqueued, compute delta when dequeued, get 
> 
> Px = delta * Prob_per_time_unit;
> and drop/mark packet with Px probability.
> 
> Ram usage of SFQRED would be the same than SFQ, and cost roughly the
> same (because we could use jiffies based time sampling, (and HZ=1000 for
> a ms unit)).
> 
> 

Here is the POC patch I am currently testing, with a probability to
"early drop" a packet of one percent per ms (HZ=1000 here), only if
packet stayed at least 4 ms on queue.

Of course, this only apply where SFQ is used, with known SFQ limits :)

The term "early drop" is a lie. RED really early mark/drop a packet at
enqueue() time, while I do it at dequeue() time [since I need to compute
the delay]. But effect is the same on sent packets. This might use a bit
more memory, but no more than current SFQ [and only if flows dont react
to mark/drops]

insmod net/sched/sch_sfq.ko red_delay=4

By the way, I do think we should lower SFQ_DEPTH a bit and increase
SFQ_SLOTS by same amount. Allowing 127 packets per flow seems not
necessary in most situations SFQ might be used.

 net/sched/sch_sfq.c |   37 +++++++++++++++++++++++++++++++++----
 1 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index d54ac94..4f958e3 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -24,6 +24,8 @@
 #include <net/ip.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <linux/moduleparam.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -86,6 +88,10 @@
 /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 
+static int red_delay; /* default : no RED handling */
+module_param(red_delay, int, 0);
+MODULE_PARM_DESC(red_delay, "mark/drop packets if they stay in queue longer than red_delay ticks");
+
 /*
  * We dont use pointers to save space.
  * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
@@ -391,6 +397,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
+	qdisc_skb_cb(skb)->timestamp = jiffies;
 	sfq_inc(q, x);
 	if (slot->qlen == 1) {		/* The flow is new */
 		if (q->tail == NULL) {	/* It is the first flow */
@@ -402,11 +409,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->tail = slot;
 		slot->allot = q->scaled_quantum;
 	}
-	if (++sch->q.qlen <= q->limit) {
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+	if (++sch->q.qlen <= q->limit)
 		return NET_XMIT_SUCCESS;
-	}
 
 	sfq_drop(sch);
 	return NET_XMIT_CN;
@@ -432,6 +436,7 @@ sfq_dequeue(struct Qdisc *sch)
 	sfq_index a, next_a;
 	struct sfq_slot *slot;
 
+restart:
 	/* No active slots */
 	if (q->tail == NULL)
 		return NULL;
@@ -455,12 +460,36 @@ next_slot:
 		next_a = slot->next;
 		if (a == next_a) {
 			q->tail = NULL; /* no more active slots */
+			/* last packet queued, dont even try to apply RED */
 			return skb;
 		}
 		q->tail->next = next_a;
 	} else {
 		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
 	}
+	if (red_delay) {
+		long delay = jiffies - qdisc_skb_cb(skb)->timestamp;
+
+		if (delay >= red_delay) {
+			long Px = delay * (0xFFFFFF / 100); /* 1 percent per jiffy */
+			if ((net_random() & 0xFFFFFF) < Px) {
+				if (INET_ECN_set_ce(skb)) {
+					/* no ecnmark counter yet :) */
+					sch->qstats.overlimits++;
+				} else {
+					/* penalize this flow : we drop the 
+					 * packet while we changed slot->allot
+					 */
+					kfree_skb(skb);
+					/* no early_drop counter yet :) */
+					sch->qstats.drops++;
+					goto restart;
+				}
+			}
+		}
+	}
+	sch->bstats.bytes += qdisc_pkt_len(skb);
+	sch->bstats.packets++;
 	return skb;
 }
 



^ permalink raw reply related

* Re: [RFC] net_sched: mark packet staying on queue too long
From: Eric Dumazet @ 2011-01-04 18:29 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Stephen Hemminger, hadi, Jarek Poplawski, David Miller,
	Patrick McHardy, netdev
In-Reply-To: <1294165215.3579.133.camel@edumazet-laptop>

Le mardi 04 janvier 2011 à 19:20 +0100, Eric Dumazet a écrit :

> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
> index d54ac94..4f958e3 100644
> --- a/net/sched/sch_sfq.c
> +++ b/net/sched/sch_sfq.c
> @@ -24,6 +24,8 @@
>  #include <net/ip.h>
>  #include <net/netlink.h>
>  #include <net/pkt_sched.h>
> +#include <net/inet_ecn.h>
> +#include <linux/moduleparam.h>
>  
> 
>  /*	Stochastic Fairness Queuing algorithm.
> @@ -86,6 +88,10 @@
>  /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
>  typedef unsigned char sfq_index;
>  
> +static int red_delay; /* default : no RED handling */
> +module_param(red_delay, int, 0);
> +MODULE_PARM_DESC(red_delay, "mark/drop packets if they stay in queue longer than red_delay ticks");
> +
>  /*
>   * We dont use pointers to save space.
>   * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
> @@ -391,6 +397,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
>  
>  	sch->qstats.backlog += qdisc_pkt_len(skb);
>  	slot_queue_add(slot, skb);
> +	qdisc_skb_cb(skb)->timestamp = jiffies;
>  	sfq_inc(q, x);
>  	if (slot->qlen == 1) {		/* The flow is new */
>  		if (q->tail == NULL) {	/* It is the first flow */
> @@ -402,11 +409,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
>  		q->tail = slot;
>  		slot->allot = q->scaled_quantum;
>  	}
> -	if (++sch->q.qlen <= q->limit) {
> -		sch->bstats.bytes += qdisc_pkt_len(skb);
> -		sch->bstats.packets++;
> +	if (++sch->q.qlen <= q->limit)
>  		return NET_XMIT_SUCCESS;
> -	}
>  
>  	sfq_drop(sch);
>  	return NET_XMIT_CN;
> @@ -432,6 +436,7 @@ sfq_dequeue(struct Qdisc *sch)
>  	sfq_index a, next_a;
>  	struct sfq_slot *slot;
>  
> +restart:
>  	/* No active slots */
>  	if (q->tail == NULL)
>  		return NULL;
> @@ -455,12 +460,36 @@ next_slot:
>  		next_a = slot->next;
>  		if (a == next_a) {
>  			q->tail = NULL; /* no more active slots */
> +			/* last packet queued, dont even try to apply RED */

And of course I should do "goto end;" to properly do accounting,
or maybe do the RED thing after all ;)

>  			return skb;
>  		}
>  		q->tail->next = next_a;
>  	} else {
>  		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
>  	}
> +	if (red_delay) {
> +		long delay = jiffies - qdisc_skb_cb(skb)->timestamp;
> +
> +		if (delay >= red_delay) {
> +			long Px = delay * (0xFFFFFF / 100); /* 1 percent per jiffy */
> +			if ((net_random() & 0xFFFFFF) < Px) {
> +				if (INET_ECN_set_ce(skb)) {
> +					/* no ecnmark counter yet :) */
> +					sch->qstats.overlimits++;
> +				} else {
> +					/* penalize this flow : we drop the 
> +					 * packet while we changed slot->allot
> +					 */
> +					kfree_skb(skb);
> +					/* no early_drop counter yet :) */
> +					sch->qstats.drops++;
> +					goto restart;
> +				}
> +			}
> +		}
> +	}

end:

> +	sch->bstats.bytes += qdisc_pkt_len(skb);
> +	sch->bstats.packets++;
>  	return skb;
>  }
>  
> 



^ permalink raw reply

* Re: [RFC PATCH 04/12] net: introduce NETIF_F_RXCSUM
From: Michał Mirosław @ 2011-01-04 18:33 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev
In-Reply-To: <20101219005715.GC12005@rere.qmqm.pl>

On Sun, Dec 19, 2010 at 01:57:15AM +0100, Michał Mirosław wrote:
> On Thu, Dec 16, 2010 at 11:27:29PM +0000, Ben Hutchings wrote:
> > On Wed, 2010-12-15 at 23:24 +0100, Michał Mirosław wrote:
> > > Introduce NETIF_F_RXCSUM to replace device-private flags for RX checksum
> > > offload. Integrate it with ndo_fix_features.
> > > 
> > > ethtool_op_get_rx_csum() is removed altogether as nothing in-tree uses it.
> > Not explicitly, but what about drivers that turn TX and RX checksumming
> > on and off together?  They are implicitly covered by the case which
> > you're removing:
> > [...]
> > > -	case ETHTOOL_GRXCSUM:
> > > -		rc = ethtool_get_value(dev, useraddr, ethcmd,
> > > -				       (dev->ethtool_ops->get_rx_csum ?
> > > -					dev->ethtool_ops->get_rx_csum :
> > > -					ethtool_op_get_rx_csum));
> > [...]
> I had an impression that drivers not defining get_rx_csum() had no
> RX checksumming or didn't allow it to be toggled. I'll have a closer look
> on this.

All drivers that have set_rx_csum() also define matching get_rx_csum().

Anyway, the code this is removing is broken in that it assumes TX csum
capability implying RX csum capability.

As a side note, GRO receive functions (TCPv4 and v6 - no other protocols
use it) are checking skb->ip_summed != CHECKSUM_NONE before allowing the
packet to be passed on, so it's not even necessary to force disabling GRO
when RX csum is off.

Best Regards,
Michał Mirosław

^ permalink raw reply

* [net-next-2.6 PATCH v5 1/2] net: implement mechanism for HW based QOS
From: John Fastabend @ 2011-01-04 18:56 UTC (permalink / raw)
  To: davem, jarkao2
  Cc: john.r.fastabend, hadi, shemminger, tgraf, eric.dumazet,
	bhutchings, nhorman, netdev

This patch provides a mechanism for lower layer devices to
steer traffic using skb->priority to tx queues. This allows
for hardware based QOS schemes to use the default qdisc without
incurring the penalties related to global state and the qdisc
lock. While reliably receiving skbs on the correct tx ring
to avoid head of line blocking resulting from shuffling in
the LLD. Finally, all the goodness from txq caching and xps/rps
can still be leveraged.

Many drivers and hardware exist with the ability to implement
QOS schemes in the hardware but currently these drivers tend
to rely on firmware to reroute specific traffic, a driver
specific select_queue or the queue_mapping action in the
qdisc.

By using select_queue for this drivers need to be updated for
each and every traffic type and we lose the goodness of much
of the upstream work. Firmware solutions are inherently
inflexible. And finally if admins are expected to build a
qdisc and filter rules to steer traffic this requires knowledge
of how the hardware is currently configured. The number of tx
queues and the queue offsets may change depending on resources.
Also this approach incurs all the overhead of a qdisc with filters.

With the mechanism in this patch users can set skb priority using
expected methods ie setsockopt() or the stack can set the priority
directly. Then the skb will be steered to the correct tx queues
aligned with hardware QOS traffic classes. In the normal case with
a single traffic class and all queues in this class everything
works as is until the LLD enables multiple tcs.

To steer the skb we mask out the lower 4 bits of the priority
and allow the hardware to configure upto 15 distinct classes
of traffic. This is expected to be sufficient for most applications
at any rate it is more then the 8021Q spec designates and is
equal to the number of prio bands currently implemented in
the default qdisc.

This in conjunction with a userspace application such as
lldpad can be used to implement 8021Q transmission selection
algorithms one of these algorithms being the extended transmission
selection algorithm currently being used for DCB.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/linux/netdevice.h |   62 +++++++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c            |   10 +++++++
 2 files changed, 71 insertions(+), 1 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0f6b1c9..ae51323 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -646,6 +646,14 @@ struct xps_dev_maps {
     (nr_cpu_ids * sizeof(struct xps_map *)))
 #endif /* CONFIG_XPS */

+#define TC_MAX_QUEUE	16
+#define TC_BITMASK	15
+/* HW offloaded queuing disciplines txq count and offset maps */
+struct netdev_tc_txq {
+	u16 count;
+	u16 offset;
+};
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1146,6 +1154,9 @@ struct net_device {
 	/* Data Center Bridging netlink ops */
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
+	u8 num_tc;
+	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+	u8 prio_tc_map[TC_BITMASK+1];

 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	/* max exchange id for FCoE LRO by ddp */
@@ -1162,6 +1173,57 @@ struct net_device {
 #define	NETDEV_ALIGN		32

 static inline
+int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
+{
+	return dev->prio_tc_map[prio & TC_BITMASK];
+}
+
+static inline
+int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
+{
+	if (tc >= dev->num_tc)
+		return -EINVAL;
+
+	dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK;
+	return 0;
+}
+
+static inline
+void netdev_reset_tc(struct net_device *dev)
+{
+	dev->num_tc = 0;
+	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+
+static inline
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+	if (tc >= dev->num_tc)
+		return -EINVAL;
+
+	dev->tc_to_txq[tc].count = count;
+	dev->tc_to_txq[tc].offset = offset;
+	return 0;
+}
+
+static inline
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+	if (num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+	dev->num_tc = num_tc;
+	return 0;
+}
+
+static inline
+u8 netdev_get_num_tc(const struct net_device *dev)
+{
+	return dev->num_tc;
+}
+
+static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
 					 unsigned int index)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index a215269..38318e2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2165,6 +2165,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		  unsigned int num_tx_queues)
 {
 	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;

 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
@@ -2173,13 +2175,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		return hash;
 	}

+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);

-	return (u16) (((u64) hash * num_tx_queues) >> 32);
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
 }
 EXPORT_SYMBOL(__skb_tx_hash);

^ permalink raw reply related

* [net-next-2.6 PATCH v5 2/2] net_sched: implement a root container qdisc sch_mqprio
From: John Fastabend @ 2011-01-04 18:56 UTC (permalink / raw)
  To: davem, jarkao2
  Cc: john.r.fastabend, hadi, shemminger, tgraf, eric.dumazet,
	bhutchings, nhorman, netdev
In-Reply-To: <20110104185600.13692.47967.stgit@jf-dev1-dcblab>

This implements a mqprio queueing discipline that by default creates
a pfifo_fast qdisc per tx queue and provides the needed configuration
interface.

Using the mqprio qdisc the number of tcs currently in use along
with the range of queues alloted to each class can be configured. By
default skbs are mapped to traffic classes using the skb priority.
This mapping is configurable.

Configurable parameters,

struct tc_mqprio_qopt {
        __u8    num_tc;
        __u8    prio_tc_map[TC_BITMASK + 1];
        __u8    hw;
        __u16   count[TC_MAX_QUEUE];
        __u16   offset[TC_MAX_QUEUE];
};

Here the count/offset pairing give the queue alignment and the
prio_tc_map gives the mapping from skb->priority to tc.

The hw bit determines if the hardware should configure the count
and offset values. If the hardware bit is set then the operation
will fail if the hardware does not implement the ndo_setup_tc
operation. This is to avoid undetermined states where the hardware
may or may not control the queue mapping. Also minimal bounds
checking is done on the count/offset to verify a queue does not
exceed num_tx_queues and that queue ranges do not overlap. Otherwise
it is left to user policy or hardware configuration to create
useful mappings.

It is expected that hardware QOS schemes can be implemented by
creating appropriate mappings of queues in ndo_tc_setup().

One expected use case is drivers will use the ndo_setup_tc to map
queue ranges onto 802.1Q traffic classes. This provides a generic
mechanism to map network traffic onto these traffic classes and
removes the need for lower layer drivers to know specifics about
traffic types.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/linux/netdevice.h |    3 
 include/linux/pkt_sched.h |   10 +
 net/sched/Kconfig         |   12 +
 net/sched/Makefile        |    1 
 net/sched/sch_generic.c   |    4 
 net/sched/sch_mqprio.c    |  413 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 443 insertions(+), 0 deletions(-)
 create mode 100644 net/sched/sch_mqprio.c

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ae51323..19a855b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -764,6 +764,8 @@ struct netdev_tc_txq {
  * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
  *			  struct nlattr *port[]);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
+ *
+ * int (*ndo_setup_tc)(struct net_device *dev, int tc);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -822,6 +824,7 @@ struct net_device_ops {
 						   struct nlattr *port[]);
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
+	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 2cfa4bc..1c5310a 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -2,6 +2,7 @@
 #define __LINUX_PKT_SCHED_H
 
 #include <linux/types.h>
+#include <linux/netdevice.h>
 
 /* Logical priority bands not depending on specific packet scheduler.
    Every scheduler will map them to real traffic classes, if it has
@@ -481,4 +482,13 @@ struct tc_drr_stats {
 	__u32	deficit;
 };
 
+/* MQPRIO */
+struct tc_mqprio_qopt {
+	__u8	num_tc;
+	__u8	prio_tc_map[TC_BITMASK + 1];
+	__u8	hw;
+	__u16	count[TC_MAX_QUEUE];
+	__u16	offset[TC_MAX_QUEUE];
+};
+
 #endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a36270a..f52f5eb 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -205,6 +205,18 @@ config NET_SCH_DRR
 
 	  If unsure, say N.
 
+config NET_SCH_MQPRIO
+	tristate "Multi-queue priority scheduler (MQPRIO)"
+	help
+	  Say Y here if you want to use the Multi-queue Priority scheduler.
+	  This scheduler allows QOS to be offloaded on NICs that have support
+	  for offloading QOS schedulers.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_mqprio.
+
+	  If unsure, say N.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 960f5db..26ce681 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
+obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598..723b278 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -540,6 +540,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.dump		=	pfifo_fast_dump,
 	.owner		=	THIS_MODULE,
 };
+EXPORT_SYMBOL(pfifo_fast_ops);
 
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  struct Qdisc_ops *ops)
@@ -674,6 +675,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 
 	return oqdisc;
 }
+EXPORT_SYMBOL(dev_graft_qdisc);
 
 static void attach_one_default_qdisc(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
@@ -761,6 +763,7 @@ void dev_activate(struct net_device *dev)
 		dev_watchdog_up(dev);
 	}
 }
+EXPORT_SYMBOL(dev_activate);
 
 static void dev_deactivate_queue(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
@@ -840,6 +843,7 @@ void dev_deactivate(struct net_device *dev)
 	list_add(&dev->unreg_list, &single);
 	dev_deactivate_many(&single);
 }
+EXPORT_SYMBOL(dev_deactivate);
 
 static void dev_init_scheduler_queue(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 0000000..b16dc2c
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,413 @@
+/*
+ * net/sched/sch_mqprio.c
+ *
+ * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+struct mqprio_sched {
+	struct Qdisc		**qdiscs;
+	int hw_owned;
+};
+
+static void mqprio_destroy(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	unsigned int ntx;
+
+	if (!priv->qdiscs)
+		return;
+
+	for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
+		qdisc_destroy(priv->qdiscs[ntx]);
+
+	if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+		dev->netdev_ops->ndo_setup_tc(dev, 0);
+	else
+		netdev_set_num_tc(dev, 0);
+
+	kfree(priv->qdiscs);
+}
+
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+	int i, j;
+
+	/* Verify num_tc is not out of max range */
+	if (qopt->num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+	for (i = 0; i < qopt->num_tc; i++) {
+		unsigned int last = qopt->offset[i] + qopt->count[i];
+		/* Verify the queue offset is in the num tx range */
+		if (qopt->offset[i] >= dev->num_tx_queues)
+			return -EINVAL;
+		/* Verify the queue count is in tx range being equal to the
+		 * num_tx_queues indicates the last queue is in use.
+		 */
+		else if (last > dev->num_tx_queues)
+			return -EINVAL;
+
+		/* Verify that the offset and counts do not overlap */
+		for (j = i + 1; j < qopt->num_tc; j++) {
+			if (last > qopt->offset[j])
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct netdev_queue *dev_queue;
+	struct Qdisc *qdisc;
+	int i, err = -EOPNOTSUPP;
+	struct tc_mqprio_qopt *qopt = NULL;
+
+	/* Unwind attributes on failure */
+	u8 unwnd_tc = dev->num_tc;
+	u8 unwnd_map[TC_BITMASK + 1];
+	struct netdev_tc_txq unwnd_txq[TC_MAX_QUEUE];
+
+	if (sch->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	if (!netif_is_multiqueue(dev))
+		return -EOPNOTSUPP;
+
+	if (nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+	qopt = nla_data(opt);
+
+	memcpy(unwnd_map, dev->prio_tc_map, sizeof(unwnd_map));
+	memcpy(unwnd_txq, dev->tc_to_txq, sizeof(unwnd_txq));
+
+	/* If the mqprio options indicate that hardware should own
+	 * the queue mapping then run ndo_setup_tc if this can not
+	 * be done fail immediately.
+	 */
+	if (qopt->hw && dev->netdev_ops->ndo_setup_tc) {
+		priv->hw_owned = 1;
+		err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
+		if (err)
+			return err;
+	} else if (!qopt->hw) {
+		if (mqprio_parse_opt(dev, qopt))
+			return -EINVAL;
+
+		if (netdev_set_num_tc(dev, qopt->num_tc))
+			return -EINVAL;
+
+		for (i = 0; i < qopt->num_tc; i++)
+			netdev_set_tc_queue(dev, i,
+					    qopt->count[i], qopt->offset[i]);
+	} else {
+		return -EINVAL;
+	}
+
+	/* Always use supplied priority mappings */
+	for (i = 0; i < TC_BITMASK + 1; i++) {
+		if (netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i])) {
+			err = -EINVAL;
+			goto tc_err;
+		}
+	}
+
+	/* pre-allocate qdisc, attachment can't fail */
+	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+			       GFP_KERNEL);
+	if (priv->qdiscs == NULL) {
+		err = -ENOMEM;
+		goto tc_err;
+	}
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		dev_queue = netdev_get_tx_queue(dev, i);
+		qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+					  TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(i + 1)));
+		if (qdisc == NULL) {
+			err = -ENOMEM;
+			goto err;
+		}
+		qdisc->flags |= TCQ_F_CAN_BYPASS;
+		priv->qdiscs[i] = qdisc;
+	}
+
+	sch->flags |= TCQ_F_MQROOT;
+	return 0;
+
+err:
+	mqprio_destroy(sch);
+tc_err:
+	if (priv->hw_owned)
+		dev->netdev_ops->ndo_setup_tc(dev, unwnd_tc);
+	else
+		netdev_set_num_tc(dev, unwnd_tc);
+
+	memcpy(dev->prio_tc_map, unwnd_map, sizeof(unwnd_map));
+	memcpy(dev->tc_to_txq, unwnd_txq, sizeof(unwnd_txq));
+
+	return err;
+}
+
+static void mqprio_attach(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct Qdisc *qdisc;
+	unsigned int ntx;
+
+	/* Attach underlying qdisc */
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		qdisc = priv->qdiscs[ntx];
+		qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+		if (qdisc)
+			qdisc_destroy(qdisc);
+	}
+	kfree(priv->qdiscs);
+	priv->qdiscs = NULL;
+}
+
+static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
+					     unsigned long cl)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+
+	if (ntx >= dev->num_tx_queues)
+		return NULL;
+	return netdev_get_tx_queue(dev, ntx);
+}
+
+static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+		    struct Qdisc **old)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+
+	*old = dev_graft_qdisc(dev_queue, new);
+
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+
+	return 0;
+}
+
+static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_mqprio_qopt opt;
+	struct Qdisc *qdisc;
+	unsigned int i;
+
+	sch->q.qlen = 0;
+	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+		spin_lock_bh(qdisc_lock(qdisc));
+		sch->q.qlen		+= qdisc->q.qlen;
+		sch->bstats.bytes	+= qdisc->bstats.bytes;
+		sch->bstats.packets	+= qdisc->bstats.packets;
+		sch->qstats.qlen	+= qdisc->qstats.qlen;
+		sch->qstats.backlog	+= qdisc->qstats.backlog;
+		sch->qstats.drops	+= qdisc->qstats.drops;
+		sch->qstats.requeues	+= qdisc->qstats.requeues;
+		sch->qstats.overlimits	+= qdisc->qstats.overlimits;
+		spin_unlock_bh(qdisc_lock(qdisc));
+	}
+
+	opt.num_tc = dev->num_tc;
+	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+	opt.hw = priv->hw_owned;
+
+	for (i = 0; i < dev->num_tc; i++) {
+		opt.count[i] = dev->tc_to_txq[i].count;
+		opt.offset[i] = dev->tc_to_txq[i].offset;
+	}
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+	return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+{
+	unsigned int ntx = TC_H_MIN(classid);
+
+	if (!mqprio_queue_get(sch, ntx))
+		return 0;
+	return ntx;
+}
+
+static void mqprio_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			 struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct net_device *dev = qdisc_dev(sch);
+
+	if (cl <= dev->num_tc) {
+		tcm->tcm_parent = TC_H_ROOT;
+		tcm->tcm_info = 0;
+	} else {
+		int i;
+		struct netdev_queue *dev_queue;
+		dev_queue = mqprio_queue_get(sch, cl);
+
+		tcm->tcm_parent = 0;
+		for (i = 0; i < netdev_get_num_tc(dev); i++) {
+			struct netdev_tc_txq tc = dev->tc_to_txq[i];
+			int q_idx = cl - dev->num_tc;
+			if (q_idx >= tc.offset &&
+			    q_idx < tc.offset + tc.count) {
+				tcm->tcm_parent =
+					TC_H_MAKE(TC_H_MAJ(sch->handle),
+						  TC_H_MIN(i + 1));
+				break;
+			}
+		}
+		tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+	}
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+			       struct gnet_dump *d)
+{
+	struct net_device *dev = qdisc_dev(sch);
+
+	if (cl <= netdev_get_num_tc(dev)) {
+		int i;
+		struct Qdisc *qdisc;
+		struct gnet_stats_queue qstats = {0};
+		struct gnet_stats_basic_packed bstats = {0};
+		struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+
+		/* Drop lock here it will be reclaimed before touching
+		 * statistics this is required because the d->lock we
+		 * hold here is the look on dev_queue->qdisc_sleeping
+		 * also acquired below.
+		 */
+		spin_unlock_bh(d->lock);
+
+		for (i = tc.offset; i < tc.offset + tc.count; i++) {
+			qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+			spin_lock_bh(qdisc_lock(qdisc));
+			bstats.bytes      += qdisc->bstats.bytes;
+			bstats.packets    += qdisc->bstats.packets;
+			qstats.qlen       += qdisc->qstats.qlen;
+			qstats.backlog    += qdisc->qstats.backlog;
+			qstats.drops      += qdisc->qstats.drops;
+			qstats.requeues   += qdisc->qstats.requeues;
+			qstats.overlimits += qdisc->qstats.overlimits;
+			spin_unlock_bh(qdisc_lock(qdisc));
+		}
+		/* Reclaim root sleeping lock before completing stats */
+		spin_lock_bh(d->lock);
+		if (gnet_stats_copy_basic(d, &bstats) < 0 ||
+		    gnet_stats_copy_queue(d, &qstats) < 0)
+			return -1;
+	} else {
+		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+		sch = dev_queue->qdisc_sleeping;
+		sch->qstats.qlen = sch->q.qlen;
+		if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+		    gnet_stats_copy_queue(d, &sch->qstats) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx;
+	u8 num_tc = netdev_get_num_tc(dev);
+
+	if (arg->stop)
+		return;
+
+	/* Walk hierarchy with a virtual class per tc */
+	arg->count = arg->skip;
+	for (ntx = arg->skip; ntx < dev->num_tx_queues + num_tc; ntx++) {
+		if (arg->fn(sch, ntx + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops mqprio_class_ops = {
+	.graft		= mqprio_graft,
+	.leaf		= mqprio_leaf,
+	.get		= mqprio_get,
+	.put		= mqprio_put,
+	.walk		= mqprio_walk,
+	.dump		= mqprio_dump_class,
+	.dump_stats	= mqprio_dump_class_stats,
+};
+
+struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
+	.cl_ops		= &mqprio_class_ops,
+	.id		= "mqprio",
+	.priv_size	= sizeof(struct mqprio_sched),
+	.init		= mqprio_init,
+	.destroy	= mqprio_destroy,
+	.attach		= mqprio_attach,
+	.dump		= mqprio_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init mqprio_module_init(void)
+{
+	return register_qdisc(&mqprio_qdisc_ops);
+}
+
+static void __exit mqprio_module_exit(void)
+{
+	unregister_qdisc(&mqprio_qdisc_ops);
+}
+
+module_init(mqprio_module_init);
+module_exit(mqprio_module_exit);
+
+MODULE_LICENSE("GPL");


^ permalink raw reply related

* [PATCH v2] net: typos in comments in include/linux/igmp.h
From: Francois-Xavier Le Bail @ 2011-01-04 19:10 UTC (permalink / raw)
  To: netdev

There are typos in comments in include/linux/igmp.h:

83 #define IGMP_HOST_MEMBERSHIP_QUERY      0x11    /* From RFC1112 */
84 #define IGMP_HOST_MEMBERSHIP_REPORT     0x12    /* Ditto */
[snip]
88 #define IGMPV2_HOST_MEMBERSHIP_REPORT   0x16    /* V2 version of 0x11 */
89 #define IGMP_HOST_LEAVE_MESSAGE         0x17
90 #define IGMPV3_HOST_MEMBERSHIP_REPORT   0x22    /* V3 version of 0x11 */

The line 88 and 90 are about REPORT messages.
The IGMP_HOST_MEMBERSHIP_REPORT (IGMP V1) value is 0x12.
So the comment on line 88 must be /* V2 version of 0x12 */,
and the comment on line 90 must be /* V3 version of 0x12 */.

Signed-off-by: Francois-Xavier Le Bail <fx.lebail@orange.fr>

---

diff -ru a/include/linux/igmp.h b/include/linux/igmp.h
--- a/include/linux/igmp.h	2010-08-27 01:47:12.000000000 +0200
+++ b/include/linux/igmp.h	2010-12-15 09:50:47.808363144 +0100
@@ -85,9 +85,9 @@
 #define IGMP_DVMRP			0x13	/* DVMRP routing */
 #define IGMP_PIM			0x14	/* PIM routing */
 #define IGMP_TRACE			0x15
-#define IGMPV2_HOST_MEMBERSHIP_REPORT	0x16	/* V2 version of 0x11 */
+#define IGMPV2_HOST_MEMBERSHIP_REPORT	0x16	/* V2 version of 0x12 */
 #define IGMP_HOST_LEAVE_MESSAGE 	0x17
-#define IGMPV3_HOST_MEMBERSHIP_REPORT	0x22	/* V3 version of 0x11 */
+#define IGMPV3_HOST_MEMBERSHIP_REPORT	0x22	/* V3 version of 0x12 */
 
 #define IGMP_MTRACE_RESP		0x1e
 #define IGMP_MTRACE			0x1f

^ permalink raw reply

* Re: [PATCH v2] net: typos in comments in include/linux/igmp.h
From: David Miller @ 2011-01-04 19:30 UTC (permalink / raw)
  To: fx.lebail; +Cc: netdev
In-Reply-To: <4D23709C.1040408@orange.fr>

From: Francois-Xavier Le Bail <fx.lebail@orange.fr>
Date: Tue, 04 Jan 2011 20:10:20 +0100

> There are typos in comments in include/linux/igmp.h:
> 
> 83 #define IGMP_HOST_MEMBERSHIP_QUERY      0x11    /* From RFC1112 */
> 84 #define IGMP_HOST_MEMBERSHIP_REPORT     0x12    /* Ditto */
> [snip]
> 88 #define IGMPV2_HOST_MEMBERSHIP_REPORT   0x16    /* V2 version of 0x11 */
> 89 #define IGMP_HOST_LEAVE_MESSAGE         0x17
> 90 #define IGMPV3_HOST_MEMBERSHIP_REPORT   0x22    /* V3 version of 0x11 */
> 
> The line 88 and 90 are about REPORT messages.
> The IGMP_HOST_MEMBERSHIP_REPORT (IGMP V1) value is 0x12.
> So the comment on line 88 must be /* V2 version of 0x12 */,
> and the comment on line 90 must be /* V3 version of 0x12 */.
> 
> Signed-off-by: Francois-Xavier Le Bail <fx.lebail@orange.fr>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH] ipv4/route.c: respect prefsrc for local routes
From: David Miller @ 2011-01-04 19:35 UTC (permalink / raw)
  To: jsing; +Cc: netdev
In-Reply-To: <1294122260-13245-1-git-send-email-jsing@google.com>

From: Joel Sing <jsing@google.com>
Date: Tue,  4 Jan 2011 17:24:20 +1100

> The preferred source address is currently ignored for local routes,
> which results in all local connections having a src address that is the
> same as the local dst address. Fix this by respecting the preferred source
> address when it is provided for local routes.
> 
> This bug can be demonstrated as follows:
> 
>  # ifconfig dummy0 192.168.0.1
>  # ip route show table local | grep local.*dummy0
>  local 192.168.0.1 dev dummy0  proto kernel  scope host  src 192.168.0.1
>  # ip route change table local local 192.168.0.1 dev dummy0 \
>      proto kernel scope host src 127.0.0.1
>  # ip route show table local | grep local.*dummy0
>  local 192.168.0.1 dev dummy0  proto kernel  scope host  src 127.0.0.1
> 
> We now establish a local connection and verify the source IP
> address selection:
> 
>  # nc -l 192.168.0.1 3128 &
>  # nc 192.168.0.1 3128 &
>  # netstat -ant | grep 192.168.0.1:3128.*EST
>  tcp        0      0 192.168.0.1:3128        192.168.0.1:33228 ESTABLISHED
>  tcp        0      0 192.168.0.1:33228       192.168.0.1:3128  ESTABLISHED
> 
> Signed-off-by: Joel Sing <jsing@google.com>

Applied to net-2.6, thanks Joel.

If you guys want to mess with ternary operators and new macros,
please do that in net-next-2.6 the next time I merge or similar.

Thanks.

^ permalink raw reply

* [PATCH 0/2] IRQ affinity reverse-mapping
From: Ben Hutchings @ 2011-01-04 19:37 UTC (permalink / raw)
  To: Thomas Gleixner, David Miller
  Cc: Tom Herbert, linux-kernel, netdev, linux-net-drivers

This patch series is intended to support queue selection on multiqueue
IRQ-per-queue network devices (accelerated RFS and XPS-MQ) and
potentially queue selection for other classes of multiqueue device.

The first patch implements IRQ affinity notifiers, based on the outline
that Thomas wrote in response to my earlier patch series for accelerated RFS.

The second patch is a generalisation of the CPU affinity reverse-
mapping, plus functions to maintain such a mapping based on the new IRQ
affinity notifiers.

I would like to be able to use this functionality in networking for
2.6.38.  Thomas, if you are happy with this, could these changes go
through net-next-2.6?  Alternately, if Linus pulls from linux-2.6-tip
and David pulls from Linus during the merge window, I can (re-)submit
the dependent changes after that.

Ben.

Ben Hutchings (2):
  genirq: Add IRQ affinity notifiers
  lib: cpu_rmap: CPU affinity reverse-mapping

 include/linux/cpu_rmap.h  |   73 +++++++++++++
 include/linux/interrupt.h |   41 +++++++
 include/linux/irqdesc.h   |    3 +
 kernel/irq/manage.c       |   81 ++++++++++++++
 lib/Kconfig               |    4 +
 lib/Makefile              |    2 +
 lib/cpu_rmap.c            |  262 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 466 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/cpu_rmap.h
 create mode 100644 lib/cpu_rmap.c

-- 
1.7.3.4

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [net-next-2.6 #2 00/08] r8169 driver updates
From: David Miller @ 2011-01-04 19:38 UTC (permalink / raw)
  To: romieu; +Cc: netdev, hayeswang, benh
In-Reply-To: <20110104010716.GA5957@electric-eye.fr.zoreil.com>

From: Francois Romieu <romieu@fr.zoreil.com>
Date: Tue, 4 Jan 2011 02:07:16 +0100

> The following series adds on top of the previous one :
> - missing TxPacketMax and FW_LOADER (Ben Hutchings)
> - wrong OCPDR_GPHY_REG offset and mask.
>   Hayes, are you ok with this one ?
> 
> The RTL_GIGA_MAC_VER_27 changes suggested by Hayes at the end of the
> previous series are not included yet (feedback anyone ?).
> 
> The previous series included :
> - Hayes'firmware removal. The firmware is already included in linux-firmware.
> - my sauce to diminish the gap between the in-kernel r8169 driver
>   and Realtek's one(s).
> 
> The series is available as:
> git://git.kernel.org/pub/scm/linux/kernel/git/romieu/netdev-2.6.git r8169-davem

I applied all of these patches, thanks Francois.

Because I am an idiot, I didn't notice you had a GIT tree with this
stuff prepared for me until after I pushed the patches out to
kernel.org :-/

Don't let this discourage you, using a GIT tree helps me a lot and I
just didn't expect it from you for whatever reason.  :-) So please
keep doing this and I will pull directly from your GIT tree next time.

^ permalink raw reply

* [PATCH 1/2] genirq: Add IRQ affinity notifiers
From: Ben Hutchings @ 2011-01-04 19:38 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: David Miller, Tom Herbert, linux-kernel, netdev,
	linux-net-drivers
In-Reply-To: <1294169842.3636.31.camel@bwh-desktop>

When initiating I/O on a multiqueue and multi-IRQ device, we may want
to select a queue for which the response will be handled on the same
or a nearby CPU.  This requires a reverse-map of IRQ affinity.  Add a
notification mechanism to support this.

This is based closely on work by Thomas Gleixner <tglx@linutronix.de>.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 include/linux/interrupt.h |   41 +++++++++++++++++++++++
 include/linux/irqdesc.h   |    3 ++
 kernel/irq/manage.c       |   81 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 0 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 55e0d42..09d6039 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,6 +14,8 @@
 #include <linux/smp.h>
 #include <linux/percpu.h>
 #include <linux/hrtimer.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
 
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
@@ -231,6 +233,28 @@ static inline void resume_device_irqs(void) { };
 static inline int check_wakeup_irqs(void) { return 0; }
 #endif
 
+/**
+ * struct irq_affinity_notify - context for notification of IRQ affinity changes
+ * @irq:		Interrupt to which notification applies
+ * @kref:		Reference count, for internal use
+ * @work:		Work item, for internal use
+ * @notify:		Function to be called on change.  This will be
+ *			called in process context.
+ * @release:		Function to be called on release.  This will be
+ *			called in process context.  Once registered, the
+ *			structure must only be freed when this function is
+ *			called or later.
+ */
+struct irq_affinity_notify {
+        unsigned int irq;
+        struct kref kref;
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
+        struct work_struct work;
+#endif
+        void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
+        void (*release)(struct kref *ref);
+};
+
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 
 extern cpumask_var_t irq_default_affinity;
@@ -240,6 +264,13 @@ extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
 
 extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
+extern int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
+
+static inline void irq_run_affinity_notifiers(void)
+{
+	flush_scheduled_work();
+}
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -259,6 +290,16 @@ static inline int irq_set_affinity_hint(unsigned int irq,
 {
 	return -EINVAL;
 }
+
+static inline int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
+{
+	return 0;
+}
+
+static inline void irq_run_affinity_notifiers(void)
+{
+}
 #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
 
 #ifdef CONFIG_GENERIC_HARDIRQS
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 979c68c..5e0d2e4 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -8,6 +8,7 @@
  * For now it's included from <linux/irq.h>
  */
 
+struct irq_affinity_notify;
 struct proc_dir_entry;
 struct timer_rand_state;
 /**
@@ -24,6 +25,7 @@ struct timer_rand_state;
  * @last_unhandled:	aging timer for unhandled count
  * @irqs_unhandled:	stats field for spurious unhandled interrupts
  * @lock:		locking for SMP
+ * @affinity_notify:	context for notification of affinity changes
  * @pending_mask:	pending rebalanced interrupts
  * @threads_active:	number of irqaction threads currently running
  * @wait_for_threads:	wait queue for sync_irq to wait for threaded handlers
@@ -70,6 +72,7 @@ struct irq_desc {
 	raw_spinlock_t		lock;
 #ifdef CONFIG_SMP
 	const struct cpumask	*affinity_hint;
+	struct irq_affinity_notify *affinity_notify;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
 #endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 91a5fa2..fb6525a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -134,6 +134,10 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 		irq_set_thread_affinity(desc);
 	}
 #endif
+	if (desc->affinity_notify) {
+		kref_get(&desc->affinity_notify->kref);
+		schedule_work(&desc->affinity_notify->work);
+	}
 	desc->status |= IRQ_AFFINITY_SET;
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
@@ -155,6 +159,79 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
 }
 EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
 
+static void irq_affinity_notify(struct work_struct *work)
+{
+	struct irq_affinity_notify *notify =
+		container_of(work, struct irq_affinity_notify, work);
+	struct irq_desc *desc = irq_to_desc(notify->irq);
+	cpumask_var_t cpumask;
+	unsigned long flags;
+
+	if (!desc)
+		goto out;
+
+	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
+		goto out;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	if (desc->status & IRQ_MOVE_PENDING)
+		cpumask_copy(cpumask, desc->pending_mask);
+	else
+#endif
+		cpumask_copy(cpumask, desc->affinity);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	notify->notify(notify, cpumask);
+
+	free_cpumask_var(cpumask);
+out:
+	kref_put(&notify->kref, notify->release);
+}
+
+/**
+ *	irq_set_affinity_notifier - control notification of IRQ affinity changes
+ *	@irq:		Interrupt for which to enable/disable notification
+ *	@notify:	Context for notification, or %NULL to disable
+ *			notification.  Function pointers must be initialised;
+ *			the other fields will be initialised by this function.
+ *
+ *	Must be called in process context.  Notification may only be enabled
+ *	after the IRQ is allocated but before it is bound with request_irq()
+ *	and must be disabled before the IRQ is freed using free_irq().
+ */
+int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_affinity_notify *old_notify;
+	unsigned long flags;
+
+	/* The release function is promised process context */
+	might_sleep();
+
+	if (!desc)
+		return -EINVAL;
+
+	/* Complete initialisation of *notify */
+	if (notify) {
+		notify->irq = irq;
+		kref_init(&notify->kref);
+		INIT_WORK(&notify->work, irq_affinity_notify);
+	}
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	old_notify = desc->affinity_notify;
+	desc->affinity_notify = notify;
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	if (old_notify)
+		kref_put(&old_notify->kref, old_notify->release);
+	
+	return 0;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
+
 #ifndef CONFIG_AUTO_IRQ_AFFINITY
 /*
  * Generic version of the affinity autoselector.
@@ -1004,6 +1081,10 @@ void free_irq(unsigned int irq, void *dev_id)
 	if (!desc)
 		return;
 
+#ifdef CONFIG_SMP
+	BUG_ON(desc->affinity_notify);
+#endif
+
 	chip_bus_lock(desc);
 	kfree(__free_irq(irq, dev_id));
 	chip_bus_sync_unlock(desc);
-- 
1.7.3.4


-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply related

* [PATCH 2/2] lib: cpu_rmap: CPU affinity reverse-mapping
From: Ben Hutchings @ 2011-01-04 19:39 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: David Miller, Tom Herbert, linux-kernel, netdev,
	linux-net-drivers
In-Reply-To: <1294169842.3636.31.camel@bwh-desktop>

When initiating I/O on a multiqueue and multi-IRQ device, we may want
to select a queue for which the response will be handled on the same
or a nearby CPU.  This requires a reverse-map of IRQ affinity.  Add
library functions to support a generic reverse-mapping from CPUs to
objects with affinity and the specific case where the objects are
IRQs.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 include/linux/cpu_rmap.h |   73 +++++++++++++
 lib/Kconfig              |    4 +
 lib/Makefile             |    2 +
 lib/cpu_rmap.c           |  262 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 341 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/cpu_rmap.h
 create mode 100644 lib/cpu_rmap.c

diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
new file mode 100644
index 0000000..6e2f5ff
--- /dev/null
+++ b/include/linux/cpu_rmap.h
@@ -0,0 +1,73 @@
+/*
+ * cpu_rmap.c: CPU affinity reverse-map support
+ * Copyright 2010 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+
+/**
+ * struct cpu_rmap - CPU affinity reverse-map
+ * @near: For each CPU, the index and distance to the nearest object,
+ *      based on affinity masks
+ * @size: Number of objects to be reverse-mapped
+ * @used: Number of objects added
+ * @obj: Array of object pointers
+ */
+struct cpu_rmap {
+	struct {
+		u16     index;
+		u16     dist;
+	} near[NR_CPUS];
+	u16		size, used;
+	void		*obj[0];
+};
+#define CPU_RMAP_DIST_INF 0xffff
+
+extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);
+
+/**
+ * free_cpu_rmap - free CPU affinity reverse-map
+ * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL
+ */
+static inline void free_cpu_rmap(struct cpu_rmap *rmap)
+{
+	kfree(rmap);
+}
+
+extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
+extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
+			   const struct cpumask *affinity);
+
+static inline u16 cpu_rmap_lookup_index(struct cpu_rmap *rmap, unsigned int cpu)
+{
+	return rmap->near[cpu].index;
+}
+
+static inline void *cpu_rmap_lookup_obj(struct cpu_rmap *rmap, unsigned int cpu)
+{
+	return rmap->obj[rmap->near[cpu].index];
+}
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+
+/**
+ * alloc_irq_cpu_rmap - allocate CPU affinity reverse-map for IRQs
+ * @size: Number of objects to be mapped
+ *
+ * Must be called in process context.
+ */
+static inline struct cpu_rmap *alloc_irq_cpu_rmap(unsigned int size)
+{
+	return alloc_cpu_rmap(size, GFP_KERNEL);
+}
+extern void free_irq_cpu_rmap(struct cpu_rmap *rmap);
+
+extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq);
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index 3d498b2..f43cb2e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -195,6 +195,10 @@ config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
        bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
        depends on EXPERIMENTAL && BROKEN
 
+config CPU_RMAP
+	bool
+	depends on SMP
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
diff --git a/lib/Makefile b/lib/Makefile
index 0248767..001b528 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -110,6 +110,8 @@ obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
 
 obj-$(CONFIG_AVERAGE) += average.o
 
+obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
new file mode 100644
index 0000000..8f7f6c9
--- /dev/null
+++ b/lib/cpu_rmap.c
@@ -0,0 +1,262 @@
+/*
+ * cpu_rmap.c: CPU affinity reverse-map support
+ * Copyright 2010 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/cpu_rmap.h>
+#ifdef CONFIG_GENERIC_HARDIRQS
+#include <linux/interrupt.h>
+#endif
+#include <linux/module.h>
+
+/*
+ * These functions maintain a mapping from CPUs to some ordered set of
+ * objects with CPU affinities.  This can be seen as a reverse-map of
+ * CPU affinity.  However, we do not assume that the object affinities
+ * cover all CPUs in the system.  For those CPUs not directly covered
+ * by object affinities, we attempt to find a nearest object based on
+ * CPU topology.
+ */
+
+/**
+ * alloc_cpu_rmap - allocate CPU affinity reverse-map
+ * @size: Number of objects to be mapped
+ * @flags: Allocation flags e.g. %GFP_KERNEL
+ */
+struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
+{
+	struct cpu_rmap *rmap;
+	unsigned int cpu;
+
+	/* This is a silly number of objects, and we use u16 indices. */
+	if (size > 0xffff)
+		return NULL;
+
+	rmap = kzalloc(sizeof(*rmap) + size * sizeof(rmap->obj[0]), flags);
+	if (!rmap)
+		return NULL;
+
+	/* Initially assign CPUs to objects on a rota, since we have
+	 * no idea where the objects are.  Use infinite distance, so
+	 * any object with known distance is preferable.  Include the
+	 * CPUs that are not present/online, since we definitely want
+	 * any newly-hotplugged CPUs to have some object assigned.
+	 */
+	for_each_possible_cpu(cpu) {
+		rmap->near[cpu].index = cpu % size;
+		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
+	}
+
+	rmap->size = size;
+	return rmap;
+}
+EXPORT_SYMBOL(alloc_cpu_rmap);
+
+/* Reevaluate nearest object for given CPU, comparing with the given
+ * neighbours at the given distance.
+ */
+static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
+				const struct cpumask *mask, u16 dist)
+{
+	int neigh;
+
+	for_each_cpu(neigh, mask) {
+		if (rmap->near[cpu].dist > dist &&
+		    rmap->near[neigh].dist <= dist) {
+			rmap->near[cpu].index = rmap->near[neigh].index;
+			rmap->near[cpu].dist = dist;
+			return true;
+		}
+	}
+	return false;
+}
+
+#ifdef DEBUG
+static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
+{
+	unsigned index;
+	unsigned int cpu;
+
+	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);
+
+	for_each_possible_cpu(cpu) {
+		index = rmap->near[cpu].index;
+		pr_info("cpu %d -> obj %u (distance %u)\n",
+			cpu, index, rmap->near[cpu].dist);
+	}
+}
+#else
+static inline void
+debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
+{
+}
+#endif
+
+/**
+ * cpu_rmap_add - add object to a rmap
+ * @rmap: CPU rmap allocated with alloc_cpu_rmap()
+ * @obj: Object to add to rmap
+ *
+ * Return index of object.
+ */
+int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
+{
+	u16 index;
+
+	BUG_ON(rmap->used >= rmap->size);
+	index = rmap->used++;
+	rmap->obj[index] = obj;
+	return index;
+}
+EXPORT_SYMBOL(cpu_rmap_add);
+
+/**
+ * cpu_rmap_update - update CPU rmap following a change of object affinity
+ * @rmap: CPU rmap to update
+ * @index: Index of object whose affinity changed
+ * @affinity: New CPU affinity of object
+ */
+int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
+		    const struct cpumask *affinity)
+{
+	cpumask_var_t update_mask;
+	unsigned int cpu;
+
+	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
+		return -ENOMEM;
+
+	/* Invalidate distance for all CPUs for which this used to be
+	 * the nearest object.  Mark those CPUs for update.
+	 */
+	for_each_online_cpu(cpu) {
+		if (rmap->near[cpu].index == index) {
+			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
+			cpumask_set_cpu(cpu, update_mask);
+		}
+	}
+
+	debug_print_rmap(rmap, "after invalidating old distances");
+
+	/* Set distance to 0 for all CPUs in the new affinity mask.
+	 * Mark all CPUs within their NUMA nodes for update.
+	 */
+	for_each_cpu(cpu, affinity) {
+		rmap->near[cpu].index = index;
+		rmap->near[cpu].dist = 0;
+		cpumask_or(update_mask, update_mask,
+			   cpumask_of_node(cpu_to_node(cpu)));
+	}
+
+	debug_print_rmap(rmap, "after updating neighbours");
+
+	/* Update distances based on topology */
+	for_each_cpu(cpu, update_mask) {
+		if (cpu_rmap_copy_neigh(rmap, cpu,
+					topology_thread_cpumask(cpu), 1))
+			continue;
+		if (cpu_rmap_copy_neigh(rmap, cpu,
+					topology_core_cpumask(cpu), 2))
+			continue;
+		if (cpu_rmap_copy_neigh(rmap, cpu,
+					cpumask_of_node(cpu_to_node(cpu)), 3))
+			continue;
+		/* We could continue into NUMA node distances, but for now
+		 * we give up.
+		 */
+	}
+
+	debug_print_rmap(rmap, "after copying neighbours");
+
+	free_cpumask_var(update_mask);
+	return 0;
+}
+EXPORT_SYMBOL(cpu_rmap_update);
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+
+/* Glue between IRQ affinity notifiers and CPU rmaps */
+
+struct irq_glue {
+	struct irq_affinity_notify notify;
+	struct cpu_rmap *rmap;
+	u16 index;
+};
+
+/**
+ * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
+ * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
+ *
+ * Must be called in process context, before freeing the IRQs, and
+ * without holding any locks required by global workqueue items.
+ */
+void free_irq_cpu_rmap(struct cpu_rmap *rmap)
+{
+	struct irq_glue *glue;
+	u16 index;
+
+	if (!rmap)
+		return;
+
+	for (index = 0; index < rmap->used; index++) {
+		glue = rmap->obj[index];
+		irq_set_affinity_notifier(glue->notify.irq, NULL);
+	}
+	irq_run_affinity_notifiers();
+
+	kfree(rmap);
+}
+EXPORT_SYMBOL(free_irq_cpu_rmap);
+
+static void
+irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
+{
+	struct irq_glue *glue =
+		container_of(notify, struct irq_glue, notify);
+	int rc;
+
+	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
+	if (rc)
+		pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
+}
+
+static void irq_cpu_rmap_release(struct kref *ref)
+{
+	struct irq_glue *glue =
+		container_of(ref, struct irq_glue, notify.kref);
+	kfree(glue);
+}
+
+/**
+ * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
+ * @rmap: The reverse-map
+ * @irq: The IRQ number
+ *
+ * This adds an IRQ affinity notifier that will update the reverse-map
+ * automatically.
+ *
+ * Must be called in process context, after the IRQ is allocated but
+ * before it is bound with request_irq().
+ */
+int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
+{
+	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
+	int rc;
+
+	if (!glue)
+		return -ENOMEM;
+	glue->notify.notify = irq_cpu_rmap_notify;
+	glue->notify.release = irq_cpu_rmap_release;
+	glue->rmap = rmap;
+	glue->index = cpu_rmap_add(rmap, glue);
+	rc = irq_set_affinity_notifier(irq, &glue->notify);
+	if (rc)
+		kfree(glue);
+	return rc;
+}
+EXPORT_SYMBOL(irq_cpu_rmap_add);
+
+#endif /* CONFIG_GENERIC_HARDIRQS */
-- 
1.7.3.4


-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox