Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 net-next 4/7] mac802154: page and channel setter
From: Alexander Smirnov @ 2012-06-26  9:24 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340702694-24706-1-git-send-email-alex.bluesman.smirnov@gmail.com>

A new method to set page and channel values for a transceiver
was added to the MIB.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/mac802154/mac802154.h |    1 +
 net/mac802154/mib.c       |   44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index 9951072..6967864 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -112,5 +112,6 @@ void mac802154_dev_set_short_addr(struct net_device *dev, u16 val);
 void mac802154_dev_set_ieee_addr(struct net_device *dev);
 u16 mac802154_dev_get_pan_id(const struct net_device *dev);
 void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
+void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
 
 #endif /* MAC802154_H */
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index d74503b..380829d 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -28,6 +28,11 @@
 
 #include "mac802154.h"
 
+/* Deferred-work item used to push a page/channel change to the
+ * transceiver; it is handled by phy_chan_notify().
+ */
+struct phy_chan_notify_work {
+	struct work_struct work;	/* embedded work; container_of() anchor */
+	struct net_device *dev;		/* interface whose page/channel changed */
+};
+
 struct hw_addr_filt_notify_work {
 	struct work_struct work;
 	struct net_device *dev;
@@ -139,3 +144,42 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)
 		set_hw_addr_filt(dev, IEEE802515_AFILT_PANID_CHANGED);
 	}
 }
+
+/* Workqueue handler: program the page/channel stored in the interface's
+ * private data into the transceiver, then free the work item.
+ */
+static void phy_chan_notify(struct work_struct *work)
+{
+	struct phy_chan_notify_work *item;
+	struct mac802154_priv *mac;
+	struct mac802154_sub_if_data *sdata;
+
+	item = container_of(work, struct phy_chan_notify_work, work);
+	mac = mac802154_slave_get_priv(item->dev);
+	sdata = netdev_priv(item->dev);
+
+	/* a driver failure is only logged; nothing is reported back */
+	if (mac->ops->set_channel(&mac->hw, sdata->page, sdata->chan))
+		pr_debug("set_channel failed\n");
+
+	kfree(item);
+}
+
+/* Store a new page/channel pair in the interface MIB and, when it differs
+ * from what the PHY currently reports, queue work to retune the radio.
+ * An allocation failure for the work item is silently ignored.
+ */
+void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
+{
+	struct mac802154_sub_if_data *sdata = netdev_priv(dev);
+	struct phy_chan_notify_work *item;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	spin_lock_bh(&sdata->mib_lock);
+	sdata->page = page;
+	sdata->chan = chan;
+	spin_unlock_bh(&sdata->mib_lock);
+
+	/* PHY already reports the requested channel and page: nothing to do */
+	if (sdata->hw->phy->current_channel == sdata->chan &&
+	    sdata->hw->phy->current_page == sdata->page)
+		return;
+
+	item = kzalloc(sizeof(*item), GFP_ATOMIC);
+	if (!item)
+		return;
+
+	INIT_WORK(&item->work, phy_chan_notify);
+	item->dev = dev;
+	queue_work(sdata->hw->dev_workqueue, &item->work);
+}
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH v2 net-next 6/7] drivers/ieee802154: add support for the at86rf230/231 transceivers
From: Alexander Smirnov @ 2012-06-26  9:24 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340702694-24706-1-git-send-email-alex.bluesman.smirnov@gmail.com>

The AT86RF231 is a feature rich, low-power 2.4 GHz radio transceiver
designed for industrial and consumer ZigBee/IEEE 802.15.4, 6LoWPAN,
RF4CE and high data rate 2.4 GHz ISM band applications.

This patch adds support for the Atmel RF230/231 radio transceivers.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 drivers/ieee802154/Kconfig     |    6 +
 drivers/ieee802154/Makefile    |    1 +
 drivers/ieee802154/at86rf230.c |  965 ++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/at86rf230.h  |   31 ++
 4 files changed, 1003 insertions(+), 0 deletions(-)
 create mode 100644 drivers/ieee802154/at86rf230.c
 create mode 100644 include/linux/spi/at86rf230.h

diff --git a/drivers/ieee802154/Kconfig b/drivers/ieee802154/Kconfig
index 15c0640..1fc4eef 100644
--- a/drivers/ieee802154/Kconfig
+++ b/drivers/ieee802154/Kconfig
@@ -19,6 +19,7 @@ config IEEE802154_FAKEHARD
 
           This driver can also be built as a module. To do so say M here.
 	  The module will be called 'fakehard'.
+
 config IEEE802154_FAKELB
 	depends on IEEE802154_DRIVERS && MAC802154
 	tristate "IEEE 802.15.4 loopback driver"
@@ -28,3 +29,8 @@ config IEEE802154_FAKELB
 
 	  This driver can also be built as a module. To do so say M here.
 	  The module will be called 'fakelb'.
+
+config IEEE802154_AT86RF230
+        depends on IEEE802154_DRIVERS && MAC802154
+        tristate "AT86RF230/231 transceiver driver"
+        depends on SPI
diff --git a/drivers/ieee802154/Makefile b/drivers/ieee802154/Makefile
index ea784ea..4f4371d 100644
--- a/drivers/ieee802154/Makefile
+++ b/drivers/ieee802154/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_IEEE802154_FAKEHARD) += fakehard.o
 obj-$(CONFIG_IEEE802154_FAKELB) += fakelb.o
+obj-$(CONFIG_IEEE802154_AT86RF230) += at86rf230.o
diff --git a/drivers/ieee802154/at86rf230.c b/drivers/ieee802154/at86rf230.c
new file mode 100644
index 0000000..4d033d4
--- /dev/null
+++ b/drivers/ieee802154/at86rf230.c
@@ -0,0 +1,965 @@
+/*
+ * AT86RF230/RF231 driver
+ *
+ * Copyright (C) 2009-2012 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ * Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/at86rf230.h>
+#include <linux/skbuff.h>
+
+#include <net/mac802154.h>
+#include <net/wpan-phy.h>
+
+/* Per-device driver state; lives in the private area of the
+ * ieee802154_dev allocated by at86rf230_probe().
+ */
+struct at86rf230_local {
+	struct spi_device *spi;	/* SPI device used for every transfer */
+	int rstn, slp_tr, dig2;	/* values copied from the platform data */
+
+	u8 part;	/* value read from SR_PART_NUM during probe */
+	u8 vers;	/* value read from SR_VERSION_NUM during probe */
+
+	u8 buf[2];	/* two-byte SPI header/scratch buffer */
+	struct mutex bmux;	/* held around SPI transfers that use buf */
+
+	struct work_struct irqwork;	/* scheduled by at86rf230_isr() */
+	struct completion tx_complete;	/* completed by irqwork on TRX_END while is_tx */
+
+	struct ieee802154_dev *dev;
+
+	spinlock_t lock;	/* taken around irq_disabled and is_tx updates */
+	bool irq_disabled;	/* set by the ISR after disable_irq_nosync() */
+	bool is_tx;	/* set by at86rf230_xmit() while a transmit is pending */
+};
+
+#define	RG_TRX_STATUS	(0x01)
+#define	SR_TRX_STATUS		0x01, 0x1f, 0
+#define	SR_RESERVED_01_3	0x01, 0x20, 5
+#define	SR_CCA_STATUS		0x01, 0x40, 6
+#define	SR_CCA_DONE		0x01, 0x80, 7
+#define	RG_TRX_STATE	(0x02)
+#define	SR_TRX_CMD		0x02, 0x1f, 0
+#define	SR_TRAC_STATUS		0x02, 0xe0, 5
+#define	RG_TRX_CTRL_0	(0x03)
+#define	SR_CLKM_CTRL		0x03, 0x07, 0
+#define	SR_CLKM_SHA_SEL		0x03, 0x08, 3
+#define	SR_PAD_IO_CLKM		0x03, 0x30, 4
+#define	SR_PAD_IO		0x03, 0xc0, 6
+#define	RG_TRX_CTRL_1	(0x04)
+#define	SR_IRQ_POLARITY		0x04, 0x01, 0
+#define	SR_IRQ_MASK_MODE	0x04, 0x02, 1
+#define	SR_SPI_CMD_MODE		0x04, 0x0c, 2
+#define	SR_RX_BL_CTRL		0x04, 0x10, 4
+#define	SR_TX_AUTO_CRC_ON	0x04, 0x20, 5
+#define	SR_IRQ_2_EXT_EN		0x04, 0x40, 6
+#define	SR_PA_EXT_EN		0x04, 0x80, 7
+#define	RG_PHY_TX_PWR	(0x05)
+#define	SR_TX_PWR		0x05, 0x0f, 0
+#define	SR_PA_LT		0x05, 0x30, 4
+#define	SR_PA_BUF_LT		0x05, 0xc0, 6
+#define	RG_PHY_RSSI	(0x06)
+#define	SR_RSSI			0x06, 0x1f, 0
+#define	SR_RND_VALUE		0x06, 0x60, 5
+#define	SR_RX_CRC_VALID		0x06, 0x80, 7
+#define	RG_PHY_ED_LEVEL	(0x07)
+#define	SR_ED_LEVEL		0x07, 0xff, 0
+#define	RG_PHY_CC_CCA	(0x08)
+#define	SR_CHANNEL		0x08, 0x1f, 0
+#define	SR_CCA_MODE		0x08, 0x60, 5
+#define	SR_CCA_REQUEST		0x08, 0x80, 7
+#define	RG_CCA_THRES	(0x09)
+#define	SR_CCA_ED_THRES		0x09, 0x0f, 0
+#define	SR_RESERVED_09_1	0x09, 0xf0, 4
+#define	RG_RX_CTRL	(0x0a)
+#define	SR_PDT_THRES		0x0a, 0x0f, 0
+#define	SR_RESERVED_0a_1	0x0a, 0xf0, 4
+#define	RG_SFD_VALUE	(0x0b)
+#define	SR_SFD_VALUE		0x0b, 0xff, 0
+#define	RG_TRX_CTRL_2	(0x0c)
+#define	SR_OQPSK_DATA_RATE	0x0c, 0x03, 0
+#define	SR_RESERVED_0c_2	0x0c, 0x7c, 2
+#define	SR_RX_SAFE_MODE		0x0c, 0x80, 7
+#define	RG_ANT_DIV	(0x0d)
+#define	SR_ANT_CTRL		0x0d, 0x03, 0
+#define	SR_ANT_EXT_SW_EN	0x0d, 0x04, 2
+#define	SR_ANT_DIV_EN		0x0d, 0x08, 3
+#define	SR_RESERVED_0d_2	0x0d, 0x70, 4
+#define	SR_ANT_SEL		0x0d, 0x80, 7
+#define	RG_IRQ_MASK	(0x0e)
+#define	SR_IRQ_MASK		0x0e, 0xff, 0
+#define	RG_IRQ_STATUS	(0x0f)
+#define	SR_IRQ_0_PLL_LOCK	0x0f, 0x01, 0
+#define	SR_IRQ_1_PLL_UNLOCK	0x0f, 0x02, 1
+#define	SR_IRQ_2_RX_START	0x0f, 0x04, 2
+#define	SR_IRQ_3_TRX_END	0x0f, 0x08, 3
+#define	SR_IRQ_4_CCA_ED_DONE	0x0f, 0x10, 4
+#define	SR_IRQ_5_AMI		0x0f, 0x20, 5
+#define	SR_IRQ_6_TRX_UR		0x0f, 0x40, 6
+#define	SR_IRQ_7_BAT_LOW	0x0f, 0x80, 7
+#define	RG_VREG_CTRL	(0x10)
+#define	SR_RESERVED_10_6	0x10, 0x03, 0
+#define	SR_DVDD_OK		0x10, 0x04, 2
+#define	SR_DVREG_EXT		0x10, 0x08, 3
+#define	SR_RESERVED_10_3	0x10, 0x30, 4
+#define	SR_AVDD_OK		0x10, 0x40, 6
+#define	SR_AVREG_EXT		0x10, 0x80, 7
+#define	RG_BATMON	(0x11)
+#define	SR_BATMON_VTH		0x11, 0x0f, 0
+#define	SR_BATMON_HR		0x11, 0x10, 4
+#define	SR_BATMON_OK		0x11, 0x20, 5
+#define	SR_RESERVED_11_1	0x11, 0xc0, 6
+#define	RG_XOSC_CTRL	(0x12)
+#define	SR_XTAL_TRIM		0x12, 0x0f, 0
+#define	SR_XTAL_MODE		0x12, 0xf0, 4
+#define	RG_RX_SYN	(0x15)
+#define	SR_RX_PDT_LEVEL		0x15, 0x0f, 0
+#define	SR_RESERVED_15_2	0x15, 0x70, 4
+#define	SR_RX_PDT_DIS		0x15, 0x80, 7
+#define	RG_XAH_CTRL_1	(0x17)
+#define	SR_RESERVED_17_8	0x17, 0x01, 0
+#define	SR_AACK_PROM_MODE	0x17, 0x02, 1
+#define	SR_AACK_ACK_TIME	0x17, 0x04, 2
+#define	SR_RESERVED_17_5	0x17, 0x08, 3
+#define	SR_AACK_UPLD_RES_FT	0x17, 0x10, 4
+#define	SR_AACK_FLTR_RES_FT	0x17, 0x20, 5
+#define	SR_RESERVED_17_2	0x17, 0x40, 6
+#define	SR_RESERVED_17_1	0x17, 0x80, 7
+#define	RG_FTN_CTRL	(0x18)
+#define	SR_RESERVED_18_2	0x18, 0x7f, 0
+#define	SR_FTN_START		0x18, 0x80, 7
+#define	RG_PLL_CF	(0x1a)
+#define	SR_RESERVED_1a_2	0x1a, 0x7f, 0
+#define	SR_PLL_CF_START		0x1a, 0x80, 7
+#define	RG_PLL_DCU	(0x1b)
+#define	SR_RESERVED_1b_3	0x1b, 0x3f, 0
+#define	SR_RESERVED_1b_2	0x1b, 0x40, 6
+#define	SR_PLL_DCU_START	0x1b, 0x80, 7
+#define	RG_PART_NUM	(0x1c)
+#define	SR_PART_NUM		0x1c, 0xff, 0
+#define	RG_VERSION_NUM	(0x1d)
+#define	SR_VERSION_NUM		0x1d, 0xff, 0
+#define	RG_MAN_ID_0	(0x1e)
+#define	SR_MAN_ID_0		0x1e, 0xff, 0
+#define	RG_MAN_ID_1	(0x1f)
+#define	SR_MAN_ID_1		0x1f, 0xff, 0
+#define	RG_SHORT_ADDR_0	(0x20)
+#define	SR_SHORT_ADDR_0		0x20, 0xff, 0
+#define	RG_SHORT_ADDR_1	(0x21)
+#define	SR_SHORT_ADDR_1		0x21, 0xff, 0
+#define	RG_PAN_ID_0	(0x22)
+#define	SR_PAN_ID_0		0x22, 0xff, 0
+#define	RG_PAN_ID_1	(0x23)
+#define	SR_PAN_ID_1		0x23, 0xff, 0
+#define	RG_IEEE_ADDR_0	(0x24)
+#define	SR_IEEE_ADDR_0		0x24, 0xff, 0
+#define	RG_IEEE_ADDR_1	(0x25)
+#define	SR_IEEE_ADDR_1		0x25, 0xff, 0
+#define	RG_IEEE_ADDR_2	(0x26)
+#define	SR_IEEE_ADDR_2		0x26, 0xff, 0
+#define	RG_IEEE_ADDR_3	(0x27)
+#define	SR_IEEE_ADDR_3		0x27, 0xff, 0
+#define	RG_IEEE_ADDR_4	(0x28)
+#define	SR_IEEE_ADDR_4		0x28, 0xff, 0
+#define	RG_IEEE_ADDR_5	(0x29)
+#define	SR_IEEE_ADDR_5		0x29, 0xff, 0
+#define	RG_IEEE_ADDR_6	(0x2a)
+#define	SR_IEEE_ADDR_6		0x2a, 0xff, 0
+#define	RG_IEEE_ADDR_7	(0x2b)
+#define	SR_IEEE_ADDR_7		0x2b, 0xff, 0
+#define	RG_XAH_CTRL_0	(0x2c)
+#define	SR_SLOTTED_OPERATION	0x2c, 0x01, 0
+#define	SR_MAX_CSMA_RETRIES	0x2c, 0x0e, 1
+#define	SR_MAX_FRAME_RETRIES	0x2c, 0xf0, 4
+#define	RG_CSMA_SEED_0	(0x2d)
+#define	SR_CSMA_SEED_0		0x2d, 0xff, 0
+#define	RG_CSMA_SEED_1	(0x2e)
+#define	SR_CSMA_SEED_1		0x2e, 0x07, 0
+#define	SR_AACK_I_AM_COORD	0x2e, 0x08, 3
+#define	SR_AACK_DIS_ACK		0x2e, 0x10, 4
+#define	SR_AACK_SET_PD		0x2e, 0x20, 5
+#define	SR_AACK_FVN_MODE	0x2e, 0xc0, 6
+#define	RG_CSMA_BE	(0x2f)
+#define	SR_MIN_BE		0x2f, 0x0f, 0
+#define	SR_MAX_BE		0x2f, 0xf0, 4
+
+#define CMD_REG		0x80
+#define CMD_REG_MASK	0x3f
+#define CMD_WRITE	0x40
+#define CMD_FB		0x20
+
+#define IRQ_BAT_LOW	(1 << 7)
+#define IRQ_TRX_UR	(1 << 6)
+#define IRQ_AMI		(1 << 5)
+#define IRQ_CCA_ED	(1 << 4)
+#define IRQ_TRX_END	(1 << 3)
+#define IRQ_RX_START	(1 << 2)
+#define IRQ_PLL_UNL	(1 << 1)
+#define IRQ_PLL_LOCK	(1 << 0)
+
+#define STATE_P_ON		0x00	/* BUSY */
+#define STATE_BUSY_RX		0x01
+#define STATE_BUSY_TX		0x02
+#define STATE_FORCE_TRX_OFF	0x03
+#define STATE_FORCE_TX_ON	0x04	/* IDLE */
+/* 0x05 */				/* INVALID_PARAMETER */
+#define STATE_RX_ON		0x06
+/* 0x07 */				/* SUCCESS */
+#define STATE_TRX_OFF		0x08
+#define STATE_TX_ON		0x09
+/* 0x0a - 0x0e */			/* 0x0a - UNSUPPORTED_ATTRIBUTE */
+#define STATE_SLEEP		0x0F
+#define STATE_BUSY_RX_AACK	0x11
+#define STATE_BUSY_TX_ARET	0x12
+#define STATE_BUSY_RX_AACK_ON	0x16
+#define STATE_BUSY_TX_ARET_ON	0x19
+#define STATE_RX_ON_NOCLK	0x1C
+#define STATE_RX_AACK_ON_NOCLK	0x1D
+#define STATE_BUSY_RX_AACK_NOCLK 0x1E
+#define STATE_TRANSITION_IN_PROGRESS 0x1F
+
+/* Write one byte to a transceiver register.
+ * lp->buf is used as the transfer buffer; the caller visible in this
+ * file holds lp->bmux around the call. Returns the SPI status.
+ */
+static int
+__at86rf230_write(struct at86rf230_local *lp, u8 addr, u8 data)
+{
+	u8 *cmd = lp->buf;
+	struct spi_transfer xfer = {
+		.tx_buf	= cmd,
+		.len	= 2,
+	};
+	struct spi_message msg;
+	int ret;
+
+	/* header byte: register access + write flag + 6-bit address */
+	cmd[0] = CMD_REG | CMD_WRITE | (addr & CMD_REG_MASK);
+	cmd[1] = data;
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", cmd[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", cmd[1]);
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	ret = spi_sync(lp->spi, &msg);
+	dev_vdbg(&lp->spi->dev, "status = %d\n", ret);
+	if (msg.status)
+		ret = msg.status;
+
+	dev_vdbg(&lp->spi->dev, "status = %d\n", ret);
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", cmd[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", cmd[1]);
+
+	return ret;
+}
+
+/* Read a register and extract one sub-field from it.
+ *
+ * @addr:  register address
+ * @mask:  bits of the register that make up the field
+ * @shift: position of the field's lowest bit
+ * @data:  receives the field value shifted down to bit 0
+ *
+ * lp->buf is used for the transfer, so access must be serialised (the
+ * callers in this file hold lp->bmux). Returns 0 or an error code;
+ * *data is only written on success.
+ */
+static int
+__at86rf230_read_subreg(struct at86rf230_local *lp,
+			u8 addr, u8 mask, int shift, u8 *data)
+{
+	u8 *buf = lp->buf;
+	int status;
+	struct spi_message msg;
+	struct spi_transfer xfer = {
+		.len	= 2,
+		.tx_buf	= buf,
+		.rx_buf	= buf,
+	};
+
+	buf[0] = (addr & CMD_REG_MASK) | CMD_REG;
+	buf[1] = 0xff;
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	if (msg.status)
+		status = msg.status;
+
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	/* Hand back only the requested field; returning the raw register
+	 * made single-bit tests such as SR_DVDD_OK depend on unrelated bits.
+	 */
+	if (status == 0)
+		*data = (buf[1] & mask) >> shift;
+
+	return status;
+}
+
+/* Locked wrapper: takes lp->bmux around __at86rf230_read_subreg(). */
+static int
+at86rf230_read_subreg(struct at86rf230_local *lp,
+		      u8 addr, u8 mask, int shift, u8 *data)
+{
+	int ret;
+
+	mutex_lock(&lp->bmux);
+	ret = __at86rf230_read_subreg(lp, addr, mask, shift, data);
+	mutex_unlock(&lp->bmux);
+
+	return ret;
+}
+
+/* Read-modify-write of one register sub-field under lp->bmux.
+ * The whole register is read, the bits in @mask are replaced by
+ * @data << @shift, and the result is written back. Returns the status of
+ * the first failing access, or that of the final write.
+ */
+static int
+at86rf230_write_subreg(struct at86rf230_local *lp,
+		       u8 addr, u8 mask, int shift, u8 data)
+{
+	int ret;
+	u8 reg;
+
+	mutex_lock(&lp->bmux);
+
+	ret = __at86rf230_read_subreg(lp, addr, 0xff, 0, &reg);
+	if (!ret) {
+		reg = (reg & ~mask) | ((data << shift) & mask);
+		ret = __at86rf230_write(lp, addr, reg);
+	}
+
+	mutex_unlock(&lp->bmux);
+
+	return ret;
+}
+
+/* Write @len bytes from @data into the transceiver frame buffer.
+ * One SPI message is sent: a two-byte header (frame-buffer write command,
+ * length byte) followed by the payload. lp->bmux is held for the whole
+ * exchange. Returns the SPI status.
+ */
+static int
+at86rf230_write_fbuf(struct at86rf230_local *lp, u8 *data, u8 len)
+{
+	u8 *buf = lp->buf;
+	int status;
+	struct spi_message msg;
+	/* header: command byte + length byte, sent from lp->buf */
+	struct spi_transfer xfer_head = {
+		.len		= 2,
+		.tx_buf		= buf,
+
+	};
+	/* payload, sent straight from the caller's buffer */
+	struct spi_transfer xfer_buf = {
+		.len		= len,
+		.tx_buf		= data,
+	};
+
+	mutex_lock(&lp->bmux);
+	buf[0] = CMD_WRITE | CMD_FB;
+	buf[1] = len + 2; /* 2 bytes for CRC that isn't written */
+
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer_head, &msg);
+	spi_message_add_tail(&xfer_buf, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	if (msg.status)
+		status = msg.status;
+
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	mutex_unlock(&lp->bmux);
+	return status;
+}
+
+/* Read a received frame from the transceiver frame buffer.
+ *
+ * @data: destination for the frame followed by one trailing LQI byte
+ * @len:  in: capacity of @data in bytes (the only caller in this file
+ *        passes the size it reserved with skb_put());
+ *        out: frame length reported by the chip
+ * @lqi:  optional; receives the byte that follows the frame
+ *
+ * Two SPI messages are used: the first reads only the header to learn the
+ * frame length, the second reads header + frame + LQI. lp->bmux is held
+ * throughout. Returns 0 on success, -EMSGSIZE if the reported length
+ * does not fit into @data, or the SPI error.
+ */
+static int
+at86rf230_read_fbuf(struct at86rf230_local *lp, u8 *data, u8 *len, u8 *lqi)
+{
+	u8 *buf = lp->buf;
+	u8 frame_len;
+	int status;
+	struct spi_message msg;
+	struct spi_transfer xfer_head = {
+		.len		= 2,
+		.tx_buf		= buf,
+		.rx_buf		= buf,
+	};
+	struct spi_transfer xfer_head1 = {
+		.len		= 2,
+		.tx_buf		= buf,
+		.rx_buf		= buf,
+	};
+	struct spi_transfer xfer_buf = {
+		.len		= 0,
+		.rx_buf		= data,
+	};
+
+	mutex_lock(&lp->bmux);
+
+	buf[0] = CMD_FB;
+	buf[1] = 0x00;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer_head, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+	if (msg.status)
+		status = msg.status;
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	/* buf[1] is meaningless if the header read failed */
+	if (status)
+		goto out;
+
+	frame_len = buf[1];
+
+	/* frame_len + 1 bytes (frame + LQI) are about to be written to
+	 * @data; refuse lengths the caller's buffer cannot hold.
+	 */
+	if (frame_len >= *len) {
+		status = -EMSGSIZE;
+		goto out;
+	}
+
+	xfer_buf.len = frame_len + 1;
+	*len = frame_len;
+
+	buf[0] = CMD_FB;
+	buf[1] = 0x00;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer_head1, &msg);
+	spi_message_add_tail(&xfer_buf, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+
+	if (msg.status)
+		status = msg.status;
+
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	/* LQI is the extra byte read after the frame; only valid on success */
+	if (!status && lqi)
+		*lqi = data[frame_len];
+out:
+	mutex_unlock(&lp->bmux);
+
+	return status;
+}
+
+/* Energy-detection callback. No measurement is taken: a fixed level of
+ * 0xbe is reported and 0 is returned.
+ */
+static int
+at86rf230_ed(struct ieee802154_dev *dev, u8 *level)
+{
+	might_sleep();
+	BUG_ON(level == NULL);
+
+	*level = 0xbe;
+
+	return 0;
+}
+
+/* Request a transceiver state change and wait until it has settled.
+ *
+ * @state is written to SR_TRX_CMD. For the two FORCE_* commands the
+ * status expected afterwards is the corresponding non-forced state.
+ * Returns 0 when SR_TRX_STATUS reports the desired state (including when
+ * it already did before the command), -EBUSY when it settles on another
+ * state, or the error of a failed register access.
+ *
+ * NOTE(review): both polling loops spin without delay or timeout while
+ * STATE_TRANSITION_IN_PROGRESS is read back.
+ */
+static int
+at86rf230_state(struct ieee802154_dev *dev, int state)
+{
+	struct at86rf230_local *lp = dev->priv;
+	int rc;
+	u8 val;
+	u8 desired_status;
+
+	might_sleep();
+
+	/* map FORCE_* commands to the status they result in */
+	if (state == STATE_FORCE_TX_ON)
+		desired_status = STATE_TX_ON;
+	else if (state == STATE_FORCE_TRX_OFF)
+		desired_status = STATE_TRX_OFF;
+	else
+		desired_status = state;
+
+	/* wait for any transition already in flight */
+	do {
+		rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &val);
+		if (rc)
+			goto err;
+	} while (val == STATE_TRANSITION_IN_PROGRESS);
+
+	if (val == desired_status)
+		return 0;
+
+	/* state is equal to phy states */
+	rc = at86rf230_write_subreg(lp, SR_TRX_CMD, state);
+	if (rc)
+		goto err;
+
+	/* wait for the requested transition to finish */
+	do {
+		rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &val);
+		if (rc)
+			goto err;
+	} while (val == STATE_TRANSITION_IN_PROGRESS);
+
+
+	if (val == desired_status)
+		return 0;
+
+	pr_err("unexpected state change: %d, asked for %d\n", val, state);
+	return -EBUSY;
+
+err:
+	pr_err("error: %d\n", rc);
+	return rc;
+}
+
+/* Arm the receiver: set RX_SAFE_MODE, then switch to RX_ON.
+ * Returns 0 or the error of the failing step.
+ */
+static int
+at86rf230_start(struct ieee802154_dev *dev)
+{
+	struct at86rf230_local *lp = dev->priv;
+	int rc;	/* int, not u8: a negative errno must not be truncated */
+
+	rc = at86rf230_write_subreg(lp, SR_RX_SAFE_MODE, 1);
+	if (rc)
+		return rc;
+
+	return at86rf230_state(dev, STATE_RX_ON);
+}
+
+/* Force the transceiver into TRX_OFF; the result of the state change is
+ * not reported to the caller.
+ */
+static void
+at86rf230_stop(struct ieee802154_dev *dev)
+{
+	at86rf230_state(dev, STATE_FORCE_TRX_OFF);
+}
+
+/* Tune the transceiver to @channel.
+ *
+ * Only page 0 with channels 11..26 is accepted; anything else triggers a
+ * WARN and returns -EINVAL. On success the PHY's current_channel is
+ * updated and 0 is returned; a failed register write is propagated and
+ * leaves current_channel untouched.
+ */
+static int
+at86rf230_channel(struct ieee802154_dev *dev, int page, int channel)
+{
+	struct at86rf230_local *lp = dev->priv;
+	int rc;
+
+	might_sleep();
+
+	if (page != 0 || channel < 11 || channel > 26) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	rc = at86rf230_write_subreg(lp, SR_CHANNEL, channel);
+	if (rc)
+		return rc;
+
+	msleep(1); /* Wait for PLL */
+	dev->phy->current_channel = channel;
+
+	return 0;
+}
+
+/* Transmit @skb: switch to TX_ON, load the frame buffer, trigger the
+ * transmission and sleep until at86rf230_irqwork() completes tx_complete,
+ * then return to receive mode via at86rf230_start().
+ *
+ * Returns the result of at86rf230_start() on the success path, otherwise
+ * the error of the failing step.
+ *
+ * NOTE(review): skb->len is passed to the u8 length parameter of
+ * at86rf230_write_fbuf(); presumably frames never exceed 255 bytes here.
+ */
+static int
+at86rf230_xmit(struct ieee802154_dev *dev, struct sk_buff *skb)
+{
+	struct at86rf230_local *lp = dev->priv;
+	int rc;
+	unsigned long flags;
+
+	might_sleep();
+
+	rc = at86rf230_state(dev, STATE_FORCE_TX_ON);
+	if (rc)
+		goto err;
+
+	/* mark TX pending so the next TRX_END is treated as TX completion */
+	spin_lock_irqsave(&lp->lock, flags);
+	lp->is_tx = 1;
+	INIT_COMPLETION(lp->tx_complete);
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	rc = at86rf230_write_fbuf(lp, skb->data, skb->len);
+	if (rc)
+		goto err_rx;
+
+	/* start the transmission */
+	rc = at86rf230_write_subreg(lp, SR_TRX_CMD, STATE_BUSY_TX);
+	if (rc)
+		goto err_rx;
+
+	rc = wait_for_completion_interruptible(&lp->tx_complete);
+	if (rc < 0)
+		goto err_rx;
+
+	rc = at86rf230_start(dev);
+
+	return rc;
+
+err_rx:
+	/* try to get back into receive mode before reporting the error */
+	at86rf230_start(dev);
+err:
+	pr_err("error: %d\n", rc);
+
+	spin_lock_irqsave(&lp->lock, flags);
+	lp->is_tx = 0;
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return rc;
+}
+
+/* Pull one received frame out of the transceiver and hand it to the
+ * mac802154 stack.
+ *
+ * Returns 0 on success, -ENOMEM if no skb could be allocated, the error
+ * of a failing register/frame-buffer access, or -EINVAL when the frame
+ * is too short to hold the 2-byte CRC.
+ */
+static int at86rf230_rx(struct at86rf230_local *lp)
+{
+	u8 len = 128, lqi = 0;
+	int rc, restore_rc;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	rc = at86rf230_write_subreg(lp, SR_RX_PDT_DIS, 1);
+	if (rc)
+		goto err_io;
+
+	rc = at86rf230_read_fbuf(lp, skb_put(skb, len), &len, &lqi);
+	if (!rc)
+		rc = at86rf230_write_subreg(lp, SR_RX_SAFE_MODE, 1);
+
+	/* RX_PDT_DIS was set above: clear it again even when the read
+	 * failed, so an error does not leave the bit set.
+	 */
+	restore_rc = at86rf230_write_subreg(lp, SR_RX_PDT_DIS, 0);
+	if (!rc)
+		rc = restore_rc;
+	if (rc)
+		goto err_io;
+
+	if (len < 2) {
+		pr_debug("received frame is too small\n");
+		rc = -EINVAL;
+		goto err;
+	}
+
+	skb_trim(skb, len - 2); /* We do not put CRC into the frame */
+
+	dev_dbg(&lp->spi->dev, "READ_FBUF: %d %d %x\n", rc, len, lqi);
+
+	ieee802154_rx_irqsafe(lp->dev, skb, lqi);
+
+	return 0;
+
+err_io:
+	pr_debug("frame receive failed: %d\n", rc);
+err:
+	kfree_skb(skb);
+	return rc;
+}
+
+/* Driver callbacks passed to ieee802154_alloc_device() in probe. */
+static struct ieee802154_ops at86rf230_ops = {
+	.owner = THIS_MODULE,
+	.xmit = at86rf230_xmit,
+	.ed = at86rf230_ed,
+	.set_channel = at86rf230_channel,
+	.start = at86rf230_start,
+	.stop = at86rf230_stop,
+};
+
+/* Bottom half of the interrupt: read the IRQ status register, complete a
+ * pending transmit or fetch a received frame on TRX_END, then re-enable
+ * the interrupt line that at86rf230_isr() masked.
+ *
+ * The register read and at86rf230_rx() take a mutex, perform synchronous
+ * SPI transfers and allocate with GFP_KERNEL, so they must run outside
+ * lp->lock; the spinlock is only held for the short flag updates.
+ */
+static void at86rf230_irqwork(struct work_struct *work)
+{
+	struct at86rf230_local *lp =
+		container_of(work, struct at86rf230_local, irqwork);
+	u8 status = 0, val = 0;
+	int rc;
+	unsigned long flags;
+	bool was_tx;
+
+	rc = at86rf230_read_subreg(lp, RG_IRQ_STATUS, 0xff, 0, &val);
+	if (!rc)
+		status |= val;	/* on failure no event is acted upon */
+
+	status &= ~IRQ_PLL_LOCK; /* ignore */
+	status &= ~IRQ_RX_START; /* ignore */
+	status &= ~IRQ_AMI; /* ignore */
+	status &= ~IRQ_TRX_UR; /* FIXME: possibly handle ???*/
+
+	if (status & IRQ_TRX_END) {
+		status &= ~IRQ_TRX_END;
+
+		/* sample and clear the TX flag atomically w.r.t. xmit */
+		spin_lock_irqsave(&lp->lock, flags);
+		was_tx = lp->is_tx;
+		lp->is_tx = 0;
+		spin_unlock_irqrestore(&lp->lock, flags);
+
+		if (was_tx)
+			complete(&lp->tx_complete);
+		else
+			at86rf230_rx(lp);
+	}
+
+	spin_lock_irqsave(&lp->lock, flags);
+	if (lp->irq_disabled) {
+		lp->irq_disabled = 0;
+		enable_irq(lp->spi->irq);
+	}
+	spin_unlock_irqrestore(&lp->lock, flags);
+}
+
+/* Interrupt handler: mask the interrupt line (once, tracked through
+ * irq_disabled) and defer the real work to at86rf230_irqwork(), which
+ * unmasks the line again. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t at86rf230_isr(int irq, void *data)
+{
+	struct at86rf230_local *lp = data;
+
+	spin_lock(&lp->lock);
+	if (!lp->irq_disabled) {
+		disable_irq_nosync(irq);
+		lp->irq_disabled = 1;
+	}
+	spin_unlock(&lp->lock);
+
+	schedule_work(&lp->irqwork);
+
+	return IRQ_HANDLED;
+}
+
+
+/* Bring the transceiver into a usable state after reset.
+ *
+ * Sequence: leave P_ON for TRX_OFF if needed, write the interrupt mask,
+ * switch CLKM off, request TX_ON, and finally check the DVDD_OK and
+ * AVDD_OK sub-fields. Returns 0 on success, the error of a failing
+ * register access, or -EINVAL if one of the two checks reads zero.
+ */
+static int at86rf230_hw_init(struct at86rf230_local *lp)
+{
+	u8 status;
+	int rc;
+
+	rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status);
+	if (rc)
+		return rc;
+
+	dev_info(&lp->spi->dev, "Status: %02x\n", status);
+	if (status == STATE_P_ON) {
+		/* leave the power-on state before configuring anything */
+		rc = at86rf230_write_subreg(lp, SR_TRX_CMD, STATE_TRX_OFF);
+		if (rc)
+			return rc;
+		msleep(1);
+		rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status);
+		if (rc)
+			return rc;
+		dev_info(&lp->spi->dev, "Status: %02x\n", status);
+	}
+
+	rc = at86rf230_write_subreg(lp, SR_IRQ_MASK, 0xff); /* IRQ_TRX_UR |
+							     * IRQ_CCA_ED |
+							     * IRQ_TRX_END |
+							     * IRQ_PLL_UNL |
+							     * IRQ_PLL_LOCK
+							     */
+	if (rc)
+		return rc;
+
+	/* CLKM changes are applied immediately */
+	rc = at86rf230_write_subreg(lp, SR_CLKM_SHA_SEL, 0x00);
+	if (rc)
+		return rc;
+
+	/* Turn CLKM Off */
+	rc = at86rf230_write_subreg(lp, SR_CLKM_CTRL, 0x00);
+	if (rc)
+		return rc;
+	/* Wait the next SLEEP cycle */
+	msleep(100);
+
+	rc = at86rf230_write_subreg(lp, SR_TRX_CMD, STATE_TX_ON);
+	if (rc)
+		return rc;
+	msleep(1);
+
+	rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status);
+	if (rc)
+		return rc;
+	dev_info(&lp->spi->dev, "Status: %02x\n", status);
+
+	/* supply checks: a zero read-back is treated as a failure */
+	rc = at86rf230_read_subreg(lp, SR_DVDD_OK, &status);
+	if (rc)
+		return rc;
+	if (!status) {
+		dev_err(&lp->spi->dev, "DVDD error\n");
+		return -EINVAL;
+	}
+
+	rc = at86rf230_read_subreg(lp, SR_AVDD_OK, &status);
+	if (rc)
+		return rc;
+	if (!status) {
+		dev_err(&lp->spi->dev, "AVDD error\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Suspend callback: currently a no-op that reports success. */
+static int at86rf230_suspend(struct spi_device *spi, pm_message_t message)
+{
+	return 0;
+}
+
+/* Resume callback: currently a no-op that reports success. */
+static int at86rf230_resume(struct spi_device *spi)
+{
+	return 0;
+}
+
+/* Copy the rstn, slp_tr and dig2 values from the SPI device's platform
+ * data into the driver state. Returns -EINVAL when no platform data is
+ * attached, 0 otherwise.
+ */
+static int at86rf230_fill_data(struct spi_device *spi)
+{
+	struct at86rf230_platform_data *pdata = spi->dev.platform_data;
+	struct at86rf230_local *lp;
+
+	if (pdata == NULL) {
+		dev_err(&spi->dev, "no platform_data\n");
+		return -EINVAL;
+	}
+
+	lp = spi_get_drvdata(spi);
+	lp->rstn   = pdata->rstn;
+	lp->slp_tr = pdata->slp_tr;
+	lp->dig2   = pdata->dig2;
+
+	return 0;
+}
+
+/* Probe one transceiver on the SPI bus.
+ *
+ * Allocates the ieee802154 device, claims the GPIOs named in the platform
+ * data, resets the chip, verifies manufacturer id / part number,
+ * initialises the hardware, installs the interrupt handler and registers
+ * the device. Returns 0 on success or a negative errno; on failure
+ * everything acquired so far is released again.
+ */
+static int __devinit at86rf230_probe(struct spi_device *spi)
+{
+	struct ieee802154_dev *dev;
+	struct at86rf230_local *lp;
+	u8 man_id_0, man_id_1;
+	int rc;
+	const char *chip;
+	int supported = 0;
+
+	if (!spi->irq) {
+		dev_err(&spi->dev, "no IRQ specified\n");
+		return -EINVAL;
+	}
+
+	dev = ieee802154_alloc_device(sizeof(*lp), &at86rf230_ops);
+	if (!dev)
+		return -ENOMEM;
+
+	lp = dev->priv;
+	lp->dev = dev;
+
+	lp->spi = spi;
+
+	dev->priv = lp;
+	dev->parent = &spi->dev;
+	dev->extra_tx_headroom = 0;
+	/* Only the 2.4 GHz band is supported */
+	dev->phy->channels_supported[0] = 0x7FFF800;
+	dev->flags = IEEE802154_HW_OMIT_CKSUM;
+
+	mutex_init(&lp->bmux);
+	INIT_WORK(&lp->irqwork, at86rf230_irqwork);
+	spin_lock_init(&lp->lock);
+	init_completion(&lp->tx_complete);
+
+	spi_set_drvdata(spi, lp);
+
+	rc = at86rf230_fill_data(spi);
+	if (rc)
+		goto err_fill;
+
+	rc = gpio_request(lp->rstn, "rstn");
+	if (rc)
+		goto err_rstn;
+
+	/* slp_tr is optional: only claimed when it is a valid GPIO */
+	if (gpio_is_valid(lp->slp_tr)) {
+		rc = gpio_request(lp->slp_tr, "slp_tr");
+		if (rc)
+			goto err_slp_tr;
+	}
+
+	rc = gpio_direction_output(lp->rstn, 1);
+	if (rc)
+		goto err_gpio_dir;
+
+	if (gpio_is_valid(lp->slp_tr)) {
+		rc = gpio_direction_output(lp->slp_tr, 0);
+		if (rc)
+			goto err_gpio_dir;
+	}
+
+	/* Reset */
+	msleep(1);
+	gpio_set_value(lp->rstn, 0);
+	msleep(1);
+	gpio_set_value(lp->rstn, 1);
+	msleep(1);
+
+	rc = at86rf230_read_subreg(lp, SR_MAN_ID_0, &man_id_0);
+	if (rc)
+		goto err_gpio_dir;
+	rc = at86rf230_read_subreg(lp, SR_MAN_ID_1, &man_id_1);
+	if (rc)
+		goto err_gpio_dir;
+
+	if (man_id_1 != 0x00 || man_id_0 != 0x1f) {
+		dev_err(&spi->dev, "Non-Atmel dev found (MAN_ID %02x %02x)\n",
+			man_id_1, man_id_0);
+		rc = -EINVAL;
+		goto err_gpio_dir;
+	}
+
+	rc = at86rf230_read_subreg(lp, SR_PART_NUM, &lp->part);
+	if (rc)
+		goto err_gpio_dir;
+
+	rc = at86rf230_read_subreg(lp, SR_VERSION_NUM, &lp->vers);
+	if (rc)
+		goto err_gpio_dir;
+
+	switch (lp->part) {
+	case 2:
+		chip = "at86rf230";
+		/* supported = 1;  FIXME: should be easy to support; */
+		break;
+	case 3:
+		chip = "at86rf231";
+		supported = 1;
+		break;
+	default:
+		chip = "UNKNOWN";
+		break;
+	}
+
+	dev_info(&spi->dev, "Detected %s chip version %d\n", chip, lp->vers);
+	if (!supported) {
+		rc = -ENOTSUPP;
+		goto err_gpio_dir;
+	}
+
+	rc = at86rf230_hw_init(lp);
+	if (rc)
+		goto err_gpio_dir;
+
+	rc = request_irq(spi->irq, at86rf230_isr, IRQF_SHARED,
+			 dev_name(&spi->dev), lp);
+	if (rc)
+		goto err_gpio_dir;
+
+	rc = ieee802154_register_device(lp->dev);
+	if (rc)
+		goto err_irq;
+
+	return rc;
+
+	/* error unwinding: release in reverse order of acquisition */
+err_irq:
+	free_irq(spi->irq, lp);
+	flush_work(&lp->irqwork);
+err_gpio_dir:
+	if (gpio_is_valid(lp->slp_tr))
+		gpio_free(lp->slp_tr);
+err_slp_tr:
+	gpio_free(lp->rstn);
+err_rstn:
+err_fill:
+	spi_set_drvdata(spi, NULL);
+	mutex_destroy(&lp->bmux);
+	ieee802154_free_device(lp->dev);
+	return rc;
+}
+
+/* Undo at86rf230_probe(): unregister the device, release the interrupt
+ * and wait for pending irq work, free the GPIOs and the device itself.
+ * Always returns 0.
+ */
+static int __devexit at86rf230_remove(struct spi_device *spi)
+{
+	struct at86rf230_local *lp = spi_get_drvdata(spi);
+
+	ieee802154_unregister_device(lp->dev);
+
+	/* no new work can be scheduled once the handler is gone */
+	free_irq(spi->irq, lp);
+	flush_work(&lp->irqwork);
+
+	if (gpio_is_valid(lp->slp_tr))
+		gpio_free(lp->slp_tr);
+	gpio_free(lp->rstn);
+
+	spi_set_drvdata(spi, NULL);
+	mutex_destroy(&lp->bmux);
+	ieee802154_free_device(lp->dev);
+
+	dev_dbg(&spi->dev, "unregistered at86rf230\n");
+	return 0;
+}
+
+/* SPI driver description; bound to SPI devices named "at86rf230". */
+static struct spi_driver at86rf230_driver = {
+	.driver = {
+		.name	= "at86rf230",
+		.owner	= THIS_MODULE,
+	},
+	.probe      = at86rf230_probe,
+	.remove     = __devexit_p(at86rf230_remove),
+	.suspend    = at86rf230_suspend,
+	.resume     = at86rf230_resume,
+};
+
+/* Module entry/exit only register and unregister the SPI driver, which
+ * is exactly what module_spi_driver() generates.
+ */
+module_spi_driver(at86rf230_driver);
+
+MODULE_DESCRIPTION("AT86RF230 Transceiver Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h
new file mode 100644
index 0000000..b2b1afb
--- /dev/null
+++ b/include/linux/spi/at86rf230.h
@@ -0,0 +1,31 @@
+/*
+ * AT86RF230/RF231 driver
+ *
+ * Copyright (C) 2009-2012 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Dmitry Eremin-Solenikov <dmitry.baryshkov@siemens.com>
+ */
+#ifndef AT86RF230_H
+#define AT86RF230_H
+
+/* Board-specific wiring handed to the at86rf230 driver through the SPI
+ * device's platform_data.
+ */
+struct at86rf230_platform_data {
+	int rstn;	/* GPIO number of the reset line */
+	int slp_tr;	/* GPIO number of SLP_TR; optional, may be an invalid GPIO */
+	int dig2;	/* presumably the DIG2 pin GPIO - TODO confirm, driver only stores it */
+};
+
+#endif
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH v2 net-next 7/7] mac802154: add monitor listener to TX datapath
From: Alexander Smirnov @ 2012-06-26  9:24 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340702694-24706-1-git-send-email-alex.bluesman.smirnov@gmail.com>

Add monitor receive callback to the TX datapath to catch all the
data sent to transceivers.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/mac802154/tx.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 434b687..1a4df39 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -88,6 +88,8 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
 		return NETDEV_TX_OK;
 	}
 
+	mac802154_monitors_rx(mac802154_to_priv(&priv->hw), skb);
+
 	if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) {
 		u16 crc = crc_ccitt(0, skb->data, skb->len);
 		u8 *data = skb_put(skb, 2);
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH v2 net-next 5/7] mac802154: mlme start request
From: Alexander Smirnov @ 2012-06-26  9:24 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340702694-24706-1-git-send-email-alex.bluesman.smirnov@gmail.com>

Basic preparations to start the interface.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/mac802154/mac_cmd.c |   25 +++++++++++++++++++++++++
 1 files changed, 25 insertions(+), 0 deletions(-)

diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index db83419..7f5403e 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -25,12 +25,36 @@
 #include <linux/skbuff.h>
 #include <linux/if_arp.h>
 
+#include <net/ieee802154.h>
 #include <net/ieee802154_netdev.h>
 #include <net/wpan-phy.h>
 #include <net/mac802154.h>
+#include <net/nl802154.h>
 
 #include "mac802154.h"
 
+/* MLME start request handler: applies the PAN id, short address, IEEE
+ * address and page/channel to @dev, then reports IEEE802154_SUCCESS via
+ * ieee802154_nl_start_confirm(). Only short addressing is accepted
+ * (BUG_ON otherwise). bcn_ord, sf_ord, pan_coord, blx and coord_realign
+ * are not used yet. Always returns 0.
+ */
+static int mac802154_mlme_start_req(struct net_device *dev,
+				    struct ieee802154_addr *addr,
+				    u8 channel, u8 page,
+				    u8 bcn_ord, u8 sf_ord,
+				    u8 pan_coord, u8 blx,
+				    u8 coord_realign)
+{
+	BUG_ON(addr->addr_type != IEEE802154_ADDR_SHORT);
+
+	mac802154_dev_set_pan_id(dev, addr->pan_id);
+	mac802154_dev_set_short_addr(dev, addr->short_addr);
+	mac802154_dev_set_ieee_addr(dev);
+	/* note the argument order: page first, then channel */
+	mac802154_dev_set_page_channel(dev, page, channel);
+
+	/* FIXME: add validation for unused parameters to be sane
+	 * for SoftMAC
+	 */
+	ieee802154_nl_start_confirm(dev, IEEE802154_SUCCESS);
+
+	return 0;
+}
+
 struct wpan_phy *mac802154_get_phy(const struct net_device *dev)
 {
 	struct mac802154_sub_if_data *priv = netdev_priv(dev);
@@ -46,4 +70,5 @@ struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = {
 
 struct ieee802154_mlme_ops mac802154_mlme_wpan = {
 	.get_phy = mac802154_get_phy,
+	.start_req = mac802154_mlme_start_req,
 };
-- 
1.7.2.3

^ permalink raw reply related

* Re: [PATCH] r8169: RxConfig hack for the 8168evl.
From: Francois Romieu @ 2012-06-26  9:22 UTC (permalink / raw)
  To: hayeswang; +Cc: netdev, thomas.pi
In-Reply-To: <DB5F775AE8124DDDAE4E1501A78AF7A3@realtek.com.tw>

hayeswang <hayeswang@realtek.com> :
[...]
> The definition of the IO 0x44 bit 14 is opposite for new chips.
> For 8111C, 0 means fetching one Rx descriptor, and 1 means fetching
> multi-descriptors.
> For 8111D and the later chips, 0 means fetching multi-descriptors, and 1 means
> fetching one Rx descriptor.

Ok. Is there much point fetching one Rx descriptor versus several ?

[...]
> CFG_METHOD_16 is an internal test chip. We have no mass production for
> it. It could even be removed from the driver. I don't think the kernel has to
> support it.

Ok.

There seem to be a few differences for the CFG_METHOD_16 chipset between
the kernel driver and Realtek's own. I have noticed the points below.
Should some of those be included ?

Thanks.

Subject: [PATCH] r8169: narrow 8168evl support.

Some bits taken from the comparison with Realtek's 8.031.00 8168 driver

- 0x7cf00000 / 0x2c900000 is a Realtek internal, test-only chipset
- rtl8168evl_reset_packet_filter is only there for documentation purposes
  (no change)
- TDFNR ?
- the Magic Packet feature should be set and read differently
- r8168_pll_power_up tweak
- what is the mysterious 0xf2 register for in rtl_hw_start_8168e_2 ?
- hardware checksum offloading fix for small packets

Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Cc: Hayes Wang <hayeswang@realtek.com>
---
 drivers/net/ethernet/realtek/r8169.c |   87 ++++++++++++++++++++++++----------
 1 file changed, 62 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 7260aa7..ad6bcf6 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -364,6 +364,8 @@ enum rtl8110_registers {
 };
 
 enum rtl8168_8101_registers {
+	TDFNR			= 0x57,	/* Transmit descriptor fetch number. */
+#define TDFNR_MASK			0x3f
 	CSIDR			= 0x64,
 	CSIAR			= 0x68,
 #define	CSIAR_FLAG			0x80000000
@@ -1265,6 +1267,12 @@ static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
 	rtl_writephy(tp, MII_BMCR, val & 0xffff);
 }
 
+static void rtl8168evl_reset_packet_filter(void __iomem *ioaddr)
+{
+	rtl_w1w0_eri(ioaddr, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
+	rtl_w1w0_eri(ioaddr, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
+}
+
 static void rtl_link_chg_patch(struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -1291,11 +1299,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 			rtl_eri_write(ioaddr, 0x1dc, ERIAR_MASK_1111,
 				      0x0000003f, ERIAR_EXGMAC);
 		}
-		/* Reset packet filter */
-		rtl_w1w0_eri(ioaddr, 0xdc, ERIAR_MASK_0001, 0x00, 0x01,
-			     ERIAR_EXGMAC);
-		rtl_w1w0_eri(ioaddr, 0xdc, ERIAR_MASK_0001, 0x01, 0x00,
-			     ERIAR_EXGMAC);
+		rtl8168evl_reset_packet_filter(ioaddr);
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
 		   tp->mac_version == RTL_GIGA_MAC_VER_36) {
 		if (RTL_R8(PHYstatus) & _1000bpsF) {
@@ -1355,7 +1359,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
 	u8 options;
-	u32 wolopts = 0;
+	u32 wolopts = 0, csi;
 
 	options = RTL_R8(Config1);
 	if (!(options & PMEnable))
@@ -1364,8 +1368,18 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 	options = RTL_R8(Config3);
 	if (options & LinkUp)
 		wolopts |= WAKE_PHY;
-	if (options & MagicPacket)
-		wolopts |= WAKE_MAGIC;
+
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_34:
+		csi = rtl_eri_read(ioaddr, 0xde, ERIAR_EXGMAC);
+		if (csi & 0x01)
+			wolopts |= WAKE_MAGIC;
+		break;
+	default:
+		if (options & MagicPacket)
+			wolopts |= WAKE_MAGIC;
+		break;
+	}
 
 	options = RTL_R8(Config5);
 	if (options & UWF)
@@ -1399,24 +1413,19 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 		u16 reg;
 		u8  mask;
 	} cfg[] = {
-		{ WAKE_PHY,   Config3, LinkUp },
 		{ WAKE_MAGIC, Config3, MagicPacket },
+		{ WAKE_PHY,   Config3, LinkUp },
 		{ WAKE_UCAST, Config5, UWF },
 		{ WAKE_BCAST, Config5, BWF },
 		{ WAKE_MCAST, Config5, MWF },
 		{ WAKE_ANY,   Config5, LanWake }
 	};
+	int start = 0;
 	u8 options;
+	u32 csi;
 
 	RTL_W8(Cfg9346, Cfg9346_Unlock);
 
-	for (i = 0; i < ARRAY_SIZE(cfg); i++) {
-		options = RTL_R8(cfg[i].reg) & ~cfg[i].mask;
-		if (wolopts & cfg[i].opt)
-			options |= cfg[i].mask;
-		RTL_W8(cfg[i].reg, options);
-	}
-
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
 		options = RTL_R8(Config1) & ~PMEnable;
@@ -1424,6 +1433,13 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 			options |= PMEnable;
 		RTL_W8(Config1, options);
 		break;
+	case RTL_GIGA_MAC_VER_34:
+		csi = rtl_eri_read(ioaddr, 0xde, ERIAR_EXGMAC) & ~0x01;
+		if (wolopts & WAKE_MAGIC)
+			csi |= 0x01;
+		rtl_eri_write(ioaddr, 0xde, 4, csi, ERIAR_EXGMAC);
+
+		start++;
 	default:
 		options = RTL_R8(Config2) & ~PME_SIGNAL;
 		if (wolopts)
@@ -1432,6 +1448,13 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 		break;
 	}
 
+	for (i = start; i < ARRAY_SIZE(cfg); i++) {
+		options = RTL_R8(cfg[i].reg) & ~cfg[i].mask;
+		if (wolopts & cfg[i].opt)
+			options |= cfg[i].mask;
+		RTL_W8(cfg[i].reg, options);
+	}
+
 	RTL_W8(Cfg9346, Cfg9346_Lock);
 }
 
@@ -1900,7 +1923,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 		{ 0x7cf00000, 0x48000000,	RTL_GIGA_MAC_VER_35 },
 
 		/* 8168E family. */
-		{ 0x7c800000, 0x2c800000,	RTL_GIGA_MAC_VER_34 },
+		{ 0x7cf00000, 0x2c800000,	RTL_GIGA_MAC_VER_34 },
 		{ 0x7cf00000, 0x2c200000,	RTL_GIGA_MAC_VER_33 },
 		{ 0x7cf00000, 0x2c100000,	RTL_GIGA_MAC_VER_32 },
 		{ 0x7c800000, 0x2c000000,	RTL_GIGA_MAC_VER_33 },
@@ -3778,6 +3801,9 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_33:
 		RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
 		break;
+	case RTL_GIGA_MAC_VER_34:
+		RTL_W8(PMCH, RTL_R8(PMCH) & ~0xc0);
+		break;
 	}
 }
 
@@ -3795,6 +3821,9 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_33:
 		RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
 		break;
+	case RTL_GIGA_MAC_VER_34:
+		RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+		break;
 	}
 
 	r8168_phy_power_up(tp);
@@ -4797,6 +4826,9 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 
 	RTL_W8(MaxTxPacketSize, EarlySize);
 
+	RTL_W8(0xf2, (RTL_R8(0xf2) & ~0x02) | 0x05);
+	RTL_W8(TDFNR, (RTL_R8(TDFNR) & ~TDFNR_MASK) | 0x8);
+
 	rtl_disable_clock_request(pdev);
 
 	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
@@ -5474,14 +5506,19 @@ static inline void rtl8169_tso_csum(struct rtl8169_private *tp,
 		opts[0] |= TD_LSO;
 		opts[offset] |= min(mss, TD_MSS_MAX) << info->mss_shift;
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		const struct iphdr *ip = ip_hdr(skb);
-
-		if (ip->protocol == IPPROTO_TCP)
-			opts[offset] |= info->checksum.tcp;
-		else if (ip->protocol == IPPROTO_UDP)
-			opts[offset] |= info->checksum.udp;
-		else
-			WARN_ON_ONCE(1);
+		if (likely(skb-> len >= 60 ||
+		    (tp->mac_version != RTL_GIGA_MAC_VER_34))) {
+			const struct iphdr *ip = ip_hdr(skb);
+
+			if (ip->protocol == IPPROTO_TCP)
+				opts[offset] |= info->checksum.tcp;
+			else if (ip->protocol == IPPROTO_UDP)
+				opts[offset] |= info->checksum.udp;
+			else
+				WARN_ON_ONCE(1);
+		} else {
+			skb_checksum_help(skb);
+		}
 	}
 }
 
-- 
1.7.10.2

^ permalink raw reply related

* Re: [PATCH] ipv4: Remove unnecessary code from rt_check_expire().
From: David Miller @ 2012-06-26  9:43 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1340701893.10893.296.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 26 Jun 2012 11:11:33 +0200

> I think I stopped trying to improve route cache because your intention
> was to get rid of it.
> 
> Now it seems we should keep it for a while, so it makes sense to add
> more fuel on it ;)

Removal is a long term project, so we can still make some short-
term tweaks :-)

> About financial guys, they probably are smart enough to :
> 
> echo bigvalue >/proc/sys/net/ipv4/route/gc_timeout

Every excess knob adjustment is a failure on our part.

^ permalink raw reply

* Unknown chipsets from Realtek's 8168 driver
From: Francois Romieu @ 2012-06-26 10:26 UTC (permalink / raw)
  To: hayeswang; +Cc: netdev

Hayes,

  there still appear to be unknown chipsets in Realtek's own driver.
Namely:
- CFG_METHOD_21
- CFG_METHOD_22
- CFG_METHOD_23

Should support for some of those be added to the kernel driver ?

If so it would make sense to plan for those now as there are still a
couple of weeks ahead before the window for -next closes and everything
experiences more than 2 months of delay. You will find some incomplete
stuff below. Feel free to use or ignore it.


Some chipsets from Realtek's 8.031.00 8168 driver:
- CFG_METHOD_21 / RTL_GIGA_MAC_VER_39
- CFG_METHOD_22 / RTL_GIGA_MAC_VER_40
- CFG_METHOD_23 / RTL_GIGA_MAC_VER_41

Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
---
 drivers/net/ethernet/realtek/r8169.c |  284 ++++++++++++++++++++++++++++++++--
 1 file changed, 269 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 7260aa7..8381640 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -46,6 +46,7 @@
 #define FIRMWARE_8105E_1	"rtl_nic/rtl8105e-1.fw"
 #define FIRMWARE_8402_1		"rtl_nic/rtl8402-1.fw"
 #define FIRMWARE_8411_1		"rtl_nic/rtl8411-1.fw"
+#define FIRMWARE_8168G_1	"rtl_nic/rtl8168g-1.fw"
 
 #ifdef RTL8169_DEBUG
 #define assert(expr) \
@@ -141,6 +142,9 @@ enum mac_version {
 	RTL_GIGA_MAC_VER_36,
 	RTL_GIGA_MAC_VER_37,
 	RTL_GIGA_MAC_VER_38,
+	RTL_GIGA_MAC_VER_39,
+	RTL_GIGA_MAC_VER_40,
+	RTL_GIGA_MAC_VER_41,
 	RTL_GIGA_MAC_NONE   = 0xff,
 };
 
@@ -259,6 +263,13 @@ static const struct {
 	[RTL_GIGA_MAC_VER_38] =
 		_R("RTL8411",		RTL_TD_1, FIRMWARE_8411_1,
 							JUMBO_9K, false),
+	[RTL_GIGA_MAC_VER_39] =
+		_R("RTL8168g/8111g",	RTL_TD_1, FIRMWARE_8168G_1,
+							JUMBO_9K, false),
+	[RTL_GIGA_MAC_VER_40] =
+		_R("RTL8168g/8111g",	RTL_TD_1, NULL, JUMBO_9K, false),
+	[RTL_GIGA_MAC_VER_41] =
+		_R("RTL8168ep/8111ep",	RTL_TD_1, NULL, JUMBO_9K, false),
 };
 #undef _R
 
@@ -424,11 +435,22 @@ enum rtl8168_registers {
 #define OCPDR_REG_MASK			0x7f
 #define OCPDR_GPHY_REG_SHIFT		16
 #define OCPDR_DATA_MASK			0xffff
+	MACOCP			= 0xb0,
+	/* Shared with PHYOCP. */
+#define OCPR_FLAG			0x80000000
+#define OCPR_WRITE_CMD			0x80000000
+#define OCPR_READ_CMD			0x00000000
+#define OCPR_ADDR_REG_SHIFT		16
 	OCPAR			= 0xb4,
 #define OCPAR_FLAG			0x80000000
 #define OCPAR_GPHY_WRITE_CMD		0x8000f060
 #define OCPAR_GPHY_READ_CMD		0x0000f060
+	PHYOCP			= 0xb8,
 	RDSAR1			= 0xd0,	/* 8168c only. Undocumented on 8168dp */
+	MCUCMD			= 0xd3,
+#define	MCUCMD_NOW_IS_OOB		(1 << 7)
+#define	MCUCMD_TXFIFO_EMPTY		(1 << 5)
+#define	MCUCMD_RXFIFO_EMPTY		(1 << 4)
 	MISC			= 0xf0,	/* 8168e only. */
 #define TXPLA_RST			(1 << 29)
 #define PWM_EN				(1 << 22)
@@ -721,8 +743,8 @@ struct rtl8169_private {
 	u16 event_slow;
 
 	struct mdio_ops {
-		void (*write)(void __iomem *, int, int);
-		int (*read)(void __iomem *, int);
+		void (*write)(struct rtl8169_private *, int, int);
+		int (*read)(struct rtl8169_private *, int);
 	} mdio_ops;
 
 	struct pll_power_ops {
@@ -774,6 +796,8 @@ struct rtl8169_private {
 		} phy_action;
 	} *rtl_fw;
 #define RTL_FIRMWARE_UNKNOWN	ERR_PTR(-EAGAIN)
+
+	int mii_page;
 };
 
 MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
@@ -872,6 +896,27 @@ static u16 rtl8168_get_ocp_reg(struct rtl8169_private *tp)
 	return (tp->mac_version == RTL_GIGA_MAC_VER_31) ? 0xb8 : 0x10;
 }
 
+static u32 rtl_ocp_wrap_cmd(u16 addr, u32 cmd)
+{
+	return ((addr >> 1) << OCPR_ADDR_REG_SHIFT) | cmd;
+}
+
+static void rtl_mac_ocp_write(struct rtl8169_private *tp, u16 reg_addr, u16 value)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	RTL_W32(MACOCP, rtl_ocp_wrap_cmd(reg_addr, OCPR_WRITE_CMD | value));
+}
+
+static u16 rtl_mac_ocp_read(struct rtl8169_private *tp, u16 reg_addr)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	RTL_W32(MACOCP, rtl_ocp_wrap_cmd(reg_addr, OCPR_READ_CMD));
+
+	return (u16)RTL_R32(MACOCP);
+}
+
 static void rtl8168_driver_start(struct rtl8169_private *tp)
 {
 	u16 reg;
@@ -911,8 +956,9 @@ static int r8168dp_check_dash(struct rtl8169_private *tp)
 	return (ocp_read(tp, 0x0f, reg) & 0x00008000) ? 1 : 0;
 }
 
-static void r8169_mdio_write(void __iomem *ioaddr, int reg_addr, int value)
+static void r8169_mdio_write(struct rtl8169_private *tp, int reg_addr, int value)
 {
+	void __iomem *ioaddr = tp->mmio_addr;
 	int i;
 
 	RTL_W32(PHYAR, 0x80000000 | (reg_addr & 0x1f) << 16 | (value & 0xffff));
@@ -933,8 +979,9 @@ static void r8169_mdio_write(void __iomem *ioaddr, int reg_addr, int value)
 	udelay(20);
 }
 
-static int r8169_mdio_read(void __iomem *ioaddr, int reg_addr)
+static int r8169_mdio_read(struct rtl8169_private *tp, int reg_addr)
 {
+	void __iomem *ioaddr = tp->mmio_addr;
 	int i, value = -1;
 
 	RTL_W32(PHYAR, 0x0 | (reg_addr & 0x1f) << 16);
@@ -975,14 +1022,15 @@ static void r8168dp_1_mdio_access(void __iomem *ioaddr, int reg_addr, u32 data)
 	}
 }
 
-static void r8168dp_1_mdio_write(void __iomem *ioaddr, int reg_addr, int value)
+static void r8168dp_1_mdio_write(struct rtl8169_private *tp, int reg_addr, int value)
 {
-	r8168dp_1_mdio_access(ioaddr, reg_addr, OCPDR_WRITE_CMD |
-		(value & OCPDR_DATA_MASK));
+	r8168dp_1_mdio_access(tp->mmio_addr, reg_addr,
+			      OCPDR_WRITE_CMD | (value & OCPDR_DATA_MASK));
 }
 
-static int r8168dp_1_mdio_read(void __iomem *ioaddr, int reg_addr)
+static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg_addr)
 {
+	void __iomem *ioaddr = tp->mmio_addr;
 	int i;
 
 	r8168dp_1_mdio_access(ioaddr, reg_addr, OCPDR_READ_CMD);
@@ -1012,8 +1060,10 @@ static void r8168dp_2_mdio_stop(void __iomem *ioaddr)
 	RTL_W32(0xd0, RTL_R32(0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
 }
 
-static void r8168dp_2_mdio_write(void __iomem *ioaddr, int reg_addr, int value)
+static void r8168dp_2_mdio_write(struct rtl8169_private *tp, int reg_addr, int value)
 {
+	void __iomem *ioaddr = tp->mmio_addr;
+
 	r8168dp_2_mdio_start(ioaddr);
 
 	r8169_mdio_write(ioaddr, reg_addr, value);
@@ -1021,8 +1071,9 @@ static void r8168dp_2_mdio_write(void __iomem *ioaddr, int reg_addr, int value)
 	r8168dp_2_mdio_stop(ioaddr);
 }
 
-static int r8168dp_2_mdio_read(void __iomem *ioaddr, int reg_addr)
+static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg_addr)
 {
+	void __iomem *ioaddr = tp->mmio_addr;
 	int value;
 
 	r8168dp_2_mdio_start(ioaddr);
@@ -1034,14 +1085,81 @@ static int r8168dp_2_mdio_read(void __iomem *ioaddr, int reg_addr)
 	return value;
 }
 
+static u16 rtl_map_phy_ocp_addr(u16 page, u8 reg)
+{
+	if (!page) {
+		// FIXME: use some #define here ?
+		page = 0x0a40 + (reg / 8);
+		reg  = 0x0010 + (reg % 8);
+	}
+
+	page <<= 4;
+
+	reg -= 16;
+	reg <<= 1;
+
+	return page + reg;
+}
+
+static void rtl_phy_ocp_cmd(struct rtl8169_private *tp, u8 reg, u32 cmd)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	u32 data;
+
+	data = rtl_map_phy_ocp_addr(tp->mii_page, reg);
+
+	RTL_W32(PHYOCP, rtl_ocp_wrap_cmd(data, cmd));
+}
+
+static bool rtl_phy_ocp_wait_bit(struct rtl8169_private *tp, bool low)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	bool done;
+	int i;
+
+	for (i = 0; i < 10; i++) {
+		bool set;
+
+		udelay(100);
+
+		set = !!(RTL_R32(PHYOCP) & OCPR_FLAG);
+		done = set ^ low;
+		if (done)
+			break;
+	}
+	return done;
+}
+
+static void r8168g_mdio_write(struct rtl8169_private *tp, int reg_addr, int value)
+{
+	if (reg_addr == 0x1f) {
+		tp->mii_page = value;
+		return;
+	}
+
+	rtl_phy_ocp_cmd(tp, reg_addr, OCPR_WRITE_CMD | value);
+
+	rtl_phy_ocp_wait_bit(tp, true);
+}
+
+static int r8168g_mdio_read(struct rtl8169_private *tp, int reg_addr)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	rtl_phy_ocp_cmd(tp, reg_addr, OCPR_READ_CMD);
+
+	return rtl_phy_ocp_wait_bit(tp, false) ?
+		RTL_R32(PHYOCP) & OCPDR_DATA_MASK : -1;
+}
+
 static void rtl_writephy(struct rtl8169_private *tp, int location, u32 val)
 {
-	tp->mdio_ops.write(tp->mmio_addr, location, val);
+	tp->mdio_ops.write(tp, location, val);
 }
 
 static int rtl_readphy(struct rtl8169_private *tp, int location)
 {
-	return tp->mdio_ops.read(tp->mmio_addr, location);
+	return tp->mdio_ops.read(tp, location);
 }
 
 static void rtl_patchphy(struct rtl8169_private *tp, int reg_addr, int value)
@@ -1319,6 +1437,11 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 			rtl_eri_write(ioaddr, 0x1d0, ERIAR_MASK_0011,
 				      0x0000, ERIAR_EXGMAC);
 		}
+	} else if (tp->mac_version == RTL_GIGA_MAC_VER_39 ||
+		   tp->mac_version == RTL_GIGA_MAC_VER_40) {
+		// ...
+	} else if (tp->mac_version == RTL_GIGA_MAC_VER_41) {
+		// ...
 	}
 }
 
@@ -1425,6 +1548,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 		RTL_W8(Config1, options);
 		break;
 	default:
+		// Is it ok for RTL_GIGA_MAC_VER_39..41 ?
 		options = RTL_R8(Config2) & ~PME_SIGNAL;
 		if (wolopts)
 			options |= PME_SIGNAL;
@@ -1894,6 +2018,11 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 		u32 val;
 		int mac_version;
 	} mac_info[] = {
+		{ 0x7cf00000, 0x50000000,	RTL_GIGA_MAC_VER_41 },
+
+		{ 0x7cf00000, 0x4c100000,	RTL_GIGA_MAC_VER_40 },
+		{ 0x7cf00000, 0x4c000000,	RTL_GIGA_MAC_VER_39 },
+
 		/* 8168F family. */
 		{ 0x7c800000, 0x48800000,	RTL_GIGA_MAC_VER_38 },
 		{ 0x7cf00000, 0x48100000,	RTL_GIGA_MAC_VER_36 },
@@ -3273,6 +3402,32 @@ static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
 	rtl_writephy(tp, 0x1f, 0x0000);
 }
 
+static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+
+	rtl_apply_firmware(tp);
+}
+
+static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+}
+
+static void rtl8168ep_hw_phy_config(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	static const struct phy_reg phy_reg_init[] = {
+		{ 0x1f, 0x0a40 },
+		{ 0x1b, 0x809c },
+		{ 0x1c, 0xa700 },
+		{ 0x1b, 0x80a5 },
+		{ 0x1c, 0xa700 }
+	};
+
+	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
+}
+
 static void rtl_hw_phy_config(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
@@ -3369,6 +3524,18 @@ static void rtl_hw_phy_config(struct net_device *dev)
 		rtl8411_hw_phy_config(tp);
 		break;
 
+	case RTL_GIGA_MAC_VER_39:
+		rtl8168g_1_hw_phy_config(tp);
+		break;
+
+	case RTL_GIGA_MAC_VER_40:
+		rtl8168g_2_hw_phy_config(tp);
+		break;
+
+	case RTL_GIGA_MAC_VER_41:
+		rtl8168ep_hw_phy_config(tp);
+		break;
+
 	default:
 		break;
 	}
@@ -3589,6 +3756,12 @@ static void __devinit rtl_init_mdio_ops(struct rtl8169_private *tp)
 		ops->write	= r8168dp_2_mdio_write;
 		ops->read	= r8168dp_2_mdio_read;
 		break;
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+	case RTL_GIGA_MAC_VER_41:
+		ops->write	= r8168g_mdio_write;
+		ops->read	= r8168g_mdio_read;
+		break;
 	default:
 		ops->write	= r8169_mdio_write;
 		ops->read	= r8169_mdio_read;
@@ -3611,6 +3784,10 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 		RTL_W32(RxConfig, RTL_R32(RxConfig) |
 			AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
 		break;
+	// Is it ok for RTL_GIGA_MAC_VER_39..41 ?
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+	case RTL_GIGA_MAC_VER_41:
 	default:
 		break;
 	}
@@ -3705,6 +3882,10 @@ static void r8168_phy_power_up(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_31:
 		rtl_writephy(tp, 0x0e, 0x0000);
 		break;
+	// Is it ok for RTL_GIGA_MAC_VER_39..41 ?
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+	case RTL_GIGA_MAC_VER_41:
 	default:
 		break;
 	}
@@ -3736,6 +3917,9 @@ static void r8168_phy_power_down(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_28:
 	case RTL_GIGA_MAC_VER_31:
 		rtl_writephy(tp, 0x0e, 0x0200);
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+	case RTL_GIGA_MAC_VER_41:
 	default:
 		rtl_writephy(tp, MII_BMCR, BMCR_PDOWN);
 		break;
@@ -3855,6 +4039,9 @@ static void __devinit rtl_init_pll_power_ops(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_35:
 	case RTL_GIGA_MAC_VER_36:
 	case RTL_GIGA_MAC_VER_38:
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+	case RTL_GIGA_MAC_VER_41:
 		ops->down	= r8168_pll_power_down;
 		ops->up		= r8168_pll_power_up;
 		break;
@@ -3896,6 +4083,9 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_24:
 		RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
 		break;
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+	case RTL_GIGA_MAC_VER_41:
 	default:
 		RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST);
 		break;
@@ -4050,6 +4240,9 @@ static void __devinit rtl_init_jumbo_ops(struct rtl8169_private *tp)
 	 * No action needed for jumbo frames with 8169.
 	 * No jumbo for 810x at all.
 	 */
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+	case RTL_GIGA_MAC_VER_41:
 	default:
 		ops->disable	= NULL;
 		ops->enable	= NULL;
@@ -4142,7 +4335,10 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 	           tp->mac_version == RTL_GIGA_MAC_VER_35 ||
 	           tp->mac_version == RTL_GIGA_MAC_VER_36 ||
 	           tp->mac_version == RTL_GIGA_MAC_VER_37 ||
-	           tp->mac_version == RTL_GIGA_MAC_VER_38) {
+	           tp->mac_version == RTL_GIGA_MAC_VER_38 ||
+	           tp->mac_version == RTL_GIGA_MAC_VER_39 ||
+	           tp->mac_version == RTL_GIGA_MAC_VER_40 ||
+	           tp->mac_version == RTL_GIGA_MAC_VER_41) {
 		RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
 		while (!(RTL_R32(TxConfig) & TXCFG_EMPTY))
 			udelay(100);
@@ -4478,6 +4674,9 @@ static void __devinit rtl_init_csi_ops(struct rtl8169_private *tp)
 		ops->read	= r8402_csi_read;
 		break;
 
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+	case RTL_GIGA_MAC_VER_41:
 	default:
 		ops->write	= r8169_csi_write;
 		ops->read	= r8169_csi_read;
@@ -4810,6 +5009,38 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 	RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
 }
 
+static void rtl_hw_start_8168e_3(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	struct pci_dev *pdev = tp->pci_dev;
+	static const struct ephy_info e_info_8168e_3[] = {
+		{ 0x00, 0x0000, 0x10a3 },
+		{ 0x06, 0x0000, 0xf030 },
+		{ 0x08, 0x0000, 0x2006 },
+		{ 0x0d, 0x0000, 0x1666 }
+	};
+
+	rtl_csi_access_enable_1(tp);
+
+	rtl_ephy_init(ioaddr, e_info_8168e_3, ARRAY_SIZE(e_info_8168e_3));
+
+	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+
+	rtl_eri_write(ioaddr, 0xc8, ERIAR_MASK_0001, 0x02, ERIAR_EXGMAC);
+	rtl_eri_write(ioaddr, 0xca, ERIAR_MASK_0001, 0x08, ERIAR_EXGMAC);
+	rtl_eri_write(ioaddr, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
+	rtl_eri_write(ioaddr, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC);
+	rtl_eri_write(ioaddr, 0xe8, ERIAR_MASK_1111, 0x00100006, ERIAR_EXGMAC);
+
+	rtl_w1w0_eri(ioaddr, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
+	rtl_w1w0_eri(ioaddr, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
+
+	RTL_W8(MaxTxPacketSize, EarlySize);
+
+	/* Adjust EEE LED frequency */
+	RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+}
+
 static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -4880,6 +5111,18 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
 		     ERIAR_EXGMAC);
 }
 
+static void rtl_hw_start_8168g(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	struct pci_dev *pdev = tp->pci_dev;
+	static const struct ephy_info e_info_8168g[] = {
+		{ 0x00, 0xffff,	0x0000 },
+		{ 0x00, 0xffff,	0x0000 }
+	};
+
+	rtl_ephy_init(ioaddr, e_info_8168g, ARRAY_SIZE(e_info_8168g));
+}
+
 static void rtl_hw_start_8168(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
@@ -4981,6 +5224,15 @@ static void rtl_hw_start_8168(struct net_device *dev)
 		rtl_hw_start_8411(tp);
 		break;
 
+	case RTL_GIGA_MAC_VER_39:
+	case RTL_GIGA_MAC_VER_40:
+		rtl_hw_start_8168g(tp);
+		break;
+
+	case RTL_GIGA_MAC_VER_41:
+		rtl_hw_start_8168e_3(tp);
+		break;
+
 	default:
 		printk(KERN_ERR PFX "%s: unknown chipset (mac_version = %d).\n",
 			dev->name, tp->mac_version);
@@ -6335,7 +6587,8 @@ static void __devexit rtl_remove_one(struct pci_dev *pdev)
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_27 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-	    tp->mac_version == RTL_GIGA_MAC_VER_31) {
+	    tp->mac_version == RTL_GIGA_MAC_VER_31 ||
+	    tp->mac_version == RTL_GIGA_MAC_VER_41) {
 		rtl8168_driver_stop(tp);
 	}
 
@@ -6651,7 +6904,8 @@ rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_27 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-	    tp->mac_version == RTL_GIGA_MAC_VER_31) {
+	    tp->mac_version == RTL_GIGA_MAC_VER_31 ||
+	    tp->mac_version == RTL_GIGA_MAC_VER_41) {
 		rtl8168_driver_start(tp);
 	}
 
-- 
1.7.10.2

^ permalink raw reply related

* Re: [net-next RFC V3 PATCH 4/6] tuntap: multiqueue support
From: Michael S. Tsirkin @ 2012-06-26 10:42 UTC (permalink / raw)
  To: Jason Wang
  Cc: habanero, netdev, linux-kernel, krkumar2, tahm, akong, davem,
	shemminger, mashirle
In-Reply-To: <4FE92F99.6020402@redhat.com>

On Tue, Jun 26, 2012 at 11:42:17AM +0800, Jason Wang wrote:
> On 06/25/2012 04:25 PM, Michael S. Tsirkin wrote:
> >On Mon, Jun 25, 2012 at 02:10:18PM +0800, Jason Wang wrote:
> >>This patch adds multiqueue support for tap device. This is done by abstracting
> >>each queue as a file/socket and allowing multiple sockets to be attached to the
> >>tuntap device (an array of tun_file were stored in the tun_struct). Userspace
> >>could write and read from those files to do the parallel packet
> >>sending/receiving.
> >>
> >>Unlike the previous single queue implementation, the socket and device are
> >>loosely coupled; either of them is allowed to go away first. In order to make the
> >>tx path lockless, netif_tx_lock_bh() is replaced by RCU/NETIF_F_LLTX to
> >>synchronize between the data path and system calls.
> >Don't use LLTX/RCU. It's not worth it.
> >Use something like netif_set_real_num_tx_queues.
> >
> >>The tx queue selection is first based on the recorded rxq index of an skb; if
> >>there is none, the queue is chosen based on rx hashing (skb_get_rxhash()).
> >>
> >>Signed-off-by: Jason Wang<jasowang@redhat.com>
> >Interestingly macvtap switched to hashing first:
> >ef0002b577b52941fb147128f30bd1ecfdd3ff6d
> >(the commit log is corrupted but see what it
> >does in the patch).
> >Any idea why?
> 
> Yes, so tap should be changed to behave the same as macvtap. I remember
> the reason we do that is to make sure the packets of a single flow are
> queued to a fixed socket/virtqueue. 10g cards like ixgbe
> choose the rx queue for a flow based on the last tx queue that the
> packets of that flow came from. So if we use the recorded rx queue in
> macvtap, the queue index of a flow would change as the vhost thread
> moves among processors.

Hmm. OTOH if you override this, if TX is sent from VCPU0, RX might land
on VCPU1 in the guest, which is not good, right?

> But while testing tun/tap, one interesting thing I found is that even though
> ixgbe has recorded the queue index during rx, it seems to be lost when
> tap tries to transmit skbs to userspace.

dev_pick_tx does this I think but ndo_select_queue
should be able to get it without trouble.


> >>---
> >>  drivers/net/tun.c |  371 +++++++++++++++++++++++++++++++++--------------------
> >>  1 files changed, 232 insertions(+), 139 deletions(-)
> >>
> >>diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> >>index 8233b0a..5c26757 100644
> >>--- a/drivers/net/tun.c
> >>+++ b/drivers/net/tun.c
> >>@@ -107,6 +107,8 @@ struct tap_filter {
> >>  	unsigned char	addr[FLT_EXACT_COUNT][ETH_ALEN];
> >>  };
> >>
> >>+#define MAX_TAP_QUEUES (NR_CPUS<  16 ? NR_CPUS : 16)
> >Why the limit? I am guessing you copied this from macvtap?
> >This is problematic for a number of reasons:
> >	- will not play well with migration
> >	- will not work well for a large guest
> >
> >Yes, macvtap needs to be fixed too.
> >
> >I am guessing what it is trying to prevent is queueing
> >up a huge number of packets?
> >So just divide the default tx queue limit by the # of queues.
> >
> >And by the way, for MQ applications maybe we can finally
> >ignore tx queue altogether and limit the total number
> >of bytes queued?
> >To avoid regressions we can make it large like 64M/# queues.
> >Could be a separate patch I think, and for a single queue
> >might need a compatible mode though I am not sure.
> >
> >>+
> >>  struct tun_file {
> >>  	struct sock sk;
> >>  	struct socket socket;
> >>@@ -114,16 +116,18 @@ struct tun_file {
> >>  	int vnet_hdr_sz;
> >>  	struct tap_filter txflt;
> >>  	atomic_t count;
> >>-	struct tun_struct *tun;
> >>+	struct tun_struct __rcu *tun;
> >>  	struct net *net;
> >>  	struct fasync_struct *fasync;
> >>  	unsigned int flags;
> >>+	u16 queue_index;
> >>  };
> >>
> >>  struct tun_sock;
> >>
> >>  struct tun_struct {
> >>-	struct tun_file		*tfile;
> >>+	struct tun_file		*tfiles[MAX_TAP_QUEUES];
> >>+	unsigned int            numqueues;
> >>  	unsigned int 		flags;
> >>  	uid_t			owner;
> >>  	gid_t			group;
> >>@@ -138,80 +142,159 @@ struct tun_struct {
> >>  #endif
> >>  };
> >>
> >>-static int tun_attach(struct tun_struct *tun, struct file *file)
> >>+static DEFINE_SPINLOCK(tun_lock);
> >>+
> >>+/*
> >>+ * tun_get_queue(): calculate the queue index
> >>+ *     - if skbs comes from mq nics, we can just borrow
> >>+ *     - if not, calculate from the hash
> >>+ */
> >>+static struct tun_file *tun_get_queue(struct net_device *dev,
> >>+				      struct sk_buff *skb)
> >>  {
> >>-	struct tun_file *tfile = file->private_data;
> >>-	int err;
> >>+	struct tun_struct *tun = netdev_priv(dev);
> >>+	struct tun_file *tfile = NULL;
> >>+	int numqueues = tun->numqueues;
> >>+	__u32 rxq;
> >>
> >>-	ASSERT_RTNL();
> >>+	BUG_ON(!rcu_read_lock_held());
> >>
> >>-	netif_tx_lock_bh(tun->dev);
> >>+	if (!numqueues)
> >>+		goto out;
> >>
> >>-	err = -EINVAL;
> >>-	if (tfile->tun)
> >>+	if (numqueues == 1) {
> >>+		tfile = rcu_dereference(tun->tfiles[0]);
> >Instead of hacks like this, you can ask for an MQ
> >flag to be set in SETIFF. Then you won't need to
> >handle attach/detach at random times.
> >And most of the scary num_queues checks can go away.
> >You can then also ask userspace about the max # of queues
> >to expect if you want to save some memory.
> >
> >
> >>  		goto out;
> >>+	}
> >>
> >>-	err = -EBUSY;
> >>-	if (tun->tfile)
> >>+	if (likely(skb_rx_queue_recorded(skb))) {
> >>+		rxq = skb_get_rx_queue(skb);
> >>+
> >>+		while (unlikely(rxq>= numqueues))
> >>+			rxq -= numqueues;
> >>+
> >>+		tfile = rcu_dereference(tun->tfiles[rxq]);
> >>  		goto out;
> >>+	}
> >>
> >>-	err = 0;
> >>-	tfile->tun = tun;
> >>-	tun->tfile = tfile;
> >>-	netif_carrier_on(tun->dev);
> >>-	dev_hold(tun->dev);
> >>-	sock_hold(&tfile->sk);
> >>-	atomic_inc(&tfile->count);
> >>+	/* Check if we can use flow to select a queue */
> >>+	rxq = skb_get_rxhash(skb);
> >>+	if (rxq) {
> >>+		u32 idx = ((u64)rxq * numqueues)>>  32;
> >This completely confuses me. What's the logic here?
> >How do we even know it's in range?
> >
> >>+		tfile = rcu_dereference(tun->tfiles[idx]);
> >>+		goto out;
> >>+	}
> >>
> >>+	tfile = rcu_dereference(tun->tfiles[0]);
> >>  out:
> >>-	netif_tx_unlock_bh(tun->dev);
> >>-	return err;
> >>+	return tfile;
> >>  }
> >>
> >>-static void __tun_detach(struct tun_struct *tun)
> >>+static int tun_detach(struct tun_file *tfile, bool clean)
> >>  {
> >>-	struct tun_file *tfile = tun->tfile;
> >>-	/* Detach from net device */
> >>-	netif_tx_lock_bh(tun->dev);
> >>-	netif_carrier_off(tun->dev);
> >>-	tun->tfile = NULL;
> >>-	netif_tx_unlock_bh(tun->dev);
> >>-
> >>-	/* Drop read queue */
> >>-	skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
> >>-
> >>-	/* Drop the extra count on the net device */
> >>-	dev_put(tun->dev);
> >>-}
> >>+	struct tun_struct *tun;
> >>+	struct net_device *dev = NULL;
> >>+	bool destroy = false;
> >>
> >>-static void tun_detach(struct tun_struct *tun)
> >>-{
> >>-	rtnl_lock();
> >>-	__tun_detach(tun);
> >>-	rtnl_unlock();
> >>-}
> >>+	spin_lock(&tun_lock);
> >>
> >>-static struct tun_struct *__tun_get(struct tun_file *tfile)
> >>-{
> >>-	struct tun_struct *tun = NULL;
> >>+	tun = rcu_dereference_protected(tfile->tun,
> >>+					lockdep_is_held(&tun_lock));
> >>+	if (tun) {
> >>+		u16 index = tfile->queue_index;
> >>+		BUG_ON(index>= tun->numqueues);
> >>+		dev = tun->dev;
> >>+
> >>+		rcu_assign_pointer(tun->tfiles[index],
> >>+				   tun->tfiles[tun->numqueues - 1]);
> >>+		tun->tfiles[index]->queue_index = index;
> >>+		rcu_assign_pointer(tfile->tun, NULL);
> >>+		--tun->numqueues;
> >>+		sock_put(&tfile->sk);
> >>
> >>-	if (atomic_inc_not_zero(&tfile->count))
> >>-		tun = tfile->tun;
> >>+		if (tun->numqueues == 0&&  !(tun->flags&  TUN_PERSIST))
> >>+			destroy = true;
> >Please don't use flags like that. Use dedicated labels and goto there on error.
> >
> >
> >>+	}
> >>
> >>-	return tun;
> >>+	spin_unlock(&tun_lock);
> >>+
> >>+	synchronize_rcu();
> >>+	if (clean)
> >>+		sock_put(&tfile->sk);
> >>+
> >>+	if (destroy) {
> >>+		rtnl_lock();
> >>+		if (dev->reg_state == NETREG_REGISTERED)
> >>+			unregister_netdevice(dev);
> >>+		rtnl_unlock();
> >>+	}
> >>+
> >>+	return 0;
> >>  }
> >>
> >>-static struct tun_struct *tun_get(struct file *file)
> >>+static void tun_detach_all(struct net_device *dev)
> >>  {
> >>-	return __tun_get(file->private_data);
> >>+	struct tun_struct *tun = netdev_priv(dev);
> >>+	struct tun_file *tfile, *tfile_list[MAX_TAP_QUEUES];
> >>+	int i, j = 0;
> >>+
> >>+	spin_lock(&tun_lock);
> >>+
> >>+	for (i = 0; i<  MAX_TAP_QUEUES&&  tun->numqueues; i++) {
> >>+		tfile = rcu_dereference_protected(tun->tfiles[i],
> >>+						lockdep_is_held(&tun_lock));
> >>+		BUG_ON(!tfile);
> >>+		wake_up_all(&tfile->wq.wait);
> >>+		tfile_list[j++] = tfile;
> >>+		rcu_assign_pointer(tfile->tun, NULL);
> >>+		--tun->numqueues;
> >>+	}
> >>+	BUG_ON(tun->numqueues != 0);
> >>+	/* guarantee that any future tun_attach will fail */
> >>+	tun->numqueues = MAX_TAP_QUEUES;
> >>+	spin_unlock(&tun_lock);
> >>+
> >>+	synchronize_rcu();
> >>+	for (--j; j>= 0; j--)
> >>+		sock_put(&tfile_list[j]->sk);
> >>  }
> >>
> >>-static void tun_put(struct tun_struct *tun)
> >>+static int tun_attach(struct tun_struct *tun, struct file *file)
> >>  {
> >>-	struct tun_file *tfile = tun->tfile;
> >>+	struct tun_file *tfile = file->private_data;
> >>+	int err;
> >>+
> >>+	ASSERT_RTNL();
> >>+
> >>+	spin_lock(&tun_lock);
> >>
> >>-	if (atomic_dec_and_test(&tfile->count))
> >>-		tun_detach(tfile->tun);
> >>+	err = -EINVAL;
> >>+	if (rcu_dereference_protected(tfile->tun, lockdep_is_held(&tun_lock)))
> >>+		goto out;
> >>+
> >>+	err = -EBUSY;
> >>+	if (!(tun->flags&  TUN_TAP_MQ)&&  tun->numqueues == 1)
> >>+		goto out;
> >>+
> >>+	if (tun->numqueues == MAX_TAP_QUEUES)
> >>+		goto out;
> >>+
> >>+	err = 0;
> >>+	tfile->queue_index = tun->numqueues;
> >>+	rcu_assign_pointer(tfile->tun, tun);
> >>+	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
> >>+	sock_hold(&tfile->sk);
> >>+	tun->numqueues++;
> >>+
> >>+	if (tun->numqueues == 1)
> >>+		netif_carrier_on(tun->dev);
> >>+
> >>+	/* device is allowed to go away first, so no need to hold extra
> >>+	 * refcnt. */
> >>+
> >>+out:
> >>+	spin_unlock(&tun_lock);
> >>+	return err;
> >>  }
> >>
> >>  /* TAP filtering */
> >>@@ -331,16 +414,7 @@ static const struct ethtool_ops tun_ethtool_ops;
> >>  /* Net device detach from fd. */
> >>  static void tun_net_uninit(struct net_device *dev)
> >>  {
> >>-	struct tun_struct *tun = netdev_priv(dev);
> >>-	struct tun_file *tfile = tun->tfile;
> >>-
> >>-	/* Inform the methods they need to stop using the dev.
> >>-	 */
> >>-	if (tfile) {
> >>-		wake_up_all(&tfile->wq.wait);
> >>-		if (atomic_dec_and_test(&tfile->count))
> >>-			__tun_detach(tun);
> >>-	}
> >>+	tun_detach_all(dev);
> >>  }
> >>
> >>  /* Net device open. */
> >>@@ -360,10 +434,10 @@ static int tun_net_close(struct net_device *dev)
> >>  /* Net device start xmit */
> >>  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> >>  {
> >>-	struct tun_struct *tun = netdev_priv(dev);
> >>-	struct tun_file *tfile = tun->tfile;
> >>+	struct tun_file *tfile = NULL;
> >>
> >>-	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
> >>+	rcu_read_lock();
> >>+	tfile = tun_get_queue(dev, skb);
> >>
> >>  	/* Drop packet if interface is not attached */
> >>  	if (!tfile)
> >>@@ -381,7 +455,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> >>
> >>  	if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
> >>  	>= dev->tx_queue_len) {
> >>-		if (!(tun->flags&  TUN_ONE_QUEUE)) {
> >>+		if (!(tfile->flags&  TUN_ONE_QUEUE)&&
> >Which patch moved flags from tun to tfile?
> >
> >>+		    !(tfile->flags&  TUN_TAP_MQ)) {
> >>  			/* Normal queueing mode. */
> >>  			/* Packet scheduler handles dropping of further packets. */
> >>  			netif_stop_queue(dev);
> >>@@ -390,7 +465,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> >>  			 * error is more appropriate. */
> >>  			dev->stats.tx_fifo_errors++;
> >>  		} else {
> >>-			/* Single queue mode.
> >>+			/* Single queue mode or multi queue mode.
> >>  			 * Driver handles dropping of all packets itself. */
> >Please don't do this. Stop the queue on overrun as appropriate.
> >ONE_QUEUE is a legacy hack.
> >
> >BTW we really should stop queue before we start dropping packets,
> >but that can be a separate patch.
> >
> >>  			goto drop;
> >>  		}
> >>@@ -408,9 +483,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> >>  		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
> >>  	wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
> >>  				   POLLRDNORM | POLLRDBAND);
> >>+	rcu_read_unlock();
> >>  	return NETDEV_TX_OK;
> >>
> >>  drop:
> >>+	rcu_read_unlock();
> >>  	dev->stats.tx_dropped++;
> >>  	kfree_skb(skb);
> >>  	return NETDEV_TX_OK;
> >>@@ -527,16 +604,22 @@ static void tun_net_init(struct net_device *dev)
> >>  static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
> >>  {
> >>  	struct tun_file *tfile = file->private_data;
> >>-	struct tun_struct *tun = __tun_get(tfile);
> >>+	struct tun_struct *tun = NULL;
> >>  	struct sock *sk;
> >>  	unsigned int mask = 0;
> >>
> >>-	if (!tun)
> >>+	if (!tfile)
> >>  		return POLLERR;
> >>
> >>-	sk = tfile->socket.sk;
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun) {
> >>+		rcu_read_unlock();
> >>+		return POLLERR;
> >>+	}
> >>+	rcu_read_unlock();
> >>
> >>-	tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
> >>+	sk =&tfile->sk;
> >>
> >>  	poll_wait(file,&tfile->wq.wait, wait);
> >>
> >>@@ -548,10 +631,12 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
> >>  	     sock_writeable(sk)))
> >>  		mask |= POLLOUT | POLLWRNORM;
> >>
> >>-	if (tun->dev->reg_state != NETREG_REGISTERED)
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun || tun->dev->reg_state != NETREG_REGISTERED)
> >>  		mask = POLLERR;
> >>+	rcu_read_unlock();
> >>
> >>-	tun_put(tun);
> >>  	return mask;
> >>  }
> >>
> >>@@ -708,9 +793,12 @@ static ssize_t tun_get_user(struct tun_file *tfile,
> >>  		skb_shinfo(skb)->gso_segs = 0;
> >>  	}
> >>
> >>-	tun = __tun_get(tfile);
> >>-	if (!tun)
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun) {
> >>+		rcu_read_unlock();
> >>  		return -EBADFD;
> >>+	}
> >>
> >>  	switch (tfile->flags&  TUN_TYPE_MASK) {
> >>  	case TUN_TUN_DEV:
> >>@@ -720,26 +808,30 @@ static ssize_t tun_get_user(struct tun_file *tfile,
> >>  		skb->protocol = eth_type_trans(skb, tun->dev);
> >>  		break;
> >>  	}
> >>-
> >>-	netif_rx_ni(skb);
> >>  	tun->dev->stats.rx_packets++;
> >>  	tun->dev->stats.rx_bytes += len;
> >>-	tun_put(tun);
> >>+	rcu_read_unlock();
> >>+
> >>+	netif_rx_ni(skb);
> >>+
> >>  	return count;
> >>
> >>  err_free:
> >>  	count = -EINVAL;
> >>  	kfree_skb(skb);
> >>  err:
> >>-	tun = __tun_get(tfile);
> >>-	if (!tun)
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun) {
> >>+		rcu_read_unlock();
> >>  		return -EBADFD;
> >>+	}
> >>
> >>  	if (drop)
> >>  		tun->dev->stats.rx_dropped++;
> >>  	if (error)
> >>  		tun->dev->stats.rx_frame_errors++;
> >>-	tun_put(tun);
> >>+	rcu_read_unlock();
> >>  	return count;
> >>  }
> >>
> >>@@ -833,12 +925,13 @@ static ssize_t tun_put_user(struct tun_file *tfile,
> >>  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
> >>  	total += skb->len;
> >>
> >>-	tun = __tun_get(tfile);
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>  	if (tun) {
> >>  		tun->dev->stats.tx_packets++;
> >>  		tun->dev->stats.tx_bytes += len;
> >>-		tun_put(tun);
> >>  	}
> >>+	rcu_read_unlock();
> >>
> >>  	return total;
> >>  }
> >>@@ -869,28 +962,31 @@ static ssize_t tun_do_read(struct tun_file *tfile,
> >>  				break;
> >>  			}
> >>
> >>-			tun = __tun_get(tfile);
> >>+			rcu_read_lock();
> >>+			tun = rcu_dereference(tfile->tun);
> >>  			if (!tun) {
> >>-				ret = -EIO;
> >>+				ret = -EBADFD;
> >BADFD is for when you get passed something like -1 fd.
> >Here fd is OK, it's just in a bad state so you can not do IO.
> >
> >
> >>+				rcu_read_unlock();
> >>  				break;
> >>  			}
> >>  			if (tun->dev->reg_state != NETREG_REGISTERED) {
> >>  				ret = -EIO;
> >>-				tun_put(tun);
> >>+				rcu_read_unlock();
> >>  				break;
> >>  			}
> >>-			tun_put(tun);
> >>+			rcu_read_unlock();
> >>
> >>  			/* Nothing to read, let's sleep */
> >>  			schedule();
> >>  			continue;
> >>  		}
> >>
> >>-		tun = __tun_get(tfile);
> >>+		rcu_read_lock();
> >>+		tun = rcu_dereference(tfile->tun);
> >>  		if (tun) {
> >>  			netif_wake_queue(tun->dev);
> >>-			tun_put(tun);
> >>  		}
> >>+		rcu_read_unlock();
> >>
> >>  		ret = tun_put_user(tfile, skb, iv, len);
> >>  		kfree_skb(skb);
> >>@@ -1038,6 +1134,9 @@ static int tun_flags(struct tun_struct *tun)
> >>  	if (tun->flags&  TUN_VNET_HDR)
> >>  		flags |= IFF_VNET_HDR;
> >>
> >>+	if (tun->flags&  TUN_TAP_MQ)
> >>+		flags |= IFF_MULTI_QUEUE;
> >>+
> >>  	return flags;
> >>  }
> >>
> >>@@ -1097,8 +1196,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>  		err = tun_attach(tun, file);
> >>  		if (err<  0)
> >>  			return err;
> >>-	}
> >>-	else {
> >>+	} else {
> >>  		char *name;
> >>  		unsigned long flags = 0;
> >>
> >>@@ -1142,6 +1240,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>  		dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
> >>  			TUN_USER_FEATURES;
> >>  		dev->features = dev->hw_features;
> >>+		if (ifr->ifr_flags&  IFF_MULTI_QUEUE)
> >>+			dev->features |= NETIF_F_LLTX;
> >>
> >>  		err = register_netdevice(tun->dev);
> >>  		if (err<  0)
> >>@@ -1154,7 +1254,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>
> >>  		err = tun_attach(tun, file);
> >>  		if (err<  0)
> >>-			goto failed;
> >>+			goto err_free_dev;
> >>  	}
> >>
> >>  	tun_debug(KERN_INFO, tun, "tun_set_iff\n");
> >>@@ -1174,6 +1274,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>  	else
> >>  		tun->flags&= ~TUN_VNET_HDR;
> >>
> >>+	if (ifr->ifr_flags&  IFF_MULTI_QUEUE)
> >>+		tun->flags |= TUN_TAP_MQ;
> >>+	else
> >>+		tun->flags&= ~TUN_TAP_MQ;
> >>+
> >>  	/* Cache flags from tun device */
> >>  	tfile->flags = tun->flags;
> >>  	/* Make sure persistent devices do not get stuck in
> >>@@ -1187,7 +1292,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>
> >>  err_free_dev:
> >>  	free_netdev(dev);
> >>-failed:
> >>  	return err;
> >>  }
> >>
> >>@@ -1264,38 +1368,40 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> >>  				(unsigned int __user*)argp);
> >>  	}
> >>
> >>-	rtnl_lock();
> >>-
> >>-	tun = __tun_get(tfile);
> >>-	if (cmd == TUNSETIFF&&  !tun) {
> >>+	ret = 0;
> >>+	if (cmd == TUNSETIFF) {
> >>+		rtnl_lock();
> >>  		ifr.ifr_name[IFNAMSIZ-1] = '\0';
> >>-
> >>  		ret = tun_set_iff(tfile->net, file,&ifr);
> >>-
> >>+		rtnl_unlock();
> >>  		if (ret)
> >>-			goto unlock;
> >>-
> >>+			return ret;
> >>  		if (copy_to_user(argp,&ifr, ifreq_len))
> >>-			ret = -EFAULT;
> >>-		goto unlock;
> >>+			return -EFAULT;
> >>+		return ret;
> >>  	}
> >>
> >>+	rtnl_lock();
> >>+
> >>+	rcu_read_lock();
> >>+
> >>  	ret = -EBADFD;
> >>+	tun = rcu_dereference(tfile->tun);
> >>  	if (!tun)
> >>  		goto unlock;
> >>+	else
> >>+		ret = 0;
> >>
> >>-	tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %d\n", cmd);
> >>-
> >>-	ret = 0;
> >>  	switch (cmd) {
> >>  	case TUNGETIFF:
> >>  		ret = tun_get_iff(current->nsproxy->net_ns, tun,&ifr);
> >>+		rcu_read_unlock();
> >>  		if (ret)
> >>-			break;
> >>+			goto out;
> >>
> >>  		if (copy_to_user(argp,&ifr, ifreq_len))
> >>  			ret = -EFAULT;
> >>-		break;
> >>+		goto out;
> >>
> >>  	case TUNSETNOCSUM:
> >>  		/* Disable/Enable checksum */
> >>@@ -1357,9 +1463,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> >>  		/* Get hw address */
> >>  		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
> >>  		ifr.ifr_hwaddr.sa_family = tun->dev->type;
> >>+		rcu_read_unlock();
> >>  		if (copy_to_user(argp,&ifr, ifreq_len))
> >>  			ret = -EFAULT;
> >>-		break;
> >>+		goto out;
> >>
> >>  	case SIOCSIFHWADDR:
> >>  		/* Set hw address */
> >>@@ -1375,9 +1482,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> >>  	}
> >>
> >>  unlock:
> >>+	rcu_read_unlock();
> >>+out:
> >>  	rtnl_unlock();
> >>-	if (tun)
> >>-		tun_put(tun);
> >>  	return ret;
> >>  }
> >>
> >>@@ -1517,6 +1624,11 @@ out:
> >>  	return ret;
> >>  }
> >>
> >>+static void tun_sock_destruct(struct sock *sk)
> >>+{
> >>+	skb_queue_purge(&sk->sk_receive_queue);
> >>+}
> >>+
> >>  static int tun_chr_open(struct inode *inode, struct file * file)
> >>  {
> >>  	struct net *net = current->nsproxy->net_ns;
> >>@@ -1540,6 +1652,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
> >>  	sock_init_data(&tfile->socket,&tfile->sk);
> >>
> >>  	tfile->sk.sk_write_space = tun_sock_write_space;
> >>+	tfile->sk.sk_destruct = tun_sock_destruct;
> >>  	tfile->sk.sk_sndbuf = INT_MAX;
> >>  	file->private_data = tfile;
> >>
> >>@@ -1549,31 +1662,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
> >>  static int tun_chr_close(struct inode *inode, struct file *file)
> >>  {
> >>  	struct tun_file *tfile = file->private_data;
> >>-	struct tun_struct *tun;
> >>-
> >>-	tun = __tun_get(tfile);
> >>-	if (tun) {
> >>-		struct net_device *dev = tun->dev;
> >>-
> >>-		tun_debug(KERN_INFO, tun, "tun_chr_close\n");
> >>-
> >>-		__tun_detach(tun);
> >>-
> >>-		/* If desirable, unregister the netdevice. */
> >>-		if (!(tun->flags&  TUN_PERSIST)) {
> >>-			rtnl_lock();
> >>-			if (dev->reg_state == NETREG_REGISTERED)
> >>-				unregister_netdevice(dev);
> >>-			rtnl_unlock();
> >>-		}
> >>
> >>-		/* drop the reference that netdevice holds */
> >>-		sock_put(&tfile->sk);
> >>-
> >>-	}
> >>-
> >>-	/* drop the reference that file holds */
> >>-	sock_put(&tfile->sk);
> >>+	tun_detach(tfile, true);
> >>
> >>  	return 0;
> >>  }
> >>@@ -1700,14 +1790,17 @@ static void tun_cleanup(void)
> >>   * holding a reference to the file for as long as the socket is in use. */
> >>  struct socket *tun_get_socket(struct file *file)
> >>  {
> >>-	struct tun_struct *tun;
> >>+	struct tun_struct *tun = NULL;
> >>  	struct tun_file *tfile = file->private_data;
> >>  	if (file->f_op !=&tun_fops)
> >>  		return ERR_PTR(-EINVAL);
> >>-	tun = tun_get(file);
> >>-	if (!tun)
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun) {
> >>+		rcu_read_unlock();
> >>  		return ERR_PTR(-EBADFD);
> >>-	tun_put(tun);
> >>+	}
> >>+	rcu_read_unlock();
> >>  	return&tfile->socket;
> >>  }
> >>  EXPORT_SYMBOL_GPL(tun_get_socket);

^ permalink raw reply

* [PATCH 0/1] vhost, use_mm and KERNEL_DS
From: Christian Borntraeger @ 2012-06-26 10:59 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, netdev, Jens Freimann, Christian Borntraeger

Folks,

here is a patch that fixes vhost to use USER_DS before
doing a use_mm/usercopy operation. This was found during
vhost prototyping on s390, where we have separate user/kernel
address spaces.


Jens Freimann (1):
  use USER_DS in vhost_worker thread

 drivers/vhost/vhost.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)


^ permalink raw reply

* [PATCH 1/1] use USER_DS in vhost_worker thread
From: Christian Borntraeger @ 2012-06-26 10:59 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, netdev, Jens Freimann, Christian Borntraeger
In-Reply-To: <1340708398-17965-1-git-send-email-borntraeger@de.ibm.com>

From: Jens Freimann <jfrei@linux.vnet.ibm.com>

On some architectures address spaces are set up in a way that this is
not necessary to work properly but on some others (like s390) it is.
Make sure we operate on the user address space to allow copy_xxx_user()
from the vhost_worker() thread by setting it explicitly before calling
use_mm() and revert it after unuse_mm().

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 drivers/vhost/vhost.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 94dbd25..112156f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -191,7 +191,9 @@ static int vhost_worker(void *data)
 	struct vhost_dev *dev = data;
 	struct vhost_work *work = NULL;
 	unsigned uninitialized_var(seq);
+	mm_segment_t oldfs = get_fs();
 
+	set_fs(USER_DS);
 	use_mm(dev->mm);
 
 	for (;;) {
@@ -229,6 +231,7 @@ static int vhost_worker(void *data)
 
 	}
 	unuse_mm(dev->mm);
+	set_fs(oldfs);
 	return 0;
 }
 
-- 
1.7.0.4

^ permalink raw reply related

* Re: [RFC net-next (v2) 12/14] ixgbe: set maximal number of default RSS queues
From: Yuval Mintz @ 2012-06-26 11:08 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: eilong, davem, netdev, Jeff Kirsher, John Fastabend
In-Reply-To: <4FE8B019.4030807@intel.com>


>> How about this:
>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
>> index af1a531..23a8609 100644
>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
>> @@ -277,6 +277,8 @@ static inline bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
>>  	bool ret = false;
>>  	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_RSS];
>>  
>> +	f->indices = min_t(int, netif_get_num_default_rss_queues(), f->indices);
>> +
>>  	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
>>  		f->mask = 0xF;
>>  		adapter->num_rx_queues = f->indices;
>> @@ -302,7 +304,9 @@ static inline bool ixgbe_set_fdir_queues(struct ixgbe_adapter *adapter)
>>  	bool ret = false;
>>  	struct ixgbe_ring_feature *f_fdir = &adapter->ring_feature[RING_F_FDIR];
>>  
>> -	f_fdir->indices = min_t(int, num_online_cpus(), f_fdir->indices);
>> +	f_fdir->indices = min_t(int, netif_get_num_default_rss_queues(),
>> +				f_fdir->indices);
>> +
>>  	f_fdir->mask = 0;
>>  
>>  	/*
>> @@ -339,8 +343,7 @@ static inline bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter)
>>  	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
>>  		return false;
>>  
>> -	f->indices = min_t(int, num_online_cpus(), f->indices);
>> -
>> +	f->indices = min_t(int, f->indices, netif_get_num_default_rss_queues());
>>  	adapter->num_rx_queues = 1;
>>  	adapter->num_tx_queues = 1;
>>
> This makes much more sense, but still needs a few minor changes.



Well, what about this one:

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index af1a531..0dd1e51 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -277,6 +277,7 @@ static inline bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
 	bool ret = false;
 	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_RSS];

+	f->indices = min_t(int, netif_get_num_default_rss_queues(), f->indices);
 	if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
 		f->mask = 0xF;
 		adapter->num_rx_queues = f->indices;
@@ -376,7 +377,7 @@ static inline bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)

 	/* Map queue offset and counts onto allocated tx queues */
 	per_tc_q = min_t(unsigned int, dev->num_tx_queues / tcs, DCB_QUEUE_CAP);
-	q = min_t(int, num_online_cpus(), per_tc_q);
+	q = min_t(int, netif_get_num_default_rss_queues(), per_tc_q);

 	for (i = 0; i < tcs; i++) {
 		netdev_set_tc_queue(dev, i, q, offset);

^ permalink raw reply related

* Re: [PATCH 3/3] net: fec: add phy-reset-interval for device tree probe
From: Shawn Guo @ 2012-06-26 11:15 UTC (permalink / raw)
  To: Lothar Waßmann; +Cc: David S. Miller, netdev, linux-arm-kernel
In-Reply-To: <20457.31943.332545.171232@ipc1.ka-ro>

On Tue, Jun 26, 2012 at 11:11:35AM +0200, Lothar Waßmann wrote:
> Hi,
> 
> Shawn Guo writes:
> > Different boards may require different phy reset interval time.  Add
> > property phy-reset-interval for device tree probe, so that the boards
> > that need a longer interval time can specify it in their device tree.
> >
> 'phy-reset-duration' would be a more appropriate name.
> 
Ok, point taken.  Thanks.

-- 
Regards,
Shawn

^ permalink raw reply

* Re: [PATCH 1/1] use USER_DS in vhost_worker thread
From: Michael S. Tsirkin @ 2012-06-26 11:42 UTC (permalink / raw)
  To: Christian Borntraeger; +Cc: kvm, netdev, Jens Freimann
In-Reply-To: <1340708398-17965-2-git-send-email-borntraeger@de.ibm.com>

On Tue, Jun 26, 2012 at 12:59:58PM +0200, Christian Borntraeger wrote:
> From: Jens Freimann <jfrei@linux.vnet.ibm.com>
> 
> On some architectures address spaces are set up in a way that this is
> not necessary to work properly but on some others (like s390) it is.
> Make sure we operate on the user address space to allow copy_xxx_user()
> from the vhost_worker() thread by setting it explicitly before calling
> use_mm() and revert it after unuse_mm().
> 
> Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>

Acked-by: Michael S. Tsirkin <mst@redhat.com>

Dave, can you queue this up for 3.5 please?

Thanks.

> ---
>  drivers/vhost/vhost.c |    3 +++
>  1 files changed, 3 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 94dbd25..112156f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -191,7 +191,9 @@ static int vhost_worker(void *data)
>  	struct vhost_dev *dev = data;
>  	struct vhost_work *work = NULL;
>  	unsigned uninitialized_var(seq);
> +	mm_segment_t oldfs = get_fs();
>  
> +	set_fs(USER_DS);
>  	use_mm(dev->mm);
>  
>  	for (;;) {
> @@ -229,6 +231,7 @@ static int vhost_worker(void *data)
>  
>  	}
>  	unuse_mm(dev->mm);
> +	set_fs(oldfs);
>  	return 0;
>  }
>  
> -- 
> 1.7.0.4

^ permalink raw reply

* Re: [net-next RFC V3 PATCH 4/6] tuntap: multiqueue support
From: Michael S. Tsirkin @ 2012-06-26 11:54 UTC (permalink / raw)
  To: Jason Wang
  Cc: habanero, netdev, linux-kernel, krkumar2, tahm, akong, davem,
	shemminger, mashirle, Eric Dumazet
In-Reply-To: <4FE94E39.6070305@redhat.com>

On Tue, Jun 26, 2012 at 01:52:57PM +0800, Jason Wang wrote:
> On 06/25/2012 04:25 PM, Michael S. Tsirkin wrote:
> >On Mon, Jun 25, 2012 at 02:10:18PM +0800, Jason Wang wrote:
> >>This patch adds multiqueue support for tap device. This is done by abstracting
> >>each queue as a file/socket and allowing multiple sockets to be attached to the
> >>tuntap device (an array of tun_file were stored in the tun_struct). Userspace
> >>could write and read from those files to do the parallel packet
> >>sending/receiving.
> >>
> >>Unlike the previous single queue implementation, the socket and device are
> >>loosely coupled; either of them is allowed to go away first. In order to make the
> >>tx path lockless, netif_tx_lock_bh() is replaced by RCU/NETIF_F_LLTX to
> >>synchronize between the data path and system calls.
> >Don't use LLTX/RCU. It's not worth it.
> >Use something like netif_set_real_num_tx_queues.
> >
> 
> For LLTX, maybe it's better to convert it to alloc_netdev_mq() to
> let the kernel see all queues and make queue stopping and
> per-queue stats easier.
> RCU is used to handle attaching/detaching while tun/tap is
> sending and receiving packets, which looks reasonable to me.

Yes but do we have to allow this? How about we always ask
userspace to attach to all active queues?

> Not
> sure netif_set_real_num_tx_queues() can help in this situation.

Check it out.

> >>The tx queue selection is first based on the recorded rxq index of an skb; if
> >>there is none, the queue is chosen based on rx hashing (skb_get_rxhash()).
> >>
> >>Signed-off-by: Jason Wang<jasowang@redhat.com>
> >Interestingly macvtap switched to hashing first:
> >ef0002b577b52941fb147128f30bd1ecfdd3ff6d
> >(the commit log is corrupted but see what it
> >does in the patch).
> >Any idea why?
> >
> >>---
> >>  drivers/net/tun.c |  371 +++++++++++++++++++++++++++++++++--------------------
> >>  1 files changed, 232 insertions(+), 139 deletions(-)
> >>
> >>diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> >>index 8233b0a..5c26757 100644
> >>--- a/drivers/net/tun.c
> >>+++ b/drivers/net/tun.c
> >>@@ -107,6 +107,8 @@ struct tap_filter {
> >>  	unsigned char	addr[FLT_EXACT_COUNT][ETH_ALEN];
> >>  };
> >>
> >>+#define MAX_TAP_QUEUES (NR_CPUS<  16 ? NR_CPUS : 16)
> >Why the limit? I am guessing you copied this from macvtap?
> >This is problematic for a number of reasons:
> >	- will not play well with migration
> >	- will not work well for a large guest
> >
> >Yes, macvtap needs to be fixed too.
> >
> >I am guessing what it is trying to prevent is queueing
> >up a huge number of packets?
> >So just divide the default tx queue limit by the # of queues.
> 
> Not sure;
> other reasons I can guess:
> - to prevent storing a large array of pointers in tun_struct or macvlan_dev.

OK so with the limit of e.g. 1024 we'd allocate at most
2 pages of memory. This doesn't look too bad. 1024 is probably a
high enough limit: modern hypervisors seem to support on the order
of 100-200 CPUs so this leaves us some breathing space
if we want to match a queue per guest CPU.
Of course we need to limit the packets per queue
in such a setup more aggressively. 1000 packets * 1000 queues
* 64K per packet is too much.

> - it may not be suitable to allow the number of virtqueues greater
> than the number of physical queues in the card

Maybe for macvtap, here we have no idea which card we
are working with and how many queues it has.

> >
> >And by the way, for MQ applications maybe we can finally
> >ignore tx queue altogether and limit the total number
> >of bytes queued?
> >To avoid regressions we can make it large like 64M/# queues.
> >Could be a separate patch I think, and for a single queue
> >might need a compatible mode though I am not sure.
> 
> Could you explain more about this?
> Did you mean to have a total
> sndbuf for all sockets that attached to tun/tap?

Consider that we currently limit the # of
packets queued at tun for xmit to userspace.
Some limit is needed but # of packets sounds
very silly - limiting the total memory
might be more reasonable.

In case of multiqueue, we really care about
total # of packets or total memory, but a simple
approximation could be to divide the allocation
between active queues equally.

qdisc also queues some packets, that logic is
using # of packets anyway. So either make that
1000/# queues, or even set to 0 as Eric once
suggested.

> >>+
> >>  struct tun_file {
> >>  	struct sock sk;
> >>  	struct socket socket;
> >>@@ -114,16 +116,18 @@ struct tun_file {
> >>  	int vnet_hdr_sz;
> >>  	struct tap_filter txflt;
> >>  	atomic_t count;
> >>-	struct tun_struct *tun;
> >>+	struct tun_struct __rcu *tun;
> >>  	struct net *net;
> >>  	struct fasync_struct *fasync;
> >>  	unsigned int flags;
> >>+	u16 queue_index;
> >>  };
> >>
> >>  struct tun_sock;
> >>
> >>  struct tun_struct {
> >>-	struct tun_file		*tfile;
> >>+	struct tun_file		*tfiles[MAX_TAP_QUEUES];
> >>+	unsigned int            numqueues;
> >>  	unsigned int 		flags;
> >>  	uid_t			owner;
> >>  	gid_t			group;
> >>@@ -138,80 +142,159 @@ struct tun_struct {
> >>  #endif
> >>  };
> >>
> >>-static int tun_attach(struct tun_struct *tun, struct file *file)
> >>+static DEFINE_SPINLOCK(tun_lock);
> >>+
> >>+/*
> >>+ * tun_get_queue(): calculate the queue index
> >>+ *     - if skbs comes from mq nics, we can just borrow
> >>+ *     - if not, calculate from the hash
> >>+ */
> >>+static struct tun_file *tun_get_queue(struct net_device *dev,
> >>+				      struct sk_buff *skb)
> >>  {
> >>-	struct tun_file *tfile = file->private_data;
> >>-	int err;
> >>+	struct tun_struct *tun = netdev_priv(dev);
> >>+	struct tun_file *tfile = NULL;
> >>+	int numqueues = tun->numqueues;
> >>+	__u32 rxq;
> >>
> >>-	ASSERT_RTNL();
> >>+	BUG_ON(!rcu_read_lock_held());
> >>
> >>-	netif_tx_lock_bh(tun->dev);
> >>+	if (!numqueues)
> >>+		goto out;
> >>
> >>-	err = -EINVAL;
> >>-	if (tfile->tun)
> >>+	if (numqueues == 1) {
> >>+		tfile = rcu_dereference(tun->tfiles[0]);
> >Instead of hacks like this, you can ask for an MQ
> >flag to be set in SETIFF. Then you won't need to
> >handle attach/detach at random times.
> 
> Consider a user switching from an sq guest to an mq guest: qemu would
> attach or detach the fd, which could not be expected in the kernel.

Can't userspace keep it attached always, just deactivate MQ?

> >And most of the scary num_queues checks can go away.
> 
> Even if we have an MQ flag, userspace could still just attach one queue
> to the device.

I think we allow too much flexibility if we let
userspace detach a random queue.
Maybe only allow attaching/detaching with MQ off?
If userspace wants to attach/detach, clear MQ first?
Alternatively, attach/detach all queues in one ioctl?

> >You can then also ask userspace about the max # of queues
> >to expect if you want to save some memory.
> >
> 
> Yes, good suggestion.
> >>  		goto out;
> >>+	}
> >>
> >>-	err = -EBUSY;
> >>-	if (tun->tfile)
> >>+	if (likely(skb_rx_queue_recorded(skb))) {
> >>+		rxq = skb_get_rx_queue(skb);
> >>+
> >>+		while (unlikely(rxq>= numqueues))
> >>+			rxq -= numqueues;
> >>+
> >>+		tfile = rcu_dereference(tun->tfiles[rxq]);
> >>  		goto out;
> >>+	}
> >>
> >>-	err = 0;
> >>-	tfile->tun = tun;
> >>-	tun->tfile = tfile;
> >>-	netif_carrier_on(tun->dev);
> >>-	dev_hold(tun->dev);
> >>-	sock_hold(&tfile->sk);
> >>-	atomic_inc(&tfile->count);
> >>+	/* Check if we can use flow to select a queue */
> >>+	rxq = skb_get_rxhash(skb);
> >>+	if (rxq) {
> >>+		u32 idx = ((u64)rxq * numqueues)>>  32;
> >This completely confuses me. What's the logic here?
> >How do we even know it's in range?
> >
> 
> rxq is a u32, so the result should be less than numqueues.

Aha. So the point is to use multiply+shift instead of %?
Please add a comment.


> >>+		tfile = rcu_dereference(tun->tfiles[idx]);
> >>+		goto out;
> >>+	}
> >>
> >>+	tfile = rcu_dereference(tun->tfiles[0]);
> >>  out:
> >>-	netif_tx_unlock_bh(tun->dev);
> >>-	return err;
> >>+	return tfile;
> >>  }
> >>
> >>-static void __tun_detach(struct tun_struct *tun)
> >>+static int tun_detach(struct tun_file *tfile, bool clean)
> >>  {
> >>-	struct tun_file *tfile = tun->tfile;
> >>-	/* Detach from net device */
> >>-	netif_tx_lock_bh(tun->dev);
> >>-	netif_carrier_off(tun->dev);
> >>-	tun->tfile = NULL;
> >>-	netif_tx_unlock_bh(tun->dev);
> >>-
> >>-	/* Drop read queue */
> >>-	skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
> >>-
> >>-	/* Drop the extra count on the net device */
> >>-	dev_put(tun->dev);
> >>-}
> >>+	struct tun_struct *tun;
> >>+	struct net_device *dev = NULL;
> >>+	bool destroy = false;
> >>
> >>-static void tun_detach(struct tun_struct *tun)
> >>-{
> >>-	rtnl_lock();
> >>-	__tun_detach(tun);
> >>-	rtnl_unlock();
> >>-}
> >>+	spin_lock(&tun_lock);
> >>
> >>-static struct tun_struct *__tun_get(struct tun_file *tfile)
> >>-{
> >>-	struct tun_struct *tun = NULL;
> >>+	tun = rcu_dereference_protected(tfile->tun,
> >>+					lockdep_is_held(&tun_lock));
> >>+	if (tun) {
> >>+		u16 index = tfile->queue_index;
> >>+		BUG_ON(index>= tun->numqueues);
> >>+		dev = tun->dev;
> >>+
> >>+		rcu_assign_pointer(tun->tfiles[index],
> >>+				   tun->tfiles[tun->numqueues - 1]);
> >>+		tun->tfiles[index]->queue_index = index;
> >>+		rcu_assign_pointer(tfile->tun, NULL);
> >>+		--tun->numqueues;
> >>+		sock_put(&tfile->sk);
> >>
> >>-	if (atomic_inc_not_zero(&tfile->count))
> >>-		tun = tfile->tun;
> >>+		if (tun->numqueues == 0&&  !(tun->flags&  TUN_PERSIST))
> >>+			destroy = true;
> >Please don't use flags like that. Use dedicated labels and goto there on error.
> 
> ok.
> >
> >>+	}
> >>
> >>-	return tun;
> >>+	spin_unlock(&tun_lock);
> >>+
> >>+	synchronize_rcu();
> >>+	if (clean)
> >>+		sock_put(&tfile->sk);
> >>+
> >>+	if (destroy) {
> >>+		rtnl_lock();
> >>+		if (dev->reg_state == NETREG_REGISTERED)
> >>+			unregister_netdevice(dev);
> >>+		rtnl_unlock();
> >>+	}
> >>+
> >>+	return 0;
> >>  }
> >>
> >>-static struct tun_struct *tun_get(struct file *file)
> >>+static void tun_detach_all(struct net_device *dev)
> >>  {
> >>-	return __tun_get(file->private_data);
> >>+	struct tun_struct *tun = netdev_priv(dev);
> >>+	struct tun_file *tfile, *tfile_list[MAX_TAP_QUEUES];
> >>+	int i, j = 0;
> >>+
> >>+	spin_lock(&tun_lock);
> >>+
> >>+	for (i = 0; i<  MAX_TAP_QUEUES&&  tun->numqueues; i++) {
> >>+		tfile = rcu_dereference_protected(tun->tfiles[i],
> >>+						lockdep_is_held(&tun_lock));
> >>+		BUG_ON(!tfile);
> >>+		wake_up_all(&tfile->wq.wait);
> >>+		tfile_list[j++] = tfile;
> >>+		rcu_assign_pointer(tfile->tun, NULL);
> >>+		--tun->numqueues;
> >>+	}
> >>+	BUG_ON(tun->numqueues != 0);
> >>+	/* guarantee that any future tun_attach will fail */
> >>+	tun->numqueues = MAX_TAP_QUEUES;
> >>+	spin_unlock(&tun_lock);
> >>+
> >>+	synchronize_rcu();
> >>+	for (--j; j>= 0; j--)
> >>+		sock_put(&tfile_list[j]->sk);
> >>  }
> >>
> >>-static void tun_put(struct tun_struct *tun)
> >>+static int tun_attach(struct tun_struct *tun, struct file *file)
> >>  {
> >>-	struct tun_file *tfile = tun->tfile;
> >>+	struct tun_file *tfile = file->private_data;
> >>+	int err;
> >>+
> >>+	ASSERT_RTNL();
> >>+
> >>+	spin_lock(&tun_lock);
> >>
> >>-	if (atomic_dec_and_test(&tfile->count))
> >>-		tun_detach(tfile->tun);
> >>+	err = -EINVAL;
> >>+	if (rcu_dereference_protected(tfile->tun, lockdep_is_held(&tun_lock)))
> >>+		goto out;
> >>+
> >>+	err = -EBUSY;
> >>+	if (!(tun->flags&  TUN_TAP_MQ)&&  tun->numqueues == 1)
> >>+		goto out;
> >>+
> >>+	if (tun->numqueues == MAX_TAP_QUEUES)
> >>+		goto out;
> >>+
> >>+	err = 0;
> >>+	tfile->queue_index = tun->numqueues;
> >>+	rcu_assign_pointer(tfile->tun, tun);
> >>+	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
> >>+	sock_hold(&tfile->sk);
> >>+	tun->numqueues++;
> >>+
> >>+	if (tun->numqueues == 1)
> >>+		netif_carrier_on(tun->dev);
> >>+
> >>+	/* device is allowed to go away first, so no need to hold extra
> >>+	 * refcnt. */
> >>+
> >>+out:
> >>+	spin_unlock(&tun_lock);
> >>+	return err;
> >>  }
> >>
> >>  /* TAP filtering */
> >>@@ -331,16 +414,7 @@ static const struct ethtool_ops tun_ethtool_ops;
> >>  /* Net device detach from fd. */
> >>  static void tun_net_uninit(struct net_device *dev)
> >>  {
> >>-	struct tun_struct *tun = netdev_priv(dev);
> >>-	struct tun_file *tfile = tun->tfile;
> >>-
> >>-	/* Inform the methods they need to stop using the dev.
> >>-	 */
> >>-	if (tfile) {
> >>-		wake_up_all(&tfile->wq.wait);
> >>-		if (atomic_dec_and_test(&tfile->count))
> >>-			__tun_detach(tun);
> >>-	}
> >>+	tun_detach_all(dev);
> >>  }
> >>
> >>  /* Net device open. */
> >>@@ -360,10 +434,10 @@ static int tun_net_close(struct net_device *dev)
> >>  /* Net device start xmit */
> >>  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> >>  {
> >>-	struct tun_struct *tun = netdev_priv(dev);
> >>-	struct tun_file *tfile = tun->tfile;
> >>+	struct tun_file *tfile = NULL;
> >>
> >>-	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
> >>+	rcu_read_lock();
> >>+	tfile = tun_get_queue(dev, skb);
> >>
> >>  	/* Drop packet if interface is not attached */
> >>  	if (!tfile)
> >>@@ -381,7 +455,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> >>
> >>  	if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
> >>  	>= dev->tx_queue_len) {
> >>-		if (!(tun->flags&  TUN_ONE_QUEUE)) {
> >>+		if (!(tfile->flags&  TUN_ONE_QUEUE)&&
> >Which patch moved flags from tun to tfile?
> 
> Patch 1 caches the tun->flags in tfile, but it seems this may let the
> flags get out of sync. So we'd better use the one in tun_struct.
> >
> >>+		    !(tfile->flags&  TUN_TAP_MQ)) {
> >>  			/* Normal queueing mode. */
> >>  			/* Packet scheduler handles dropping of further packets. */
> >>  			netif_stop_queue(dev);
> >>@@ -390,7 +465,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> >>  			 * error is more appropriate. */
> >>  			dev->stats.tx_fifo_errors++;
> >>  		} else {
> >>-			/* Single queue mode.
> >>+			/* Single queue mode or multi queue mode.
> >>  			 * Driver handles dropping of all packets itself. */
> >Please don't do this. Stop the queue on overrun as appropriate.
> >ONE_QUEUE is a legacy hack.
> >
> >BTW we really should stop queue before we start dropping packets,
> >but that can be a separate patch.
> 
> The problem here is the use of NETIF_F_LLTX. The kernel could only see
> one queue even for a multiqueue tun/tap. If we use
> netif_stop_queue(), all other queues would be stopped also.

Another reason not to use LLTX?

> >>  			goto drop;
> >>  		}
> >>@@ -408,9 +483,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> >>  		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
> >>  	wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
> >>  				   POLLRDNORM | POLLRDBAND);
> >>+	rcu_read_unlock();
> >>  	return NETDEV_TX_OK;
> >>
> >>  drop:
> >>+	rcu_read_unlock();
> >>  	dev->stats.tx_dropped++;
> >>  	kfree_skb(skb);
> >>  	return NETDEV_TX_OK;
> >>@@ -527,16 +604,22 @@ static void tun_net_init(struct net_device *dev)
> >>  static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
> >>  {
> >>  	struct tun_file *tfile = file->private_data;
> >>-	struct tun_struct *tun = __tun_get(tfile);
> >>+	struct tun_struct *tun = NULL;
> >>  	struct sock *sk;
> >>  	unsigned int mask = 0;
> >>
> >>-	if (!tun)
> >>+	if (!tfile)
> >>  		return POLLERR;
> >>
> >>-	sk = tfile->socket.sk;
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun) {
> >>+		rcu_read_unlock();
> >>+		return POLLERR;
> >>+	}
> >>+	rcu_read_unlock();
> >>
> >>-	tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
> >>+	sk =&tfile->sk;
> >>
> >>  	poll_wait(file,&tfile->wq.wait, wait);
> >>
> >>@@ -548,10 +631,12 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
> >>  	     sock_writeable(sk)))
> >>  		mask |= POLLOUT | POLLWRNORM;
> >>
> >>-	if (tun->dev->reg_state != NETREG_REGISTERED)
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun || tun->dev->reg_state != NETREG_REGISTERED)
> >>  		mask = POLLERR;
> >>+	rcu_read_unlock();
> >>
> >>-	tun_put(tun);
> >>  	return mask;
> >>  }
> >>
> >>@@ -708,9 +793,12 @@ static ssize_t tun_get_user(struct tun_file *tfile,
> >>  		skb_shinfo(skb)->gso_segs = 0;
> >>  	}
> >>
> >>-	tun = __tun_get(tfile);
> >>-	if (!tun)
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun) {
> >>+		rcu_read_unlock();
> >>  		return -EBADFD;
> >>+	}
> >>
> >>  	switch (tfile->flags&  TUN_TYPE_MASK) {
> >>  	case TUN_TUN_DEV:
> >>@@ -720,26 +808,30 @@ static ssize_t tun_get_user(struct tun_file *tfile,
> >>  		skb->protocol = eth_type_trans(skb, tun->dev);
> >>  		break;
> >>  	}
> >>-
> >>-	netif_rx_ni(skb);
> >>  	tun->dev->stats.rx_packets++;
> >>  	tun->dev->stats.rx_bytes += len;
> >>-	tun_put(tun);
> >>+	rcu_read_unlock();
> >>+
> >>+	netif_rx_ni(skb);
> >>+
> >>  	return count;
> >>
> >>  err_free:
> >>  	count = -EINVAL;
> >>  	kfree_skb(skb);
> >>  err:
> >>-	tun = __tun_get(tfile);
> >>-	if (!tun)
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun) {
> >>+		rcu_read_unlock();
> >>  		return -EBADFD;
> >>+	}
> >>
> >>  	if (drop)
> >>  		tun->dev->stats.rx_dropped++;
> >>  	if (error)
> >>  		tun->dev->stats.rx_frame_errors++;
> >>-	tun_put(tun);
> >>+	rcu_read_unlock();
> >>  	return count;
> >>  }
> >>
> >>@@ -833,12 +925,13 @@ static ssize_t tun_put_user(struct tun_file *tfile,
> >>  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
> >>  	total += skb->len;
> >>
> >>-	tun = __tun_get(tfile);
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>  	if (tun) {
> >>  		tun->dev->stats.tx_packets++;
> >>  		tun->dev->stats.tx_bytes += len;
> >>-		tun_put(tun);
> >>  	}
> >>+	rcu_read_unlock();
> >>
> >>  	return total;
> >>  }
> >>@@ -869,28 +962,31 @@ static ssize_t tun_do_read(struct tun_file *tfile,
> >>  				break;
> >>  			}
> >>
> >>-			tun = __tun_get(tfile);
> >>+			rcu_read_lock();
> >>+			tun = rcu_dereference(tfile->tun);
> >>  			if (!tun) {
> >>-				ret = -EIO;
> >>+				ret = -EBADFD;
> >BADFD is for when you get passed something like -1 fd.
> >Here fd is OK, it's just in a bad state so you can not do IO.
> >
> 
> Sure.
> >>+				rcu_read_unlock();
> >>  				break;
> >>  			}
> >>  			if (tun->dev->reg_state != NETREG_REGISTERED) {
> >>  				ret = -EIO;
> >>-				tun_put(tun);
> >>+				rcu_read_unlock();
> >>  				break;
> >>  			}
> >>-			tun_put(tun);
> >>+			rcu_read_unlock();
> >>
> >>  			/* Nothing to read, let's sleep */
> >>  			schedule();
> >>  			continue;
> >>  		}
> >>
> >>-		tun = __tun_get(tfile);
> >>+		rcu_read_lock();
> >>+		tun = rcu_dereference(tfile->tun);
> >>  		if (tun) {
> >>  			netif_wake_queue(tun->dev);
> >>-			tun_put(tun);
> >>  		}
> >>+		rcu_read_unlock();
> >>
> >>  		ret = tun_put_user(tfile, skb, iv, len);
> >>  		kfree_skb(skb);
> >>@@ -1038,6 +1134,9 @@ static int tun_flags(struct tun_struct *tun)
> >>  	if (tun->flags&  TUN_VNET_HDR)
> >>  		flags |= IFF_VNET_HDR;
> >>
> >>+	if (tun->flags&  TUN_TAP_MQ)
> >>+		flags |= IFF_MULTI_QUEUE;
> >>+
> >>  	return flags;
> >>  }
> >>
> >>@@ -1097,8 +1196,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>  		err = tun_attach(tun, file);
> >>  		if (err<  0)
> >>  			return err;
> >>-	}
> >>-	else {
> >>+	} else {
> >>  		char *name;
> >>  		unsigned long flags = 0;
> >>
> >>@@ -1142,6 +1240,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>  		dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
> >>  			TUN_USER_FEATURES;
> >>  		dev->features = dev->hw_features;
> >>+		if (ifr->ifr_flags&  IFF_MULTI_QUEUE)
> >>+			dev->features |= NETIF_F_LLTX;
> >>
> >>  		err = register_netdevice(tun->dev);
> >>  		if (err<  0)
> >>@@ -1154,7 +1254,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>
> >>  		err = tun_attach(tun, file);
> >>  		if (err<  0)
> >>-			goto failed;
> >>+			goto err_free_dev;
> >>  	}
> >>
> >>  	tun_debug(KERN_INFO, tun, "tun_set_iff\n");
> >>@@ -1174,6 +1274,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>  	else
> >>  		tun->flags&= ~TUN_VNET_HDR;
> >>
> >>+	if (ifr->ifr_flags&  IFF_MULTI_QUEUE)
> >>+		tun->flags |= TUN_TAP_MQ;
> >>+	else
> >>+		tun->flags&= ~TUN_TAP_MQ;
> >>+
> >>  	/* Cache flags from tun device */
> >>  	tfile->flags = tun->flags;
> >>  	/* Make sure persistent devices do not get stuck in
> >>@@ -1187,7 +1292,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >>
> >>  err_free_dev:
> >>  	free_netdev(dev);
> >>-failed:
> >>  	return err;
> >>  }
> >>
> >>@@ -1264,38 +1368,40 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> >>  				(unsigned int __user*)argp);
> >>  	}
> >>
> >>-	rtnl_lock();
> >>-
> >>-	tun = __tun_get(tfile);
> >>-	if (cmd == TUNSETIFF&&  !tun) {
> >>+	ret = 0;
> >>+	if (cmd == TUNSETIFF) {
> >>+		rtnl_lock();
> >>  		ifr.ifr_name[IFNAMSIZ-1] = '\0';
> >>-
> >>  		ret = tun_set_iff(tfile->net, file,&ifr);
> >>-
> >>+		rtnl_unlock();
> >>  		if (ret)
> >>-			goto unlock;
> >>-
> >>+			return ret;
> >>  		if (copy_to_user(argp,&ifr, ifreq_len))
> >>-			ret = -EFAULT;
> >>-		goto unlock;
> >>+			return -EFAULT;
> >>+		return ret;
> >>  	}
> >>
> >>+	rtnl_lock();
> >>+
> >>+	rcu_read_lock();
> >>+
> >>  	ret = -EBADFD;
> >>+	tun = rcu_dereference(tfile->tun);
> >>  	if (!tun)
> >>  		goto unlock;
> >>+	else
> >>+		ret = 0;
> >>
> >>-	tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %d\n", cmd);
> >>-
> >>-	ret = 0;
> >>  	switch (cmd) {
> >>  	case TUNGETIFF:
> >>  		ret = tun_get_iff(current->nsproxy->net_ns, tun,&ifr);
> >>+		rcu_read_unlock();
> >>  		if (ret)
> >>-			break;
> >>+			goto out;
> >>
> >>  		if (copy_to_user(argp,&ifr, ifreq_len))
> >>  			ret = -EFAULT;
> >>-		break;
> >>+		goto out;
> >>
> >>  	case TUNSETNOCSUM:
> >>  		/* Disable/Enable checksum */
> >>@@ -1357,9 +1463,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> >>  		/* Get hw address */
> >>  		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
> >>  		ifr.ifr_hwaddr.sa_family = tun->dev->type;
> >>+		rcu_read_unlock();
> >>  		if (copy_to_user(argp,&ifr, ifreq_len))
> >>  			ret = -EFAULT;
> >>-		break;
> >>+		goto out;
> >>
> >>  	case SIOCSIFHWADDR:
> >>  		/* Set hw address */
> >>@@ -1375,9 +1482,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
> >>  	}
> >>
> >>  unlock:
> >>+	rcu_read_unlock();
> >>+out:
> >>  	rtnl_unlock();
> >>-	if (tun)
> >>-		tun_put(tun);
> >>  	return ret;
> >>  }
> >>
> >>@@ -1517,6 +1624,11 @@ out:
> >>  	return ret;
> >>  }
> >>
> >>+static void tun_sock_destruct(struct sock *sk)
> >>+{
> >>+	skb_queue_purge(&sk->sk_receive_queue);
> >>+}
> >>+
> >>  static int tun_chr_open(struct inode *inode, struct file * file)
> >>  {
> >>  	struct net *net = current->nsproxy->net_ns;
> >>@@ -1540,6 +1652,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
> >>  	sock_init_data(&tfile->socket,&tfile->sk);
> >>
> >>  	tfile->sk.sk_write_space = tun_sock_write_space;
> >>+	tfile->sk.sk_destruct = tun_sock_destruct;
> >>  	tfile->sk.sk_sndbuf = INT_MAX;
> >>  	file->private_data = tfile;
> >>
> >>@@ -1549,31 +1662,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
> >>  static int tun_chr_close(struct inode *inode, struct file *file)
> >>  {
> >>  	struct tun_file *tfile = file->private_data;
> >>-	struct tun_struct *tun;
> >>-
> >>-	tun = __tun_get(tfile);
> >>-	if (tun) {
> >>-		struct net_device *dev = tun->dev;
> >>-
> >>-		tun_debug(KERN_INFO, tun, "tun_chr_close\n");
> >>-
> >>-		__tun_detach(tun);
> >>-
> >>-		/* If desirable, unregister the netdevice. */
> >>-		if (!(tun->flags&  TUN_PERSIST)) {
> >>-			rtnl_lock();
> >>-			if (dev->reg_state == NETREG_REGISTERED)
> >>-				unregister_netdevice(dev);
> >>-			rtnl_unlock();
> >>-		}
> >>
> >>-		/* drop the reference that netdevice holds */
> >>-		sock_put(&tfile->sk);
> >>-
> >>-	}
> >>-
> >>-	/* drop the reference that file holds */
> >>-	sock_put(&tfile->sk);
> >>+	tun_detach(tfile, true);
> >>
> >>  	return 0;
> >>  }
> >>@@ -1700,14 +1790,17 @@ static void tun_cleanup(void)
> >>   * holding a reference to the file for as long as the socket is in use. */
> >>  struct socket *tun_get_socket(struct file *file)
> >>  {
> >>-	struct tun_struct *tun;
> >>+	struct tun_struct *tun = NULL;
> >>  	struct tun_file *tfile = file->private_data;
> >>  	if (file->f_op !=&tun_fops)
> >>  		return ERR_PTR(-EINVAL);
> >>-	tun = tun_get(file);
> >>-	if (!tun)
> >>+	rcu_read_lock();
> >>+	tun = rcu_dereference(tfile->tun);
> >>+	if (!tun) {
> >>+		rcu_read_unlock();
> >>  		return ERR_PTR(-EBADFD);
> >>-	tun_put(tun);
> >>+	}
> >>+	rcu_read_unlock();
> >>  	return&tfile->socket;
> >>  }
> >>  EXPORT_SYMBOL_GPL(tun_get_socket);

^ permalink raw reply

* Re: [PATCH 3/3] net: fec: add phy-reset-interval for device tree probe
From: Florian Fainelli @ 2012-06-26 11:55 UTC (permalink / raw)
  To: Shawn Guo; +Cc: David S. Miller, netdev, linux-arm-kernel
In-Reply-To: <1340700308-8315-4-git-send-email-shawn.guo@linaro.org>

Hi,

On Tuesday 26 June 2012 16:45:08 Shawn Guo wrote:
> Different boards may require different phy reset interval time.  Add
> property phy-reset-interval for device tree probe, so that the boards
> that need a longer interval time can specify it in their device tree.
> 
> Along with the update to phy related stuff, it also makes a minor fix
> on phy-reset-gpios in binding document to have it be optional to match
> the driver code.
> 
> Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
> ---
>  Documentation/devicetree/bindings/net/fsl-fec.txt |    5 ++++-
>  drivers/net/ethernet/freescale/fec.c              |    4 +++-
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt 
b/Documentation/devicetree/bindings/net/fsl-fec.txt
> index 7ab9e1a..74a4ce7 100644
> --- a/Documentation/devicetree/bindings/net/fsl-fec.txt
> +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
> @@ -7,10 +7,13 @@ Required properties:
>  - phy-mode : String, operation mode of the PHY interface.
>    Supported values are: "mii", "gmii", "sgmii", "tbi", "rmii",
>    "rgmii", "rgmii-id", "rgmii-rxid", "rgmii-txid", "rtbi", "smii".
> -- phy-reset-gpios : Should specify the gpio for phy reset
>  
>  Optional properties:
>  - local-mac-address : 6 bytes, mac address
> +- phy-reset-gpios : Should specify the gpio for phy reset
> +- phy-reset-interval : Reset interval time in milliseconds.  Should present
> +  only if property "phy-reset-gpios" is available.  When "phy-reset-gpios"
> +  is available, missing the property will have the interval be 1 
millisecond.
>  
>  Example:
>  
> diff --git a/drivers/net/ethernet/freescale/fec.c 
b/drivers/net/ethernet/freescale/fec.c
> index 4dce9e3..86ecaae 100644
> --- a/drivers/net/ethernet/freescale/fec.c
> +++ b/drivers/net/ethernet/freescale/fec.c
> @@ -1507,18 +1507,20 @@ static int __devinit fec_get_phy_mode_dt(struct 
platform_device *pdev)
>  static void __devinit fec_reset_phy(struct platform_device *pdev)
>  {
>  	int err, phy_reset;
> +	int msec = 1;
>  	struct device_node *np = pdev->dev.of_node;
>  
>  	if (!np)
>  		return;
>  
> +	of_property_read_u32(np, "phy-reset-interval", &msec);
>  	phy_reset = of_get_named_gpio(np, "phy-reset-gpios", 0);
>  	err = gpio_request_one(phy_reset, GPIOF_OUT_INIT_LOW, "phy-reset");
>  	if (err) {
>  		pr_debug("FEC: failed to get gpio phy-reset: %d\n", err);
>  		return;
>  	}
> -	msleep(1);
> +	msleep(msec);

You might want to check the value of the property here to make sure it is in 
the msleep() acceptable range.

>  	gpio_set_value(phy_reset, 1);
>  }
>  #else /* CONFIG_OF */
> -- 
> 1.7.5.4
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 3/3] net: fec: add phy-reset-interval for device tree probe
From: Shawn Guo @ 2012-06-26 12:13 UTC (permalink / raw)
  To: Florian Fainelli; +Cc: David S. Miller, netdev, linux-arm-kernel
In-Reply-To: <2921547.BkALd1Y1Bk@flexo>

On Tue, Jun 26, 2012 at 01:55:05PM +0200, Florian Fainelli wrote:
> > +	of_property_read_u32(np, "phy-reset-interval", &msec);
> >  	phy_reset = of_get_named_gpio(np, "phy-reset-gpios", 0);
> >  	err = gpio_request_one(phy_reset, GPIOF_OUT_INIT_LOW, "phy-reset");
> >  	if (err) {
> >  		pr_debug("FEC: failed to get gpio phy-reset: %d\n", err);
> >  		return;
> >  	}
> > -	msleep(1);
> > +	msleep(msec);
> 
> You might want to check the value of the property here to make sure it is in 
> the msleep() acceptable range.
> 
Sounds good.  I think a sensible reset duration should be less than one
second, so let's force 1000 ms to be the possible max one?

-- 
Regards,
Shawn

^ permalink raw reply

* Re: New commands to configure IOV features
From: Yuval Mintz @ 2012-06-26 12:21 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Greg Rose, netdev@vger.kernel.org
In-Reply-To: <20120507081634.000003f8.gregory.v.rose@intel.com>

On 05/07/2012 06:16 PM, Greg Rose wrote:

> On Mon, 7 May 2012 14:17:54 +0300
> Yuval Mintz <yuvalmin@broadcom.com> wrote:
> 
>> I've tried to figure out if there was a standard interface
>> (ethtool/iproute) through which a user could configure the number
>> of vfs in his system.
>>
>> I've seen the RFC suggested in
>> http://markmail.org/thread/qblfcv7zbxsxp7q6, and
>> http://markmail.org/thread/fw54dcppmxuxoe6n, but failed to see any
>> later references to it (commits or further discussion on this topic).
>>
>> How exactly are things standing with these RFCs? Were they abandoned?
> 
> The only way to configure the number of VFs continues to be through the
> max_vfs module parameter.  I've got a patch to do it through ethtool
> sitting on the back burner but due to other requirements of my day job
> I've not been able to work on it since last fall.
> 
> - Greg


Hi Ben,

If I want to pick the RFCs and add support for configuring the number of
VFs - do you think ethtool's the right place for such added support?

I'm asking since as far as I can see, ethtool (today) doesn't contain any
features related to virtual functions.

Thanks,
Yuval

^ permalink raw reply

* [net-next patch] bnx2x: Change bnx2x_tests_str_arr to static char
From: Merav Sicron @ 2012-06-26 16:11 UTC (permalink / raw)
  To: davem, netdev, eilong; +Cc: Merav Sicron

This patch changes the definition of bnx2x_tests_str_arr from char to static
char. This correction will also eliminate the sparse warning created in commit
cf2c1df62e065bfc15e38daf2d3479a56b320f29.

Signed-off-by: Merav Sicron <meravs@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
Hi Dave,

Please consider applying this patch to net-next.

Thanks,
Merav

 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 1f8c156..bff3129 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1600,7 +1600,7 @@ static int bnx2x_set_pauseparam(struct net_device *dev,
 	return 0;
 }
 
-char *bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF] = {
+static char *bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF] = {
 	"register_test (offline)    ",
 	"memory_test (offline)      ",
 	"int_loopback_test (offline)",
-- 
1.7.10

^ permalink raw reply related

* Re: [net] ixgbe: Do not pad FCoE frames as this can cause issues with FCoE DDP
From: Ben Hutchings @ 2012-06-26 14:09 UTC (permalink / raw)
  To: jeffrey.t.kirsher
  Cc: David Miller, alexander.h.duyck, netdev, gospo, sassmann, stable
In-Reply-To: <1340697238.2255.20.camel@jtkirshe-mobl>

[-- Attachment #1: Type: text/plain, Size: 987 bytes --]

On Tue, 2012-06-26 at 00:53 -0700, Jeff Kirsher wrote:
> On Tue, 2012-06-26 at 00:50 -0700, David Miller wrote:
> > Sorry, quotes don't work either, what you did is still a SMTP syntax error,
> > here's what is in the bounce I get back:
> > 
> > 	<stable@vger.kernel.org> "[3.4]",
> > 	Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> > Illegal-Object:	Syntax error in Cc: address found on vger.kernel.org:
> > 	Cc:	<stable@vger.kernel.org>"[3.4]"
> > 						^-missing end of address
> 
> Grrr...
> 
> I will re-send without the "[3.4]", Greg will just have to deal with it.

It's certainly not necessary to put anything like that in the real Cc
header. Many people put something like
'Cc: <stable@vger.kernel.org> # 3.4' in the body; I don't know if that
is more likely to result in the version being stripped when
generating mail recipients.

Ben.

-- 
Ben Hutchings
Lowery's Law:
             If it jams, force it. If it breaks, it needed replacing anyway.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply

* Re: [PATCH 01/13] netfilter: fix problem with proto register
From: Pablo Neira Ayuso @ 2012-06-26 14:36 UTC (permalink / raw)
  To: Gao feng; +Cc: netdev, netfilter-devel
In-Reply-To: <4FE92F1E.9020901@cn.fujitsu.com>

On Tue, Jun 26, 2012 at 11:40:14AM +0800, Gao feng wrote:
> Hi Pablo:
> 
> 于 2012年06月25日 19:12, Pablo Neira Ayuso 写道:
> > On Thu, Jun 21, 2012 at 10:36:38PM +0800, Gao feng wrote:
> >> before commit 2c352f444ccfa966a1aa4fd8e9ee29381c467448
> >> (netfilter: nf_conntrack: prepare namespace support for
> >> l4 protocol trackers), we register sysctl before register
> >> protos, so if sysctl is registered faild, the protos will
> >> not be registered.
> >>
> >> but now, we register protos first, and when register
> >> sysctl failed, we can use protos too, it's different
> >> from before.
> > 
> > No, this has to be an all-or-nothing game. If one fails, everything
> > else that you've registered has to be unregistered.
> 
> indeed,this is an all-or-nothing game right now,please look at the ipv4_net_init,
> when we register nf_conntrack_l3proto_ipv4 failed,we will unregister the already
> registered l4protoes, and in nf_conntrack_l4proto_unregister,we will call
> nf_ct_l4proto_unregister_sysctl to free the sysctl table.

I see proto->init_net allocates in->ctl_table, then
nf_ct_l3proto_register_sysctl releases it if it fails. I got confused
because I did not see where that memory was being freed. Then, it's
good.

Still one more thing:

> >> so change to register sysctl before register protos.
> >>
> >> Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
> >> ---
> >>  net/netfilter/nf_conntrack_proto.c |   36 +++++++++++++++++++++++-------------
> >>  1 files changed, 23 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
> >> index 1ea9194..9bd88aa 100644
> >> --- a/net/netfilter/nf_conntrack_proto.c
> >> +++ b/net/netfilter/nf_conntrack_proto.c
> >> @@ -253,18 +253,23 @@ int nf_conntrack_l3proto_register(struct net *net,
> >>  {
> >>  	int ret = 0;
> >>  
> >> -	if (net == &init_net)
> >> -		ret = nf_conntrack_l3proto_register_net(proto);
> >> +	if (proto->init_net) {

I think proto->init_net has to be mandatory since all protocols support
pernet already. We can add BUG_ON at the beginning of the function if
proto->init_net is not defined.

I can manually add that to the patch if you see no inconvenience with
it.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [net-next patch] bnx2x: Change bnx2x_tests_str_arr to static char
From: Joe Perches @ 2012-06-26 14:41 UTC (permalink / raw)
  To: Merav Sicron; +Cc: davem, netdev, eilong
In-Reply-To: <1340727063-23870-1-git-send-email-meravs@broadcom.com>

On Tue, 2012-06-26 at 19:11 +0300, Merav Sicron wrote:
> This patch changes the definition of bnx2x_tests_str_arr from char to static
> char.
[]
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
[]
> @@ -1600,7 +1600,7 @@ static int bnx2x_set_pauseparam(struct net_device *dev,
>  	return 0;
>  }
>  
> -char *bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF] = {
> +static char *bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF] = {
>  	"register_test (offline)    ",
>  	"memory_test (offline)      ",
>  	"int_loopback_test (offline)",

Should probably be const too.

^ permalink raw reply

* Re: [PATCH 04/13] netfilter: regard users as refcount for l4proto's per-net data
From: Pablo Neira Ayuso @ 2012-06-26 14:47 UTC (permalink / raw)
  To: Gao feng; +Cc: netdev, netfilter-devel
In-Reply-To: <4FE93375.1080803@cn.fujitsu.com>

On Tue, Jun 26, 2012 at 11:58:45AM +0800, Gao feng wrote:
> Hi Pablo:
> 于 2012年06月25日 19:20, Pablo Neira Ayuso 写道:
> > On Thu, Jun 21, 2012 at 10:36:41PM +0800, Gao feng wrote:
> >> Now, nf_proto_net's users is confusing.
> >> we should regard it as the refcount for l4proto's per-net data,
> >> because maybe there are two l4protos use the same per-net data.
> >>
> >> so increment pn->users when nf_conntrack_l4proto_register
> >> success, and decrement it for nf_conntrack_l4_unregister case.
> >>
> >> because nf_conntrack_l3proto_ipv[4|6] don't use the same per-net
> >> data,so we don't need to add a refcnt for their per-net data.
> >>
> >> Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
> >> ---
> >>  net/netfilter/nf_conntrack_proto.c |   76 ++++++++++++++++++++++--------------
> >>  1 files changed, 46 insertions(+), 30 deletions(-)
> >>
> >> diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
> >> index 9d6b6ab..63612e6 100644
> >> --- a/net/netfilter/nf_conntrack_proto.c
> >> +++ b/net/netfilter/nf_conntrack_proto.c
> > [...]
> >> @@ -458,23 +446,32 @@ int nf_conntrack_l4proto_register(struct net *net,
> >>  				  struct nf_conntrack_l4proto *l4proto)
> >>  {
> >>  	int ret = 0;
> >> +	struct nf_proto_net *pn = NULL;
> >>  
> >>  	if (l4proto->init_net) {
> >>  		ret = l4proto->init_net(net, l4proto->l3proto);
> >>  		if (ret < 0)
> >> -			return ret;
> >> +			goto out;
> >>  	}
> >>  
> >> -	ret = nf_ct_l4proto_register_sysctl(net, l4proto);
> >> +	pn = nf_ct_l4proto_net(net, l4proto);
> >> +	if (pn == NULL)
> >> +		goto out;
> > 
> > Same thing here, we're leaking memory allocated by l4proto->init_net.
> 
> if pn is NULL,init_net can't allocate memory for pn->ctl_table.
> So I think it's not memory leak here.

Sorry, I meant to say the line below. But we've already clarified
this in patch 1/1.

> >> +	ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
> >>  	if (ret < 0)
> >> -		return ret;
> >> +		goto out;
> >>  
> >>  	if (net == &init_net) {
> >>  		ret = nf_conntrack_l4proto_register_net(l4proto);
> >> -		if (ret < 0)
> >> -			nf_ct_l4proto_unregister_sysctl(net, l4proto);
> >> +		if (ret < 0) {
> >> +			nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
> >> +			goto out;
> > 
> > Better replace the two lines above by:
> > 
> > goto out_register_net;
> > 
> > and then...
> > 
> >> +		}
> >>  	}
> >>  
> >> +	pn->users++;
> > 
> > out_register_net:
> >         nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
> > 
> >> +out:
> >>  	return ret;
> > 
> > I think that this change is similar to patch 1/1, I think you should
> > send it as a separated patch.
> > 
> 
> Yes, It looks better.
> should I change this and rebase whole patchset or
> maybe you just apply this patchset and then I send a cleanup patch to do this?

This patch includes changes that are not included in the description,
so you have two choices:

1) You resend me this patch with appropriate description (including
the fact that you're fixing the same thing that patch 1/1 does). I
still don't like this option much, since doing two different things
in one single patch is nasty, but well, if you push me...

2) you split the patch in two, with the appropriate descriptions each
and you'll make me happy.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] bridge: Assign rtnl_link_ops to bridge devices created via ioctl()
From: Stephen Hemminger @ 2012-06-26 14:59 UTC (permalink / raw)
  To: Thomas Graf; +Cc: davem, netdev
In-Reply-To: <692d04316b78401b0256598155caae190d3f27c9.1340700973.git.tgraf@suug.ch>

On Tue, 26 Jun 2012 10:56:15 +0200
Thomas Graf <tgraf@suug.ch> wrote:

> +void br_assign_rtnl_link_ops(struct net_device *dev)
> +{
> +	dev->rtnl_link_ops = &br_link_ops;
> +}

I am fine with the concept, but maybe it would just be simpler to
make br_link_ops public?

--- a/net/bridge/br_netlink.c	2012-06-22 08:27:50.837126940 -0700
+++ b/net/bridge/br_netlink.c	2012-06-26 07:56:33.510237340 -0700
@@ -208,7 +208,7 @@ static int br_validate(struct nlattr *tb
 	return 0;
 }
 
-static struct rtnl_link_ops br_link_ops __read_mostly = {
+struct rtnl_link_ops br_link_ops __read_mostly = {
 	.kind		= "bridge",
 	.priv_size	= sizeof(struct net_bridge),
 	.setup		= br_dev_setup,
--- a/net/bridge/br_private.h	2012-06-22 08:27:50.837126940 -0700
+++ b/net/bridge/br_private.h	2012-06-26 07:57:25.873711454 -0700
@@ -549,6 +549,7 @@ extern int (*br_fdb_test_addr_hook)(stru
 #endif
 
 /* br_netlink.c */
+extern struct rtnl_link_ops br_link_ops;
 extern int br_netlink_init(void);
 extern void br_netlink_fini(void);
 extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
--- a/net/bridge/br_if.c	2012-06-26 07:59:01.996746090 -0700
+++ b/net/bridge/br_if.c	2012-06-26 07:58:55.904807272 -0700
@@ -240,6 +240,7 @@ int br_add_bridge(struct net *net, const
 		return -ENOMEM;
 
 	dev_net_set(dev, net);
+	br->rtnl_link_ops = &br_link_ops;
 
 	res = register_netdev(dev);
 	if (res)

^ permalink raw reply

* Re: [PATCH] bridge: Assign rtnl_link_ops to bridge devices created via ioctl()
From: Thomas Graf @ 2012-06-26 15:13 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: davem, netdev
In-Reply-To: <20120626075940.099521a7@nehalam.linuxnetplumber.net>

On Tue, Jun 26, 2012 at 07:59:40AM -0700, Stephen Hemminger wrote:
> On Tue, 26 Jun 2012 10:56:15 +0200
> Thomas Graf <tgraf@suug.ch> wrote:
> 
> > +void br_assign_rtnl_link_ops(struct net_device *dev)
> > +{
> > +	dev->rtnl_link_ops = &br_link_ops;
> > +}
> 
> I am fine with the concept, but maybe it would just be simpler to
> make br_link_ops public?

Either is fine with me. I chose the assignment function to keep the
struct read-only outside of br_netlink.c.

^ permalink raw reply

* Re: [PATCH 09/16] netvm: Allow skb allocation to use PFMEMALLOC reserves
From: Sebastian Andrzej Siewior @ 2012-06-26 15:27 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Andrew Morton, Linux-MM, Linux-Netdev, LKML, David Miller,
	Neil Brown, Peter Zijlstra, Mike Christie, Eric B Munson,
	Eric Dumazet
In-Reply-To: <1340375443-22455-10-git-send-email-mgorman@suse.de>

On Fri, Jun 22, 2012 at 03:30:36PM +0100, Mel Gorman wrote:
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 5c9ca2b..159dccc 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -271,6 +271,9 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
>  int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
>  EXPORT_SYMBOL(sysctl_optmem_max);
>  
> +struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
> +EXPORT_SYMBOL_GPL(memalloc_socks);
> +

This is used via sk_memalloc_socks() by SLAB.

From 3da9ab9972845974da114c5a6624335e6371b2d5 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 26 Jun 2012 17:18:20 +0200
Subject: [PATCH] export sk_memalloc_socks() only with CONFIG_NET

|mm/built-in.o: In function `atomic_read':
|include/asm/atomic.h:25: undefined reference to `memalloc_socks'
|include/asm/atomic.h:25: undefined reference to `memalloc_socks'
|include/asm/atomic.h:25: undefined reference to `memalloc_socks'
|include/asm/atomic.h:25: undefined reference to `memalloc_socks'
|include/asm/atomic.h:25: undefined reference to `memalloc_socks'
|mm/built-in.o:include/asm/atomic.h:25: more undefined references to `memalloc_socks' follow

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/net/sock.h |    9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index db0c20c..767c443 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -659,11 +659,20 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
 	return test_bit(flag, &sk->sk_flags);
 }
 
+#ifdef CONFIG_NET
 extern struct static_key memalloc_socks;
 static inline int sk_memalloc_socks(void)
 {
 	return static_key_false(&memalloc_socks);
 }
+#else
+
+static inline int sk_memalloc_socks(void)
+{
+	return 0;
+}
+
+#endif
 
 static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask)
 {
-- 
1.7.10

Sebastian

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox