Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [Suggestion] net/atm : for sprintf, need check the total write length whether larger than a page.
From: Chas Williams (CONTRACTOR) @ 2012-12-04  3:46 UTC (permalink / raw)
  To: Chen Gang; +Cc: David Miller, netdev
In-Reply-To: <50AC58BC.1020004@asianux.com>

In message <50AC58BC.1020004@asianux.com>,Chen Gang writes:
>in net/atm/atm_sysfs.c:
>  suggest checking whether the write length is larger than a page.
>  the length of parameter buf is one page size (reference: fill_read_buffer at fs/sysfs/file.c)
>  and the count of atm addresses is not limited (reference: atm_dev_ioctl -> atm_add_addr)
>
>  thanks.
>
>gchen.

how about this as a possible fix?

atm: use scnprintf() instead of sprintf()

As reported by Chen Gang <gang.chen@asianux.com>, we should ensure there
is enough space when formatting the sysfs buffers.

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
---
 net/atm/atm_sysfs.c |   40 +++++++++++++++-------------------------
 1 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index f49da58..350bf62 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -14,49 +14,45 @@ static ssize_t show_type(struct device *cdev,
 			 struct device_attribute *attr, char *buf)
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
-	return sprintf(buf, "%s\n", adev->type);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", adev->type);
 }
 
 static ssize_t show_address(struct device *cdev,
 			    struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
-	int i;
-
-	for (i = 0; i < (ESI_LEN - 1); i++)
-		pos += sprintf(pos, "%02x:", adev->esi[i]);
-	pos += sprintf(pos, "%02x\n", adev->esi[i]);
 
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%pM\n", adev->esi);
 }
 
 static ssize_t show_atmaddress(struct device *cdev,
 			       struct device_attribute *attr, char *buf)
 {
 	unsigned long flags;
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 	struct atm_dev_addr *aaddr;
 	int bin[] = { 1, 2, 10, 6, 1 }, *fmt = bin;
-	int i, j;
+	int i, j, count = 0;
 
 	spin_lock_irqsave(&adev->lock, flags);
 	list_for_each_entry(aaddr, &adev->local, entry) {
 		for (i = 0, j = 0; i < ATM_ESA_LEN; ++i, ++j) {
 			if (j == *fmt) {
-				pos += sprintf(pos, ".");
+				count += scnprintf(buf + count,
+						   PAGE_SIZE - count, ".");
 				++fmt;
 				j = 0;
 			}
-			pos += sprintf(pos, "%02x",
-				       aaddr->addr.sas_addr.prv[i]);
+			count += scnprintf(buf + count,
+					   PAGE_SIZE - count, "%02x",
+					   aaddr->addr.sas_addr.prv[i]);
 		}
-		pos += sprintf(pos, "\n");
+		count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
 	}
 	spin_unlock_irqrestore(&adev->lock, flags);
 
-	return pos - buf;
+	return count;
 }
 
 static ssize_t show_atmindex(struct device *cdev,
@@ -64,25 +60,21 @@ static ssize_t show_atmindex(struct device *cdev,
 {
 	struct atm_dev *adev = to_atm_dev(cdev);
 
-	return sprintf(buf, "%d\n", adev->number);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", adev->number);
 }
 
 static ssize_t show_carrier(struct device *cdev,
 			    struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 
-	pos += sprintf(pos, "%d\n",
-		       adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
-
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			 adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
 }
 
 static ssize_t show_link_rate(struct device *cdev,
 			      struct device_attribute *attr, char *buf)
 {
-	char *pos = buf;
 	struct atm_dev *adev = to_atm_dev(cdev);
 	int link_rate;
 
@@ -100,9 +92,7 @@ static ssize_t show_link_rate(struct device *cdev,
 	default:
 		link_rate = adev->link_rate * 8 * 53;
 	}
-	pos += sprintf(pos, "%d\n", link_rate);
-
-	return pos - buf;
+	return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
 }
 
 static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-- 
1.7.7.6

^ permalink raw reply related

* [Patch net-next] netlink: add missing netlink message types to selinux perm table
From: Cong Wang @ 2012-12-04  3:53 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Cong Wang

RTM_NEWNETCONF and RTM_GETNETCONF are missing in this table.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>

---
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index d309e7f..0e7a89b 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -67,6 +67,8 @@ static struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_GETADDRLABEL,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 	{ RTM_GETDCB,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 	{ RTM_SETDCB,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_NEWNETCONF,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_GETNETCONF,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };

 static struct nlmsg_perm nlmsg_tcpdiag_perms[] =

^ permalink raw reply related

* [PATCH -next] net: neterion: use for_each_pci_dev to simplify the code
From: Wei Yongjun @ 2012-12-04  5:05 UTC (permalink / raw)
  To: jdmason, davem; +Cc: yongjun_wei, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

Use for_each_pci_dev to simplify the code.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 drivers/net/ethernet/neterion/s2io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index c98decb..3b63504 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -1040,7 +1040,7 @@ static int s2io_verify_pci_mode(struct s2io_nic *nic)
 static int s2io_on_nec_bridge(struct pci_dev *s2io_pdev)
 {
 	struct pci_dev *tdev = NULL;
-	while ((tdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, tdev)) != NULL) {
+	for_each_pci_dev(tdev) {
 		if (tdev->vendor == NEC_VENID && tdev->device == NEC_DEVID) {
 			if (tdev->bus == s2io_pdev->bus->parent) {
 				pci_dev_put(tdev);

^ permalink raw reply related

* [PATCH 3/5 net-next v2] tg3: PTP - Implement the ptp api and ethtool functions
From: Michael Chan @ 2012-12-04  5:36 UTC (permalink / raw)
  To: davem; +Cc: netdev, nsujir, richardcochran
In-Reply-To: <1354599420-3589-2-git-send-email-mchan@broadcom.com>

From: Matt Carlson <mcarlson@broadcom.com>

This patch adds the ptp_caps structure, ptp api implementation,
reference clock read and register/unregister functions.  All the basic
clock operations as described in Documentation/ptp/ptp.txt are
supported.

Frequency adjustment is performed using hardware with a 24 bit
accumulator and a programmable correction value. On each clk, the
correction value gets added to the accumulator and when it overflows,
the time counter is incremented/decremented and the accumulator reset.

So conversion from ppb to correction value is
	ppb * (1 << 24) / 1000000000

[Re-organized to put the ptp_clock_info struct declaration in one patch,
 added ptp_clock_info.name, and added locking to tg3_ptp_adjtime() based
 on input from Richard Cochran.]

Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Cc: Richard Cochran <richardcochran@gmail.com>
---
 drivers/net/ethernet/broadcom/tg3.c |  146 ++++++++++++++++++++++++++++++++++-
 1 files changed, 145 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 6922596..58af2a1 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -5520,6 +5520,13 @@ static int tg3_setup_phy(struct tg3 *tp, int force_reset)
 }
 
 /* tp->lock must be held */
+static u64 tg3_refclk_read(struct tg3 *tp)
+{
+	u64 stamp = tr32(TG3_EAV_REF_CLCK_LSB);
+	return stamp | (u64)tr32(TG3_EAV_REF_CLCK_MSB) << 32;
+}
+
+/* tp->lock must be held */
 static void tg3_refclk_write(struct tg3 *tp, u64 newval)
 {
 	tw32(TG3_EAV_REF_CLCK_CTL, TG3_EAV_REF_CLCK_CTL_STOP);
@@ -5528,6 +5535,134 @@ static void tg3_refclk_write(struct tg3 *tp, u64 newval)
 	tw32_f(TG3_EAV_REF_CLCK_CTL, TG3_EAV_REF_CLCK_CTL_RESUME);
 }
 
+static inline void tg3_full_lock(struct tg3 *tp, int irq_sync);
+static inline void tg3_full_unlock(struct tg3 *tp);
+static int tg3_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
+{
+	struct tg3 *tp = netdev_priv(dev);
+
+	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE    |
+				SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	if (tp->ptp_clock)
+		info->phc_index = ptp_clock_index(tp->ptp_clock);
+	else
+		info->phc_index = -1;
+
+	info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
+
+	info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+			   (1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) |
+			   (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+			   (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT);
+	return 0;
+}
+
+static int tg3_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	struct tg3 *tp = container_of(ptp, struct tg3, ptp_info);
+	bool neg_adj = false;
+	u32 correction = 0;
+
+	if (ppb < 0) {
+		neg_adj = true;
+		ppb = -ppb;
+	}
+
+	/* Frequency adjustment is performed using hardware with a 24 bit
+	 * accumulator and a programmable correction value. On each clk, the
+	 * correction value gets added to the accumulator and when it
+	 * overflows, the time counter is incremented/decremented.
+	 *
+	 * So conversion from ppb to correction value is
+	 *		ppb * (1 << 24) / 1000000000
+	 */
+	correction = div_u64((u64)ppb * (1 << 24), 1000000000ULL) &
+		     TG3_EAV_REF_CLK_CORRECT_MASK;
+
+	tg3_full_lock(tp, 0);
+
+	if (correction)
+		tw32(TG3_EAV_REF_CLK_CORRECT_CTL,
+		     TG3_EAV_REF_CLK_CORRECT_EN |
+		     (neg_adj ? TG3_EAV_REF_CLK_CORRECT_NEG : 0) | correction);
+	else
+		tw32(TG3_EAV_REF_CLK_CORRECT_CTL, 0);
+
+	tg3_full_unlock(tp);
+
+	return 0;
+}
+
+static int tg3_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct tg3 *tp = container_of(ptp, struct tg3, ptp_info);
+
+	tg3_full_lock(tp, 0);
+	tp->ptp_adjust += delta;
+	tg3_full_unlock(tp);
+
+	return 0;
+}
+
+static int tg3_ptp_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+{
+	u64 ns;
+	u32 remainder;
+	struct tg3 *tp = container_of(ptp, struct tg3, ptp_info);
+
+	tg3_full_lock(tp, 0);
+	ns = tg3_refclk_read(tp);
+	ns += tp->ptp_adjust;
+	tg3_full_unlock(tp);
+
+	ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder);
+	ts->tv_nsec = remainder;
+
+	return 0;
+}
+
+static int tg3_ptp_settime(struct ptp_clock_info *ptp,
+			   const struct timespec *ts)
+{
+	u64 ns;
+	struct tg3 *tp = container_of(ptp, struct tg3, ptp_info);
+
+	ns = timespec_to_ns(ts);
+
+	tg3_full_lock(tp, 0);
+	tg3_refclk_write(tp, ns);
+	tp->ptp_adjust = 0;
+	tg3_full_unlock(tp);
+
+	return 0;
+}
+
+static int tg3_ptp_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+static const struct ptp_clock_info tg3_ptp_caps = {
+	.owner		= THIS_MODULE,
+	.name		= "tg3 clock",
+	.max_adj	= 250000000,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.pps		= 0,
+	.adjfreq	= tg3_ptp_adjfreq,
+	.adjtime	= tg3_ptp_adjtime,
+	.gettime	= tg3_ptp_gettime,
+	.settime	= tg3_ptp_settime,
+	.enable		= tg3_ptp_enable,
+};
+
 /* tp->lock must be held */
 static void tg3_ptp_init(struct tg3 *tp)
 {
@@ -5537,6 +5672,7 @@ static void tg3_ptp_init(struct tg3 *tp)
 	/* Initialize the hardware clock to the system time. */
 	tg3_refclk_write(tp, ktime_to_ns(ktime_get_real()));
 	tp->ptp_adjust = 0;
+	tp->ptp_info = tg3_ptp_caps;
 }
 
 /* tp->lock must be held */
@@ -5554,6 +5690,7 @@ static void tg3_ptp_fini(struct tg3 *tp)
 	if (!tg3_flag(tp, PTP_CAPABLE) || !tp->ptp_clock)
 		return;
 
+	ptp_clock_unregister(tp->ptp_clock);
 	tp->ptp_clock = NULL;
 	tp->ptp_adjust = 0;
 }
@@ -10598,6 +10735,13 @@ static int tg3_open(struct net_device *dev)
 		pci_set_power_state(tp->pdev, PCI_D3hot);
 	}
 
+	if (tg3_flag(tp, PTP_CAPABLE)) {
+		tp->ptp_clock = ptp_clock_register(&tp->ptp_info,
+						   &tp->pdev->dev);
+		if (IS_ERR(tp->ptp_clock))
+			tp->ptp_clock = NULL;
+	}
+
 	return err;
 }
 
@@ -12767,7 +12911,7 @@ static const struct ethtool_ops tg3_ethtool_ops = {
 	.set_rxfh_indir		= tg3_set_rxfh_indir,
 	.get_channels		= tg3_get_channels,
 	.set_channels		= tg3_set_channels,
-	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_ts_info		= tg3_get_ts_info,
 };
 
 static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
-- 
1.7.1

^ permalink raw reply related

* [PATCH 2/5 net-next v2] tg3: PTP - Add header definitions, initialization and hw access functions.
From: Michael Chan @ 2012-12-04  5:36 UTC (permalink / raw)
  To: davem; +Cc: netdev, nsujir, richardcochran
In-Reply-To: <1354599420-3589-1-git-send-email-mchan@broadcom.com>

From: Matt Carlson <mcarlson@broadcom.com>

This patch adds code to write the reference clock. If a chip reset is
performed, the hwclock is reinitialized with the adjusted kernel time.

Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Cc: Richard Cochran <richardcochran@gmail.com>
---
 drivers/net/ethernet/broadcom/Kconfig |    1 +
 drivers/net/ethernet/broadcom/tg3.c   |   61 ++++++++++++++++++++++++++++++--
 drivers/net/ethernet/broadcom/tg3.h   |   60 ++++++++++++++++++++++++++++++--
 3 files changed, 115 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 4bd416b..f552673 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -102,6 +102,7 @@ config TIGON3
 	depends on PCI
 	select PHYLIB
 	select HWMON
+	select PTP_1588_CLOCK
 	---help---
 	  This driver supports Broadcom Tigon3 based gigabit Ethernet cards.
 
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 41a2dbd..6922596 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -54,6 +54,9 @@
 #include <asm/byteorder.h>
 #include <linux/uaccess.h>
 
+#include <uapi/linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+
 #ifdef CONFIG_SPARC
 #include <asm/idprom.h>
 #include <asm/prom.h>
@@ -5516,6 +5519,45 @@ static int tg3_setup_phy(struct tg3 *tp, int force_reset)
 	return err;
 }
 
+/* tp->lock must be held */
+static void tg3_refclk_write(struct tg3 *tp, u64 newval)
+{
+	tw32(TG3_EAV_REF_CLCK_CTL, TG3_EAV_REF_CLCK_CTL_STOP);
+	tw32(TG3_EAV_REF_CLCK_LSB, newval & 0xffffffff);
+	tw32(TG3_EAV_REF_CLCK_MSB, newval >> 32);
+	tw32_f(TG3_EAV_REF_CLCK_CTL, TG3_EAV_REF_CLCK_CTL_RESUME);
+}
+
+/* tp->lock must be held */
+static void tg3_ptp_init(struct tg3 *tp)
+{
+	if (!tg3_flag(tp, PTP_CAPABLE))
+		return;
+
+	/* Initialize the hardware clock to the system time. */
+	tg3_refclk_write(tp, ktime_to_ns(ktime_get_real()));
+	tp->ptp_adjust = 0;
+}
+
+/* tp->lock must be held */
+static void tg3_ptp_resume(struct tg3 *tp)
+{
+	if (!tg3_flag(tp, PTP_CAPABLE))
+		return;
+
+	tg3_refclk_write(tp, ktime_to_ns(ktime_get_real()) + tp->ptp_adjust);
+	tp->ptp_adjust = 0;
+}
+
+static void tg3_ptp_fini(struct tg3 *tp)
+{
+	if (!tg3_flag(tp, PTP_CAPABLE) || !tp->ptp_clock)
+		return;
+
+	tp->ptp_clock = NULL;
+	tp->ptp_adjust = 0;
+}
+
 static inline int tg3_irq_sync(struct tg3 *tp)
 {
 	return tp->irq_sync;
@@ -6528,6 +6570,8 @@ static inline void tg3_netif_stop(struct tg3 *tp)
 /* tp->lock must be held */
 static inline void tg3_netif_start(struct tg3 *tp)
 {
+	tg3_ptp_resume(tp);
+
 	/* NOTE: unconditional netif_tx_wake_all_queues is only
 	 * appropriate so long as all callers are assured to
 	 * have free tx slots (such as after tg3_init_hw)
@@ -10365,7 +10409,8 @@ static void tg3_ints_fini(struct tg3 *tp)
 	tg3_flag_clear(tp, ENABLE_TSS);
 }
 
-static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq)
+static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq,
+		     bool init)
 {
 	struct net_device *dev = tp->dev;
 	int i, err;
@@ -10444,6 +10489,12 @@ static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq)
 	tg3_flag_set(tp, INIT_COMPLETE);
 	tg3_enable_ints(tp);
 
+	if (init)
+		tg3_ptp_init(tp);
+	else
+		tg3_ptp_resume(tp);
+
+
 	tg3_full_unlock(tp);
 
 	netif_tx_start_all_queues(dev);
@@ -10541,11 +10592,12 @@ static int tg3_open(struct net_device *dev)
 
 	tg3_full_unlock(tp);
 
-	err = tg3_start(tp, true, true);
+	err = tg3_start(tp, true, true, true);
 	if (err) {
 		tg3_frob_aux_power(tp, false);
 		pci_set_power_state(tp->pdev, PCI_D3hot);
 	}
+
 	return err;
 }
 
@@ -10553,6 +10605,8 @@ static int tg3_close(struct net_device *dev)
 {
 	struct tg3 *tp = netdev_priv(dev);
 
+	tg3_ptp_fini(tp);
+
 	tg3_stop(tp);
 
 	/* Clear stats across close / open calls */
@@ -11455,7 +11509,7 @@ static int tg3_set_channels(struct net_device *dev,
 
 	tg3_carrier_off(tp);
 
-	tg3_start(tp, true, false);
+	tg3_start(tp, true, false, false);
 
 	return 0;
 }
@@ -12508,7 +12562,6 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
 		}
 
 		tg3_full_lock(tp, irq_sync);
-
 		tg3_halt(tp, RESET_KIND_SUSPEND, 1);
 		err = tg3_nvram_lock(tp);
 		tg3_halt_cpu(tp, RX_CPU_BASE);
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 4534804..d330e81 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -772,7 +772,10 @@
 #define  SG_DIG_MAC_ACK_STATUS		 0x00000004
 #define  SG_DIG_AUTONEG_COMPLETE	 0x00000002
 #define  SG_DIG_AUTONEG_ERROR		 0x00000001
-/* 0x5b8 --> 0x600 unused */
+#define TG3_TX_TSTAMP_LSB		0x000005c0
+#define TG3_TX_TSTAMP_MSB		0x000005c4
+#define  TG3_TSTAMP_MASK		 0x7fffffffffffffff
+/* 0x5c8 --> 0x600 unused */
 #define MAC_TX_MAC_STATE_BASE		0x00000600 /* 16 bytes */
 #define MAC_RX_MAC_STATE_BASE		0x00000610 /* 20 bytes */
 /* 0x624 --> 0x670 unused */
@@ -789,7 +792,36 @@
 #define MAC_RSS_HASH_KEY_7		0x0000068c
 #define MAC_RSS_HASH_KEY_8		0x00000690
 #define MAC_RSS_HASH_KEY_9		0x00000694
-/* 0x698 --> 0x800 unused */
+/* 0x698 --> 0x6b0 unused */
+
+#define TG3_RX_TSTAMP_LSB		0x000006b0
+#define TG3_RX_TSTAMP_MSB		0x000006b4
+/* 0x6b8 --> 0x6c8 unused */
+
+#define TG3_RX_PTP_CTL			0x000006c8
+#define TG3_RX_PTP_CTL_SYNC_EVNT	0x00000001
+#define TG3_RX_PTP_CTL_DELAY_REQ	0x00000002
+#define TG3_RX_PTP_CTL_PDLAY_REQ	0x00000004
+#define TG3_RX_PTP_CTL_PDLAY_RES	0x00000008
+#define TG3_RX_PTP_CTL_ALL_V1_EVENTS	(TG3_RX_PTP_CTL_SYNC_EVNT | \
+					 TG3_RX_PTP_CTL_DELAY_REQ)
+#define TG3_RX_PTP_CTL_ALL_V2_EVENTS	(TG3_RX_PTP_CTL_SYNC_EVNT | \
+					 TG3_RX_PTP_CTL_DELAY_REQ | \
+					 TG3_RX_PTP_CTL_PDLAY_REQ | \
+					 TG3_RX_PTP_CTL_PDLAY_RES)
+#define TG3_RX_PTP_CTL_FOLLOW_UP	0x00000100
+#define TG3_RX_PTP_CTL_DELAY_RES	0x00000200
+#define TG3_RX_PTP_CTL_PDRES_FLW_UP	0x00000400
+#define TG3_RX_PTP_CTL_ANNOUNCE		0x00000800
+#define TG3_RX_PTP_CTL_SIGNALING	0x00001000
+#define TG3_RX_PTP_CTL_MANAGEMENT	0x00002000
+#define TG3_RX_PTP_CTL_RX_PTP_V2_L2_EN	0x00800000
+#define TG3_RX_PTP_CTL_RX_PTP_V2_L4_EN	0x01000000
+#define TG3_RX_PTP_CTL_RX_PTP_V2_EN	(TG3_RX_PTP_CTL_RX_PTP_V2_L2_EN | \
+					 TG3_RX_PTP_CTL_RX_PTP_V2_L4_EN)
+#define TG3_RX_PTP_CTL_RX_PTP_V1_EN	0x02000000
+#define TG3_RX_PTP_CTL_HWTS_INTERLOCK	0x04000000
+/* 0x6cc --> 0x800 unused */
 
 #define MAC_TX_STATS_OCTETS		0x00000800
 #define MAC_TX_STATS_RESV1		0x00000804
@@ -1669,6 +1701,7 @@
 #define  GRC_MODE_HOST_STACKUP		0x00010000
 #define  GRC_MODE_HOST_SENDBDS		0x00020000
 #define  GRC_MODE_HTX2B_ENABLE		0x00040000
+#define  GRC_MODE_TIME_SYNC_ENABLE	0x00080000
 #define  GRC_MODE_NO_TX_PHDR_CSUM	0x00100000
 #define  GRC_MODE_NVRAM_WR_ENABLE	0x00200000
 #define  GRC_MODE_PCIE_TL_SEL		0x00000000
@@ -1771,7 +1804,17 @@
 #define GRC_VCPU_EXT_CTRL_DISABLE_WOL	 0x20000000
 #define GRC_FASTBOOT_PC			0x00006894	/* 5752, 5755, 5787 */
 
-/* 0x6c00 --> 0x7000 unused */
+#define TG3_EAV_REF_CLCK_LSB		0x00006900
+#define TG3_EAV_REF_CLCK_MSB		0x00006904
+#define TG3_EAV_REF_CLCK_CTL		0x00006908
+#define  TG3_EAV_REF_CLCK_CTL_STOP	 0x00000002
+#define  TG3_EAV_REF_CLCK_CTL_RESUME	 0x00000004
+#define TG3_EAV_REF_CLK_CORRECT_CTL	0x00006928
+#define  TG3_EAV_REF_CLK_CORRECT_EN	 (1 << 31)
+#define  TG3_EAV_REF_CLK_CORRECT_NEG	 (1 << 30)
+
+#define TG3_EAV_REF_CLK_CORRECT_MASK	0xffffff
+/* 0x690c --> 0x7000 unused */
 
 /* NVRAM Control registers */
 #define NVRAM_CMD			0x00007000
@@ -2439,6 +2482,7 @@ struct tg3_tx_buffer_desc {
 #define TXD_FLAG_IP_FRAG		0x0008
 #define TXD_FLAG_JMB_PKT		0x0008
 #define TXD_FLAG_IP_FRAG_END		0x0010
+#define TXD_FLAG_HWTSTAMP		0x0020
 #define TXD_FLAG_VLAN			0x0040
 #define TXD_FLAG_COAL_NOW		0x0080
 #define TXD_FLAG_CPU_PRE_DMA		0x0100
@@ -2480,6 +2524,9 @@ struct tg3_rx_buffer_desc {
 #define RXD_FLAG_IP_CSUM		0x1000
 #define RXD_FLAG_TCPUDP_CSUM		0x2000
 #define RXD_FLAG_IS_TCP			0x4000
+#define RXD_FLAG_PTPSTAT_MASK		0x0210
+#define RXD_FLAG_PTPSTAT_PTPV1		0x0010
+#define RXD_FLAG_PTPSTAT_PTPV2		0x0200
 
 	u32				ip_tcp_csum;
 #define RXD_IPCSUM_MASK		0xffff0000
@@ -2970,9 +3017,11 @@ enum TG3_FLAGS {
 	TG3_FLAG_USE_JUMBO_BDFLAG,
 	TG3_FLAG_L1PLLPD_EN,
 	TG3_FLAG_APE_HAS_NCSI,
+	TG3_FLAG_TX_TSTAMP_EN,
 	TG3_FLAG_4K_FIFO_LIMIT,
 	TG3_FLAG_5719_RDMA_BUG,
 	TG3_FLAG_RESET_TASK_PENDING,
+	TG3_FLAG_PTP_CAPABLE,
 	TG3_FLAG_5705_PLUS,
 	TG3_FLAG_IS_5788,
 	TG3_FLAG_5750_PLUS,
@@ -3041,6 +3090,10 @@ struct tg3 {
 	u32				coal_now;
 	u32				msg_enable;
 
+	struct ptp_clock_info		ptp_info;
+	struct ptp_clock		*ptp_clock;
+	s64				ptp_adjust;
+
 	/* begin "tx thread" cacheline section */
 	void				(*write32_tx_mbox) (struct tg3 *, u32,
 							    u32);
@@ -3108,6 +3161,7 @@ struct tg3 {
 	u32				dma_rwctrl;
 	u32				coalesce_mode;
 	u32				pwrmgmt_thresh;
+	u32				rxptpctl;
 
 	/* PCI block */
 	u32				pci_chip_rev_id;
-- 
1.7.1

^ permalink raw reply related

* [PATCH 4/5 net-next v2] tg3: PTP - Add the hardware timestamp ioctl
From: Michael Chan @ 2012-12-04  5:36 UTC (permalink / raw)
  To: davem; +Cc: netdev, nsujir, richardcochran
In-Reply-To: <1354599420-3589-3-git-send-email-mchan@broadcom.com>

From: Matt Carlson <mcarlson@broadcom.com>

This patch implements the SIOCSHWTSTAMP ioctl as described in
Documentation/networking/timestamping.txt

[Removed HWTSTAMP_FILTER_ALL handling by returning -ERANGE based on input
 from Richard Cochran.]

Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Cc: Richard Cochran <richardcochran@gmail.com>
---
 drivers/net/ethernet/broadcom/tg3.c |   93 +++++++++++++++++++++++++++++++++++
 1 files changed, 93 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 58af2a1..dddb21e 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12760,6 +12760,96 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
 
 }
 
+static int tg3_hwtstamp_ioctl(struct net_device *dev,
+			      struct ifreq *ifr, int cmd)
+{
+	struct tg3 *tp = netdev_priv(dev);
+	struct hwtstamp_config stmpconf;
+
+	if (!tg3_flag(tp, PTP_CAPABLE))
+		return -EINVAL;
+
+	if (copy_from_user(&stmpconf, ifr->ifr_data, sizeof(stmpconf)))
+		return -EFAULT;
+
+	if (stmpconf.flags)
+		return -EINVAL;
+
+	switch (stmpconf.tx_type) {
+	case HWTSTAMP_TX_ON:
+		tg3_flag_set(tp, TX_TSTAMP_EN);
+		break;
+	case HWTSTAMP_TX_OFF:
+		tg3_flag_clear(tp, TX_TSTAMP_EN);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (stmpconf.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tp->rxptpctl = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V1_EN |
+			       TG3_RX_PTP_CTL_ALL_V1_EVENTS;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V1_EN |
+			       TG3_RX_PTP_CTL_SYNC_EVNT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V1_EN |
+			       TG3_RX_PTP_CTL_DELAY_REQ;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_EN |
+			       TG3_RX_PTP_CTL_ALL_V2_EVENTS;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_L2_EN |
+			       TG3_RX_PTP_CTL_ALL_V2_EVENTS;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_L4_EN |
+			       TG3_RX_PTP_CTL_ALL_V2_EVENTS;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_EN |
+			       TG3_RX_PTP_CTL_SYNC_EVNT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_L2_EN |
+			       TG3_RX_PTP_CTL_SYNC_EVNT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_L4_EN |
+			       TG3_RX_PTP_CTL_SYNC_EVNT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_EN |
+			       TG3_RX_PTP_CTL_DELAY_REQ;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_L2_EN |
+			       TG3_RX_PTP_CTL_DELAY_REQ;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		tp->rxptpctl = TG3_RX_PTP_CTL_RX_PTP_V2_L4_EN |
+			       TG3_RX_PTP_CTL_DELAY_REQ;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	if (netif_running(dev) && tp->rxptpctl)
+		tw32(TG3_RX_PTP_CTL,
+		     tp->rxptpctl | TG3_RX_PTP_CTL_HWTS_INTERLOCK);
+
+	return copy_to_user(ifr->ifr_data, &stmpconf, sizeof(stmpconf)) ?
+		-EFAULT : 0;
+}
+
 static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct mii_ioctl_data *data = if_mii(ifr);
@@ -12810,6 +12900,9 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 		return err;
 
+	case SIOCSHWTSTAMP:
+		return tg3_hwtstamp_ioctl(dev, ifr, cmd);
+
 	default:
 		/* do nothing */
 		break;
-- 
1.7.1

^ permalink raw reply related

* [PATCH 1/5 net-next v2] tg3: Fix inconsistent locking for tg3_netif_start().
From: Michael Chan @ 2012-12-04  5:36 UTC (permalink / raw)
  To: davem; +Cc: netdev, nsujir, richardcochran

From: Nithin Nayak Sujir <nsujir@broadcom.com>

Every caller holds tp->lock when calling tg3_netif_start() except
tg3_io_resume().  Fix it so that it is all consistent.  The subsequent
PTP patches add tg3_ptp_resume() to tg3_netif_start() and the tp->lock
is required.

Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/ethernet/broadcom/tg3.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 5cc976d..41a2dbd 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6525,6 +6525,7 @@ static inline void tg3_netif_stop(struct tg3 *tp)
 	netif_tx_disable(tp->dev);
 }
 
+/* tp->lock must be held */
 static inline void tg3_netif_start(struct tg3 *tp)
 {
 	/* NOTE: unconditional netif_tx_wake_all_queues is only
@@ -16598,8 +16599,8 @@ static void tg3_io_resume(struct pci_dev *pdev)
 	tg3_full_lock(tp, 0);
 	tg3_flag_set(tp, INIT_COMPLETE);
 	err = tg3_restart_hw(tp, 1);
-	tg3_full_unlock(tp);
 	if (err) {
+		tg3_full_unlock(tp);
 		netdev_err(netdev, "Cannot restart hardware after reset.\n");
 		goto done;
 	}
@@ -16610,6 +16611,8 @@ static void tg3_io_resume(struct pci_dev *pdev)
 
 	tg3_netif_start(tp);
 
+	tg3_full_unlock(tp);
+
 	tg3_phy_start(tp);
 
 done:
-- 
1.7.1

^ permalink raw reply related

* [PATCH 5/5 net-next v2] tg3: PTP - Enable the timestamping feature in hardware and fill skb tx/rx timestamps
From: Michael Chan @ 2012-12-04  5:37 UTC (permalink / raw)
  To: davem; +Cc: netdev, nsujir, richardcochran
In-Reply-To: <1354599420-3589-4-git-send-email-mchan@broadcom.com>

From: Matt Carlson <mcarlson@broadcom.com>

This patch implements the hardware timestamping as described in
Documentation/networking/timestamping.txt

Update version to 3.128.

Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Cc: Richard Cochran <richardcochran@gmail.com>
---
 drivers/net/ethernet/broadcom/tg3.c |   57 +++++++++++++++++++++++++++++++---
 1 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index dddb21e..23918ce 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -93,10 +93,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 
 #define DRV_MODULE_NAME		"tg3"
 #define TG3_MAJ_NUM			3
-#define TG3_MIN_NUM			127
+#define TG3_MIN_NUM			128
 #define DRV_MODULE_VERSION	\
 	__stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM)
-#define DRV_MODULE_RELDATE	"November 14, 2012"
+#define DRV_MODULE_RELDATE	"December 03, 2012"
 
 #define RESET_KIND_SHUTDOWN	0
 #define RESET_KIND_INIT		1
@@ -5663,6 +5663,14 @@ static const struct ptp_clock_info tg3_ptp_caps = {
 	.enable		= tg3_ptp_enable,
 };
 
+static void tg3_hwclock_to_timestamp(struct tg3 *tp, u64 hwclock,
+				     struct skb_shared_hwtstamps *timestamp)
+{
+	memset(timestamp, 0, sizeof(struct skb_shared_hwtstamps));
+	timestamp->hwtstamp  = ns_to_ktime((hwclock & TG3_TSTAMP_MASK) +
+					   tp->ptp_adjust);
+}
+
 /* tp->lock must be held */
 static void tg3_ptp_init(struct tg3 *tp)
 {
@@ -5875,6 +5883,16 @@ static void tg3_tx(struct tg3_napi *tnapi)
 			return;
 		}
 
+		if (tnapi->tx_ring[sw_idx].len_flags & TXD_FLAG_HWTSTAMP) {
+			struct skb_shared_hwtstamps timestamp;
+			u64 hwclock = tr32(TG3_TX_TSTAMP_LSB);
+			hwclock |= (u64)tr32(TG3_TX_TSTAMP_MSB) << 32;
+
+			tg3_hwclock_to_timestamp(tp, hwclock, &timestamp);
+
+			skb_tstamp_tx(skb, &timestamp);
+		}
+
 		pci_unmap_single(tp->pdev,
 				 dma_unmap_addr(ri, mapping),
 				 skb_headlen(skb),
@@ -6142,6 +6160,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 		dma_addr_t dma_addr;
 		u32 opaque_key, desc_idx, *post_ptr;
 		u8 *data;
+		u64 tstamp = 0;
 
 		desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
 		opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
@@ -6176,6 +6195,14 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 		len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) -
 		      ETH_FCS_LEN;
 
+		if ((desc->type_flags & RXD_FLAG_PTPSTAT_MASK) ==
+		     RXD_FLAG_PTPSTAT_PTPV1 ||
+		    (desc->type_flags & RXD_FLAG_PTPSTAT_MASK) ==
+		     RXD_FLAG_PTPSTAT_PTPV2) {
+			tstamp = tr32(TG3_RX_TSTAMP_LSB);
+			tstamp |= (u64)tr32(TG3_RX_TSTAMP_MSB) << 32;
+		}
+
 		if (len > TG3_RX_COPY_THRESH(tp)) {
 			int skb_size;
 			unsigned int frag_size;
@@ -6219,6 +6246,10 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 		}
 
 		skb_put(skb, len);
+		if (tstamp)
+			tg3_hwclock_to_timestamp(tp, tstamp,
+						 skb_hwtstamps(skb));
+
 		if ((tp->dev->features & NETIF_F_RXCSUM) &&
 		    (desc->type_flags & RXD_FLAG_TCPUDP_CSUM) &&
 		    (((desc->ip_tcp_csum & RXD_TCPCSUM_MASK)
@@ -7276,6 +7307,12 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		vlan = vlan_tx_tag_get(skb);
 	}
 
+	if ((unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) &&
+	    tg3_flag(tp, TX_TSTAMP_EN)) {
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		base_flags |= TXD_FLAG_HWTSTAMP;
+	}
+
 	len = skb_headlen(skb);
 
 	mapping = pci_map_single(tp->pdev, skb->data, len, PCI_DMA_TODEVICE);
@@ -9144,9 +9181,15 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 	 */
 	tp->grc_mode |= GRC_MODE_NO_TX_PHDR_CSUM;
 
-	tw32(GRC_MODE,
-	     tp->grc_mode |
-	     (GRC_MODE_IRQ_ON_MAC_ATTN | GRC_MODE_HOST_STACKUP));
+	val = GRC_MODE_IRQ_ON_MAC_ATTN | GRC_MODE_HOST_STACKUP;
+	if (tp->rxptpctl)
+		tw32(TG3_RX_PTP_CTL,
+		     tp->rxptpctl | TG3_RX_PTP_CTL_HWTS_INTERLOCK);
+
+	if (tg3_flag(tp, PTP_CAPABLE))
+		val |= GRC_MODE_TIME_SYNC_ENABLE;
+
+	tw32(GRC_MODE, tp->grc_mode | val);
 
 	/* Setup the timer prescalar register.  Clock is always 66Mhz. */
 	val = tr32(GRC_MISC_CFG);
@@ -16564,6 +16607,10 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 
 	pci_set_drvdata(pdev, dev);
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720)
+		tg3_flag_set(tp, PTP_CAPABLE);
+
 	if (tg3_flag(tp, 5717_PLUS)) {
 		/* Resume a low-power mode */
 		tg3_frob_aux_power(tp, false);
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH net-next] bridge: implement multicast fast leave
From: Cong Wang @ 2012-12-04  7:04 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Stephen Hemminger, netdev, bridge, David S. Miller
In-Reply-To: <20121204013838.GA31329@gondor.apana.org.au>

On Tue, 2012-12-04 at 09:38 +0800, Herbert Xu wrote:
> On Mon, Dec 03, 2012 at 07:53:16AM -0800, Stephen Hemminger wrote:
> > On Mon,  3 Dec 2012 22:36:03 +0800
> > Cong Wang <amwang@redhat.com> wrote:
> > 
> > > Fast leave allows bridge to immediately stops the multicast
> > > traffic on the port receives IGMP Leave when IGMP snooping is enabled,
> > > no timeouts are observed.
> > > 
> > > Cc: Herbert Xu <herbert@gondor.apana.org.au>
> > > Cc: Stephen Hemminger <shemminger@vyatta.com>
> > > Cc: "David S. Miller" <davem@davemloft.net>
> > > Signed-off-by: Cong Wang <amwang@redhat.com>
> > 
> > I like the feature, and it looks like an oversight in the initial design.
> > Why is this not the default, adding more options obscures it.
> 
> If the port has a bridge on it then you're toast.  I think this
> should be a per-port option.

Per-port sounds better than per-bridge. And I will make it enabled by
default.

Thanks!

^ permalink raw reply

* Re: [PATCH net-next] bridge: implement multicast fast leave
From: Herbert Xu @ 2012-12-04  7:07 UTC (permalink / raw)
  To: Cong Wang; +Cc: Stephen Hemminger, netdev, bridge, David S. Miller
In-Reply-To: <1354604692.15167.7.camel@cr0>

On Tue, Dec 04, 2012 at 03:04:52PM +0800, Cong Wang wrote:
>
> Per-port sounds better than per-bridge. And I will make it enabled by
> default.

IMHO the default should be off.  Suddenly losing your subscription
because someone else on the same port unsubscribed is a lot more
annoying than getting a few minutes of unwanted multicast data.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [net-next rfc v7 2/3] virtio_net: multiqueue support
From: Michael S. Tsirkin @ 2012-12-04  7:35 UTC (permalink / raw)
  To: Jason Wang
  Cc: krkumar2, kvm, netdev, linux-kernel, virtualization, bhutchings,
	jwhan, shiyer
In-Reply-To: <50BC7F59.301@redhat.com>

On Mon, Dec 03, 2012 at 06:30:49PM +0800, Jason Wang wrote:
> On 12/03/2012 06:14 PM, Michael S. Tsirkin wrote:
> > On Tue, Nov 27, 2012 at 06:15:59PM +0800, Jason Wang wrote:
> >> > -	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
> >> > -		schedule_delayed_work(&vi->rq.refill, 0);
> >> > +	for (i = 0; i < vi->max_queue_pairs; i++)
> >> > +		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
> >> > +			schedule_delayed_work(&vi->rq[i].refill, 0);
> >> >  
> >> >  	mutex_lock(&vi->config_lock);
> >> >  	vi->config_enable = true;
> >> >  	mutex_unlock(&vi->config_lock);
> >> >  
> >> > +	BUG_ON(virtnet_set_queues(vi));
> >> > +
> >> >  	return 0;
> >> >  }
> >> >  #endif
> > Also crashing on device nack of command is also not nice.
> > In this case it seems we can just switch to
> > single-queue mode which should always be safe.
> 
> Not sure it's safe. It depends on the reason why this call fails. If we
> left a state that the driver only use single queue but the device use
> multi queues, we may still lost the network.

Looks like we won't: napi will stay enabled on all queues
so we will process incoming packets.

-- 
MST

^ permalink raw reply

* Re: [PATCH 2/4 net-next] tg3: PTP - Implement the ptp api and ethtool functions
From: Richard Cochran @ 2012-12-04  7:53 UTC (permalink / raw)
  To: Nithin Nayak Sujir; +Cc: Michael Chan, davem, netdev
In-Reply-To: <50BD1F10.7020105@broadcom.com>

On Mon, Dec 03, 2012 at 01:52:16PM -0800, Nithin Nayak Sujir wrote:
> 
> Yes, the hardware does seem to be different from what you describe
> but I think the conversion is right. I tested this with ptp4l in a
> back-to-back configuration and observed convergence of the master
> offset to ~0. Without this code, the offset keeps increasing.

Okay, thanks, just asking.

It appears that this works just like the IGB. The register value is a
binary fraction of the increment value, and not an addend as I
assumed.

Thanks,
Richard

^ permalink raw reply

* Re: [PATCH 1/5 net-next v2] tg3: Fix inconsistent locking for tg3_netif_start().
From: Richard Cochran @ 2012-12-04  8:48 UTC (permalink / raw)
  To: Michael Chan; +Cc: davem, netdev, nsujir
In-Reply-To: <1354599420-3589-1-git-send-email-mchan@broadcom.com>

On Mon, Dec 03, 2012 at 09:36:56PM -0800, Michael Chan wrote:
> From: Nithin Nayak Sujir <nsujir@broadcom.com>
> 
> Every caller holds tp->lock when calling tg3_netif_start() except
> tg3_io_resume().  Fix it so that it is all consistent.  The subsequent
> PTP patches add tg3_ptp_resume() to tg3_netif_start() and the tp->lock
> is required.
> 
> Signed-off-by: Nithin Nayak Sujir <nsujir@broadcom.com>
> Signed-off-by: Michael Chan <mchan@broadcom.com>

This series looks good to me now.

Acked-by: Richard Cochran <richardcochran@gmail.com>

^ permalink raw reply

* Re: [net-next rfc v7 1/3] virtio-net: separate fields of sending/receiving queue from virtnet_info
From: Jason Wang @ 2012-12-04  9:22 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: krkumar2, kvm, netdev, linux-kernel, virtualization, bhutchings,
	jwhan, shiyer
In-Reply-To: <20121203111848.GD26167@redhat.com>

On Monday, December 03, 2012 01:18:48 PM Michael S. Tsirkin wrote:
> On Mon, Dec 03, 2012 at 01:15:01PM +0800, Jason Wang wrote:
> > > > +
> > > > 
> > > > + /* Work struct for refilling if we run low on memory. */
> > > > 
> > > > + struct delayed_work refill;
> > > 
> > > I can't really see the justification for a refill per queue. Just have
> > > 
> > > one work iterate all the queues if it happens, unless it happens often
> > > 
> > > (in which case, we need to look harder at this anyway).
> > 
> > But during this kind of iteration, we may need enable/disable the napi
> > regardless of whether the receive queue has lots to be refilled. This may
> > add extra latency.
> 
> We are running from the timer, so latency is not a concern I think.

Maybe, anyway it's only called when available memory is low, so it should not
be an issue.

^ permalink raw reply

* Re: [net-next rfc v7 1/3] virtio-net: separate fields of sending/receiving queue from virtnet_info
From: Jason Wang @ 2012-12-04  9:23 UTC (permalink / raw)
  To: Rusty Russell
  Cc: krkumar2, kvm, mst, netdev, linux-kernel, virtualization,
	bhutchings, jwhan, shiyer
In-Reply-To: <87zk1uh48g.fsf@rustcorp.com.au>

On Tuesday, December 04, 2012 02:13:11 PM Rusty Russell wrote:
> Jason Wang <jasowang@redhat.com> writes:
> > On Monday, December 03, 2012 12:25:42 PM Rusty Russell wrote:
> >> > +
> >> > +	/* Work struct for refilling if we run low on memory. */
> >> > +	struct delayed_work refill;
> >> 
> >> I can't really see the justification for a refill per queue.  Just have
> >> one work iterate all the queues if it happens, unless it happens often
> >> (in which case, we need to look harder at this anyway).
> > 
> > But during this kind of iteration, we may need enable/disable the napi
> > regardless of whether the receive queue has lots to be refilled. This may
> > add extra latency.
> 
> Sure, but does it actually happen?  We only use the work when we run out
> of memory.  If this happens in normal behaviour we need to change
> something else...

True, I will change to use a global one.

Thanks
> 
> Thanks,
> Rusty.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [net-next rfc v7 2/3] virtio_net: multiqueue support
From: Jason Wang @ 2012-12-04  9:24 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: krkumar2, kvm, netdev, linux-kernel, virtualization, bhutchings,
	jwhan, shiyer
In-Reply-To: <20121203111118.GC26167@redhat.com>

On Monday, December 03, 2012 01:11:18 PM Michael S. Tsirkin wrote:
> On Mon, Dec 03, 2012 at 06:01:58PM +0800, Jason Wang wrote:
> > On 12/03/2012 05:47 PM, Michael S. Tsirkin wrote:
> > > On Mon, Dec 03, 2012 at 02:05:27PM +0800, Jason Wang wrote:
> > >> On Monday, December 03, 2012 12:34:08 PM Rusty Russell wrote:
> > >>> Jason Wang <jasowang@redhat.com> writes:
> > >>>> +static const struct ethtool_ops virtnet_ethtool_ops;
> > >>>> +
> > >>>> +/*
> > >>>> + * Converting between virtqueue no. and kernel tx/rx queue no.
> > >>>> + * 0:rx0 1:tx0 2:cvq 3:rx1 4:tx1 ... 2N+1:rxN 2N+2:txN
> > >>>> + */
> > >>>> +static int vq2txq(struct virtqueue *vq)
> > >>>> +{
> > >>>> +	int index = virtqueue_get_queue_index(vq);
> > >>>> +	return index == 1 ? 0 : (index - 2) / 2;
> > >>>> +}
> > >>>> +
> > >>>> +static int txq2vq(int txq)
> > >>>> +{
> > >>>> +	return txq ? 2 * txq + 2 : 1;
> > >>>> +}
> > >>>> +
> > >>>> +static int vq2rxq(struct virtqueue *vq)
> > >>>> +{
> > >>>> +	int index = virtqueue_get_queue_index(vq);
> > >>>> +	return index ? (index - 1) / 2 : 0;
> > >>>> +}
> > >>>> +
> > >>>> +static int rxq2vq(int rxq)
> > >>>> +{
> > >>>> +	return rxq ? 2 * rxq + 1 : 0;
> > >>>> +}
> > >>>> +
> > >>> 
> > >>> I thought MST changed the proposed spec to make the control queue
> > >>> always
> > >>> the last one, so this logic becomes trivial.
> > >> 
> > >> But it may break the support of legacy guest. If we boot a legacy
> > >> single queue guest on a 2 queue virtio-net device. It may think vq 2
> > >> is cvq which is indeed rx1.
> > > 
> > > Legacy guest support should be handled by host using feature
> > > bits in the usual way: host should detect legacy guest
> > > by checking the VIRTIO_NET_F_RFS feature.
> > > 
> > > If VIRTIO_NET_F_RFS is acked, cvq is vq max_virtqueue_pairs * 2.
> > > If it's not acked, cvq is vq 2.
> > 
> > We could, but we didn't gain much from this.
> 
> It just seems cleaner and easier to understand.
> 
> > Furthermore, we need also
> > do the dynamic creation/destroying of virtqueues during feature
> > negotiation which seems not supported in qemu now.
> 
> It's not *done* in qemu now, but it seems easy: just call
> virtio_add_queue for vq2 and on from virtio_net_set_features.
> As features can be modified multiple times, we
> should add virtio_del_queue and call that beforehand
> to get to the known state (two vqs).

And also need some work after migration like what we need in setting features. 
I'm ok with this method, will change to follow spec v5.

Thanks

^ permalink raw reply

* Re: [net-next rfc v7 2/3] virtio_net: multiqueue support
From: Jason Wang @ 2012-12-04  9:27 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: krkumar2, kvm, netdev, linux-kernel, virtualization, bhutchings,
	jwhan, shiyer
In-Reply-To: <20121204073503.GB7499@redhat.com>

On Tuesday, December 04, 2012 09:35:03 AM Michael S. Tsirkin wrote:
> On Mon, Dec 03, 2012 at 06:30:49PM +0800, Jason Wang wrote:
> > On 12/03/2012 06:14 PM, Michael S. Tsirkin wrote:
> > > On Tue, Nov 27, 2012 at 06:15:59PM +0800, Jason Wang wrote:
> > >> > -	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
> > >> > -		schedule_delayed_work(&vi->rq.refill, 0);
> > >> > +	for (i = 0; i < vi->max_queue_pairs; i++)
> > >> > +		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
> > >> > +			schedule_delayed_work(&vi->rq[i].refill, 0);
> > >> > 
> > >> >  	mutex_lock(&vi->config_lock);
> > >> >  	vi->config_enable = true;
> > >> >  	mutex_unlock(&vi->config_lock);
> > >> > 
> > >> > +	BUG_ON(virtnet_set_queues(vi));
> > >> > +
> > >> > 
> > >> >  	return 0;
> > >> >  
> > >> >  }
> > >> >  #endif
> > > 
> > > Also crashing on device nack of command is also not nice.
> > > In this case it seems we can just switch to
> > > single-queue mode which should always be safe.
> > 
> > Not sure it's safe. It depends on the reason why this call fails. If we
> > left a state that the driver only use single queue but the device use
> > multi queues, we may still lost the network.
> 
> Looks like we won't: napi will stay enabled on all queues
> so we will process incoming packets.

True, considering there's no bug in qemu. Will just leave a warning in the
next version.

Thanks

^ permalink raw reply

* Re: [PATCH net-next] bridge: implement multicast fast leave
From: Cong Wang @ 2012-12-04  9:55 UTC (permalink / raw)
  To: Herbert Xu; +Cc: netdev, Stephen Hemminger, bridge, David S. Miller
In-Reply-To: <20121204070732.GA32550@gondor.apana.org.au>

On Tue, 2012-12-04 at 15:07 +0800, Herbert Xu wrote:
> On Tue, Dec 04, 2012 at 03:04:52PM +0800, Cong Wang wrote:
> >
> > Per-port sounds better than per-bridge. And I will make it enabled by
> > default.
> 
> IMHO the default should be off.  Suddenly losing your subscription
> because someone else on the same port unsubscribed is a lot more
> annoying than getting a few minutes of unwanted multicast data.
> 

You are right, this is why it should only be used when there is one
client behind the port.

Thanks!

^ permalink raw reply

* [PATCH net-next v2] bridge: implement multicast fast leave
From: Cong Wang @ 2012-12-04  9:56 UTC (permalink / raw)
  To: netdev; +Cc: Herbert Xu, Stephen Hemminger, bridge, David S. Miller, Cong Wang

V2: make the toggle per-port

Fast leave allows the bridge to immediately stop the multicast
traffic on the port that receives an IGMP Leave when IGMP snooping is
enabled; no timeouts are observed.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>

---
 net/bridge/br_multicast.c |   21 +++++++++++++++++++++
 net/bridge/br_private.h   |    1 +
 net/bridge/br_sysfs_if.c  |   20 ++++++++++++++++++++
 3 files changed, 42 insertions(+), 0 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d53e4f4..02da618 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1226,6 +1226,27 @@ static void br_multicast_leave_group(struct net_bridge *br,
 	if (!mp)
 		goto out;
 
+	if (port && port->multicast_fast_leave) {
+		struct net_bridge_port_group __rcu **pp;
+
+		for (pp = &mp->ports;
+		     (p = mlock_dereference(*pp, br)) != NULL;
+		     pp = &p->next) {
+			if (p->port != port)
+				continue;
+
+			rcu_assign_pointer(*pp, p->next);
+			hlist_del_init(&p->mglist);
+			del_timer(&p->timer);
+			call_rcu_bh(&p->rcu, br_multicast_free_pg);
+
+			if (!mp->ports && !mp->mglist &&
+			    netif_running(br->dev))
+				mod_timer(&mp->timer, jiffies);
+		}
+		goto out;
+	}
+
 	now = jiffies;
 	time = now + br->multicast_last_member_count *
 		     br->multicast_last_member_interval;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 6484069..8f0e789 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -142,6 +142,7 @@ struct net_bridge_port
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	u32				multicast_startup_queries_sent;
 	unsigned char			multicast_router;
+	unsigned char			multicast_fast_leave;
 	struct timer_list		multicast_router_timer;
 	struct timer_list		multicast_query_timer;
 	struct hlist_head		mglist;
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 7ff95ba..dc484ac 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -172,6 +172,25 @@ static int store_multicast_router(struct net_bridge_port *p,
 }
 static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
 		   store_multicast_router);
+
+static ssize_t show_multicast_fast_leave(struct net_bridge_port *p,
+					 char *buf)
+{
+	return sprintf(buf, "%d\n", p->multicast_fast_leave);
+}
+
+static int store_multicast_fast_leave(struct net_bridge_port *p,
+				      unsigned long v)
+{
+	if (p->br->multicast_disabled)
+		return -EINVAL;
+
+	p->multicast_fast_leave = !!v;
+	return 0;
+}
+
+static BRPORT_ATTR(multicast_fast_leave, S_IRUGO | S_IWUSR,
+		   show_multicast_fast_leave, store_multicast_fast_leave);
 #endif
 
 static const struct brport_attribute *brport_attrs[] = {
@@ -195,6 +214,7 @@ static const struct brport_attribute *brport_attrs[] = {
 	&brport_attr_root_block,
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	&brport_attr_multicast_router,
+	&brport_attr_multicast_fast_leave,
 #endif
 	NULL
 };

^ permalink raw reply related

* [PATCH] ip6mr: fix rtm_family of rtnl msg
From: Nicolas Dichtel @ 2012-12-04 11:01 UTC (permalink / raw)
  To: netdev; +Cc: davem, kaber, Nicolas Dichtel

We talk about IPv6, hence the family is RTNL_FAMILY_IP6MR!
rtnl_register() is already called with RTNL_FAMILY_IP6MR.

The bug is here since the beginning of this function (commit 5b285cac3570).

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 net/ipv6/ip6mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f7c7c63..940aa52 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2212,7 +2212,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 		return -EMSGSIZE;
 
 	rtm = nlmsg_data(nlh);
-	rtm->rtm_family   = RTNL_FAMILY_IPMR;
+	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
 	rtm->rtm_dst_len  = 128;
 	rtm->rtm_src_len  = 128;
 	rtm->rtm_tos      = 0;
-- 
1.8.0.1

^ permalink raw reply related

* [PATCH] ipv4/route/rtnl: get mcast attributes when dst is multicast
From: Nicolas Dichtel @ 2012-12-04 11:03 UTC (permalink / raw)
  To: netdev; +Cc: davem, Nicolas Dichtel

Commit f1ce3062c538 (ipv4: Remove 'rt_dst' from 'struct rtable') removes the
call to ipmr_get_route(), which will get multicast parameters of the route.

I revert the part of the patch that removes this call. I think the goal was only
to get rid of the rt_dst field.

The patch is only compile-tested. My first idea was to remove ipmr_get_route()
because rt_fill_info() was the only user, but it seems the previous patch cleans
the code a bit too much ;-)

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 net/ipv4/route.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df25142..69135ed 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2232,8 +2232,27 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
 	error = rt->dst.error;
 
 	if (rt_is_input_route(rt)) {
-		if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
-			goto nla_put_failure;
+#ifdef CONFIG_IP_MROUTE
+		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
+		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
+			int err = ipmr_get_route(net, skb,
+						 fl4->saddr, fl4->daddr,
+						 r, nowait);
+			if (err <= 0) {
+				if (!nowait) {
+					if (err == 0)
+						return 0;
+					goto nla_put_failure;
+				} else {
+					if (err == -EMSGSIZE)
+						goto nla_put_failure;
+					error = err;
+				}
+			}
+		} else
+#endif
+			if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+				goto nla_put_failure;
 	}
 
 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
-- 
1.8.0.1

^ permalink raw reply related

* [PATCH 3.0.y] route: release dst_entry.hh_cache when handling redirects
From: Michal Kubecek @ 2012-12-04 10:09 UTC (permalink / raw)
  To: stable; +Cc: netdev, Eric Dumazet

Stable-3.0 commit 42ab5316 (ipv4: fix redirect handling) was a
backport of mainline commit 9cc20b26 from 3.2-rc3 where the hh
member of struct dst_entry was already gone.

However, in 3.0 we still have it and we have to clean it as
well, otherwise it keeps pointing to the cleaned up (and
unusable) hh_cache entry and packets cannot be sent out.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
 net/ipv4/route.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6b95f74..5ff2614 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1374,6 +1374,7 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
 	struct rtable *rt = (struct rtable *) dst;
 	__be32 orig_gw = rt->rt_gateway;
 	struct neighbour *n, *old_n;
+	struct hh_cache *old_hh;
 
 	dst_confirm(&rt->dst);
 
@@ -1381,6 +1382,9 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
 	n = __arp_bind_neighbour(&rt->dst, rt->rt_gateway);
 	if (IS_ERR(n))
 		return PTR_ERR(n);
+	old_hh = xchg(&rt->dst.hh, NULL);
+	if (old_hh)
+		hh_cache_put(old_hh);
 	old_n = xchg(&rt->dst._neighbour, n);
 	if (old_n)
 		neigh_release(old_n);
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH 3/3] net: cpsw: implement ioctl for MII
From: Jan Lübbe @ 2012-12-04 11:06 UTC (permalink / raw)
  To: Mugunthan V N
  Cc: netdev, David S. Miller, Vaibhav Hiremath, linux-arm-kernel,
	linux-omap
In-Reply-To: <50BCDB9B.80307@ti.com>

On Mon, 2012-12-03 at 22:34 +0530, Mugunthan V N wrote:
> Already ndo_do_ioctl is already implemented. Can you rebase the patch 
> with latest git repo
> and resubmit the patch

Sorry, I should have checked that before. I'll update the other patches
and resubmit.
-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply

* [PATCH net-next 0/3] Multiqueue support for virtio-net
From: Jason Wang @ 2012-12-04 11:07 UTC (permalink / raw)
  To: rusty, mst, virtualization, netdev, linux-kernel, davem
  Cc: krkumar2, kvm, bhutchings, jwhan, shiyer

Hi all:

This series is an updated version of the multiqueue virtio-net driver based on
Krishna Kumar's work to let virtio-net use multiple rx/tx queues to do the
packet reception and transmission. Please review and comment.

A prototype implementation of qemu-kvm support can be found in
git://github.com/jasowang/qemu-kvm-mq.git. To start a guest with two queues, you
could specify the queues parameters to both tap and virtio-net like:

./qemu-kvm -netdev tap,queues=2,... -device virtio-net-pci,queues=2,...

then enable the multiqueue through ethtool by:

ethtool -L eth0 combined 2

Changes from RFC v7:
Addressing Rusty's comments:
- align the implementation (location of cvq) to v5.
- fix the style issue.
- use a global refill instead of per-vq one.
- check the VIRTIO_NET_F_RFS before calling virtnet_set_queues()

Addressing Michael's comments:
- rename the curr_queue_pairs in virtnet_probe() to max_queue_pairs
- validate the number of queue pairs supported by the device against
  VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MIN and VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MAX.
- don't crash when failing to change the number of virtqueues
- don't set the affinity hint when only a single queue is used or there are too
  many virtqueues
- add a TODO of handling cpu hotplug
- allow user to set the number of queue pairs between 1 and max_queue_pairs

Changes from RFC v6:
- Align the implementation with the RFC spec update v5
- Addressing Rusty's comments:
  * split the patches
  * rename to max_queue_pairs and curr_queue_pairs
  * remove the useless status
  * fix the hibernation bug
- Addressing Ben's comments:
  * check other parameters in ethtool_set_queues

Changes from RFC v5:
- Align the implementation with the RFC spec update v4
- Switch the mode between single mode and multiqueue mode without reset
- Remove the 256 limitation of queues
- Use helpers to do the mapping between virtqueues and tx/rx queues
- Use combined channels instead of separate rx/tx queues when doing the queue
  number configuration
- Other coding style comments from Michael

Changes from RFC v4:
- Add ability to negotiate the number of queues through control virtqueue
- Ethtool -{L|l} support and default the tx/rx queue number to 1
- Expose the API to set irq affinity instead of irq itself

Changes from RFC v3:
- Rebase to the net-next
- Let queue 2 be the control virtqueue to obey the spec
- Provides irq affinity
- Choose txq based on processor id

Reference:
- Virtio spec RFC: http://patchwork.ozlabs.org/patch/201303/
- RFC V7:https://lkml.org/lkml/2012/11/27/177
- RFC V6: https://lkml.org/lkml/2012/10/30/127
- RFC V5: http://lwn.net/Articles/505388/
- RFC V4: https://lkml.org/lkml/2012/6/25/120
- RFC V2: http://lwn.net/Articles/467283/

Perf Numbers:

Will do some basic test and post as a reply to this mail.

Jason Wang (3):
  virtio-net: separate fields of sending/receiving queue from
    virtnet_info
  virtio_net: multiqueue support
  virtio-net: change the number of queues through ethtool

 drivers/net/virtio_net.c        |  723 ++++++++++++++++++++++++++++-----------
 include/uapi/linux/virtio_net.h |   16 +
 2 files changed, 546 insertions(+), 193 deletions(-)

^ permalink raw reply

* [PATCH net-next 1/3] virtio-net: separate fields of sending/receiving queue from virtnet_info
From: Jason Wang @ 2012-12-04 11:07 UTC (permalink / raw)
  To: rusty, mst, virtualization, netdev, linux-kernel, davem
  Cc: krkumar2, kvm, bhutchings, jwhan, shiyer
In-Reply-To: <1354619278-35702-1-git-send-email-jasowang@redhat.com>

To support multiqueue transmitq/receiveq, the first step is to separate the
queue-related structures from virtnet_info. This patch introduces the send_queue
and receive_queue structures and uses pointers to them as the parameter in
functions handling sending/receiving.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c |  271 +++++++++++++++++++++++++---------------------
 1 files changed, 149 insertions(+), 122 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8262232..266f712 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -51,16 +51,40 @@ struct virtnet_stats {
 	u64 rx_packets;
 };
 
-struct virtnet_info {
-	struct virtio_device *vdev;
-	struct virtqueue *rvq, *svq, *cvq;
-	struct net_device *dev;
+/* Internal representation of a send virtqueue */
+struct send_queue {
+	/* Virtqueue associated with this send _queue */
+	struct virtqueue *vq;
+
+	/* TX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+};
+
+/* Internal representation of a receive virtqueue */
+struct receive_queue {
+	/* Virtqueue associated with this receive_queue */
+	struct virtqueue *vq;
+
 	struct napi_struct napi;
-	unsigned int status;
 
 	/* Number of input buffers, and max we've ever had. */
 	unsigned int num, max;
 
+	/* Chain pages by the private ptr. */
+	struct page *pages;
+
+	/* RX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+};
+
+struct virtnet_info {
+	struct virtio_device *vdev;
+	struct virtqueue *cvq;
+	struct net_device *dev;
+	struct send_queue sq;
+	struct receive_queue rq;
+	unsigned int status;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -81,13 +105,6 @@ struct virtnet_info {
 
 	/* Lock for config space updates */
 	struct mutex config_lock;
-
-	/* Chain pages by the private ptr. */
-	struct page *pages;
-
-	/* fragments + linear part + virtio header */
-	struct scatterlist rx_sg[MAX_SKB_FRAGS + 2];
-	struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
 };
 
 struct skb_vnet_hdr {
@@ -117,22 +134,22 @@ static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
  * private is used to chain pages for big packets, put the whole
  * most recent used list in the beginning for reuse
  */
-static void give_pages(struct virtnet_info *vi, struct page *page)
+static void give_pages(struct receive_queue *rq, struct page *page)
 {
 	struct page *end;
 
-	/* Find end of list, sew whole thing into vi->pages. */
+	/* Find end of list, sew whole thing into vi->rq.pages. */
 	for (end = page; end->private; end = (struct page *)end->private);
-	end->private = (unsigned long)vi->pages;
-	vi->pages = page;
+	end->private = (unsigned long)rq->pages;
+	rq->pages = page;
 }
 
-static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 {
-	struct page *p = vi->pages;
+	struct page *p = rq->pages;
 
 	if (p) {
-		vi->pages = (struct page *)p->private;
+		rq->pages = (struct page *)p->private;
 		/* clear private here, it is used to chain pages */
 		p->private = 0;
 	} else
@@ -140,12 +157,12 @@ static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
 	return p;
 }
 
-static void skb_xmit_done(struct virtqueue *svq)
+static void skb_xmit_done(struct virtqueue *vq)
 {
-	struct virtnet_info *vi = svq->vdev->priv;
+	struct virtnet_info *vi = vq->vdev->priv;
 
 	/* Suppress further interrupts. */
-	virtqueue_disable_cb(svq);
+	virtqueue_disable_cb(vq);
 
 	/* We were probably waiting for more output buffers. */
 	netif_wake_queue(vi->dev);
@@ -167,9 +184,10 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
 }
 
 /* Called from bottom half context */
-static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+static struct sk_buff *page_to_skb(struct receive_queue *rq,
 				   struct page *page, unsigned int len)
 {
+	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct sk_buff *skb;
 	struct skb_vnet_hdr *hdr;
 	unsigned int copy, hdr_len, offset;
@@ -224,12 +242,12 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	}
 
 	if (page)
-		give_pages(vi, page);
+		give_pages(rq, page);
 
 	return skb;
 }
 
-static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
+static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
 {
 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 	struct page *page;
@@ -243,7 +261,7 @@ static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
 			skb->dev->stats.rx_length_errors++;
 			return -EINVAL;
 		}
-		page = virtqueue_get_buf(vi->rvq, &len);
+		page = virtqueue_get_buf(rq->vq, &len);
 		if (!page) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 skb->dev->name, hdr->mhdr.num_buffers);
@@ -256,14 +274,15 @@ static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
 
 		set_skb_frag(skb, page, 0, &len);
 
-		--vi->num;
+		--rq->num;
 	}
 	return 0;
 }
 
-static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
+static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 {
-	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtnet_info *vi = rq->vq->vdev->priv;
+	struct net_device *dev = vi->dev;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 	struct sk_buff *skb;
 	struct page *page;
@@ -273,7 +292,7 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
 		if (vi->mergeable_rx_bufs || vi->big_packets)
-			give_pages(vi, buf);
+			give_pages(rq, buf);
 		else
 			dev_kfree_skb(buf);
 		return;
@@ -285,14 +304,14 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
 		skb_trim(skb, len);
 	} else {
 		page = buf;
-		skb = page_to_skb(vi, page, len);
+		skb = page_to_skb(rq, page, len);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
-			give_pages(vi, page);
+			give_pages(rq, page);
 			return;
 		}
 		if (vi->mergeable_rx_bufs)
-			if (receive_mergeable(vi, skb)) {
+			if (receive_mergeable(rq, skb)) {
 				dev_kfree_skb(skb);
 				return;
 			}
@@ -359,8 +378,9 @@ frame_err:
 	dev_kfree_skb(skb);
 }
 
-static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
 {
+	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct sk_buff *skb;
 	struct skb_vnet_hdr *hdr;
 	int err;
@@ -372,77 +392,77 @@ static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
 	skb_put(skb, MAX_PACKET_LEN);
 
 	hdr = skb_vnet_hdr(skb);
-	sg_set_buf(vi->rx_sg, &hdr->hdr, sizeof hdr->hdr);
+	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
 
-	skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len);
+	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
 
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, 2, skb, gfp);
 	if (err < 0)
 		dev_kfree_skb(skb);
 
 	return err;
 }
 
-static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 {
 	struct page *first, *list = NULL;
 	char *p;
 	int i, err, offset;
 
-	/* page in vi->rx_sg[MAX_SKB_FRAGS + 1] is list tail */
+	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
 	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
-		first = get_a_page(vi, gfp);
+		first = get_a_page(rq, gfp);
 		if (!first) {
 			if (list)
-				give_pages(vi, list);
+				give_pages(rq, list);
 			return -ENOMEM;
 		}
-		sg_set_buf(&vi->rx_sg[i], page_address(first), PAGE_SIZE);
+		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
 
 		/* chain new page in list head to match sg */
 		first->private = (unsigned long)list;
 		list = first;
 	}
 
-	first = get_a_page(vi, gfp);
+	first = get_a_page(rq, gfp);
 	if (!first) {
-		give_pages(vi, list);
+		give_pages(rq, list);
 		return -ENOMEM;
 	}
 	p = page_address(first);
 
-	/* vi->rx_sg[0], vi->rx_sg[1] share the same page */
-	/* a separated vi->rx_sg[0] for virtio_net_hdr only due to QEMU bug */
-	sg_set_buf(&vi->rx_sg[0], p, sizeof(struct virtio_net_hdr));
+	/* rq->sg[0], rq->sg[1] share the same page */
+	/* a separated rq->sg[0] for virtio_net_hdr only due to QEMU bug */
+	sg_set_buf(&rq->sg[0], p, sizeof(struct virtio_net_hdr));
 
-	/* vi->rx_sg[1] for data packet, from offset */
+	/* rq->sg[1] for data packet, from offset */
 	offset = sizeof(struct padded_vnet_hdr);
-	sg_set_buf(&vi->rx_sg[1], p + offset, PAGE_SIZE - offset);
+	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
 
 	/* chain first in list head */
 	first->private = (unsigned long)list;
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, MAX_SKB_FRAGS + 2,
 				first, gfp);
 	if (err < 0)
-		give_pages(vi, first);
+		give_pages(rq, first);
 
 	return err;
 }
 
-static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
 	struct page *page;
 	int err;
 
-	page = get_a_page(vi, gfp);
+	page = get_a_page(rq, gfp);
 	if (!page)
 		return -ENOMEM;
 
-	sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE);
+	sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
 
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, 1, page, gfp);
 	if (err < 0)
-		give_pages(vi, page);
+		give_pages(rq, page);
 
 	return err;
 }
@@ -454,65 +474,68 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
  * before we're receiving packets, or from refill_work which is
  * careful to disable receiving (using napi_disable).
  */
-static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
+static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 {
+	struct virtnet_info *vi = rq->vq->vdev->priv;
 	int err;
 	bool oom;
 
 	do {
 		if (vi->mergeable_rx_bufs)
-			err = add_recvbuf_mergeable(vi, gfp);
+			err = add_recvbuf_mergeable(rq, gfp);
 		else if (vi->big_packets)
-			err = add_recvbuf_big(vi, gfp);
+			err = add_recvbuf_big(rq, gfp);
 		else
-			err = add_recvbuf_small(vi, gfp);
+			err = add_recvbuf_small(rq, gfp);
 
 		oom = err == -ENOMEM;
 		if (err < 0)
 			break;
-		++vi->num;
+		++rq->num;
 	} while (err > 0);
-	if (unlikely(vi->num > vi->max))
-		vi->max = vi->num;
-	virtqueue_kick(vi->rvq);
+	if (unlikely(rq->num > rq->max))
+		rq->max = rq->num;
+	virtqueue_kick(rq->vq);
 	return !oom;
 }
 
 static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
+	struct receive_queue *rq = &vi->rq;
+
 	/* Schedule NAPI, Suppress further interrupts if successful. */
-	if (napi_schedule_prep(&vi->napi)) {
+	if (napi_schedule_prep(&rq->napi)) {
 		virtqueue_disable_cb(rvq);
-		__napi_schedule(&vi->napi);
+		__napi_schedule(&rq->napi);
 	}
 }
 
-static void virtnet_napi_enable(struct virtnet_info *vi)
+static void virtnet_napi_enable(struct receive_queue *rq)
 {
-	napi_enable(&vi->napi);
+	napi_enable(&rq->napi);
 
 	/* If all buffers were filled by other side before we napi_enabled, we
 	 * won't get another interrupt, so process any outstanding packets
 	 * now.  virtnet_poll wants re-enable the queue, so we disable here.
 	 * We synchronize against interrupts via NAPI_STATE_SCHED */
-	if (napi_schedule_prep(&vi->napi)) {
-		virtqueue_disable_cb(vi->rvq);
+	if (napi_schedule_prep(&rq->napi)) {
+		virtqueue_disable_cb(rq->vq);
 		local_bh_disable();
-		__napi_schedule(&vi->napi);
+		__napi_schedule(&rq->napi);
 		local_bh_enable();
 	}
 }
 
 static void refill_work(struct work_struct *work)
 {
-	struct virtnet_info *vi;
+	struct virtnet_info *vi =
+		container_of(work, struct virtnet_info, refill.work);
 	bool still_empty;
 
-	vi = container_of(work, struct virtnet_info, refill.work);
-	napi_disable(&vi->napi);
-	still_empty = !try_fill_recv(vi, GFP_KERNEL);
-	virtnet_napi_enable(vi);
+	napi_disable(&vi->rq.napi);
+	still_empty = !try_fill_recv(&vi->rq, GFP_KERNEL);
+	virtnet_napi_enable(&vi->rq);
 
 	/* In theory, this can happen: if we don't get any buffers in
 	 * we will *never* try to fill again. */
@@ -522,29 +545,31 @@ static void refill_work(struct work_struct *work)
 
 static int virtnet_poll(struct napi_struct *napi, int budget)
 {
-	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
+	struct receive_queue *rq =
+		container_of(napi, struct receive_queue, napi);
+	struct virtnet_info *vi = rq->vq->vdev->priv;
 	void *buf;
 	unsigned int len, received = 0;
 
 again:
 	while (received < budget &&
-	       (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
-		receive_buf(vi->dev, buf, len);
-		--vi->num;
+	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+		receive_buf(rq, buf, len);
+		--rq->num;
 		received++;
 	}
 
-	if (vi->num < vi->max / 2) {
-		if (!try_fill_recv(vi, GFP_ATOMIC))
+	if (rq->num < rq->max / 2) {
+		if (!try_fill_recv(rq, GFP_ATOMIC))
 			schedule_delayed_work(&vi->refill, 0);
 	}
 
 	/* Out of packets? */
 	if (received < budget) {
 		napi_complete(napi);
-		if (unlikely(!virtqueue_enable_cb(vi->rvq)) &&
+		if (unlikely(!virtqueue_enable_cb(rq->vq)) &&
 		    napi_schedule_prep(napi)) {
-			virtqueue_disable_cb(vi->rvq);
+			virtqueue_disable_cb(rq->vq);
 			__napi_schedule(napi);
 			goto again;
 		}
@@ -553,13 +578,14 @@ again:
 	return received;
 }
 
-static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
+static unsigned int free_old_xmit_skbs(struct send_queue *sq)
 {
 	struct sk_buff *skb;
 	unsigned int len, tot_sgs = 0;
+	struct virtnet_info *vi = sq->vq->vdev->priv;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-	while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
+	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
 
 		u64_stats_update_begin(&stats->tx_syncp);
@@ -573,10 +599,11 @@ static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
 	return tot_sgs;
 }
 
-static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
+static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 {
 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
+	struct virtnet_info *vi = sq->vq->vdev->priv;
 
 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 
@@ -611,25 +638,26 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 
 	/* Encode metadata header at front. */
 	if (vi->mergeable_rx_bufs)
-		sg_set_buf(vi->tx_sg, &hdr->mhdr, sizeof hdr->mhdr);
+		sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
 	else
-		sg_set_buf(vi->tx_sg, &hdr->hdr, sizeof hdr->hdr);
+		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
 
-	hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
-	return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
+	hdr->num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	return virtqueue_add_buf(sq->vq, sq->sg, hdr->num_sg,
 				 0, skb, GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	struct send_queue *sq = &vi->sq;
 	int capacity;
 
 	/* Free up any pending old buffers before queueing new ones. */
-	free_old_xmit_skbs(vi);
+	free_old_xmit_skbs(sq);
 
 	/* Try to transmit */
-	capacity = xmit_skb(vi, skb);
+	capacity = xmit_skb(sq, skb);
 
 	/* This can happen with OOM and indirect buffers. */
 	if (unlikely(capacity < 0)) {
@@ -648,7 +676,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
-	virtqueue_kick(vi->svq);
+	virtqueue_kick(sq->vq);
 
 	/* Don't wait up for transmitted skbs to be freed. */
 	skb_orphan(skb);
@@ -658,12 +686,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (capacity < 2+MAX_SKB_FRAGS) {
 		netif_stop_queue(dev);
-		if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
+		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 			/* More just got used, free them then recheck. */
-			capacity += free_old_xmit_skbs(vi);
+			capacity += free_old_xmit_skbs(sq);
 			if (capacity >= 2+MAX_SKB_FRAGS) {
 				netif_start_queue(dev);
-				virtqueue_disable_cb(vi->svq);
+				virtqueue_disable_cb(sq->vq);
 			}
 		}
 	}
@@ -731,7 +759,7 @@ static void virtnet_netpoll(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
-	napi_schedule(&vi->napi);
+	napi_schedule(&vi->rq.napi);
 }
 #endif
 
@@ -740,10 +768,10 @@ static int virtnet_open(struct net_device *dev)
 	struct virtnet_info *vi = netdev_priv(dev);
 
 	/* Make sure we have some buffers: if oom use wq. */
-	if (!try_fill_recv(vi, GFP_KERNEL))
+	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
 		schedule_delayed_work(&vi->refill, 0);
 
-	virtnet_napi_enable(vi);
+	virtnet_napi_enable(&vi->rq);
 	return 0;
 }
 
@@ -808,7 +836,7 @@ static int virtnet_close(struct net_device *dev)
 
 	/* Make sure refill_work doesn't re-enable napi! */
 	cancel_delayed_work_sync(&vi->refill);
-	napi_disable(&vi->napi);
+	napi_disable(&vi->rq.napi);
 
 	return 0;
 }
@@ -920,11 +948,10 @@ static void virtnet_get_ringparam(struct net_device *dev,
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
-	ring->rx_max_pending = virtqueue_get_vring_size(vi->rvq);
-	ring->tx_max_pending = virtqueue_get_vring_size(vi->svq);
+	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq);
+	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq);
 	ring->rx_pending = ring->rx_max_pending;
 	ring->tx_pending = ring->tx_max_pending;
-
 }
 
 
@@ -1034,8 +1061,8 @@ static int init_vqs(struct virtnet_info *vi)
 	if (err)
 		return err;
 
-	vi->rvq = vqs[0];
-	vi->svq = vqs[1];
+	vi->rq.vq = vqs[0];
+	vi->sq.vq = vqs[1];
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
 		vi->cvq = vqs[2];
@@ -1100,11 +1127,11 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 	/* Set up our device-specific information */
 	vi = netdev_priv(dev);
-	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
+	netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight);
 	vi->dev = dev;
 	vi->vdev = vdev;
 	vdev->priv = vi;
-	vi->pages = NULL;
+	vi->rq.pages = NULL;
 	vi->stats = alloc_percpu(struct virtnet_stats);
 	err = -ENOMEM;
 	if (vi->stats == NULL)
@@ -1114,8 +1141,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	mutex_init(&vi->config_lock);
 	vi->config_enable = true;
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
-	sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
-	sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
+	sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg));
+	sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg));
 
 	/* If we can receive ANY GSO packets, we must allocate large ones. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -1137,10 +1164,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 	}
 
 	/* Last of all, set up some receive buffers. */
-	try_fill_recv(vi, GFP_KERNEL);
+	try_fill_recv(&vi->rq, GFP_KERNEL);
 
 	/* If we didn't even get one input buffer, we're useless. */
-	if (vi->num == 0) {
+	if (vi->rq.num == 0) {
 		err = -ENOMEM;
 		goto unregister;
 	}
@@ -1173,22 +1200,22 @@ static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
 	while (1) {
-		buf = virtqueue_detach_unused_buf(vi->svq);
+		buf = virtqueue_detach_unused_buf(vi->sq.vq);
 		if (!buf)
 			break;
 		dev_kfree_skb(buf);
 	}
 	while (1) {
-		buf = virtqueue_detach_unused_buf(vi->rvq);
+		buf = virtqueue_detach_unused_buf(vi->rq.vq);
 		if (!buf)
 			break;
 		if (vi->mergeable_rx_bufs || vi->big_packets)
-			give_pages(vi, buf);
+			give_pages(&vi->rq, buf);
 		else
 			dev_kfree_skb(buf);
-		--vi->num;
+		--vi->rq.num;
 	}
-	BUG_ON(vi->num != 0);
+	BUG_ON(vi->rq.num != 0);
 }
 
 static void remove_vq_common(struct virtnet_info *vi)
@@ -1200,8 +1227,8 @@ static void remove_vq_common(struct virtnet_info *vi)
 
 	vi->vdev->config->del_vqs(vi->vdev);
 
-	while (vi->pages)
-		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
+	while (vi->rq.pages)
+		__free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0);
 }
 
 static void __devexit virtnet_remove(struct virtio_device *vdev)
@@ -1237,7 +1264,7 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	cancel_delayed_work_sync(&vi->refill);
 
 	if (netif_running(vi->dev))
-		napi_disable(&vi->napi);
+		napi_disable(&vi->rq.napi);
 
 	remove_vq_common(vi);
 
@@ -1256,11 +1283,11 @@ static int virtnet_restore(struct virtio_device *vdev)
 		return err;
 
 	if (netif_running(vi->dev))
-		virtnet_napi_enable(vi);
+		virtnet_napi_enable(&vi->rq);
 
 	netif_device_attach(vi->dev);
 
-	if (!try_fill_recv(vi, GFP_KERNEL))
+	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
 		schedule_delayed_work(&vi->refill, 0);
 
 	mutex_lock(&vi->config_lock);
-- 
1.7.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox