Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 3/3] drivers/net/bfin_mac.c: Misc function cleanups, neatening
From: Joe Perches @ 2010-07-29 23:58 UTC (permalink / raw)
  To: Michael Hennerich
  Cc: uclinux-dist-devel, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <cover.1280447281.git.joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>

Use new bfin_alloc_skb to centralize skb allocations
Add and use get_mac_addr function
Neaten bfin_mac_init
Neaten bfin_mac_hard_start_xmit

Signed-off-by: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
---
 drivers/net/bfin_mac.c |   93 ++++++++++++++++++++++++++---------------------
 1 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index a1d8119..68afb2a 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -81,6 +81,26 @@ static u16 pin_req[] = P_RMII0;
 static u16 pin_req[] = P_MII0;
 #endif
 
+static struct sk_buff *bfin_alloc_skb(void)
+{
+	/* allocate a new skb */
+	struct sk_buff *new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
+
+	if (!new_skb)
+		return NULL;
+
+	skb_reserve(new_skb, NET_IP_ALIGN);
+	/*
+	 * Invalidate the data cache of skb->data range
+	 * when it is write back cache to prevent overwriting
+	 * the new data from DMA.
+	 */
+	blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
+					 (unsigned long)new_skb->end);
+
+	return new_skb;
+}
+
 static void desc_list_free(void)
 {
 	struct net_dma_desc_rx *r;
@@ -193,18 +213,12 @@ static int desc_list_init(void)
 		struct dma_descriptor *b = &(r->desc_b);
 
 		/* allocate a new skb for next time receive */
-		new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
+		new_skb = bfin_alloc_skb();
 		if (!new_skb) {
 			pr_notice("init: low on mem - packet dropped\n");
 			goto init_error;
 		}
-		skb_reserve(new_skb, NET_IP_ALIGN);
-		/* Invalidate the data cache of skb->data range when it is
-		 * write back cache.
-		 * It will prevent overwriting the new data from DMA
-		 */
-		blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
-					 (unsigned long)new_skb->end);
+
 		r->skb = new_skb;
 
 		/*
@@ -598,6 +612,16 @@ void setup_system_regs(struct net_device *dev)
 	bfin_write_DMA1_Y_MODIFY(0);
 }
 
+/* Grab the MAC address in the MAC */
+static void get_mac_addr(u8 *mac_addr)
+{
+	__le32 addr_low = cpu_to_le32(bfin_read_EMAC_ADDRLO());
+	__le16 addr_hi = cpu_to_le16((u16)bfin_read_EMAC_ADDRHI());
+
+	memcpy(mac_addr, &addr_low, 4);
+	memcpy(mac_addr + 4, &addr_hi, 2);
+}
+
 static void setup_mac_addr(u8 *mac_addr)
 {
 	u32 addr_low = le32_to_cpu(*(__le32 *)&mac_addr[0]);
@@ -994,15 +1018,17 @@ static void tx_reclaim_skb_timeout(unsigned long lp)
 static int bfin_mac_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bfin_mac_local *lp = netdev_priv(dev);
-	u16 *data;
 	u32 data_align = (unsigned long)(skb->data) & 0x3;
+	unsigned long buf_start;
+	unsigned long buf_len;
 	union skb_shared_tx *shtx = skb_tx(skb);
 
 	current_tx_ptr->skb = skb;
 
 	if (data_align == 0x2) {
 		/* move skb->data to current_tx_ptr payload */
-		data = (u16 *)(skb->data) - 1;
+		u16 *data = (u16 *)(skb->data) - 1;
+
 		*data = (u16)(skb->len);
 		/*
 		 * When transmitting an Ethernet packet, the PTP_TSYNC module
@@ -1014,23 +1040,21 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (shtx->hardware)
 			*data |= 0x1000;
 
-		current_tx_ptr->desc_a.start_addr = (u32)data;
-		/* this is important! */
-		blackfin_dcache_flush_range((u32)data,
-					    (u32)((u8 *)data + skb->len + 4));
+		buf_start = (unsigned long)data;
+		buf_len = skb->len + 4;
 	} else {
 		*((u16 *)(current_tx_ptr->packet)) = (u16)(skb->len);
 		/* enable timestamping for the sent packet */
 		if (shtx->hardware)
 			*((u16 *)(current_tx_ptr->packet)) |= 0x1000;
-		memcpy((u8 *)(current_tx_ptr->packet + 2), skb->data,
-			skb->len);
-		current_tx_ptr->desc_a.start_addr =
-			(u32)current_tx_ptr->packet;
-		blackfin_dcache_flush_range(
-			(u32)current_tx_ptr->packet,
-			(u32)(current_tx_ptr->packet + skb->len + 2));
+		memcpy(current_tx_ptr->packet + 2, skb->data, skb->len);
+
+		buf_start = (unsigned long)current_tx_ptr->packet;
+		buf_len = skb->len + 2;
 	}
+	current_tx_ptr->desc_a.start_addr = buf_start;
+	/* this is important! */
+	blackfin_dcache_flush_range(buf_start, buf_start + buf_len);
 
 	/*
 	 * Make sure the internal data buffers in the core are drained
@@ -1095,20 +1119,12 @@ static void bfin_mac_rx(struct net_device *dev)
 	/* allocate a new skb for next time receive */
 	skb = current_rx_ptr->skb;
 
-	new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
+	new_skb = bfin_alloc_skb();
 	if (!new_skb) {
 		pr_notice("rx: low on mem - packet dropped\n");
 		dev->stats.rx_dropped++;
 		goto out;
 	}
-	/* reserve 2 bytes for RXDWA padding */
-	skb_reserve(new_skb, NET_IP_ALIGN);
-	/*
-	 * Invalidate the data cache of skb->data range when it is write back
-	 * cache. It will prevent overwriting the new data from DMA
-	 */
-	blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
-					 (unsigned long)new_skb->end);
 
 	current_rx_ptr->skb = new_skb;
 	current_rx_ptr->desc_a.start_addr = (unsigned long)new_skb->data - 2;
@@ -1475,14 +1491,10 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev)
 	lp = netdev_priv(ndev);
 	lp->ndev = ndev;
 
-	/* Grab the MAC address in the MAC */
-	*(__le32 *)(&(ndev->dev_addr[0])) =
-		cpu_to_le32(bfin_read_EMAC_ADDRLO());
-	*(__le16 *)(&(ndev->dev_addr[4])) =
-		cpu_to_le16((u16)bfin_read_EMAC_ADDRHI());
+	get_mac_addr(ndev->dev_addr);
 
 	/* probe mac */
-	/*todo: how to proble? which is revision_register */
+	/* todo: how to probe? which is revision_register */
 	bfin_write_EMAC_ADDRLO(0x12345678);
 	if (bfin_read_EMAC_ADDRLO() != 0x12345678) {
 		dev_err(&pdev->dev, "Cannot detect Blackfin on-chip ethernet MAC controller!\n");
@@ -1718,12 +1730,9 @@ static struct platform_driver bfin_mac_driver = {
 
 static int __init bfin_mac_init(void)
 {
-	int ret;
-
-	ret = platform_driver_register(&bfin_mii_bus_driver);
-	if (!ret)
-		return platform_driver_register(&bfin_mac_driver);
-	return -ENODEV;
+	if (platform_driver_register(&bfin_mii_bus_driver))
+		return -ENODEV;
+	return platform_driver_register(&bfin_mac_driver);
 }
 
 module_init(bfin_mac_init);
-- 
1.7.2.19.g9a302

^ permalink raw reply related

* [PATCH net-next 2/3] drivers/net/bfin_mac.c: Use pr_<level>, netdev_<level>
From: Joe Perches @ 2010-07-29 23:58 UTC (permalink / raw)
  To: Michael Hennerich
  Cc: uclinux-dist-devel, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <cover.1280447281.git.joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>

Add and use pr_fmt, pr_<level> and netdev_<level>

Signed-off-by: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
---
 drivers/net/bfin_mac.c |   56 +++++++++++++++++++++--------------------------
 1 files changed, 25 insertions(+), 31 deletions(-)

diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index 0b032a5..a1d8119 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -195,8 +195,7 @@ static int desc_list_init(void)
 		/* allocate a new skb for next time receive */
 		new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
 		if (!new_skb) {
-			printk(KERN_NOTICE DRV_NAME
-			       ": init: low on mem - packet dropped\n");
+			pr_notice("init: low on mem - packet dropped\n");
 			goto init_error;
 		}
 		skb_reserve(new_skb, NET_IP_ALIGN);
@@ -247,7 +246,7 @@ static int desc_list_init(void)
 
 init_error:
 	desc_list_free();
-	printk(KERN_ERR DRV_NAME ": kmalloc failed\n");
+	pr_err("kmalloc failed\n");
 	return -ENOMEM;
 }
 
@@ -264,12 +263,11 @@ static int bfin_mdio_poll(void)
 
 	/* poll the STABUSY bit */
 	while ((bfin_read_EMAC_STAADD()) & STABUSY) {
-		udelay(1);
 		if (timeout_cnt-- < 0) {
-			printk(KERN_ERR DRV_NAME
-			": wait MDC/MDIO transaction to complete timeout\n");
+			pr_err("wait MDC/MDIO transaction to complete timeout\n");
 			return -ETIMEDOUT;
 		}
+		udelay(1);
 	}
 
 	return 0;
@@ -357,9 +355,9 @@ static void bfin_mac_adjust_link(struct net_device *dev)
 				opmode &= ~(RMII_10);
 				break;
 			default:
-				printk(KERN_WARNING
-					"%s: Ack!  Speed (%d) is not 10/100!\n",
-					DRV_NAME, phydev->speed);
+				netdev_warn(dev,
+					    "Ack!  Speed (%d) is not 10/100!\n",
+					    phydev->speed);
 				break;
 			}
 			bfin_write_EMAC_OPMODE(opmode);
@@ -383,7 +381,7 @@ static void bfin_mac_adjust_link(struct net_device *dev)
 	if (new_state) {
 		u32 opmode = bfin_read_EMAC_OPMODE();
 		phy_print_status(phydev);
-		pr_debug("EMAC_OPMODE = 0x%08x\n", opmode);
+		netdev_dbg(dev, "EMAC_OPMODE = 0x%08x\n", opmode);
 	}
 
 	spin_unlock_irqrestore(&lp->lock, flags);
@@ -424,8 +422,7 @@ static int mii_probe(struct net_device *dev)
 
 	/* now we are supposed to have a proper phydev to attach to... */
 	if (!phydev) {
-		printk(KERN_INFO "%s: Don't found any phy device at all\n",
-			dev->name);
+		netdev_info(dev, "No PHY device found\n");
 		return -ENODEV;
 	}
 
@@ -438,7 +435,7 @@ static int mii_probe(struct net_device *dev)
 #endif
 
 	if (IS_ERR(phydev)) {
-		printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
+		netdev_err(dev, "Could not attach to PHY\n");
 		return PTR_ERR(phydev);
 	}
 
@@ -460,11 +457,11 @@ static int mii_probe(struct net_device *dev)
 	lp->old_duplex = -1;
 	lp->phydev = phydev;
 
-	printk(KERN_INFO "%s: attached PHY driver [%s] "
-	       "(mii_bus:phy_addr=%s, irq=%d, mdc_clk=%dHz(mdc_div=%d)"
-	       "@sclk=%dMHz)\n",
-	       DRV_NAME, phydev->drv->name, dev_name(&phydev->dev), phydev->irq,
-	       MDC_CLK, mdc_div, sclk/1000000);
+	netdev_info(dev, "attached PHY driver [%s] "
+		    "(mii_bus:phy_addr=%s, irq=%d, mdc_clk=%dHz(mdc_div=%d)"
+		    "@sclk=%dMHz)\n",
+		    phydev->drv->name, dev_name(&phydev->dev), phydev->irq,
+		    MDC_CLK, mdc_div, sclk/1000000);
 
 	return 0;
 }
@@ -833,8 +830,7 @@ static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb)
 		       (--timeout_cnt))
 			udelay(1);
 		if (timeout_cnt == 0)
-			printk(KERN_ERR DRV_NAME
-					": fails to timestamp the TX packet\n");
+			pr_err("failed to timestamp the TX packet\n");
 		else {
 			struct skb_shared_hwtstamps shhwtstamps;
 			u64 ns;
@@ -1091,8 +1087,7 @@ static void bfin_mac_rx(struct net_device *dev)
 	 * we which case we simply drop the packet
 	 */
 	if (current_rx_ptr->status.status_word & RX_ERROR_MASK) {
-		printk(KERN_NOTICE DRV_NAME
-		       ": rx: receive error - packet dropped\n");
+		netdev_notice(dev, "rx: receive error - packet dropped\n");
 		dev->stats.rx_dropped++;
 		goto out;
 	}
@@ -1102,8 +1097,7 @@ static void bfin_mac_rx(struct net_device *dev)
 
 	new_skb = dev_alloc_skb(PKT_BUF_SZ + NET_IP_ALIGN);
 	if (!new_skb) {
-		printk(KERN_NOTICE DRV_NAME
-		       ": rx: low on mem - packet dropped\n");
+		pr_notice("rx: low on mem - packet dropped\n");
 		dev->stats.rx_dropped++;
 		goto out;
 	}
@@ -1226,7 +1220,7 @@ static int bfin_mac_enable(void)
 	int ret;
 	u32 opmode;
 
-	pr_debug("%s: %s\n", DRV_NAME, __func__);
+	pr_debug("%s\n", __func__);
 
 	/* Set RX DMA */
 	bfin_write_DMA1_NEXT_DESC_PTR(&(rx_list_head->desc_a));
@@ -1268,7 +1262,7 @@ static void bfin_mac_timeout(struct net_device *dev)
 {
 	struct bfin_mac_local *lp = netdev_priv(dev);
 
-	pr_debug("%s: %s\n", dev->name, __func__);
+	netdev_dbg(dev, "%s\n", __func__);
 
 	bfin_mac_disable();
 
@@ -1335,7 +1329,7 @@ static void bfin_mac_set_multicast_list(struct net_device *dev)
 	u32 sysctl;
 
 	if (dev->flags & IFF_PROMISC) {
-		printk(KERN_INFO "%s: set to promisc mode\n", dev->name);
+		netdev_info(dev, "set to promisc mode\n");
 		sysctl = bfin_read_EMAC_OPMODE();
 		sysctl |= PR;
 		bfin_write_EMAC_OPMODE(sysctl);
@@ -1389,7 +1383,7 @@ static int bfin_mac_open(struct net_device *dev)
 {
 	struct bfin_mac_local *lp = netdev_priv(dev);
 	int ret;
-	pr_debug("%s: %s\n", dev->name, __func__);
+	netdev_dbg(dev, "%s\n", __func__);
 
 	/*
 	 * Check that the address is valid.  If its not, refuse
@@ -1397,7 +1391,7 @@ static int bfin_mac_open(struct net_device *dev)
 	 * address using ifconfig eth0 hw ether xx:xx:xx:xx:xx:xx
 	 */
 	if (!is_valid_ether_addr(dev->dev_addr)) {
-		printk(KERN_WARNING DRV_NAME ": no valid ethernet hw addr\n");
+		netdev_warn(dev, "no valid ethernet hw addr\n");
 		return -EINVAL;
 	}
 
@@ -1415,7 +1409,7 @@ static int bfin_mac_open(struct net_device *dev)
 	ret = bfin_mac_enable();
 	if (ret)
 		return ret;
-	pr_debug("hardware init finished\n");
+	netdev_dbg(dev, "hardware init finished\n");
 
 	netif_start_queue(dev);
 	netif_carrier_on(dev);
@@ -1431,7 +1425,7 @@ static int bfin_mac_open(struct net_device *dev)
 static int bfin_mac_close(struct net_device *dev)
 {
 	struct bfin_mac_local *lp = netdev_priv(dev);
-	pr_debug("%s: %s\n", dev->name, __func__);
+	netdev_dbg(dev, "%s\n", __func__);
 
 	netif_stop_queue(dev);
 	netif_carrier_off(dev);
-- 
1.7.2.19.g9a302

^ permalink raw reply related

* [PATCH net-next 0/3] drivers/net/bfin_mac.c: Neatening
From: Joe Perches @ 2010-07-29 23:58 UTC (permalink / raw)
  To: Michael Hennerich
  Cc: Mike Frysinger, uclinux-dist-devel, netdev, linux-kernel

Ho-hum, broken up into pieces just for Mike Frysinger.

Still uncompiled, untested

Joe Perches (3):
  drivers/net/bfin_mac.c: Neatening
  drivers/net/bfin_mac.c: Use pr_<level>, netdev_<level>
  drivers/net/bfin_mac.c: Misc function cleanups, neatening

 drivers/net/bfin_mac.c |  329 ++++++++++++++++++++++++++----------------------
 1 files changed, 176 insertions(+), 153 deletions(-)

-- 
1.7.2.19.g9a302

^ permalink raw reply

* Re: [RFC PATCH v8 00/16] Provide a zero-copy method on KVM virtio-net.
From: Shirley Ma @ 2010-07-29 22:31 UTC (permalink / raw)
  To: xiaohui.xin; +Cc: netdev, kvm, linux-kernel, mst, mingo, davem, herbert, jdike
In-Reply-To: <1280402088-5849-1-git-send-email-xiaohui.xin@intel.com>

Hello Xiaohui,

On Thu, 2010-07-29 at 19:14 +0800, xiaohui.xin@intel.com wrote:
> The idea is simple, just to pin the guest VM user space and then
> let host NIC driver has the chance to directly DMA to it. 
> The patches are based on vhost-net backend driver. We add a device
> which provides proto_ops as sendmsg/recvmsg to vhost-net to
> send/recv directly to/from the NIC driver. KVM guest who use the
> vhost-net backend may bind any ethX interface in the host side to
> get copyless data transfer thru guest virtio-net frontend.

Since vhost-net already supports macvtap/tun backends, do you think it
would be better to implement zero copy in macvtap/tun rather than
introducing a new media passthrough device here?

> Our goal is to improve the bandwidth and reduce the CPU usage.
> Exact performance data will be provided later.

I did some vhost performance measurements over 10Gb ixgbe, and found that
in order to get consistent BW results, SMP affinities must be set for the
netperf/netserver, qemu, and vhost threads.

Looking forward to these results for the small message size comparison. For
large message sizes, 10Gb ixgbe BW is already reached by setting vhost smp
affinity w/i offloading support, so we will see how much CPU utilization
can be reduced.

Please provide latency results as well. I did some experiments with
macvtap zero copy sendmsg, and what I found is that get_user_pages latency
is pretty high.

Thanks
Shirley





^ permalink raw reply

* pull request: wireless-next-2.6 2010-07-29
From: John W. Linville @ 2010-07-29 19:12 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA

Dave,

Yet another slew of changes intended for 2.6.36...

For the first time, this pull request includes a batch of bluetooth
stuff by way of Marcel.  Some upcoming developments are likely to
require more extensive integration between 802.11 and Bluetooth bits, so
Marcel's tree will be feeding wireless-next-2.6 for a while.

The rest is the usual stuff from the usual suspects -- mostly driver
updates with the usual strong showings from ath9k and iwlwifi, this time
joined by libertas in particular.

This is a "for-davem" branch, so hopefully there will be no pain for you
to pull this time. :-)

Please let me know if there are problems!

John

---

The following changes since commit 7f3e01fee41a322747db2d7574516d9fbd3785c0:

  net: bnx2x_cmn.c needs net/ip6_checksum.h for csum_ipv6_magic (2010-07-28 22:20:34 -0700)

are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next-2.6.git for-davem

Amitkumar Karwar (1):
      Bluetooth: Process interrupt in main thread of btmrvl driver as well

Christian Lamparter (1):
      cfg80211: fix dev <-> wiphy typo

Cyril Lacoux (1):
      Bluetooth: Added support for controller shipped with iMac i5

Dan Carpenter (6):
      ath9k: snprintf() returns largish values
      ath5k: snprintf() returns largish values
      mac80211: freeing the wrong variable
      wireless: remove unneeded variable from regulatory_hint_11d()
      libertas: precedence bug
      Bluetooth: Fix kfree() => kfree_skb() in hci_ath.c

Dan Williams (16):
      libertas: clean up MONITOR_MODE command
      libertas: clean up RSSI command
      libertas: convert 11D_DOMAIN_INFO to a direct command
      libertas: remove unused indirect TPC_CFG command leftovers
      libertas: remove unused Automatic Frequency Control command
      libertas: remove Beacon Control
      libertas: convert LED_GPIO_CTRL to a direct command
      libertas: convert register access to direct commands
      libertas: convert Mesh Blinding Table access to a direct command
      libertas: convert CMD_FWT_ACCESS to a direct command
      libertas: remove unused indirect command response handler
      libertas: convert PS_MODE to a direct command
      libertas: convert DEEP_SLEEP timer to a direct command
      libertas: kill unused lbs_prepare_and_send_command()
      libertas: rename lbs_get_cmd_ctrl_node() to lbs_get_free_cmd_node()
      libertas: remove unused cmd_pending waitq

Felix Fietkau (5):
      ath9k: fix yet another buffer leak in the tx aggregation code
      ath9k_hw: fix invalid extension channel noisefloor readings in HT20
      ath9k_hw: fix a small typo in the noisefloor calibration debug code
      ath9k_hw: simplify noisefloor calibration chainmask calculation
      mac80211: inform drivers about the off-channel status on channel changes

Gustavo F. Padovan (44):
      Bluetooth: Remove max_tx and tx_window module paramenters from L2CAP
      Bluetooth: Remove L2CAP Extended Features from Kconfig
      Bluetooth: Fix drop of packets with invalid req_seq/tx_seq
      Bluetooth: Fix bug with ERTM vars increment
      Bluetooth: Only check SAR bits if frame is an I-frame
      Bluetooth: Fix bug in l2cap_ertm_send() behavior
      Bluetooth: Proper shutdown ERTM when closing the channel
      Bluetooth: Fix L2CAP control bit field corruption
      Bluetooth: Stop ack_timer if ERTM enters in Local Busy or SREJ_SENT
      Bluetooth: Update buffer_seq before retransmit frames
      Bluetooth: Fix handle of received P-bit
      Bluetooth: Check the tx_window size on setsockopt
      Bluetooth: Check packet FCS earlier
      Bluetooth: Fix missing retransmission action with RR(P=1)
      Bluetooth: Fix ERTM error reporting to the userspace
      Bluetooth: Add debug output to ERTM code
      Bluetooth: Tweaks to l2cap_send_i_or_rr_or_rnr() flow
      Bluetooth: Change the way we set ERTM mode as mandatory
      Bluetooth: Disconnect the channel if we don't want the proposed mode
      Bluetooth: Prefer Basic Mode on receipt of ConfigReq
      Bluetooth: Actively send request for Basic Mode
      Bluetooth: Refuse ConfigRsp with different mode
      Bluetooth: Remove check for supported mode
      Bluetooth: Disconnect early if mode is not supported
      Bluetooth: Don't accept ConfigReq if we aren't in the BT_CONFIG state
      Bluetooth: Remove the send_lock spinlock from ERTM
      Bluetooth: Add backlog queue to ERTM code
      Bluetooth: Improve ERTM local busy handling
      Bluetooth: Send ConfigReq after send a ConnectionRsp
      Bluetooth: Fix bug in kzalloc allocation size
      Bluetooth: Keep code under column 80
      Bluetooth: Add Copyright notice to L2CAP
      Bluetooth: Update L2CAP version information
      Bluetooth: Add Google's copyright to L2CAP
      Bluetooth: Move bit-field variable in USB driver to data->flags
      Bluetooth: Fix typo in hci_event.c
      Bluetooth: Enable L2CAP Extended features by default
      Bluetooth: Use __packed annotation
      Bluetooth: Use __packed annotation for drivers
      Bluetooth: Fix permission of hci_ath.c
      Bluetooth: Test 'count' value before enter the loop
      Bluetooth: Use hci_recv_stream_fragment() in UART driver
      Bluetooth: Add __init and __exit marks to UART drivers
      Bluetooth: Add __init and __exit marks to RFCOMM

Ivo van Doorn (1):
      rt2x00: Fix regression for rt2500pci

Joe Perches (5):
      Bluetooth: Remove unnecessary casts of private_data in drivers
      include/net/cfg80211.h: Add wiphy_<level> printk equivalents
      drivers/net/wireless: Use wiphy_<level>
      drivers/net/wireless/at76c50x-usb.c: Neaten macros
      wireless: Convert wiphy_debug macro to function

Johan Hedberg (4):
      Bluetooth: Add blacklist support for incoming connections
      Bluetooth: Add debugfs support for showing the blacklist
      Bluetooth: Add missing HCIUARTGETDEVICE ioctl to compat_ioctl.c
      Bluetooth: Add HCIUARTSETFLAGS and HCIUARTGETFLAGS ioctls

Johannes Berg (8):
      iwlagn: fix firmware loading TLV error path
      iwlwifi: make iwl_mac_beacon_update static
      iwlwifi: reduce beacon fill conditions
      iwlwifi: remove spurious semicolons
      mac80211: remove bogus rcu_read_lock()
      cfg80211: fix IBSS default management key
      mac80211: fix sta assignment
      mac80211: allow drivers to request DTIM period

John W. Linville (24):
      iwlagn: use __packed on new structure definitions
      wl1251: fix sparse-generated warnings
      rtl8180: improve signal reporting for actual rtl8180 hardware
      rtl8180: silence "dubious: x | !y" sparse warning
      MAINTAINERS: mark prism54 obsolete
      MAINTAINERS: orphan the raylink wireless driver
      MAINTAINERS: orphan the zd1201 wireless driver
      MAINTAINERS: remove entry for wavelan
      iwlwifi: assume vif is NULL for internal scans and non-NULL otherwise
      minstrel_ht: remove unnecessary NULL check in minstrel_ht_update_caps
      minstrel: don't complain about feedback for unrequested rates
      lib80211: remove unused host_build_iv option
      Merge branch 'master' of git://git.kernel.org/.../holtmann/bluetooth-next-2.6
      p54: Added get_survey callback in order to get channel noise
      Merge branch 'master' of git://git.kernel.org/.../holtmann/bluetooth-next-2.6
      ath9k: enable serialize_regmode for non-PCIE AR9160
      mwl8k: add get_survey callback in order to get channel noise
      ar9170: add get_survey callback in order to get channel noise
      wl1251: add get_survey callback in order to get channel noise
      libertas_tf: add get_survey callback in order to get channel noise
      wl1271: add get_survey callback in order to get channel noise
      wl1251: update hw/fw version info in wiphy struct
      wl1271: update hw/fw version info in wiphy struct
      Merge branch 'master' of git://git.kernel.org/.../linville/wireless-next-2.6 into for-davem

Jouni Malinen (1):
      mac80211: Fix key freeing to handle unlinked keys

João Paulo Rechi Vita (8):
      Bluetooth: Fix SREJ_QUEUE corruption in L2CAP
      Bluetooth: Fix l2cap_sock_connect error return.
      Bluetooth: Make l2cap_streaming_send() void.
      Bluetooth: Fix error return value on sendmsg.
      Bluetooth: Fix error return value on sendmsg.
      Bluetooth: Fix error return for l2cap_connect_rsp().
      Bluetooth: Fix error value for wrong FCS.
      Bluetooth: Fix error return on L2CAP-HCI interface.

Julia Lawall (3):
      Bluetooth: Use kmemdup for drivers
      Bluetooth: Use kzalloc for drivers
      drivers/net/wireless/wl12xx: Use kmemdup

Justin P. Mattock (1):
      Bluetooth: Fix warning: variable 'tty' set but not used

Kulikov Vasiliy (1):
      Bluetooth: Silence warning in btmrvl SDIO driver

Lennert Buytenhek (1):
      mwl8k: change maintenance status

Luis R. Rodriguez (2):
      ath9k: remove the two wiphys scanning at the same time message
      Revert "mac80211: fix sw scan bracketing"

Marcel Holtmann (1):
      Bluetooth: Defer SCO setup if mode change is pending

Nathan Holstein (1):
      Bluetooth: Fix bug with ERTM minimum packet length

Ron Shaffer (2):
      Bluetooth: Remove extraneous white space
      Bluetooth: Reassigned copyright to Code Aurora Forum

Senthil Balasubramanian (4):
      ath9k: Introduce bit masks for valid and valid_single_stream.
      ath9k: Add three stream rate control support for AR938X.
      ath9k: Fix incorrect user ratekbs of MCS15 ShortGI
      ath9k: remove unused base_index from rate table.

Stanislaw Gruszka (4):
      rt2500usb: write keys to proper registers
      rt2500usb: truly disable encryption when initialize
      rt2500usb: disallow to set WEP key with non zero index
      iwlwifi: fix scan abort

Sujith (1):
      mac80211: Don't set per-BSS QoS for monitor interfaces

Suraj Sumangala (5):
      Bluetooth: Add one more buffer for HCI stream reassembly
      Bluetooth: Implement hci_reassembly helper to reassemble RX packets
      Bluetooth: Modified hci_recv_fragment() to use hci_reassembly helper
      Bluetooth: Implemented HCI frame reassembly for RX from stream
      Bluetooth: Support for Atheros AR300x serial chip

Vasanthakumar Thiagarajan (1):
      ath9k: Fix inconsistency between txq->stopped and the actual queue state

Wey-Yi Guy (6):
      iwlagn: add statistic notification structure for WiFi/BT devices
      iwlagn: add .cfg flag to idenfity the need for bt statistics
      iwlagn: Add support for bluetooth statistics notification
      iwlagn: add bluetooth stats to debugfs
      iwlwifi: add TLV to specify the size of phy calibration table
      iwlwifi: read multiple MAC addresses

Yuri Ershov (3):
      mac80211: Put some code under MESH macro
      nl80211: Fix memory leaks
      cfg80211: Update of regulatory request initiator handling

 MAINTAINERS                                    |   20 +-
 drivers/bluetooth/Kconfig                      |   12 +
 drivers/bluetooth/Makefile                     |    1 +
 drivers/bluetooth/bcm203x.c                    |    3 +-
 drivers/bluetooth/bpa10x.c                     |    2 +-
 drivers/bluetooth/btmrvl_debugfs.c             |    2 +-
 drivers/bluetooth/btmrvl_drv.h                 |    5 +-
 drivers/bluetooth/btmrvl_main.c                |    5 +-
 drivers/bluetooth/btmrvl_sdio.c                |  111 ++--
 drivers/bluetooth/btusb.c                      |   13 +-
 drivers/bluetooth/dtl1_cs.c                    |    2 +-
 drivers/bluetooth/hci_ath.c                    |  235 ++++++++
 drivers/bluetooth/hci_bcsp.c                   |    4 +-
 drivers/bluetooth/hci_h4.c                     |  107 +----
 drivers/bluetooth/hci_ldisc.c                  |   20 +-
 drivers/bluetooth/hci_ll.c                     |    6 +-
 drivers/bluetooth/hci_uart.h                   |   15 +-
 drivers/net/wireless/adm8211.c                 |   53 +-
 drivers/net/wireless/at76c50x-usb.c            |  168 +++---
 drivers/net/wireless/ath/ar9170/cmd.c          |    7 +-
 drivers/net/wireless/ath/ar9170/led.c          |    4 +-
 drivers/net/wireless/ath/ar9170/main.c         |  191 ++++---
 drivers/net/wireless/ath/ar9170/phy.c          |    8 +-
 drivers/net/wireless/ath/ath5k/debug.c         |   18 +
 drivers/net/wireless/ath/ath9k/ahb.c           |    7 +-
 drivers/net/wireless/ath/ath9k/ar5008_phy.c    |    3 +
 drivers/net/wireless/ath/ath9k/ar9002_phy.c    |    6 +-
 drivers/net/wireless/ath/ath9k/ar9003_phy.c    |    3 +
 drivers/net/wireless/ath/ath9k/ath9k.h         |    2 +-
 drivers/net/wireless/ath/ath9k/calib.c         |   21 +-
 drivers/net/wireless/ath/ath9k/htc_drv_main.c  |    9 +
 drivers/net/wireless/ath/ath9k/hw.c            |    3 +-
 drivers/net/wireless/ath/ath9k/main.c          |   13 +-
 drivers/net/wireless/ath/ath9k/pci.c           |    7 +-
 drivers/net/wireless/ath/ath9k/rc.c            |  601 ++++++++++++--------
 drivers/net/wireless/ath/ath9k/rc.h            |   89 +++-
 drivers/net/wireless/ath/ath9k/virtual.c       |    6 +-
 drivers/net/wireless/ath/ath9k/xmit.c          |   20 +-
 drivers/net/wireless/ipw2x00/libipw.h          |    1 -
 drivers/net/wireless/ipw2x00/libipw_tx.c       |   16 +-
 drivers/net/wireless/ipw2x00/libipw_wx.c       |    2 +-
 drivers/net/wireless/iwlwifi/iwl-1000.c        |    1 +
 drivers/net/wireless/iwlwifi/iwl-4965.c        |    5 +-
 drivers/net/wireless/iwlwifi/iwl-5000.c        |    3 +-
 drivers/net/wireless/iwlwifi/iwl-6000.c        |    7 +
 drivers/net/wireless/iwlwifi/iwl-agn-calib.c   |   64 ++-
 drivers/net/wireless/iwlwifi/iwl-agn-debugfs.c |  225 ++++++--
 drivers/net/wireless/iwlwifi/iwl-agn-debugfs.h |    7 +
 drivers/net/wireless/iwlwifi/iwl-agn-hcmd.c    |    4 +-
 drivers/net/wireless/iwlwifi/iwl-agn-lib.c     |    7 +-
 drivers/net/wireless/iwlwifi/iwl-agn-rx.c      |  167 ++++--
 drivers/net/wireless/iwlwifi/iwl-agn.c         |  141 +++--
 drivers/net/wireless/iwlwifi/iwl-calib.h       |    6 +-
 drivers/net/wireless/iwlwifi/iwl-commands.h    |   55 ++-
 drivers/net/wireless/iwlwifi/iwl-core.c        |   65 +--
 drivers/net/wireless/iwlwifi/iwl-core.h        |    4 +-
 drivers/net/wireless/iwlwifi/iwl-debugfs.c     |   13 +
 drivers/net/wireless/iwlwifi/iwl-dev.h         |   16 +
 drivers/net/wireless/iwlwifi/iwl-eeprom.h      |    1 +
 drivers/net/wireless/iwlwifi/iwl-scan.c        |   18 +-
 drivers/net/wireless/iwlwifi/iwl3945-base.c    |   22 +-
 drivers/net/wireless/libertas/cfg.c            |  197 +------
 drivers/net/wireless/libertas/cfg.h            |    6 -
 drivers/net/wireless/libertas/cmd.c            |  718 ++++++++++--------------
 drivers/net/wireless/libertas/cmd.h            |   25 +-
 drivers/net/wireless/libertas/cmdresp.c        |  179 +------
 drivers/net/wireless/libertas/debugfs.c        |   67 +--
 drivers/net/wireless/libertas/decl.h           |    5 -
 drivers/net/wireless/libertas/defs.h           |   18 -
 drivers/net/wireless/libertas/dev.h            |    6 -
 drivers/net/wireless/libertas/host.h           |  142 ++---
 drivers/net/wireless/libertas/if_usb.c         |    4 +-
 drivers/net/wireless/libertas/main.c           |   35 +-
 drivers/net/wireless/libertas/mesh.c           |  216 ++++++--
 drivers/net/wireless/libertas/mesh.h           |   14 +-
 drivers/net/wireless/libertas/tx.c             |    2 +-
 drivers/net/wireless/libertas_tf/libertas_tf.h |    3 +
 drivers/net/wireless/libertas_tf/main.c        |   18 +
 drivers/net/wireless/mac80211_hwsim.c          |   99 ++--
 drivers/net/wireless/mwl8k.c                   |  154 +++---
 drivers/net/wireless/orinoco/cfg.c             |    5 +-
 drivers/net/wireless/p54/eeprom.c              |   76 ++--
 drivers/net/wireless/p54/fwio.c                |   53 +-
 drivers/net/wireless/p54/led.c                 |    8 +-
 drivers/net/wireless/p54/main.c                |   17 +
 drivers/net/wireless/p54/p54pci.c              |    3 +-
 drivers/net/wireless/p54/txrx.c                |   36 +-
 drivers/net/wireless/rt2x00/rt2500usb.c        |   11 +-
 drivers/net/wireless/rt2x00/rt2x00mac.c        |   19 +-
 drivers/net/wireless/rtl818x/rtl8180_dev.c     |   35 +-
 drivers/net/wireless/rtl818x/rtl8180_grf5101.c |   12 +-
 drivers/net/wireless/rtl818x/rtl8180_max2820.c |   19 +-
 drivers/net/wireless/rtl818x/rtl8180_rtl8225.c |    5 +-
 drivers/net/wireless/rtl818x/rtl8180_sa2400.c  |   28 +-
 drivers/net/wireless/rtl818x/rtl8187_dev.c     |   11 +-
 drivers/net/wireless/rtl818x/rtl8187_rtl8225.c |    8 +-
 drivers/net/wireless/rtl818x/rtl818x.h         |    1 +
 drivers/net/wireless/wl12xx/wl1251.h           |    3 +
 drivers/net/wireless/wl12xx/wl1251_boot.c      |    8 +-
 drivers/net/wireless/wl12xx/wl1251_cmd.h       |   12 +-
 drivers/net/wireless/wl12xx/wl1251_main.c      |   22 +
 drivers/net/wireless/wl12xx/wl1251_rx.c        |    6 +
 drivers/net/wireless/wl12xx/wl1251_tx.c        |   10 +-
 drivers/net/wireless/wl12xx/wl1251_tx.h        |    8 +-
 drivers/net/wireless/wl12xx/wl1271.h           |    3 +
 drivers/net/wireless/wl12xx/wl1271_main.c      |   32 +-
 drivers/net/wireless/wl12xx/wl1271_rx.c        |    7 +
 fs/compat_ioctl.c                              |    9 +-
 include/net/bluetooth/bluetooth.h              |    3 +-
 include/net/bluetooth/hci.h                    |  187 ++++---
 include/net/bluetooth/hci_core.h               |   30 +-
 include/net/bluetooth/l2cap.h                  |   34 +-
 include/net/bluetooth/rfcomm.h                 |   14 +-
 include/net/cfg80211.h                         |   65 +++-
 include/net/lib80211.h                         |    3 -
 include/net/mac80211.h                         |   12 +-
 net/bluetooth/Kconfig                          |   13 -
 net/bluetooth/hci_conn.c                       |   34 +-
 net/bluetooth/hci_core.c                       |  204 +++++--
 net/bluetooth/hci_event.c                      |   39 +-
 net/bluetooth/hci_sock.c                       |   90 +++
 net/bluetooth/hci_sysfs.c                      |   38 ++
 net/bluetooth/l2cap.c                          |  669 ++++++++++++++---------
 net/bluetooth/rfcomm/sock.c                    |    2 +-
 net/bluetooth/rfcomm/tty.c                     |    4 +-
 net/mac80211/cfg.c                             |   13 +-
 net/mac80211/ieee80211_i.h                     |    1 +
 net/mac80211/key.c                             |   13 +-
 net/mac80211/key.h                             |    3 +-
 net/mac80211/main.c                            |    3 +
 net/mac80211/mlme.c                            |   32 +-
 net/mac80211/rc80211_minstrel.c                |    1 -
 net/mac80211/rc80211_minstrel_ht.c             |    4 +-
 net/mac80211/scan.c                            |    8 +-
 net/mac80211/sta_info.c                        |    2 +-
 net/mac80211/tx.c                              |   19 +-
 net/mac80211/util.c                            |    8 +-
 net/mac80211/work.c                            |   43 ++
 net/wireless/core.c                            |   49 ++
 net/wireless/ibss.c                            |    4 +-
 net/wireless/lib80211_crypt_ccmp.c             |    1 -
 net/wireless/lib80211_crypt_tkip.c             |    1 -
 net/wireless/lib80211_crypt_wep.c              |    1 -
 net/wireless/nl80211.c                         |    2 +
 net/wireless/reg.c                             |    8 +-
 145 files changed, 3858 insertions(+), 2826 deletions(-)
 create mode 100644 drivers/bluetooth/hci_ath.c

Omnibus patch is available here:

	http://www.kernel.org/pub/linux/kernel/people/linville/wireless-next-2.6-2010-07-29.patch.bz2

-- 
John W. Linville		Someday the world will need a hero, and you
linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org			might be all we have.  Be ready.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [PATCH V4] Export SMBIOS provided firmware instance and label to sysfs
From: Narendra_K @ 2010-07-29 18:58 UTC (permalink / raw)
  To: netdev, linux-hotplug, linux-pci
  Cc: Matt_Domsch, Charles_Rose, Jordan_Hargrave, Vijay_Nijhawan,
	jbarnes
In-Reply-To: <20100726105650.GA19738@auslistsprd01.us.dell.com>

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-
> owner@vger.kernel.org] On Behalf Of Narendra K
> Sent: Monday, July 26, 2010 4:27 PM
> To: netdev@vger.kernel.org; linux-hotplug@vger.kernel.org; linux-
> pci@vger.kernel.org
> Cc: Domsch, Matt; Rose, Charles; Hargrave, Jordan; Nijhawan, Vijay
> Subject: [PATCH V4] Export SMBIOS provided firmware instance and label
> to sysfs
> 
> Hello,
> 
> V3 -> V4:
> 
> Updated the contact field in Documentation/ABI directory.
> 
> Please consider for inclusion -
> 
> From: Narendra K <narendra_k@dell.com>
> Subject: [PATCH] Export SMBIOS provided firmware instance and label to
> sysfs
> 
> This patch exports SMBIOS provided firmware instance and label
> of onboard pci devices to sysfs
> 
> Signed-off-by: Jordan Hargrave <jordan_hargrave@dell.com>
> Signed-off-by: Narendra K <narendra_k@dell.com>

Jesse,

Please let us know if there are any concerns with this patch. If the
patch is acceptable,
please consider it for inclusion.

With regards,
Narendra K




^ permalink raw reply

* Re: [Uclinux-dist-devel] [PATCH 1/2] net: dsa: introduce STPID switch tagging handling code
From: Mike Frysinger @ 2010-07-29 17:50 UTC (permalink / raw)
  To: Lennert Buytenhek
  Cc: Karl Beldan, netdev, uclinux-dist-devel, David S. Miller
In-Reply-To: <AANLkTinPtd2xXzq0MpR517OsbZWUZjdp25x8FndhQ2CP@mail.gmail.com>

On Wed, Jul 21, 2010 at 12:08, Mike Frysinger wrote:
> On Wed, Jul 21, 2010 at 11:35, Lennert Buytenhek wrote:
>> On Wed, Jul 21, 2010 at 11:29:30AM -0400, Mike Frysinger wrote:
>>> >> +     source_port = dsa_header[1] & 0x03;
>>> >> +     if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
>>> >> +             goto out_drop;
>>> >> +
>>> >> +     if (((dsa_header[0] & ETH_P_8021QH) == ETH_P_8021QH) &&
>>> >
>>> > This is bogus -- what it does is:
>>> >
>>> >        if ((dsa_header[0] & 0x81) == 0x81)
>>> >
>>> > It doesn't look like you need to mask here at all.
>>>
>>> where does it say dsa_header[0] will always have 0x81 set ?
>>
>> Eh?
>>
>> This code is checking whether the packet has a STPID tag on it or not.
>> A STPID tag exists if the first 12 bits are 0x810.
>>
>> You are checking whether the first 8 bits of this are equal to 0x81
>> by doing:
>>
>>        if ((byte & 0x81) == 0x81)
>>
>> What if the first byte is 0x93?  Or 0xc5?
>
> that was my point.  should it be masking or doing a raw compare ?

and the answer is ... ?  so i can send an updated patch ;)
-mike

^ permalink raw reply

* Re: [PATCH net-next] bonding: take rtnl in bond_loadbalance_arp_mon
From: Jay Vosburgh @ 2010-07-29 17:49 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: netdev
In-Reply-To: <20100729011355.GV7497@gospo.rdu.redhat.com>

Andy Gospodarek <andy@greyhouse.net> wrote:

>On Wed, Jul 28, 2010 at 02:39:49PM -0700, Jay Vosburgh wrote:
>> Andy Gospodarek <andy@greyhouse.net> wrote:
>> 
>> >With the latest code in net-next-2.6 the following (and similar) are
>> >spewed when using arp monitoring and balance-alb.
>> 
>> 	Does the ARP monitor function correctly for balance-alb?  My
>> recollection is that the ARP monitor probes interfere with the tailored
>> ARP messages that balance-alb sends.
>
>It seems to work fine here on a few tries (I only use sysfs for
>configuration anymore), but it might be blind luck that the addresses
>chosen are hashing out correctly to make arp monitoring work.

	I haven't tried it in a long time, but I think the problem for
balance-alb also had to do with snooping switches updating the MAC table
from the arp monitor traffic instead of (or in addition to) the
"authentic" ARPs.  I don't remember if it caused actual communication
breaks, or just messed up the traffic balance.

>> The bond_check_params function
>> disallows setting arp_interval (it forces miimon on).  I suspect this
>> nuance was missed when setting up the sysfs code, but if it does work,
>> then perhaps it is too strict.
>
>You are correct, it does.  It is clear that some checks should be added
>to the sysfs code and it also seems like some work should be done to
>more clearly define what modes support which form of link monitoring (it
>doesn't seem to me like balance-rr should support can monitoring in its
>current implementation, but there is no explicit code to check for it in
>the sysfs-layer or bond_check_params).

	Presumably you mean "balance-rr should support ARP monitoring,"
and you're right, the whole "loadbalance" ARP monitor is pretty dodgy in
general (for balance-rr and balance-xor specifically).  Since both of
those modes are intended to interface with etherchannel, the validity of
the ARP monitor's decisions is entirely dependent upon how the switch
balances incoming traffic.  If the switch does a good job and hits all
of the ports, then it'll "work."  There's a comment to this effect in
bond_loadbalance_arp_mon:

                /* note: if switch is in round-robin mode, all links
                 * must tx arp to ensure all links rx an arp - otherwise
                 * links may oscillate or not come up at all; if switch is
                 * in something like xor mode, there is nothing we can
                 * do - all replies will be rx'ed on same link causing slaves
                 * to be unstable during low/no traffic periods

	As an added tidbit, I'm not aware of any switch that has a
round-robin balance policy for its etherchannel implementation.  That
comment predates my involvement in bonding, so maybe back then there
were some 10 Mb/sec switches that did round robin.

	This also might have been useful for one switch configuration
that was used at the time, in which multiple switches not running
etherchannel were connected up such that each bonding slave was
connected to a discrete switch.  The switches were not interconnected,
so the only etherchannel was on the bonding hosts, which ran balance-rr.
This is discussed a bit in the bonding.txt, 12.2, complete with ASCII
art.  It was pretty snazzy in the "one packet per interrupt" days, but
badly reorders traffic with modern hardware (anything with packet
coalescing; NAPI probably breaks it, too, now that I think about it).

>> 	As I recall, I had deliberately left acquiring rtnl out of the
>> loadbalance_arp_mon function, since none of the modes that used it
>> required rtnl for failover.
>
>Understood.
>
>Based on your comments, at least something like the following should
>probably be done.

	I agree.

>[PATCH net-next] bonding: prevent sysfs from allowing arp monitoring with alb/tlb
>
>When using module options arp monitoring and balance-alb/balance-tlb
>are mutually exclusive options.  Anytime balance-alb/balance-tlb are
>enabled mii monitoring is forced to 100ms if not set.  When configuring
>via sysfs no checking is currently done.
>
>Handling these cases with sysfs has to be done a bit differently because
>we do not have all configuration information available at once.  This
>patch will not allow a mode change to balance-alb/balance-tlb if
>arp_interval is already non-zero.  It will also not allow the user to
>set a non-zero arp_interval value if the mode is already set to
>balance-alb/balance-tlb.  They are still mutually exclusive on a
>first-come, first-served basis.
>
>Tested with initscripts on Fedora and manual setting via sysfs.
>
>Signed-off-by: Andy Gospodarek <gospo@redhat.com>

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>

>---
> drivers/net/bonding/bond_sysfs.c |   37 +++++++++++++++++++++++++------------
> 1 files changed, 25 insertions(+), 12 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>index 1a99764..c311aed 100644
>--- a/drivers/net/bonding/bond_sysfs.c
>+++ b/drivers/net/bonding/bond_sysfs.c
>@@ -313,19 +313,26 @@ static ssize_t bonding_store_mode(struct device *d,
> 		       bond->dev->name, (int)strlen(buf) - 1, buf);
> 		ret = -EINVAL;
> 		goto out;
>-	} else {
>-		if (bond->params.mode == BOND_MODE_8023AD)
>-			bond_unset_master_3ad_flags(bond);
>+	}
>+	if ((new_value == BOND_MODE_ALB ||
>+	     new_value == BOND_MODE_TLB) &&
>+	    bond->params.arp_interval) {
>+		pr_err("%s: %s mode is incompatible with arp monitoring.\n",
>+		       bond->dev->name, bond_mode_tbl[new_value].modename);
>+		ret = -EINVAL;
>+		goto out;
>+	}
>+	if (bond->params.mode == BOND_MODE_8023AD)
>+		bond_unset_master_3ad_flags(bond);
>
>-		if (bond->params.mode == BOND_MODE_ALB)
>-			bond_unset_master_alb_flags(bond);
>+	if (bond->params.mode == BOND_MODE_ALB)
>+		bond_unset_master_alb_flags(bond);
>
>-		bond->params.mode = new_value;
>-		bond_set_mode_ops(bond, bond->params.mode);
>-		pr_info("%s: setting mode to %s (%d).\n",
>-			bond->dev->name, bond_mode_tbl[new_value].modename,
>-		       new_value);
>-	}
>+	bond->params.mode = new_value;
>+	bond_set_mode_ops(bond, bond->params.mode);
>+	pr_info("%s: setting mode to %s (%d).\n",
>+		bond->dev->name, bond_mode_tbl[new_value].modename,
>+		new_value);
> out:
> 	return ret;
> }
>@@ -510,7 +517,13 @@ static ssize_t bonding_store_arp_interval(struct device *d,
> 		ret = -EINVAL;
> 		goto out;
> 	}
>-
>+	if (bond->params.mode == BOND_MODE_ALB ||
>+	    bond->params.mode == BOND_MODE_TLB) {
>+		pr_info("%s: ARP monitoring cannot be used with ALB/TLB. Only MII monitoring is supported on %s.\n",
>+			bond->dev->name, bond->dev->name);
>+		ret = -EINVAL;
>+		goto out;
>+	}
> 	pr_info("%s: Setting ARP monitoring interval to %d.\n",
> 		bond->dev->name, new_value);
> 	bond->params.arp_interval = new_value;
>-- 
>1.7.0.1
>

^ permalink raw reply

* Re: [PATCH] sky2: Code style fixes
From: Stephen Hemminger @ 2010-07-29 17:27 UTC (permalink / raw)
  To: Mike McCormack, David Miller; +Cc: netdev
In-Reply-To: <4C51837C.4020505@ring3k.org>

On Thu, 29 Jul 2010 22:34:52 +0900
Mike McCormack <mikem@ring3k.org> wrote:

> Fix selected style problems reported by checkpatch.
> 
> Signed-off-by: Mike McCormack <mikem@ring3k.org>

Acked-by: Stephen Hemminger <shemminger@vyatta.com>

-- 

^ permalink raw reply

* [PATCH -next] net: ks8842 depends on DMA_ENGINE
From: Randy Dunlap @ 2010-07-29 17:14 UTC (permalink / raw)
  To: Stephen Rothwell, davem; +Cc: linux-next, LKML, netdev
In-Reply-To: <20100729143211.9e7edaed.sfr@canb.auug.org.au>

From: Randy Dunlap <randy.dunlap@oracle.com>

ks8842 uses dma channel functions, so it should depend on DMA_ENGINE.

ERROR: "__dma_request_channel" [drivers/net/ks8842.ko] undefined!
ERROR: "dma_release_channel" [drivers/net/ks8842.ko] undefined!

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
---
 drivers/net/Kconfig |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- linux-next-20100729.orig/drivers/net/Kconfig
+++ linux-next-20100729/drivers/net/Kconfig
@@ -1751,7 +1751,7 @@ config TLAN
 
 config KS8842
 	tristate "Micrel KSZ8841/42 with generic bus interface"
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && DMA_ENGINE
 	help
 	 This platform driver is for KSZ8841(1-port) / KS8842(2-port)
 	 ethernet switch chip (managed, VLAN, QoS) from Micrel or

^ permalink raw reply

* Re: [Uclinux-dist-devel] [PATCH net-next] drivers/net/bfin_mac.c: Use pr_fmt, netdev_<level>
From: Mike Frysinger @ 2010-07-29 16:24 UTC (permalink / raw)
  To: Joe Perches; +Cc: Michael Hennerich, uclinux-dist-devel, LKML, netdev
In-Reply-To: <AANLkTimHYS8BRBbomxwyqwayu8eyp+k6TurtsniNTMfs@mail.gmail.com>

On Wed, Jul 28, 2010 at 11:36, Mike Frysinger wrote:
> On Wed, Jul 28, 2010 at 06:00, Joe Perches wrote:
>> On Wed, 2010-07-28 at 03:50 -0400, Mike Frysinger wrote:
>>> On Tue, Jul 27, 2010 at 15:22, Joe Perches wrote:
>>> > $ ./scripts/checkpatch.pl -f drivers/net/bfin_mac.c | grep "^total:"
>>> > total: 2 errors, 25 warnings, 1723 lines checked
>>> > $ ./scripts/checkpatch.pl -f drivers/net/bfin_mac.c | grep "^total:"
>>> > total: 0 errors, 0 warnings, 1743 lines checked
>>> i dislike the mixing of whitespace and useful changes
>>> if they were split, and they worked, then i wouldnt have a problem with them
>>
>> Is that a nak or a dislike?
>
> if you arent going to bother fixing things, then i guess it's a NAK ;)

to be clear, i dont have a problem with the direction you're going
with things, just the steps to get there.  i dont like throwing up
"NAK" at postings because that often sounds like a "f-you!" in my
head.  i'd rather suggest changes and then ACK the updated patch(es).
-mike

^ permalink raw reply

* [PATCH net-next 4/4] be2net: fix to avoid sending get_stats request if one is already being processed.
From: Ajit Khaparde @ 2010-07-29 16:18 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

GET_STATS request uses the same memory region as the response.
If a new request for get stats is fired before the response for
the previous get_stats request is received, the response will
corrupt the new request, causing the f/w to misbehave.

Signed-off-by: Somnath K <somnathk@serverengines.com>
Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
---
 drivers/net/benet/be.h      |    1 +
 drivers/net/benet/be_cmds.c |    2 ++
 drivers/net/benet/be_main.c |    3 ++-
 3 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h
index 5e6f581..99197bd 100644
--- a/drivers/net/benet/be.h
+++ b/drivers/net/benet/be.h
@@ -289,6 +289,7 @@ struct be_adapter {
 	u32 rx_fc;		/* Rx flow control */
 	u32 tx_fc;		/* Tx flow control */
 	bool ue_detected;
+	bool stats_ioctl_sent;
 	int link_speed;
 	u8 port_type;
 	u8 transceiver;
diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index 7fd860d..3d30549 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -75,6 +75,7 @@ static int be_mcc_compl_process(struct be_adapter *adapter,
 			be_dws_le_to_cpu(&resp->hw_stats,
 						sizeof(resp->hw_stats));
 			netdev_stats_update(adapter);
+			adapter->stats_ioctl_sent = false;
 		}
 	} else if ((compl_status != MCC_STATUS_NOT_SUPPORTED) &&
 		   (compl->tag0 != OPCODE_COMMON_NTWK_MAC_QUERY)) {
@@ -951,6 +952,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd)
 	sge->len = cpu_to_le32(nonemb_cmd->size);
 
 	be_mcc_notify(adapter);
+	adapter->stats_ioctl_sent = true;
 
 err:
 	spin_unlock_bh(&adapter->mcc_lock);
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index e4a8ae3..74e146f 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1801,7 +1801,8 @@ static void be_worker(struct work_struct *work)
 	struct be_adapter *adapter =
 		container_of(work, struct be_adapter, work.work);
 
-	be_cmd_get_stats(adapter, &adapter->stats.cmd);
+	if (!adapter->stats_ioctl_sent)
+		be_cmd_get_stats(adapter, &adapter->stats.cmd);
 
 	/* Set EQ delay */
 	be_rx_eqd_update(adapter);
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH net-next 3/4] be2net: change to show correct physical link status
From: Ajit Khaparde @ 2010-07-29 16:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

link status is wrongly displayed under certain circumstances.
This change fixes it.

Signed-off-by: Somnath K <somnathk@serverengines.com>
Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
---
 drivers/net/benet/be_ethtool.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/net/benet/be_ethtool.c b/drivers/net/benet/be_ethtool.c
index c0ade24..cd16243 100644
--- a/drivers/net/benet/be_ethtool.c
+++ b/drivers/net/benet/be_ethtool.c
@@ -322,10 +322,11 @@ static int be_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 	int status;
 	u16 intf_type;
 
-	if (adapter->link_speed < 0) {
+	if ((adapter->link_speed < 0) || (!(netdev->flags & IFF_UP))) {
 		status = be_cmd_link_status_query(adapter, &link_up,
 						&mac_speed, &link_speed);
 
+		be_link_status_update(adapter, link_up);
 		/* link_speed is in units of 10 Mbps */
 		if (link_speed) {
 			ecmd->speed = link_speed*10;
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH net-next 2/4] be2net: add code to dump registers for debug
From: Ajit Khaparde @ 2010-07-29 16:16 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

when the BE device becomes unresponsive, dump the registers to help debugging

Signed-off-by: Somnath K <somnathk@serverengines.com>
Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
---
 drivers/net/benet/be.h      |    1 +
 drivers/net/benet/be_cmds.c |    1 +
 drivers/net/benet/be_cmds.h |    1 +
 drivers/net/benet/be_hw.h   |   10 ++++
 drivers/net/benet/be_main.c |  127 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 140 insertions(+), 0 deletions(-)

diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h
index e06369c..5e6f581 100644
--- a/drivers/net/benet/be.h
+++ b/drivers/net/benet/be.h
@@ -288,6 +288,7 @@ struct be_adapter {
 	u32 function_mode;
 	u32 rx_fc;		/* Rx flow control */
 	u32 tx_fc;		/* Tx flow control */
+	bool ue_detected;
 	int link_speed;
 	u8 port_type;
 	u8 transceiver;
diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index 6eaf8a3..7fd860d 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -206,6 +206,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db)
 
 		if (msecs > 4000) {
 			dev_err(&adapter->pdev->dev, "mbox poll timed out\n");
+			be_dump_ue(adapter);
 			return -1;
 		}
 
diff --git a/drivers/net/benet/be_cmds.h b/drivers/net/benet/be_cmds.h
index 036531c..bdc10a2 100644
--- a/drivers/net/benet/be_cmds.h
+++ b/drivers/net/benet/be_cmds.h
@@ -992,4 +992,5 @@ extern int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
 extern int be_cmd_get_phy_info(struct be_adapter *adapter,
 		struct be_dma_mem *cmd);
 extern int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain);
+extern void be_dump_ue(struct be_adapter *adapter);
 
diff --git a/drivers/net/benet/be_hw.h b/drivers/net/benet/be_hw.h
index 0683967..6c8f9bb 100644
--- a/drivers/net/benet/be_hw.h
+++ b/drivers/net/benet/be_hw.h
@@ -56,6 +56,16 @@
 #define PCICFG_PM_CONTROL_OFFSET		0x44
 #define PCICFG_PM_CONTROL_MASK			0x108	/* bits 3 & 8 */
 
+/********* Online Control Registers *******/
+#define PCICFG_ONLINE0				0xB0
+#define PCICFG_ONLINE1				0xB4
+
+/********* UE Status and Mask Registers ***/
+#define PCICFG_UE_STATUS_LOW			0xA0
+#define PCICFG_UE_STATUS_HIGH			0xA4
+#define PCICFG_UE_STATUS_LOW_MASK		0xA8
+#define PCICFG_UE_STATUS_HI_MASK		0xAC
+
 /********* ISR0 Register offset **********/
 #define CEV_ISR0_OFFSET 			0xC18
 #define CEV_ISR_SIZE				4
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index e72b482..e4a8ae3 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -40,6 +40,76 @@ static DEFINE_PCI_DEVICE_TABLE(be_dev_ids) = {
 	{ 0 }
 };
 MODULE_DEVICE_TABLE(pci, be_dev_ids);
+/* UE Status Low CSR */
+static char *ue_status_low_desc[] = {
+	"CEV",
+	"CTX",
+	"DBUF",
+	"ERX",
+	"Host",
+	"MPU",
+	"NDMA",
+	"PTC ",
+	"RDMA ",
+	"RXF ",
+	"RXIPS ",
+	"RXULP0 ",
+	"RXULP1 ",
+	"RXULP2 ",
+	"TIM ",
+	"TPOST ",
+	"TPRE ",
+	"TXIPS ",
+	"TXULP0 ",
+	"TXULP1 ",
+	"UC ",
+	"WDMA ",
+	"TXULP2 ",
+	"HOST1 ",
+	"P0_OB_LINK ",
+	"P1_OB_LINK ",
+	"HOST_GPIO ",
+	"MBOX ",
+	"AXGMAC0",
+	"AXGMAC1",
+	"JTAG",
+	"MPU_INTPEND"
+};
+/* UE Status High CSR */
+static char *ue_status_hi_desc[] = {
+	"LPCMEMHOST",
+	"MGMT_MAC",
+	"PCS0ONLINE",
+	"MPU_IRAM",
+	"PCS1ONLINE",
+	"PCTL0",
+	"PCTL1",
+	"PMEM",
+	"RR",
+	"TXPB",
+	"RXPP",
+	"XAUI",
+	"TXP",
+	"ARM",
+	"IPC",
+	"HOST2",
+	"HOST3",
+	"HOST4",
+	"HOST5",
+	"HOST6",
+	"HOST7",
+	"HOST8",
+	"HOST9",
+	"NETC"
+	"Unknown",
+	"Unknown",
+	"Unknown",
+	"Unknown",
+	"Unknown",
+	"Unknown",
+	"Unknown",
+	"Unknown"
+};
 
 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 {
@@ -1673,6 +1743,59 @@ static int be_poll_tx_mcc(struct napi_struct *napi, int budget)
 	return 1;
 }
 
+static inline bool be_detect_ue(struct be_adapter *adapter)
+{
+	u32 online0 = 0, online1 = 0;
+
+	pci_read_config_dword(adapter->pdev, PCICFG_ONLINE0, &online0);
+
+	pci_read_config_dword(adapter->pdev, PCICFG_ONLINE1, &online1);
+
+	if (!online0 || !online1) {
+		adapter->ue_detected = true;
+		dev_err(&adapter->pdev->dev,
+			"UE Detected!! online0=%d online1=%d\n",
+			online0, online1);
+		return true;
+	}
+
+	return false;
+}
+
+void be_dump_ue(struct be_adapter *adapter)
+{
+	u32 ue_status_lo, ue_status_hi, ue_status_lo_mask, ue_status_hi_mask;
+	u32 i;
+
+	pci_read_config_dword(adapter->pdev,
+				PCICFG_UE_STATUS_LOW, &ue_status_lo);
+	pci_read_config_dword(adapter->pdev,
+				PCICFG_UE_STATUS_HIGH, &ue_status_hi);
+	pci_read_config_dword(adapter->pdev,
+				PCICFG_UE_STATUS_LOW_MASK, &ue_status_lo_mask);
+	pci_read_config_dword(adapter->pdev,
+				PCICFG_UE_STATUS_HI_MASK, &ue_status_hi_mask);
+
+	ue_status_lo = (ue_status_lo & (~ue_status_lo_mask));
+	ue_status_hi = (ue_status_hi & (~ue_status_hi_mask));
+
+	if (ue_status_lo) {
+		for (i = 0; ue_status_lo; ue_status_lo >>= 1, i++) {
+			if (ue_status_lo & 1)
+				dev_err(&adapter->pdev->dev,
+				"UE: %s bit set\n", ue_status_low_desc[i]);
+		}
+	}
+	if (ue_status_hi) {
+		for (i = 0; ue_status_hi; ue_status_hi >>= 1, i++) {
+			if (ue_status_hi & 1)
+				dev_err(&adapter->pdev->dev,
+				"UE: %s bit set\n", ue_status_hi_desc[i]);
+		}
+	}
+
+}
+
 static void be_worker(struct work_struct *work)
 {
 	struct be_adapter *adapter =
@@ -1690,6 +1813,10 @@ static void be_worker(struct work_struct *work)
 		adapter->rx_post_starved = false;
 		be_post_rx_frags(adapter);
 	}
+	if (!adapter->ue_detected) {
+		if (be_detect_ue(adapter))
+			be_dump_ue(adapter);
+	}
 
 	schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
 }
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH net-next 1/4] be2net: fix to correctly know if driver needs to run for a VF or a PF
From: Ajit Khaparde @ 2010-07-29 16:15 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Move be_check_sriov_fn_type to appropriate place to correctly determine
if the be2net driver needs to work as a VF driver or a PF driver.

Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
---
 drivers/net/benet/be_main.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index d5b097d..e72b482 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -1717,10 +1717,11 @@ static void be_msix_enable(struct be_adapter *adapter)
 
 static void be_sriov_enable(struct be_adapter *adapter)
 {
-#ifdef CONFIG_PCI_IOV
-	int status;
 	be_check_sriov_fn_type(adapter);
+#ifdef CONFIG_PCI_IOV
 	if (be_physfn(adapter) && num_vfs) {
+		int status;
+
 		status = pci_enable_sriov(adapter->pdev, num_vfs);
 		adapter->sriov_enabled = status ? false : true;
 	}
-- 
1.7.0.4


^ permalink raw reply related

* Re: [PATCH] drivers: Fixes a typo from "dev" to "ndev" that caused compilation errors.
From: Vasiliy Kulikov @ 2010-07-29 16:03 UTC (permalink / raw)
  To: Henrique Camargo
  Cc: David S. Miller, Chaithrika U S, Sriramakrishnan, Kevin Hilman,
	Jiri Pirko, netdev, linux-kernel
In-Reply-To: <AANLkTiko540hMAUhaJr5mNKzFK2-vBOTZmEOxZWZv7ci@mail.gmail.com>

On Thu, Jul 29, 2010 at 12:51 -0300, Henrique Camargo wrote:
> From: Henrique Camargo
> 
> Fixes a typo from "dev" to "ndev" that caused compilation errors.
> 
> Signed-off-by: Henrique Camargo <henrique.camargo@ensitec.com.br>

Thanks,
Reviewed-by: Kulikov Vasiliy <segooon@gmail.com>

Also I've checked my other patches of removing private stats, the rest
are ok.

> ---
>  drivers/net/davinci_emac.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
> index 25e14d2..b89b7bf 100644
> --- a/drivers/net/davinci_emac.c
> +++ b/drivers/net/davinci_emac.c
> @@ -1182,8 +1182,8 @@ static int emac_net_tx_complete(struct emac_priv *priv,
>         struct net_device *ndev = priv->ndev;
>         u32 cnt;
> 
> -       if (unlikely(num_tokens && netif_queue_stopped(dev)))
> -               netif_start_queue(dev);
> +       if (unlikely(num_tokens && netif_queue_stopped(ndev)))
> +               netif_start_queue(ndev);
>         for (cnt = 0; cnt < num_tokens; cnt++) {
>                 struct sk_buff *skb = (struct sk_buff *)net_data_tokens[cnt];
>                 if (skb == NULL)
> -- 
> 1.7.0.4

^ permalink raw reply

* [PATCH] drivers: Fixes a typo from "dev" to "ndev" that caused compilation errors.
From: Henrique Camargo @ 2010-07-29 15:51 UTC (permalink / raw)
  To: David S. Miller, Chaithrika U S, Sriramakrishnan, Kevin Hilman,
	Jiri Pirko
  Cc: segooon

From: Henrique Camargo

Fixes a typo from "dev" to "ndev" that caused compilation errors.

Signed-off-by: Henrique Camargo <henrique.camargo@ensitec.com.br>
---
 drivers/net/davinci_emac.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 25e14d2..b89b7bf 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -1182,8 +1182,8 @@ static int emac_net_tx_complete(struct emac_priv *priv,
        struct net_device *ndev = priv->ndev;
        u32 cnt;

-       if (unlikely(num_tokens && netif_queue_stopped(dev)))
-               netif_start_queue(dev);
+       if (unlikely(num_tokens && netif_queue_stopped(ndev)))
+               netif_start_queue(ndev);
        for (cnt = 0; cnt < num_tokens; cnt++) {
                struct sk_buff *skb = (struct sk_buff *)net_data_tokens[cnt];
                if (skb == NULL)
-- 
1.7.0.4

^ permalink raw reply related

* [PATCH] sky2: Code style fixes
From: Mike McCormack @ 2010-07-29 13:34 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev

Fix selected style problems reported by checkpatch.

Signed-off-by: Mike McCormack <mikem@ring3k.org>
---
 drivers/net/sky2.c |   24 ++++++++++++------------
 1 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 8b85c02..8723e3d 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -79,7 +79,7 @@
 
 #define SKY2_EEPROM_MAGIC	0x9955aabb
 
-#define RING_NEXT(x,s)	(((x)+1) & ((s)-1))
+#define RING_NEXT(x, s)	(((x)+1) & ((s)-1))
 
 static const u32 default_msg =
     NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
@@ -172,7 +172,7 @@ static int gm_phy_write(struct sky2_hw *hw, unsigned port, u16 reg, u16 val)
 		udelay(10);
 	}
 
-	dev_warn(&hw->pdev->dev,"%s: phy write timeout\n", hw->dev[port]->name);
+	dev_warn(&hw->pdev->dev, "%s: phy write timeout\n", hw->dev[port]->name);
 	return -ETIMEDOUT;
 
 io_error:
@@ -1078,7 +1078,7 @@ static inline struct sky2_rx_le *sky2_next_rx(struct sky2_port *sky2)
 	return le;
 }
 
-static unsigned sky2_get_rx_threshold(struct sky2_port* sky2)
+static unsigned sky2_get_rx_threshold(struct sky2_port *sky2)
 {
 	unsigned size;
 
@@ -1089,7 +1089,7 @@ static unsigned sky2_get_rx_threshold(struct sky2_port* sky2)
 	return (size - 8) / sizeof(u32);
 }
 
-static unsigned sky2_get_rx_data_size(struct sky2_port* sky2)
+static unsigned sky2_get_rx_data_size(struct sky2_port *sky2)
 {
 	struct rx_ring_info *re;
 	unsigned size;
@@ -1113,7 +1113,7 @@ static unsigned sky2_get_rx_data_size(struct sky2_port* sky2)
 }
 
 /* Build description to hardware for one receive segment */
-static void sky2_rx_add(struct sky2_port *sky2,  u8 op,
+static void sky2_rx_add(struct sky2_port *sky2, u8 op,
 			dma_addr_t map, unsigned len)
 {
 	struct sky2_rx_le *le;
@@ -3019,7 +3019,7 @@ static int __devinit sky2_init(struct sky2_hw *hw)
 	hw->chip_id = sky2_read8(hw, B2_CHIP_ID);
 	hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4;
 
-	switch(hw->chip_id) {
+	switch (hw->chip_id) {
 	case CHIP_ID_YUKON_XL:
 		hw->flags = SKY2_HW_GIGABIT | SKY2_HW_NEWER_PHY;
 		if (hw->chip_rev < CHIP_REV_YU_XL_A2)
@@ -3690,7 +3690,7 @@ static int sky2_set_mac_address(struct net_device *dev, void *p)
 	return 0;
 }
 
-static void inline sky2_add_filter(u8 filter[8], const u8 *addr)
+static inline void sky2_add_filter(u8 filter[8], const u8 *addr)
 {
 	u32 bit;
 
@@ -3916,7 +3916,7 @@ static int sky2_set_coalesce(struct net_device *dev,
 		return -EINVAL;
 	if (ecmd->rx_max_coalesced_frames > RX_MAX_PENDING)
 		return -EINVAL;
-	if (ecmd->rx_max_coalesced_frames_irq >RX_MAX_PENDING)
+	if (ecmd->rx_max_coalesced_frames_irq > RX_MAX_PENDING)
 		return -EINVAL;
 
 	if (ecmd->tx_coalesce_usecs == 0)
@@ -4381,7 +4381,7 @@ static int sky2_debug_show(struct seq_file *seq, void *v)
 			seq_printf(seq, "%u:", idx);
 		sop = 0;
 
-		switch(le->opcode & ~HW_OWNER) {
+		switch (le->opcode & ~HW_OWNER) {
 		case OP_ADDR64:
 			seq_printf(seq, " %#x:", a);
 			break;
@@ -4450,7 +4450,7 @@ static int sky2_device_event(struct notifier_block *unused,
 	if (dev->netdev_ops->ndo_open != sky2_up || !sky2_debug)
 		return NOTIFY_DONE;
 
-	switch(event) {
+	switch (event) {
 	case NETDEV_CHANGENAME:
 		if (sky2->debugfs) {
 			sky2->debugfs = debugfs_rename(sky2_debug, sky2->debugfs,
@@ -4645,7 +4645,7 @@ static int __devinit sky2_test_msi(struct sky2_hw *hw)
 	struct pci_dev *pdev = hw->pdev;
 	int err;
 
-	init_waitqueue_head (&hw->msi_wait);
+	init_waitqueue_head(&hw->msi_wait);
 
 	sky2_write32(hw, B0_IMSK, Y2_IS_IRQ_SW);
 
@@ -4762,7 +4762,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 	 * this driver uses software swapping.
 	 */
 	reg &= ~PCI_REV_DESC;
-	err = pci_write_config_dword(pdev,PCI_DEV_REG2, reg);
+	err = pci_write_config_dword(pdev, PCI_DEV_REG2, reg);
 	if (err) {
 		dev_err(&pdev->dev, "PCI write config failed\n");
 		goto err_out_free_regions;
-- 
1.5.6.5


^ permalink raw reply related

* [PATCH 1/3] TI DaVinci EMAC : Implement interrupt pacing functionality.
From: Sriramakrishnan @ 2010-07-29 12:33 UTC (permalink / raw)
  To: netdev, davinci-linux-open-source; +Cc: nsekhar, anantgole, Sriramakrishnan

The DaVinci EMAC module includes an interrupt pacing block that can
be programmed to throttle the rate at which interrupts are
generated. This patch implements interrupt pacing logic that can
be controlled through the ethtool interface (only the
rx_coalesce_usecs parameter is honored).

Signed-off-by: Sriramakrishnan <srk@ti.com>
---
 drivers/net/davinci_emac.c |  133 +++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 08e82b1..bc1b270 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -298,6 +298,11 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1";
 #define EMAC_CTRL_EWCTL		(0x4)
 #define EMAC_CTRL_EWINTTCNT	(0x8)
 
+/* EMAC DM644x control module masks */
+#define EMAC_DM644X_EWINTCNT_MASK	0x1FFFF
+#define EMAC_DM644X_INTMIN_INTVL	0x1
+#define EMAC_DM644X_INTMAX_INTVL	(EMAC_DM644X_EWINTCNT_MASK)
+
 /* EMAC MDIO related */
 /* Mask & Control defines */
 #define MDIO_CONTROL_CLKDIV	(0xFF)
@@ -318,8 +323,20 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1";
 #define MDIO_CONTROL		(0x04)
 
 /* EMAC DM646X control module registers */
-#define EMAC_DM646X_CMRXINTEN	(0x14)
-#define EMAC_DM646X_CMTXINTEN	(0x18)
+#define EMAC_DM646X_CMINTCTRL	0x0C
+#define EMAC_DM646X_CMRXINTEN	0x14
+#define EMAC_DM646X_CMTXINTEN	0x18
+#define EMAC_DM646X_CMRXINTMAX	0x70
+#define EMAC_DM646X_CMTXINTMAX	0x74
+
+/* EMAC DM646X control module masks */
+#define EMAC_DM646X_INTPACEEN		(0x3 << 16)
+#define EMAC_DM646X_INTPRESCALE_MASK	(0x7FF << 0)
+#define EMAC_DM646X_CMINTMAX_CNT	63
+#define EMAC_DM646X_CMINTMIN_CNT	2
+#define EMAC_DM646X_CMINTMAX_INTVL	(1000 / EMAC_DM646X_CMINTMIN_CNT)
+#define EMAC_DM646X_CMINTMIN_INTVL	((1000 / EMAC_DM646X_CMINTMAX_CNT) + 1)
+
 
 /* EMAC EOI codes for C0 */
 #define EMAC_DM646X_MAC_EOI_C0_RXEN	(0x01)
@@ -468,6 +485,8 @@ struct emac_priv {
 	u32 duplex; /* Link duplex: 0=Half, 1=Full */
 	u32 rx_buf_size;
 	u32 isr_count;
+	u32 coal_intvl;
+	u32 bus_freq_mhz;
 	u8 rmii_en;
 	u8 version;
 	struct net_device_stats net_dev_stats;
@@ -692,6 +711,103 @@ static int emac_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
 }
 
 /**
+ * emac_get_coalesce : Get interrupt coalesce settings for this device
+ * @ndev : The DaVinci EMAC network adapter
+ * @coal : ethtool coalesce settings structure
+ *
+ * Fetch the current interrupt coalesce settings
+ *
+ */
+static int emac_get_coalesce(struct net_device *ndev,
+				struct ethtool_coalesce *coal)
+{
+	struct emac_priv *priv = netdev_priv(ndev);
+
+	coal->rx_coalesce_usecs = priv->coal_intvl;
+	return 0;
+
+}
+
+/**
+ * emac_set_coalesce : Set interrupt coalesce settings for this device
+ * @ndev : The DaVinci EMAC network adapter
+ * @coal : ethtool coalesce settings structure
+ *
+ * Set interrupt coalesce parameters
+ *
+ */
+static int emac_set_coalesce(struct net_device *ndev,
+				struct ethtool_coalesce *coal)
+{
+	struct emac_priv *priv = netdev_priv(ndev);
+	u32 int_ctrl, num_interrupts = 0;
+	u32 prescale = 0, addnl_dvdr = 1, coal_intvl = 0;
+
+	if (!coal->rx_coalesce_usecs)
+		return -EINVAL;
+
+	coal_intvl = coal->rx_coalesce_usecs;
+
+	switch (priv->version) {
+	case EMAC_VERSION_2:
+		int_ctrl =  emac_ctrl_read(EMAC_DM646X_CMINTCTRL);
+		prescale = priv->bus_freq_mhz * 4;
+
+		if (coal_intvl < EMAC_DM646X_CMINTMIN_INTVL)
+			coal_intvl = EMAC_DM646X_CMINTMIN_INTVL;
+
+		if (coal_intvl > EMAC_DM646X_CMINTMAX_INTVL) {
+			/*
+			 * Interrupt pacer works with 4us Pulse, we can
+			 * throttle further by dilating the 4us pulse.
+			 */
+			addnl_dvdr = EMAC_DM646X_INTPRESCALE_MASK / prescale;
+
+			if (addnl_dvdr > 1) {
+				prescale *= addnl_dvdr;
+				if (coal_intvl > (EMAC_DM646X_CMINTMAX_INTVL
+							* addnl_dvdr))
+					coal_intvl = (EMAC_DM646X_CMINTMAX_INTVL
+							* addnl_dvdr);
+			} else {
+				addnl_dvdr = 1;
+				coal_intvl = EMAC_DM646X_CMINTMAX_INTVL;
+			}
+		}
+
+		num_interrupts = (1000 * addnl_dvdr) / coal_intvl;
+
+		int_ctrl |= EMAC_DM646X_INTPACEEN;
+		int_ctrl &= (~EMAC_DM646X_INTPRESCALE_MASK);
+		int_ctrl |= (prescale & EMAC_DM646X_INTPRESCALE_MASK);
+		emac_ctrl_write(EMAC_DM646X_CMINTCTRL, int_ctrl);
+
+		emac_ctrl_write(EMAC_DM646X_CMRXINTMAX, num_interrupts);
+		emac_ctrl_write(EMAC_DM646X_CMTXINTMAX, num_interrupts);
+
+		break;
+	default:
+		int_ctrl = emac_ctrl_read(EMAC_CTRL_EWINTTCNT);
+		int_ctrl &= (~EMAC_DM644X_EWINTCNT_MASK);
+		prescale = coal_intvl * priv->bus_freq_mhz;
+		if (prescale > EMAC_DM644X_EWINTCNT_MASK) {
+			prescale = EMAC_DM644X_EWINTCNT_MASK;
+			coal_intvl = prescale / priv->bus_freq_mhz;
+		}
+		emac_ctrl_write(EMAC_CTRL_EWINTTCNT, (int_ctrl | prescale));
+
+		break;
+	}
+
+	printk(KERN_INFO"Set coalesce to %d usecs.\n", coal_intvl);
+	priv->coal_intvl = coal_intvl;
+
+	return 0;
+
+}
+
+
+/**
  * ethtool_ops: DaVinci EMAC Ethtool structure
  *
  * Ethtool support for EMAC adapter
@@ -702,6 +818,8 @@ static const struct ethtool_ops ethtool_ops = {
 	.get_settings = emac_get_settings,
 	.set_settings = emac_set_settings,
 	.get_link = ethtool_op_get_link,
+	.get_coalesce = emac_get_coalesce,
+	.set_coalesce =  emac_set_coalesce,
 };
 
 /**
@@ -2437,6 +2555,14 @@ static int emac_dev_open(struct net_device *ndev)
 	/* Start/Enable EMAC hardware */
 	emac_hw_enable(priv);
 
+	/* Enable Interrupt pacing if configured */
+	if (priv->coal_intvl != 0) {
+		struct ethtool_coalesce coal;
+
+		coal.rx_coalesce_usecs = (priv->coal_intvl << 4);
+		emac_set_coalesce(ndev, &coal);
+	}
+
 	/* find the first phy */
 	priv->phydev = NULL;
 	if (priv->phy_mask) {
@@ -2677,6 +2803,9 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev)
 	priv->int_enable = pdata->interrupt_enable;
 	priv->int_disable = pdata->interrupt_disable;
 
+	priv->coal_intvl = 0;
+	priv->bus_freq_mhz = (u32)(emac_bus_frequency / 1000000);
+
 	emac_dev = &ndev->dev;
 	/* Get EMAC platform data */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-- 
1.6.2.4


^ permalink raw reply related

* [PATCH 2/3] TI DaVinci EMAC: Fix asymmetric handling of packets in NAPI Poll function.
From: Sriramakrishnan @ 2010-07-29 12:33 UTC (permalink / raw)
  To: netdev, davinci-linux-open-source; +Cc: nsekhar, anantgole, Sriramakrishnan
In-Reply-To: <1280406840-27393-1-git-send-email-srk@ti.com>

The current implementation of the NAPI poll function in the driver does not
service Rx packets or error conditions if even a single Tx packet gets serviced
in the NAPI poll call. This behavior severely affects performance for specific
use cases. This patch modifies the poll function implementation to service
Tx and Rx packets in an identical manner.

Signed-off-by: Sriramakrishnan <srk@ti.com>
---
 drivers/net/davinci_emac.c |   21 ++++++++-------------
 1 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index bc1b270..4ee9af6 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -2266,7 +2266,7 @@ static int emac_poll(struct napi_struct *napi, int budget)
 	struct net_device *ndev = priv->ndev;
 	struct device *emac_dev = &ndev->dev;
 	u32 status = 0;
-	u32 num_pkts = 0;
+	u32 num_tx_pkts = 0, num_rx_pkts = 0;
 
 	/* Check interrupt vectors and call packet processing */
 	status = emac_read(EMAC_MACINVECTOR);
@@ -2277,27 +2277,19 @@ static int emac_poll(struct napi_struct *napi, int budget)
 		mask = EMAC_DM646X_MAC_IN_VECTOR_TX_INT_VEC;
 
 	if (status & mask) {
-		num_pkts = emac_tx_bdproc(priv, EMAC_DEF_TX_CH,
+		num_tx_pkts = emac_tx_bdproc(priv, EMAC_DEF_TX_CH,
 					  EMAC_DEF_TX_MAX_SERVICE);
 	} /* TX processing */
 
-	if (num_pkts)
-		return budget;
-
 	mask = EMAC_DM644X_MAC_IN_VECTOR_RX_INT_VEC;
 
 	if (priv->version == EMAC_VERSION_2)
 		mask = EMAC_DM646X_MAC_IN_VECTOR_RX_INT_VEC;
 
 	if (status & mask) {
-		num_pkts = emac_rx_bdproc(priv, EMAC_DEF_RX_CH, budget);
+		num_rx_pkts = emac_rx_bdproc(priv, EMAC_DEF_RX_CH, budget);
 	} /* RX processing */
 
-	if (num_pkts < budget) {
-		napi_complete(napi);
-		emac_int_enable(priv);
-	}
-
 	mask = EMAC_DM644X_MAC_IN_VECTOR_HOST_INT;
 	if (priv->version == EMAC_VERSION_2)
 		mask = EMAC_DM646X_MAC_IN_VECTOR_HOST_INT;
@@ -2328,9 +2320,12 @@ static int emac_poll(struct napi_struct *napi, int budget)
 				dev_err(emac_dev, "RX Host error %s on ch=%d\n",
 					&emac_rxhost_errcodes[cause][0], ch);
 		}
-	} /* Host error processing */
+	} else if (num_rx_pkts < budget) {
+		napi_complete(napi);
+		emac_int_enable(priv);
+	}
 
-	return num_pkts;
+	return num_rx_pkts;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-- 
1.6.2.4


^ permalink raw reply related

* [PATCH 3/3] TI DaVinci EMAC: Fix incorrect reference to EMAC_CTRL registers.
From: Sriramakrishnan @ 2010-07-29 12:34 UTC (permalink / raw)
  To: netdev, davinci-linux-open-source; +Cc: nsekhar, anantgole, Sriramakrishnan
In-Reply-To: <1280406840-27393-2-git-send-email-srk@ti.com>

The EMAC module's control registers vary with the version of the
EMAC module. EMAC_CTRL_EWCTL and EMAC_CTRL_EWINTTCNT are available
only on EMAC_VERSION_1. The emac_dump_regs() function accesses
these indiscriminately. This patch fixes the issue.

Signed-off-by: Sriramakrishnan <srk@ti.com>
---
 drivers/net/davinci_emac.c |    8 +++++---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 4ee9af6..12196ad 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -565,9 +565,11 @@ static void emac_dump_regs(struct emac_priv *priv)
 
 	/* Print important registers in EMAC */
 	dev_info(emac_dev, "EMAC Basic registers\n");
-	dev_info(emac_dev, "EMAC: EWCTL: %08X, EWINTTCNT: %08X\n",
-		emac_ctrl_read(EMAC_CTRL_EWCTL),
-		emac_ctrl_read(EMAC_CTRL_EWINTTCNT));
+	if (priv->version == EMAC_VERSION_1) {
+		dev_info(emac_dev, "EMAC: EWCTL: %08X, EWINTTCNT: %08X\n",
+			emac_ctrl_read(EMAC_CTRL_EWCTL),
+			emac_ctrl_read(EMAC_CTRL_EWINTTCNT));
+	}
 	dev_info(emac_dev, "EMAC: TXID: %08X %s, RXID: %08X %s\n",
 		emac_read(EMAC_TXIDVER),
 		((emac_read(EMAC_TXCONTROL)) ? "enabled" : "disabled"),
-- 
1.6.2.4


^ permalink raw reply related

* [PATCH] vhost: locking/rcu cleanup
From: Michael S. Tsirkin @ 2010-07-29 12:23 UTC (permalink / raw)
  To: Michael S. Tsirkin, David S. Miller, Sridhar Samudrala, Tejun Heo,
	Jeff Dike

I saw WARN_ON(!list_empty(&dev->work_list)) trigger,
so our custom flush is not as airtight as it needs to be.

This patch switches to a simple atomic counter + srcu instead of
the custom locked queue + flush implementation.

This will slow down the setup ioctls, which should not matter -
it's a slow path anyway. We use the expedited flush to at least
make sure it has a sane time bound.

Works fine for me. I got reports that with many guests,
work lock is highly contended, and this patch should in theory
fix this as well - but I haven't tested this yet.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c   |   55 +++++--------------
 drivers/vhost/vhost.c |  140 ++++++++++++++++++++++---------------------------
 drivers/vhost/vhost.h |   47 +++++++++-------
 3 files changed, 103 insertions(+), 139 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f13e56b..ee69c51 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -111,8 +111,9 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock)
 
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
-static void handle_tx(struct vhost_net *net)
+static void handle_tx(struct vhost_dev *dev)
 {
+	struct vhost_net *net = container_of(dev, struct vhost_net, dev);
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
 	unsigned out, in, s;
 	int head;
@@ -127,7 +128,7 @@ static void handle_tx(struct vhost_net *net)
 	size_t len, total_len = 0;
 	int err, wmem;
 	size_t hdr_size;
-	struct socket *sock = rcu_dereference(vq->private_data);
+	struct socket *sock = vhost_vq_data(vq, &net->dev);
 	if (!sock)
 		return;
 
@@ -305,7 +306,7 @@ static void handle_rx_big(struct vhost_net *net)
 	size_t len, total_len = 0;
 	int err;
 	size_t hdr_size;
-	struct socket *sock = rcu_dereference(vq->private_data);
+	struct socket *sock = vhost_vq_data(vq, &net->dev);
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
 
@@ -416,7 +417,7 @@ static void handle_rx_mergeable(struct vhost_net *net)
 	int err, headcount;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
-	struct socket *sock = rcu_dereference(vq->private_data);
+	struct socket *sock = vhost_vq_data(vq, &net->dev);
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
 
@@ -500,46 +501,15 @@ static void handle_rx_mergeable(struct vhost_net *net)
 	unuse_mm(net->dev.mm);
 }
 
-static void handle_rx(struct vhost_net *net)
+static void handle_rx(struct vhost_dev *dev)
 {
+	struct vhost_net *net = container_of(dev, struct vhost_net, dev);
 	if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
 		handle_rx_mergeable(net);
 	else
 		handle_rx_big(net);
 }
 
-static void handle_tx_kick(struct vhost_work *work)
-{
-	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
-						  poll.work);
-	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);
-
-	handle_tx(net);
-}
-
-static void handle_rx_kick(struct vhost_work *work)
-{
-	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
-						  poll.work);
-	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);
-
-	handle_rx(net);
-}
-
-static void handle_tx_net(struct vhost_work *work)
-{
-	struct vhost_net *net = container_of(work, struct vhost_net,
-					     poll[VHOST_NET_VQ_TX].work);
-	handle_tx(net);
-}
-
-static void handle_rx_net(struct vhost_work *work)
-{
-	struct vhost_net *net = container_of(work, struct vhost_net,
-					     poll[VHOST_NET_VQ_RX].work);
-	handle_rx(net);
-}
-
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
 	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
@@ -550,16 +520,18 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 		return -ENOMEM;
 
 	dev = &n->dev;
-	n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
-	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
+	vhost_work_set_fn(&n->vqs[VHOST_NET_VQ_TX].work, handle_tx);
+	vhost_work_set_fn(&n->vqs[VHOST_NET_VQ_RX].work, handle_rx);
 	r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
 	if (r < 0) {
 		kfree(n);
 		return r;
 	}
 
-	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
-	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
+	vhost_poll_init(n->poll + VHOST_NET_VQ_TX,
+			&n->vqs[VHOST_NET_VQ_TX].work, POLLOUT, dev);
+	vhost_poll_init(n->poll + VHOST_NET_VQ_RX,
+			&n->vqs[VHOST_NET_VQ_RX].work, POLLIN, dev);
 	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
 
 	f->private_data = n;
@@ -640,6 +612,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	/* We do an extra flush before freeing memory,
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
+	vhost_dev_free(&n->dev);
 	kfree(n);
 	return 0;
 }
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e05557d..daa95c8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -60,22 +60,27 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
 	return 0;
 }
 
+/* Must be called for each vq before vhost_dev_init. */
+void vhost_work_set_fn(struct vhost_work *work, vhost_work_fn_t fn)
+{
+	work->fn = fn;
+}
+
+static void vhost_work_init(struct vhost_work *work)
+{
+	atomic_set(&work->queue_seq, 0);
+	work->done_seq = 0;
+}
+
 /* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+void vhost_poll_init(struct vhost_poll *poll, struct vhost_work *work,
 		     unsigned long mask, struct vhost_dev *dev)
 {
-	struct vhost_work *work = &poll->work;
-
+	poll->work = work;
 	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
 	init_poll_funcptr(&poll->table, vhost_poll_func);
 	poll->mask = mask;
 	poll->dev = dev;
-
-	INIT_LIST_HEAD(&work->node);
-	work->fn = fn;
-	init_waitqueue_head(&work->done);
-	work->flushing = 0;
-	work->queue_seq = work->done_seq = 0;
 }
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
@@ -99,40 +104,16 @@ void vhost_poll_stop(struct vhost_poll *poll)
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-	struct vhost_work *work = &poll->work;
-	unsigned seq;
-	int left;
-	int flushing;
-
-	spin_lock_irq(&poll->dev->work_lock);
-	seq = work->queue_seq;
-	work->flushing++;
-	spin_unlock_irq(&poll->dev->work_lock);
-	wait_event(work->done, ({
-		   spin_lock_irq(&poll->dev->work_lock);
-		   left = seq - work->done_seq <= 0;
-		   spin_unlock_irq(&poll->dev->work_lock);
-		   left;
-	}));
-	spin_lock_irq(&poll->dev->work_lock);
-	flushing = --work->flushing;
-	spin_unlock_irq(&poll->dev->work_lock);
-	BUG_ON(flushing < 0);
+	synchronize_srcu_expedited(&poll->dev->worker_srcu);
 }
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
 	struct vhost_dev *dev = poll->dev;
-	struct vhost_work *work = &poll->work;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->work_lock, flags);
-	if (list_empty(&work->node)) {
-		list_add_tail(&work->node, &dev->work_list);
-		work->queue_seq++;
-		wake_up_process(dev->worker);
-	}
-	spin_unlock_irqrestore(&dev->work_lock, flags);
+	struct vhost_work *work = poll->work;
+
+	atomic_inc(&work->queue_seq);
+	wake_up_process(dev->worker);
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
@@ -164,41 +145,39 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 static int vhost_worker(void *data)
 {
 	struct vhost_dev *dev = data;
-	struct vhost_work *work = NULL;
-	unsigned uninitialized_var(seq);
+	struct vhost_work *uninitialized_var(work);
+	unsigned n, i, vq = 0;
+	int seq;
 
-	for (;;) {
-		/* mb paired w/ kthread_stop */
-		set_current_state(TASK_INTERRUPTIBLE);
+	n = dev->nvqs;
+repeat:
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
 
-		spin_lock_irq(&dev->work_lock);
-		if (work) {
-			work->done_seq = seq;
-			if (work->flushing)
-				wake_up_all(&work->done);
-		}
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
 
-		if (kthread_should_stop()) {
-			spin_unlock_irq(&dev->work_lock);
-			__set_current_state(TASK_RUNNING);
-			return 0;
+	for (i = 0; i < n; ++i) {
+		work = &dev->vqs[(vq + i) % n].work;
+		seq = atomic_read(&work->queue_seq);
+		if (seq != work->done_seq) {
+			work->done_seq = seq;
+			break;
 		}
-		if (!list_empty(&dev->work_list)) {
-			work = list_first_entry(&dev->work_list,
-						struct vhost_work, node);
-			list_del_init(&work->node);
-			seq = work->queue_seq;
-		} else
-			work = NULL;
-		spin_unlock_irq(&dev->work_lock);
+		work = NULL;
+	}
 
-		if (work) {
-			__set_current_state(TASK_RUNNING);
-			work->fn(work);
-		} else
-			schedule();
+	if (work) {
+		int idx;
+		__set_current_state(TASK_RUNNING);
+		idx = srcu_read_lock(&dev->worker_srcu);
+		work->fn(dev);
+		srcu_read_unlock(&dev->worker_srcu, idx);
+	} else
+		schedule();
 
-	}
+	goto repeat;
 }
 
 long vhost_dev_init(struct vhost_dev *dev,
@@ -213,20 +192,22 @@ long vhost_dev_init(struct vhost_dev *dev,
 	dev->log_file = NULL;
 	dev->memory = NULL;
 	dev->mm = NULL;
-	spin_lock_init(&dev->work_lock);
-	INIT_LIST_HEAD(&dev->work_list);
 	dev->worker = NULL;
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].dev = dev;
 		mutex_init(&dev->vqs[i].mutex);
 		vhost_vq_reset(dev, dev->vqs + i);
-		if (dev->vqs[i].handle_kick)
+		if (dev->vqs[i].work.fn)
 			vhost_poll_init(&dev->vqs[i].poll,
-					dev->vqs[i].handle_kick, POLLIN, dev);
+					&dev->vqs[i].work, POLLIN, dev);
 	}
+	return init_srcu_struct(&dev->worker_srcu);
+}
 
-	return 0;
+void vhost_dev_free(struct vhost_dev *dev)
+{
+	cleanup_srcu_struct(&dev->worker_srcu);
 }
 
 /* Caller should have device mutex */
@@ -240,7 +221,7 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 static long vhost_dev_set_owner(struct vhost_dev *dev)
 {
 	struct task_struct *worker;
-	int err;
+	int i, err;
 	/* Is there an owner already? */
 	if (dev->mm) {
 		err = -EBUSY;
@@ -258,6 +239,10 @@ static long vhost_dev_set_owner(struct vhost_dev *dev)
 	err = cgroup_attach_task_current_cg(worker);
 	if (err)
 		goto err_cgroup;
+
+	for (i = 0; i < dev->nvqs; ++i) {
+		vhost_work_init(&dev->vqs[i].work);
+	}
 	wake_up_process(worker);	/* avoid contributing to loadavg */
 
 	return 0;
@@ -293,7 +278,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 {
 	int i;
 	for (i = 0; i < dev->nvqs; ++i) {
-		if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
+		if (dev->vqs[i].kick && dev->vqs[i].work.fn) {
 			vhost_poll_stop(&dev->vqs[i].poll);
 			vhost_poll_flush(&dev->vqs[i].poll);
 		}
@@ -322,7 +307,6 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 		mmput(dev->mm);
 	dev->mm = NULL;
 
-	WARN_ON(!list_empty(&dev->work_list));
 	kthread_stop(dev->worker);
 }
 
@@ -644,7 +628,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
 		r = -ENOIOCTLCMD;
 	}
 
-	if (pollstop && vq->handle_kick)
+	if (pollstop && vq->work.fn)
 		vhost_poll_stop(&vq->poll);
 
 	if (ctx)
@@ -652,12 +636,12 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
 	if (filep)
 		fput(filep);
 
-	if (pollstart && vq->handle_kick)
+	if (pollstart && vq->work.fn)
 		vhost_poll_start(&vq->poll, vq->kick);
 
 	mutex_unlock(&vq->mutex);
 
-	if (pollstop && vq->handle_kick)
+	if (pollstop && vq->work.fn)
 		vhost_poll_flush(&vq->poll);
 	return r;
 }
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index afd7729..9c990ea 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -11,9 +11,10 @@
 #include <linux/uio.h>
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
+#include <linux/srcu.h>
 #include <asm/atomic.h>
 
-struct vhost_device;
+struct vhost_dev;
 
 enum {
 	/* Enough place for all fragments, head, and virtio net header. */
@@ -21,29 +22,33 @@ enum {
 };
 
 struct vhost_work;
-typedef void (*vhost_work_fn_t)(struct vhost_work *work);
+typedef void (*vhost_work_fn_t)(struct vhost_dev *dev);
 
 struct vhost_work {
-	struct list_head	  node;
+	/* Callback function to execute. */
 	vhost_work_fn_t		  fn;
-	wait_queue_head_t	  done;
-	int			  flushing;
-	unsigned		  queue_seq;
-	unsigned		  done_seq;
+	/* Incremented to request callback execution.
+	 * Atomic to allow multiple writers. */
+	atomic_t		  queue_seq;
+	/* Used by worker to track execution requests.
+	 * Used from a single thread so no locking. */
+	int			  done_seq;
 };
 
+void vhost_work_set_fn(struct vhost_work *work, vhost_work_fn_t fn);
+
 /* Poll a file (eventfd or socket) */
 /* Note: there's nothing vhost specific about this structure. */
 struct vhost_poll {
 	poll_table                table;
 	wait_queue_head_t        *wqh;
 	wait_queue_t              wait;
-	struct vhost_work	  work;
+	struct vhost_work	 *work;
 	unsigned long		  mask;
 	struct vhost_dev	 *dev;
 };
 
-void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+void vhost_poll_init(struct vhost_poll *poll, struct vhost_work* work,
 		     unsigned long mask, struct vhost_dev *dev);
 void vhost_poll_start(struct vhost_poll *poll, struct file *file);
 void vhost_poll_stop(struct vhost_poll *poll);
@@ -72,11 +77,12 @@ struct vhost_virtqueue {
 	struct eventfd_ctx *error_ctx;
 	struct eventfd_ctx *log_ctx;
 
+	/* The work to execute when the Guest kicks us,
+	 * on Host activity, or timeout. */
+	struct vhost_work work;
+	/* Poll Guest for kicks */
 	struct vhost_poll poll;
 
-	/* The routine to call when the Guest pings us, or timeout. */
-	vhost_work_fn_t handle_kick;
-
 	/* Last available index we saw. */
 	u16 last_avail_idx;
 
@@ -99,12 +105,7 @@ struct vhost_virtqueue {
 	size_t vhost_hlen;
 	size_t sock_hlen;
 	struct vring_used_elem heads[VHOST_NET_MAX_SG];
-	/* We use a kind of RCU to access private pointer.
-	 * All readers access it from worker, which makes it possible to
-	 * flush the vhost_work instead of synchronize_rcu. Therefore readers do
-	 * not need to call rcu_read_lock/rcu_read_unlock: the beginning of
-	 * vhost_work execution acts instead of rcu_read_lock() and the end of
-	 * vhost_work execution acts instead of rcu_read_lock().
+	/* Readers use worker_srcu in device to access private pointer.
 	 * Writers use virtqueue mutex. */
 	void *private_data;
 	/* Log write descriptors */
@@ -112,6 +113,12 @@ struct vhost_virtqueue {
 	struct vhost_log log[VHOST_NET_MAX_SG];
 };
 
+static inline void *vhost_vq_data(struct vhost_virtqueue *vq,
+				  struct vhost_dev *dev)
+{
+	return srcu_dereference(vq->private_data, &dev->worker_srcu);
+}
+
 struct vhost_dev {
 	/* Readers use RCU to access memory table pointer
 	 * log base pointer and features.
@@ -124,12 +131,12 @@ struct vhost_dev {
 	int nvqs;
 	struct file *log_file;
 	struct eventfd_ctx *log_ctx;
-	spinlock_t work_lock;
-	struct list_head work_list;
 	struct task_struct *worker;
+	struct srcu_struct worker_srcu;
 };
 
 long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
+void vhost_dev_free(struct vhost_dev *);
 long vhost_dev_check_owner(struct vhost_dev *);
 long vhost_dev_reset_owner(struct vhost_dev *);
 void vhost_dev_cleanup(struct vhost_dev *);
-- 
1.7.2.rc0.14.g41c1c

^ permalink raw reply related

* [PATCH] tcp: cookie transactions setsockopt memory leak
From: Dmitry Popov @ 2010-07-29 11:59 UTC (permalink / raw)
  To: David S. Miller, Alexey Kuznetsov, James Morris, Patrick McHardy
  Cc: Pekka Savola (ipv6), Hideaki YOSHIFUJI, Eric Dumazet,
	Ilpo Järvinen, Andrew Morton, Steven J. Magnani, netdev,
	linux-kernel, William Allen Simpson

From: Dmitry Popov <dp@highloadlab.com>

There is a bug in do_tcp_setsockopt() (net/ipv4/tcp.c), in the
TCP_COOKIE_TRANSACTIONS case.
In some cases (when tp->cookie_values == NULL) a new tcp_cookie_values
structure can be allocated (at cvp) but not bound to
tp->cookie_values, so a memory leak occurs.

Signed-off-by: Dmitry Popov <dp@highloadlab.com>
---
tp->cookie_values can be NULL if socket was initialized with
sysctl_tcp_cookie_size == 0 (tcp_v4_init_sock, net/ipv4/tcp_ipv4.c)
Buggy releases: 2.6.33+ (since commit e56fb50f2b7958b931c8a2fc0966061b3f3c8f3a)

 net/ipv4/tcp.c |    7 +++++--
 1 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 83d0213..9c490a1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2179,6 +2179,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 				      GFP_KERNEL);
 			if (cvp == NULL)
 				return -ENOMEM;
+
+			kref_init(&cvp->kref);
 		}
 		lock_sock(sk);
 		tp->rx_opt.cookie_in_always =
@@ -2193,12 +2195,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 				 */
 				kref_put(&tp->cookie_values->kref,
 					 tcp_cookie_values_release);
-				kref_init(&cvp->kref);
-				tp->cookie_values = cvp;
 			} else {
 				cvp = tp->cookie_values;
 			}
 		}
+
 		if (cvp != NULL) {
 			cvp->cookie_desired = ctd.tcpct_cookie_desired;

@@ -2212,6 +2213,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 				cvp->s_data_desired = ctd.tcpct_s_data_desired;
 				cvp->s_data_constant = 0; /* false */
 			}
+
+			tp->cookie_values = cvp;
 		}
 		release_sock(sk);
 		return err;

^ permalink raw reply related

* Re: can: expected receive behavior broken
From: Oliver Hartkopp @ 2010-07-29 11:16 UTC (permalink / raw)
  To: Matthias Fuchs
  Cc: Socketcan-core-0fE9KPoRgkgATYTw5x5z8w, Linux Netdev List,
	Wolfgang Grandegger
In-Reply-To: <4C514BAE.4070201-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>

On 29.07.2010 11:36, Oliver Hartkopp wrote:

> Invoking tst-rcv-own-msgs produces this output, which is far away from the
> correct (wanted) output seen in the commit message below.
> 
> sockopt default
>  s : 0
>  t : 0
>  timeout
> sockopt - -
>  timeout
> sockopt - R
>  timeout
> sockopt L -
>  s : 3
>  t : 3
>  timeout
> sockopt L R
>  s : 4
>  t : 4
>  timeout
> done.
> 
> I'll check that with the latest linux-2.6 (after rebooting :-)

Grr - it is also broken in 2.6.34-rc7-05126-g278554b ...

I'll go and hunt the bad commit in 2.6.34 and will fix it.

Tnx & regards,
Oliver


> It needs a vcan0 virtual CAN network interface and should produce an output
> like this, when invoked:
> 
> sockopt default
>  t : 0
>  timeout
> sockopt - -
>  timeout
> sockopt - R
>  timeout
> sockopt L -
>  t : 3
>  timeout
> sockopt L R
>  s : 4
>  t : 4
>  timeout
> done.
> 

^ permalink raw reply

* [RFC PATCH v8 14/16] Provides multiple submits and asynchronous notifications.
From: xiaohui.xin @ 2010-07-29 11:14 UTC (permalink / raw)
  To: netdev, kvm, linux-kernel, mst, mingo, davem, herbert, jdike; +Cc: Xin Xiaohui
In-Reply-To: <1280402088-5849-14-git-send-email-xiaohui.xin@intel.com>

From: Xin Xiaohui <xiaohui.xin@intel.com>

The vhost-net backend currently supports only synchronous send/recv
operations. This patch provides multiple submits and asynchronous
notifications, which are needed for the zero-copy case.

Signed-off-by: Xin Xiaohui <xiaohui.xin@intel.com>
---
 drivers/vhost/net.c   |  348 +++++++++++++++++++++++++++++++++++++++++++++----
 drivers/vhost/vhost.c |   79 +++++++++++
 drivers/vhost/vhost.h |   15 ++
 3 files changed, 414 insertions(+), 28 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index b38abc6..c4bc815 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -24,6 +24,8 @@
 #include <linux/if_arp.h>
 #include <linux/if_tun.h>
 #include <linux/if_macvlan.h>
+#include <linux/mpassthru.h>
+#include <linux/aio.h>
 
 #include <net/sock.h>
 
@@ -39,6 +41,8 @@ enum {
 	VHOST_NET_VQ_MAX = 2,
 };
 
+static struct kmem_cache *notify_cache;
+
 enum vhost_net_poll_state {
 	VHOST_NET_POLL_DISABLED = 0,
 	VHOST_NET_POLL_STARTED = 1,
@@ -49,6 +53,7 @@ struct vhost_net {
 	struct vhost_dev dev;
 	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
 	struct vhost_poll poll[VHOST_NET_VQ_MAX];
+	struct kmem_cache       *cache;
 	/* Tells us whether we are polling a socket for TX.
 	 * We only do this when socket buffer fills up.
 	 * Protected by tx vq lock. */
@@ -93,11 +98,190 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock)
 	net->tx_poll_state = VHOST_NET_POLL_STARTED;
 }
 
+struct kiocb *notify_dequeue(struct vhost_virtqueue *vq)
+{
+	struct kiocb *iocb = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vq->notify_lock, flags);
+	if (!list_empty(&vq->notifier)) {
+		iocb = list_first_entry(&vq->notifier,
+				struct kiocb, ki_list);
+		list_del(&iocb->ki_list);
+	}
+	spin_unlock_irqrestore(&vq->notify_lock, flags);
+	return iocb;
+}
+
+static void handle_iocb(struct kiocb *iocb)
+{
+	struct vhost_virtqueue *vq = iocb->private;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vq->notify_lock, flags);
+	list_add_tail(&iocb->ki_list, &vq->notifier);
+	spin_unlock_irqrestore(&vq->notify_lock, flags);
+}
+
+static int is_async_vq(struct vhost_virtqueue *vq)
+{
+	return (vq->link_state == VHOST_VQ_LINK_ASYNC);
+}
+
+/*
+ * Drain completed RX iocbs from vq->notifier (no-op unless the vq is async):
+ * pass each buffer to vhost_add_used_and_signal()/_n(), run its ki_dtor,
+ * free it, and log the write when VHOST_F_LOG_ALL is set.  The single-buffer
+ * path requeues the poll and stops once VHOST_NET_WEIGHT bytes are reaped.
+ */
+static void handle_async_rx_events_notify(struct vhost_net *net,
+					  struct vhost_virtqueue *vq,
+					  struct socket *sock)
+{
+	struct kiocb *iocb = NULL;
+	struct vhost_log *vq_log = NULL;
+	int rx_total_len = 0;
+	unsigned int head, log = 0, in, out;
+	int size;
+
+	if (!is_async_vq(vq))
+		return;
+
+	if (sock->sk->sk_data_ready)
+		sock->sk->sk_data_ready(sock->sk, 0);
+
+	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
+		vq->log : NULL;
+
+	while ((iocb = notify_dequeue(vq)) != NULL) {
+		if (!iocb->ki_left) {
+			vhost_add_used_and_signal(&net->dev, vq,
+					iocb->ki_pos, iocb->ki_nbytes);
+			size = iocb->ki_nbytes;
+			head = iocb->ki_pos;
+			rx_total_len += iocb->ki_nbytes;
+
+			if (iocb->ki_dtor)
+				iocb->ki_dtor(iocb);
+			kmem_cache_free(net->cache, iocb);
+
+			/* Buffers queued before logging was enabled carry no
+			 * log info, so rebuild it from the descriptor chain;
+			 * log starts at 0 so the first buffer always does so.
+			 */
+			if (unlikely(vq_log)) {
+				if (!log)
+					__vhost_get_desc(&net->dev, vq, vq->iov,
+							ARRAY_SIZE(vq->iov),
+							&out, &in, vq_log,
+							&log, head);
+				vhost_log_write(vq, vq_log, log, size);
+			}
+			if (unlikely(rx_total_len >= VHOST_NET_WEIGHT)) {
+				vhost_poll_queue(&vq->poll);
+				break;
+			}
+		} else {
+			int i = 0;
+			int count = iocb->ki_left;
+			int hc = count;
+			while (count--) {
+				if (iocb) {
+					vq->heads[i].id = iocb->ki_pos;
+					vq->heads[i].len = iocb->ki_nbytes;
+					size = iocb->ki_nbytes;
+					head = iocb->ki_pos;
+					rx_total_len += iocb->ki_nbytes;
+
+					if (iocb->ki_dtor)
+						iocb->ki_dtor(iocb);
+					kmem_cache_free(net->cache, iocb);
+
+					if (unlikely(vq_log)) {
+						if (!log)
+							__vhost_get_desc(
+							&net->dev, vq, vq->iov,
+							ARRAY_SIZE(vq->iov),
+							&out, &in, vq_log,
+							&log, head);
+						vhost_log_write(
+							vq, vq_log, log, size);
+					}
+				} else
+					break;
+
+				i++;
+				iocb = NULL;	/* freed above; drop stale pointer */
+				if (count)
+					iocb = notify_dequeue(vq);
+			}
+			vhost_add_used_and_signal_n(
+					&net->dev, vq, vq->heads, hc);
+		}
+	}
+}
+
+static void handle_async_tx_events_notify(struct vhost_net *net,
+					  struct vhost_virtqueue *vq)
+{
+	struct kiocb *iocb = NULL;
+	struct list_head *entry, *tmp;
+	unsigned long flags;
+	int tx_total_len = 0;
+
+	if (!is_async_vq(vq))
+		return;
+
+	spin_lock_irqsave(&vq->notify_lock, flags);
+	list_for_each_safe(entry, tmp, &vq->notifier) {
+		iocb = list_entry(entry,
+				  struct kiocb, ki_list);
+		if (!iocb->ki_flags)
+			continue;
+		list_del(&iocb->ki_list);
+		vhost_add_used_and_signal(&net->dev, vq,
+				iocb->ki_pos, 0);
+		tx_total_len += iocb->ki_nbytes;
+
+		if (iocb->ki_dtor)
+			iocb->ki_dtor(iocb);
+
+		kmem_cache_free(net->cache, iocb);
+		if (unlikely(tx_total_len >= VHOST_NET_WEIGHT)) {
+			vhost_poll_queue(&vq->poll);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&vq->notify_lock, flags);
+}
+
+static struct kiocb *create_iocb(struct vhost_net *net,
+				 struct vhost_virtqueue *vq,
+				 unsigned head)
+{
+	struct kiocb *iocb = NULL;
+
+	if (!is_async_vq(vq))
+		return NULL;
+
+	iocb = kmem_cache_zalloc(net->cache, GFP_KERNEL);
+	if (!iocb)
+		return NULL;
+	iocb->private = vq;
+	iocb->ki_pos = head;
+	iocb->ki_dtor = handle_iocb;
+	if (vq == &net->dev.vqs[VHOST_NET_VQ_RX])
+		iocb->ki_user_data = vq->num;
+	iocb->ki_iovec = vq->hdr;
+	return iocb;
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
+	struct kiocb *iocb = NULL;
 	unsigned head, out, in, s;
 	struct msghdr msg = {
 		.msg_name = NULL,
@@ -130,6 +314,8 @@ static void handle_tx(struct vhost_net *net)
 		tx_poll_stop(net);
 	vhost_hlen = vq->vhost_hlen;
 
+	handle_async_tx_events_notify(net, vq);
+
 	for (;;) {
 		head = vhost_get_desc(&net->dev, vq, vq->iov,
 				      ARRAY_SIZE(vq->iov),
@@ -138,10 +324,13 @@ static void handle_tx(struct vhost_net *net)
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
 			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
-			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
-				tx_poll_start(net, sock);
-				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
-				break;
+			if (!is_async_vq(vq)) {
+				if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
+					tx_poll_start(net, sock);
+					set_bit(SOCK_ASYNC_NOSPACE,
+						&sock->flags);
+					break;
+				}
 			}
 			if (unlikely(vhost_enable_notify(vq))) {
 				vhost_disable_notify(vq);
@@ -158,6 +347,13 @@ static void handle_tx(struct vhost_net *net)
 		s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, out);
 		msg.msg_iovlen = out;
 		len = iov_length(vq->iov, out);
+
+		if (is_async_vq(vq)) {
+			iocb = create_iocb(net, vq, head);
+			if (!iocb)
+				break;
+		}
+
 		/* Sanity check */
 		if (!len) {
 			vq_err(vq, "Unexpected header len for TX: "
@@ -166,12 +362,18 @@ static void handle_tx(struct vhost_net *net)
 			break;
 		}
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
-		err = sock->ops->sendmsg(NULL, sock, &msg, len);
+		err = sock->ops->sendmsg(iocb, sock, &msg, len);
 		if (unlikely(err < 0)) {
+			if (is_async_vq(vq))
+				kmem_cache_free(net->cache, iocb);
 			vhost_discard_desc(vq, 1);
 			tx_poll_start(net, sock);
 			break;
 		}
+
+		if (is_async_vq(vq))
+			continue;
+
 		if (err != len)
 			pr_err("Truncated TX packet: "
 			       " len %d != %zd\n", err, len);
@@ -183,6 +385,8 @@ static void handle_tx(struct vhost_net *net)
 		}
 	}
 
+	handle_async_tx_events_notify(net, vq);
+
 	mutex_unlock(&vq->mutex);
 	unuse_mm(net->dev.mm);
 }
@@ -205,7 +409,8 @@ static int vhost_head_len(struct vhost_virtqueue *vq, struct sock *sk)
 static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
-	unsigned in, log, s;
+	struct kiocb *iocb = NULL;
+	unsigned in, out, log, s;
 	struct vhost_log *vq_log;
 	struct msghdr msg = {
 		.msg_name = NULL,
@@ -225,25 +430,42 @@ static void handle_rx(struct vhost_net *net)
 	int err, headcount, datalen;
 	size_t vhost_hlen;
 	struct socket *sock = rcu_dereference(vq->private_data);
-	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
+	if (!sock || (skb_queue_empty(&sock->sk->sk_receive_queue) &&
+		      !is_async_vq(vq)))
 		return;
-
 	use_mm(net->dev.mm);
 	mutex_lock(&vq->mutex);
 	vhost_disable_notify(vq);
 	vhost_hlen = vq->vhost_hlen;
 
+	/* In the async case with write logging enabled, buffers submitted
+	 * before logging was turned on carry no log info, so it has to be
+	 * recomputed when they complete. That is done in
+	 * handle_async_rx_events_notify().
+	 */
+
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
 
-	while ((datalen = vhost_head_len(vq, sock->sk))) {
-		headcount = vhost_get_desc_n(vq, vq->heads,
-					     datalen + vhost_hlen,
-					     &in, vq_log, &log);
+	handle_async_rx_events_notify(net, vq, sock);
+
+	while (is_async_vq(vq) ||
+		(datalen = vhost_head_len(vq, sock->sk)) != 0) {
+		if (is_async_vq(vq))
+			headcount =
+				vhost_get_desc(&net->dev, vq, vq->iov,
+						ARRAY_SIZE(vq->iov),
+						&out, &in,
+						vq->log, &log);
+		else
+			headcount = vhost_get_desc_n(vq, vq->heads,
+						     datalen + vhost_hlen,
+						     &in, vq_log, &log);
 		if (headcount < 0)
 			break;
 		/* OK, now we need to know about added descriptors. */
-		if (!headcount) {
+		if ((!headcount && !is_async_vq(vq)) ||
+			(headcount == vq->num && is_async_vq(vq))) {
 			if (unlikely(vhost_enable_notify(vq))) {
 				/* They have slipped one in as we were
 				 * doing that: check again. */
@@ -256,7 +478,12 @@ static void handle_rx(struct vhost_net *net)
 		}
 		/* We don't need to be notified again. */
 		/* Skip header. TODO: support TSO. */
+		if (is_async_vq(vq) && vhost_hlen == sizeof(hdr)) {
+			vq->hdr[0].iov_len = vhost_hlen;
+			goto nomove;
+		}
 		s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
+nomove:
 		msg.msg_iovlen = in;
 		len = iov_length(vq->iov, in);
 		/* Sanity check */
@@ -266,13 +493,23 @@ static void handle_rx(struct vhost_net *net)
 			       iov_length(vq->hdr, s), vhost_hlen);
 			break;
 		}
-		err = sock->ops->recvmsg(NULL, sock, &msg,
+		if (is_async_vq(vq)) {
+			iocb = create_iocb(net, vq, headcount);
+			if (!iocb)
+				break;
+		}
+		err = sock->ops->recvmsg(iocb, sock, &msg,
 					 len, MSG_DONTWAIT | MSG_TRUNC);
 		/* TODO: Check specific error and bomb out unless EAGAIN? */
 		if (err < 0) {
+			if (is_async_vq(vq))
+				kmem_cache_free(net->cache, iocb);
 			vhost_discard_desc(vq, headcount);
 			break;
 		}
+		if (is_async_vq(vq))
+			continue;
+
 		if (err != datalen) {
 			pr_err("Discarded rx packet: "
 			       " len %d, expected %zd\n", err, datalen);
@@ -280,6 +517,9 @@ static void handle_rx(struct vhost_net *net)
 			continue;
 		}
 		len = err;
+		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF))
+			hdr.num_buffers = headcount;
+
 		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr,
 				     vhost_hlen);
 		if (err) {
@@ -287,18 +527,7 @@ static void handle_rx(struct vhost_net *net)
 			       vq->iov->iov_base, err);
 			break;
 		}
-		/* TODO: Should check and handle checksum. */
-		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) {
-			struct iovec *iov = vhost_hlen ? vq->hdr : vq->iov;
-
-			if (memcpy_toiovecend(iov, (unsigned char *)&headcount,
-				      offsetof(typeof(hdr), num_buffers),
-				      sizeof(hdr.num_buffers))) {
-				vq_err(vq, "Failed num_buffers write");
-				vhost_discard_desc(vq, headcount);
-				break;
-			}
-		}
+
 		len += vhost_hlen;
 		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
 					    headcount);
@@ -311,6 +540,8 @@ static void handle_rx(struct vhost_net *net)
 		}
 	}
 
+	handle_async_rx_events_notify(net, vq, sock);
+
 	mutex_unlock(&vq->mutex);
 	unuse_mm(net->dev.mm);
 }
@@ -364,6 +595,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT);
 	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN);
 	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
+	n->cache = NULL;
 
 	f->private_data = n;
 
@@ -427,6 +659,21 @@ static void vhost_net_flush(struct vhost_net *n)
 	vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
 }
 
+static void vhost_async_cleanup(struct vhost_net *n)
+{
+	/* clean the notifier */
+	struct vhost_virtqueue *vq;
+	struct kiocb *iocb = NULL;
+	if (n->cache) {
+		vq = &n->dev.vqs[VHOST_NET_VQ_RX];
+		while ((iocb = notify_dequeue(vq)) != NULL)
+			kmem_cache_free(n->cache, iocb);
+		vq = &n->dev.vqs[VHOST_NET_VQ_TX];
+		while ((iocb = notify_dequeue(vq)) != NULL)
+			kmem_cache_free(n->cache, iocb);
+	}
+}
+
 static int vhost_net_release(struct inode *inode, struct file *f)
 {
 	struct vhost_net *n = f->private_data;
@@ -443,6 +690,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	/* We do an extra flush before freeing memory,
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
+	vhost_async_cleanup(n);
 	kfree(n);
 	return 0;
 }
@@ -494,21 +742,58 @@ static struct socket *get_tap_socket(int fd)
 	return sock;
 }
 
-static struct socket *get_socket(int fd)
+static struct socket *get_mp_socket(int fd)
+{
+	struct file *file = fget(fd);
+	struct socket *sock;
+	if (!file)
+		return ERR_PTR(-EBADF);
+	sock = mp_get_socket(file);
+	if (IS_ERR(sock))
+		fput(file);
+	return sock;
+}
+
+static struct socket *get_socket(struct vhost_virtqueue *vq, int fd,
+				 enum vhost_vq_link_state *state)
 {
 	struct socket *sock;
 	/* special case to disable backend */
 	if (fd == -1)
 		return NULL;
+
+	*state = VHOST_VQ_LINK_SYNC;
+
 	sock = get_raw_socket(fd);
 	if (!IS_ERR(sock))
 		return sock;
 	sock = get_tap_socket(fd);
 	if (!IS_ERR(sock))
 		return sock;
+	/* If we don't have notify_cache, then don't do mpassthru */
+	if (!notify_cache)
+		return ERR_PTR(-ENOTSOCK);
+	sock = get_mp_socket(fd);
+	if (!IS_ERR(sock)) {
+		*state = VHOST_VQ_LINK_ASYNC;
+		return sock;
+	}
 	return ERR_PTR(-ENOTSOCK);
 }
 
+static void vhost_init_link_state(struct vhost_net *n, int index)
+{
+	struct vhost_virtqueue *vq = n->vqs + index;
+
+	WARN_ON(!mutex_is_locked(&vq->mutex));
+	if (vq->link_state == VHOST_VQ_LINK_ASYNC) {
+		INIT_LIST_HEAD(&vq->notifier);
+		spin_lock_init(&vq->notify_lock);
+		if (!n->cache)
+			n->cache = notify_cache;
+	}
+}
+
 static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 {
 	struct socket *sock, *oldsock;
@@ -532,12 +817,14 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 		r = -EFAULT;
 		goto err_vq;
 	}
-	sock = get_socket(fd);
+	sock = get_socket(vq, fd, &vq->link_state);
 	if (IS_ERR(sock)) {
 		r = PTR_ERR(sock);
 		goto err_vq;
 	}
 
+	vhost_init_link_state(n, index);
+
 	/* start polling new socket */
 	oldsock = vq->private_data;
 	if (sock == oldsock)
@@ -687,6 +974,9 @@ int vhost_net_init(void)
 	r = misc_register(&vhost_net_misc);
 	if (r)
 		goto err_reg;
+	notify_cache = kmem_cache_create("vhost_kiocb",
+					sizeof(struct kiocb), 0,
+					SLAB_HWCACHE_ALIGN, NULL);
 	return 0;
 err_reg:
 	vhost_cleanup();
@@ -700,6 +990,8 @@ void vhost_net_exit(void)
 {
 	misc_deregister(&vhost_net_misc);
 	vhost_cleanup();
+	if (notify_cache)
+		kmem_cache_destroy(notify_cache);
 }
 module_exit(vhost_net_exit);
 
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 118c8e0..66ff5c5 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -909,6 +909,85 @@ err:
 	return r;
 }
 
+unsigned __vhost_get_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+			   struct iovec iov[], unsigned int iov_size,
+			   unsigned int *out_num, unsigned int *in_num,
+			   struct vhost_log *log, unsigned int *log_num,
+			   unsigned int head)
+{
+	struct vring_desc desc;
+	unsigned int i, found = 0;
+	u16 last_avail_idx;
+	int ret;
+
+	/* When we start there are none of either input nor output. */
+	*out_num = *in_num = 0;
+	if (unlikely(log))
+		*log_num = 0;
+
+	i = head;
+	do {
+		unsigned iov_count = *in_num + *out_num;
+		if (i >= vq->num) {
+			vq_err(vq, "Desc index is %u > %u, head = %u",
+			       i, vq->num, head);
+			return vq->num;
+		}
+		if (++found > vq->num) {
+			vq_err(vq, "Loop detected: last one at %u "
+			       "vq size %u head %u\n",
+			       i, vq->num, head);
+			return vq->num;
+		}
+		ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
+		if (ret) {
+			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+			       i, vq->desc + i);
+			return vq->num;
+		}
+		if (desc.flags & VRING_DESC_F_INDIRECT) {
+			ret = get_indirect(dev, vq, iov, iov_size,
+					   out_num, in_num,
+					   log, log_num, &desc);
+			if (ret < 0) {
+				vq_err(vq, "Failure detected "
+				       "in indirect descriptor at idx %d\n", i);
+				return vq->num;
+			}
+			continue;
+		}
+
+		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+				     iov_size - iov_count);
+		if (ret < 0) {
+			vq_err(vq, "Translation failure %d descriptor idx %d\n",
+			       ret, i);
+			return vq->num;
+		}
+		if (desc.flags & VRING_DESC_F_WRITE) {
+			/* If this is an input descriptor,
+			 * increment that count. */
+			*in_num += ret;
+			if (unlikely(log)) {
+				log[*log_num].addr = desc.addr;
+				log[*log_num].len = desc.len;
+				++*log_num;
+			}
+		} else {
+			/* If it's an output descriptor, they're all supposed
+			 * to come before any input descriptors. */
+			if (*in_num) {
+				vq_err(vq, "Descriptor has out after in: "
+				       "idx %d\n", i);
+				return vq->num;
+			}
+			*out_num += ret;
+		}
+	} while ((i = next_desc(&desc)) != -1);
+
+	return head;
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 08d740a..54c6d0b 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -43,6 +43,11 @@ struct vhost_log {
 	u64 len;
 };
 
+enum vhost_vq_link_state {
+	VHOST_VQ_LINK_SYNC = 0,
+	VHOST_VQ_LINK_ASYNC = 1,
+};
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
@@ -98,6 +103,10 @@ struct vhost_virtqueue {
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log log[VHOST_NET_MAX_SG];
+	/* Differentiate async socket for 0-copy from normal */
+	enum vhost_vq_link_state link_state;
+	struct list_head notifier;
+	spinlock_t notify_lock;
 };
 
 struct vhost_dev {
@@ -125,6 +134,11 @@ int vhost_log_access_ok(struct vhost_dev *);
 int vhost_get_desc_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
 		     int datalen, unsigned int *iovcount, struct vhost_log *log,
 		     unsigned int *log_num);
+unsigned __vhost_get_desc(struct vhost_dev *, struct vhost_virtqueue *,
+			struct iovec iov[], unsigned int iov_count,
+			unsigned int *out_num, unsigned int *in_num,
+			struct vhost_log *log, unsigned int *log_num,
+			unsigned int head);
 unsigned vhost_get_desc(struct vhost_dev *, struct vhost_virtqueue *,
 			   struct iovec iov[], unsigned int iov_count,
 			   unsigned int *out_num, unsigned int *in_num,
@@ -165,6 +179,7 @@ enum {
 static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
 {
 	unsigned acked_features = rcu_dereference(dev->acked_features);
+	acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
 	return acked_features & (1 << bit);
 }
 
-- 
1.5.4.4


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox