Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 3/4] smsc75xx: enable power saving mode during system suspend
From: Steve Glendinning @ 2012-09-28 10:57 UTC (permalink / raw)
  To: netdev; +Cc: Steve Glendinning
In-Reply-To: <1348829873-30779-1-git-send-email-steve.glendinning@shawell.net>

This patch instructs the device to enter its lowest power SUSPEND2
state during system suspend.

This patch also explicitly wakes the device after resume, which
should address reports of the device not automatically coming
back after system suspend:

Patch updated to change BUG_ON to WARN_ON_ONCE.

http://code.google.com/p/chromium-os/issues/detail?id=31871

Signed-off-by: Steve Glendinning <steve.glendinning@shawell.net>
---
 drivers/net/usb/smsc75xx.c |   57 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 1f45f7b..759e5770 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1106,6 +1106,57 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 	}
 }
 
+static int smsc75xx_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	struct usbnet *dev = usb_get_intfdata(intf);
+	int ret;
+	u32 val;
+
+	if (WARN_ON_ONCE(!dev))
+		return -EINVAL;
+
+	ret = usbnet_suspend(intf, message);
+	check_warn_return(ret, "usbnet_suspend error");
+
+	netdev_info(dev->net, "entering SUSPEND2 mode");
+
+	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+	check_warn_return(ret, "Error reading PMT_CTL");
+
+	val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST);
+	val |= PMT_CTL_SUS_MODE_2;
+
+	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+	check_warn_return(ret, "Error writing PMT_CTL");
+
+	return 0;
+}
+
+static int smsc75xx_resume(struct usb_interface *intf)
+{
+	struct usbnet *dev = usb_get_intfdata(intf);
+	int ret;
+	u32 val;
+
+	if (WARN_ON_ONCE(!dev))
+		return -EINVAL;
+
+	netdev_info(dev->net, "resuming from SUSPEND2");
+
+	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+	check_warn_return(ret, "Error reading PMT_CTL");
+
+	val |= PMT_CTL_PHY_PWRUP;
+
+	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+	check_warn_return(ret, "Error writing PMT_CTL");
+
+	ret = smsc75xx_wait_ready(dev);
+	check_warn_return(ret, "device not ready in smsc75xx_resume");
+
+	return usbnet_resume(intf);
+}
+
 static void smsc75xx_rx_csum_offload(struct usbnet *dev, struct sk_buff *skb,
 				     u32 rx_cmd_a, u32 rx_cmd_b)
 {
@@ -1274,9 +1325,9 @@ static struct usb_driver smsc75xx_driver = {
 	.name		= SMSC_CHIPNAME,
 	.id_table	= products,
 	.probe		= usbnet_probe,
-	.suspend	= usbnet_suspend,
-	.resume		= usbnet_resume,
-	.reset_resume	= usbnet_resume,
+	.suspend	= smsc75xx_suspend,
+	.resume		= smsc75xx_resume,
+	.reset_resume	= smsc75xx_resume,
 	.disconnect	= usbnet_disconnect,
 	.disable_hub_initiated_lpm = 1,
 };
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 4/4] smsc75xx: add wol magic packet support
From: Steve Glendinning @ 2012-09-28 10:57 UTC (permalink / raw)
  To: netdev; +Cc: Steve Glendinning
In-Reply-To: <1348829873-30779-1-git-send-email-steve.glendinning@shawell.net>

This patch enables wake from system suspend on magic packet.

Patch updated to change BUG_ON to WARN_ON_ONCE.

Signed-off-by: Steve Glendinning <steve.glendinning@shawell.net>
---
 drivers/net/usb/smsc75xx.c |  188 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 174 insertions(+), 14 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 759e5770..b77ae76 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -52,6 +52,7 @@
 #define USB_PRODUCT_ID_LAN7500		(0x7500)
 #define USB_PRODUCT_ID_LAN7505		(0x7505)
 #define RXW_PADDING			2
+#define SUPPORTED_WAKE			(WAKE_MAGIC)
 
 #define check_warn(ret, fmt, args...) \
 	({ if (ret < 0) netdev_warn(dev->net, fmt, ##args); })
@@ -65,6 +66,7 @@
 struct smsc75xx_priv {
 	struct usbnet *dev;
 	u32 rfe_ctl;
+	u32 wolopts;
 	u32 multicast_hash_table[DP_SEL_VHF_HASH_LEN];
 	struct mutex dataport_mutex;
 	spinlock_t rfe_ctl_lock;
@@ -135,6 +137,30 @@ static int __must_check smsc75xx_write_reg(struct usbnet *dev, u32 index,
 	return ret;
 }
 
+static int smsc75xx_set_feature(struct usbnet *dev, u32 feature)
+{
+	if (WARN_ON_ONCE(!dev))
+		return -EINVAL;
+
+	cpu_to_le32s(&feature);
+
+	return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+		USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0,
+		USB_CTRL_SET_TIMEOUT);
+}
+
+static int smsc75xx_clear_feature(struct usbnet *dev, u32 feature)
+{
+	if (WARN_ON_ONCE(!dev))
+		return -EINVAL;
+
+	cpu_to_le32s(&feature);
+
+	return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+		USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0,
+		USB_CTRL_SET_TIMEOUT);
+}
+
 /* Loop until the read is completed with timeout
  * called with phy_mutex held */
 static int smsc75xx_phy_wait_not_busy(struct usbnet *dev)
@@ -578,6 +604,26 @@ static int smsc75xx_ethtool_set_eeprom(struct net_device *netdev,
 	return smsc75xx_write_eeprom(dev, ee->offset, ee->len, data);
 }
 
+static void smsc75xx_ethtool_get_wol(struct net_device *net,
+				     struct ethtool_wolinfo *wolinfo)
+{
+	struct usbnet *dev = netdev_priv(net);
+	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
+
+	wolinfo->supported = SUPPORTED_WAKE;
+	wolinfo->wolopts = pdata->wolopts;
+}
+
+static int smsc75xx_ethtool_set_wol(struct net_device *net,
+				    struct ethtool_wolinfo *wolinfo)
+{
+	struct usbnet *dev = netdev_priv(net);
+	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
+
+	pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE;
+	return 0;
+}
+
 static const struct ethtool_ops smsc75xx_ethtool_ops = {
 	.get_link	= usbnet_get_link,
 	.nway_reset	= usbnet_nway_reset,
@@ -589,6 +635,8 @@ static const struct ethtool_ops smsc75xx_ethtool_ops = {
 	.get_eeprom_len	= smsc75xx_ethtool_get_eeprom_len,
 	.get_eeprom	= smsc75xx_ethtool_get_eeprom,
 	.set_eeprom	= smsc75xx_ethtool_set_eeprom,
+	.get_wol	= smsc75xx_ethtool_get_wol,
+	.set_wol	= smsc75xx_ethtool_set_wol,
 };
 
 static int smsc75xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
@@ -1109,47 +1157,159 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 static int smsc75xx_suspend(struct usb_interface *intf, pm_message_t message)
 {
 	struct usbnet *dev = usb_get_intfdata(intf);
+	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
 	int ret;
 	u32 val;
 
-	if (WARN_ON_ONCE(!dev))
-		return -EINVAL;
-
 	ret = usbnet_suspend(intf, message);
 	check_warn_return(ret, "usbnet_suspend error");
 
-	netdev_info(dev->net, "entering SUSPEND2 mode");
+	/* if no wol options set, enter lowest power SUSPEND2 mode */
+	if (!(pdata->wolopts & SUPPORTED_WAKE)) {
+		netdev_info(dev->net, "entering SUSPEND2 mode");
+
+		/* disable energy detect (link up) & wake up events */
+		ret = smsc75xx_read_reg(dev, WUCSR, &val);
+		check_warn_return(ret, "Error reading WUCSR");
+
+		val &= ~(WUCSR_MPEN | WUCSR_WUEN);
+
+		ret = smsc75xx_write_reg(dev, WUCSR, val);
+		check_warn_return(ret, "Error writing WUCSR");
+
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+		check_warn_return(ret, "Error reading PMT_CTL");
+
+		val &= ~(PMT_CTL_ED_EN | PMT_CTL_WOL_EN);
+
+		ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+		check_warn_return(ret, "Error writing PMT_CTL");
+
+		/* enter suspend2 mode */
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+		check_warn_return(ret, "Error reading PMT_CTL");
+
+		val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST);
+		val |= PMT_CTL_SUS_MODE_2;
+
+		ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+		check_warn_return(ret, "Error writing PMT_CTL");
+
+		return 0;
+	}
+
+	if (pdata->wolopts & WAKE_MAGIC) {
+		/* clear any pending magic packet status */
+		ret = smsc75xx_read_reg(dev, WUCSR, &val);
+		check_warn_return(ret, "Error reading WUCSR");
+
+		val |= WUCSR_MPR;
 
+		ret = smsc75xx_write_reg(dev, WUCSR, val);
+		check_warn_return(ret, "Error writing WUCSR");
+	}
+
+	/* enable/disable magic packup wake */
+	ret = smsc75xx_read_reg(dev, WUCSR, &val);
+	check_warn_return(ret, "Error reading WUCSR");
+
+	if (pdata->wolopts & WAKE_MAGIC) {
+		netdev_info(dev->net, "enabling magic packet wakeup");
+		val |= WUCSR_MPEN;
+	} else {
+		netdev_info(dev->net, "disabling magic packet wakeup");
+		val &= ~WUCSR_MPEN;
+	}
+
+	ret = smsc75xx_write_reg(dev, WUCSR, val);
+	check_warn_return(ret, "Error writing WUCSR");
+
+	/* enable wol wakeup source */
 	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
 	check_warn_return(ret, "Error reading PMT_CTL");
 
-	val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST);
-	val |= PMT_CTL_SUS_MODE_2;
+	val |= PMT_CTL_WOL_EN;
+
+	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+	check_warn_return(ret, "Error writing PMT_CTL");
+
+	/* enable receiver */
+	ret = smsc75xx_read_reg(dev, MAC_RX, &val);
+	check_warn_return(ret, "Failed to read MAC_RX: %d", ret);
+
+	val |= MAC_RX_RXEN;
+
+	ret = smsc75xx_write_reg(dev, MAC_RX, val);
+	check_warn_return(ret, "Failed to write MAC_RX: %d", ret);
+
+	/* some wol options are enabled, so enter SUSPEND0 */
+	netdev_info(dev->net, "entering SUSPEND0 mode");
+
+	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+	check_warn_return(ret, "Error reading PMT_CTL");
+
+	val &= (~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST));
+	val |= PMT_CTL_SUS_MODE_0;
+
+	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+	check_warn_return(ret, "Error writing PMT_CTL");
 
+	/* clear wol status */
+	val &= ~PMT_CTL_WUPS;
+	val |= PMT_CTL_WUPS_WOL;
 	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
 	check_warn_return(ret, "Error writing PMT_CTL");
 
+	/* read back PMT_CTL */
+	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+	check_warn_return(ret, "Error reading PMT_CTL");
+
+	smsc75xx_set_feature(dev, USB_DEVICE_REMOTE_WAKEUP);
+
 	return 0;
 }
 
 static int smsc75xx_resume(struct usb_interface *intf)
 {
 	struct usbnet *dev = usb_get_intfdata(intf);
+	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
 	int ret;
 	u32 val;
 
-	if (WARN_ON_ONCE(!dev))
-		return -EINVAL;
+	if (pdata->wolopts & WAKE_MAGIC) {
+		netdev_info(dev->net, "resuming from SUSPEND0");
 
-	netdev_info(dev->net, "resuming from SUSPEND2");
+		smsc75xx_clear_feature(dev, USB_DEVICE_REMOTE_WAKEUP);
 
-	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
-	check_warn_return(ret, "Error reading PMT_CTL");
+		/* Disable magic packup wake */
+		ret = smsc75xx_read_reg(dev, WUCSR, &val);
+		check_warn_return(ret, "Error reading WUCSR");
 
-	val |= PMT_CTL_PHY_PWRUP;
+		val &= ~WUCSR_MPEN;
 
-	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
-	check_warn_return(ret, "Error writing PMT_CTL");
+		ret = smsc75xx_write_reg(dev, WUCSR, val);
+		check_warn_return(ret, "Error writing WUCSR");
+
+		/* clear wake-up status */
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+		check_warn_return(ret, "Error reading PMT_CTL");
+
+		val &= ~PMT_CTL_WOL_EN;
+		val |= PMT_CTL_WUPS;
+
+		ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+		check_warn_return(ret, "Error writing PMT_CTL");
+	} else {
+		netdev_info(dev->net, "resuming from SUSPEND2");
+
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+		check_warn_return(ret, "Error reading PMT_CTL");
+
+		val |= PMT_CTL_PHY_PWRUP;
+
+		ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+		check_warn_return(ret, "Error writing PMT_CTL");
+	}
 
 	ret = smsc75xx_wait_ready(dev);
 	check_warn_return(ret, "device not ready in smsc75xx_resume");
-- 
1.7.9.5

^ permalink raw reply related

* Re: [PATCH 3/3] net/mlx4_en: Add HW timestamping (TS) support
From: Richard Cochran @ 2012-09-28 11:11 UTC (permalink / raw)
  To: Yevgeny Petrilin; +Cc: davem, netdev, eugenia
In-Reply-To: <1348826603-17439-4-git-send-email-yevgenyp@mellanox.com>

On Fri, Sep 28, 2012 at 12:03:23PM +0200, Yevgeny Petrilin wrote:
> From: Eugenia Emantayev <eugenia@mellanox.co.il>
> 

...

> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> index edd9cb8..10fa453 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> @@ -1517,6 +1517,60 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
>  	return 0;
>  }
>  
> +static int mlx4_en_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
> +{
> +	struct mlx4_en_priv *priv = netdev_priv(dev);
> +	struct mlx4_en_dev *mdev = priv->mdev;
> +	struct hwtstamp_config config;
> +
> +	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
> +		return -EFAULT;
> +
> +	/* reserved for future extensions */
> +	if (config.flags)
> +		return -EINVAL;
> +
> +	/* device doesn't support time stamping */
> +	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS_EN))
> +		return -EINVAL;
> +
> +	/* TX HW timestamp */
> +	switch (config.tx_type) {
> +	case HWTSTAMP_TX_OFF:
> +	case HWTSTAMP_TX_ON:
> +		break;
> +	default:
> +		return -ERANGE;
> +	}
> +
> +	/* RX HW timestamp */
> +	switch (config.rx_filter) {
> +	case HWTSTAMP_FILTER_NONE:
> +	case HWTSTAMP_FILTER_ALL:
> +		break;
> +	default:

Instead of rejecting the HWTSTAMP_FILTER_PTP_ codes out of hand, you
should just accept them, and return by promoting rx_filter to
HWTSTAMP_FILTER_ALL.

[ See Documentation/networking/timestamping.txt ]

> +		return -ERANGE;
> +	}
> +
> +	if (mlx4_en_timestamp_config(dev, config.tx_type, config.rx_filter)) {
> +		config.tx_type = HWTSTAMP_TX_OFF;
> +		config.rx_filter = HWTSTAMP_FILTER_NONE;
> +	}
> +
> +	return copy_to_user(ifr->ifr_data, &config,
> +			    sizeof(config)) ? -EFAULT : 0;
> +}

...

> @@ -363,6 +368,9 @@ struct mlx4_en_dev {
>  	u32                     priv_pdn;
>  	spinlock_t              uar_lock;
>  	u8			mac_removed[MLX4_MAX_PORTS + 1];
> +	struct cyclecounter	cycles;
> +	struct timecounter	clock;
> +	struct timecompare	compare;

I am working on a patch to remove the timecompare stuff altogether
(after removing it from blackfin). It is and was a bad idea, I would
hate to see new drivers using it.

I strongly recommend just offering raw hardware time stamps in
nanosecond resolution. Also, why not expose your device as a PTP
Hardware Clock?

Thanks,
Richard

^ permalink raw reply

* Re: Possible networking regression in 3.6.0
From: Eric Dumazet @ 2012-09-28 11:26 UTC (permalink / raw)
  To: Chris Clayton; +Cc: David Miller, netdev, gpiez
In-Reply-To: <50656C4A.8090302@googlemail.com>

On Fri, 2012-09-28 at 10:22 +0100, Chris Clayton wrote:

> No, the WinXP guest is configured with a fixed IP address 
> (192.168.200.1). Subnet mask is 255.255.255.0, and default gateway is 
> 192.168.200.254. DNS is 192.168.0.1.
> 

I have no problem with such a setup, with a linux guest.

Could you send again a tcpdump, but including link-level header ?
(option -e)

Ideally, you could send two traces, one taken on tap0, and another taken
on eth0.

^ permalink raw reply

* Re: You have to fix this
From: Vipul Pandya @ 2012-09-28 11:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <20120927.183406.962014014858457666.davem@davemloft.net>

On 28-09-2012 04:04, David Miller wrote:
> 
> You cannot put such monster sized local data objects on the stack:
> 
> drivers/net/ethernet/chelsio/cxgb4/t4_hw.c: In function ‘t4_memory_rw.constprop.6’:
> drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:484:1: warning: the frame size of 2056 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> 
> That's because of this thing:
> 
> 		__be32 data[MEMWIN0_APERTURE/sizeof(__be32)];
> 
> I'm really surprised this didn't show up in any of your test builds.
> Or are you ignoring warnings that your changes add?
> 

Thanks for pointing this out. We will send a patch for this soon.
I am not ignoring any warnings. I built my tree in the different ways
shown below but did not get the above warning message.
#> make
#> make allmodconfig
#> make allnoconfig

Please let me know how else I could get the above warning message.

I missed running make checkstack, which doesn't display a warning message but
lists the function names.

Thanks,
Vipul Pandya

^ permalink raw reply

* [PATCH] vlan: Make it possible to add vlan with id 4095
From: Paulius Zaleckas @ 2012-09-28 12:32 UTC (permalink / raw)
  To: kaber, netdev

vconfig help tells that vlan_id should be 0-4095, but fails
with 4095.

There is an off-by-one bug while evaluating vlan_id.
Fix it by evaluating against count(4096), not mask(0x0fff = 4095).

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com>
---

 net/8021q/vlan.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9096bcb..9e528bf 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -199,7 +199,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	char name[IFNAMSIZ];
 	int err;
 
-	if (vlan_id >= VLAN_VID_MASK)
+	if (vlan_id >= VLAN_N_VID)
 		return -ERANGE;
 
 	err = vlan_check_real_dev(real_dev, vlan_id);

^ permalink raw reply related

* drivers/net/cris/eth_v10.c:1715:2: error: too many arguments to function 'e100rxtx_interrupt'
From: Fengguang Wu @ 2012-09-28 13:06 UTC (permalink / raw)
  To: Jesper Nilsson; +Cc: kernel-janitors, netdev

Hi Jesper,

FYI, a rather old build bug that's introduced by

bafef0a cris build fixes: update eth_v10.c ethernet driver

All error/warnings:

drivers/net/cris/eth_v10.c: In function 'e100_netpoll':
drivers/net/cris/eth_v10.c:1715:2: error: too many arguments to function 'e100rxtx_interrupt'
drivers/net/cris/eth_v10.c:1131:1: note: declared here

vim +1715 drivers/net/cris/eth_v10.c

^1da177e (Linus Torvalds 2005-04-16  1710) 
bafef0ae (Jesper Nilsson 2007-11-14  1711) #ifdef CONFIG_NET_POLL_CONTROLLER
bafef0ae (Jesper Nilsson 2007-11-14  1712) static void
bafef0ae (Jesper Nilsson 2007-11-14  1713) e100_netpoll(struct net_device* netdev)
bafef0ae (Jesper Nilsson 2007-11-14  1714) {
bafef0ae (Jesper Nilsson 2007-11-14 @1715) 	e100rxtx_interrupt(NETWORK_DMA_TX_IRQ_NBR, netdev, NULL);
bafef0ae (Jesper Nilsson 2007-11-14  1716) }
bafef0ae (Jesper Nilsson 2007-11-14  1717) #endif
bafef0ae (Jesper Nilsson 2007-11-14  1718) 
^1da177e (Linus Torvalds 2005-04-16  1719) static int
^1da177e (Linus Torvalds 2005-04-16  1720) etrax_init_module(void)
^1da177e (Linus Torvalds 2005-04-16  1721) {
^1da177e (Linus Torvalds 2005-04-16  1722) 	return etrax_ethernet_init();
^1da177e (Linus Torvalds 2005-04-16  1723) }

---
0-DAY kernel build testing backend         Open Source Technology Centre
Fengguang Wu, Yuanhan Liu                              Intel Corporation

^ permalink raw reply

* Re: [RFC PATCH net-next] tcp: introduce tcp_tw_interval to specifiy the time of TIME-WAIT
From: Neil Horman @ 2012-09-28 13:16 UTC (permalink / raw)
  To: Cong Wang
  Cc: netdev, David S. Miller, Alexey Kuznetsov, Patrick McHardy,
	Eric Dumazet
In-Reply-To: <1348813987.7264.41.camel@cr0>

On Fri, Sep 28, 2012 at 02:33:07PM +0800, Cong Wang wrote:
> On Thu, 2012-09-27 at 10:23 -0400, Neil Horman wrote:
> > On Thu, Sep 27, 2012 at 04:41:01PM +0800, Cong Wang wrote:
> > > Some customer requests this feature, as they stated:
> > > 
> > > 	"This parameter is necessary, especially for software that continually 
> > >         creates many ephemeral processes which open sockets, to avoid socket 
> > >         exhaustion. In many cases, the risk of the exhaustion can be reduced by 
> > >         tuning reuse interval to allow sockets to be reusable earlier.
> > > 
> > >         In commercial Unix systems, this kind of parameters, such as 
> > >         tcp_timewait in AIX and tcp_time_wait_interval in HP-UX, have 
> > >         already been available. Their implementations allow users to tune 
> > >         how long they keep TCP connection as TIME-WAIT state on the 
> > >         millisecond time scale."
> > > 
> > > We indeed have "tcp_tw_reuse" and "tcp_tw_recycle", but these tunings
> > > are not equivalent in that they cannot be tuned directly on the time
> > > scale nor in a safe way, as some combinations of tunings could still
> > > cause some problem in NAT. And, I think second scale is enough, we don't
> > > have to make it in millisecond time scale.
> > > 
> > I think I have a little difficulty seeing how this does anything other than
> > pay lip service to actually having sockets spend time in TIME_WAIT state.  That
> > is to say, I see users using this just to make the pain stop.  If we wait
> > less time than it takes to be sure that a connection isn't being reused (either
> > by waiting two segment lifetimes, or by checking timestamps), then you might as
> > well not wait at all.  I see how it's tempting to be able to say "Just don't wait
> > as long", but it seems that there's no difference between waiting half as long as
> > the RFC mandates, and waiting no time at all.  Neither is a good idea.
> 
> I don't think reducing TIME_WAIT is a good idea either, but there must
> be some reason behind as several UNIX provides a microsecond-scale
> tuning interface, or maybe in non-recycle mode, their RTO is much less
> than 2*MSL?
> 
My guess?  Cash was the reason.  I certainly wasn't there for any of those
developments, but a setting like this just smells to me like some customer waved
some cash under IBM's/HP's/Sun's nose and said, "We'd like to get our tcp
sockets back to CLOSED state faster, what can you do for us?"

> > 
> > Given the problem you're trying to solve here, I'll ask the standard question in
> > response: How does using SO_REUSEADDR not solve the problem?  Alternatively, in
> > a pinch, why not reduce the tcp_max_tw_buckets sufficiently to start forcing
> > TIME_WAIT sockets back into CLOSED state?
> > 
> > The code looks fine, but the idea really doesn't seem like a good plan to me.
> > I'm sure HPUX/Solaris/AIX/etc have done this in response to customer demand, but
> > that doesn't make it the right solution.
> > 
> 
> *I think* the customer doesn't want to modify their applications, so
> that is why they don't use SO_REUSEADDR.
> 
Well, ok, that's a legitimate distro problem.  What it's not is an upstream
problem.  Fixing the application is the right thing to do, whether or not they
want to.

> I didn't know tcp_max_tw_buckets can do the trick, nor the customer, so
> this is a side effect of tcp_max_tw_buckets? Is it documented?
man 7 tcp:
tcp_max_tw_buckets (integer; default: see below; since Linux 2.4)
	The maximum number of sockets in TIME_WAIT state allowed in the
	system.  This limit exists only  to  prevent  simple
	denial-of-service attacks.   The  default  value of NR_FILE*2 is
        adjusted depending on the memory in the system.  If this number
	is exceeded, the socket is closed and a warning is printed.

Neil

^ permalink raw reply

* [PATCH] flexcan: disable bus error interrupts for the i.MX28
From: Wolfgang Grandegger @ 2012-09-28 13:17 UTC (permalink / raw)
  To: Linux Netdev List; +Cc: Linux-CAN, Hui Wang, Shawn Guo

Due to a bug in most Flexcan cores, the bus error interrupt needs
to be enabled. Otherwise we don't get any error warning or passive
interrupts. This is _not_ necessary for the i.MX28 and this patch
disables bus error interrupts if "berr-reporting" is not requested.
This avoids bus error flooding, which might cause harm, especially on
low-end systems.

To handle such quirks of the Flexcan cores, a hardware feature flag
has been introduced, also replacing the "hw_ver" variable. So far
nobody could tell what Flexcan core version is available on what
Freescale SOC, apart from the i.MX6Q and P1010, and which bugs or
features are present on the various "hw_rev".

CC: Hui Wang <jason77.wang@gmail.com>
CC: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
---

Concerning the bug, I know that the i.MX35 does have it. Maybe Flexcan
cores other than the one on the i.MX28 do *not* have it either. If you
have a chance, please check on the P1010, i.MX6Q, i.MX51, i.MX53,
etc.

Wolfgang.


 drivers/net/can/flexcan.c |   29 +++++++++++++++++++----------
 1 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index c5f1431..c78ecfc 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -144,6 +144,10 @@
 
 #define FLEXCAN_MB_CODE_MASK		(0xf0ffffff)
 
+/* FLEXCAN hardware feature flags */
+#define FLEXCAN_HAS_V10_FEATURES	BIT(1) /* For core version >= 10 */
+#define FLEXCAN_HAS_BROKEN_ERR_STATE	BIT(2) /* Broken error state handling */
+
 /* Structure of the message buffer */
 struct flexcan_mb {
 	u32 can_ctrl;
@@ -178,7 +182,7 @@ struct flexcan_regs {
 };
 
 struct flexcan_devtype_data {
-	u32 hw_ver;	/* hardware controller version */
+	u32 features;	/* hardware controller features */
 };
 
 struct flexcan_priv {
@@ -197,11 +201,11 @@ struct flexcan_priv {
 };
 
 static struct flexcan_devtype_data fsl_p1010_devtype_data = {
-	.hw_ver = 3,
+	.features = FLEXCAN_HAS_BROKEN_ERR_STATE,
 };
-
+static struct flexcan_devtype_data fsl_imx28_devtype_data;
 static struct flexcan_devtype_data fsl_imx6q_devtype_data = {
-	.hw_ver = 10,
+	.features = FLEXCAN_HAS_V10_FEATURES | FLEXCAN_HAS_BROKEN_ERR_STATE,
 };
 
 static const struct can_bittiming_const flexcan_bittiming_const = {
@@ -741,15 +745,19 @@ static int flexcan_chip_start(struct net_device *dev)
 	 * enable tx and rx warning interrupt
 	 * enable bus off interrupt
 	 * (== FLEXCAN_CTRL_ERR_STATE)
-	 *
-	 * _note_: we enable the "error interrupt"
-	 * (FLEXCAN_CTRL_ERR_MSK), too. Otherwise we don't get any
-	 * warning or bus passive interrupts.
 	 */
 	reg_ctrl = flexcan_read(&regs->ctrl);
 	reg_ctrl &= ~FLEXCAN_CTRL_TSYN;
 	reg_ctrl |= FLEXCAN_CTRL_BOFF_REC | FLEXCAN_CTRL_LBUF |
-		FLEXCAN_CTRL_ERR_STATE | FLEXCAN_CTRL_ERR_MSK;
+		FLEXCAN_CTRL_ERR_STATE;
+	/*
+	 * enable the "error interrupt" (FLEXCAN_CTRL_ERR_MSK),
+	 * on most Flexcan cores, too. Otherwise we don't get
+	 * any error warning or passive interrupts.
+	 */
+	if (priv->devtype_data->features & FLEXCAN_HAS_BROKEN_ERR_STATE ||
+	    priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
+		reg_ctrl |= FLEXCAN_CTRL_ERR_MSK;
 
 	/* save for later use */
 	priv->reg_ctrl_default = reg_ctrl;
@@ -772,7 +780,7 @@ static int flexcan_chip_start(struct net_device *dev)
 	flexcan_write(0x0, &regs->rx14mask);
 	flexcan_write(0x0, &regs->rx15mask);
 
-	if (priv->devtype_data->hw_ver >= 10)
+	if (priv->devtype_data->features & FLEXCAN_HAS_V10_FEATURES)
 		flexcan_write(0x0, &regs->rxfgmask);
 
 	flexcan_transceiver_switch(priv, 1);
@@ -954,6 +962,7 @@ static void __devexit unregister_flexcandev(struct net_device *dev)
 
 static const struct of_device_id flexcan_of_match[] = {
 	{ .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, },
+	{ .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, },
 	{ .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, },
 	{ /* sentinel */ },
 };
-- 
1.7.7.6


^ permalink raw reply related

* Re: [PATCH] vlan: Make it possible to add vlan with id 4095
From: Paulius Zaleckas @ 2012-09-28 13:29 UTC (permalink / raw)
  To: kaber, netdev
In-Reply-To: <20120928123258.9454.95197.stgit@localhost.localdomain>

On 09/28/2012 03:32 PM, Paulius Zaleckas wrote:
> vconfig help tells that vlan_id should be 0-4095, but fails
> with 4095.
>
> There is an off-by-one bug while evaluating vlan_id.
> Fix it by evaluating against count(4096), not mask(0x0fff = 4095).

On the other hand 4095 is reserved by 802.1Q...

http://en.wikipedia.org/wiki/IEEE_802.1Q
VLAN Identifier (VID): a 12-bit field specifying the VLAN to which the 
frame belongs. The hexadecimal values of 0x000 and 0xFFF are reserved. 
All other values may be used as VLAN identifiers, allowing up to 4,094 
VLANs. The reserved value 0x000 indicates that the frame does not belong 
to any VLAN; in this case, the 802.1Q tag specifies only a priority and 
is referred to as a priority tag.

So maybe we should fix vconfig help?

^ permalink raw reply

* Re: [PATCH net-next 3/3] ipv4: gre: add GRO capability
From: Eric Dumazet @ 2012-09-28 14:04 UTC (permalink / raw)
  To: Jesse Gross; +Cc: David Miller, netdev
In-Reply-To: <CAEP_g=8B7xZPxye0Kuu-EVKpTDt1a3nsJKb61aaYaqOGsYGx8w@mail.gmail.com>

On Thu, 2012-09-27 at 15:03 -0700, Jesse Gross wrote:

> We wouldn't actually do the decapsulation at the point of GRO.  This
> is actually pretty similar to what we do with TCP - we merge TCP
> payloads even though we haven't done any real IP processing yet.
> However, we do check firewall rules later if we actually hit the IP
> stack.  GRE would work the same way in this case.
> 
> What I'm describing is pretty much exactly what NICs will be doing, so
> if that doesn't work we'll have a problem...

GRO's ability to truly aggregate data is kind of limited to some
workloads. How NICs will handle interleaved flows I don't really know.

What you describe needs a serious GRO preliminary work, because it
depends on napi_gro_flush() being called from time to time, while we
need something else, more fine grained.

(I am pretty sure GRO needs some love from us, it looks like some
packets can stay a long time in gro_list. It would be nice if it was
able to reorder packets (from same flow) as well)

Anyway, my changes are self-contained in a new file and non intrusive.

As soon as we can provide a better alternative we can revert them ?

Thanks

^ permalink raw reply

* RE: [PATCH 3/3] net/mlx4_en: Add HW timestamping (TS) support
From: Yevgeny Petrilin @ 2012-09-28 14:10 UTC (permalink / raw)
  To: Richard Cochran
  Cc: davem@davemloft.net, netdev@vger.kernel.org, Eugenia Emantayev
In-Reply-To: <20120928111147.GA7474@netboy.at.omicron.at>

> > diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> > index edd9cb8..10fa453 100644
> > --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> > +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> > @@ -1517,6 +1517,60 @@ static int mlx4_en_change_mtu(struct net_device
> *dev, int new_mtu)
> >  	return 0;
> >  }
> >
> > +static int mlx4_en_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
> > +{
> > +	struct mlx4_en_priv *priv = netdev_priv(dev);
> > +	struct mlx4_en_dev *mdev = priv->mdev;
> > +	struct hwtstamp_config config;
> > +
> > +	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
> > +		return -EFAULT;
> > +
> > +	/* reserved for future extensions */
> > +	if (config.flags)
> > +		return -EINVAL;
> > +
> > +	/* device doesn't support time stamping */
> > +	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS_EN))
> > +		return -EINVAL;
> > +
> > +	/* TX HW timestamp */
> > +	switch (config.tx_type) {
> > +	case HWTSTAMP_TX_OFF:
> > +	case HWTSTAMP_TX_ON:
> > +		break;
> > +	default:
> > +		return -ERANGE;
> > +	}
> > +
> > +	/* RX HW timestamp */
> > +	switch (config.rx_filter) {
> > +	case HWTSTAMP_FILTER_NONE:
> > +	case HWTSTAMP_FILTER_ALL:
> > +		break;
> > +	default:
> 
> Instead of rejecting the HWTSTAMP_FILTER_PTP_ codes out of hand, you
> should just accept them, and return by promoting rx_filter to
> HWTSTAMP_FILTER_ALL.
> 
> [ See Documentation/networking/timestamping.txt ]
> 
> > +		return -ERANGE;
> > +	}
> > +
> > +	if (mlx4_en_timestamp_config(dev, config.tx_type, config.rx_filter)) {
> > +		config.tx_type = HWTSTAMP_TX_OFF;
> > +		config.rx_filter = HWTSTAMP_FILTER_NONE;
> > +	}
> > +
> > +	return copy_to_user(ifr->ifr_data, &config,
> > +			    sizeof(config)) ? -EFAULT : 0;
> > +}
> 
> ...
> 
> > @@ -363,6 +368,9 @@ struct mlx4_en_dev {
> >  	u32                     priv_pdn;
> >  	spinlock_t              uar_lock;
> >  	u8			mac_removed[MLX4_MAX_PORTS + 1];
> > +	struct cyclecounter	cycles;
> > +	struct timecounter	clock;
> > +	struct timecompare	compare;
> 
> I am working on a patch to remove the timecompare stuff altogether
> (after removing it from blackfin). It is and was a bad idea, I would
> hate to see new drivers using it.
> 
> I strongly recommend just offering raw hardware time stamps in
> nanosecond resolution. Also, why not expose your device as a PTP
> Hardware Clock?
> 
> Thanks,
> Richard

Hello Richard,
Thanks for your feedback,
Will address all your comments in V1 of this patchset.

Thanks,
Yevgeny 

^ permalink raw reply

* network-namespace and unix-domain-sockets
From: Dilip Daya @ 2012-09-28 14:12 UTC (permalink / raw)
  To: ebiederm; +Cc: Linux Netdev List

[-- Attachment #1: Type: text/plain, Size: 1269 bytes --]

Hi Eric,

=> kernel 3.6.0-rc6 + network-namespace + unix-domain-sockets

srv/cli sample programs at:
<http://tkhanson.net/cgit.cgi/misc.git/plain/unixdomain/Unix_domain_sockets.html>
Executing UNIX domain sockets between two network-namespaces fails but
successful if both srv and cli are executed within a network-namespace.

Test results:

(1) Executing both srv and cli within default/host network-namespace:

On host/default netns:
# ./cli 
testing...
^C

On host/default netns:
# ./srv 
read 11 bytes: testing...

EOF


(2) Executing srv in default/host netns and cli within netns named
netns0:

On host/default netns:
# ip netns
netns1
netns0

On host/default netns:
# ./srv 

Within netns named netns0:
# ip netns exec netns0 ./cli
connect error: Connection refused


=> I find difference between __unix_find_socket_byname()  and
                              *unix_find_socket_byinode()

	---
		if (!net_eq(sock_net(s), net))
			continue;
	---

=> Is there an explanation for why __unix_find_socket_byname() was left
   netns aware and *unix_find_socket_byinode() is not netns aware ?

=> Please see attached patch. Is this valid? or will it break something?
   I've tested network namespaces with this patch applied and I did not 
   find any issues.

-DilipD.

[-- Attachment #2: unix_sockets_netns.patch --]
[-- Type: text/x-patch, Size: 2248 bytes --]

--- linux-3.6-rc6/net/unix/af_unix.c_orig	2012-09-27 14:25:27.000000000 -0400
+++ linux-3.6-rc6/net/unix/af_unix.c	2012-09-27 14:44:41.000000000 -0400
@@ -258,8 +258,7 @@ static inline void unix_insert_socket(st
 	spin_unlock(&unix_table_lock);
 }
 
-static struct sock *__unix_find_socket_byname(struct net *net,
-					      struct sockaddr_un *sunname,
+static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
 					      int len, int type, unsigned int hash)
 {
 	struct sock *s;
@@ -268,9 +267,6 @@ static struct sock *__unix_find_socket_b
 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 		struct unix_sock *u = unix_sk(s);
 
-		if (!net_eq(sock_net(s), net))
-			continue;
-
 		if (u->addr->len == len &&
 		    !memcmp(u->addr->name, sunname, len))
 			goto found;
@@ -280,15 +276,14 @@ found:
 	return s;
 }
 
-static inline struct sock *unix_find_socket_byname(struct net *net,
-						   struct sockaddr_un *sunname,
+static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
 						   int len, int type,
 						   unsigned int hash)
 {
 	struct sock *s;
 
 	spin_lock(&unix_table_lock);
-	s = __unix_find_socket_byname(net, sunname, len, type, hash);
+	s = __unix_find_socket_byname(sunname, len, type, hash);
 	if (s)
 		sock_hold(s);
 	spin_unlock(&unix_table_lock);
@@ -740,7 +735,7 @@ retry:
 	spin_lock(&unix_table_lock);
 	ordernum = (ordernum+1)&0xFFFFF;
 
-	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
+	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
 				      addr->hash)) {
 		spin_unlock(&unix_table_lock);
 		/*
@@ -805,7 +800,7 @@ static struct sock *unix_find_other(stru
 		}
 	} else {
 		err = -ECONNREFUSED;
-		u = unix_find_socket_byname(net, sunname, len, type, hash);
+		u = unix_find_socket_byname(sunname, len, type, hash);
 		if (u) {
 			struct dentry *dentry;
 			dentry = unix_sk(u)->path.dentry;
@@ -913,7 +908,7 @@ static int unix_bind(struct socket *sock
 	} else {
 		spin_lock(&unix_table_lock);
 		err = -EADDRINUSE;
-		if (__unix_find_socket_byname(net, sunaddr, addr_len,
+		if (__unix_find_socket_byname(sunaddr, addr_len,
 					      sk->sk_type, hash)) {
 			unix_release_addr(addr);
 			goto out_unlock;

^ permalink raw reply

* Re: Possible networking regression in 3.6.0
From: Chris Clayton @ 2012-09-28 14:28 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, gpiez
In-Reply-To: <1348831592.5093.2251.camel@edumazet-glaptop>



On 09/28/12 12:26, Eric Dumazet wrote:
> On Fri, 2012-09-28 at 10:22 +0100, Chris Clayton wrote:
>
>> No, the WinXP guest is configured with a fixed IP address
>> (192.168.200.1). Subnet mask is 255.255.255.0, and default gateway is
>> 192.168.200.254. DNS is 192.168.0.1.
>>
>
> I have no problem with such a setup, with a linux guest.
>
> Could you send again a tcpdump, but including link-level header ?
> (option -e)
>
> Ideally, you could send two traces, one taken on tap0, and another taken
> on eth0.
>

Two traces

Trace 1 - tap0 (192.168.200.254) whilst pinging router (192.168.0.1) from 
KVM guest (192.168.200.1):

15:03:14.953599 52:54:0c:3b:17:38 > Broadcast, ethertype ARP (0x0806), 
length 42: Request who-has 192.168.200.254 tell 192.168.200.1, length 28
15:03:14.953617 9e:c3:0c:c8:65:8d > 52:54:0c:3b:17:38, ethertype ARP 
(0x0806), length 42: Reply 192.168.200.254 is-at 9e:c3:0c:c8:65:8d, 
length 28
15:03:14.953725 52:54:0c:3b:17:38 > 9e:c3:0c:c8:65:8d, ethertype IPv4 
(0x0800), length 74: 192.168.200.1 > 192.168.0.1: ICMP echo request, id 
512, seq 5376, length 40
15:03:20.427278 52:54:0c:3b:17:38 > 9e:c3:0c:c8:65:8d, ethertype IPv4 
(0x0800), length 74: 192.168.200.1 > 192.168.0.1: ICMP echo request, id 
512, seq 5632, length 40
15:03:25.942215 52:54:0c:3b:17:38 > 9e:c3:0c:c8:65:8d, ethertype IPv4 
(0x0800), length 74: 192.168.200.1 > 192.168.0.1: ICMP echo request, id 
512, seq 5888, length 40
15:03:31.455578 52:54:0c:3b:17:38 > 9e:c3:0c:c8:65:8d, ethertype IPv4 
(0x0800), length 74: 192.168.200.1 > 192.168.0.1: ICMP echo request, id 
512, seq 6144, length 40

Trace 2 - eth0 (192.168.0.40) whilst pinging router (192.168.0.1) from 
KVM guest (192.168.200.1):

15:04:06.427863 5c:9a:d8:5c:63:31 > 00:1f:33:80:09:44, ethertype IPv4 
(0x0800), length 74: 192.168.0.40 > 192.168.0.1: ICMP echo request, id 
512, seq 6400, length 40
15:04:06.432100 00:1f:33:80:09:44 > 5c:9a:d8:5c:63:31, ethertype IPv4 
(0x0800), length 74: 192.168.0.1 > 192.168.0.40: ICMP echo reply, id 
512, seq 6400, length 40
15:04:11.430877 00:1f:33:80:09:44 > 5c:9a:d8:5c:63:31, ethertype ARP 
(0x0806), length 60: Request who-has 192.168.0.40 tell 192.168.0.1, 
length 46
15:04:11.430898 5c:9a:d8:5c:63:31 > 00:1f:33:80:09:44, ethertype ARP 
(0x0806), length 42: Reply 192.168.0.40 is-at 5c:9a:d8:5c:63:31, length 28
15:04:11.567319 5c:9a:d8:5c:63:31 > 00:1f:33:80:09:44, ethertype IPv4 
(0x0800), length 74: 192.168.0.40 > 192.168.0.1: ICMP echo request, id 
512, seq 6656, length 40
15:04:11.571534 00:1f:33:80:09:44 > 5c:9a:d8:5c:63:31, ethertype IPv4 
(0x0800), length 74: 192.168.0.1 > 192.168.0.40: ICMP echo reply, id 
512, seq 6656, length 40
15:04:16.577137 5c:9a:d8:5c:63:31 > 00:1f:33:80:09:44, ethertype ARP 
(0x0806), length 42: Request who-has 192.168.0.1 tell 192.168.0.40, 
length 28
15:04:16.580373 00:1f:33:80:09:44 > 5c:9a:d8:5c:63:31, ethertype ARP 
(0x0806), length 60: Reply 192.168.0.1 is-at 00:1f:33:80:09:44, length 46
15:04:17.083328 5c:9a:d8:5c:63:31 > 00:1f:33:80:09:44, ethertype IPv4 
(0x0800), length 74: 192.168.0.40 > 192.168.0.1: ICMP echo request, id 
512, seq 6912, length 40
15:04:17.086854 00:1f:33:80:09:44 > 5c:9a:d8:5c:63:31, ethertype IPv4 
(0x0800), length 74: 192.168.0.1 > 192.168.0.40: ICMP echo reply, id 
512, seq 6912, length 40
15:04:22.585766 5c:9a:d8:5c:63:31 > 00:1f:33:80:09:44, ethertype IPv4 
(0x0800), length 74: 192.168.0.40 > 192.168.0.1: ICMP echo request, id 
512, seq 7168, length 40
15:04:22.589989 00:1f:33:80:09:44 > 5c:9a:d8:5c:63:31, ethertype IPv4 
(0x0800), length 74: 192.168.0.1 > 192.168.0.40: ICMP echo reply, id 
512, seq 7168, length 40
15:04:32.240422 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 446: 192.168.0.112.2704 > 239.255.255.250.1900: UDP, 
length 404
15:04:32.241404 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 455: 192.168.0.112.2704 > 239.255.255.250.1900: UDP, 
length 413
15:04:32.242915 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 494: 192.168.0.112.2704 > 239.255.255.250.1900: UDP, 
length 452
15:04:32.243986 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 490: 192.168.0.112.1434 > 239.255.255.250.1900: UDP, 
length 448
15:04:32.245476 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 486: 192.168.0.112.2901 > 239.255.255.250.1900: UDP, 
length 444
15:04:32.246545 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 486: 192.168.0.112.3828 > 239.255.255.250.1900: UDP, 
length 444
15:04:32.342459 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 446: 192.168.0.112.4445 > 239.255.255.250.1900: UDP, 
length 404
15:04:32.343506 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 455: 192.168.0.112.4445 > 239.255.255.250.1900: UDP, 
length 413
15:04:32.345017 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 494: 192.168.0.112.4445 > 239.255.255.250.1900: UDP, 
length 452
15:04:32.346087 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 490: 192.168.0.112.2735 > 239.255.255.250.1900: UDP, 
length 448
15:04:32.348314 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 486: 192.168.0.112.4940 > 239.255.255.250.1900: UDP, 
length 444
15:04:32.349362 00:19:fb:be:cb:55 > 01:00:5e:7f:ff:fa, ethertype IPv4 
(0x0800), length 486: 192.168.0.112.1029 > 239.255.255.250.1900: UDP, 
length 444

The second trace seems to contain some upnp-related traffic involving my 
satellite TV box. If it would help, I can turn that off when my wife 
isn't watching TV, and run the traces again.

Chris

>
>
>
>

^ permalink raw reply

* [net-next PATCH 1/4] be2net: remove type argument of be_cmd_mac_addr_query()
From: Sathya Perla @ 2012-09-28 14:39 UTC (permalink / raw)
  To: netdev; +Cc: Sathya Perla
In-Reply-To: <1348843184-22214-1-git-send-email-sathya.perla@emulex.com>

All invocations of this routine use the same type value.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c |    4 ++--
 drivers/net/ethernet/emulex/benet/be_cmds.h |    2 +-
 drivers/net/ethernet/emulex/benet/be_main.c |   18 ++++++------------
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 701b3e9..6fbfb20 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -717,7 +717,7 @@ int be_cmd_eq_create(struct be_adapter *adapter,
 
 /* Use MCC */
 int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
-			u8 type, bool permanent, u32 if_handle, u32 pmac_id)
+			  bool permanent, u32 if_handle, u32 pmac_id)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_mac_query *req;
@@ -734,7 +734,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
 
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
 		OPCODE_COMMON_NTWK_MAC_QUERY, sizeof(*req), wrb, NULL);
-	req->type = type;
+	req->type = MAC_ADDRESS_TYPE_NETWORK;
 	if (permanent) {
 		req->permanent = 1;
 	} else {
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 250f19b..1f5b839 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1687,7 +1687,7 @@ struct be_cmd_req_set_ext_fat_caps {
 extern int be_pci_fnum_get(struct be_adapter *adapter);
 extern int be_fw_wait_ready(struct be_adapter *adapter);
 extern int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
-			u8 type, bool permanent, u32 if_handle, u32 pmac_id);
+				 bool permanent, u32 if_handle, u32 pmac_id);
 extern int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
 			u32 if_id, u32 *pmac_id, u32 domain);
 extern int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id,
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 84379f4..fa17430 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -241,9 +241,8 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	status = be_cmd_mac_addr_query(adapter, current_mac,
-				MAC_ADDRESS_TYPE_NETWORK, false,
-				adapter->if_handle, 0);
+	status = be_cmd_mac_addr_query(adapter, current_mac, false,
+				       adapter->if_handle, 0);
 	if (status)
 		goto err;
 
@@ -2693,21 +2692,16 @@ static int be_get_mac_addr(struct be_adapter *adapter, u8 *mac, u32 if_handle,
 		status = be_cmd_get_mac_from_list(adapter, mac,
 						  active_mac, pmac_id, 0);
 		if (*active_mac) {
-			status = be_cmd_mac_addr_query(adapter, mac,
-						       MAC_ADDRESS_TYPE_NETWORK,
-						       false, if_handle,
-						       *pmac_id);
+			status = be_cmd_mac_addr_query(adapter, mac, false,
+						       if_handle, *pmac_id);
 		}
 	} else if (be_physfn(adapter)) {
 		/* For BE3, for PF get permanent MAC */
-		status = be_cmd_mac_addr_query(adapter, mac,
-					       MAC_ADDRESS_TYPE_NETWORK, true,
-					       0, 0);
+		status = be_cmd_mac_addr_query(adapter, mac, true, 0, 0);
 		*active_mac = false;
 	} else {
 		/* For BE3, for VF get soft MAC assigned by PF*/
-		status = be_cmd_mac_addr_query(adapter, mac,
-					       MAC_ADDRESS_TYPE_NETWORK, false,
+		status = be_cmd_mac_addr_query(adapter, mac, false,
 					       if_handle, 0);
 		*active_mac = true;
 	}
-- 
1.7.4

^ permalink raw reply related

* [net-next PATCH 0/4] fixes v2
From: Sathya Perla @ 2012-09-28 14:39 UTC (permalink / raw)
  To: netdev; +Cc: Sathya Perla

Resending the patch series minus the patch to remove the AMAP macros.
Pls apply.

Sathya Perla (4):
  be2net: remove type argument of be_cmd_mac_addr_query()
  be2net: fix wrong handling of be_setup() failure in be_probe()
  be2net: cleanup code related to be_link_status_query()
  be2net: fixup log messages

 drivers/net/ethernet/emulex/benet/be.h         |    1 -
 drivers/net/ethernet/emulex/benet/be_cmds.c    |   53 +++++++++++++++------
 drivers/net/ethernet/emulex/benet/be_cmds.h    |    6 +-
 drivers/net/ethernet/emulex/benet/be_ethtool.c |   57 +++++------------------
 drivers/net/ethernet/emulex/benet/be_main.c    |   60 ++++++++++++++---------
 5 files changed, 89 insertions(+), 88 deletions(-)

-- 
1.7.4

^ permalink raw reply

* [net-next PATCH 2/4] be2net: fix wrong handling of be_setup() failure in be_probe()
From: Sathya Perla @ 2012-09-28 14:39 UTC (permalink / raw)
  To: netdev; +Cc: Sathya Perla
In-Reply-To: <1348843184-22214-1-git-send-email-sathya.perla@emulex.com>


Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index fa17430..b712091 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3889,7 +3889,7 @@ static int __devinit be_probe(struct pci_dev *pdev,
 
 	status = be_setup(adapter);
 	if (status)
-		goto msix_disable;
+		goto stats_clean;
 
 	be_netdev_init(netdev);
 	status = register_netdev(netdev);
@@ -3910,8 +3910,6 @@ static int __devinit be_probe(struct pci_dev *pdev,
 
 unsetup:
 	be_clear(adapter);
-msix_disable:
-	be_msix_disable(adapter);
 stats_clean:
 	be_stats_cleanup(adapter);
 ctrl_clean:
-- 
1.7.4

^ permalink raw reply related

* [net-next PATCH 3/4] be2net: cleanup code related to be_link_status_query()
From: Sathya Perla @ 2012-09-28 14:39 UTC (permalink / raw)
  To: netdev; +Cc: Sathya Perla
In-Reply-To: <1348843184-22214-1-git-send-email-sathya.perla@emulex.com>

1) link_status_query() is always called to query the link-speed (speed
after applying qos). When there is no qos setting, link-speed is derived from
port-speed. Do all this inside this routine and hide this from the callers.

2) adapter->phy.forced_port_speed is not being set anywhere after being
initialized. Get rid of this variable.

3) Ignore async link_speed notifications till the initial value has been
fetched from FW.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be.h         |    1 -
 drivers/net/ethernet/emulex/benet/be_cmds.c    |   46 ++++++++++++++-----
 drivers/net/ethernet/emulex/benet/be_cmds.h    |    4 +-
 drivers/net/ethernet/emulex/benet/be_ethtool.c |   57 +++++-------------------
 drivers/net/ethernet/emulex/benet/be_main.c    |    4 +-
 5 files changed, 48 insertions(+), 64 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 5b622993..cf4c05b 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -337,7 +337,6 @@ struct phy_info {
 	u16 auto_speeds_supported;
 	u16 fixed_speeds_supported;
 	int link_speed;
-	int forced_port_speed;
 	u32 dac_cable_len;
 	u32 advertising;
 	u32 supported;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 6fbfb20..46a19af 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -165,14 +165,13 @@ static void be_async_grp5_cos_priority_process(struct be_adapter *adapter,
 	}
 }
 
-/* Grp5 QOS Speed evt */
+/* Grp5 QOS Speed evt: qos_link_speed is in units of 10 Mbps */
 static void be_async_grp5_qos_speed_process(struct be_adapter *adapter,
 		struct be_async_event_grp5_qos_link_speed *evt)
 {
-	if (evt->physical_port == adapter->port_num) {
-		/* qos_link_speed is in units of 10 Mbps */
-		adapter->phy.link_speed = evt->qos_link_speed * 10;
-	}
+	if (adapter->phy.link_speed >= 0 &&
+	    evt->physical_port == adapter->port_num)
+		adapter->phy.link_speed = le16_to_cpu(evt->qos_link_speed) * 10;
 }
 
 /*Grp5 PVID evt*/
@@ -1326,9 +1325,28 @@ err:
 	return status;
 }
 
-/* Uses synchronous mcc */
-int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed,
-			     u16 *link_speed, u8 *link_status, u32 dom)
+static int be_mac_to_link_speed(int mac_speed)
+{
+	switch (mac_speed) {
+	case PHY_LINK_SPEED_ZERO:
+		return 0;
+	case PHY_LINK_SPEED_10MBPS:
+		return 10;
+	case PHY_LINK_SPEED_100MBPS:
+		return 100;
+	case PHY_LINK_SPEED_1GBPS:
+		return 1000;
+	case PHY_LINK_SPEED_10GBPS:
+		return 10000;
+	}
+	return 0;
+}
+
+/* Uses synchronous mcc
+ * Returns link_speed in Mbps
+ */
+int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed,
+			     u8 *link_status, u32 dom)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_link_status *req;
@@ -1357,11 +1375,13 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed,
 	status = be_mcc_notify_wait(adapter);
 	if (!status) {
 		struct be_cmd_resp_link_status *resp = embedded_payload(wrb);
-		if (resp->mac_speed != PHY_LINK_SPEED_ZERO) {
-			if (link_speed)
-				*link_speed = le16_to_cpu(resp->link_speed);
-			if (mac_speed)
-				*mac_speed = resp->mac_speed;
+		if (link_speed) {
+			*link_speed = resp->link_speed ?
+				      le16_to_cpu(resp->link_speed) * 10 :
+				      be_mac_to_link_speed(resp->mac_speed);
+
+			if (!resp->logical_link_status)
+				*link_speed = 0;
 		}
 		if (link_status)
 			*link_status = resp->logical_link_status;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 1f5b839..0936e21 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1714,8 +1714,8 @@ extern int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
 			int type);
 extern int be_cmd_rxq_destroy(struct be_adapter *adapter,
 			struct be_queue_info *q);
-extern int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed,
-				    u16 *link_speed, u8 *link_status, u32 dom);
+extern int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed,
+				    u8 *link_status, u32 dom);
 extern int be_cmd_reset(struct be_adapter *adapter);
 extern int be_cmd_get_stats(struct be_adapter *adapter,
 			struct be_dma_mem *nonemb_cmd);
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index c0e7006..8e6fb0b 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -512,28 +512,6 @@ static u32 convert_to_et_setting(u32 if_type, u32 if_speeds)
 	return val;
 }
 
-static int convert_to_et_speed(u32 be_speed)
-{
-	int et_speed = SPEED_10000;
-
-	switch (be_speed) {
-	case PHY_LINK_SPEED_10MBPS:
-		et_speed = SPEED_10;
-		break;
-	case PHY_LINK_SPEED_100MBPS:
-		et_speed = SPEED_100;
-		break;
-	case PHY_LINK_SPEED_1GBPS:
-		et_speed = SPEED_1000;
-		break;
-	case PHY_LINK_SPEED_10GBPS:
-		et_speed = SPEED_10000;
-		break;
-	}
-
-	return et_speed;
-}
-
 bool be_pause_supported(struct be_adapter *adapter)
 {
 	return (adapter->phy.interface_type == PHY_TYPE_SFP_PLUS_10GB ||
@@ -544,27 +522,16 @@ bool be_pause_supported(struct be_adapter *adapter)
 static int be_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
-	u8 port_speed = 0;
-	u16 link_speed = 0;
 	u8 link_status;
-	u32 et_speed = 0;
+	u16 link_speed = 0;
 	int status;
 
-	if (adapter->phy.link_speed < 0 || !(netdev->flags & IFF_UP)) {
-		if (adapter->phy.forced_port_speed < 0) {
-			status = be_cmd_link_status_query(adapter, &port_speed,
-						&link_speed, &link_status, 0);
-			if (!status)
-				be_link_status_update(adapter, link_status);
-			if (link_speed)
-				et_speed = link_speed * 10;
-			else if (link_status)
-				et_speed = convert_to_et_speed(port_speed);
-		} else {
-			et_speed = adapter->phy.forced_port_speed;
-		}
-
-		ethtool_cmd_speed_set(ecmd, et_speed);
+	if (adapter->phy.link_speed < 0) {
+		status = be_cmd_link_status_query(adapter, &link_speed,
+						  &link_status, 0);
+		if (!status)
+			be_link_status_update(adapter, link_status);
+		ethtool_cmd_speed_set(ecmd, link_speed);
 
 		status = be_cmd_get_phy_info(adapter);
 		if (status)
@@ -773,8 +740,8 @@ static void
 be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
-	u8 mac_speed = 0;
-	u16 qos_link_speed = 0;
+	int status;
+	u8 link_status = 0;
 
 	memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM);
 
@@ -798,11 +765,11 @@ be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data)
 		test->flags |= ETH_TEST_FL_FAILED;
 	}
 
-	if (be_cmd_link_status_query(adapter, &mac_speed,
-				     &qos_link_speed, NULL, 0) != 0) {
+	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
+	if (status) {
 		test->flags |= ETH_TEST_FL_FAILED;
 		data[4] = -1;
-	} else if (!mac_speed) {
+	} else if (!link_status) {
 		test->flags |= ETH_TEST_FL_FAILED;
 		data[4] = 1;
 	}
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index b712091..4855dd6 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2440,8 +2440,7 @@ static int be_open(struct net_device *netdev)
 		be_eq_notify(adapter, eqo->q.id, true, false, 0);
 	}
 
-	status = be_cmd_link_status_query(adapter, NULL, NULL,
-					  &link_status, 0);
+	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
 	if (!status)
 		be_link_status_update(adapter, link_status);
 
@@ -2670,7 +2669,6 @@ static void be_setup_init(struct be_adapter *adapter)
 	adapter->be3_native = false;
 	adapter->promiscuous = false;
 	adapter->eq_next_idx = 0;
-	adapter->phy.forced_port_speed = -1;
 }
 
 static int be_get_mac_addr(struct be_adapter *adapter, u8 *mac, u32 if_handle,
-- 
1.7.4

^ permalink raw reply related

* [net-next PATCH 4/4] be2net: fixup log messages
From: Sathya Perla @ 2012-09-28 14:39 UTC (permalink / raw)
  To: netdev; +Cc: Sathya Perla
In-Reply-To: <1348843184-22214-1-git-send-email-sathya.perla@emulex.com>

Added and modified a few log messages mostly in probe path.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c |    3 ++
 drivers/net/ethernet/emulex/benet/be_main.c |   34 ++++++++++++++++++++++----
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 46a19af..af60bb2 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -2425,6 +2425,9 @@ int be_cmd_req_native_mode(struct be_adapter *adapter)
 		struct be_cmd_resp_set_func_cap *resp = embedded_payload(wrb);
 		adapter->be3_native = le32_to_cpu(resp->cap_flags) &
 					CAPABILITY_BE3_NATIVE_ERX_API;
+		if (!adapter->be3_native)
+			dev_warn(&adapter->pdev->dev,
+				 "adapter not in advanced mode\n");
 	}
 err:
 	mutex_unlock(&adapter->mbox_lock);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 4855dd6..6accb0c 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1896,6 +1896,8 @@ static int be_tx_qs_create(struct be_adapter *adapter)
 			return status;
 	}
 
+	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
+		 adapter->num_tx_qs);
 	return 0;
 }
 
@@ -1946,10 +1948,9 @@ static int be_rx_cqs_create(struct be_adapter *adapter)
 			return rc;
 	}
 
-	if (adapter->num_rx_qs != MAX_RX_QS)
-		dev_info(&adapter->pdev->dev,
-			"Created only %d receive queues\n", adapter->num_rx_qs);
-
+	dev_info(&adapter->pdev->dev,
+		 "created %d RSS queue(s) and 1 default RX queue\n",
+		 adapter->num_rx_qs - 1);
 	return 0;
 }
 
@@ -2187,6 +2188,7 @@ static void be_msix_enable(struct be_adapter *adapter)
 {
 #define BE_MIN_MSIX_VECTORS		1
 	int i, status, num_vec, num_roce_vec = 0;
+	struct device *dev = &adapter->pdev->dev;
 
 	/* If RSS queues are not used, need a vec for default RX Q */
 	num_vec = min(be_num_rss_want(adapter), num_online_cpus());
@@ -2211,6 +2213,8 @@ static void be_msix_enable(struct be_adapter *adapter)
 				num_vec) == 0)
 			goto done;
 	}
+
+	dev_warn(dev, "MSIx enable failed\n");
 	return;
 done:
 	if (be_roce_supported(adapter)) {
@@ -2224,6 +2228,7 @@ done:
 		}
 	} else
 		adapter->num_msix_vec = num_vec;
+	dev_info(dev, "enabled %d MSI-x vector(s)\n", adapter->num_msix_vec);
 	return;
 }
 
@@ -3797,6 +3802,23 @@ static bool be_reset_required(struct be_adapter *adapter)
 	return be_find_vfs(adapter, ENABLED) > 0 ? false : true;
 }
 
+static char *mc_name(struct be_adapter *adapter)
+{
+	if (adapter->function_mode & FLEX10_MODE)
+		return "FLEX10";
+	else if (adapter->function_mode & VNIC_MODE)
+		return "vNIC";
+	else if (adapter->function_mode & UMC_ENABLED)
+		return "UMC";
+	else
+		return "";
+}
+
+static inline char *func_name(struct be_adapter *adapter)
+{
+	return be_physfn(adapter) ? "PF" : "VF";
+}
+
 static int __devinit be_probe(struct pci_dev *pdev,
 			const struct pci_device_id *pdev_id)
 {
@@ -3901,8 +3923,8 @@ static int __devinit be_probe(struct pci_dev *pdev,
 
 	be_cmd_query_port_name(adapter, &port_name);
 
-	dev_info(&pdev->dev, "%s: %s port %c\n", netdev->name, nic_name(pdev),
-		 port_name);
+	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
+		 func_name(adapter), mc_name(adapter), port_name);
 
 	return 0;
 
-- 
1.7.4

^ permalink raw reply related

* mlx4_en_alloc_frag allocation failures
From: Shawn Bohrer @ 2012-09-28 15:14 UTC (permalink / raw)
  To: netdev; +Cc: linux-mm, linux-kernel

We've got a new application that is receiving UDP multicast data using
AF_PACKET and writing out the packets in a custom format to disk.  The
packet rates are bursty, but it seems to be roughly 100 Mbps on
average for 1 minute periods.  With this application running all day
we get a lot of these messages:

[1298269.103034] kswapd1: page allocation failure: order:2, mode:0x4020
[1298269.103038] Pid: 80, comm: kswapd1 Not tainted 3.4.9-2.rgm.fc16.x86_64 #1
[1298269.103040] Call Trace:
[1298269.103041]  <IRQ>  [<ffffffff810db746>] warn_alloc_failed+0xf6/0x160
[1298269.103053]  [<ffffffff813c767d>] ? skb_copy_bits+0x16d/0x2c0
[1298269.103058]  [<ffffffff810e83a9>] ? wakeup_kswapd+0x69/0x160
[1298269.103060]  [<ffffffff810df188>] __alloc_pages_nodemask+0x6e8/0x930
[1298269.103064]  [<ffffffff81114316>] alloc_pages_current+0xb6/0x120
[1298269.103070]  [<ffffffffa00c142b>] mlx4_en_alloc_frag+0x16b/0x1e0 [mlx4_en]
[1298269.103073]  [<ffffffffa00c18a0>] mlx4_en_complete_rx_desc+0x120/0x1d0 [mlx4_en]
[1298269.103076]  [<ffffffffa00c27d4>] mlx4_en_process_rx_cq+0x584/0x700 [mlx4_en]
[1298269.103079]  [<ffffffffa00c29ef>] mlx4_en_poll_rx_cq+0x3f/0x80 [mlx4_en]
[1298269.103083]  [<ffffffff813d6569>] net_rx_action+0x119/0x210
[1298269.103086]  [<ffffffff8103c690>] __do_softirq+0xb0/0x220
[1298269.103090]  [<ffffffff8109911d>] ? handle_irq_event+0x4d/0x70
[1298269.103095]  [<ffffffff8148e30c>] call_softirq+0x1c/0x30
[1298269.103100]  [<ffffffff81003ef5>] do_softirq+0x55/0x90
[1298269.103101]  [<ffffffff8103ca65>] irq_exit+0x75/0x80
[1298269.103103]  [<ffffffff8148e853>] do_IRQ+0x63/0xe0
[1298269.103107]  [<ffffffff81485667>] common_interrupt+0x67/0x67
[1298269.103108]  <EOI>  [<ffffffff8148523f>] ? _raw_spin_unlock_irqrestore+0xf/0x20
[1298269.103113]  [<ffffffff811184b1>] compaction_alloc+0x361/0x3f0
[1298269.103115]  [<ffffffff810e29b7>] ? pagevec_lru_move_fn+0xd7/0xf0
[1298269.103118]  [<ffffffff81123d19>] migrate_pages+0xa9/0x470
[1298269.103120]  [<ffffffff81118150>] ? perf_trace_mm_compaction_migratepages+0xd0/0xd0
[1298269.103122]  [<ffffffff81118abb>] compact_zone+0x4cb/0x910
[1298269.103124]  [<ffffffff8111904b>] __compact_pgdat+0x14b/0x190
[1298269.103125]  [<ffffffff8111931d>] compact_pgdat+0x2d/0x30
[1298269.103129]  [<ffffffff810f32b9>] ? fragmentation_index+0x19/0x70
[1298269.103131]  [<ffffffff810eb15f>] balance_pgdat+0x6ef/0x710
[1298269.103133]  [<ffffffff810eb2ca>] kswapd+0x14a/0x390
[1298269.103136]  [<ffffffff810567c0>] ? add_wait_queue+0x60/0x60
[1298269.103138]  [<ffffffff810eb180>] ? balance_pgdat+0x710/0x710
[1298269.103140]  [<ffffffff81055e93>] kthread+0x93/0xa0
[1298269.103142]  [<ffffffff8148e214>] kernel_thread_helper+0x4/0x10
[1298269.103144]  [<ffffffff81055e00>] ? kthread_worker_fn+0x140/0x140
[1298269.103146]  [<ffffffff8148e210>] ? gs_change+0xb/0xb

The kernel is based on a Fedora 16 kernel and actually has the 3.4.10
patches applied.  I can easily test patches or different kernels.

I'm mostly wondering if there is anything that can be done about these
failures?  It appears that these failures have to do with handling
fragmented IP frames, but the majority of the packets this machine
receives should not be fragmented (there are probably some that are).

From a memory management point of view the system has 48GB of RAM, and
typically 44GB of that is page cache.  The dirty pages seem to hover
around 5-6MB and the filesystem/disks don't seem to have any problems
keeping up with writing out the data.

--
Shawn

-- 

---------------------------------------------------------------
This email, along with any attachments, is confidential. If you 
believe you received this message in error, please contact the 
sender immediately and delete all copies of the message.  
Thank you.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [net-next PATCH 4/4] be2net: fixup log messages
From: Joe Perches @ 2012-09-28 15:39 UTC (permalink / raw)
  To: Sathya Perla; +Cc: netdev
In-Reply-To: <e698cd68-540d-4a7a-bd76-2e9c73ddf19c@CMEXHTCAS2.ad.emulex.com>

On Fri, 2012-09-28 at 20:09 +0530, Sathya Perla wrote:
> Added and modified a few log messages mostly in probe path.

Trivial comment:

> diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
[]
> @@ -3797,6 +3802,23 @@ static bool be_reset_required(struct be_adapter *adapter)
>  	return be_find_vfs(adapter, ENABLED) > 0 ? false : true;
>  }
>  
> +static char *mc_name(struct be_adapter *adapter)
> +{
> +	if (adapter->function_mode & FLEX10_MODE)
> +		return "FLEX10";
> +	else if (adapter->function_mode & VNIC_MODE)
> +		return "vNIC";
> +	else if (adapter->function_mode & UMC_ENABLED)
> +		return "UMC";
> +	else
> +		return "";
> +}

Maybe something other than "" to identify this state
or add a space before or after the actual uses so that
the unprefixed state prints without 2 spaces.

Perhaps if you don't want to identify the non-virtual (?)
state:

static char *mc_name(struct be_adapter *adapter)
{
	if (adapter->function_mode & FLEX10_MODE)
		return " FLEX10";
	else if (adapter->function_mode & VNIC_MODE)
		return " vNIC";
	else if (adapter->function_mode & UMC_ENABLED)
		return " UMC";
	else
		return "";
}

[]

> @@ -3901,8 +3923,8 @@ static int __devinit be_probe(struct pci_dev *pdev,
>  
>  	be_cmd_query_port_name(adapter, &port_name);
>  
> -	dev_info(&pdev->dev, "%s: %s port %c\n", netdev->name, nic_name(pdev),
> -		 port_name);
> +	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
> +		 func_name(adapter), mc_name(adapter), port_name);
>  
>  	return 0;

and:

	dev_info(&pdev->dev, "%s: %s%s port %c\n",
		 nic_name(pdev), func_name(adapter), mc_name(adapter),
		 port_name);

^ permalink raw reply

* Re: mlx4_en_alloc_frag allocation failures
From: Eric Dumazet @ 2012-09-28 15:50 UTC (permalink / raw)
  To: Shawn Bohrer; +Cc: netdev, linux-mm, linux-kernel
In-Reply-To: <20120928151429.GB2731@BohrerMBP.rgmadvisors.com>

On Fri, 2012-09-28 at 10:14 -0500, Shawn Bohrer wrote:
> We've got a new application that is receiving UDP multicast data using
> AF_PACKET and writing out the packets in a custom format to disk.  The
> packet rates are bursty, but it seems to be roughly 100 Mbps on
> average for 1 minute periods.  With this application running all day
> we get a lot of these messages:
> 
> [1298269.103034] kswapd1: page allocation failure: order:2, mode:0x4020
> [1298269.103038] Pid: 80, comm: kswapd1 Not tainted 3.4.9-2.rgm.fc16.x86_64 #1
> [1298269.103040] Call Trace:
> [1298269.103041]  <IRQ>  [<ffffffff810db746>] warn_alloc_failed+0xf6/0x160
> [1298269.103053]  [<ffffffff813c767d>] ? skb_copy_bits+0x16d/0x2c0
> [1298269.103058]  [<ffffffff810e83a9>] ? wakeup_kswapd+0x69/0x160
> [1298269.103060]  [<ffffffff810df188>] __alloc_pages_nodemask+0x6e8/0x930
> [1298269.103064]  [<ffffffff81114316>] alloc_pages_current+0xb6/0x120
> [1298269.103070]  [<ffffffffa00c142b>] mlx4_en_alloc_frag+0x16b/0x1e0 [mlx4_en]
> [1298269.103073]  [<ffffffffa00c18a0>] mlx4_en_complete_rx_desc+0x120/0x1d0 [mlx4_en]
> [1298269.103076]  [<ffffffffa00c27d4>] mlx4_en_process_rx_cq+0x584/0x700 [mlx4_en]
> [1298269.103079]  [<ffffffffa00c29ef>] mlx4_en_poll_rx_cq+0x3f/0x80 [mlx4_en]
> [1298269.103083]  [<ffffffff813d6569>] net_rx_action+0x119/0x210
> [1298269.103086]  [<ffffffff8103c690>] __do_softirq+0xb0/0x220
> [1298269.103090]  [<ffffffff8109911d>] ? handle_irq_event+0x4d/0x70
> [1298269.103095]  [<ffffffff8148e30c>] call_softirq+0x1c/0x30
> [1298269.103100]  [<ffffffff81003ef5>] do_softirq+0x55/0x90
> [1298269.103101]  [<ffffffff8103ca65>] irq_exit+0x75/0x80
> [1298269.103103]  [<ffffffff8148e853>] do_IRQ+0x63/0xe0
> [1298269.103107]  [<ffffffff81485667>] common_interrupt+0x67/0x67
> [1298269.103108]  <EOI>  [<ffffffff8148523f>] ? _raw_spin_unlock_irqrestore+0xf/0x20
> [1298269.103113]  [<ffffffff811184b1>] compaction_alloc+0x361/0x3f0
> [1298269.103115]  [<ffffffff810e29b7>] ? pagevec_lru_move_fn+0xd7/0xf0
> [1298269.103118]  [<ffffffff81123d19>] migrate_pages+0xa9/0x470
> [1298269.103120]  [<ffffffff81118150>] ? perf_trace_mm_compaction_migratepages+0xd0/0xd0
> [1298269.103122]  [<ffffffff81118abb>] compact_zone+0x4cb/0x910
> [1298269.103124]  [<ffffffff8111904b>] __compact_pgdat+0x14b/0x190
> [1298269.103125]  [<ffffffff8111931d>] compact_pgdat+0x2d/0x30
> [1298269.103129]  [<ffffffff810f32b9>] ? fragmentation_index+0x19/0x70
> [1298269.103131]  [<ffffffff810eb15f>] balance_pgdat+0x6ef/0x710
> [1298269.103133]  [<ffffffff810eb2ca>] kswapd+0x14a/0x390
> [1298269.103136]  [<ffffffff810567c0>] ? add_wait_queue+0x60/0x60
> [1298269.103138]  [<ffffffff810eb180>] ? balance_pgdat+0x710/0x710
> [1298269.103140]  [<ffffffff81055e93>] kthread+0x93/0xa0
> [1298269.103142]  [<ffffffff8148e214>] kernel_thread_helper+0x4/0x10
> [1298269.103144]  [<ffffffff81055e00>] ? kthread_worker_fn+0x140/0x140
> [1298269.103146]  [<ffffffff8148e210>] ? gs_change+0xb/0xb
> 
> The kernel is based on a Fedora 16 kernel and actually has the 3.4.10
> patches applied.  I can easily test patches or different kernels.
> 
> I'm mostly wondering if there is anything that can be done about these
> failures?  It appears that these failures have to do with handling
> fragmented IP frames, but the majority of the packets this machines
> should not be fragmented (there are probably some that are).
> 
> From a memory management point of view the system has 48GB of RAM, and
> typically 44GB of that is page cache.  The dirty pages seem to hover
> around 5-6MB and the filesystem/disks don't seem to have any problems
> keeping up with writing out the data.

What is the value of /proc/sys/vm/min_free_kbytes ?



--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: mlx4_en_alloc_frag allocation failures
From: Shawn Bohrer @ 2012-09-28 15:52 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, linux-mm, linux-kernel
In-Reply-To: <1348847408.5093.2548.camel@edumazet-glaptop>

On Fri, Sep 28, 2012 at 05:50:08PM +0200, Eric Dumazet wrote:
> On Fri, 2012-09-28 at 10:14 -0500, Shawn Bohrer wrote:
> > We've got a new application that is receiving UDP multicast data using
> > AF_PACKET and writing out the packets in a custom format to disk.  The
> > packet rates are bursty, but it seems to be roughly 100 Mbps on
> > average for 1 minute periods.  With this application running all day
> > we get a lot of these messages:
> > 
> > [1298269.103034] kswapd1: page allocation failure: order:2, mode:0x4020
> > [1298269.103038] Pid: 80, comm: kswapd1 Not tainted 3.4.9-2.rgm.fc16.x86_64 #1
> > [1298269.103040] Call Trace:
> > [1298269.103041]  <IRQ>  [<ffffffff810db746>] warn_alloc_failed+0xf6/0x160
> > [1298269.103053]  [<ffffffff813c767d>] ? skb_copy_bits+0x16d/0x2c0
> > [1298269.103058]  [<ffffffff810e83a9>] ? wakeup_kswapd+0x69/0x160
> > [1298269.103060]  [<ffffffff810df188>] __alloc_pages_nodemask+0x6e8/0x930
> > [1298269.103064]  [<ffffffff81114316>] alloc_pages_current+0xb6/0x120
> > [1298269.103070]  [<ffffffffa00c142b>] mlx4_en_alloc_frag+0x16b/0x1e0 [mlx4_en]
> > [1298269.103073]  [<ffffffffa00c18a0>] mlx4_en_complete_rx_desc+0x120/0x1d0 [mlx4_en]
> > [1298269.103076]  [<ffffffffa00c27d4>] mlx4_en_process_rx_cq+0x584/0x700 [mlx4_en]
> > [1298269.103079]  [<ffffffffa00c29ef>] mlx4_en_poll_rx_cq+0x3f/0x80 [mlx4_en]
> > [1298269.103083]  [<ffffffff813d6569>] net_rx_action+0x119/0x210
> > [1298269.103086]  [<ffffffff8103c690>] __do_softirq+0xb0/0x220
> > [1298269.103090]  [<ffffffff8109911d>] ? handle_irq_event+0x4d/0x70
> > [1298269.103095]  [<ffffffff8148e30c>] call_softirq+0x1c/0x30
> > [1298269.103100]  [<ffffffff81003ef5>] do_softirq+0x55/0x90
> > [1298269.103101]  [<ffffffff8103ca65>] irq_exit+0x75/0x80
> > [1298269.103103]  [<ffffffff8148e853>] do_IRQ+0x63/0xe0
> > [1298269.103107]  [<ffffffff81485667>] common_interrupt+0x67/0x67
> > [1298269.103108]  <EOI>  [<ffffffff8148523f>] ? _raw_spin_unlock_irqrestore+0xf/0x20
> > [1298269.103113]  [<ffffffff811184b1>] compaction_alloc+0x361/0x3f0
> > [1298269.103115]  [<ffffffff810e29b7>] ? pagevec_lru_move_fn+0xd7/0xf0
> > [1298269.103118]  [<ffffffff81123d19>] migrate_pages+0xa9/0x470
> > [1298269.103120]  [<ffffffff81118150>] ? perf_trace_mm_compaction_migratepages+0xd0/0xd0
> > [1298269.103122]  [<ffffffff81118abb>] compact_zone+0x4cb/0x910
> > [1298269.103124]  [<ffffffff8111904b>] __compact_pgdat+0x14b/0x190
> > [1298269.103125]  [<ffffffff8111931d>] compact_pgdat+0x2d/0x30
> > [1298269.103129]  [<ffffffff810f32b9>] ? fragmentation_index+0x19/0x70
> > [1298269.103131]  [<ffffffff810eb15f>] balance_pgdat+0x6ef/0x710
> > [1298269.103133]  [<ffffffff810eb2ca>] kswapd+0x14a/0x390
> > [1298269.103136]  [<ffffffff810567c0>] ? add_wait_queue+0x60/0x60
> > [1298269.103138]  [<ffffffff810eb180>] ? balance_pgdat+0x710/0x710
> > [1298269.103140]  [<ffffffff81055e93>] kthread+0x93/0xa0
> > [1298269.103142]  [<ffffffff8148e214>] kernel_thread_helper+0x4/0x10
> > [1298269.103144]  [<ffffffff81055e00>] ? kthread_worker_fn+0x140/0x140
> > [1298269.103146]  [<ffffffff8148e210>] ? gs_change+0xb/0xb
> > 
> > The kernel is based on a Fedora 16 kernel and actually has the 3.4.10
> > patches applied.  I can easily test patches or different kernels.
> > 
> > I'm mostly wondering if there is anything that can be done about these
> > failures?  It appears that these failures have to do with handling
> > fragmented IP frames, but the majority of the packets this machines
> > should not be fragmented (there are probably some that are).
> > 
> > From a memory management point of view the system has 48GB of RAM, and
> > typically 44GB of that is page cache.  The dirty pages seem to hover
> > around 5-6MB and the filesystem/disks don't seem to have any problems
> > keeping up with writing out the data.
> 
> What is the value of /proc/sys/vm/min_free_kbytes ?

$ cat /proc/sys/vm/min_free_kbytes
90112

--
Shawn

-- 

---------------------------------------------------------------
This email, along with any attachments, is confidential. If you 
believe you received this message in error, please contact the 
sender immediately and delete all copies of the message.  
Thank you.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCHv4 1/4] modem_shm: Add Modem Access Framework
From: Greg KH @ 2012-09-28 16:00 UTC (permalink / raw)
  To: Arun Murthy; +Cc: linux-kernel, netdev, linux-doc, alan
In-Reply-To: <1348819504-1303-2-git-send-email-arun.murthy@stericsson.com>

On Fri, Sep 28, 2012 at 01:35:01PM +0530, Arun Murthy wrote:
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/err.h>
> +#include <linux/printk.h>
> +#include <linux/modem_shm/modem.h>
> +
> +static struct class *modem_class;

What's wrong with a bus_type instead?

> +static int __modem_is_requested(struct device *dev, void *data)
> +{
> +	struct modem_desc *mdesc = (struct modem_desc *)data;
> +
> +	if (!mdesc->mclients) {
> +		printk(KERN_ERR "modem_access: modem description is NULL\n");
> +		return 0;
> +	}
> +	return atomic_read(&mdesc->mclients->cnt);
> +}
> +
> +int modem_is_requested(struct modem_desc *mdesc)
> +{
> +	return class_for_each_device(modem_class, NULL, (void *)mdesc, __modem_is_requested);
> +}

Where is the documentation for your public api functions like this?

> +
> +int modem_release(struct modem_desc *mdesc)
> +{
> +	if (!mdesc->release)
> +		return -EFAULT;
> +
> +	if (modem_is_requested(mdesc)) {
> +		atomic_dec(&mdesc->mclients->cnt);
> +		if (atomic_read(&mdesc->use_cnt) == 1) {
> +			mdesc->release(mdesc);
> +			atomic_dec(&mdesc->use_cnt);
> +		}

Eeek, why aren't you using the built-in reference counting that the
struct device provided to you, and instead are rolling your own?  This
happens in many places, why?

greg k-h

^ permalink raw reply

* Re: Lab: v.1.8 + Linux 2.6.37.6+up #1 + ESXi 5.0 - VM - Slow Network Performance/Failures
From: Stephen Hemminger @ 2012-09-28 16:21 UTC (permalink / raw)
  To: Mike Harris; +Cc: netdev
In-Reply-To: <CAJXRGag+VEMYUfhEaVwpVCUpjwC6Rhd0NeOrA15VAEz-5OwAMQ@mail.gmail.com>

On Fri, 28 Sep 2012 04:56:35 +0200
Mike Harris <mharris@onxis.com> wrote:

> Hi,
> 
> I hope everyone is well!
> 
> Some network throughput/performance oddness with a linux based virtual firewall…
> 
> Lab scenario;
> 
> [Windows VM #1] --- VLAN X-----(*)Linux Firewall VM ----- VLAN Y
> ---[Windows VM #2]
> 
> A tcpdump is kicked off with the following options on the firewall
> then a 100MB file is copied between VM #1 to VM #2 (SMB).
> 
> tcpdump -i eth0.x -n -s0 -w file-transfer-1.pcap -c100
> 
> Notes:
> 
> + Physical blade run ESXi 5.0.
> + Windows VMs run on the same vSwitch and physical blade.
> + VLAN X and Y support up to 1500 bytes MTU.
> + Virtual firewall is configured with the 4095 (any) VLAN (receives
> and transmits tagged frames).
> + Virtual firewall runs Linux 2.6.37.6+up #1
> + Virtual and physical backbones do support up to 9k frames.
> 
> Observations;
> 
> 1. The file copy fails…
> 2. The pcap reveals a 12,443 byte uber jumbo frame is present shortly
> after the file transfer starts.
> 
> Repeated the same scenario using a test vyatta 6.4 VM and the file
> copy completes normally… no jumbo frames or any other oddness.
> Virtual and physical networking can not originate such a frame
> normally.
> 
> Given this, I suspect there's a general framing failure of the network
> driver on the virtual linux firewall, which lead me to the dmesg
> command and this mailing list :)
> 
> Has anyone else seen this behavior before on a linux VM before?
> 
> Thoughts?
> 
> Helpful suggestions :)

Vyatta ran into a problem because the Vmware driver was incorrectly keeping LRO
enabled even when forwarding. Given the age of the kernel that could
be your problem.

The short term workaround was to just force LRO off (change to vmxnet3).
Thankfully, someone later found where the driver was mistakenly re-enabling LRO
and fixed the real bug.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox