Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 2/2] smsc75xx: add wol magic packet support
From: Steve Glendinning @ 2012-09-26 15:13 UTC (permalink / raw)
  To: netdev; +Cc: Steve Glendinning
In-Reply-To: <1348672394-15191-1-git-send-email-steve.glendinning@shawell.net>

This patch enables wake from system suspend on magic packet.

Signed-off-by: Steve Glendinning <steve.glendinning@shawell.net>
---
 drivers/net/usb/smsc75xx.c |  184 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 172 insertions(+), 12 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 83b9927..a480c46 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -52,6 +52,7 @@
 #define USB_PRODUCT_ID_LAN7500		(0x7500)
 #define USB_PRODUCT_ID_LAN7505		(0x7505)
 #define RXW_PADDING			2
+#define SUPPORTED_WAKE			(WAKE_MAGIC)
 
 #define check_warn(ret, fmt, args...) \
 	({ if (ret < 0) netdev_warn(dev->net, fmt, ##args); })
@@ -65,6 +66,7 @@
 struct smsc75xx_priv {
 	struct usbnet *dev;
 	u32 rfe_ctl;
+	u32 wolopts;
 	u32 multicast_hash_table[DP_SEL_VHF_HASH_LEN];
 	struct mutex dataport_mutex;
 	spinlock_t rfe_ctl_lock;
@@ -135,6 +137,28 @@ static int __must_check smsc75xx_write_reg(struct usbnet *dev, u32 index,
 	return ret;
 }
 
+static int smsc75xx_set_feature(struct usbnet *dev, u32 feature)
+{
+	BUG_ON(!dev);
+
+	cpu_to_le32s(&feature);
+
+	return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+		USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0,
+		USB_CTRL_SET_TIMEOUT);
+}
+
+static int smsc75xx_clear_feature(struct usbnet *dev, u32 feature)
+{
+	BUG_ON(!dev);
+
+	cpu_to_le32s(&feature);
+
+	return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+		USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0,
+		USB_CTRL_SET_TIMEOUT);
+}
+
 /* Loop until the read is completed with timeout
  * called with phy_mutex held */
 static int smsc75xx_phy_wait_not_busy(struct usbnet *dev)
@@ -578,6 +602,26 @@ static int smsc75xx_ethtool_set_eeprom(struct net_device *netdev,
 	return smsc75xx_write_eeprom(dev, ee->offset, ee->len, data);
 }
 
+static void smsc75xx_ethtool_get_wol(struct net_device *net,
+				     struct ethtool_wolinfo *wolinfo)
+{
+	struct usbnet *dev = netdev_priv(net);
+	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
+
+	wolinfo->supported = SUPPORTED_WAKE;
+	wolinfo->wolopts = pdata->wolopts;
+}
+
+static int smsc75xx_ethtool_set_wol(struct net_device *net,
+				    struct ethtool_wolinfo *wolinfo)
+{
+	struct usbnet *dev = netdev_priv(net);
+	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
+
+	pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE;
+	return 0;
+}
+
 static const struct ethtool_ops smsc75xx_ethtool_ops = {
 	.get_link	= usbnet_get_link,
 	.nway_reset	= usbnet_nway_reset,
@@ -589,6 +633,8 @@ static const struct ethtool_ops smsc75xx_ethtool_ops = {
 	.get_eeprom_len	= smsc75xx_ethtool_get_eeprom_len,
 	.get_eeprom	= smsc75xx_ethtool_get_eeprom,
 	.set_eeprom	= smsc75xx_ethtool_set_eeprom,
+	.get_wol	= smsc75xx_ethtool_get_wol,
+	.set_wol	= smsc75xx_ethtool_set_wol,
 };
 
 static int smsc75xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
@@ -1109,45 +1155,159 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 static int smsc75xx_suspend(struct usb_interface *intf, pm_message_t message)
 {
 	struct usbnet *dev = usb_get_intfdata(intf);
+	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
 	int ret;
 	u32 val;
 
-	BUG_ON(!dev);
-
 	ret = usbnet_suspend(intf, message);
 	check_warn_return(ret, "usbnet_suspend error");
 
-	netdev_info(dev->net, "entering SUSPEND2 mode");
+	/* if no wol options set, enter lowest power SUSPEND2 mode */
+	if (!(pdata->wolopts & SUPPORTED_WAKE)) {
+		netdev_info(dev->net, "entering SUSPEND2 mode");
+
+		/* disable energy detect (link up) & wake up events */
+		ret = smsc75xx_read_reg(dev, WUCSR, &val);
+		check_warn_return(ret, "Error reading WUCSR");
+
+		val &= ~(WUCSR_MPEN | WUCSR_WUEN);
+
+		ret = smsc75xx_write_reg(dev, WUCSR, val);
+		check_warn_return(ret, "Error writing WUCSR");
+
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+		check_warn_return(ret, "Error reading PMT_CTL");
+
+		val &= ~(PMT_CTL_ED_EN | PMT_CTL_WOL_EN);
+
+		ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+		check_warn_return(ret, "Error writing PMT_CTL");
+
+		/* enter suspend2 mode */
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+		check_warn_return(ret, "Error reading PMT_CTL");
+
+		val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST);
+		val |= PMT_CTL_SUS_MODE_2;
+
+		ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+		check_warn_return(ret, "Error writing PMT_CTL");
+
+		return 0;
+	}
+
+	if (pdata->wolopts & WAKE_MAGIC) {
+		/* clear any pending magic packet status */
+		ret = smsc75xx_read_reg(dev, WUCSR, &val);
+		check_warn_return(ret, "Error reading WUCSR");
+
+		val |= WUCSR_MPR;
+
+		ret = smsc75xx_write_reg(dev, WUCSR, val);
+		check_warn_return(ret, "Error writing WUCSR");
+	}
+
+	/* enable/disable magic packet wake */
+	ret = smsc75xx_read_reg(dev, WUCSR, &val);
+	check_warn_return(ret, "Error reading WUCSR");
+
+	if (pdata->wolopts & WAKE_MAGIC) {
+		netdev_info(dev->net, "enabling magic packet wakeup");
+		val |= WUCSR_MPEN;
+	} else {
+		netdev_info(dev->net, "disabling magic packet wakeup");
+		val &= ~WUCSR_MPEN;
+	}
+
+	ret = smsc75xx_write_reg(dev, WUCSR, val);
+	check_warn_return(ret, "Error writing WUCSR");
+
+	/* enable wol wakeup source */
+	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+	check_warn_return(ret, "Error reading PMT_CTL");
+
+	val |= PMT_CTL_WOL_EN;
+
+	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+	check_warn_return(ret, "Error writing PMT_CTL");
+
+	/* enable receiver */
+	ret = smsc75xx_read_reg(dev, MAC_RX, &val);
+	check_warn_return(ret, "Failed to read MAC_RX: %d", ret);
+
+	val |= MAC_RX_RXEN;
+
+	ret = smsc75xx_write_reg(dev, MAC_RX, val);
+	check_warn_return(ret, "Failed to write MAC_RX: %d", ret);
+
+	/* some wol options are enabled, so enter SUSPEND0 */
+	netdev_info(dev->net, "entering SUSPEND0 mode");
 
 	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
 	check_warn_return(ret, "Error reading PMT_CTL");
 
-	val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST);
-	val |= PMT_CTL_SUS_MODE_2;
+	val &= (~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST));
+	val |= PMT_CTL_SUS_MODE_0;
+
+	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+	check_warn_return(ret, "Error writing PMT_CTL");
 
+	/* clear wol status */
+	val &= ~PMT_CTL_WUPS;
+	val |= PMT_CTL_WUPS_WOL;
 	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
 	check_warn_return(ret, "Error writing PMT_CTL");
 
+	/* read back PMT_CTL */
+	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+	check_warn_return(ret, "Error reading PMT_CTL");
+
+	smsc75xx_set_feature(dev, USB_DEVICE_REMOTE_WAKEUP);
+
 	return 0;
 }
 
 static int smsc75xx_resume(struct usb_interface *intf)
 {
 	struct usbnet *dev = usb_get_intfdata(intf);
+	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
 	int ret;
 	u32 val;
 
-	BUG_ON(!dev);
+	if (pdata->wolopts & WAKE_MAGIC) {
+		netdev_info(dev->net, "resuming from SUSPEND0");
 
-	netdev_info(dev->net, "resuming from SUSPEND2");
+		smsc75xx_clear_feature(dev, USB_DEVICE_REMOTE_WAKEUP);
 
-	ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
-	check_warn_return(ret, "Error reading PMT_CTL");
+		/* Disable magic packet wake */
+		ret = smsc75xx_read_reg(dev, WUCSR, &val);
+		check_warn_return(ret, "Error reading WUCSR");
 
-	val |= PMT_CTL_PHY_PWRUP;
+		val &= ~WUCSR_MPEN;
 
-	ret = smsc75xx_write_reg(dev, PMT_CTL, val);
-	check_warn_return(ret, "Error writing PMT_CTL");
+		ret = smsc75xx_write_reg(dev, WUCSR, val);
+		check_warn_return(ret, "Error writing WUCSR");
+
+		/* clear wake-up status */
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+		check_warn_return(ret, "Error reading PMT_CTL");
+
+		val &= ~PMT_CTL_WOL_EN;
+		val |= PMT_CTL_WUPS;
+
+		ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+		check_warn_return(ret, "Error writing PMT_CTL");
+	} else {
+		netdev_info(dev->net, "resuming from SUSPEND2");
+
+		ret = smsc75xx_read_reg(dev, PMT_CTL, &val);
+		check_warn_return(ret, "Error reading PMT_CTL");
+
+		val |= PMT_CTL_PHY_PWRUP;
+
+		ret = smsc75xx_write_reg(dev, PMT_CTL, val);
+		check_warn_return(ret, "Error writing PMT_CTL");
+	}
 
 	ret = smsc75xx_wait_ready(dev);
 	check_warn_return(ret, "device not ready in smsc75xx_resume");
-- 
1.7.9.5

^ permalink raw reply related

* Re: [PATCH 5/5] smsc95xx: enable power saving mode during system suspend
From: Steve Glendinning @ 2012-09-26 15:16 UTC (permalink / raw)
  To: Bjørn Mork; +Cc: netdev
In-Reply-To: <87y5jw982o.fsf@nemi.mork.no>

On 26 September 2012 15:23, Bjørn Mork <bjorn@mork.no> wrote:
> Steve Glendinning <steve.glendinning@shawell.net> writes:
>
>> +static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
>> +{
>> +     struct usbnet *dev = usb_get_intfdata(intf);
>> +     int ret;
>> +     u32 val;
>> +
>> +     BUG_ON(!dev);
>
> That's not very user friendly.  Why not just return here?

I hadn't thought that was a situation that could arise, is it?  Would
this happen if the USB device was removed during suspend?

-Steve

^ permalink raw reply

* Re: [PATCH] smsc75xx: fix resume after device reset
From: Steve Glendinning @ 2012-09-26 15:35 UTC (permalink / raw)
  To: netdev
In-Reply-To: <1348497779-10042-1-git-send-email-steve.glendinning@shawell.net>

Oops, missing signed-off line.  Please let me know if you want me to
resubmit this patch.

Signed-off-by: Steve Glendinning <steve.glendinning@shawell.net>

-Steve

^ permalink raw reply

* Re: [PATCH 5/5] smsc95xx: enable power saving mode during system suspend
From: Bjørn Mork @ 2012-09-26 15:48 UTC (permalink / raw)
  To: Steve Glendinning; +Cc: netdev
In-Reply-To: <CAKh2mn5tKowqjbvgWDSz52UF9qY9_ysAb23Pb-M_3wvCG=_R6w@mail.gmail.com>

Steve Glendinning <steve@shawell.net> writes:
> On 26 September 2012 15:23, Bjørn Mork <bjorn@mork.no> wrote:
>> Steve Glendinning <steve.glendinning@shawell.net> writes:
>>
>>> +static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
>>> +{
>>> +     struct usbnet *dev = usb_get_intfdata(intf);
>>> +     int ret;
>>> +     u32 val;
>>> +
>>> +     BUG_ON(!dev);
>>
>> That's not very user friendly.  Why not just return here?
>
> I hadn't thought that was a situation that could arise, is it?  Would
> this happen if the USB device was removed during suspend?

No, it should not happen.  But then, why test at all?


Bjørn

^ permalink raw reply

* Re: [PATCH 5/5] smsc95xx: enable power saving mode during system suspend
From: Steve Glendinning @ 2012-09-26 15:58 UTC (permalink / raw)
  To: Bjørn Mork; +Cc: netdev
In-Reply-To: <87txuk9453.fsf@nemi.mork.no>

>> I hadn't thought that was a situation that could arise, is it?  Would
>> this happen if the USB device was removed during suspend?
>
> No, it should not happen.  But then, why test at all?

I thought it was common practice to add these tests to document an
assumption the developer made that later code relies on?  I had
assumed that the !dev condition should not be possible, hence the
simple BUG test.  If it is possible then I agree - I definitely need
to handle this more gracefully.

In this case, asserting that dev is not NULL will make the code fail
loudly there instead of a few lines down when the netdev_info call
dereferences dev->net.  Either way something bad will happen!

-Steve

^ permalink raw reply

* Re: [PATCH net-next] net: use bigger pages in __netdev_alloc_frag
From: Alexander Duyck @ 2012-09-26 16:00 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1348650402.5093.176.camel@edumazet-glaptop>

On 09/26/2012 02:06 AM, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> We currently use percpu order-0 pages in __netdev_alloc_frag
> to deliver fragments used by __netdev_alloc_skb()
>
> Depending on NIC driver and arch being 32 or 64 bit, it allows a page to
> be split in several fragments (between 1 and 8), assuming PAGE_SIZE=4096
>
> Switching to bigger pages (32768 bytes for PAGE_SIZE=4096 case) allows :
>
> - Better filling of space (the ending hole overhead is less an issue)
>
> - Less calls to page allocator or accesses to page->_count
>
> - Could allow struct skb_shared_info futures changes without major
>   performance impact.
>
> This patch implements a transparent fallback to smaller
> pages in case of memory pressure.
>
> It also uses a standard "struct page_frag" instead of a custom one.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Alexander Duyck <alexander.h.duyck@intel.com>
> ---
>  net/core/skbuff.c |   46 ++++++++++++++++++++++++++++----------------
>  1 file changed, 30 insertions(+), 16 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 2ede3cf..4ab83ce 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
>  EXPORT_SYMBOL(build_skb);
>  
>  struct netdev_alloc_cache {
> -	struct page *page;
> -	unsigned int offset;
> -	unsigned int pagecnt_bias;
> +	struct page_frag	frag;
> +	/* we maintain a pagecount bias, so that we dont dirty cache line
> +	 * containing page->_count every time we allocate a fragment.
> +	 */
> +	unsigned int		pagecnt_bias;
>  };
>  static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
>  
> -#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
> +#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
> +#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
> +#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
>  
>  static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
>  {
>  	struct netdev_alloc_cache *nc;
>  	void *data = NULL;
> +	int order;
>  	unsigned long flags;
>  
>  	local_irq_save(flags);
>  	nc = &__get_cpu_var(netdev_alloc_cache);
> -	if (unlikely(!nc->page)) {
> +	if (unlikely(!nc->frag.page)) {
>  refill:
> -		nc->page = alloc_page(gfp_mask);
> -		if (unlikely(!nc->page))
> -			goto end;
> +		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
> +			gfp_t gfp = gfp_mask;
> +
> +			if (order)
> +				gfp |= __GFP_COMP | __GFP_NOWARN;
> +			nc->frag.page = alloc_pages(gfp, order);
> +			if (likely(nc->frag.page))
> +				break;
> +			if (--order <= 0)
> +				goto end;
> +		}
> +		nc->frag.size = PAGE_SIZE << order;
>  recycle:
> -		atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
> -		nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
> -		nc->offset = 0;
> +		atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
> +		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
> +		nc->frag.offset = 0;
>  	}
>  
> -	if (nc->offset + fragsz > PAGE_SIZE) {
> +	if (nc->frag.offset + fragsz > nc->frag.size) {
>  		/* avoid unnecessary locked operations if possible */
> -		if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
> -		    atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
> +		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
> +		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
>  			goto recycle;
>  		goto refill;
>  	}
>  
> -	data = page_address(nc->page) + nc->offset;
> -	nc->offset += fragsz;
> +	data = page_address(nc->frag.page) + nc->frag.offset;
> +	nc->frag.offset += fragsz;
>  	nc->pagecnt_bias--;
>  end:
>  	local_irq_restore(flags);
>
>

One minor thought.  Instead of tracking offset and size why not just
work from the top down instead of the bottom up?

So instead of starting with the frag offset at 0, why not start it at
PAGE_SIZE << order, and then work your way down to 0.  That way you
don't need to track both size and offset.

Thanks,

Alex

^ permalink raw reply

* Re: [PATCH 02/11] inet_diag: pass inet_diag module to netlink_dump_start
From: Stephen Hemminger @ 2012-09-26 16:07 UTC (permalink / raw)
  To: Gao feng
  Cc: davem, netfilter-devel, linux-rdma, netdev, eric.dumazet, pablo,
	steffen.klassert, linux-crypto, jengelh, stephen.hemminger
In-Reply-To: <1348635140-20225-2-git-send-email-gaofeng@cn.fujitsu.com>

On Wed, 26 Sep 2012 12:52:11 +0800
Gao feng <gaofeng@cn.fujitsu.com> wrote:

>  			struct netlink_dump_control c = {
>  				.dump = inet_diag_dump_compat,
> +				.done = netlink_dump_done,
> +				.module = THIS_MODULE,
>  			};

Since these are immutable, why put the netlink_dump_control structures
on the stack (which requires code to generate them at runtime) instead of
making them global?

static const struct netlink_dump_control unix_diag_ctrl = {
...

^ permalink raw reply

* Re: [PATCH net-next] net: use bigger pages in __netdev_alloc_frag
From: Eric Dumazet @ 2012-09-26 16:14 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: David Miller, netdev
In-Reply-To: <50632681.40208@intel.com>

On Wed, 2012-09-26 at 09:00 -0700, Alexander Duyck wrote:

> One minor thought.  Instead of tracking offset and size why not just
> work from the top down instead of the bottom up?
> 
> So instead of starting with the frag offset at 0, why not start it at
> PAGE_SIZE << order, and then work your way down to 0.  That way you
> don't need to track both size and offset.
> 

How do you refill then ? 

(ie setting xxx->offset back to PAGE_SIZE << order)

I am not sure we have direct access to a page order given a struct page
pointer.

I also like struct page_frag abstraction, because it might allow us
better code factorization with other frag allocators.

( skb_append_datato_frags(), sk_page_frag_refill(), ...)

^ permalink raw reply

* Re: [PATCH 5/5] smsc95xx: enable power saving mode during system suspend
From: Bjørn Mork @ 2012-09-26 16:17 UTC (permalink / raw)
  To: Steve Glendinning; +Cc: netdev
In-Reply-To: <CAKh2mn4XLCPfTFySEvtZc56J+WM863JtnsjF2D-RyDJatoyAww@mail.gmail.com>

Steve Glendinning <steve@shawell.net> writes:

>>> I hadn't thought that was a situation that could arise, is it?  Would
>>> this happen if the USB device was removed during suspend?
>>
>> No, it should not happen.  But then, why test at all?
>
> I thought it was common practice to add these tests to document an
> assumption the developer made that later code relies on?  I had
> assumed that the !dev condition should not be possible, hence the
> simple BUG test.  If it is possible then I agree - I definitely need
> to handle this more gracefully.
>
> In this case, asserting that dev is not NULL will make the code fail
> loudly there instead of a few lines down when the netdev_info call
> dereferences dev->net.  Either way something bad will happen!

Yes, but you are a lot less likely to know about it if you BUG out.  The
user will be left with no other choice than hitting reset or poweroff.
What's the point of that?

If your driver crashes but the machine is left running, then the user
may forward the Oops to you.  That's much more useful.


Bjørn

^ permalink raw reply

* Re: [PATCH net-next] net: use bigger pages in __netdev_alloc_frag
From: Alexander Duyck @ 2012-09-26 16:36 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1348676085.5093.361.camel@edumazet-glaptop>

On 09/26/2012 09:14 AM, Eric Dumazet wrote:
> On Wed, 2012-09-26 at 09:00 -0700, Alexander Duyck wrote:
>
>> One minor thought.  Instead of tracking offset and size why not just
>> work from the top down instead of the bottom up?
>>
>> So instead of starting with the frag offset at 0, why not start it at
>> PAGE_SIZE << order, and then work your way down to 0.  That way you
>> don't need to track both size and offset.
>>
> How do you refill then ? 
>
> (ie setting xxx->offset back to PAGE_SIZE << order)
>
> I am not sure we have direct access to a page order given a struct page
> pointer.
>
> I also like struct page_frag abstraction, because it might allow us
> better code factorization with other frag allocators.
>
> ( skb_append_datato_frags(), sk_page_frag_refill(), ...)

I forgot about the page recycling portion of it.  I was still thinking
of the original implementation that had a fixed page order.  You are
correct, with the page order being dynamic you will have to store size
somewhere.

Thanks,

Alex

^ permalink raw reply

* Re: [PATCH] rtlwifi: use %*ph[C] to dump small buffers
From: Joe Perches @ 2012-09-26 16:45 UTC (permalink / raw)
  To: Andy Shevchenko, Larry Finger, Chaoming Li
  Cc: David S. Miller, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1348667852-5957-1-git-send-email-andriy.shevchenko-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

On Wed, 2012-09-26 at 16:57 +0300, Andy Shevchenko wrote:
> Signed-off-by: Andy Shevchenko <andriy.shevchenko-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

Hi Andy.

(adding Larry and Chaoming to cc's)

Please use scripts/get_maintainer.pl on your patches to
see who maintains these files so you can cc them.

One comment below, it looks like a possible endian bug.
(not in your patch)

> ---
>  drivers/net/wireless/rtlwifi/cam.c          |    7 ++-----
>  drivers/net/wireless/rtlwifi/rtl8192ce/hw.c |    6 ++----
>  drivers/net/wireless/rtlwifi/rtl8192cu/hw.c |    6 ++----
>  3 files changed, 6 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/wireless/rtlwifi/cam.c b/drivers/net/wireless/rtlwifi/cam.c
> index 5b4b4d4..6ba9f80 100644
> --- a/drivers/net/wireless/rtlwifi/cam.c
> +++ b/drivers/net/wireless/rtlwifi/cam.c
> @@ -52,11 +52,8 @@ static void rtl_cam_program_entry(struct ieee80211_hw *hw, u32 entry_no,
>  	u32 target_content = 0;
>  	u8 entry_i;
>  
> -	RT_TRACE(rtlpriv, COMP_SEC, DBG_LOUD,
> -		 "key_cont_128:\n %x:%x:%x:%x:%x:%x\n",
> -		 key_cont_128[0], key_cont_128[1],
> -		 key_cont_128[2], key_cont_128[3],
> -		 key_cont_128[4], key_cont_128[5]);
> +	RT_TRACE(rtlpriv, COMP_SEC, DBG_LOUD, "key_cont_128: %*ph\n",
> +			6, key_cont_128);
>  
>  	for (entry_i = 0; entry_i < CAM_CONTENT_COUNT; entry_i++) {
>  		target_command = entry_i + CAM_CONTENT_COUNT * entry_no;
> diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
> index 86d73b3..932780d 100644
> --- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
> +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
> @@ -1918,10 +1918,8 @@ static void rtl92ce_update_hal_rate_mask(struct ieee80211_hw *hw,
>  				     (ratr_index << 28);
>  	rate_mask[4] = macid | (shortgi ? 0x20 : 0x00) | 0x80;
>  	RT_TRACE(rtlpriv, COMP_RATR, DBG_DMESG,
> -		 "Rate_index:%x, ratr_val:%x, %x:%x:%x:%x:%x\n",
> -		 ratr_index, ratr_bitmap,
> -		 rate_mask[0], rate_mask[1], rate_mask[2], rate_mask[3],
> -		 rate_mask[4]);
> +		 "Rate_index:%x, ratr_val:%x, %*phC\n",
> +		 ratr_index, ratr_bitmap, 5, rate_mask);
>  	rtl92c_fill_h2c_cmd(hw, H2C_RA_MASK, 5, rate_mask);
>  
>  	if (macid != 0)
> diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
> index 4bbb711..7554501 100644
> --- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
> +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
> @@ -2169,10 +2169,8 @@ void rtl92cu_update_hal_rate_mask(struct ieee80211_hw *hw, u8 rssi_level)
>  				      ratr_index << 28);
>  	rate_mask[4] = macid | (shortgi ? 0x20 : 0x00) | 0x80;
>  	RT_TRACE(rtlpriv, COMP_RATR, DBG_DMESG,
> -		 "Rate_index:%x, ratr_val:%x, %x:%x:%x:%x:%x\n",
> -		 ratr_index, ratr_bitmap,
> -		 rate_mask[0], rate_mask[1], rate_mask[2], rate_mask[3],
> -		 rate_mask[4]);
> +		 "Rate_index:%x, ratr_val:%x, %*phC\n",
> +		 ratr_index, ratr_bitmap, 5, rate_mask);
>  	rtl92c_fill_h2c_cmd(hw, H2C_RA_MASK, 5, rate_mask);
>  }
>  

rate_mask uses:

	u32 ratr_bitmap = (u32) mac->basic_rates;
...
	u8 rate_mask[5];
...
	[sets ratr_bitmap as u32]
...
	*(u32 *)&rate_mask = ((ratr_bitmap & 0x0fffffff) |
				      ratr_index << 28);
...
	rtl92c_fill_h2c_cmd(hw, H2C_RA_MASK, 5, rate_mask);

Looks like a possible endian misuse to me.



--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH net-next v2] net: use bigger pages in __netdev_alloc_frag
From: Eric Dumazet @ 2012-09-26 16:46 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: David Miller, netdev, Benjamin LaHaise
In-Reply-To: <50632F06.1040306@intel.com>

From: Eric Dumazet <edumazet@google.com>

We currently use percpu order-0 pages in __netdev_alloc_frag
to deliver fragments used by __netdev_alloc_skb()

Depending on NIC driver and arch being 32 or 64 bit, it allows a page to
be split in several fragments (between 1 and 8), assuming PAGE_SIZE=4096

Switching to bigger pages (32768 bytes for PAGE_SIZE=4096 case) allows :

- Better filling of space (the ending hole overhead is less of an issue)

- Fewer calls to page allocator or accesses to page->_count

- Could allow future struct skb_shared_info changes without major
  performance impact.

This patch implements a transparent fallback to smaller
pages in case of memory pressure.

It also uses a standard "struct page_frag" instead of a custom one.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
---
v2 : fix the (--order <= 0) test, as Benjamin pointed out

 net/core/skbuff.c |   46 ++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2ede3cf..607a70f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -340,43 +340,57 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 EXPORT_SYMBOL(build_skb);
 
 struct netdev_alloc_cache {
-	struct page *page;
-	unsigned int offset;
-	unsigned int pagecnt_bias;
+	struct page_frag	frag;
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_count every time we allocate a fragment.
+	 */
+	unsigned int		pagecnt_bias;
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
-#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
 	void *data = NULL;
+	int order;
 	unsigned long flags;
 
 	local_irq_save(flags);
 	nc = &__get_cpu_var(netdev_alloc_cache);
-	if (unlikely(!nc->page)) {
+	if (unlikely(!nc->frag.page)) {
 refill:
-		nc->page = alloc_page(gfp_mask);
-		if (unlikely(!nc->page))
-			goto end;
+		for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
+			gfp_t gfp = gfp_mask;
+
+			if (order)
+				gfp |= __GFP_COMP | __GFP_NOWARN;
+			nc->frag.page = alloc_pages(gfp, order);
+			if (likely(nc->frag.page))
+				break;
+			if (--order < 0)
+				goto end;
+		}
+		nc->frag.size = PAGE_SIZE << order;
 recycle:
-		atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
-		nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
-		nc->offset = 0;
+		atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+		nc->frag.offset = 0;
 	}
 
-	if (nc->offset + fragsz > PAGE_SIZE) {
+	if (nc->frag.offset + fragsz > nc->frag.size) {
 		/* avoid unnecessary locked operations if possible */
-		if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
-		    atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
+		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
+		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
 			goto recycle;
 		goto refill;
 	}
 
-	data = page_address(nc->page) + nc->offset;
-	nc->offset += fragsz;
+	data = page_address(nc->frag.page) + nc->frag.offset;
+	nc->frag.offset += fragsz;
 	nc->pagecnt_bias--;
 end:
 	local_irq_restore(flags);

^ permalink raw reply related

* [PATCH net-next] net: struct napi_struct fields reordering
From: Eric Dumazet @ 2012-09-26 17:07 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

From: Eric Dumazet <edumazet@google.com>

Remove two holes on 64bit arches, and put dev_list at the end of
napi_struct since it's not used in the fast path.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/netdevice.h |    6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6c131f0..dd320bb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -338,18 +338,16 @@ struct napi_struct {
 
 	unsigned long		state;
 	int			weight;
+	unsigned int		gro_count;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
 	spinlock_t		poll_lock;
 	int			poll_owner;
 #endif
-
-	unsigned int		gro_count;
-
 	struct net_device	*dev;
-	struct list_head	dev_list;
 	struct sk_buff		*gro_list;
 	struct sk_buff		*skb;
+	struct list_head	dev_list;
 };
 
 enum {

^ permalink raw reply related

* [PATCH] bnx2: Clean up remaining iounmap
From: Neil Horman @ 2012-09-26 17:22 UTC (permalink / raw)
  To: netdev; +Cc: Neil Horman, Michael Chan, David S. Miller

commit c0357e975afdbbedab5c662d19bef865f02adc17 modified bnx2 to switch from
using ioremap/iounmap to pci_iomap/pci_iounmap.  They missed a spot in the error
path of bnx2_init_one though.  This patch just cleans that up.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Michael Chan <mcan@broadcom.com>
CC: "David S. Miller" <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 79cebd8..e48312f 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8564,7 +8564,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return 0;

 error:
-	iounmap(bp->regview);
+	pci_iounmap(pdev, bp->regview);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
-- 
1.7.11.4

^ permalink raw reply related

* [PATCH 2/2] net: ti cpsw ethernet: set IFCTL_{A,B} bits for RMII mode
From: Daniel Mack @ 2012-09-26 17:24 UTC (permalink / raw)
  To: netdev
  Cc: devicetree-discuss, Daniel Mack, Mugunthan V N, Vaibhav Hiremath,
	David S. Miller
In-Reply-To: <1348680268-8194-1-git-send-email-zonque@gmail.com>

For RMII mode operation in 100Mbps, the CPSW needs to set the
IFCTL_A / IFCTL_B bits in the MACCONTROL register.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Cc: Mugunthan V N <mugunthanvnm@ti.com>
Cc: Vaibhav Hiremath <hvaibhav@ti.com>
Cc: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 3d7594e..d88dbfa 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -386,6 +386,12 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
 			mac_control |= BIT(7);	/* GIGABITEN	*/
 		if (phy->duplex)
 			mac_control |= BIT(0);	/* FULLDUPLEXEN	*/
+
+		/* set speed_in input in case RMII mode is used in >10Mbps */
+		if (phy->speed > 10 && slave->slave_num < 2 &&
+		    phy->interface == PHY_INTERFACE_MODE_RMII)
+			mac_control |= BIT(15 + slave->slave_num);
+
 		*link = true;
 	} else {
 		mac_control = 0;
-- 
1.7.11.4

^ permalink raw reply related

* [PATCH 1/2] net: ti cpsw ethernet: allow reading phy interface mode from DT
From: Daniel Mack @ 2012-09-26 17:24 UTC (permalink / raw)
  To: netdev
  Cc: devicetree-discuss, Daniel Mack, Mugunthan V N, Vaibhav Hiremath,
	David S. Miller

Allow users to specify the phy interface of the CPSW slaves. The new
node parameter is called "phy_if_mode" and is optional. The original
behaviour of the driver is preserved when not given.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Cc: Mugunthan V N <mugunthanvnm@ti.com>
Cc: Vaibhav Hiremath <hvaibhav@ti.com>
Cc: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/cpsw.txt | 3 +++
 drivers/net/ethernet/ti/cpsw.c                 | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index dcaabe9..d87f7d2 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -25,6 +25,8 @@ Required properties:
 - slave_reg_ofs		: Specifies slave register offset
 - sliver_reg_ofs	: Specifies slave sliver register offset
 - phy_id		: Specifies slave phy id
+- phy_if_mode		: Specifies slave phy interface mode (optional)
+			  (one of the PHY_INTERFACE_MODE_* as numerical value)
 - mac-address		: Specifies slave MAC address
 
 Optional properties:
@@ -62,6 +64,7 @@ Examples:
 			slave_reg_ofs = <0x208>;
 			sliver_reg_ofs = <0xd80>;
 			phy_id = "davinci_mdio.16:00";
+			phy_if_mode = <6>; /* PHY_INTERFACE_MODE_RGMII */
 			/* Filled in by U-Boot */
 			mac-address = [ 00 00 00 00 00 00 ];
 		};
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index aa78168..3d7594e 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -850,6 +850,9 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 		}
 		slave_data->sliver_reg_ofs = prop;
 
+		if (!of_property_read_u32(slave_node, "phy_if_mode", &prop))
+			slave_data->phy_if = prop;
+
 		mac_addr = of_get_mac_address(slave_node);
 		if (mac_addr)
 			memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN);
-- 
1.7.11.4

^ permalink raw reply related

* Re: [PATCH 02/11] inet_diag: pass inet_diag module to netlink_dump_start
From: Pablo Neira Ayuso @ 2012-09-26 17:26 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Gao feng, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	netfilter-devel-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
	steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ,
	linux-crypto-u79uwXL29TY76Z2rM5mHXA, jengelh-9+2X+4sQBs8,
	stephen.hemminger-ZtmgI6mnKB3QT0dZR+AlfA
In-Reply-To: <20120926090714.618a4b39-We1ePj4FEcvRI77zikRAJc56i+j3xesD0e7PPNI6Mm0@public.gmane.org>

On Wed, Sep 26, 2012 at 09:07:14AM -0700, Stephen Hemminger wrote:
> On Wed, 26 Sep 2012 12:52:11 +0800
> Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org> wrote:
> 
> >  			struct netlink_dump_control c = {
> >  				.dump = inet_diag_dump_compat,
> > +				.done = netlink_dump_done,
> > +				.module = THIS_MODULE,
> >  			};
> 
> Since these are immutable, why put the netlink_dump_control structures
> on the stack (which requires code to generate them at runtime) instead
> of making them global?
> 
> static const struct netlink_dump_control unix_diag_ctrl = {
> ...

I wrote that code. I can send a patch to make those
netlink_dump_control structures static.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v5 01/10] ipc: remove forced assignment of selected message
From: Serge Hallyn @ 2012-09-26 17:37 UTC (permalink / raw)
  To: Stanislav Kinsbursky
  Cc: akpm, manfred, a.p.zijlstra, netdev, will.deacon, linux-kernel,
	cmetcalf, jmorris, linux-driver, linux-security-module, hughd,
	ron.mercer, viro, james.l.morris, catalin.marinas, casey, eparis,
	sds, jitendra.kalsaria, devel
In-Reply-To: <20120919160531.11254.48182.stgit@localhost6.localdomain6>

Quoting Stanislav Kinsbursky (skinsbursky@parallels.com):
> This is a cleanup patch. The assignment is redundant.
> 
> Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
> ---
>  ipc/msg.c |    1 -
>  1 files changed, 0 insertions(+), 1 deletions(-)
> 
> diff --git a/ipc/msg.c b/ipc/msg.c
> index 7385de2..f3bfbb8 100644
> --- a/ipc/msg.c
> +++ b/ipc/msg.c
> @@ -787,7 +787,6 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
>  			    !security_msg_queue_msgrcv(msq, walk_msg, current,
>  						       msgtyp, mode)) {
>  
> -				msg = walk_msg;
>  				if (mode == SEARCH_LESSEQUAL &&
>  						walk_msg->m_type != 1) {
>  					msg = walk_msg;

Perhaps your tree is different from mine, but it looks to me like it would
be simpler to remove the 'msg = walk_msg' from both the 'if' and 'else',
and keep them above the if/else?

^ permalink raw reply

* Re: [PATCH] bnx2: Clean up remaining iounmap
From: Michael Chan @ 2012-09-26 18:33 UTC (permalink / raw)
  To: Neil Horman; +Cc: netdev, Michael Chan, David S. Miller
In-Reply-To: <1348680122-9711-1-git-send-email-nhorman@tuxdriver.com>

On Wed, 2012-09-26 at 13:22 -0400, Neil Horman wrote: 
> commit c0357e975afdbbedab5c662d19bef865f02adc17 modified bnx2 to switch from
> using ioremap/iounmap to pci_iomap/pci_iounmap.  They missed a spot in the error
> path of bnx2_init_one though.  This patch just cleans that up.
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> CC: Michael Chan <mchan@broadcom.com>
> CC: "David S. Miller" <davem@davemloft.net>

Acked-by: Michael Chan <mchan@broadcom.com>

> ---
>  drivers/net/ethernet/broadcom/bnx2.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
> index 79cebd8..e48312f 100644
> --- a/drivers/net/ethernet/broadcom/bnx2.c
> +++ b/drivers/net/ethernet/broadcom/bnx2.c
> @@ -8564,7 +8564,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	return 0;
>  
>  error:
> -	iounmap(bp->regview);
> +	pci_iounmap(pdev, bp->regview);
>  	pci_release_regions(pdev);
>  	pci_disable_device(pdev);
>  	pci_set_drvdata(pdev, NULL);

^ permalink raw reply

* Re: ixgbe unstable performance at 1Gb/s
From: Charles Vejnar @ 2012-09-26 18:33 UTC (permalink / raw)
  To: Tantilov, Emil S, netdev@vger.kernel.org
In-Reply-To: <87618083B2453E4A8714035B62D6799216E25153@FMSMSX102.amr.corp.intel.com>

Le 25/09/2012 19:58, Tantilov, Emil S a écrit :
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On
>> Behalf Of Charles
>> Sent: Monday, September 24, 2012 10:47 AM
>> To: netdev@vger.kernel.org
>> Subject: ixgbe unstable performance at 1Gb/s
>>
>> Hi,
>>
>> I hope I am posting on the right mailing-list. If not, sorry; please
>> redirect me
>> to the right place. Thanks.
>>
>> I have a new motherboard with integrated Intel X540 10GBase-T. For now, I
>> want
>> to use it at 1Gb/s.
>>
>> The bandwidth is only of ~300 Mbit/s (with Iperf). It's actually very
>> unstable
>> (always varies between 100 to 800 Mbit/s during the transfer).
> Do you by any chance have CONFIG_IXGBE_PTP set in your kernel config?
>
> If so, try disabling it and see if it fixes your performance.
>
> Thanks,
> Emil
>
Hi,

Thanks for your reply.

I compiled manually the ixgbe module with the default options of my 
distribution (Archlinux). I had the same problem.

I then changed the CONFIG_IXGBE_PTP to no (default is yes) as you 
suggested, and recompiled. The problem disappeared; normal transfer.

Could you please explain why this PTP is causing a problem? How can this 
be fixed without having to recompile the module (ethtool, /sys, bios... 
)? Thanks

Regards,

Charles

^ permalink raw reply

* RE: [PATCH 2/2] net: ti cpsw ethernet: set IFCTL_{A,B} bits for RMII mode
From: N, Mugunthan V @ 2012-09-26 18:50 UTC (permalink / raw)
  To: Daniel Mack, netdev@vger.kernel.org
  Cc: devicetree-discuss@lists.ozlabs.org, Hiremath, Vaibhav,
	David S. Miller
In-Reply-To: <1348680268-8194-2-git-send-email-zonque@gmail.com>

> -----Original Message-----
> From: Daniel Mack [mailto:zonque@gmail.com]
> Sent: Wednesday, September 26, 2012 10:54 PM
> To: netdev@vger.kernel.org
> Cc: devicetree-discuss@lists.ozlabs.org; Daniel Mack; N, Mugunthan V;
> Hiremath, Vaibhav; David S. Miller
> Subject: [PATCH 2/2] net: ti cpsw ethernet: set IFCTL_{A,B} bits for
> RMII mode
> 
> For RMII mode operation in 100Mbps, the CPSW needs to set the
> IFCTL_A / IFCTL_B bits in the MACCONTROL register.
> 
> Signed-off-by: Daniel Mack <zonque@gmail.com>
> Cc: Mugunthan V N <mugunthanvnm@ti.com>
> Cc: Vaibhav Hiremath <hvaibhav@ti.com>
> Cc: David S. Miller <davem@davemloft.net>
> ---
>  drivers/net/ethernet/ti/cpsw.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/net/ethernet/ti/cpsw.c
> b/drivers/net/ethernet/ti/cpsw.c
> index 3d7594e..d88dbfa 100644
> --- a/drivers/net/ethernet/ti/cpsw.c
> +++ b/drivers/net/ethernet/ti/cpsw.c
> @@ -386,6 +386,12 @@ static void _cpsw_adjust_link(struct cpsw_slave
> *slave,
>  			mac_control |= BIT(7);	/* GIGABITEN	*/
>  		if (phy->duplex)
>  			mac_control |= BIT(0);	/* FULLDUPLEXEN	*/
> +
> +		/* set speed_in input in case RMII mode is used in >10Mbps
> */
> +		if (phy->speed > 10 && slave->slave_num < 2 &&
> +		    phy->interface == PHY_INTERFACE_MODE_RMII)
> +			mac_control |= BIT(15 + slave->slave_num);

The MAC control register is separate for each slave and has the same bit
definitions. Bit 15 has to be set for a 100Mbps link on RMII and RGMII PHY
interfaces to control the RMII/RGMII gasket; in GMII mode this bit is unused
by CPSW. For slave 1, the above code sets Bit 16, which is not used to control
the RMII/RGMII gasket. So it is not required to pass the PHY mode from DT.
This patch has to be reworked to set Bit 15 with any PHY mode connected.

The original driver was tested with GMII (Beagle Bone A5) and
RGMII (AM3358 EVM) PHYs, but CPSW works fine without setting this bit in
RGMII PHY mode, so this issue was not caught in testing.

Regards
Mugunthan V N

> +
>  		*link = true;
>  	} else {
>  		mac_control = 0;
> --
> 1.7.11.4

^ permalink raw reply

* [PATCH] rapidio/rionet: fix multicast packet transmit logic
From: Alexandre Bounine @ 2012-09-26 19:14 UTC (permalink / raw)
  To: Andrew Morton, linux-kernel, linuxppc-dev
  Cc: Alexandre Bounine, Matt Porter, David S. Miller, netdev

Fix multicast packet transmit logic to account for repetitive transmission
of single skb:
- correct check for available buffers (this bug may produce NULL pointer
  crash dump in case of heavy traffic);
- update skb user count (incorrect user counter causes a warning dump from
  net_tx_action routine during multicast transfers in systems with three or
  more rionet participants).

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
---
 drivers/net/rionet.c |   20 +++++++++++++++++---
 1 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 91d2588..1470d3e 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -79,6 +79,7 @@ static int rionet_capable = 1;
  * on system trade-offs.
  */
 static struct rio_dev **rionet_active;
+static int nact;	/* total number of active rionet peers */
 
 #define is_rionet_capable(src_ops, dst_ops)			\
 			((src_ops & RIO_SRC_OPS_DATA_MSG) &&	\
@@ -175,6 +176,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	struct ethhdr *eth = (struct ethhdr *)skb->data;
 	u16 destid;
 	unsigned long flags;
+	int add_num = 1;
 
 	local_irq_save(flags);
 	if (!spin_trylock(&rnet->tx_lock)) {
@@ -182,7 +184,10 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		return NETDEV_TX_LOCKED;
 	}
 
-	if ((rnet->tx_cnt + 1) > RIONET_TX_RING_SIZE) {
+	if (is_multicast_ether_addr(eth->h_dest))
+		add_num = nact;
+
+	if ((rnet->tx_cnt + add_num) > RIONET_TX_RING_SIZE) {
 		netif_stop_queue(ndev);
 		spin_unlock_irqrestore(&rnet->tx_lock, flags);
 		printk(KERN_ERR "%s: BUG! Tx Ring full when queue awake!\n",
@@ -191,11 +196,16 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	}
 
 	if (is_multicast_ether_addr(eth->h_dest)) {
+		int count = 0;
 		for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rnet->mport->sys_size);
 				i++)
-			if (rionet_active[i])
+			if (rionet_active[i]) {
 				rionet_queue_tx_msg(skb, ndev,
 						    rionet_active[i]);
+				if (count)
+					atomic_inc(&skb->users);
+				count++;
+			}
 	} else if (RIONET_MAC_MATCH(eth->h_dest)) {
 		destid = RIONET_GET_DESTID(eth->h_dest);
 		if (rionet_active[destid])
@@ -220,14 +230,17 @@ static void rionet_dbell_event(struct rio_mport *mport, void *dev_id, u16 sid, u
 	if (info == RIONET_DOORBELL_JOIN) {
 		if (!rionet_active[sid]) {
 			list_for_each_entry(peer, &rionet_peers, node) {
-				if (peer->rdev->destid == sid)
+				if (peer->rdev->destid == sid) {
 					rionet_active[sid] = peer->rdev;
+					nact++;
+				}
 			}
 			rio_mport_send_doorbell(mport, sid,
 						RIONET_DOORBELL_JOIN);
 		}
 	} else if (info == RIONET_DOORBELL_LEAVE) {
 		rionet_active[sid] = NULL;
+		nact--;
 	} else {
 		if (netif_msg_intr(rnet))
 			printk(KERN_WARNING "%s: unhandled doorbell\n",
@@ -523,6 +536,7 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
 
 		rc = rionet_setup_netdev(rdev->net->hport, ndev);
 		rionet_check = 1;
+		nact = 0;
 	}
 
 	/*
-- 
1.7.8.4

^ permalink raw reply related

* RE: ixgbe unstable performance at 1Gb/s
From: Tantilov, Emil S @ 2012-09-26 20:22 UTC (permalink / raw)
  To: Charles Vejnar, netdev@vger.kernel.org; +Cc: Kirsher, Jeffrey T
In-Reply-To: <50634A60.40609@unige.ch>

>-----Original Message-----
>From: Charles Vejnar [mailto:Charles.Vejnar@unige.ch]
>Sent: Wednesday, September 26, 2012 11:33 AM
>To: Tantilov, Emil S; netdev@vger.kernel.org
>Subject: Re: ixgbe unstable performance at 1Gb/s
>
>Le 25/09/2012 19:58, Tantilov, Emil S a écrit :
>>> -----Original Message-----
>>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>On
>>> Behalf Of Charles
>>> Sent: Monday, September 24, 2012 10:47 AM
>>> To: netdev@vger.kernel.org
>>> Subject: ixgbe unstable performance at 1Gb/s
>>>
>>> Hi,
>>>
>>> I hope I am posting on the right mailing-list. If not, sorry; please
>>> redirect me
>>> to the right place. Thanks.
>>>
>>> I have a new motherboard with integrated Intel X540 10GBase-T. For now,
>I
>>> want
>>> to use it at 1Gb/s.
>>>
>>> The bandwidth is only of ~300 Mbit/s (with Iperf). It's actually very
>>> unstable
>>> (always varies between 100 to 800 Mbit/s during the transfer).
>> Do you by any chance have CONFIG_IXGBE_PTP set in your kernel config?
>>
>> If so, try disabling it and see if it fixes your performance.
>>
>> Thanks,
>> Emil
>>
>Hi,
>
>Thanks for your reply.
>
>I compiled manually the ixgbe module with the default options of my
>distribution (Archlinux). I had the same problem.
>
>I then changed the CONFIG_IXGBE_PTP to no (default is yes) as you
>suggested, and recompiled. The problem disappeared; normal transfer.
>
>Could you please explain why this PTP is causing a problem? How can this
>be fixed without having to recompile the module (ethtool, /sys, bios...
>)? Thanks

This is actually a bug in the driver. We should have a patch out very soon to address it.

>
>Regards,
>
>Charles

Thanks,
Emil

^ permalink raw reply

* Re: [PATCH V4 0/7] ipvs: IPv6 fragment handling for IPVS
From: Julian Anastasov @ 2012-09-26 20:50 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Hans Schillstrom, Hans Schillstrom, netdev, Pablo Neira Ayuso,
	lvs-devel, Patrick McHardy, Thomas Graf, Wensong Zhang,
	netfilter-devel, Simon Horman
In-Reply-To: <20120926120534.24804.78415.stgit@dragon>


	Hello,

On Wed, 26 Sep 2012, Jesper Dangaard Brouer wrote:

> The following patchset implement IPv6 fragment handling for IPVS.
> 
> This work is based upon patches from Hans Schillstrom.  I have taken
> over the patchset, in close agreement with Hans, because he has not
> been allocated the time to complete his work.
> 
> I have cleaned up the patchset significantly, and split the patchset
> up into seven patches.
> 
> The first 3 patches, are ready to be merged
> 
>  Patch01: Trivial changes, use compressed IPv6 address in output
>  Patch02: IPv6 extend ICMPv6 handling for future types
>  Patch03: Use config macro IS_ENABLED()
> 
> The next 4 patches are V4 of the patches I have submitted earlier,
> where I have incorporated Julian's recent feedback.
> 
> - Notice that patch04 of patchset V3 has been dropped.
> 
> I have also tried to make the patches easier to review, by
> reorganizing the changes, to be more strictly split (exthdr
> vs. fragment handling).
> 
> I have also removed the API changes, and moved those to patch06.  This
> is done (1) to make it easier to review the patches, and (2) to allow
> easier integration of Patrick's idea and my RFC patch of caching exthdr
> info in skb->cb[].  Thus, we can get these patches applied (and later
> go back and apply the caching scheme more easily).
> 
>  Patch04: Fix faulty IPv6 extension header handling in IPVS
>  Patch05: Complete IPv6 fragment handling for IPVS
>  Patch06: IPVS API change to avoid rescan of IPv6 exthdr
>  Patch07: IPVS SIP fragment handling
> 
> The SIP frag handling has been split into its own patch, as I have
> not been able to test this part myself.
> 
> This patchset is based upon:
>   Pablo's nf-next tree:  git://1984.lsi.us.es/nf-next
>   On top of:
>     commit 2cbc78a29e76a2e92c172651204f3117491877d2
>     (netfilter: combine ipt_REDIRECT and ip6t_REDIRECT)
> 
> ---
> 
> Jesper Dangaard Brouer (7):
>       ipvs: SIP fragment handling
>       ipvs: API change to avoid rescan of IPv6 exthdr
>       ipvs: Complete IPv6 fragment handling for IPVS
>       ipvs: Fix faulty IPv6 extension header handling in IPVS
>       ipvs: Use config macro IS_ENABLED()
>       ipvs: IPv6 extend ICMPv6 handling for future types
>       ipvs: Trivial changes, use compressed IPv6 address in output

	All 7 patches look good to me. Thanks!

Acked-by: Julian Anastasov <ja@ssi.bg>

>  include/net/ip_vs.h                     |  194 +++++++++++----
>  net/netfilter/ipvs/Kconfig              |    7 -
>  net/netfilter/ipvs/ip_vs_conn.c         |   15 -
>  net/netfilter/ipvs/ip_vs_core.c         |  404 +++++++++++++++++--------------
>  net/netfilter/ipvs/ip_vs_dh.c           |    2 
>  net/netfilter/ipvs/ip_vs_lblc.c         |    2 
>  net/netfilter/ipvs/ip_vs_lblcr.c        |    2 
>  net/netfilter/ipvs/ip_vs_pe_sip.c       |   21 +-
>  net/netfilter/ipvs/ip_vs_proto.c        |    6 
>  net/netfilter/ipvs/ip_vs_proto_ah_esp.c |    9 -
>  net/netfilter/ipvs/ip_vs_proto_sctp.c   |   42 +--
>  net/netfilter/ipvs/ip_vs_proto_tcp.c    |   40 +--
>  net/netfilter/ipvs/ip_vs_proto_udp.c    |   41 +--
>  net/netfilter/ipvs/ip_vs_sched.c        |    2 
>  net/netfilter/ipvs/ip_vs_sh.c           |    2 
>  net/netfilter/ipvs/ip_vs_xmit.c         |   73 +++---
>  net/netfilter/xt_ipvs.c                 |    4 
>  17 files changed, 504 insertions(+), 362 deletions(-)
> 
> 
> --
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Sr. Network Kernel Developer at Red Hat
>   Author of http://www.iptv-analyzer.org
>   LinkedIn: http://www.linkedin.com/in/brouer

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* Re: Problems with tg3 on BCM5720
From: Nithin Nayak Sujir @ 2012-09-26 21:13 UTC (permalink / raw)
  To: Dirkjan Ochtman; +Cc: netdev
In-Reply-To: <CAKmKYaCBG5hURNomffgp9gRhS7g2BYsNG-LTmBrKsX-iyVzVxg@mail.gmail.com>

Hi Dirkjan,
A few questions -
1. Can you tell me the last patch that is included in the tg3 driver in 3.4.9 on your distro?
2. Can you give more info about the working setup?
3. Was there any system reset or driver reload between the working and not working setups? Or was it 
just a cable switch?

4. Please give the output of
ethtool eth0
ethtool -i eth0
ethtool -k eth0

5. Can you run ethtool --test in the working setup?

6. I noticed in the syslog, the link is coming up at 100 Mbps. Is this expected?
7. Does it fail immediately on connect to the data center switch? Or is it after some traffic goes 
through?

Thanks,
Nithin.


On 09/26/2012 06:48 AM, Dirkjan Ochtman wrote:
> Hi,
>
> I hope this is the right list for this; please let me know if there is
> some more appropriate venue for this kind of problem.
>
> We have bought a Dell PowerEdge server recently with Broadcom ethernet
> controllers:
>
> 01:00.0 Ethernet controller: Broadcom Corporation NetXtreme BCM5720
> Gigabit Ethernet PCIe
> 01:00.1 Ethernet controller: Broadcom Corporation NetXtreme BCM5720
> Gigabit Ethernet PCIe
> 02:00.0 Ethernet controller: Broadcom Corporation NetXtreme BCM5720
> Gigabit Ethernet PCIe
> 02:00.1 Ethernet controller: Broadcom Corporation NetXtreme BCM5720
> Gigabit Ethernet PCIe
>
> Using the tg3 driver as provided by the 3.4.9 kernel (it's a distro
> kernel, but I verified that there are no relevant patches on top of
> the vanilla 3.4.9), networking works fine on a small VLAN. However,
> we've run into trouble connecting one of the interfaces to a switch run
> by our data center vendor (the switch side works fine when connecting
> with a laptop). The driver keeps erroring out with "transmit timed
> out, resetting" (full syslog output is at [1]). This also keeps
> happening when I use the latest tg3.{c,h} from torvalds/linux-2.6
> (though I've had to revert a few things to make it compile inside the
> 3.4.9 kernel).
>
> I've also run some tests with ethtool:
>
> djc@jansky ~ $ sudo ethtool --test eth0
> The test result is FAIL
> The test extra info:
> nvram test        (online)       0
> link test         (online)       1
> register test     (offline)      0
> memory test       (offline)      0
> mac loopback test (offline)      7
> phy loopback test (offline)      7
> ext loopback test (offline)      0
> interrupt test    (offline)      1
>
> While I'd be happy to do more digging, at this point I'm out of ideas
> on how to go about that.
>
> Again, sorry if this is off-topic for this list, any help would be appreciated.
>
> Cheers,
>
> Dirkjan
>
> [1] http://dirkjan.ochtman.nl/files/tg3-fail.txt
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox