Netdev List
 help / color / mirror / Atom feed
* [patch-net-next v3 2/2] net: ethernet: cpsw: don't requests IRQs we don't use
From: Felipe Balbi @ 2015-01-16 16:11 UTC (permalink / raw)
  To: davem
  Cc: Tony Lindgren, Linux OMAP Mailing List, mugunthanvnm, netdev,
	Felipe Balbi
In-Reply-To: <1421424672-19323-1-git-send-email-balbi@ti.com>

CPSW never uses RX_THRESHOLD or MISC interrupts. In
fact, they are always kept masked in their appropriate
IRQ Enable register.

Instead of allocating an IRQ that never fires, it's best
to remove that code altogether and let future patches
implement it if anybody needs those.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---

combined patches 2 and 3.

 drivers/net/ethernet/ti/cpsw.c | 72 +++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 8e1af51e4b76..ba09ff3c1695 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -754,17 +754,25 @@ requeue:
 		dev_kfree_skb_any(new_skb);
 }
 
-static irqreturn_t cpsw_interrupt(int irq, void *dev_id)
+static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id)
 {
 	struct cpsw_priv *priv = dev_id;
-	int value = irq - priv->irqs_table[0];
 
-	/* NOTICE: Ending IRQ here. The trick with the 'value' variable above
-	 * is to make sure we will always write the correct value to the EOI
-	 * register. Namely 0 for RX_THRESH Interrupt, 1 for RX Interrupt, 2
-	 * for TX Interrupt and 3 for MISC Interrupt.
-	 */
-	cpdma_ctlr_eoi(priv->dma, value);
+	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
+	cpdma_chan_process(priv->txch, 128);
+
+	priv = cpsw_get_slave_priv(priv, 1);
+	if (priv)
+		cpdma_chan_process(priv->txch, 128);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
+{
+	struct cpsw_priv *priv = dev_id;
+
+	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
 
 	cpsw_intr_disable(priv);
 	if (priv->irq_enabled == true) {
@@ -1617,7 +1625,8 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev)
 
 	cpsw_intr_disable(priv);
 	cpdma_ctlr_int_ctrl(priv->dma, false);
-	cpsw_interrupt(ndev->irq, priv);
+	cpsw_rx_interrupt(priv->irq[0], priv);
+	cpsw_tx_interrupt(priv->irq[1], priv);
 	cpdma_ctlr_int_ctrl(priv->dma, true);
 	cpsw_intr_enable(priv);
 }
@@ -2339,62 +2348,47 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_dma_ret;
 	}
 
-	ndev->irq = platform_get_irq(pdev, 0);
+	ndev->irq = platform_get_irq(pdev, 1);
 	if (ndev->irq < 0) {
 		dev_err(priv->dev, "error getting irq resource\n");
 		ret = -ENOENT;
 		goto clean_ale_ret;
 	}
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		goto clean_ale_ret;
-
-	priv->irqs_table[0] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
-			       0, dev_name(&pdev->dev), priv);
-	if (ret < 0) {
-		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
-		goto clean_ale_ret;
-	}
+	/* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and
+	 * MISC IRQs which are always kept disabled with this driver so
+	 * we will not request them.
+	 *
+	 * If anyone wants to implement support for those, make sure to
+	 * first request and append them to irqs_table array.
+	 */
 
+	/* RX IRQ */
 	irq = platform_get_irq(pdev, 1);
 	if (irq < 0)
 		goto clean_ale_ret;
 
-	priv->irqs_table[1] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
+	priv->irqs_table[0] = irq;
+	ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt,
 			       0, dev_name(&pdev->dev), priv);
 	if (ret < 0) {
 		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
 		goto clean_ale_ret;
 	}
 
+	/* TX IRQ */
 	irq = platform_get_irq(pdev, 2);
 	if (irq < 0)
 		goto clean_ale_ret;
 
-	priv->irqs_table[2] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
-			       0, dev_name(&pdev->dev), priv);
-	if (ret < 0) {
-		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
-		goto clean_ale_ret;
-	}
-
-	irq = platform_get_irq(pdev, 3);
-	if (irq < 0)
-		goto clean_ale_ret;
-
-	priv->irqs_table[3] = irq;
-	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
+	priv->irqs_table[1] = irq;
+	ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt,
 			       0, dev_name(&pdev->dev), priv);
 	if (ret < 0) {
 		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
 		goto clean_ale_ret;
 	}
-
-	priv->num_irqs = 4;
+	priv->num_irqs = 2;
 
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
-- 
2.2.0


^ permalink raw reply related

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Thomas Graf @ 2015-01-16 16:15 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: davem, netdev, kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <20150116160354.GI30132@acer.localdomain>

On 01/16/15 at 04:03pm, Patrick McHardy wrote:
> On 16.01, Thomas Graf wrote:
> > A walker may not see insertions that occur after the walker was started
> > if resizing is enabled. Is that a problem for nftables?
> 
> No, that would be Ok. The case I'm wondering about is:
> 
> - insertion
> - resize starts
> - another insertion
> - walker, resize not finished yet

Correct, walker may not see "another insertion". The window for this
behavior to occur is not the full resize operation, just the linking
period, but it may occur. The length of the window is typically
equal to a grace period.

We can provide a synchronization function to block the dumper or the
insertion/removal until the linking is complete. The latter would
give the old runtime behaviour back (variable runtime of insert),
the blocked dumper might be preferred. What do you think?

^ permalink raw reply

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Patrick McHardy @ 2015-01-16 16:32 UTC (permalink / raw)
  To: Thomas Graf
  Cc: davem, netdev, kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <20150116161530.GC15052@casper.infradead.org>

On 16.01, Thomas Graf wrote:
> On 01/16/15 at 04:03pm, Patrick McHardy wrote:
> > On 16.01, Thomas Graf wrote:
> > > A walker may not see insertions that occur after the walker was started
> > > if resizing is enabled. Is that a problem for nftables?
> > 
> > No, that would be Ok. The case I'm wondering about is:
> > 
> > - insertion
> > - resize starts
> > - another insertion
> > - walker, resize not finished yet
> 
> Correct, walker may not see "another insertion". The window for this
> behavior to occur is not the full resize operation, just the linking
> period, but it may occur. The length of the window is typically
> equal to a grace period.
> 
> We can provide a synchronization function to block the dumper or the
> insertion/removal until the linking is complete. The latter would
> give the old runtime behaviour back (variable runtime of insert),
> the blocked dumper might be preferred. What do you think?

If we have to block, the dumper if of course preferred. Taking the
mutex should do fine I guess?

I suppose walking both tables without any races would be rather
complicated.

^ permalink raw reply

* Re: [PATCH 4/6] kbuild: add a new kselftest_install make target to install selftests
From: Shuah Khan @ 2015-01-16 16:34 UTC (permalink / raw)
  To: Michael Ellerman, linux-kernel
  Cc: mmarek, gregkh, akpm, rostedt, mingo, davem, keescook,
	tranmanphong, cov, dh.herrmann, hughd, bobby.prani, serge.hallyn,
	ebiederm, tim.bird, josh, koct9i, linux-kbuild, linux-api, netdev
In-Reply-To: <1420794375-31881-4-git-send-email-mpe@ellerman.id.au>

On 01/09/2015 02:06 AM, Michael Ellerman wrote:
> Add a new make target to install kernel selftests. This new target will
> build and install selftests.
> 
> The default is just $(objtree)/selftests. This is preferable to
> something based on $(INSTALL_MOD_PATH) (which defaults to /), as it
> allows a normal user to install the tests. This is similar to the
> default behaviour of make headers_install.

A normal user can install tests at any location they choose by
overriding the default path. For example:

INSTALL_MOD_PATH=/tmp make kselftest_install

will install under tmp.

The approach I used also ties test installs to kernel release.
This addresses an important use-case for kernel developers
that want to compare results from release to release.

The use-case for any user to be able to install tests at
any location is addressed by the above example.

I would like these two above use-cases continued to be supported,
especially the one that tries the test installs to kernel release.
Another goal is to keep changes to the main Makefile minimal and
the rest of the install support belongs under selftests/Makefile
and any other include file (like the one you proposed).

The patch I have in patch v4 addresses the use-cases mentioned above.
I do like the lib.mk approach in general and I am going to review that
patch and give you feedback.

thanks,
-- Shuah

-- 
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh@osg.samsung.com | (970) 217-8978

^ permalink raw reply

* Re: [PATCH 6/6] selftests: Set CC using CROSS_COMPILE once in lib.mk
From: Shuah Khan @ 2015-01-16 16:37 UTC (permalink / raw)
  To: Michael Ellerman, linux-kernel
  Cc: mmarek, gregkh, akpm, rostedt, mingo, davem, keescook,
	tranmanphong, cov, dh.herrmann, hughd, bobby.prani, serge.hallyn,
	ebiederm, tim.bird, josh, koct9i, linux-kbuild, linux-api, netdev
In-Reply-To: <1420794375-31881-6-git-send-email-mpe@ellerman.id.au>

On 01/09/2015 02:06 AM, Michael Ellerman wrote:

Missing commit log. Please make sure your future include
a meaningful commit log that describes what the patch does.

> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
>  tools/testing/selftests/efivarfs/Makefile | 1 -
>  tools/testing/selftests/exec/Makefile     | 1 -
>  tools/testing/selftests/kcmp/Makefile     | 1 -
>  tools/testing/selftests/lib.mk            | 6 ++++++
>  tools/testing/selftests/net/Makefile      | 1 -
>  tools/testing/selftests/powerpc/Makefile  | 3 +--
>  tools/testing/selftests/size/Makefile     | 2 --
>  tools/testing/selftests/vm/Makefile       | 1 -
>  8 files changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile
> index 3052d0bda24b..d683486a859b 100644
> --- a/tools/testing/selftests/efivarfs/Makefile
> +++ b/tools/testing/selftests/efivarfs/Makefile
> @@ -1,4 +1,3 @@
> -CC = $(CROSS_COMPILE)gcc
>  CFLAGS = -Wall
>  
>  test_objs = open-unlink create-read
> diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
> index 886cabe307b1..4edb7d0da29b 100644
> --- a/tools/testing/selftests/exec/Makefile
> +++ b/tools/testing/selftests/exec/Makefile
> @@ -1,4 +1,3 @@
> -CC = $(CROSS_COMPILE)gcc
>  CFLAGS = -Wall
>  BINARIES = execveat
>  DEPS = execveat.symlink execveat.denatured script subdir
> diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
> index 0eecd183058c..2ae7450a9a89 100644
> --- a/tools/testing/selftests/kcmp/Makefile
> +++ b/tools/testing/selftests/kcmp/Makefile
> @@ -1,4 +1,3 @@
> -CC := $(CROSS_COMPILE)$(CC)
>  CFLAGS += -I../../../../usr/include/
>  
>  all: kcmp_test
> diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
> index 7bd3dabe2846..abae16396c43 100644
> --- a/tools/testing/selftests/lib.mk
> +++ b/tools/testing/selftests/lib.mk
> @@ -1,3 +1,9 @@
> +# When we're called from kbuild $(CC) already contains $(CROSS_COMPILE), so
> +# here we need to use "cc", otherwise we'll get $(CROSS_COMPILE) twice. The
> +# only downside is it breaks someone overriding $(CC), but that's probably OK,
> +# they can probably cope by changing their path.

I don't want to see the compiles broken and users needing to
work-around. Pleas find a way to not break the compiles.

thanks,
-- Shuah

-- 
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh@osg.samsung.com | (970) 217-8978

^ permalink raw reply

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Thomas Graf @ 2015-01-16 16:38 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: davem, netdev, kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <20150116163202.GJ30132@acer.localdomain>

On 01/16/15 at 04:32pm, Patrick McHardy wrote:
> If we have to block, the dumper if of course preferred. Taking the
> mutex should do fine I guess?

That will work, it will also ensure that the walker doesn't see the
pre unzip state which would cause entries to be duplicated in the
walker.

> I suppose walking both tables without any races would be rather
> complicated.

Very interested if you can come up with something ;-)

^ permalink raw reply

* Re: NETDEV WATCHDOG:  internal(r8152): transmit queue 0 timed out
From: sean darcy @ 2015-01-16 16:40 UTC (permalink / raw)
  To: users; +Cc: netdev, linux-usb
In-Reply-To: <54B8FF7E.2010901@gmail.com>

On 01/16/2015 07:09 AM, poma wrote:
> On 16.01.2015 10:37, Hayes Wang wrote:
>>   poma [mailto:pomidorabelisima@gmail.com]
>>> Sent: Friday, January 16, 2015 4:25 PM
>> [...]
>>>> This looks like a USB problem. Is there a way to get usb (or
>>>> NetworkManager) to reinitialize the driver when this happens?
>>>
>>> I would ask these people for advice, therefore.
>>
>> Our hw engineers need to analyse the behavior of the device.
>> However, I don't think you have such instrument to provide
>> the required information. If we don't know the reason, we
>> couldn't give you the proper solution. Besides, your solution
>> would work if and only if reloading the driver is helpful.
>>
>> The issue have to debug from the hardware, and I have no idea
>> about what the software could do before analysing the hw. Maybe
>> you could try the following driver first to check if it is useful.
>>
>> http://www.realtek.com.tw/downloads/downloadsView.aspx?Langid=2&PNid=13&PFid=56&Level=5&Conn=4&DownTypeID=3&GetDown=false
>>
>> Best Regards,
>> Hayes
>>
>
> Thanks for your response, Mr. Hayes.
>
> Mr. Sean, please download and check if "timeout" is still present with built RTL8153 module from REALTEK site, as Mr. Hayes proposed.
> http://www.realtek.com.tw/downloads/downloadsView.aspx?Langid=2&PNid=13&PFid=56&Level=5&Conn=4&DownTypeID=3&GetDown=false#2
> r8152.53-2.03.0.tar.bz2
>
> Procedure - should be equal for both, Fedora 21 & 20:
>
> $ uname -r
> 3.17.8-300.fc21.x86_64
>
> $ su -c 'yum install kernel-devel'
>
> $ tar xf r8152.53-2.03.0.tar.bz2
> $ cd r8152-2.03.0/
> $ make
> $ su
>
> # cp 50-usb-realtek-net.rules /etc/udev/rules.d/
> # udevadm trigger --action=add
>
> # modprobe -rv r8152
> # cp r8152.ko /lib/modules/$(uname -r)/updates/
> # depmod
> # modprobe -v r8152
>
>
> poma
>
OK, will do.

In the meantime, here's the kernel log showing the timeout, the removal 
of the usb ethernet adapter, and it's reinsertion:


Jan 15 00:15:18 kernel: r8152 2-1:1.0 internal: Tx timeout
Jan 15 00:15:23 kernel: r8152 2-1:1.0 internal: Tx timeout
Jan 15 00:15:29 kernel: usb 2-1: USB disconnect, device number 4
Jan 15 00:15:29 kernel: r8152 2-1:1.0 internal: Tx status -108
.....................
Jan 15 00:15:29 kernel: r8152 2-1:1.0 internal: Tx status -108
Jan 15 00:15:29 kernel: r8152 2-1:1.0 internal: Tx status -108
Jan 15 00:15:31 kernel: usb 2-1: new high-speed USB device number 5 
using ehci-pci
Jan 15 00:15:31 kernel: usb 2-1: New USB device found, idVendor=0bda, 
idProduct=8152
Jan 15 00:15:31 kernel: usb 2-1: New USB device strings: Mfr=1, 
Product=2, SerialNumber=3
Jan 15 00:15:31 kernel: usb 2-1: Product: USB 10/100 LAN
Jan 15 00:15:31 kernel: usb 2-1: Manufacturer: CE-LINK
Jan 15 00:15:31 kernel: usb 2-1: SerialNumber: ...........
Jan 15 00:15:31 kernel: usb 2-1: reset high-speed USB device number 5 
using ehci-pci
Jan 15 00:15:31 kernel: r8152 2-1:1.0 eth0: v1.06.1 (2014/10/01)
Jan 15 00:15:32 kernel: r8152 2-1:1.0 internal: renamed from eth0
Jan 15 00:15:32 systemd-udevd[12869]: renamed network interface eth0 to 
internal
Jan 15 00:15:32 kernel: IPv6: ADDRCONF(NETDEV_UP): internal: link is not 
ready
Jan 15 00:15:34 kernel: IPv6: ADDRCONF(NETDEV_CHANGE): internal: link 
becomes ready


-- 
users mailing list
users@lists.fedoraproject.org
To unsubscribe or change subscription options:
https://admin.fedoraproject.org/mailman/listinfo/users
Fedora Code of Conduct: http://fedoraproject.org/code-of-conduct
Guidelines: http://fedoraproject.org/wiki/Mailing_list_guidelines
Have a question? Ask away: http://ask.fedoraproject.org

^ permalink raw reply

* RE: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: David Laight @ 2015-01-16 16:43 UTC (permalink / raw)
  To: 'Patrick McHardy', Thomas Graf
  Cc: davem@davemloft.net, netdev@vger.kernel.org,
	kernel@vger.kernel.org, herbert@gondor.apana.org.au,
	paulmck@linux.vnet.ibm.com, edumazet@google.com,
	john.r.fastabend@intel.com, josh@joshtriplett.org,
	netfilter-devel@vger.kernel.org
In-Reply-To: <20150116163202.GJ30132@acer.localdomain>

From: Patrick McHardy
> On 16.01, Thomas Graf wrote:
> > On 01/16/15 at 04:03pm, Patrick McHardy wrote:
> > > On 16.01, Thomas Graf wrote:
> > > > A walker may not see insertions that occur after the walker was started
> > > > if resizing is enabled. Is that a problem for nftables?
> > >
> > > No, that would be Ok. The case I'm wondering about is:
> > >
> > > - insertion
> > > - resize starts
> > > - another insertion
> > > - walker, resize not finished yet
> >
> > Correct, walker may not see "another insertion". The window for this
> > behavior to occur is not the full resize operation, just the linking
> > period, but it may occur. The length of the window is typically
> > equal to a grace period.
> >
> > We can provide a synchronization function to block the dumper or the
> > insertion/removal until the linking is complete. The latter would
> > give the old runtime behaviour back (variable runtime of insert),
> > the blocked dumper might be preferred. What do you think?
> 
> If we have to block, the dumper if of course preferred. Taking the
> mutex should do fine I guess?
> 
> I suppose walking both tables without any races would be rather
> complicated.

The walker is unlikely to see items that get inserted early in the hash
table even without a resize.

I'd be more worried about the walker missing big blocks of entries or
getting duplicate entries during a resize.
This might be a problem if the walker is a user-process table dump,
in which case you can't assume it will finish in any finite time.

	David

^ permalink raw reply

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Patrick McHardy @ 2015-01-16 16:51 UTC (permalink / raw)
  To: Thomas Graf
  Cc: davem, netdev, kernel, herbert, paulmck, edumazet,
	john.r.fastabend, josh, netfilter-devel
In-Reply-To: <20150116163826.GD15052@casper.infradead.org>

On 16.01, Thomas Graf wrote:
> On 01/16/15 at 04:32pm, Patrick McHardy wrote:
> > If we have to block, the dumper if of course preferred. Taking the
> > mutex should do fine I guess?
> 
> That will work, it will also ensure that the walker doesn't see the
> pre unzip state which would cause entries to be duplicated in the
> walker.
> 
> > I suppose walking both tables without any races would be rather
> > complicated.
> 
> Very interested if you can come up with something ;-)

For now I'll stick to the mutex. Thanks!

^ permalink raw reply

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Thomas Graf @ 2015-01-16 16:53 UTC (permalink / raw)
  To: David Laight
  Cc: 'Patrick McHardy', davem@davemloft.net,
	netdev@vger.kernel.org, herbert@gondor.apana.org.au,
	paulmck@linux.vnet.ibm.com, edumazet@google.com,
	john.r.fastabend@intel.com, josh@joshtriplett.org,
	netfilter-devel@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D1CACADAF@AcuExch.aculab.com>

On 01/16/15 at 04:43pm, David Laight wrote:
> The walker is unlikely to see items that get inserted early in the hash
> table even without a resize.

I don't follow, you have to explain this statement.

Walkers which don't want to see duplicates or miss entries should
just take the mutex.

^ permalink raw reply

* Re: [PATCH 2/6] selftests: Add install target
From: Shuah Khan @ 2015-01-16 17:46 UTC (permalink / raw)
  To: Michael Ellerman, linux-kernel
  Cc: mmarek, gregkh, akpm, rostedt, mingo, davem, keescook,
	tranmanphong, cov, dh.herrmann, hughd, bobby.prani, serge.hallyn,
	ebiederm, tim.bird, josh, koct9i, linux-kbuild, linux-api, netdev
In-Reply-To: <1420794375-31881-2-git-send-email-mpe@ellerman.id.au>

On 01/09/2015 02:06 AM, Michael Ellerman wrote:
> This adds make install support to selftests. The basic usage is:
> 
> $ cd tools/testing/selftests
> $ make install
> 
> That installs into tools/testing/selftests/install, which can then be
> copied where ever necessary.
> 
> The install destination is also configurable using eg:
> 
> $ INSTALL_PATH=/mnt/selftests make install

Please see my response to [PATCH 4/6] kbuild: add a new
kselftest_install make target to install selftests

These are addressed by the current approach to use existing
INSTALL_MOD_PATH.

> 
> The implementation uses two targets in the child makefiles. The first
> "install" is expected to install all files into $(INSTALL_PATH).
> 
> The second, "emit_tests", is expected to emit the test instructions (ie.
> bash script) on stdout. Separating this from install means the child
> makefiles need no knowledge of the location of the test script.

Yes. Removing the need for individual makefiles to know the install
path details would be a good improvement to make on top of my patch v4
series.

thanks,
-- Shuah


-- 
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh@osg.samsung.com | (970) 217-8978

^ permalink raw reply

* [PATCH net 0/2] sh_eth fixes
From: Ben Hutchings @ 2015-01-16 17:49 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, linux-kernel, Nobuhiro Iwamatsu, Mitsuhiro Kimura,
	Hisashi Nakamura, Yoshihiro Kaneko

I'm currently looking at Ethernet support on the R-Car H2 chip,
reviewing and testing the sh_eth driver.  Here are fixes for two fairly
obvious bugs in the driver; I will probably have some more later.

These are not tested on any of the other supported chips.

Ben.

Ben Hutchings (2):
  sh_eth: Fix promiscuous mode on chips without TSU
  sh_eth: Fix ethtool operation crash when net device is down

 drivers/net/ethernet/renesas/sh_eth.c |   28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* [PATCH net 1/2] sh_eth: Fix promiscuous mode on chips without TSU
From: Ben Hutchings @ 2015-01-16 17:51 UTC (permalink / raw)
  To: ct178-internal, David S. Miller
  Cc: netdev, linux-kernel, Nobuhiro Iwamatsu, Mitsuhiro Kimura,
	Hisashi Nakamura, Yoshihiro Kaneko
In-Reply-To: <1421430592.1222.190.camel@xylophone.i.decadent.org.uk>

Currently net_device_ops::set_rx_mode is only implemented for
chips with a TSU (multiple address table).  However we do need
to turn the PRM (promiscuous) flag on and off for other chips.

- Remove the unlikely() from the TSU functions that we may safely
  call for chips without a TSU
- Make setting of the MCT flag conditional on the tsu capability flag
- Rename sh_eth_set_multicast_list() to sh_eth_set_rx_mode() and plumb
  it into both net_device_ops structures
- Remove the previously-unreachable branch in sh_eth_rx_mode() that
  would otherwise reset the flags to defaults for non-TSU chips

Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
---
 drivers/net/ethernet/renesas/sh_eth.c |   18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 167737f..0c4d5b5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2417,7 +2417,7 @@ static int sh_eth_tsu_purge_all(struct net_device *ndev)
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	int i, ret;
 
-	if (unlikely(!mdp->cd->tsu))
+	if (!mdp->cd->tsu)
 		return 0;
 
 	for (i = 0; i < SH_ETH_TSU_CAM_ENTRIES; i++) {
@@ -2440,7 +2440,7 @@ static void sh_eth_tsu_purge_mcast(struct net_device *ndev)
 	void *reg_offset = sh_eth_tsu_get_offset(mdp, TSU_ADRH0);
 	int i;
 
-	if (unlikely(!mdp->cd->tsu))
+	if (!mdp->cd->tsu)
 		return;
 
 	for (i = 0; i < SH_ETH_TSU_CAM_ENTRIES; i++, reg_offset += 8) {
@@ -2450,8 +2450,8 @@ static void sh_eth_tsu_purge_mcast(struct net_device *ndev)
 	}
 }
 
-/* Multicast reception directions set */
-static void sh_eth_set_multicast_list(struct net_device *ndev)
+/* Update promiscuous flag and multicast filter */
+static void sh_eth_set_rx_mode(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	u32 ecmr_bits;
@@ -2462,7 +2462,9 @@ static void sh_eth_set_multicast_list(struct net_device *ndev)
 	/* Initial condition is MCT = 1, PRM = 0.
 	 * Depending on ndev->flags, set PRM or clear MCT
 	 */
-	ecmr_bits = (sh_eth_read(ndev, ECMR) & ~ECMR_PRM) | ECMR_MCT;
+	ecmr_bits = sh_eth_read(ndev, ECMR) & ~ECMR_PRM;
+	if (mdp->cd->tsu)
+		ecmr_bits |= ECMR_MCT;
 
 	if (!(ndev->flags & IFF_MULTICAST)) {
 		sh_eth_tsu_purge_mcast(ndev);
@@ -2491,9 +2493,6 @@ static void sh_eth_set_multicast_list(struct net_device *ndev)
 				}
 			}
 		}
-	} else {
-		/* Normal, unicast/broadcast-only mode. */
-		ecmr_bits = (ecmr_bits & ~ECMR_PRM) | ECMR_MCT;
 	}
 
 	/* update the ethernet mode */
@@ -2701,6 +2700,7 @@ static const struct net_device_ops sh_eth_netdev_ops = {
 	.ndo_stop		= sh_eth_close,
 	.ndo_start_xmit		= sh_eth_start_xmit,
 	.ndo_get_stats		= sh_eth_get_stats,
+	.ndo_set_rx_mode	= sh_eth_set_rx_mode,
 	.ndo_tx_timeout		= sh_eth_tx_timeout,
 	.ndo_do_ioctl		= sh_eth_do_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -2713,7 +2713,7 @@ static const struct net_device_ops sh_eth_netdev_ops_tsu = {
 	.ndo_stop		= sh_eth_close,
 	.ndo_start_xmit		= sh_eth_start_xmit,
 	.ndo_get_stats		= sh_eth_get_stats,
-	.ndo_set_rx_mode	= sh_eth_set_multicast_list,
+	.ndo_set_rx_mode	= sh_eth_set_rx_mode,
 	.ndo_vlan_rx_add_vid	= sh_eth_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= sh_eth_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= sh_eth_tx_timeout,
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net 2/2] sh_eth: Fix ethtool operation crash when net device is down
From: Ben Hutchings @ 2015-01-16 17:51 UTC (permalink / raw)
  To: ct178-internal, David S. Miller
  Cc: netdev, linux-kernel, Nobuhiro Iwamatsu, Mitsuhiro Kimura,
	Hisashi Nakamura, Yoshihiro Kaneko
In-Reply-To: <1421430592.1222.190.camel@xylophone.i.decadent.org.uk>

The driver connects and disconnects the PHY device whenever the
net device is brought up and down.  The ethtool get_settings,
set_settings and nway_reset operations will dereference a null
or dangling pointer if called while it is down.

I think it would be preferable to keep the PHY connected, but there
may be good reasons not to.

As an immediate fix for this bug:
- Set the phydev pointer to NULL after disconnecting the PHY
- Change those three operations to return -ENODEV while the PHY is
  not connected

Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
---
 drivers/net/ethernet/renesas/sh_eth.c |   10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 0c4d5b5..28e3822 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1827,6 +1827,9 @@ static int sh_eth_get_settings(struct net_device *ndev,
 	unsigned long flags;
 	int ret;
 
+	if (!mdp->phydev)
+		return -ENODEV;
+
 	spin_lock_irqsave(&mdp->lock, flags);
 	ret = phy_ethtool_gset(mdp->phydev, ecmd);
 	spin_unlock_irqrestore(&mdp->lock, flags);
@@ -1841,6 +1844,9 @@ static int sh_eth_set_settings(struct net_device *ndev,
 	unsigned long flags;
 	int ret;
 
+	if (!mdp->phydev)
+		return -ENODEV;
+
 	spin_lock_irqsave(&mdp->lock, flags);
 
 	/* disable tx and rx */
@@ -1875,6 +1881,9 @@ static int sh_eth_nway_reset(struct net_device *ndev)
 	unsigned long flags;
 	int ret;
 
+	if (!mdp->phydev)
+		return -ENODEV;
+
 	spin_lock_irqsave(&mdp->lock, flags);
 	ret = phy_start_aneg(mdp->phydev);
 	spin_unlock_irqrestore(&mdp->lock, flags);
@@ -2184,6 +2193,7 @@ static int sh_eth_close(struct net_device *ndev)
 	if (mdp->phydev) {
 		phy_stop(mdp->phydev);
 		phy_disconnect(mdp->phydev);
+		mdp->phydev = NULL;
 	}
 
 	free_irq(ndev->irq, ndev);
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH 1/6] selftests: Introduce minimal shared logic for running tests
From: Shuah Khan @ 2015-01-16 17:53 UTC (permalink / raw)
  To: Michael Ellerman, linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: mmarek-AlSwsSmVLrQ, gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r,
	akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	rostedt-nx8X9YLhiw1AfugRpC6u6w, mingo-H+wXaHxf7aLQT0dZR+AlfA,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, keescook-F7+t8E8rja9g9hUCZPvPmw,
	tranmanphong-Re5JQEeQqe8AvxtiuMwx3w, cov-sgV2jX0FEOL9JmXXK+q4OQ,
	dh.herrmann-Re5JQEeQqe8AvxtiuMwx3w, hughd-hpIqsD4AKlfQT0dZR+AlfA,
	bobby.prani-Re5JQEeQqe8AvxtiuMwx3w,
	serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA,
	ebiederm-aS9lmoZGLiVWk0Htik3J/w, tim.bird-/MT0OVThwyLZJqsBc5GL+g,
	josh-iaAMLnmF4UmaiuxdJuQwMA, koct9i-Re5JQEeQqe8AvxtiuMwx3w,
	linux-kbuild-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1420794375-31881-1-git-send-email-mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>

On 01/09/2015 02:06 AM, Michael Ellerman wrote:
> This adds a Make include file which most selftests can then include to
> get the run_tests logic.
> 
> On its own this has the advantage of some reduction in repetition, and
> also means the pass/fail message is defined in fewer places.
> 
> However the key advantage is it will allow us to implement install very
> simply in a subsequent patch.
> 
> The default implementation just executes each program in $(TEST_PROGS).
> 
> We use a variable to hold the default implementation of $(RUN_TESTS)
> because that gives us a clean way to override it if necessary, ie. using
> override. The mount, memory-hotplug and mqueue tests use that to provide
> a different implementation.
> 
> Tests are not run via /bin/bash, so if they are scripts they must be
> executable, we add u+x to several.
> 
> Signed-off-by: Michael Ellerman <mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>

I like the shared logic approach in general provided it leaves the
flexibility to not use the shared logic if a test have the need to
do so. This series requires some patch planning. shared logic patch
followed by individual test patches as opposed a single patch.

I would like to see the shared logic work done on top of my patch v4
series.

thanks,
-- Shuah


-- 
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org | (970) 217-8978

^ permalink raw reply

* [PATCH net-next] mii: Handle link state changes for forced modes in mii_check_media()
From: Ben Hutchings @ 2015-01-16 17:55 UTC (permalink / raw)
  To: ct178-internal, David S. Miller; +Cc: netdev, linux-kernel

mii_check_media() does not update the link (carrier) state or log link
changes when the link mode is forced.  Drivers using the mii library
must do this themselves, but most of them do not.

Instead of changing them all, provide a sensible default behaviour
similar to mii_check_link() when the mode is forced.

via-rhine depends on it being a no-op in this case, so make its call
to mii_check_media() conditional.

Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
---
I've only tested this with the asix driver, which is where I found the
bug.  I reviewed all the other drivers calling mii_check_media().

Ben.

 drivers/net/ethernet/amd/pcnet32.c   |    2 +-
 drivers/net/ethernet/via/via-rhine.c |    3 ++-
 drivers/net/mii.c                    |   12 +++++++-----
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index e2e3aaf..11d6e65 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -2806,7 +2806,7 @@ static void pcnet32_check_media(struct net_device *dev, int verbose)
 
 /*
  * Check for loss of link and link establishment.
- * Can not use mii_check_media because it does nothing if mode is forced.
+ * Could possibly be changed to use mii_check_media instead.
  */
 
 static void pcnet32_watchdog(struct net_device *dev)
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 0ac7610..b3b7f5a 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -1326,7 +1326,8 @@ static void rhine_check_media(struct net_device *dev, unsigned int init_media)
 	struct rhine_private *rp = netdev_priv(dev);
 	void __iomem *ioaddr = rp->base;
 
-	mii_check_media(&rp->mii_if, netif_msg_link(rp), init_media);
+	if (!mii->force_media)
+		mii_check_media(&rp->mii_if, netif_msg_link(rp), init_media);
 
 	if (rp->mii_if.full_duplex)
 	    iowrite8(ioread8(ioaddr + ChipCmd1) | Cmd1FDuplex,
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index 4a99c39..993570b 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -302,7 +302,7 @@ void mii_check_link (struct mii_if_info *mii)
 }
 
 /**
- * mii_check_media - check the MII interface for a duplex change
+ * mii_check_media - check the MII interface for a carrier/speed/duplex change
  * @mii: the MII interface
  * @ok_to_print: OK to print link up/down messages
  * @init_media: OK to save duplex mode in @mii
@@ -318,10 +318,6 @@ unsigned int mii_check_media (struct mii_if_info *mii,
 	int advertise, lpa, media, duplex;
 	int lpa2 = 0;
 
-	/* if forced media, go no further */
-	if (mii->force_media)
-		return 0; /* duplex did not change */
-
 	/* check current and old link status */
 	old_carrier = netif_carrier_ok(mii->dev) ? 1 : 0;
 	new_carrier = (unsigned int) mii_link_ok(mii);
@@ -345,6 +341,12 @@ unsigned int mii_check_media (struct mii_if_info *mii,
 	 */
 	netif_carrier_on(mii->dev);
 
+	if (mii->force_media) {
+		if (ok_to_print)
+			netdev_info(mii->dev, "link up\n");
+		return 0; /* duplex did not change */
+	}
+
 	/* get MII advertise and LPA values */
 	if ((!init_media) && (mii->advertising))
 		advertise = mii->advertising;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next v2 0/2] net: DSA fixes for bridge and ip-autoconf
From: Florian Fainelli @ 2015-01-16 17:56 UTC (permalink / raw)
  To: netdev; +Cc: davem, stephen, kaber, bridge, buytenh, Florian Fainelli

Hi David,

These two patches address some real world use cases of the DSA master and slave
network devices.

You have already seen patch 1 previously and you rejected it since my
explanations were not good enough to provide a justification as to why it is
useful, hopefully this time my explanation is better.

Patch 2 solves a different, yet very real problem as well at the bridge layer
when using DSA network devices.

Florian Fainelli (2):
  net: ipv4: handle DSA enabled master network devices
  net: bridge: reject DSA-enabled master netdevices as bridge members

 net/bridge/br_if.c  | 10 ++++++++--
 net/ipv4/ipconfig.c |  6 +++---
 2 files changed, 11 insertions(+), 5 deletions(-)

-- 
2.1.0

^ permalink raw reply

* [PATCH net-next v2 1/2] net: ipv4: handle DSA enabled master network devices
From: Florian Fainelli @ 2015-01-16 17:56 UTC (permalink / raw)
  To: netdev; +Cc: davem, stephen, kaber, bridge, buytenh, Florian Fainelli
In-Reply-To: <1421430962-18119-1-git-send-email-f.fainelli@gmail.com>

The logic to configure a network interface for kernel IP
auto-configuration is very simplistic, and does not handle the case
where a device is stacked onto another such as with DSA. This causes the
kernel not to open and configure the master network device in a DSA
switch tree, and therefore slave network devices using this master
network devices as conduit device cannot be open.

This restriction comes from a check in net/dsa/slave.c, which is
basically checking the master netdev flags for IFF_UP and returns
-ENETDOWN if it is not the case.

Automatically bringing-up DSA master network devices allows DSA slave
network devices to be used as valid interfaces for e.g: NFS root booting
by allowing kernel IP autoconfiguration to succeed on these interfaces.

On the reverse path, make sure we do not attempt to close a DSA-enabled
device as this would implicitely prevent the slave DSA network device
from operating.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
Changes in v2:

- fixed typo in first comment hunk
- remove spurious newline change
- removed second comment, the code is obvious enough

 net/ipv4/ipconfig.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 7fa18bc7e47f..b26376ef87f6 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -209,9 +209,9 @@ static int __init ic_open_devs(void)
 	last = &ic_first_dev;
 	rtnl_lock();
 
-	/* bring loopback device up first */
+	/* bring loopback and DSA master network devices up first */
 	for_each_netdev(&init_net, dev) {
-		if (!(dev->flags & IFF_LOOPBACK))
+		if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev))
 			continue;
 		if (dev_change_flags(dev, dev->flags | IFF_UP) < 0)
 			pr_err("IP-Config: Failed to open %s\n", dev->name);
@@ -306,7 +306,7 @@ static void __init ic_close_devs(void)
 	while ((d = next)) {
 		next = d->next;
 		dev = d->dev;
-		if (dev != ic_dev) {
+		if (dev != ic_dev && !netdev_uses_dsa(dev)) {
 			DBG(("IP-Config: Downing %s\n", dev->name));
 			dev_change_flags(dev, d->flags);
 		}
-- 
2.1.0

^ permalink raw reply related

* [PATCH net-next v2 2/2] net: bridge: reject DSA-enabled master netdevices as bridge members
From: Florian Fainelli @ 2015-01-16 17:56 UTC (permalink / raw)
  To: netdev; +Cc: davem, stephen, kaber, bridge, buytenh, Florian Fainelli
In-Reply-To: <1421430962-18119-1-git-send-email-f.fainelli@gmail.com>

DSA-enabled master network devices with a switch tagging protocol should
strip the protocol specific format before handing the frame over to
higher layer.

When adding such a DSA master network device as a bridge member, we go
through the following code path when receiving a frame:

__netif_receive_skb_core
	-> first ptype check against ptype_all is not returning any
	   handler for this skb

	-> check and invoke rx_handler:
		-> deliver frame to the bridge layer: br_handle_frame

DSA registers a ptype handler with the fake ETH_XDSA ethertype, which is
called *after* the bridge-layer rx_handler has run. br_handle_frame()
tries to parse the frame it received from the DSA master network device,
and will not be able to match any of its conditions and jumps straight
at the end of the end of br_handle_frame() and returns
RX_HANDLER_CONSUMED there.

Since we returned RX_HANDLER_CONSUMED, __netif_receive_skb_core() stops
RX processing for this frame and returns NET_RX_SUCCESS, so we never get
a chance to call our switch tag packet processing logic and deliver
frames to the DSA slave network devices, and so we do not get any
functional bridge members at all.

Instead of cluttering the bridge receive path with DSA-specific checks,
and rely on assumptions about how __netif_receive_skb_core() is
processing frames, we simply deny adding the DSA master network device
(conduit interface) as a bridge member, leaving only the slave DSA
network devices to be bridge members, since those will work correctly in
all circumstances.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 net/bridge/br_if.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 81e49fb73169..b087d278c679 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -436,10 +436,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	int err = 0;
 	bool changed_addr;
 
-	/* Don't allow bridging non-ethernet like devices */
+	/* Don't allow bridging non-ethernet like devices, or DSA-enabled
+	 * master network devices since the bridge layer rx_handler prevents
+	 * the DSA fake ethertype handler to be invoked, so we do not strip off
+	 * the DSA switch tag protocol header and the bridge layer just return
+	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
+	 */
 	if ((dev->flags & IFF_LOOPBACK) ||
 	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
-	    !is_valid_ether_addr(dev->dev_addr))
+	    !is_valid_ether_addr(dev->dev_addr) ||
+	    netdev_uses_dsa(dev))
 		return -EINVAL;
 
 	/* No bridging of bridges */
-- 
2.1.0

^ permalink raw reply related

* [PATCH v3 net-next 1/1] ip_tunnel: Create percpu gro_cell
From: Martin KaFai Lau @ 2015-01-16 18:11 UTC (permalink / raw)
  To: netdev; +Cc: Eric Dumazet, kernel-team
In-Reply-To: <1421431860-1960597-1-git-send-email-kafai@fb.com>

In the ipip tunnel, the skb->queue_mapping is lost in ipip_rcv().
All skb will be queued to the same cell->napi_skbs.  The
gro_cell_poll is pinned to one core under load.  In production traffic,
we also see severe rx_dropped in the tunl iface and it is probably due to
this limit: skb_queue_len(&cell->napi_skbs) > netdev_max_backlog.

This patch is trying to alloc_percpu(struct gro_cell) and schedule
gro_cell_poll to process the skb in the same core.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/net/gro_cells.h | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h
index 734d9b5..0f712c0 100644
--- a/include/net/gro_cells.h
+++ b/include/net/gro_cells.h
@@ -8,25 +8,23 @@
 struct gro_cell {
 	struct sk_buff_head	napi_skbs;
 	struct napi_struct	napi;
-} ____cacheline_aligned_in_smp;
+};
 
 struct gro_cells {
-	unsigned int		gro_cells_mask;
-	struct gro_cell		*cells;
+	struct gro_cell __percpu	*cells;
 };
 
 static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
 {
-	struct gro_cell *cell = gcells->cells;
+	struct gro_cell *cell;
 	struct net_device *dev = skb->dev;
 
-	if (!cell || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) {
+	if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) {
 		netif_rx(skb);
 		return;
 	}
 
-	if (skb_rx_queue_recorded(skb))
-		cell += skb_get_rx_queue(skb) & gcells->gro_cells_mask;
+	cell = this_cpu_ptr(gcells->cells);
 
 	if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
 		atomic_long_inc(&dev->rx_dropped);
@@ -72,15 +70,12 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de
 {
 	int i;
 
-	gcells->gro_cells_mask = roundup_pow_of_two(netif_get_num_default_rss_queues()) - 1;
-	gcells->cells = kcalloc(gcells->gro_cells_mask + 1,
-				sizeof(struct gro_cell),
-				GFP_KERNEL);
+	gcells->cells = alloc_percpu(struct gro_cell);
 	if (!gcells->cells)
 		return -ENOMEM;
 
-	for (i = 0; i <= gcells->gro_cells_mask; i++) {
-		struct gro_cell *cell = gcells->cells + i;
+	for_each_possible_cpu(i) {
+		struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
 
 		skb_queue_head_init(&cell->napi_skbs);
 		netif_napi_add(dev, &cell->napi, gro_cell_poll, 64);
@@ -91,16 +86,16 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de
 
 static inline void gro_cells_destroy(struct gro_cells *gcells)
 {
-	struct gro_cell *cell = gcells->cells;
 	int i;
 
-	if (!cell)
+	if (!gcells->cells)
 		return;
-	for (i = 0; i <= gcells->gro_cells_mask; i++,cell++) {
+	for_each_possible_cpu(i) {
+		struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
 		netif_napi_del(&cell->napi);
 		skb_queue_purge(&cell->napi_skbs);
 	}
-	kfree(gcells->cells);
+	free_percpu(gcells->cells);
 	gcells->cells = NULL;
 }
 
-- 
1.8.1

^ permalink raw reply related

* [PATCH v3 net-next 0/1] ip_tunnel: Create percpu gro_cell
From: Martin KaFai Lau @ 2015-01-16 18:10 UTC (permalink / raw)
  To: netdev; +Cc: Eric Dumazet, kernel-team

In the ipip tunnel, the skb->queue_mapping is lost in ipip_rcv().
All skb will be queued to the same cell->napi_skbs.  The
gro_cell_poll is pinned to one core under load. In production traffic,
we also see severe rx_dropped in the tunl iface and it is probably due to
this limit: skb_queue_len(&cell->napi_skbs) > netdev_max_backlog

This patch is trying to alloc_percpu(struct gro_cell) and schedule
gro_cell_poll to process the skb in the same core.

Changes from v1:
Eric Dumazet pointed out that ____cacheline_aligned_in_smp is no longer needed.

Changes from v2:
Dropped the one-item-struct cleanup patch per comment.

Setup:
VIP_PREFIX=9.9.9.9/32
REMOTE_REAL_IP=10.228.95.75

if [ "$1" = "encap" ]
then
    sudo ip tunnel add mode ipip remote ${REMOTE_REAL_IP}
    sudo ip link set dev ipip0 up
    sudo ip route add dev ipip0 ${VIP_PREFIX}
else
    # Decapsulating host

    sudo ip tunnel add mode ipip
    sudo ip link set dev tunl0 up
    sudo ip addr add dev lo ${VIP_PREFIX}
    sudo sysctl -a  | grep '\.rp_filter' | awk '{print $1;}' | \
        xargs -n1 -I{} sudo sysctl {}=0
fi

Before:
[root@DECAP ~]# netserver -p 8888
[root@ENCAP ~]# super_netperf 200 -t TCP_RR -H 9.9.9.9 -p 8888 \
-l 30 -- -d 0x6 -m 8k,64k -s 1M -S 1M
332215
[root@DECAP ~]# perf probe -a gro_cell_poll
[root@DECAP ~]# perf stat -I 1000 -a -A -e probe:gro_cell_poll
   117.258518273 CPU0                     0      probe:gro_cell_poll
   117.258518273 CPU1                     0      probe:gro_cell_poll
   117.258518273 CPU2                     0      probe:gro_cell_poll
   117.258518273 CPU3                     0      probe:gro_cell_poll
   117.258518273 CPU4                     0      probe:gro_cell_poll
   117.258518273 CPU5                     0      probe:gro_cell_poll
   117.258518273 CPU6                     0      probe:gro_cell_poll
   117.258518273 CPU7                     0      probe:gro_cell_poll
   117.258518273 CPU8                     0      probe:gro_cell_poll
   117.258518273 CPU9                     0      probe:gro_cell_poll
   117.258518273 CPU10                    0      probe:gro_cell_poll
   117.258518273 CPU11                    0      probe:gro_cell_poll
   117.258518273 CPU12                    0      probe:gro_cell_poll
   117.258518273 CPU13                    0      probe:gro_cell_poll
   117.258518273 CPU14                    0      probe:gro_cell_poll
   117.258518273 CPU15                4,882      probe:gro_cell_poll
   117.258518273 CPU16                    0      probe:gro_cell_poll
   117.258518273 CPU17                    0      probe:gro_cell_poll
   117.258518273 CPU18                    0      probe:gro_cell_poll
   117.258518273 CPU19                    0      probe:gro_cell_poll
   117.258518273 CPU20                    0      probe:gro_cell_poll
   117.258518273 CPU21                    0      probe:gro_cell_poll
   117.258518273 CPU22                    0      probe:gro_cell_poll
   117.258518273 CPU23                    0      probe:gro_cell_poll
   117.258518273 CPU24                    0      probe:gro_cell_poll
   117.258518273 CPU25                    0      probe:gro_cell_poll
   117.258518273 CPU26                    0      probe:gro_cell_poll
   117.258518273 CPU27                    0      probe:gro_cell_poll
   117.258518273 CPU28                    0      probe:gro_cell_poll
   117.258518273 CPU29                    0      probe:gro_cell_poll
   117.258518273 CPU30                    0      probe:gro_cell_poll
   117.258518273 CPU31                    0      probe:gro_cell_poll
   117.258518273 CPU32                    0      probe:gro_cell_poll
   117.258518273 CPU33                    0      probe:gro_cell_poll
   117.258518273 CPU34                    0      probe:gro_cell_poll
   117.258518273 CPU35                    0      probe:gro_cell_poll
   117.258518273 CPU36                    0      probe:gro_cell_poll
   117.258518273 CPU37                    0      probe:gro_cell_poll
   117.258518273 CPU38                    0      probe:gro_cell_poll
   117.258518273 CPU39                    0      probe:gro_cell_poll

After:
[root@DECAP ~]# netserver -p 8888
[root@ENCAP ~]# super_netperf 200 -t TCP_RR -H 9.9.9.9 -p 8888 \
-l 30 -- -d 0x6 -m 8k,64k -s 1M -S 1M
877530
[root@DECAP ~]# perf probe -a gro_cell_poll
[root@DECAP ~]# perf stat -I 1000 -a -A -e probe:gro_cell_poll
    40.085714389 CPU0                13,607      probe:gro_cell_poll
    40.085714389 CPU1                13,188      probe:gro_cell_poll
    40.085714389 CPU2                12,913      probe:gro_cell_poll
    40.085714389 CPU3                12,790      probe:gro_cell_poll
    40.085714389 CPU4                13,395      probe:gro_cell_poll
    40.085714389 CPU5                13,121      probe:gro_cell_poll
    40.085714389 CPU6                11,083      probe:gro_cell_poll
    40.085714389 CPU7                12,945      probe:gro_cell_poll
    40.085714389 CPU8                13,704      probe:gro_cell_poll
    40.085714389 CPU9                13,514      probe:gro_cell_poll
    40.085714389 CPU10                    0      probe:gro_cell_poll
    40.085714389 CPU11                    0      probe:gro_cell_poll
    40.085714389 CPU12                    0      probe:gro_cell_poll
    40.085714389 CPU13                    0      probe:gro_cell_poll
    40.085714389 CPU14                    0      probe:gro_cell_poll
    40.085714389 CPU15                    0      probe:gro_cell_poll
    40.085714389 CPU16                    0      probe:gro_cell_poll
    40.085714389 CPU17                    0      probe:gro_cell_poll
    40.085714389 CPU18                    0      probe:gro_cell_poll
    40.085714389 CPU19                    0      probe:gro_cell_poll
    40.085714389 CPU20               10,402      probe:gro_cell_poll
    40.085714389 CPU21               12,312      probe:gro_cell_poll
    40.085714389 CPU22               11,913      probe:gro_cell_poll
    40.085714389 CPU23               12,964      probe:gro_cell_poll
    40.085714389 CPU24               13,727      probe:gro_cell_poll
    40.085714389 CPU25               12,943      probe:gro_cell_poll
    40.085714389 CPU26               13,558      probe:gro_cell_poll
    40.085714389 CPU27               12,676      probe:gro_cell_poll
    40.085714389 CPU28               13,754      probe:gro_cell_poll
    40.085714389 CPU29               13,379      probe:gro_cell_poll
    40.085714389 CPU30                    0      probe:gro_cell_poll
    40.085714389 CPU31                    0      probe:gro_cell_poll
    40.085714389 CPU32                    0      probe:gro_cell_poll
    40.085714389 CPU33                    0      probe:gro_cell_poll
    40.085714389 CPU34                    0      probe:gro_cell_poll
    40.085714389 CPU35                    0      probe:gro_cell_poll
    40.085714389 CPU36                    0      probe:gro_cell_poll
    40.085714389 CPU37                    0      probe:gro_cell_poll
    40.085714389 CPU38                    0      probe:gro_cell_poll
    40.085714389 CPU39                    0      probe:gro_cell_poll

^ permalink raw reply

* Re: [PATCH] net: wireless: atmel: Remove open-coded and wrong strcasecmp
From: Sergei Shtylyov @ 2015-01-16 18:17 UTC (permalink / raw)
  To: Rasmus Villemoes, Simon Kelley, Kalle Valo
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1421414907-26764-1-git-send-email-linux-qQsb+v5E8BnlAoU/VqSP6n9LOBIZ5rWg@public.gmane.org>

Hello.

On 01/16/2015 04:28 PM, Rasmus Villemoes wrote:

> The kernel's string library does in fact have strcasecmp, at least
> since ded220bd8f08. Moreover, this open-coded version is in fact

    Please also specify that commit's summary line in parens.

> wrong: If the strings only differ in their last character, a and b
> have already been incremented to point to the terminating nul bytes,

    NUL.

> so they would be treated as equal.

> Signed-off-by: Rasmus Villemoes <linux-qQsb+v5E8BnlAoU/VqSP6n9LOBIZ5rWg@public.gmane.org>

[...]

WBR, Sergei

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Stable request for "hw csum failure" fixes
From: Jay Vosburgh @ 2015-01-16 18:30 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller


	Please consider commits

commit 17e96834fd35997ca7cdfbf15413bcd5a36ad448
Author: Govindarajulu Varadarajan <_govind@gmx.com>
Date:   Thu Dec 18 15:58:42 2014 +0530

    enic: fix rx skb checksum

commit 2c26d34bbcc0b3f30385d5587aa232289e2eed8e
Author: Jay Vosburgh <jay.vosburgh@canonical.com>
Date:   Fri Dec 19 15:32:00 2014 -0800

    net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding
    

	for -stable kernels back to 3.10.

	These patches have been confirmed to fix the "hw csum failure"
issue in the Ubuntu 3.13 kernel, and code inspection suggests that the
3.12 and 3.10 kernels would also exhibit the problem.

	Thanks,

	-J

---
	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply

* Re: [PATCH 7/9] rhashtable: Per bucket locks & deferred expansion/shrinking
From: Patrick McHardy @ 2015-01-16 18:36 UTC (permalink / raw)
  To: Thomas Graf
  Cc: David Laight, davem@davemloft.net, netdev@vger.kernel.org,
	herbert@gondor.apana.org.au, paulmck@linux.vnet.ibm.com,
	edumazet@google.com, john.r.fastabend@intel.com,
	josh@joshtriplett.org, netfilter-devel@vger.kernel.org
In-Reply-To: <20150116165302.GE15052@casper.infradead.org>

On 16.01, Thomas Graf wrote:
> On 01/16/15 at 04:43pm, David Laight wrote:
> > The walker is unlikely to see items that get inserted early in the hash
> > table even without a resize.
> 
> I don't follow, you have to explain this statement.
> 
> Walkers which don't want to see duplicates or miss entries should
> just take the mutex.

Well, we do have a problem with interrupted dumps. As you know once
the netlink message buffer is full, we return to userspace and
continue dumping during the next read. Expanding obviously changes
the order since we rehash from bucket N to N and 2N, so this will
indeed cause duplicate (doesn't matter) and missed entries.

^ permalink raw reply

* Re: [PATCH net 2/2] sh_eth: Fix ethtool operation crash when net device is down
From: Florian Fainelli @ 2015-01-16 18:45 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: ct178-internal, David S. Miller, netdev, linux-kernel,
	Nobuhiro Iwamatsu, Mitsuhiro Kimura, Hisashi Nakamura,
	Yoshihiro Kaneko
In-Reply-To: <1421430685.1222.192.camel@xylophone.i.decadent.org.uk>

2015-01-16 9:51 GMT-08:00 Ben Hutchings <ben.hutchings@codethink.co.uk>:
> The driver connects and disconnects the PHY device whenever the
> net device is brought up and down.  The ethtool get_settings,
> set_settings and nway_reset operations will dereference a null
> or dangling pointer if called while it is down.
>
> I think it would be preferable to keep the PHY connected, but there
> may be good reasons not to.

phy_disconnect() is the canonical way to stop the PHY library state
machine, and avoid deferred work to be done and call the driver's
adjust_link function. This also boils down to calling phy_detach()
which can put the PHY in a low-power mode when implemented.

>
> As an immediate fix for this bug:
> - Set the phydev pointer to NULL after disconnecting the PHY
> - Change those three operations to return -ENODEV while the PHY is
>   not connected
>
> Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
> ---
>  drivers/net/ethernet/renesas/sh_eth.c |   10 ++++++++++
>  1 file changed, 10 insertions(+)
>
> diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
> index 0c4d5b5..28e3822 100644
> --- a/drivers/net/ethernet/renesas/sh_eth.c
> +++ b/drivers/net/ethernet/renesas/sh_eth.c
> @@ -1827,6 +1827,9 @@ static int sh_eth_get_settings(struct net_device *ndev,
>         unsigned long flags;
>         int ret;
>
> +       if (!mdp->phydev)
> +               return -ENODEV;

Since the PHY is disconnected, would not checking for netif_running()
make sense here, unless there is a good reason to still allow
phy_ethtool_gset() to be called?

> +
>         spin_lock_irqsave(&mdp->lock, flags);
>         ret = phy_ethtool_gset(mdp->phydev, ecmd);
>         spin_unlock_irqrestore(&mdp->lock, flags);
> @@ -1841,6 +1844,9 @@ static int sh_eth_set_settings(struct net_device *ndev,
>         unsigned long flags;
>         int ret;
>
> +       if (!mdp->phydev)
> +               return -ENODEV;
> +
>         spin_lock_irqsave(&mdp->lock, flags);
>
>         /* disable tx and rx */
> @@ -1875,6 +1881,9 @@ static int sh_eth_nway_reset(struct net_device *ndev)
>         unsigned long flags;
>         int ret;
>
> +       if (!mdp->phydev)
> +               return -ENODEV;
> +
>         spin_lock_irqsave(&mdp->lock, flags);
>         ret = phy_start_aneg(mdp->phydev);
>         spin_unlock_irqrestore(&mdp->lock, flags);
> @@ -2184,6 +2193,7 @@ static int sh_eth_close(struct net_device *ndev)
>         if (mdp->phydev) {
>                 phy_stop(mdp->phydev);
>                 phy_disconnect(mdp->phydev);
> +               mdp->phydev = NULL;
>         }
>
>         free_irq(ndev->irq, ndev);
> --
> 1.7.10.4
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Florian

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox