Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 1/2] net: Add Transparent Ethernet Bridging GRO support.
From: Jesse Gross @ 2014-12-31  3:10 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Currently the only tunnel protocol that supports GRO with encapsulated
Ethernet is VXLAN. This pulls out the Ethernet code into a proper layer
so that it can be used by other tunnel protocols such as GRE and Geneve.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 drivers/net/vxlan.c         | 53 +++-----------------------
 include/linux/etherdevice.h |  4 ++
 net/ethernet/eth.c          | 92 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 47 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 7fbd89f..2ab0922 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -549,10 +549,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
 {
 	struct sk_buff *p, **pp = NULL;
 	struct vxlanhdr *vh, *vh2;
-	struct ethhdr *eh, *eh2;
-	unsigned int hlen, off_vx, off_eth;
-	const struct packet_offload *ptype;
-	__be16 type;
+	unsigned int hlen, off_vx;
 	int flush = 1;
 
 	off_vx = skb_gro_offset(skb);
@@ -563,17 +560,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
 		if (unlikely(!vh))
 			goto out;
 	}
-	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
-	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
-
-	off_eth = skb_gro_offset(skb);
-	hlen = off_eth + sizeof(*eh);
-	eh   = skb_gro_header_fast(skb, off_eth);
-	if (skb_gro_header_hard(skb, hlen)) {
-		eh = skb_gro_header_slow(skb, hlen, off_eth);
-		if (unlikely(!eh))
-			goto out;
-	}
 
 	flush = 0;
 
@@ -582,28 +568,16 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
 			continue;
 
 		vh2 = (struct vxlanhdr *)(p->data + off_vx);
-		eh2 = (struct ethhdr   *)(p->data + off_eth);
-		if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2)) {
+		if (vh->vx_vni != vh2->vx_vni) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
 	}
 
-	type = eh->h_proto;
-
-	rcu_read_lock();
-	ptype = gro_find_receive_by_type(type);
-	if (ptype == NULL) {
-		flush = 1;
-		goto out_unlock;
-	}
-
-	skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
-	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
-	pp = ptype->callbacks.gro_receive(head, skb);
+	skb_gro_pull(skb, sizeof(struct vxlanhdr));
+	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
+	pp = eth_gro_receive(head, skb);
 
-out_unlock:
-	rcu_read_unlock();
 out:
 	NAPI_GRO_CB(skb)->flush |= flush;
 
@@ -612,24 +586,9 @@ out:
 
 static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
 {
-	struct ethhdr *eh;
-	struct packet_offload *ptype;
-	__be16 type;
-	int vxlan_len  = sizeof(struct vxlanhdr) + sizeof(struct ethhdr);
-	int err = -ENOSYS;
-
 	udp_tunnel_gro_complete(skb, nhoff);
 
-	eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr));
-	type = eh->h_proto;
-
-	rcu_read_lock();
-	ptype = gro_find_complete_by_type(type);
-	if (ptype != NULL)
-		err = ptype->callbacks.gro_complete(skb, nhoff + vxlan_len);
-
-	rcu_read_unlock();
-	return err;
+	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
 }
 
 /* Notify netdevs that UDP port started listening */
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 41c891d..1d869d1 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -52,6 +52,10 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
 
+struct sk_buff **eth_gro_receive(struct sk_buff **head,
+				 struct sk_buff *skb);
+int eth_gro_complete(struct sk_buff *skb, int nhoff);
+
 /* Reserved Ethernet Addresses per IEEE 802.1Q */
 static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) =
 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 33a140e..238f38d 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -424,3 +424,95 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 	return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
 }
 EXPORT_SYMBOL(sysfs_format_mac);
+
+struct sk_buff **eth_gro_receive(struct sk_buff **head,
+				 struct sk_buff *skb)
+{
+	struct sk_buff *p, **pp = NULL;
+	struct ethhdr *eh, *eh2;
+	unsigned int hlen, off_eth;
+	const struct packet_offload *ptype;
+	__be16 type;
+	int flush = 1;
+
+	off_eth = skb_gro_offset(skb);
+	hlen = off_eth + sizeof(*eh);
+	eh = skb_gro_header_fast(skb, off_eth);
+	if (skb_gro_header_hard(skb, hlen)) {
+		eh = skb_gro_header_slow(skb, hlen, off_eth);
+		if (unlikely(!eh))
+			goto out;
+	}
+
+	flush = 0;
+
+	for (p = *head; p; p = p->next) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		eh2 = (struct ethhdr *)(p->data + off_eth);
+		if (compare_ether_header(eh, eh2)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+	}
+
+	type = eh->h_proto;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (ptype == NULL) {
+		flush = 1;
+		goto out_unlock;
+	}
+
+	skb_gro_pull(skb, sizeof(*eh));
+	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
+	pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+EXPORT_SYMBOL(eth_gro_receive);
+
+int eth_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff);
+	__be16 type = eh->h_proto;
+	struct packet_offload *ptype;
+	int err = -ENOSYS;
+
+	if (skb->encapsulation)
+		skb_set_inner_mac_header(skb, nhoff);
+
+	rcu_read_lock();
+	ptype = gro_find_complete_by_type(type);
+	if (ptype != NULL)
+		err = ptype->callbacks.gro_complete(skb, nhoff +
+						    sizeof(struct ethhdr));
+
+	rcu_read_unlock();
+	return err;
+}
+EXPORT_SYMBOL(eth_gro_complete);
+
+static struct packet_offload eth_packet_offload __read_mostly = {
+	.type = cpu_to_be16(ETH_P_TEB),
+	.callbacks = {
+		.gro_receive = eth_gro_receive,
+		.gro_complete = eth_gro_complete,
+	},
+};
+
+static int __init eth_offload_init(void)
+{
+	dev_add_offload(&eth_packet_offload);
+
+	return 0;
+}
+
+fs_initcall(eth_offload_init);
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH 23/23 V2 for 3.19]  rtlwifi: Fix error when accessing unmapped memory in skb
From: Eric Biggers @ 2014-12-31  0:49 UTC (permalink / raw)
  To: Larry Finger; +Cc: kvalo, linux-wireless, netdev, Stable
In-Reply-To: <1419711457-21469-1-git-send-email-Larry.Finger@lwfinger.net>

On Sat, Dec 27, 2014 at 02:17:37PM -0600, Larry Finger wrote:
> These drivers use 9100-byte receive buffers, thus allocating an skb requires
> an O(3) memory allocation. Under heavy memory loads and fragmentation, such
> a request can fail. Previous versions of the driver have dropped the packet
> and reused the old buffer; however, the new version introduced a bug in that
> it released the old buffer before trying to allocate a new one. The previous
> method is implemented here.

It looks like in the out-of-memory path, pci_map_single() gets called while the
skb is still mapped.  Won't this leak the IOMMU mapping?

^ permalink raw reply

* net-next is OPEN
From: David Miller @ 2014-12-31  0:06 UTC (permalink / raw)
  To: netdev; +Cc: linux-wireless, netfilter-devel


Just FYI...

^ permalink raw reply

* Re: [PATCH net-next 2/2] net: gianfar: add missing __iomem annotation
From: David Miller @ 2014-12-31  0:05 UTC (permalink / raw)
  To: haokexin; +Cc: netdev, claudiu.manoil
In-Reply-To: <1419401145-8967-2-git-send-email-haokexin@gmail.com>

From: Kevin Hao <haokexin@gmail.com>
Date: Wed, 24 Dec 2014 14:05:45 +0800

> Fix the following spare warning:
> drivers/net/ethernet/freescale/gianfar.c:3521:60: warning: incorrect type in argument 1 (different address spaces)
> drivers/net/ethernet/freescale/gianfar.c:3521:60:    expected unsigned int [noderef] <asn:2>*addr
> drivers/net/ethernet/freescale/gianfar.c:3521:60:    got unsigned int [usertype] *rfbptr
> drivers/net/ethernet/freescale/gianfar.c:205:16: warning: incorrect type in assignment (different address spaces)
> drivers/net/ethernet/freescale/gianfar.c:205:16:    expected unsigned int [usertype] *rfbptr
> drivers/net/ethernet/freescale/gianfar.c:205:16:    got unsigned int [noderef] <asn:2>*<noident>
> drivers/net/ethernet/freescale/gianfar.c:2918:44: warning: incorrect type in argument 1 (different address spaces)
> drivers/net/ethernet/freescale/gianfar.c:2918:44:    expected unsigned int [noderef] <asn:2>*addr
> drivers/net/ethernet/freescale/gianfar.c:2918:44:    got unsigned int [usertype] *rfbptr
> 
> Signed-off-by: Kevin Hao <haokexin@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 1/2] net: gianfar: mark the local functions static
From: David Miller @ 2014-12-31  0:05 UTC (permalink / raw)
  To: haokexin; +Cc: netdev, claudiu.manoil
In-Reply-To: <1419401145-8967-1-git-send-email-haokexin@gmail.com>

From: Kevin Hao <haokexin@gmail.com>
Date: Wed, 24 Dec 2014 14:05:44 +0800

> Signed-off-by: Kevin Hao <haokexin@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next V2] virtio-net: don't do header check for dodgy gso packets
From: David Miller @ 2014-12-30 23:53 UTC (permalink / raw)
  To: jasowang; +Cc: netdev, virtualization, linux-kernel, mst
In-Reply-To: <1419390232-14906-1-git-send-email-jasowang@redhat.com>

From: Jason Wang <jasowang@redhat.com>
Date: Wed, 24 Dec 2014 11:03:52 +0800

> There's no need to do header check for virtio-net since:
> 
> - Host sets dodgy for all gso packets from guest and check the header.
> - Host should be prepared for all kinds of evil packets from guest, since
>   malicious guest can send any kinds of packet.
> 
> So this patch sets NETIF_F_GSO_ROBUST for virtio-net to skip the check.
> 
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes from V1:
> - typo fixes

Applied.

^ permalink raw reply

* Re: [PATCH 4/27] atheros: atlx: Use setup_timer
From: David Miller @ 2014-12-30 23:35 UTC (permalink / raw)
  To: Julia.Lawall; +Cc: jcliburn, kernel-janitors, chris.snook, netdev, linux-kernel
In-Reply-To: <1419604558-29743-4-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Fri, 26 Dec 2014 15:35:34 +0100

> Convert a call to init_timer and accompanying intializations of
> the timer's data and function fields to a call to setup_timer.
> 
> A simplified version of the semantic match that fixes this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression t,f,d;
> @@
> 
> -init_timer(&t);
> +setup_timer(&t,f,d);
> -t.function = f;
> -t.data = d;
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

Applied.

^ permalink raw reply

* Re: [PATCH 3/27] atl1e: Use setup_timer
From: David Miller @ 2014-12-30 23:35 UTC (permalink / raw)
  To: Julia.Lawall; +Cc: jcliburn, kernel-janitors, chris.snook, netdev, linux-kernel
In-Reply-To: <1419604558-29743-5-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Fri, 26 Dec 2014 15:35:35 +0100

> Convert a call to init_timer and accompanying intializations of
> the timer's data and function fields to a call to setup_timer.
> 
> A simplified version of the semantic match that fixes this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression t,f,d;
> @@
> 
> -init_timer(&t);
> +setup_timer(&t,f,d);
> -t.function = f;
> -t.data = d;
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

Applied.

^ permalink raw reply

* Re: [PATCH 11/27] ksz884x: Use setup_timer
From: David Miller @ 2014-12-30 23:35 UTC (permalink / raw)
  To: Julia.Lawall; +Cc: netdev, kernel-janitors, linux-kernel
In-Reply-To: <1419604558-29743-12-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Fri, 26 Dec 2014 15:35:42 +0100

> Convert a call to init_timer and accompanying intializations of
> the timer's data and function fields to a call to setup_timer.
> 
> A simplified version of the semantic match that fixes this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression t,f,d;
> @@
> 
> -init_timer(&t);
> +setup_timer(&t,f,d);
> -t.function = f;
> -t.data = d;
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

Applied.

^ permalink raw reply

* Re: [PATCH 15/27] net: sxgbe: Use setup_timer
From: David Miller @ 2014-12-30 23:34 UTC (permalink / raw)
  To: Julia.Lawall
  Cc: bh74.an, kernel-janitors, ks.giri, vipul.pandya, netdev,
	linux-kernel
In-Reply-To: <1419604558-29743-16-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Fri, 26 Dec 2014 15:35:46 +0100

> Convert a call to init_timer and accompanying intializations of
> the timer's data and function fields to a call to setup_timer.
> 
> A simplified version of the semantic match that fixes this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression t,f,d;
> @@
> 
> -init_timer(&t);
> +setup_timer(&t,f,d);
> -t.function = f;
> -t.data = d;
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 00/11] Time Counter fixes and improvements
From: David Miller @ 2014-12-30 23:32 UTC (permalink / raw)
  To: richardcochran
  Cc: netdev, linux-kernel, amirv, ariel.elior, carolyn.wyborny,
	Frank.Li, jeffrey.t.kirsher, john.stultz, matthew.vick, mlichvar,
	mugunthanvnm, ogerlitz, tglx, thomas.lendacky
In-Reply-To: <cover.1418504883.git.richardcochran@gmail.com>

From: Richard Cochran <richardcochran@gmail.com>
Date: Sun, 21 Dec 2014 19:46:55 +0100

> Several PTP Hardware Clock (PHC) drivers implement the clock in
> software using the timecounter/cyclecounter code. This series adds one
> simple improvement and one more subtle fix to the shared timecounter
> facility. Credit for this series goes to Janusz Użycki, who pointed
> the issues out to me off list.

Series applied, thanks Richard.

^ permalink raw reply

* Re: am335x: cpsw: interrupt failure
From: Felipe Balbi @ 2014-12-30 23:22 UTC (permalink / raw)
  To: Felipe Balbi
  Cc: Tony Lindgren, Yegor Yefremov, netdev, N, Mugunthan V,
	linux-omap@vger.kernel.org
In-Reply-To: <20141229171355.GJ29379@saruman>

[-- Attachment #1: Type: text/plain, Size: 2910 bytes --]

Hi,

On Mon, Dec 29, 2014 at 11:13:55AM -0600, Felipe Balbi wrote:
> > > > >>> U-Boot version: 2014.07
> > > > >>> Kernel config is omap2plus with enabled USB
> > > > >>>
> > > > >>> # cat /proc/version
> > > > >>> Linux version 3.18.0 (user@user-VirtualBox) (gcc version 4.8.3
> > > > >>> 20140320 (prerelease) (Sourcery CodeBench Lite 2014.05-29) ) #6 SMP
> > > > >>> Mon Dec 8 22:47:43 CET 2014
> > > > >>
> > > > >> Wasn't GCC 4.8.x total crap for building ARM kernels ? IIRC it was even
> > > > >> blacklisted. Can you try with 4.9.x just to make sure ?
> > > > >
> > > > > Will do.
> > > > 
> > > > Adding linux-omap. Beginning of this discussion:
> > > > http://comments.gmane.org/gmane.linux.network/341427
> > > > 
> > > > Quick summary: starting with kernel 3.18 or commit
> > > > 55601c9f24670ba926ebdd4d712ac3b177232330 am335x (at least BBB and some
> > > > custom boards) stalls at high network load. Reproducible via nuttcp
> > > > within some minutes
> > > > 
> > > > nuttcp -S (on BBB)
> > > > nuttcp -t -N 4 -T30m 192.168.1.235 (on host)
> > > > 
> > > > As Felipe Balbi suggested, I tried both 4.8.3 and 4.9.2 toolchains,
> > > > but both show the same behavior.
> > > > 
> > > > Linux version 3.18.0 (user@user-VirtualBox) (gcc version 4.8.3
> > > > 20140320 (prerelease) (Sourcery CodeBench Lite 2014.05-29) ) #6 SMP
> > > > Mon Dec 8 22:47:43 CET 2014
> > > > Linux version 3.18.1 (user@user-VirtualBox) (gcc version 4.9.2
> > > > (Buildroot 2015.02-git-00582-g10b9761) ) #1 SMP Mon Dec 29 09:22:29
> > > > CET 2014
> > > > 
> > > > Let me know, if you can reproduce this issue.
> > > 
> > > finally managed to reproduce this, it took quite a bit of effort though.
> > > I'll see if I can gether more information about the problem.
> > 
> > Maybe check if the irqnr is 127 (or the last reserved interrupt)
> > in irq-omap-intc.c. If so, also print out the previous interrupt.
> > It seems the intc uses the last reserved interrupt to signal a
> > spurious interrupt for the previous irqnr, so we should probably
> > add some handling for that.
> > 
> > If the previous interrupt is a cpsw interrupt, then there's probably
> > something wrong with cpsw interrupt handling. Either a missing
> > read-back to flush posted write in the cpsw interrupt handler,
> > or the EOI registers are written at a wrong time.
> 
> yeah, I'll go over it, but I first need to reproduce it again. Just
> rebooted to try again and after half an hour, couldn't reproduce it
> anymore. Interesting race to end the year :-)

alright, managed to reproduce multiple and I'm pretty confident I've
found the bug. Right now I'm testing with AM437x and AM335x to make sure
it's really working. If it's still running until tomorrow I'll send a
preliminary patch but I want to leave this running for quite a few days
before calling it "fixed".

-- 
balbi

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: add BQL support
From: Beniamino Galvani @ 2014-12-30 23:13 UTC (permalink / raw)
  To: Dave Taht
  Cc: Giuseppe Cavallaro, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAA93jw4GeWOqeZu5SjOYhhORiH6XCrv7uPTjrgE9O1QWESCxqw@mail.gmail.com>

On Mon, Dec 29, 2014 at 09:42:01AM -0800, Dave Taht wrote:
> On Sun, Dec 28, 2014 at 1:48 PM, Beniamino Galvani <b.galvani@gmail.com> wrote:
> > On Sun, Dec 28, 2014 at 08:25:40AM -0800, Dave Taht wrote:
> >> On Sun, Dec 28, 2014 at 6:57 AM, Beniamino Galvani <b.galvani@gmail.com> wrote:
> >> > Add support for Byte Queue Limits to the STMicro MAC driver.
> >>
> >> Thank you!
> >>
> >> > Tested on a Amlogic S805 Cortex-A5 board, where the use of BQL
> >> > slightly decreases the ping latency from ~10ms to ~3ms when the
> >> > 100Mbps link is saturated by TCP streams. No difference is
> >> > observed at 1Gbps.
> >>
> >> I see the plural. With TSQ in place it is hard (without something like
> >> the rrul test driving multiple streams) to drive a driver to
> >> saturation with small numbers of flows. This was with pfifo_fast, not
> >> sch_fq, at 100mbit?
> >
> > Hi Dave,
> >
> > yes, this was with pfifo_fast and I used 4 iperf TCP streams. The total
> > throughput didn't seem to increase adding more streams.
> 
> >>
> >> Can this board actually drive a full gigabit in the first place? Until
> >> now most of the low end arm boards I have seen only came with
> >> a 100mbit mac, and the gig ones lacking offloads seemed to peak
> >> out at about 600mbit.
> >
> > I measured a throughput of 650mbit in rx and 600mbit in tx.
> 
> You might want to try the rrul test which tests both directions and
> latency at the same time.

I will try it, thanks.

> 
> In my case I have been trying to find a low-cost chip that could do soft
> rate limiting (htb) + fq_codel at up to 300mbit/sec, as that is about
> the peak speed
> we will be getting from cable modems, and these are horribly overbuffered,
> at these speeds too, with 1.2sec of bidirectional latency observed at
> 120mbit/12mbit.
> 
> I'm open to crazy ideas like trying to find a use for the gpu, etc, to
> get there.
> 
> >
> >>
> >> Under my christmas tree landed a quad core A5 (odroid-c1), also an
> >> xgene and zedboard - both of the latter are a-needing BQL,
> >> and I haven't booted the udroid yet. Hopefully it is the
> >> same driver you just improved.
> >
> > I'm using the odroid-c1 too, with this tree based on the recent
> > Amlogic mainline work:
> >
> >   https://github.com/bengal/linux/tree/meson8b
> 
> Oh, cool, thx!
> 
> > Unfortunately at the moment the support for the board is very basic
> > (for example, SMP is not working yet) but it's enough to do some NIC
> > tests.
> 
> Good to know. Have you looked at xmit_more yet?
> 
> http://lwn.net/Articles/615238/

I don't know if I have implemented it correctly, but I found that the
improvement with xmit_more is so small to be barely observable, maybe
because the cost for starting the hardware transmission is very low
(it's a single mmio write).

Beniamino

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: add BQL support
From: Beniamino Galvani @ 2014-12-30 21:27 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: David S. Miller, Giuseppe Cavallaro, netdev,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAGVrzca_i9Wdpa9ecJYPZDgw+TqgMq1SH6ZmRnE4MoQ2dvnAAw@mail.gmail.com>

On Sun, Dec 28, 2014 at 11:48:14PM -0800, Florian Fainelli wrote:
> 2014-12-28 6:57 GMT-08:00 Beniamino Galvani <b.galvani@gmail.com>:
> > Add support for Byte Queue Limits to the STMicro MAC driver.
> >
> > Tested on a Amlogic S805 Cortex-A5 board, where the use of BQL
> > slightly decreases the ping latency from ~10ms to ~3ms when the
> > 100Mbps link is saturated by TCP streams. No difference is
> > observed at 1Gbps.
> >
> > Signed-off-by: Beniamino Galvani <b.galvani@gmail.com>
> > ---
> 
> [snip]
> 
> >         priv->dev->stats.tx_errors++;
> > @@ -2049,6 +2057,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
> >                 skb_tx_timestamp(skb);
> >
> >         priv->hw->dma->enable_dma_transmission(priv->ioaddr);
> > +       netdev_sent_queue(dev, skb->len);
> 
> You are introducing a potential use after free here in case tx_lock is
> eliminated one day and your TX reclaim logic kicks in and frees the
> freshly transmitted SKB, it would be safer to just cache skb->len in a
> local variable, and use it here.

Ok, I will change this part; probably a simpler solution is to call
netdev_sent_queue() before enabling the DMA like in other drivers.

BTW, I'm not sure this lock is really needed, since it should be
possible to safely access the ring without locks from both the
transmit and the reclaim functions if the pointers are updated
carefully. So maybe it will be really removed one day.

Beniamino

> 
> >
> >         spin_unlock(&priv->tx_lock);
> >         return NETDEV_TX_OK;

^ permalink raw reply

* Re: [PATCH net] tunnel: support propagating lower device state
From: Cong Wang @ 2014-12-30 20:37 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev
In-Reply-To: <CAHA+R7OTFAc=1D=WRxLNuqScBJ+wD-wc-qxtNaFxSji_kRi1PA@mail.gmail.com>

On Tue, Dec 30, 2014 at 12:33 PM, Cong Wang <cwang@twopensource.com> wrote:
>
> Are you sure we really need to free itn->tunnels on
> failure path? It is not allocated inside ip_tunnel_init_net()
> anyway.

Also you seem to forget to kfree() itn->lower_dev in
ip_tunnel_delete_net().

^ permalink raw reply

* Re: [PATCH net] tunnel: support propagating lower device state
From: Cong Wang @ 2014-12-30 20:33 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, netdev
In-Reply-To: <20141227095950.20a062d2@urahara>

On Sat, Dec 27, 2014 at 9:59 AM, Stephen Hemminger <shemming@brocade.com> wrote:
> @@ -982,8 +1045,17 @@ int ip_tunnel_init_net(struct net *net,
>         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
>                 INIT_HLIST_HEAD(&itn->tunnels[i]);
>
> +       itn->lower_dev = kcalloc(NETDEV_HASHENTRIES, sizeof(struct hlist_head),
> +                                GFP_KERNEL);
> +       if (!itn->lower_dev) {
> +               kfree(itn->tunnels);
> +               return -ENOMEM;
> +       }
> +
>         if (!ops) {
>                 itn->fb_tunnel_dev = NULL;
> +               kfree(itn->tunnels);
> +               kfree(itn->lower_dev);
>                 return 0;
>         }
>
> @@ -1003,7 +1075,12 @@ int ip_tunnel_init_net(struct net *net,
>         }
>         rtnl_unlock();
>
> -       return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
> +       if (IS_ERR(itn->fb_tunnel_dev)) {
> +               kfree(itn->tunnels);
> +               return PTR_ERR(itn->fb_tunnel_dev);
> +       }
> +       return 0;
> +
>  }

Are you sure we really need to free itn->tunnels on
failure path? It is not allocated inside ip_tunnel_init_net()
anyway.

^ permalink raw reply

* Re: [Question] What's the noop_qdisc introduced for in the kernel?
From: Cong Wang @ 2014-12-30 20:15 UTC (permalink / raw)
  To: Dennis Chen; +Cc: netdev
In-Reply-To: <CA+U0gVi=DTpz-Givbrs771+KYz8EE1ABdy2oO38q4LR9fG37YQ@mail.gmail.com>

On Tue, Dec 30, 2014 at 1:23 AM, Dennis Chen <kernel.org.gnu@gmail.com> wrote:
> After google and the code reading, seems this Qdisc instance is only
> used for the initialization purpose, I can't find the reason that this
> object introduced in the kernel. Does anybody know what the historical
> reason is for this invention? the purpose or the benefit for this
> Qdisc object?
>

Not just for initialization, it is kinda a null qdisc when
the previous qdisc gets removed:

        /* ... and graft new one */
        if (qdisc == NULL)
                qdisc = &noop_qdisc;

or the entire device is not activated yet. It guarantees no
packets can be sent out via this qdisc.

^ permalink raw reply

* [PATCH v2] ath10k: fixup wait_for_completion_timeout return handling
From: Nicholas Mc Guire @ 2014-12-30 19:44 UTC (permalink / raw)
  To: Kalle Valo
  Cc: Michal Kazior, Ben Greear, Chun-Yeow Yeoh, Yanbo Li,
	Sergei Shtylyov, ath10k, linux-wireless, netdev, linux-kernel,
	Nicholas Mc Guire

wait_for_completion_timeout does not return negative values so the tests
for <= 0 are not needed and the case differentiation in the error handling
path unnecessary.

v2: all wait_for_completion_timeout changes in a single patch

patch was only compile tested x86_64_defconfig + CONFIG_ATH_CARDS=m
CONFIG_ATH10K=m

patch is against linux-next 3.19.0-rc1 -next-20141226

None of the proposed cleanups are critical.
All changes should only be removing unreachable cases.

Signed-off-by: Nicholas Mc Guire <der.herr@hofr.at>
---
 drivers/net/wireless/ath/ath10k/debug.c |    2 +-
 drivers/net/wireless/ath/ath10k/htc.c   |    6 ++----
 drivers/net/wireless/ath/ath10k/htt.c   |    2 +-
 drivers/net/wireless/ath/ath10k/mac.c   |    2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index a716758..7e1fe93 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -373,7 +373,7 @@ static int ath10k_debug_fw_stats_request(struct ath10k *ar)
 
 		ret = wait_for_completion_timeout(&ar->debug.fw_stats_complete,
 						  1*HZ);
-		if (ret <= 0)
+		if (ret == 0)
 			return -ETIMEDOUT;
 
 		spin_lock_bh(&ar->data_lock);
diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index f1946a6..2fd9e18 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c
@@ -703,11 +703,9 @@ int ath10k_htc_connect_service(struct ath10k_htc *htc,
 	/* wait for response */
 	status = wait_for_completion_timeout(&htc->ctl_resp,
 					     ATH10K_HTC_CONN_SVC_TIMEOUT_HZ);
-	if (status <= 0) {
-		if (status == 0)
-			status = -ETIMEDOUT;
+	if (status == 0) {
 		ath10k_err(ar, "Service connect timeout: %d\n", status);
-		return status;
+		return -ETIMEDOUT;
 	}
 
 	/* we controlled the buffer creation, it's aligned */
diff --git a/drivers/net/wireless/ath/ath10k/htt.c b/drivers/net/wireless/ath/ath10k/htt.c
index 56cb4ac..58b6fc1 100644
--- a/drivers/net/wireless/ath/ath10k/htt.c
+++ b/drivers/net/wireless/ath/ath10k/htt.c
@@ -102,7 +102,7 @@ int ath10k_htt_setup(struct ath10k_htt *htt)
 
 	status = wait_for_completion_timeout(&htt->target_version_received,
 					     HTT_TARGET_VERSION_TIMEOUT_HZ);
-	if (status <= 0) {
+	if (status == 0) {
 		ath10k_warn(ar, "htt version request timed out\n");
 		return -ETIMEDOUT;
 	}
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index c400567..f9d7dbb 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2151,7 +2151,7 @@ void ath10k_offchan_tx_work(struct work_struct *work)
 
 		ret = wait_for_completion_timeout(&ar->offchan_tx_completed,
 						  3 * HZ);
-		if (ret <= 0)
+		if (ret == 0)
 			ath10k_warn(ar, "timed out waiting for offchannel skb %p\n",
 				    skb);
 
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH 0/4] ath10k: a few incorrect return handling fix-up
From: Nicholas Mc Guire @ 2014-12-30 19:42 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: Kalle Valo, Michal Kazior, Ben Greear, Chun-Yeow Yeoh, Yanbo Li,
	ath10k, linux-wireless, netdev, linux-kernel
In-Reply-To: <54A2F151.6040205@cogentembedded.com>

On Tue, 30 Dec 2014, Sergei Shtylyov wrote:

> On 12/30/2014 09:28 PM, Nicholas Mc Guire wrote:
>
>>>> wait_for_completion_timeout does not return negative values so the tests
>>>> for <= 0 are not needed and the case differentiation in the error handling
>>>> path unnecessary.
>
>>>     I decided to verify your statement and I saw that it seems wrong.
>>> do_wait_for_common() can return -ERESTARTSYS and the return value gets
>>> returned by its callers unchanged.
>
>> the -ERESTARTSYS only can be returned if state matches but
>> wait_for_completion_timemout passes TASK_UNINTERRUPTIBLE
>> so signal_pending_state will return 0 and never negativ
>
>> my understanding of the callchain is:
>> wait_for_completion_timemout with TASK_UNINTERRUPTIBLE
>>    -> wait_for_common(...TASK_UNINTERRUPTIBLE)
>>      -> __wait_for_common(...TASK_UNINTERRUPTIBLE)
>>        -> do_wait_for_common(...TASK_UNINTERRUPTIBLE)
>>          -> signal_pending_state(TASK_UNINTERRUPTIBLE...)
>
>> static inline int signal_pending_state(long state, struct task_struct *p)
>> {
>>          if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
>>                  return 0;
>
>    Right. I didn't look into TASK_UNINTERRUPTIBLE thing before sending my mail.
>
>> so wait_for_completion_timemout should return 0 or 1 only
>
>    0 or the remaining time, to be precise.
>

yup - thanks for the confirmation!

>>>> patch was only compile tested x86_64_defconfig + CONFIG_ATH_CARDS=m
>>>> CONFIG_ATH10K=m
>
>>>> patch is against linux-next 3.19.0-rc1 -next-20141226
>
>>>     Rather patches. It would have been better to send one patch instead of
>>> 4 patches with the same name.
>
>> sorry for that - I had split it into separate patches as it was
>> in different files - giving them the same name of course was a bit
>> brain-dead.
>
>    You should have mentioned the modified files in the subject. But IMHO 
> it would be better to have just one patch.
>
resent as a single patch as v2

thx!
hofrat

^ permalink raw reply

* Re: [RFC PATCH net-next 0/5]: ixgbevf: Allow querying VFs RSS indirection table and key
From: Vlad Zolotarov @ 2014-12-30 19:36 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: Greg Rose, netdev, Gleb Natapov, avi
In-Reply-To: <1419968078.31582.27.camel@jtkirshe-mobl>


On 12/30/14 21:34, Jeff Kirsher wrote:
> On Tue, 2014-12-30 at 21:30 +0200, Vlad Zolotarov wrote:
>> On 12/30/14 21:28, Jeff Kirsher wrote:
>>> On Tue, 2014-12-30 at 21:00 +0200, Vladislav Zolotarov wrote:
>>>> On Dec 30, 2014 8:52 PM, "Jeff Kirsher" <jeffrey.t.kirsher@intel.com>
>>>> wrote:
>>>>> On Tue, 2014-12-30 at 20:50 +0200, Vladislav Zolotarov wrote:
>>>>>> On Dec 30, 2014 8:16 PM, "Jeff Kirsher"
>>>> <jeffrey.t.kirsher@intel.com>
>>>>>> wrote:
>>>>>>> On Tue, Dec 30, 2014 at 8:30 AM, Vlad Zolotarov
>>>>>>> <vladz@cloudius-systems.com> wrote:
>>>>>>>> Add the ethtool ops to VF driver to allow querying the RSS
>>>>>> indirection table
>>>>>>>> and RSS Random Key.
>>>>>>>>
>>>>>>>>    - PF driver: Add new VF-PF channel commands.
>>>>>>>>    - VF driver: Utilize these new commands and add the
>>>> corresponding
>>>>>>>>                 ethtool callbacks.
>>>>>>>>
>>>>>>>> Vlad Zolotarov (5):
>>>>>>>>     ixgbe: Add a RETA query command to VF-PF channel API
>>>>>>>>     ixgbevf: Add a RETA query code
>>>>>>>>     ixgbe: Add GET_RSS_KEY command to VF-PF channel commands set
>>>>>>>>     ixgbevf: Add RSS Key query code
>>>>>>>>     ixgbevf: Add the appropriate ethtool ops to query RSS
>>>>>> indirection
>>>>>>>>       table and key
>>>>>>>>
>>>>>>>>    drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h      |   8 ++
>>>>>>>>    drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c    |  60
>>>>>> +++++++++++
>>>>>>>>    drivers/net/ethernet/intel/ixgbevf/ethtool.c      |  37
>>>> +++++++
>>>>>>>>    drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   4 +-
>>>>>>>>    drivers/net/ethernet/intel/ixgbevf/mbx.h          |   8 ++
>>>>>>>>    drivers/net/ethernet/intel/ixgbevf/vf.c           | 125
>>>>>> ++++++++++++++++++++++
>>>>>>>>    drivers/net/ethernet/intel/ixgbevf/vf.h           |   2 +
>>>>>>>>    7 files changed, 243 insertions(+), 1 deletion(-)
>>>>>>> Remember to CC on patches against drivers in
>>>>>> drivers/net/ethernet/intel/ please.
>>>>>>
>>>>>> To CC who? ;)
>>>>>>
>>>>> Sorry, I fat fingered that response.  Please CC me.
>>>> Sure. NP.
>>>> Since we are talking already, could u comment on the RFC? Does it look
>>>> ok to you?
>>>>
>>>>
>>> I have provided feedback on 3 of the patches, the others look ok to me.
>>> I would like Greg Rose to look over your changes since he did most of
>>> the ixgbevf work, and I want to make sure that he is fine with these
>>> changes.
>> I saw them. Thanks, Jeff. I'll wait for Greg's comments before cooking
>> the final series then.
>>
>> Again, thanks for a rapid response.
>>
> Go ahead and send out a non-RFC series.  Greg is on vacation till the
> 5th anyway, so I will pick up your series and have my validation team
> run it through their tests in the mean time.

Great! It's a real pleasure working with you!

^ permalink raw reply

* Re: [RFC PATCH net-next 1/5] ixgbe: Add a RETA query command to VF-PF channel API
From: Vlad Zolotarov @ 2014-12-30 19:35 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: netdev, gleb, avi
In-Reply-To: <1419967855.31582.24.camel@jtkirshe-mobl>


On 12/30/14 21:30, Jeff Kirsher wrote:
> On Tue, 2014-12-30 at 21:28 +0200, Vlad Zolotarov wrote:
>> On 12/30/14 21:21, Jeff Kirsher wrote:
>>> On Tue, Dec 30, 2014 at 8:30 AM, Vlad Zolotarov
>>> <vladz@cloudius-systems.com> wrote:
>>>> 82599 VFs and PF share the same RSS redirection table (RETA). Therefore we
>>>> just return it for all VFs.
>>>>
>>>> RETA table is an array of 32 registers and the maximum number of registers
>>>> that may be delivered in a single VF-PF channel command is 15. Therefore
>>>> we will deliver the whole table in 3 steps: 12, 12 and 8 registers in each
>>>> step correspondingly. Thus this patch does the following:
>>>>
>>>>     - Adds a new API version (to specify a new commands set).
>>>>     - Adds the IXGBE_VF_GET_RETA_[0,1,2] commands to the VF-PF commands set.
>>>>
>>>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>>>> ---
>>>>    drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h   |  6 +++++
>>>>    drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 35 ++++++++++++++++++++++++++
>>>>    2 files changed, 41 insertions(+)
>>>>
>>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
>>>> index a5cb755..c1123d9 100644
>>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
>>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
>>>> @@ -73,6 +73,7 @@ enum ixgbe_pfvf_api_rev {
>>>>           ixgbe_mbox_api_10,      /* API version 1.0, linux/freebsd VF driver */
>>>>           ixgbe_mbox_api_20,      /* API version 2.0, solaris Phase1 VF driver */
>>>>           ixgbe_mbox_api_11,      /* API version 1.1, linux/freebsd VF driver */
>>>> +       ixgbe_mbox_api_12,      /* API version 1.2, linux/freebsd VF driver */
>>>>           /* This value should always be last */
>>>>           ixgbe_mbox_api_unknown, /* indicates that API version is not known */
>>>>    };
>>>> @@ -91,6 +92,11 @@ enum ixgbe_pfvf_api_rev {
>>>>    /* mailbox API, version 1.1 VF requests */
>>>>    #define IXGBE_VF_GET_QUEUES    0x09 /* get queue configuration */
>>>>
>>>> +/* mailbox API, version 1.2 VF requests */
>>>> +#define IXGBE_VF_GET_RETA_0    0x0a /* get RETA[0..11]  */
>>>> +#define IXGBE_VF_GET_RETA_1    0x0b /* get RETA[12..23] */
>>>> +#define IXGBE_VF_GET_RETA_2    0x0c /* get RETA[24..31] */
>>>> +
>>>>    /* GET_QUEUES return data indices within the mailbox */
>>>>    #define IXGBE_VF_TX_QUEUES     1       /* number of Tx queues supported */
>>>>    #define IXGBE_VF_RX_QUEUES     2       /* number of Rx queues supported */
>>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
>>>> index c76ba90..84db1a5 100644
>>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
>>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
>>>> @@ -427,6 +427,7 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
>>>>    #endif /* CONFIG_FCOE */
>>>>                   switch (adapter->vfinfo[vf].vf_api) {
>>>>                   case ixgbe_mbox_api_11:
>>>> +               case ixgbe_mbox_api_12:
>>>>                           /*
>>>>                            * Version 1.1 supports jumbo frames on VFs if PF has
>>>>                            * jumbo frames enabled which means legacy VFs are
>>>> @@ -894,6 +895,7 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
>>>>           switch (api) {
>>>>           case ixgbe_mbox_api_10:
>>>>           case ixgbe_mbox_api_11:
>>>> +       case ixgbe_mbox_api_12:
>>>>                   adapter->vfinfo[vf].vf_api = api;
>>>>                   return 0;
>>>>           default:
>>>> @@ -917,6 +919,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
>>>>           switch (adapter->vfinfo[vf].vf_api) {
>>>>           case ixgbe_mbox_api_20:
>>>>           case ixgbe_mbox_api_11:
>>>> +       case ixgbe_mbox_api_12:
>>>>                   break;
>>>>           default:
>>>>                   return -1;
>>>> @@ -944,6 +947,29 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
>>>>           return 0;
>>>>    }
>>>>
>>>> +static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter,
>>>> +                            u32 *msgbuf, u32 vf, int reta_offset_dw,
>>>> +                            size_t dwords)
>>>> +{
>>>> +       struct ixgbe_hw *hw = &adapter->hw;
>>>> +       int i;
>>>> +       u32 *reta = &msgbuf[1];
>>>> +
>>>> +       /* verify the PF is supporting the correct API */
>>>> +       switch (adapter->vfinfo[vf].vf_api) {
>>>> +       case ixgbe_mbox_api_12:
>>>> +               break;
>>>> +       default:
>>>> +               return -EPERM;
>>>> +       }
>>> A switch statement is overkill for a single case.  It would be far
>>> simpler to just have
>>>
>>> if (adapter->vfinfo[vf].vf_api != ixgbe_mbox_api_12)
>>>           return -EPERM;
>>>
>>> Later on, if there are multiple API's or future API's that also
>>> support this, then we could move to a case statement.
>> I thought about it to but decided to make it future-proof. NP. Will make
>> it an "if" in a non-RFC series. Thanks.
>>
> Thanks fine, and if you CC me on the non-RFC series,

Sure thing I will! ;) After such a lightning fast review I'll CC u for 
all my future net patches! ;)

>   I will pick them up
> (and make sure that Greg Rose has a chance to review them).
Cool. Then I'll cook the series first think tomorrow morning.
Thanks a lot, Jeff

^ permalink raw reply

* Re: [RFC PATCH net-next 0/5]: ixgbevf: Allow querying VFs RSS indirection table and key
From: Jeff Kirsher @ 2014-12-30 19:34 UTC (permalink / raw)
  To: Vlad Zolotarov; +Cc: Greg Rose, netdev, Gleb Natapov, avi
In-Reply-To: <54A2FD6C.6050807@cloudius-systems.com>

[-- Attachment #1: Type: text/plain, Size: 2754 bytes --]

On Tue, 2014-12-30 at 21:30 +0200, Vlad Zolotarov wrote:
> On 12/30/14 21:28, Jeff Kirsher wrote:
> > On Tue, 2014-12-30 at 21:00 +0200, Vladislav Zolotarov wrote:
> >> On Dec 30, 2014 8:52 PM, "Jeff Kirsher" <jeffrey.t.kirsher@intel.com>
> >> wrote:
> >>> On Tue, 2014-12-30 at 20:50 +0200, Vladislav Zolotarov wrote:
> >>>> On Dec 30, 2014 8:16 PM, "Jeff Kirsher"
> >> <jeffrey.t.kirsher@intel.com>
> >>>> wrote:
> >>>>> On Tue, Dec 30, 2014 at 8:30 AM, Vlad Zolotarov
> >>>>> <vladz@cloudius-systems.com> wrote:
> >>>>>> Add the ethtool ops to VF driver to allow querying the RSS
> >>>> indirection table
> >>>>>> and RSS Random Key.
> >>>>>>
> >>>>>>   - PF driver: Add new VF-PF channel commands.
> >>>>>>   - VF driver: Utilize these new commands and add the
> >> corresponding
> >>>>>>                ethtool callbacks.
> >>>>>>
> >>>>>> Vlad Zolotarov (5):
> >>>>>>    ixgbe: Add a RETA query command to VF-PF channel API
> >>>>>>    ixgbevf: Add a RETA query code
> >>>>>>    ixgbe: Add GET_RSS_KEY command to VF-PF channel commands set
> >>>>>>    ixgbevf: Add RSS Key query code
> >>>>>>    ixgbevf: Add the appropriate ethtool ops to query RSS
> >>>> indirection
> >>>>>>      table and key
> >>>>>>
> >>>>>>   drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h      |   8 ++
> >>>>>>   drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c    |  60
> >>>> +++++++++++
> >>>>>>   drivers/net/ethernet/intel/ixgbevf/ethtool.c      |  37
> >> +++++++
> >>>>>>   drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   4 +-
> >>>>>>   drivers/net/ethernet/intel/ixgbevf/mbx.h          |   8 ++
> >>>>>>   drivers/net/ethernet/intel/ixgbevf/vf.c           | 125
> >>>> ++++++++++++++++++++++
> >>>>>>   drivers/net/ethernet/intel/ixgbevf/vf.h           |   2 +
> >>>>>>   7 files changed, 243 insertions(+), 1 deletion(-)
> >>>>> Remember to CC on patches against drivers in
> >>>> drivers/net/ethernet/intel/ please.
> >>>>
> >>>> To CC who? ;)
> >>>>
> >>> Sorry, I fat fingered that response.  Please CC me.
> >> Sure. NP.
> >> Since we are talking already, could u comment on the RFC? Does it look
> >> ok to you?
> >>
> >>
> > I have provided feedback on 3 of the patches, the others look ok to me.
> > I would like Greg Rose to look over your changes since he did most of
> > the ixgbevf work, and I want to make sure that he is fine with these
> > changes.
> 
> I saw them. Thanks, Jeff. I'll wait for Greg's comments before cooking 
> the final series then.
> 
> Again, thanks for a rapid response.
> 

Go ahead and send out a non-RFC series.  Greg is on vacation till the
5th anyway, so I will pick up your series and have my validation team
run it through their tests in the mean time.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [RFC PATCH net-next 0/5]: ixgbevf: Allow querying VFs RSS indirection table and key
From: Vlad Zolotarov @ 2014-12-30 19:30 UTC (permalink / raw)
  To: Jeff Kirsher, Greg Rose; +Cc: netdev, Gleb Natapov, avi
In-Reply-To: <1419967707.31582.23.camel@jtkirshe-mobl>


On 12/30/14 21:28, Jeff Kirsher wrote:
> On Tue, 2014-12-30 at 21:00 +0200, Vladislav Zolotarov wrote:
>> On Dec 30, 2014 8:52 PM, "Jeff Kirsher" <jeffrey.t.kirsher@intel.com>
>> wrote:
>>> On Tue, 2014-12-30 at 20:50 +0200, Vladislav Zolotarov wrote:
>>>> On Dec 30, 2014 8:16 PM, "Jeff Kirsher"
>> <jeffrey.t.kirsher@intel.com>
>>>> wrote:
>>>>> On Tue, Dec 30, 2014 at 8:30 AM, Vlad Zolotarov
>>>>> <vladz@cloudius-systems.com> wrote:
>>>>>> Add the ethtool ops to VF driver to allow querying the RSS
>>>> indirection table
>>>>>> and RSS Random Key.
>>>>>>
>>>>>>   - PF driver: Add new VF-PF channel commands.
>>>>>>   - VF driver: Utilize these new commands and add the
>> corresponding
>>>>>>                ethtool callbacks.
>>>>>>
>>>>>> Vlad Zolotarov (5):
>>>>>>    ixgbe: Add a RETA query command to VF-PF channel API
>>>>>>    ixgbevf: Add a RETA query code
>>>>>>    ixgbe: Add GET_RSS_KEY command to VF-PF channel commands set
>>>>>>    ixgbevf: Add RSS Key query code
>>>>>>    ixgbevf: Add the appropriate ethtool ops to query RSS
>>>> indirection
>>>>>>      table and key
>>>>>>
>>>>>>   drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h      |   8 ++
>>>>>>   drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c    |  60
>>>> +++++++++++
>>>>>>   drivers/net/ethernet/intel/ixgbevf/ethtool.c      |  37
>> +++++++
>>>>>>   drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   4 +-
>>>>>>   drivers/net/ethernet/intel/ixgbevf/mbx.h          |   8 ++
>>>>>>   drivers/net/ethernet/intel/ixgbevf/vf.c           | 125
>>>> ++++++++++++++++++++++
>>>>>>   drivers/net/ethernet/intel/ixgbevf/vf.h           |   2 +
>>>>>>   7 files changed, 243 insertions(+), 1 deletion(-)
>>>>> Remember to CC on patches against drivers in
>>>> drivers/net/ethernet/intel/ please.
>>>>
>>>> To CC who? ;)
>>>>
>>> Sorry, I fat fingered that response.  Please CC me.
>> Sure. NP.
>> Since we are talking already, could u comment on the RFC? Does it look
>> ok to you?
>>
>>
> I have provided feedback on 3 of the patches, the others look ok to me.
> I would like Greg Rose to look over your changes since he did most of
> the ixgbevf work, and I want to make sure that he is fine with these
> changes.

I saw them. Thanks, Jeff. I'll wait for Greg's comments before cooking 
the final series then.

Again, thanks for a rapid response.

^ permalink raw reply

* Re: [RFC PATCH net-next 1/5] ixgbe: Add a RETA query command to VF-PF channel API
From: Jeff Kirsher @ 2014-12-30 19:30 UTC (permalink / raw)
  To: Vlad Zolotarov; +Cc: netdev, gleb, avi
In-Reply-To: <54A2FCE1.3000000@cloudius-systems.com>

[-- Attachment #1: Type: text/plain, Size: 5285 bytes --]

On Tue, 2014-12-30 at 21:28 +0200, Vlad Zolotarov wrote:
> On 12/30/14 21:21, Jeff Kirsher wrote:
> > On Tue, Dec 30, 2014 at 8:30 AM, Vlad Zolotarov
> > <vladz@cloudius-systems.com> wrote:
> >> 82599 VFs and PF share the same RSS redirection table (RETA). Therefore we
> >> just return it for all VFs.
> >>
> >> RETA table is an array of 32 registers and the maximum number of registers
> >> that may be delivered in a single VF-PF channel command is 15. Therefore
> >> we will deliver the whole table in 3 steps: 12, 12 and 8 registers in each
> >> step correspondingly. Thus this patch does the following:
> >>
> >>    - Adds a new API version (to specify a new commands set).
> >>    - Adds the IXGBE_VF_GET_RETA_[0,1,2] commands to the VF-PF commands set.
> >>
> >> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
> >> ---
> >>   drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h   |  6 +++++
> >>   drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 35 ++++++++++++++++++++++++++
> >>   2 files changed, 41 insertions(+)
> >>
> >> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
> >> index a5cb755..c1123d9 100644
> >> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
> >> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
> >> @@ -73,6 +73,7 @@ enum ixgbe_pfvf_api_rev {
> >>          ixgbe_mbox_api_10,      /* API version 1.0, linux/freebsd VF driver */
> >>          ixgbe_mbox_api_20,      /* API version 2.0, solaris Phase1 VF driver */
> >>          ixgbe_mbox_api_11,      /* API version 1.1, linux/freebsd VF driver */
> >> +       ixgbe_mbox_api_12,      /* API version 1.2, linux/freebsd VF driver */
> >>          /* This value should always be last */
> >>          ixgbe_mbox_api_unknown, /* indicates that API version is not known */
> >>   };
> >> @@ -91,6 +92,11 @@ enum ixgbe_pfvf_api_rev {
> >>   /* mailbox API, version 1.1 VF requests */
> >>   #define IXGBE_VF_GET_QUEUES    0x09 /* get queue configuration */
> >>
> >> +/* mailbox API, version 1.2 VF requests */
> >> +#define IXGBE_VF_GET_RETA_0    0x0a /* get RETA[0..11]  */
> >> +#define IXGBE_VF_GET_RETA_1    0x0b /* get RETA[12..23] */
> >> +#define IXGBE_VF_GET_RETA_2    0x0c /* get RETA[24..31] */
> >> +
> >>   /* GET_QUEUES return data indices within the mailbox */
> >>   #define IXGBE_VF_TX_QUEUES     1       /* number of Tx queues supported */
> >>   #define IXGBE_VF_RX_QUEUES     2       /* number of Rx queues supported */
> >> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
> >> index c76ba90..84db1a5 100644
> >> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
> >> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
> >> @@ -427,6 +427,7 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
> >>   #endif /* CONFIG_FCOE */
> >>                  switch (adapter->vfinfo[vf].vf_api) {
> >>                  case ixgbe_mbox_api_11:
> >> +               case ixgbe_mbox_api_12:
> >>                          /*
> >>                           * Version 1.1 supports jumbo frames on VFs if PF has
> >>                           * jumbo frames enabled which means legacy VFs are
> >> @@ -894,6 +895,7 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
> >>          switch (api) {
> >>          case ixgbe_mbox_api_10:
> >>          case ixgbe_mbox_api_11:
> >> +       case ixgbe_mbox_api_12:
> >>                  adapter->vfinfo[vf].vf_api = api;
> >>                  return 0;
> >>          default:
> >> @@ -917,6 +919,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
> >>          switch (adapter->vfinfo[vf].vf_api) {
> >>          case ixgbe_mbox_api_20:
> >>          case ixgbe_mbox_api_11:
> >> +       case ixgbe_mbox_api_12:
> >>                  break;
> >>          default:
> >>                  return -1;
> >> @@ -944,6 +947,29 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
> >>          return 0;
> >>   }
> >>
> >> +static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter,
> >> +                            u32 *msgbuf, u32 vf, int reta_offset_dw,
> >> +                            size_t dwords)
> >> +{
> >> +       struct ixgbe_hw *hw = &adapter->hw;
> >> +       int i;
> >> +       u32 *reta = &msgbuf[1];
> >> +
> >> +       /* verify the PF is supporting the correct API */
> >> +       switch (adapter->vfinfo[vf].vf_api) {
> >> +       case ixgbe_mbox_api_12:
> >> +               break;
> >> +       default:
> >> +               return -EPERM;
> >> +       }
> > A switch statement is overkill for a single case.  It would be far
> > simpler to just have
> >
> > if (adapter->vfinfo[vf].vf_api != ixgbe_mbox_api_12)
> >          return -EPERM;
> >
> > Later on, if there are multiple API's or future API's that also
> > support this, then we could move to a case statement.
> 
> I thought about it to but decided to make it future-proof. NP. Will make 
> it an "if" in a non-RFC series. Thanks.
> 

Thanks fine, and if you CC me on the non-RFC series, I will pick them up
(and make sure that Greg Rose has a chance to review them).

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [RFC PATCH net-next 3/5] ixgbe: Add GET_RSS_KEY command to VF-PF channel commands set
From: Vlad Zolotarov @ 2014-12-30 19:29 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: netdev, Gleb Natapov, Avi Kivity
In-Reply-To: <CAL3LdT50tUsJOW+y15GbO=90UDYWNLjMyOF6AqFgj_ciqBeTUQ@mail.gmail.com>


On 12/30/14 21:23, Jeff Kirsher wrote:
> On Tue, Dec 30, 2014 at 8:30 AM, Vlad Zolotarov
> <vladz@cloudius-systems.com> wrote:
>> 82599 VFs and PF share the same RSS Key. Therefore we will return
>> the same RSS key for all VFs.
>>
>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>> ---
>>   drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h   |  2 ++
>>   drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 25 +++++++++++++++++++++++++
>>   2 files changed, 27 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
>> index c1123d9..52e775b 100644
>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
>> @@ -97,6 +97,8 @@ enum ixgbe_pfvf_api_rev {
>>   #define IXGBE_VF_GET_RETA_1    0x0b /* get RETA[12..23] */
>>   #define IXGBE_VF_GET_RETA_2    0x0c /* get RETA[24..31] */
>>
>> +#define IXGBE_VF_GET_RSS_KEY   0x0d /* get RSS key */
>> +
>>   /* GET_QUEUES return data indices within the mailbox */
>>   #define IXGBE_VF_TX_QUEUES     1       /* number of Tx queues supported */
>>   #define IXGBE_VF_RX_QUEUES     2       /* number of Rx queues supported */
>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
>> index 84db1a5..fc8233e 100644
>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
>> @@ -970,6 +970,28 @@ static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter,
>>          return 0;
>>   }
>>
>> +static int ixgbe_get_vf_rss_key(struct ixgbe_adapter *adapter,
>> +                               u32 *msgbuf, u32 vf)
>> +{
>> +       struct ixgbe_hw *hw = &adapter->hw;
>> +       int i;
>> +       u32 *rss_key = &msgbuf[1];
>> +
>> +       /* verify the PF is supporting the correct API */
>> +       switch (adapter->vfinfo[vf].vf_api) {
>> +       case ixgbe_mbox_api_12:
>> +               break;
>> +       default:
>> +               return -EPERM;
>> +       }
> Same as in patch 01 of the series, a switch statement is overkill for
> a single case.  It would be far simpler to just have
>
> if (adapter->vfinfo[vf].vf_api != ixgbe_mbox_api_12)
>          return -EPERM;
>
> Later on, if there are multiple API's or future API's that also
> support this, then we could move to a case statement.

Got it. Will fix.
Thanks.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox