Netdev List
 help / color / mirror / Atom feed
* [RFC PATCH net-next 8/8] net: ipv4: listify ip_rcv_finish
From: Edward Cree @ 2016-04-19 13:37 UTC (permalink / raw)
  To: netdev, David Miller; +Cc: Jesper Dangaard Brouer, linux-net-drivers
In-Reply-To: <5716338E.4050003@solarflare.com>

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 net/ipv4/ip_input.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e7d0d85..5bbc409 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -308,7 +308,8 @@ drop:
 	return true;
 }
 
-static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+			      struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
@@ -385,13 +386,22 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	return dst_input(skb);
+	return NET_RX_SUCCESS;
 
 drop:
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
 
+static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	int ret = ip_rcv_finish_core(net, sk, skb);
+
+	if (ret != NET_RX_DROP)
+		ret = dst_input(skb);
+	return ret;
+}
+
 /*
  * 	Main IP Receive routine.
  */
@@ -501,16 +511,54 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		       ip_rcv_finish);
 }
 
+static void ip_sublist_rcv_finish(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(list)) != NULL)
+		dst_input(skb);
+}
+
+static void ip_list_rcv_finish(struct net *net, struct sock *sk,
+			       struct sk_buff_head *list)
+{
+	struct dst_entry *curr_dst = NULL;
+	struct sk_buff_head sublist;
+	struct sk_buff *skb;
+
+	__skb_queue_head_init(&sublist);
+
+	while ((skb = __skb_dequeue(list)) != NULL) {
+		struct dst_entry *dst;
+
+		if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+			continue;
+
+		dst = skb_dst(skb);
+		if (skb_queue_empty(&sublist)) {
+			curr_dst = dst;
+		} else if (curr_dst != dst) {
+			/* dispatch old sublist */
+			ip_sublist_rcv_finish(&sublist);
+			/* start new sublist */
+			__skb_queue_head_init(&sublist);
+			curr_dst = dst;
+		}
+		/* add to current sublist */
+		__skb_queue_tail(&sublist, skb);
+	}
+	/* dispatch final sublist */
+	ip_sublist_rcv_finish(&sublist);
+}
+
 static void ip_sublist_rcv(struct sk_buff_head *list, struct net_device *dev,
 			   struct net *net)
 {
 	struct sk_buff_head sublist;
-	struct sk_buff *skb;
 
 	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
 		     list, &sublist, dev, NULL, ip_rcv_finish);
-	while ((skb = __skb_dequeue(&sublist)) != NULL)
-		ip_rcv_finish(net, NULL, skb);
+	ip_list_rcv_finish(net, NULL, &sublist);
 }
 
 /* Receive a list of IP packets */

^ permalink raw reply related

* Re: [PATCH net] tcp: Fix SOF_TIMESTAMPING_TX_ACK when handling dup acks
From: Soheil Hassas Yeganeh @ 2016-04-19 13:54 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: netdev, Kernel Team, Eric Dumazet, Neal Cardwell,
	Soheil Hassas Yeganeh, Willem de Bruijn, Yuchung Cheng
In-Reply-To: <1461019193-3034571-1-git-send-email-kafai@fb.com>

On Mon, Apr 18, 2016 at 6:39 PM, Martin KaFai Lau <kafai@fb.com> wrote:
> Assuming SOF_TIMESTAMPING_TX_ACK is on. When dup acks are received,
> it could incorrectly think that a skb has already
> been acked and queue a SCM_TSTAMP_ACK cmsg to the
> sk->sk_error_queue.
>
> In tcp_ack_tstamp(), it checks
> 'between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1)'.
> If prior_snd_una == tcp_sk(sk)->snd_una like the following packetdrill
> script, between() returns true but the tskey is actually not acked.
> e.g. try between(3, 2, 1).
>
> The fix is to replace between() with one before() and one !before().
> By doing this, the -1 offset on the tcp_sk(sk)->snd_una can also be
> removed.
>
> A packetdrill script is used to reproduce the dup ack scenario.
> Due to the lack of cmsg support in packetdrill (maybe I
> cannot find it), a BPF prog is used to attach a kprobe to
> sock_queue_err_skb() and print out the value of
> serr->ee.ee_data.
>
> Both the packetdrill and the bcc BPF scripts are attached at the end of
> this commit message.
>
> BPF Output Before Fix:
> ~~~~~~
>       <...>-2056  [001] d.s.   433.927987: : ee_data:1459  #incorrect
> packetdrill-2056  [001] d.s.   433.929563: : ee_data:1459  #incorrect
> packetdrill-2056  [001] d.s.   433.930765: : ee_data:1459  #incorrect
> packetdrill-2056  [001] d.s.   434.028177: : ee_data:1459
> packetdrill-2056  [001] d.s.   434.029686: : ee_data:14599
>
> BPF Output After Fix:
> ~~~~~~
>       <...>-2049  [000] d.s.   113.517039: : ee_data:1459
>       <...>-2049  [000] d.s.   113.517253: : ee_data:14599
>
> BCC BPF Script:
> ~~~~~~
> #!/usr/bin/env python
>
> from __future__ import print_function
> from bcc import BPF
>
> bpf_text = """
> #include <uapi/linux/ptrace.h>
> #include <net/sock.h>
> #include <bcc/proto.h>
> #include <linux/errqueue.h>
>
> #ifdef memset
> #undef memset
> #endif
>
> int trace_err_skb(struct pt_regs *ctx)
> {
>         struct sk_buff *skb = (struct sk_buff *)ctx->si;
>         struct sock *sk = (struct sock *)ctx->di;
>         struct sock_exterr_skb *serr;
>         u32 ee_data = 0;
>
>         if (!sk || !skb)
>                 return 0;
>
>         serr = SKB_EXT_ERR(skb);
>         bpf_probe_read(&ee_data, sizeof(ee_data), &serr->ee.ee_data);
>         bpf_trace_printk("ee_data:%u\\n", ee_data);
>
>         return 0;
> };
> """
>
> b = BPF(text=bpf_text)
> b.attach_kprobe(event="sock_queue_err_skb", fn_name="trace_err_skb")
> print("Attached to kprobe")
> b.trace_print()
>
> Packetdrill Script:
> ~~~~~~
> +0 `sysctl -q -w net.ipv4.tcp_min_tso_segs=10`
> +0 `sysctl -q -w net.ipv4.tcp_no_metrics_save=1`
> +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
> +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
> +0 bind(3, ..., ...) = 0
> +0 listen(3, 1) = 0
>
> 0.100 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
> 0.100 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>
> 0.200 < . 1:1(0) ack 1 win 257
> 0.200 accept(3, ..., ...) = 4
> +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
>
> +0 setsockopt(4, SOL_SOCKET, 37, [2688], 4) = 0
> 0.200 write(4, ..., 1460) = 1460
> 0.200 write(4, ..., 13140) = 13140
>
> 0.200 > P. 1:1461(1460) ack 1
> 0.200 > . 1461:8761(7300) ack 1
> 0.200 > P. 8761:14601(5840) ack 1
>
> 0.300 < . 1:1(0) ack 1 win 257 <sack 1461:2921,nop,nop>
> 0.300 < . 1:1(0) ack 1 win 257 <sack 1461:4381,nop,nop>
> 0.300 < . 1:1(0) ack 1 win 257 <sack 1461:5841,nop,nop>
> 0.300 > P. 1:1461(1460) ack 1
> 0.400 < . 1:1(0) ack 14601 win 257
>
> 0.400 close(4) = 0
> 0.400 > F. 14601:14601(0) ack 1
> 0.500 < F. 1:1(0) ack 14602 win 257
> 0.500 > . 14602:14602(0) ack 2
>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Neal Cardwell <ncardwell@google.com>
> Cc: Soheil Hassas Yeganeh <soheil.kdev@gmail.com>

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

> Cc: Willem de Bruijn <willemb@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> ---
>  net/ipv4/tcp_input.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index e6e65f7..0edb071 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3098,7 +3098,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
>
>         shinfo = skb_shinfo(skb);
>         if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
> -           between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
> +           !before(shinfo->tskey, prior_snd_una) &&
> +           before(shinfo->tskey, tcp_sk(sk)->snd_una))
>                 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
>  }

Nice catch! Thanks.

> --
> 2.5.1
>

^ permalink raw reply

* [PATCH v2 0/1] drivers: net: cpsw: Fix NULL pointer dereference with two slave PHYs
From: Andrew Goodbody @ 2016-04-19 13:56 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, linux-omap, mugunthanvnm, grygorii.strashko, tony,
	Andrew Goodbody

Resend to add more people on Cc: as requested by Grygorii Strashko.

This is a fix for a NULL pointer dereference from cpsw which is triggered
by having two slave PHYs attached to a cpsw network device. The problem is
due to only maintaining a single reference to a PHY node in the private data
which gets overwritten by the second PHY probe. So move the PHY node
reference to the individual slave data so that there is now one per slave.

v1 had a problem that data->slaves was used before it had been filled in.

Andrew Goodbody (1):
  Prevent NULL pointer dereference with two PHYs on cpsw

 drivers/net/ethernet/ti/cpsw.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

-- 
2.5.0

^ permalink raw reply

* [PATCH v2 1/1] drivers: net: cpsw: Prevent NUll pointer dereference with two PHYs
From: Andrew Goodbody @ 2016-04-19 13:56 UTC (permalink / raw)
  To: netdev
  Cc: linux-kernel, linux-omap, mugunthanvnm, grygorii.strashko, tony,
	Andrew Goodbody
In-Reply-To: <1461074186-25535-1-git-send-email-andrew.goodbody@cambrionix.com>

Adding a 2nd PHY to cpsw results in a NULL pointer dereference
as below. Fix by maintaining a reference to each PHY node in slave
struct instead of a single reference in the priv struct which was
overwritten by the 2nd PHY.

[   17.870933] Unable to handle kernel NULL pointer dereference at virtual address 00000180
[   17.879557] pgd = dc8bc000
[   17.882514] [00000180] *pgd=9c882831, *pte=00000000, *ppte=00000000
[   17.889213] Internal error: Oops: 17 [#1] ARM
[   17.893838] Modules linked in:
[   17.897102] CPU: 0 PID: 1657 Comm: connmand Not tainted 4.5.0-ge463dfb-dirty #11
[   17.904947] Hardware name: Cambrionix whippet
[   17.909576] task: dc859240 ti: dc968000 task.ti: dc968000
[   17.915339] PC is at phy_attached_print+0x18/0x8c
[   17.920339] LR is at phy_attached_info+0x14/0x18
[   17.925247] pc : [<c042baec>]    lr : [<c042bb74>]    psr: 600f0113
[   17.925247] sp : dc969cf8  ip : dc969d28  fp : dc969d18
[   17.937425] r10: dda7a400  r9 : 00000000  r8 : 00000000
[   17.942971] r7 : 00000001  r6 : ddb00480  r5 : ddb8cb34  r4 : 00000000
[   17.949898] r3 : c0954cc0  r2 : c09562b0  r1 : 00000000  r0 : 00000000
[   17.956829] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[   17.964401] Control: 10c5387d  Table: 9c8bc019  DAC: 00000051
[   17.970500] Process connmand (pid: 1657, stack limit = 0xdc968210)
[   17.977059] Stack: (0xdc969cf8 to 0xdc96a000)
[   17.981692] 9ce0:                                                       dc969d28 dc969d08
[   17.990386] 9d00: c038f9bc c038f6b4 ddb00480 dc969d34 dc969d28 c042bb74 c042bae4 00000000
[   17.999080] 9d20: c09562b0 c0954cc0 dc969d5c dc969d38 c043ebfc c042bb6c 00000007 00000003
[   18.007773] 9d40: ddb00000 ddb8cb58 ddb00480 00000001 dc969dec dc969d60 c0441614 c043ea68
[   18.016465] 9d60: 00000000 00000003 00000000 fffffff4 dc969df4 0000000d 00000000 00000000
[   18.025159] 9d80: dc969db4 dc969d90 c005dc08 c05839e0 dc969df4 0000000d ddb00000 00001002
[   18.033851] 9da0: 00000000 00000000 dc969dcc dc969db8 c005ddf4 c005dbc8 00000000 00000118
[   18.042544] 9dc0: dc969dec dc969dd0 ddb00000 c06db27c ffff9003 00001002 00000000 00000000
[   18.051237] 9de0: dc969e0c dc969df0 c057c88c c04410dc dc969e0c ddb00000 ddb00000 00000001
[   18.059930] 9e00: dc969e34 dc969e10 c057cb44 c057c7d8 ddb00000 ddb00138 00001002 beaeda20
[   18.068622] 9e20: 00000000 00000000 dc969e5c dc969e38 c057cc28 c057cac0 00000000 dc969e80
[   18.077315] 9e40: dda7a40c beaeda20 00000000 00000000 dc969ecc dc969e60 c05e36d0 c057cc14
[   18.086007] 9e60: dc969e84 00000051 beaeda20 00000000 dda7a40c 00000014 ddb00000 00008914
[   18.094699] 9e80: 30687465 00000000 00000000 00000000 00009003 00000000 00000000 00000000
[   18.103391] 9ea0: 00001002 00008914 dd257ae0 beaeda20 c098a428 beaeda20 00000011 00000000
[   18.112084] 9ec0: dc969edc dc969ed0 c05e4e54 c05e3030 dc969efc dc969ee0 c055f5ac c05e4cc4
[   18.120777] 9ee0: beaeda20 dd257ae0 dc8ab4c0 00008914 dc969f7c dc969f00 c010b388 c055f45c
[   18.129471] 9f00: c071ca40 dd257ac0 c00165e8 dc968000 dc969f3c dc969f20 dc969f64 dc969f28
[   18.138164] 9f20: c0115708 c0683ec8 dd257ac0 dd257ac0 dc969f74 dc969f40 c055f350 c00fc66c
[   18.146857] 9f40: dd82e4d0 00000011 00000000 00080000 dd257ac0 00000000 dc8ab4c0 dc8ab4c0
[   18.155550] 9f60: 00008914 beaeda20 00000011 00000000 dc969fa4 dc969f80 c010bc34 c010b2fc
[   18.164242] 9f80: 00000000 00000011 00000002 00000036 c00165e8 dc968000 00000000 dc969fa8
[   18.172935] 9fa0: c00163e0 c010bbcc 00000000 00000011 00000011 00008914 beaeda20 00009003
[   18.181628] 9fc0: 00000000 00000011 00000002 00000036 00081018 00000001 00000000 beaedc10
[   18.190320] 9fe0: 00083188 beaeda1c 00043a5d b6d29c0c 600b0010 00000011 00000000 00000000
[   18.198989] Backtrace:
[   18.201621] [<c042bad8>] (phy_attached_print) from [<c042bb74>] (phy_attached_info+0x14/0x18)
[   18.210664]  r3:c0954cc0 r2:c09562b0 r1:00000000
[   18.215588]  r4:ddb00480
[   18.218322] [<c042bb60>] (phy_attached_info) from [<c043ebfc>] (cpsw_slave_open+0x1a0/0x280)
[   18.227293] [<c043ea5c>] (cpsw_slave_open) from [<c0441614>] (cpsw_ndo_open+0x544/0x674)
[   18.235874]  r7:00000001 r6:ddb00480 r5:ddb8cb58 r4:ddb00000
[   18.241944] [<c04410d0>] (cpsw_ndo_open) from [<c057c88c>] (__dev_open+0xc0/0x128)
[   18.249972]  r9:00000000 r8:00000000 r7:00001002 r6:ffff9003 r5:c06db27c r4:ddb00000
[   18.258255] [<c057c7cc>] (__dev_open) from [<c057cb44>] (__dev_change_flags+0x90/0x154)
[   18.266745]  r5:00000001 r4:ddb00000
[   18.270575] [<c057cab4>] (__dev_change_flags) from [<c057cc28>] (dev_change_flags+0x20/0x50)
[   18.279523]  r9:00000000 r8:00000000 r7:beaeda20 r6:00001002 r5:ddb00138 r4:ddb00000
[   18.287811] [<c057cc08>] (dev_change_flags) from [<c05e36d0>] (devinet_ioctl+0x6ac/0x76c)
[   18.296483]  r9:00000000 r8:00000000 r7:beaeda20 r6:dda7a40c r5:dc969e80 r4:00000000
[   18.304762] [<c05e3024>] (devinet_ioctl) from [<c05e4e54>] (inet_ioctl+0x19c/0x1c8)
[   18.312882]  r10:00000000 r9:00000011 r8:beaeda20 r7:c098a428 r6:beaeda20 r5:dd257ae0
[   18.321235]  r4:00008914
[   18.323956] [<c05e4cb8>] (inet_ioctl) from [<c055f5ac>] (sock_ioctl+0x15c/0x2d8)
[   18.331829] [<c055f450>] (sock_ioctl) from [<c010b388>] (do_vfs_ioctl+0x98/0x8d0)
[   18.339765]  r7:00008914 r6:dc8ab4c0 r5:dd257ae0 r4:beaeda20
[   18.345822] [<c010b2f0>] (do_vfs_ioctl) from [<c010bc34>] (SyS_ioctl+0x74/0x84)
[   18.353573]  r10:00000000 r9:00000011 r8:beaeda20 r7:00008914 r6:dc8ab4c0 r5:dc8ab4c0
[   18.361924]  r4:00000000
[   18.364653] [<c010bbc0>] (SyS_ioctl) from [<c00163e0>] (ret_fast_syscall+0x0/0x3c)
[   18.372682]  r9:dc968000 r8:c00165e8 r7:00000036 r6:00000002 r5:00000011 r4:00000000
[   18.380960] Code: e92dd810 e24cb010 e24dd010 e59b4004 (e5902180)
[   18.387580] ---[ end trace c80529466223f3f3 ]---

Signed-off-by: Andrew Goodbody <andrew.goodbody@cambrionix.com>
---

v2 - Move allocation of memory for priv->slaves to inside cpsw_probe_dt so it
     has data->slaves initialised first which is needed to calculate size

 drivers/net/ethernet/ti/cpsw.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 42fdfd4..e62909c 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -349,6 +349,7 @@ struct cpsw_slave {
 	struct cpsw_slave_data		*data;
 	struct phy_device		*phy;
 	struct net_device		*ndev;
+	struct device_node		*phy_node;
 	u32				port_vlan;
 	u32				open_stat;
 };
@@ -367,7 +368,6 @@ struct cpsw_priv {
 	spinlock_t			lock;
 	struct platform_device		*pdev;
 	struct net_device		*ndev;
-	struct device_node		*phy_node;
 	struct napi_struct		napi_rx;
 	struct napi_struct		napi_tx;
 	struct device			*dev;
@@ -1148,8 +1148,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
 				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
-	if (priv->phy_node)
-		slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
+	if (slave->phy_node)
+		slave->phy = of_phy_connect(priv->ndev, slave->phy_node,
 				 &cpsw_adjust_link, 0, slave->data->phy_if);
 	else
 		slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
@@ -1946,7 +1946,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
 	struct device_node *node = pdev->dev.of_node;
 	struct device_node *slave_node;
 	struct cpsw_platform_data *data = &priv->data;
-	int i = 0, ret;
+	int i, ret;
 	u32 prop;
 
 	if (!node)
@@ -1958,6 +1958,14 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
 	}
 	data->slaves = prop;
 
+	priv->slaves = devm_kzalloc(&pdev->dev,
+				    sizeof(struct cpsw_slave) * data->slaves,
+				    GFP_KERNEL);
+	if (!priv->slaves)
+		return -ENOMEM;
+	for (i = 0; i < data->slaves; i++)
+		priv->slaves[i].slave_num = i;
+
 	if (of_property_read_u32(node, "active_slave", &prop)) {
 		dev_err(&pdev->dev, "Missing active_slave property in the DT.\n");
 		return -EINVAL;
@@ -2023,6 +2031,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
 	if (ret)
 		dev_warn(&pdev->dev, "Doesn't have any child node\n");
 
+	i = 0;
 	for_each_child_of_node(node, slave_node) {
 		struct cpsw_slave_data *slave_data = data->slave_data + i;
 		const void *mac_addr = NULL;
@@ -2033,7 +2042,8 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
 		if (strcmp(slave_node->name, "slave"))
 			continue;
 
-		priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
+		priv->slaves[i].phy_node =
+			of_parse_phandle(slave_node, "phy-handle", 0);
 		parp = of_get_property(slave_node, "phy_id", &lenp);
 		if (of_phy_is_fixed_link(slave_node)) {
 			struct device_node *phy_node;
@@ -2292,16 +2302,6 @@ static int cpsw_probe(struct platform_device *pdev)
 
 	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
 
-	priv->slaves = devm_kzalloc(&pdev->dev,
-				    sizeof(struct cpsw_slave) * data->slaves,
-				    GFP_KERNEL);
-	if (!priv->slaves) {
-		ret = -ENOMEM;
-		goto clean_runtime_disable_ret;
-	}
-	for (i = 0; i < data->slaves; i++)
-		priv->slaves[i].slave_num = i;
-
 	priv->slaves[0].ndev = ndev;
 	priv->emac_port = 0;
 
-- 
2.5.0

^ permalink raw reply related

* Re: [PATCHv2] wlcore: spi: add wl18xx support
From: Arnd Bergmann @ 2016-04-19 14:21 UTC (permalink / raw)
  To: Reizer, Eyal
  Cc: Kalle Valo, Eyal Reizer, linux-wireless@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	devicetree@vger.kernel.org, linux-spi@vger.kernel.org
In-Reply-To: <8665E2433BC68541A24DFFCA87B70F5B360C0745@DFRE01.ent.ti.com>

On Tuesday 19 April 2016 09:05:45 Reizer, Eyal wrote:
> > > It is also part of the generic spi.h (include/Linux/spi/spi.h),
> > > already part of " struct spi_device" So it seemed redundant adding
> > > another mechanism for implementing the same.
> > > Platform that interact with a wilink need to use it, and platforms
> > > that don't have this capability will probably not interact with a wilink device
> > using SPI.
> > 
> > The cs_gpio field in spi_device belongs to the spi host controller, no other
> > slave driver uses it.
> > 
> > I wasn't asking for a duplication of this mechanism, but an interface to use it
> > properly. Internally, the spi core uses the spi_set_cs() function to pick a CS.
> > Find a way to use that rather than reimplementing it incorrectly.
> > 
> 
> Understood. As this special CS manipulation is unique to wspi (wilink spi)  I think the 
> best option is to move this gpio allocation into wlcore_spi as a new device tree entry
> used only by this driver.
> If you agree I will submit a v3.

I don't think that can work either: aside from not solving the problem
of wilink devices on spi controllers that don't use gpio, it also doesn't
solve the problem of what happens when the driver manually triggers the
gpio to hold the CS signal while another driver talks to a different
device using another CS on the same controller.

	Arnd

^ permalink raw reply

* [patch -next] geneve: testing the wrong variable in geneve6_build_skb()
From: Dan Carpenter @ 2016-04-19 14:30 UTC (permalink / raw)
  To: David S. Miller, Alexander Duyck
  Cc: Jesse Gross, John W. Linville, Pravin B Shelar, Jiri Benc,
	Daniel Borkmann, Tom Herbert, netdev, kernel-janitors

We intended to test "err" and not "skb".

Fixes: aed069df099c ('ip_tunnel_core: iptunnel_handle_offloads returns int and doesn't free skb')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index efbc7ce..512dbe0 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -733,7 +733,7 @@ static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb,
 		goto free_dst;
 
 	err = udp_tunnel_handle_offloads(skb, udp_sum);
-	if (IS_ERR(skb))
+	if (err)
 		goto free_dst;
 
 	gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);

^ permalink raw reply related

* RE: [PATCHv2] wlcore: spi: add wl18xx support
From: Reizer, Eyal @ 2016-04-19 14:35 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Kalle Valo, Eyal Reizer,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-spi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <4133521.UUI3B1RTyv@wuerfel>

> > > > It is also part of the generic spi.h (include/Linux/spi/spi.h),
> > > > already part of " struct spi_device" So it seemed redundant adding
> > > > another mechanism for implementing the same.
> > > > Platform that interact with a wilink need to use it, and platforms
> > > > that don't have this capability will probably not interact with a
> > > > wilink device
> > > using SPI.
> > >
> > > The cs_gpio field in spi_device belongs to the spi host controller,
> > > no other slave driver uses it.
> > >
> > > I wasn't asking for a duplication of this mechanism, but an
> > > interface to use it properly. Internally, the spi core uses the spi_set_cs()
> function to pick a CS.
> > > Find a way to use that rather than reimplementing it incorrectly.
> > >
> >
> > Understood. As this special CS manipulation is unique to wspi (wilink
> > spi)  I think the best option is to move this gpio allocation into
> > wlcore_spi as a new device tree entry used only by this driver.
> > If you agree I will submit a v3.
> 
> I don't think that can work either: aside of not solving the problem of wilink
> devices on spi controllers that don't use gpio, it also doesn't solve the
> problem of what happens when the driver manually triggers the gpio to hold
> the CS signal while another driver talks to a different device using another CS
> on the same controller.
> 
Ok, understood. Will look into it.

Best Regards,
Eyal
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next V3 2/2] intel: ixgbevf: Support Windows hosts (Hyper-V)
From: kbuild test robot @ 2016-04-19 14:36 UTC (permalink / raw)
  To: K. Y. Srinivasan
  Cc: olaf, netdev, jasowang, linux-kernel, alexander.duyck, jackm,
	yevgenyp, john.ronciak, intel-wired-lan, kbuild-all, eli, apw,
	devel, davem
In-Reply-To: <1461080968-25235-2-git-send-email-kys@microsoft.com>

[-- Attachment #1: Type: text/plain, Size: 1333 bytes --]

Hi,

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/K-Y-Srinivasan/ethernet-intel-Add-the-device-ID-s-presented-while-running-on-Hyper-V/20160419-221508
config: i386-randconfig-s0-201616 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/intel/ixgbevf/vf.c: In function 'ixgbevf_hv_reset_hw_vf':
>> drivers/net/ethernet/intel/ixgbevf/vf.c:142:6: warning: unused variable 'i' [-Wunused-variable]
     int i;
         ^
>> drivers/net/ethernet/intel/ixgbevf/vf.c:141:26: warning: unused variable 'adapter' [-Wunused-variable]
     struct ixgbevf_adapter *adapter = hw->back;
                             ^

vim +/i +142 drivers/net/ethernet/intel/ixgbevf/vf.c

   135	/**
   136	 * Hyper-V variant; the VF/PF communication is through the PCI
   137	 * config space.
   138	 */
   139	static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw)
   140	{
 > 141		struct ixgbevf_adapter *adapter = hw->back;
 > 142		int i;
   143	
   144	#if IS_ENABLED(CONFIG_PCI_MMCONFIG)
   145		for (i = 0; i < 6; i++)

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 27643 bytes --]

[-- Attachment #3: Type: text/plain, Size: 169 bytes --]

_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply

* Re: [PATCH v2 1/1] drivers: net: cpsw: Prevent NUll pointer dereference with two PHYs
From: Grygorii Strashko @ 2016-04-19 14:41 UTC (permalink / raw)
  To: Andrew Goodbody, netdev, David S. Miller
  Cc: linux-kernel, linux-omap, mugunthanvnm, tony
In-Reply-To: <1461074186-25535-2-git-send-email-andrew.goodbody@cambrionix.com>

Hi,

On 04/19/2016 04:56 PM, Andrew Goodbody wrote:
> Adding a 2nd PHY to cpsw results in a NULL pointer dereference
> as below. Fix by maintaining a reference to each PHY node in slave
> struct instead of a single reference in the priv struct which was
> overwritten by the 2nd PHY.

David, Is it possible to drop prev version of this patch from linux-next
- it breaks boot on many TI boards with -next.


> 
> [   17.870933] Unable to handle kernel NULL pointer dereference at virtual address 00000180
> [   17.879557] pgd = dc8bc000
> [   17.882514] [00000180] *pgd=9c882831, *pte=00000000, *ppte=00000000
> [   17.889213] Internal error: Oops: 17 [#1] ARM
> [   17.893838] Modules linked in:
> [   17.897102] CPU: 0 PID: 1657 Comm: connmand Not tainted 4.5.0-ge463dfb-dirty #11
> [   17.904947] Hardware name: Cambrionix whippet
> [   17.909576] task: dc859240 ti: dc968000 task.ti: dc968000
> [   17.915339] PC is at phy_attached_print+0x18/0x8c
> [   17.920339] LR is at phy_attached_info+0x14/0x18
> [   17.925247] pc : [<c042baec>]    lr : [<c042bb74>]    psr: 600f0113
> [   17.925247] sp : dc969cf8  ip : dc969d28  fp : dc969d18
> [   17.937425] r10: dda7a400  r9 : 00000000  r8 : 00000000
> [   17.942971] r7 : 00000001  r6 : ddb00480  r5 : ddb8cb34  r4 : 00000000
> [   17.949898] r3 : c0954cc0  r2 : c09562b0  r1 : 00000000  r0 : 00000000
> [   17.956829] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> [   17.964401] Control: 10c5387d  Table: 9c8bc019  DAC: 00000051
> [   17.970500] Process connmand (pid: 1657, stack limit = 0xdc968210)
> [   17.977059] Stack: (0xdc969cf8 to 0xdc96a000)

[...]

> [   18.323956] [<c05e4cb8>] (inet_ioctl) from [<c055f5ac>] (sock_ioctl+0x15c/0x2d8)
> [   18.331829] [<c055f450>] (sock_ioctl) from [<c010b388>] (do_vfs_ioctl+0x98/0x8d0)
> [   18.339765]  r7:00008914 r6:dc8ab4c0 r5:dd257ae0 r4:beaeda20
> [   18.345822] [<c010b2f0>] (do_vfs_ioctl) from [<c010bc34>] (SyS_ioctl+0x74/0x84)
> [   18.353573]  r10:00000000 r9:00000011 r8:beaeda20 r7:00008914 r6:dc8ab4c0 r5:dc8ab4c0
> [   18.361924]  r4:00000000
> [   18.364653] [<c010bbc0>] (SyS_ioctl) from [<c00163e0>] (ret_fast_syscall+0x0/0x3c)
> [   18.372682]  r9:dc968000 r8:c00165e8 r7:00000036 r6:00000002 r5:00000011 r4:00000000
> [   18.380960] Code: e92dd810 e24cb010 e24dd010 e59b4004 (e5902180)
> [   18.387580] ---[ end trace c80529466223f3f3 ]---

^ Could you make it shorter and drop timestamps, pls?

> 
> Signed-off-by: Andrew Goodbody <andrew.goodbody@cambrionix.com>
> ---
> 
> v2 - Move allocation of memory for priv->slaves to inside cpsw_probe_dt so it
>       has data->slaves initialised first which is needed to calculate size
> 
>   drivers/net/ethernet/ti/cpsw.c | 30 +++++++++++++++---------------
>   1 file changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
> index 42fdfd4..e62909c 100644
> --- a/drivers/net/ethernet/ti/cpsw.c
> +++ b/drivers/net/ethernet/ti/cpsw.c
> @@ -349,6 +349,7 @@ struct cpsw_slave {
>   	struct cpsw_slave_data		*data;
>   	struct phy_device		*phy;
>   	struct net_device		*ndev;
> +	struct device_node		*phy_node;
>   	u32				port_vlan;
>   	u32				open_stat;
>   };
> @@ -367,7 +368,6 @@ struct cpsw_priv {
>   	spinlock_t			lock;
>   	struct platform_device		*pdev;
>   	struct net_device		*ndev;
> -	struct device_node		*phy_node;
>   	struct napi_struct		napi_rx;
>   	struct napi_struct		napi_tx;
>   	struct device			*dev;
> @@ -1148,8 +1148,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
>   		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
>   				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
>   
> -	if (priv->phy_node)
> -		slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
> +	if (slave->phy_node)
> +		slave->phy = of_phy_connect(priv->ndev, slave->phy_node,
>   				 &cpsw_adjust_link, 0, slave->data->phy_if);
>   	else
>   		slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
> @@ -1946,7 +1946,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
>   	struct device_node *node = pdev->dev.of_node;
>   	struct device_node *slave_node;
>   	struct cpsw_platform_data *data = &priv->data;
> -	int i = 0, ret;
> +	int i, ret;
>   	u32 prop;
>   
>   	if (!node)
> @@ -1958,6 +1958,14 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
>   	}
>   	data->slaves = prop;
>   
> +	priv->slaves = devm_kzalloc(&pdev->dev,
> +				    sizeof(struct cpsw_slave) * data->slaves,
> +				    GFP_KERNEL);
> +	if (!priv->slaves)
> +		return -ENOMEM;
> +	for (i = 0; i < data->slaves; i++)
> +		priv->slaves[i].slave_num = i;
> +
>   	if (of_property_read_u32(node, "active_slave", &prop)) {
>   		dev_err(&pdev->dev, "Missing active_slave property in the DT.\n");
>   		return -EINVAL;
> @@ -2023,6 +2031,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
>   	if (ret)
>   		dev_warn(&pdev->dev, "Doesn't have any child node\n");
>   
> +	i = 0;
>   	for_each_child_of_node(node, slave_node) {
>   		struct cpsw_slave_data *slave_data = data->slave_data + i;
>   		const void *mac_addr = NULL;
> @@ -2033,7 +2042,8 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
>   		if (strcmp(slave_node->name, "slave"))
>   			continue;
>   
> -		priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
> +		priv->slaves[i].phy_node =
> +			of_parse_phandle(slave_node, "phy-handle", 0);

i++?

Ideally, the simplest way is to save phy_node in slave_data, but ...
(see comment below).


>   		parp = of_get_property(slave_node, "phy_id", &lenp);
>   		if (of_phy_is_fixed_link(slave_node)) {
>   			struct device_node *phy_node;
> @@ -2292,16 +2302,6 @@ static int cpsw_probe(struct platform_device *pdev)
>   
>   	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
>   
> -	priv->slaves = devm_kzalloc(&pdev->dev,
> -				    sizeof(struct cpsw_slave) * data->slaves,
> -				    GFP_KERNEL);
> -	if (!priv->slaves) {
> -		ret = -ENOMEM;
> -		goto clean_runtime_disable_ret;
> -	}
I don't think you can move this out from here - it will break legacy boot :(


> -	for (i = 0; i < data->slaves; i++)
> -		priv->slaves[i].slave_num = i;

Personally, I see only one safe way to do it without big rework -
do second pass of DT parsing here to fill phy_node field.



> -
>   	priv->slaves[0].ndev = ndev;
>   	priv->emac_port = 0;
>   
> 


-- 
regards,
-grygorii

^ permalink raw reply

* Re: [RFC PATCH net-next 2/8] sfc: batch up RX delivery on EF10
From: Eric Dumazet @ 2016-04-19 14:47 UTC (permalink / raw)
  To: Edward Cree
  Cc: netdev, David Miller, Jesper Dangaard Brouer, linux-net-drivers
In-Reply-To: <57163404.2000507@solarflare.com>

On Tue, 2016-04-19 at 14:35 +0100, Edward Cree wrote:
> Improves packet rate of 1-byte UDP receives by 10%.

Sure, by adding yet another queue and extra latencies.

If the switch delivered a high prio packet to your host right before a
train of 60 low prio packets, it did not do so just so that we could wait
for the end of the train before processing it.

We have to really invent something better, like a real pipeline, instead
of hacks like this, adding complexity everywhere.

Have you tested this on cpus with tiny caches, like 32KB ?

^ permalink raw reply

* Re: [PATCH] rtl8xxxu: hide unused tables
From: Jes Sorensen @ 2016-04-19 14:49 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Kalle Valo, Jakub Sitnicki, linux-wireless, netdev, linux-kernel
In-Reply-To: <1461016782-2640046-1-git-send-email-arnd@arndb.de>

Arnd Bergmann <arnd@arndb.de> writes:
> The references to some arrays in the rtl8xxxu driver were moved inside
> of an #ifdef, but the symbols remain outside, resulting in build warnings:
>
> rtl8xxxu/rtl8xxxu.c:1506:33: error: 'rtl8188ru_radioa_1t_highpa_table' defined but not used
> rtl8xxxu/rtl8xxxu.c:1431:33: error: 'rtl8192cu_radioa_1t_init_table' defined but not used
> rtl8xxxu/rtl8xxxu.c:1407:33: error: 'rtl8192cu_radiob_2t_init_table' defined but not used
> rtl8xxxu/rtl8xxxu.c:1332:33: error: 'rtl8192cu_radioa_2t_init_table' defined but not used
> rtl8xxxu/rtl8xxxu.c:239:35: error: 'rtl8192c_power_base' defined but not used
> rtl8xxxu/rtl8xxxu.c:217:35: error: 'rtl8188r_power_base' defined but not used
>
> This adds an extra #ifdef around them to shut up the warnings.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> Fixes: 2fc0b8e5a17d ("rtl8xxxu: Add TX power base values for gen1 parts")
> Fixes: 4062b8ffec36 ("rtl8xxxu: Move PHY RF init into device specific functions")
> ---
>  drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.c | 4 ++++
>  1 file changed, 4 insertions(+)

I'll apply it to my tree!

Thanks,
Jes

^ permalink raw reply

* Re: [RFC PATCH net-next 7/8] net: ipv4: listified version of ip_rcv
From: Eric Dumazet @ 2016-04-19 14:50 UTC (permalink / raw)
  To: Edward Cree
  Cc: netdev, David Miller, Jesper Dangaard Brouer, linux-net-drivers
In-Reply-To: <5716347D.3030808@solarflare.com>

On Tue, 2016-04-19 at 14:37 +0100, Edward Cree wrote:
> Also involved adding a way to run a netfilter hook over a list of packets.
> Rather than attempting to make netfilter know about lists (which would be
> horrendous) we just let it call the regular okfn (in this case
> ip_rcv_finish()) for any packets it steals, and have it give us back a list
> of packets it's synchronously accepted (which normally NF_HOOK would
> automatically call okfn() on, but we want to be able to potentially pass
> the list to a listified version of okfn().)
> 
> There is potential for out-of-order receives if the netfilter hook ends up
> synchronously stealing packets, as they will be processed before any accepts
> earlier in the list.  However, it was already possible for an asynchronous
> accept to cause out-of-order receives, so hopefully I haven't broken
> anything that wasn't broken already.
> 
> Signed-off-by: Edward Cree <ecree@solarflare.com>
> ---

We already have a hard time dealing with latencies and maintaining some
sanity in the stack(s).

This is not going to give us a 10x or even 2x improvement factor, so
what about working on something that would really lower cache line
misses and use pipelines to amortize the costs ?

The main problem in the UDP stack today is having to lock the socket because
of the dumb forward allocation problem. Are you really going to provide
a list of skbs up to _one_ UDP socket ?

^ permalink raw reply

* Re: [iproute PATCH 0/2] Minor ss filter fix and review
From: Stephen Hemminger @ 2016-04-19 14:57 UTC (permalink / raw)
  To: Phil Sutter; +Cc: Vadim Kochan, netdev
In-Reply-To: <1460578025-12224-1-git-send-email-phil@nwl.cc>

On Wed, 13 Apr 2016 22:07:03 +0200
Phil Sutter <phil@nwl.cc> wrote:

> While looking for a solution to the problem described in patch 2/2, I
> discovered the overly complicated assignment in filter_states_set() which
> is simplified in patch 1/2.
> 
> Phil Sutter (2):
>   ss: Drop silly assignment
>   ss: Fix accidental state filter override
> 
>  misc/ss.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 

Applied

^ permalink raw reply

* Re: [PATCH iproute2] ss: take care of unknown min_rtt
From: Stephen Hemminger @ 2016-04-19 14:57 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1460585918.10638.42.camel@edumazet-glaptop3.roam.corp.google.com>

On Wed, 13 Apr 2016 15:18:38 -0700
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> From: Eric Dumazet <edumazet@google.com>
> 
> Kernel sets info->tcpi_min_rtt to ~0U when no RTT sample was ever
> taken for the session, thus min_rtt is unknown.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  misc/ss.c |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 


Applied

^ permalink raw reply

* Re: [PATCH iproute2] ip: neigh: Fix leftover attributes message during flush
From: Stephen Hemminger @ 2016-04-19 14:59 UTC (permalink / raw)
  To: Jeff Harris; +Cc: netdev
In-Reply-To: <1460657703-8222-1-git-send-email-jefftharris@gmail.com>

On Thu, 14 Apr 2016 14:15:03 -0400
Jeff Harris <jefftharris@gmail.com> wrote:

> Use the same rtnl_dump_request_n call as the show.  The rtnl_wilddump_request
> assumes the type uses an ifinfomsg which is not the case for the neighbor
> table.
> 
> Signed-off-by: Jeff Harris <jefftharris@gmail.com>
> ---
>  ip/ipneigh.c |    6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)

Applied thanks

^ permalink raw reply

* Re: [patch iproute2 00/11] devlink: add support for shared buffer configuration and control
From: Stephen Hemminger @ 2016-04-19 15:01 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: netdev, davem, idosch, eladr, yotamg, ogerlitz, roopa, nikolay,
	jhs, john.fastabend, rami.rosen, gospo, sfeldma
In-Reply-To: <1460706713-5942-1-git-send-email-jiri@resnulli.us>

On Fri, 15 Apr 2016 09:51:42 +0200
Jiri Pirko <jiri@resnulli.us> wrote:

> From: Jiri Pirko <jiri@mellanox.com>
> 
> Jiri Pirko (11):
>   devlink: fix "devlink port" help message
>   list: add list_for_each_entry_reverse macro
>   list: add list_add_tail helper
>   devlink: introduce pr_out_port_handle helper
>   devlink: introduce helper to print out nice names (ifnames)
>   devlink: split dl_argv_parse_put to parse and put parts
>   devlink: introduce dump filtering function
>   devlink: allow to parse both devlink and port handle in the same time
>   devlink: implement shared buffer support
>   devlink: implement shared buffer occupancy control
>   devlink: add manpage for shared buffer
> 
>  devlink/devlink.c          | 1310 +++++++++++++++++++++++++++++++++++++++++---
>  include/linux/devlink.h    |   63 +++
>  include/list.h             |   16 +
>  man/man8/devlink-dev.8     |    2 +
>  man/man8/devlink-monitor.8 |    1 +
>  man/man8/devlink-port.8    |    2 +
>  man/man8/devlink-sb.8      |  313 +++++++++++
>  man/man8/devlink.8         |    5 +
>  8 files changed, 1636 insertions(+), 76 deletions(-)
>  create mode 100644 man/man8/devlink-sb.8
> 

Applied

^ permalink raw reply

* Re: [PATCH v2 1/1] drivers: net: cpsw: Prevent NUll pointer dereference with two PHYs
From: David Rivshin (Allworx) @ 2016-04-19 15:01 UTC (permalink / raw)
  To: Grygorii Strashko
  Cc: Andrew Goodbody, netdev, David S. Miller, linux-kernel,
	linux-omap, mugunthanvnm, tony
In-Reply-To: <57164383.6080103@ti.com>

On Tue, 19 Apr 2016 17:41:07 +0300
Grygorii Strashko <grygorii.strashko@ti.com> wrote:

> Hi,
> 
> On 04/19/2016 04:56 PM, Andrew Goodbody wrote:
> > Adding a 2nd PHY to cpsw results in a NULL pointer dereference
> > as below. Fix by maintaining a reference to each PHY node in slave
> > struct instead of a single reference in the priv struct which was
> > overwritten by the 2nd PHY.  
> 
> David, Is it possible to drop prev version of this patch from linux-next
> - it breaks boot on many TI boards with -next.
> 
> 
> > 
> > [   17.870933] Unable to handle kernel NULL pointer dereference at virtual address 00000180
> > [   17.879557] pgd = dc8bc000
> > [   17.882514] [00000180] *pgd=9c882831, *pte=00000000, *ppte=00000000
> > [   17.889213] Internal error: Oops: 17 [#1] ARM
> > [   17.893838] Modules linked in:
> > [   17.897102] CPU: 0 PID: 1657 Comm: connmand Not tainted 4.5.0-ge463dfb-dirty #11
> > [   17.904947] Hardware name: Cambrionix whippet
> > [   17.909576] task: dc859240 ti: dc968000 task.ti: dc968000
> > [   17.915339] PC is at phy_attached_print+0x18/0x8c
> > [   17.920339] LR is at phy_attached_info+0x14/0x18
> > [   17.925247] pc : [<c042baec>]    lr : [<c042bb74>]    psr: 600f0113
> > [   17.925247] sp : dc969cf8  ip : dc969d28  fp : dc969d18
> > [   17.937425] r10: dda7a400  r9 : 00000000  r8 : 00000000
> > [   17.942971] r7 : 00000001  r6 : ddb00480  r5 : ddb8cb34  r4 : 00000000
> > [   17.949898] r3 : c0954cc0  r2 : c09562b0  r1 : 00000000  r0 : 00000000
> > [   17.956829] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > [   17.964401] Control: 10c5387d  Table: 9c8bc019  DAC: 00000051
> > [   17.970500] Process connmand (pid: 1657, stack limit = 0xdc968210)
> > [   17.977059] Stack: (0xdc969cf8 to 0xdc96a000)  
> 
> [...]
> 
> > [   18.323956] [<c05e4cb8>] (inet_ioctl) from [<c055f5ac>] (sock_ioctl+0x15c/0x2d8)
> > [   18.331829] [<c055f450>] (sock_ioctl) from [<c010b388>] (do_vfs_ioctl+0x98/0x8d0)
> > [   18.339765]  r7:00008914 r6:dc8ab4c0 r5:dd257ae0 r4:beaeda20
> > [   18.345822] [<c010b2f0>] (do_vfs_ioctl) from [<c010bc34>] (SyS_ioctl+0x74/0x84)
> > [   18.353573]  r10:00000000 r9:00000011 r8:beaeda20 r7:00008914 r6:dc8ab4c0 r5:dc8ab4c0
> > [   18.361924]  r4:00000000
> > [   18.364653] [<c010bbc0>] (SyS_ioctl) from [<c00163e0>] (ret_fast_syscall+0x0/0x3c)
> > [   18.372682]  r9:dc968000 r8:c00165e8 r7:00000036 r6:00000002 r5:00000011 r4:00000000
> > [   18.380960] Code: e92dd810 e24cb010 e24dd010 e59b4004 (e5902180)
> > [   18.387580] ---[ end trace c80529466223f3f3 ]---  
> 
> ^ Could you make it shorter and drop timestamps, pls?
> 
> > 
> > Signed-off-by: Andrew Goodbody <andrew.goodbody@cambrionix.com>
> > ---
> > 
> > v2 - Move allocation of memory for priv->slaves to inside cpsw_probe_dt so it
> >       has data->slaves initialised first which is needed to calculate size
> > 
> >   drivers/net/ethernet/ti/cpsw.c | 30 +++++++++++++++---------------
> >   1 file changed, 15 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
> > index 42fdfd4..e62909c 100644
> > --- a/drivers/net/ethernet/ti/cpsw.c
> > +++ b/drivers/net/ethernet/ti/cpsw.c
> > @@ -349,6 +349,7 @@ struct cpsw_slave {
> >   	struct cpsw_slave_data		*data;
> >   	struct phy_device		*phy;
> >   	struct net_device		*ndev;
> > +	struct device_node		*phy_node;
> >   	u32				port_vlan;
> >   	u32				open_stat;
> >   };
> > @@ -367,7 +368,6 @@ struct cpsw_priv {
> >   	spinlock_t			lock;
> >   	struct platform_device		*pdev;
> >   	struct net_device		*ndev;
> > -	struct device_node		*phy_node;
> >   	struct napi_struct		napi_rx;
> >   	struct napi_struct		napi_tx;
> >   	struct device			*dev;
> > @@ -1148,8 +1148,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
> >   		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
> >   				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
> >   
> > -	if (priv->phy_node)
> > -		slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
> > +	if (slave->phy_node)
> > +		slave->phy = of_phy_connect(priv->ndev, slave->phy_node,
> >   				 &cpsw_adjust_link, 0, slave->data->phy_if);
> >   	else
> >   		slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
> > @@ -1946,7 +1946,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
> >   	struct device_node *node = pdev->dev.of_node;
> >   	struct device_node *slave_node;
> >   	struct cpsw_platform_data *data = &priv->data;
> > -	int i = 0, ret;
> > +	int i, ret;
> >   	u32 prop;
> >   
> >   	if (!node)
> > @@ -1958,6 +1958,14 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
> >   	}
> >   	data->slaves = prop;
> >   
> > +	priv->slaves = devm_kzalloc(&pdev->dev,
> > +				    sizeof(struct cpsw_slave) * data->slaves,
> > +				    GFP_KERNEL);
> > +	if (!priv->slaves)
> > +		return -ENOMEM;
> > +	for (i = 0; i < data->slaves; i++)
> > +		priv->slaves[i].slave_num = i;
> > +
> >   	if (of_property_read_u32(node, "active_slave", &prop)) {
> >   		dev_err(&pdev->dev, "Missing active_slave property in the DT.\n");
> >   		return -EINVAL;
> > @@ -2023,6 +2031,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
> >   	if (ret)
> >   		dev_warn(&pdev->dev, "Doesn't have any child node\n");
> >   
> > +	i = 0;
> >   	for_each_child_of_node(node, slave_node) {
> >   		struct cpsw_slave_data *slave_data = data->slave_data + i;
> >   		const void *mac_addr = NULL;
> > @@ -2033,7 +2042,8 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
> >   		if (strcmp(slave_node->name, "slave"))
> >   			continue;
> >   
> > -		priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
> > +		priv->slaves[i].phy_node =
> > +			of_parse_phandle(slave_node, "phy-handle", 0);  
> 
> i++?
> 
> Ideally, the simplest way is to save phy_node in slave_data, but ...
> (see comment below).

FYI, I have a patch [1] that does exactly that in my queue. Sorry 
I've been busy and haven't had a chance to rebase/retest/resubmit
since Nicolas gave his Tested-By (and I missed Andrew's original 
patch). I can probably steal some time to resurrect that quickly 
if it's preferred, just let me know.

[1] http://www.spinics.net/lists/netdev/msg357772.html

> 
> 
> >   		parp = of_get_property(slave_node, "phy_id", &lenp);
> >   		if (of_phy_is_fixed_link(slave_node)) {
> >   			struct device_node *phy_node;
> > @@ -2292,16 +2302,6 @@ static int cpsw_probe(struct platform_device *pdev)
> >   
> >   	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
> >   
> > -	priv->slaves = devm_kzalloc(&pdev->dev,
> > -				    sizeof(struct cpsw_slave) * data->slaves,
> > -				    GFP_KERNEL);
> > -	if (!priv->slaves) {
> > -		ret = -ENOMEM;
> > -		goto clean_runtime_disable_ret;
> > -	}  
> I don't think you can move this out from here - it will break legacy boot :(
> 
> 
> > -	for (i = 0; i < data->slaves; i++)
> > -		priv->slaves[i].slave_num = i;  
> 
> Personally, I see only one safe way to do it without big rework -
> do second pass of DT parsing here to fill phy_node field.
> 
> 
> 
> > -
> >   	priv->slaves[0].ndev = ndev;
> >   	priv->emac_port = 0;
> >   
> >   
> 
> 

^ permalink raw reply

* RE: Poorer networking performance in later kernels?
From: Butler, Peter @ 2016-04-19 14:54 UTC (permalink / raw)
  To: Rick Jones, netdev@vger.kernel.org
In-Reply-To: <57152600.7000801@hpe.com>

> -----Original Message-----
> From: Rick Jones [mailto:rick.jones2@hpe.com]
> Sent: April-15-16 6:37 PM
> To: Butler, Peter <pbutler@sonusnet.com>; netdev@vger.kernel.org
> Subject: Re: Poorer networking performance in later kernels?
>
> On 04/15/2016 02:02 PM, Butler, Peter wrote:
>> (Please keep me CC'd to all comments/responses)
>>
>> I've tried a kernel upgrade from 3.4.2 to 4.4.0 and see a marked drop 
>> in networking performance.  Nothing was changed on the test systems, 
>> other than the kernel itself (and kernel modules).  The identical 
>> .config used to build the 3.4.2 kernel was brought over into the
>> 4.4.0 kernel source tree, and any configuration differences (e.g. new 
>> parameters, etc.) were taken as default values.
>>
>> The testing was performed on the same actual hardware for both kernel 
>> versions (i.e. take the existing 3.4.2 physical setup, simply boot 
>> into the (new) kernel and run the same test).  The netperf utility 
>> was used for benchmarking and the testing was always performed on 
>> idle systems.
>>
>> TCP testing yielded the following results, where the 4.4.0 kernel 
>> only got about 1/2 of the throughput:
>>
>
>>         Recv     Send       Send                          Utilization       Service Demand
>>         Socket   Socket     Message Elapsed               Send     Recv     Send    Recv
>>         Size     Size       Size    Time       Throughput local    remote   local   remote
>>         bytes    bytes      bytes   secs.      10^6bits/s % S      % S      us/KB   us/KB
>>
>> 3.4.2 13631488 13631488   8952    30.01      9370.29    10.14    6.50     0.709   0.454
>> 4.4.0 13631488 13631488   8952    30.02      5314.03    9.14     14.31    1.127   1.765
>>
>> SCTP testing yielded the following results, where the 4.4.0 kernel only got about 1/3 of the throughput:
>>
>>         Recv     Send       Send                          Utilization       Service Demand
>>         Socket   Socket     Message Elapsed               Send     Recv     Send    Recv
>>         Size     Size       Size    Time       Throughput local    remote   local   remote
>>         bytes    bytes      bytes   secs.      10^6bits/s  % S     % S      us/KB   us/KB
>>
>> 3.4.2 13631488 13631488   8952    30.00      2306.22    13.87    13.19    3.941   3.747
>> 4.4.0 13631488 13631488   8952    30.01       882.74    16.86    19.14    12.516  14.210
>>
>> The same tests were performed a multitude of time, and are always 
>> consistent (within a few percent).  I've also tried playing with 
>> various run-time kernel parameters (/proc/sys/kernel/net/...) on the
>> 4.4.0 kernel to alleviate the issue but have had no success at all.
>>
>> I'm at a loss as to what could possibly account for such a discrepancy...
>>
>
> I suspect I am not alone in being curious about the CPU(s) present in the systems and the model/whatnot of the NIC being used.  I'm also curious as to why you have what at first glance seem like absurdly large socket buffer sizes.
>
> That said, it looks like you have some Really Big (tm) increases in service demand.  Many more CPU cycles being consumed per KB of data transferred.
>
> Your message size makes me wonder if you were using a 9000 byte MTU.
>
> Perhaps in the move from 3.4.2 to 4.4.0 you lost some or all of the stateless offloads for your NIC(s)?  Running ethtool -k <interface> on both ends under both kernels might be good.
>
> Also, if you did have a 9000 byte MTU under 3.4.2 are you certain you still had it under 4.4.0?
>
> It would (at least to me) also be interesting to run a TCP_RR test comparing the two kernels.  TCP_RR (at least with the default request/response size of one byte) doesn't really care about stateless offloads or MTUs and could show how much difference there is in basic path length (or I suppose in interrupt coalescing behaviour if the NIC in question has a mildly dodgy heuristic for such things).
>
> happy benchmarking,
>
> rick jones
>


I think the issue is resolved.  I had to recompile my 4.4.0 kernel with a few options pertaining to the Intel NIC which somehow (?) got left out or otherwise clobbered when I ported my 3.4.2 .config to the 4.4.0 kernel source tree.  With those changes now in I see essentially identical performance with the two kernels.  Sorry for any confusion and/or waste of time here.  My bad.

^ permalink raw reply

* Re: Poorer networking performance in later kernels?
From: Josh Hunt @ 2016-04-19 15:13 UTC (permalink / raw)
  To: Butler, Peter; +Cc: Rick Jones, netdev@vger.kernel.org
In-Reply-To: <SN1PR0301MB19983F545852731CD1410DB9D66C0@SN1PR0301MB1998.namprd03.prod.outlook.com>

On Tue, Apr 19, 2016 at 9:54 AM, Butler, Peter <pbutler@sonusnet.com> wrote:
>> -----Original Message-----
>> From: Rick Jones [mailto:rick.jones2@hpe.com]
>> Sent: April-15-16 6:37 PM
>> To: Butler, Peter <pbutler@sonusnet.com>; netdev@vger.kernel.org
>> Subject: Re: Poorer networking performance in later kernels?
>>
>> On 04/15/2016 02:02 PM, Butler, Peter wrote:
>>> (Please keep me CC'd to all comments/responses)
>>>
>>> I've tried a kernel upgrade from 3.4.2 to 4.4.0 and see a marked drop
>>> in networking performance.  Nothing was changed on the test systems,
>>> other than the kernel itself (and kernel modules).  The identical
>>> .config used to build the 3.4.2 kernel was brought over into the
>>> 4.4.0 kernel source tree, and any configuration differences (e.g. new
>>> parameters, etc.) were taken as default values.
>>>
>>> The testing was performed on the same actual hardware for both kernel
>>> versions (i.e. take the existing 3.4.2 physical setup, simply boot
>>> into the (new) kernel and run the same test).  The netperf utility
>>> was used for benchmarking and the testing was always performed on
>>> idle systems.
>>>
>>> TCP testing yielded the following results, where the 4.4.0 kernel
>>> only got about 1/2 of the throughput:
>>>
>>
>>>         Recv     Send       Send                          Utilization       Service Demand
>>>         Socket   Socket     Message Elapsed               Send     Recv     Send    Recv
>>>         Size     Size       Size    Time       Throughput local    remote   local   remote
>>>         bytes    bytes      bytes   secs.      10^6bits/s % S      % S      us/KB   us/KB
>>>
>>> 3.4.2 13631488 13631488   8952    30.01      9370.29    10.14    6.50     0.709   0.454
>>> 4.4.0 13631488 13631488   8952    30.02      5314.03    9.14     14.31    1.127   1.765
>>>
>>> SCTP testing yielded the following results, where the 4.4.0 kernel only got about 1/3 of the throughput:
>>>
>>>         Recv     Send       Send                          Utilization       Service Demand
>>>         Socket   Socket     Message Elapsed               Send     Recv     Send    Recv
>>>         Size     Size       Size    Time       Throughput local    remote   local   remote
>>>         bytes    bytes      bytes   secs.      10^6bits/s  % S     % S      us/KB   us/KB
>>>
>>> 3.4.2 13631488 13631488   8952    30.00      2306.22    13.87    13.19    3.941   3.747
>>> 4.4.0 13631488 13631488   8952    30.01       882.74    16.86    19.14    12.516  14.210
>>>
>>> The same tests were performed a multitude of time, and are always
>>> consistent (within a few percent).  I've also tried playing with
>>> various run-time kernel parameters (/proc/sys/kernel/net/...) on the
>>> 4.4.0 kernel to alleviate the issue but have had no success at all.
>>>
>>> I'm at a loss as to what could possibly account for such a discrepancy...
>>>
>>
>> I suspect I am not alone in being curious about the CPU(s) present in the systems and the model/whatnot of the NIC being used.  I'm also curious as to why you have what at first glance seem like absurdly large socket buffer sizes.
>>
>> That said, it looks like you have some Really Big (tm) increases in service demand.  Many more CPU cycles being consumed per KB of data transferred.
>>
>> Your message size makes me wonder if you were using a 9000 byte MTU.
>>
>> Perhaps in the move from 3.4.2 to 4.4.0 you lost some or all of the stateless offloads for your NIC(s)?  Running ethtool -k <interface> on both ends under both kernels might be good.
>>
>> Also, if you did have a 9000 byte MTU under 3.4.2 are you certain you still had it under 4.4.0?
>>
>> It would (at least to me) also be interesting to run a TCP_RR test comparing the two kernels.  TCP_RR (at least with the default request/response size of one byte) doesn't really care about stateless offloads or MTUs and could show how much difference there is in basic path length (or I suppose in interrupt coalescing behaviour if the NIC in question has a mildly dodgy heuristic for such things).
>>
>> happy benchmarking,
>>
>> rick jones
>>
>
>
> I think the issue is resolved.  I had to recompile my 4.4.0 kernel with a few options pertaining to the Intel NIC which somehow (?) got left out or otherwise clobbered when I ported my 3.4.2 .config to the 4.4.0 kernel source tree.  With those changes now in I see essentially identical performance with the two kernels.  Sorry for any confusion and/or waste of time here.  My bad.
>
>

Can you share which config options you enabled to get your performance back?

-- 
Josh

^ permalink raw reply

* Re: [PATCH net-next] net/hsr: Added support for HSR v1
From: Stephen Hemminger @ 2016-04-19 15:21 UTC (permalink / raw)
  To: Peter Heise
  Cc: arvid.brodin, davem, hannes, sd, henrik, nikolay, tgraf, linville,
	gospo, dsa, eranbe, ast, netdev, peter.heise
In-Reply-To: <20160413115222.GA42572@aircraft-controller>

On Wed, 13 Apr 2016 13:52:22 +0200
Peter Heise <mail@pheise.de> wrote:

> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 9427f17..bb3a90b 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -773,6 +773,7 @@ enum {
>  	IFLA_HSR_SLAVE1,
>  	IFLA_HSR_SLAVE2,
>  	IFLA_HSR_MULTICAST_SPEC,	/* Last byte of supervision addr */
> +	IFLA_HSR_VERSION,		/* HSR version */
>  	IFLA_HSR_SUPERVISION_ADDR,	/* Supervision frame multicast addr */
>  	IFLA_HSR_SEQ_NR,

You added a new value into the middle of an enumeration field.
This breaks kernel ABI. Older applications (like iproute) would see the wrong
values.

Please submit a new change which moves IFLA_HSR_VERSION to the end of the enum.

^ permalink raw reply

* Re: [PATCH v2 1/1] drivers: net: cpsw: Prevent NUll pointer dereference with two PHYs
From: Grygorii Strashko @ 2016-04-19 15:44 UTC (permalink / raw)
  To: David Rivshin (Allworx)
  Cc: Andrew Goodbody, netdev, David S. Miller, linux-kernel,
	linux-omap, mugunthanvnm, tony
In-Reply-To: <20160419110140.75e35c3c.drivshin.allworx@gmail.com>

On 04/19/2016 06:01 PM, David Rivshin (Allworx) wrote:
> On Tue, 19 Apr 2016 17:41:07 +0300
> Grygorii Strashko <grygorii.strashko@ti.com> wrote:
> 
>> Hi,
>>
>> On 04/19/2016 04:56 PM, Andrew Goodbody wrote:
>>> Adding a 2nd PHY to cpsw results in a NULL pointer dereference
>>> as below. Fix by maintaining a reference to each PHY node in slave
>>> struct instead of a single reference in the priv struct which was
>>> overwritten by the 2nd PHY.
>>
>> David, Is it possible to drop prev version of this patch from linux-next
>> - it breaks boot on many TI boards with -next.
>>
>>
>>>
>>> [   17.870933] Unable to handle kernel NULL pointer dereference at virtual address 00000180
>>> [   17.879557] pgd = dc8bc000
>>> [   17.882514] [00000180] *pgd=9c882831, *pte=00000000, *ppte=00000000
>>> [   17.889213] Internal error: Oops: 17 [#1] ARM
>>> [   17.893838] Modules linked in:
>>> [   17.897102] CPU: 0 PID: 1657 Comm: connmand Not tainted 4.5.0-ge463dfb-dirty #11
>>> [   17.904947] Hardware name: Cambrionix whippet
>>> [   17.909576] task: dc859240 ti: dc968000 task.ti: dc968000
>>> [   17.915339] PC is at phy_attached_print+0x18/0x8c
>>> [   17.920339] LR is at phy_attached_info+0x14/0x18
>>> [   17.925247] pc : [<c042baec>]    lr : [<c042bb74>]    psr: 600f0113
>>> [   17.925247] sp : dc969cf8  ip : dc969d28  fp : dc969d18
>>> [   17.937425] r10: dda7a400  r9 : 00000000  r8 : 00000000
>>> [   17.942971] r7 : 00000001  r6 : ddb00480  r5 : ddb8cb34  r4 : 00000000
>>> [   17.949898] r3 : c0954cc0  r2 : c09562b0  r1 : 00000000  r0 : 00000000
>>> [   17.956829] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
>>> [   17.964401] Control: 10c5387d  Table: 9c8bc019  DAC: 00000051
>>> [   17.970500] Process connmand (pid: 1657, stack limit = 0xdc968210)
>>> [   17.977059] Stack: (0xdc969cf8 to 0xdc96a000)
>>
>> [...]
>>
>>> [   18.323956] [<c05e4cb8>] (inet_ioctl) from [<c055f5ac>] (sock_ioctl+0x15c/0x2d8)
>>> [   18.331829] [<c055f450>] (sock_ioctl) from [<c010b388>] (do_vfs_ioctl+0x98/0x8d0)
>>> [   18.339765]  r7:00008914 r6:dc8ab4c0 r5:dd257ae0 r4:beaeda20
>>> [   18.345822] [<c010b2f0>] (do_vfs_ioctl) from [<c010bc34>] (SyS_ioctl+0x74/0x84)
>>> [   18.353573]  r10:00000000 r9:00000011 r8:beaeda20 r7:00008914 r6:dc8ab4c0 r5:dc8ab4c0
>>> [   18.361924]  r4:00000000
>>> [   18.364653] [<c010bbc0>] (SyS_ioctl) from [<c00163e0>] (ret_fast_syscall+0x0/0x3c)
>>> [   18.372682]  r9:dc968000 r8:c00165e8 r7:00000036 r6:00000002 r5:00000011 r4:00000000
>>> [   18.380960] Code: e92dd810 e24cb010 e24dd010 e59b4004 (e5902180)
>>> [   18.387580] ---[ end trace c80529466223f3f3 ]---
>>
>> ^ Could you make it shorter and drop timestamps, pls?
>>
>>>
>>> Signed-off-by: Andrew Goodbody <andrew.goodbody@cambrionix.com>
>>> ---
>>>
>>> v2 - Move allocation of memory for priv->slaves to inside cpsw_probe_dt so it
>>>        has data->slaves initialised first which is needed to calculate size
>>>
>>>    drivers/net/ethernet/ti/cpsw.c | 30 +++++++++++++++---------------
>>>    1 file changed, 15 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
>>> index 42fdfd4..e62909c 100644
>>> --- a/drivers/net/ethernet/ti/cpsw.c
>>> +++ b/drivers/net/ethernet/ti/cpsw.c
>>> @@ -349,6 +349,7 @@ struct cpsw_slave {
>>>    	struct cpsw_slave_data		*data;
>>>    	struct phy_device		*phy;
>>>    	struct net_device		*ndev;
>>> +	struct device_node		*phy_node;
>>>    	u32				port_vlan;
>>>    	u32				open_stat;
>>>    };
>>> @@ -367,7 +368,6 @@ struct cpsw_priv {
>>>    	spinlock_t			lock;
>>>    	struct platform_device		*pdev;
>>>    	struct net_device		*ndev;
>>> -	struct device_node		*phy_node;
>>>    	struct napi_struct		napi_rx;
>>>    	struct napi_struct		napi_tx;
>>>    	struct device			*dev;
>>> @@ -1148,8 +1148,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
>>>    		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
>>>    				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
>>>    
>>> -	if (priv->phy_node)
>>> -		slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
>>> +	if (slave->phy_node)
>>> +		slave->phy = of_phy_connect(priv->ndev, slave->phy_node,
>>>    				 &cpsw_adjust_link, 0, slave->data->phy_if);
>>>    	else
>>>    		slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
>>> @@ -1946,7 +1946,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
>>>    	struct device_node *node = pdev->dev.of_node;
>>>    	struct device_node *slave_node;
>>>    	struct cpsw_platform_data *data = &priv->data;
>>> -	int i = 0, ret;
>>> +	int i, ret;
>>>    	u32 prop;
>>>    
>>>    	if (!node)
>>> @@ -1958,6 +1958,14 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
>>>    	}
>>>    	data->slaves = prop;
>>>    
>>> +	priv->slaves = devm_kzalloc(&pdev->dev,
>>> +				    sizeof(struct cpsw_slave) * data->slaves,
>>> +				    GFP_KERNEL);
>>> +	if (!priv->slaves)
>>> +		return -ENOMEM;
>>> +	for (i = 0; i < data->slaves; i++)
>>> +		priv->slaves[i].slave_num = i;
>>> +
>>>    	if (of_property_read_u32(node, "active_slave", &prop)) {
>>>    		dev_err(&pdev->dev, "Missing active_slave property in the DT.\n");
>>>    		return -EINVAL;
>>> @@ -2023,6 +2031,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
>>>    	if (ret)
>>>    		dev_warn(&pdev->dev, "Doesn't have any child node\n");
>>>    
>>> +	i = 0;
>>>    	for_each_child_of_node(node, slave_node) {
>>>    		struct cpsw_slave_data *slave_data = data->slave_data + i;
>>>    		const void *mac_addr = NULL;
>>> @@ -2033,7 +2042,8 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
>>>    		if (strcmp(slave_node->name, "slave"))
>>>    			continue;
>>>    
>>> -		priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
>>> +		priv->slaves[i].phy_node =
>>> +			of_parse_phandle(slave_node, "phy-handle", 0);
>>
>> i++?
>>
>> Ideally, the simplest way is to save phy_node in slave_data, but ...
>> (see comment below).
> 
> FYI, I have a patch [1] that does exactly that in my queue. Sorry
> I've been busy and haven't had a chance to rebase/retest/resubmit
> since Nicolas gave his Tested-By (and I missed Andrew's original
> patch). I can probably steal some time to resurrect that quickly
> if it's preferred, just let me know.
> 
> [1] http://www.spinics.net/lists/netdev/msg357772.html

Ah, OK. There are no users of cpsw_platform_data outside of net/ethernet/ti/,
so yes, looks like your patch 1 does exactly what's needed.

> 
>>
>>
>>>    		parp = of_get_property(slave_node, "phy_id", &lenp);
>>>    		if (of_phy_is_fixed_link(slave_node)) {
>>>    			struct device_node *phy_node;
>>> @@ -2292,16 +2302,6 @@ static int cpsw_probe(struct platform_device *pdev)
>>>    
>>>    	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
>>>    
>>> -	priv->slaves = devm_kzalloc(&pdev->dev,
>>> -				    sizeof(struct cpsw_slave) * data->slaves,
>>> -				    GFP_KERNEL);
>>> -	if (!priv->slaves) {
>>> -		ret = -ENOMEM;
>>> -		goto clean_runtime_disable_ret;
>>> -	}
>> I don't think you can move this out from here - it will break legacy boot :(
>>
>>
>>> -	for (i = 0; i < data->slaves; i++)
>>> -		priv->slaves[i].slave_num = i;
>>
>> Personally, I see only one safe way to do it without big rework -
>> do second pass of DT parsing here to fill phy_node field.
>>
 


-- 
regards,
-grygorii

^ permalink raw reply

* Re: [RFC PATCH net-next 7/8] net: ipv4: listified version of ip_rcv
From: Tom Herbert @ 2016-04-19 15:46 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Edward Cree, Linux Kernel Network Developers, David Miller,
	Jesper Dangaard Brouer, linux-net-drivers
In-Reply-To: <1461077434.10638.189.camel@edumazet-glaptop3.roam.corp.google.com>

On Tue, Apr 19, 2016 at 7:50 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2016-04-19 at 14:37 +0100, Edward Cree wrote:
>> Also involved adding a way to run a netfilter hook over a list of packets.
>> Rather than attempting to make netfilter know about lists (which would be
>> horrendous) we just let it call the regular okfn (in this case
>> ip_rcv_finish()) for any packets it steals, and have it give us back a list
>> of packets it's synchronously accepted (which normally NF_HOOK would
>> automatically call okfn() on, but we want to be able to potentially pass
>> the list to a listified version of okfn().)
>>
>> There is potential for out-of-order receives if the netfilter hook ends up
>> synchronously stealing packets, as they will be processed before any accepts
>> earlier in the list.  However, it was already possible for an asynchronous
>> accept to cause out-of-order receives, so hopefully I haven't broken
>> anything that wasn't broken already.
>>
>> Signed-off-by: Edward Cree <ecree@solarflare.com>
>> ---
>
> We already have a hard time dealing with latencies and maintaining some
> sanity in the stack(s).
>
Right, this is significant complexity for a fairly narrow use case.
One alternative might be to move early type demux like functionality
to the GRO layer. There's a lot of work done by GRO to parse and
identify packets of the same flow, even if we can't aggregate such
packets it might be nice if we can at least provide a cached route so
that we avoid doing a full route lookup on each one later on.

Tom

> This is not going to give us a 10x or even 2x improvement factor, so
> what about working on something that would really lower cache line
> misses and use pipelines to amortize the costs ?
>
> The main problem in UDP stack today is having to lock the socket because
> of the dumb forward allocation problem. Are you really going to provide
> a list of skbs up to _one_ UDP socket ?
>
>
>

^ permalink raw reply

* [PATCH net-next V3 0/2] ethernet: intel: Support Hyper-V hosts
From: K. Y. Srinivasan @ 2016-04-19 15:49 UTC (permalink / raw)
  To: davem, netdev, linux-kernel, devel, olaf, apw, jasowang, eli,
	jackm, yevgenyp, john.ronciak, intel-wired-lan, alexander.duyck

Make adjustments to the Intel 10G VF driver to support
running on Hyper-V hosts.

K. Y. Srinivasan (2):
  ethernet: intel: Add the device ID's presented while running on
    Hyper-V
  intel: ixgbevf: Support Windows hosts (Hyper-V)

 drivers/net/ethernet/intel/ixgbevf/defines.h      |    5 +
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h      |   12 ++
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   31 +++-
 drivers/net/ethernet/intel/ixgbevf/mbx.c          |   12 ++
 drivers/net/ethernet/intel/ixgbevf/vf.c           |  216 +++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbevf/vf.h           |    2 +
 6 files changed, 271 insertions(+), 7 deletions(-)

-- 
1.7.4.1

^ permalink raw reply

* [PATCH net-next V3 1/2] ethernet: intel: Add the device ID's presented while running on Hyper-V
From: K. Y. Srinivasan @ 2016-04-19 15:49 UTC (permalink / raw)
  To: davem, netdev, linux-kernel, devel, olaf, apw, jasowang, eli,
	jackm, yevgenyp, john.ronciak, intel-wired-lan, alexander.duyck
In-Reply-To: <1461080945-25194-1-git-send-email-kys@microsoft.com>

Intel SR-IOV cards present different device IDs when running on Hyper-V.
Add the device IDs presented while running on Hyper-V.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
---
	V2: No change from V1.
	V3: No change from V2.

 drivers/net/ethernet/intel/ixgbevf/defines.h |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 5843458..1306a0d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -33,6 +33,11 @@
 #define IXGBE_DEV_ID_X550_VF		0x1565
 #define IXGBE_DEV_ID_X550EM_X_VF	0x15A8
 
+#define IXGBE_DEV_ID_82599_VF_HV	0x152E
+#define IXGBE_DEV_ID_X540_VF_HV		0x1530
+#define IXGBE_DEV_ID_X550_VF_HV		0x1564
+#define IXGBE_DEV_ID_X550EM_X_VF_HV	0x15A9
+
 #define IXGBE_VF_IRQ_CLEAR_MASK		7
 #define IXGBE_VF_MAX_TX_QUEUES		8
 #define IXGBE_VF_MAX_RX_QUEUES		8
-- 
1.7.4.1

^ permalink raw reply related

* [PATCH net-next V3 2/2] intel: ixgbevf: Support Windows hosts (Hyper-V)
From: K. Y. Srinivasan @ 2016-04-19 15:49 UTC (permalink / raw)
  To: davem, netdev, linux-kernel, devel, olaf, apw, jasowang, eli,
	jackm, yevgenyp, john.ronciak, intel-wired-lan, alexander.duyck
In-Reply-To: <1461080968-25235-1-git-send-email-kys@microsoft.com>

On Hyper-V, the VF/PF communication is via a software-mediated path
as opposed to the hardware mailbox. Make the necessary
adjustments to support Hyper-V.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
	V2: Addressed most of the comments from
	    Alexander Duyck <alexander.duyck@gmail.com>
	    and Rustad, Mark D <mark.d.rustad@intel.com>.

	V3: Addressed additional comments from
	    Alexander Duyck <alexander.duyck@gmail.com>

---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h      |   12 ++
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   31 +++-
 drivers/net/ethernet/intel/ixgbevf/mbx.c          |   12 ++
 drivers/net/ethernet/intel/ixgbevf/vf.c           |  216 +++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbevf/vf.h           |    2 +
 5 files changed, 266 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 5ac60ee..3296d27 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -460,9 +460,13 @@ enum ixbgevf_state_t {
 
 enum ixgbevf_boards {
 	board_82599_vf,
+	board_82599_vf_hv,
 	board_X540_vf,
+	board_X540_vf_hv,
 	board_X550_vf,
+	board_X550_vf_hv,
 	board_X550EM_x_vf,
+	board_X550EM_x_vf_hv,
 };
 
 enum ixgbevf_xcast_modes {
@@ -477,6 +481,13 @@ extern const struct ixgbevf_info ixgbevf_X550_vf_info;
 extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_info;
 extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops;
 
+
+extern const struct ixgbevf_info ixgbevf_82599_vf_hv_info;
+extern const struct ixgbevf_info ixgbevf_X540_vf_hv_info;
+extern const struct ixgbevf_info ixgbevf_X550_vf_hv_info;
+extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info;
+extern const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops;
+
 /* needed by ethtool.c */
 extern const char ixgbevf_driver_name[];
 extern const char ixgbevf_driver_version[];
@@ -494,6 +505,7 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *);
 void ixgbevf_free_tx_resources(struct ixgbevf_ring *);
 void ixgbevf_update_stats(struct ixgbevf_adapter *adapter);
 int ethtool_ioctl(struct ifreq *ifr);
+bool ixgbevf_on_hyperv(struct ixgbe_hw *hw);
 
 extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 007cbe0..c4bb480 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -62,10 +62,14 @@ static char ixgbevf_copyright[] =
 	"Copyright (c) 2009 - 2015 Intel Corporation.";
 
 static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
-	[board_82599_vf] = &ixgbevf_82599_vf_info,
-	[board_X540_vf]  = &ixgbevf_X540_vf_info,
-	[board_X550_vf]  = &ixgbevf_X550_vf_info,
-	[board_X550EM_x_vf] = &ixgbevf_X550EM_x_vf_info,
+	[board_82599_vf]	= &ixgbevf_82599_vf_info,
+	[board_82599_vf_hv]	= &ixgbevf_82599_vf_hv_info,
+	[board_X540_vf]		= &ixgbevf_X540_vf_info,
+	[board_X540_vf_hv]	= &ixgbevf_X540_vf_hv_info,
+	[board_X550_vf]		= &ixgbevf_X550_vf_info,
+	[board_X550_vf_hv]	= &ixgbevf_X550_vf_hv_info,
+	[board_X550EM_x_vf]	= &ixgbevf_X550EM_x_vf_info,
+	[board_X550EM_x_vf_hv]	= &ixgbevf_X550EM_x_vf_hv_info,
 };
 
 /* ixgbevf_pci_tbl - PCI Device ID Table
@@ -78,9 +82,13 @@ static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
  */
 static const struct pci_device_id ixgbevf_pci_tbl[] = {
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF), board_82599_vf },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF_HV), board_82599_vf_hv },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF), board_X540_vf },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF_HV), board_X540_vf_hv },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550_VF), board_X550_vf },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550_VF_HV), board_X550_vf_hv },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF), board_X550EM_x_vf },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV), board_X550EM_x_vf_hv},
 	/* required last entry */
 	{0, }
 };
@@ -1795,7 +1803,10 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
 		ixgbevf_setup_vfmrqc(adapter);
 
 	/* notify the PF of our intent to use this size of frame */
-	ixgbevf_rlpml_set_vf(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN);
+	if (!ixgbevf_on_hyperv(hw))
+		ixgbevf_rlpml_set_vf(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN);
+	else
+		ixgbevf_hv_rlpml_set_vf(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN);
 
 	/* Setup the HW Rx Head and Tail Descriptor Pointers and
 	 * the Base and Length of the Rx Descriptor Ring
@@ -2056,7 +2067,10 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
 	spin_lock_bh(&adapter->mbx_lock);
 
 	while (api[idx] != ixgbe_mbox_api_unknown) {
-		err = ixgbevf_negotiate_api_version(hw, api[idx]);
+		if (!ixgbevf_on_hyperv(hw))
+			err = ixgbevf_negotiate_api_version(hw, api[idx]);
+		else
+			err = ixgbevf_hv_negotiate_api_version(hw, api[idx]);
 		if (!err)
 			break;
 		idx++;
@@ -3727,7 +3741,10 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
 	netdev->mtu = new_mtu;
 
 	/* notify the PF of our intent to use this size of frame */
-	ixgbevf_rlpml_set_vf(hw, max_frame);
+	if (!ixgbevf_on_hyperv(hw))
+		ixgbevf_rlpml_set_vf(hw, max_frame);
+	else
+		ixgbevf_hv_rlpml_set_vf(hw, max_frame);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c
index dc68fea..298a0da 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.c
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c
@@ -346,3 +346,15 @@ const struct ixgbe_mbx_operations ixgbevf_mbx_ops = {
 	.check_for_rst	= ixgbevf_check_for_rst_vf,
 };
 
+/**
+ * Mailbox operations when running on Hyper-V.
+ * On Hyper-V, PF/VF communication is not through the
+ * hardware mailbox; this communication is through
+ * a software-mediated path.
+ * Most mailbox operations are no-ops while running on
+ * Hyper-V.
+ */
+const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops = {
+	.init_params	= ixgbevf_init_mbx_params_vf,
+	.check_for_rst	= ixgbevf_check_for_rst_vf,
+};
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 4d613a4..c794e0f 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -27,6 +27,13 @@
 #include "vf.h"
 #include "ixgbevf.h"
 
+/*
+ * On Hyper-V, to reset, we need to read from this offset
+ * from the PCI config space. This is the mechanism used on
+ * Hyper-V to support PF/VF communication.
+ */
+#define IXGBE_HV_RESET_OFFSET           0x201
+
 /**
  *  ixgbevf_start_hw_vf - Prepare hardware for Tx/Rx
  *  @hw: pointer to hardware structure
@@ -126,6 +133,27 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw)
 }
 
 /**
+ * Hyper-V variant; the VF/PF communication is through the PCI
+ * config space.
+ */
+static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw)
+{
+	struct ixgbevf_adapter *adapter = hw->back;
+	int i;
+
+#if IS_ENABLED(CONFIG_PCI_MMCONFIG)
+	for (i = 0; i < 6; i++)
+		pci_read_config_byte(adapter->pdev,
+				     (i + IXGBE_HV_RESET_OFFSET),
+				     &hw->mac.perm_addr[i]);
+#else
+	pr_err("PCI_MMCONFIG needs to be enabled for Hyper-V\n");
+#endif
+
+	return 0;
+}
+
+/**
  *  ixgbevf_stop_hw_vf - Generic stop Tx/Rx units
  *  @hw: pointer to hardware structure
  *
@@ -258,6 +286,11 @@ static s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
 	return ret_val;
 }
 
+static s32 ixgbevf_hv_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
+{
+	return -EOPNOTSUPP;
+}
+
 /**
  * ixgbevf_get_reta_locked - get the RSS redirection table (RETA) contents.
  * @adapter: pointer to the port handle
@@ -416,6 +449,26 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
 	return ret_val;
 }
 
+/**
+ *  ixgbevf_hv_set_rar_vf - set device MAC address Hyper-V variant
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *  @addr: Address to put into receive address register
+ *  @vmdq: Unused in this implementation
+ *
+ * We don't really allow setting the device MAC address. However,
+ * if the address being set is the permanent MAC address we will
+ * permit that.
+ **/
+static s32 ixgbevf_hv_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
+				 u32 vmdq)
+{
+	if (ether_addr_equal(addr, hw->mac.perm_addr))
+		return 0;
+
+	return -EOPNOTSUPP;
+}
+
 static void ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw,
 				       u32 *msg, u16 size)
 {
@@ -473,6 +526,15 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 }
 
 /**
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_update_mc_addr_list_vf(struct ixgbe_hw *hw,
+					  struct net_device *netdev)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
  *  ixgbevf_update_xcast_mode - Update Multicast mode
  *  @hw: pointer to the HW structure
  *  @netdev: pointer to net device structure
@@ -513,6 +575,15 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw,
 }
 
 /**
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw,
+					struct net_device *netdev, int xcast_mode)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
  *  ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address
  *  @hw: pointer to the HW structure
  *  @vlan: 12 bit VLAN ID
@@ -551,6 +622,15 @@ mbx_err:
 }
 
 /**
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+				  bool vlan_on)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
  *  ixgbevf_setup_mac_link_vf - Setup MAC link settings
  *  @hw: pointer to hardware structure
  *  @speed: Unused in this implementation
@@ -656,6 +736,67 @@ out:
 }
 
 /**
+ * Hyper-V variant; there is no mailbox communication.
+ */
+static s32 ixgbevf_hv_check_mac_link_vf(struct ixgbe_hw *hw,
+					ixgbe_link_speed *speed,
+					bool *link_up,
+					bool autoneg_wait_to_complete)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	struct ixgbe_mac_info *mac = &hw->mac;
+	u32 links_reg;
+
+	/* If we were hit with a reset drop the link */
+	if (!mbx->ops.check_for_rst(hw) || !mbx->timeout)
+		mac->get_link_status = true;
+
+	if (!mac->get_link_status)
+		goto out;
+
+	/* if link status is down no point in checking to see if pf is up */
+	links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+	if (!(links_reg & IXGBE_LINKS_UP))
+		goto out;
+
+	/* for SFP+ modules and DA cables on 82599 it can take up to 500usecs
+	 * before the link status is correct
+	 */
+	if (mac->type == ixgbe_mac_82599_vf) {
+		int i;
+
+		for (i = 0; i < 5; i++) {
+			udelay(100);
+			links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+
+			if (!(links_reg & IXGBE_LINKS_UP))
+				goto out;
+		}
+	}
+
+	switch (links_reg & IXGBE_LINKS_SPEED_82599) {
+	case IXGBE_LINKS_SPEED_10G_82599:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_1G_82599:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_100_82599:
+		*speed = IXGBE_LINK_SPEED_100_FULL;
+		break;
+	}
+
+	/* if we passed all the tests above then the link is up and we no
+	 * longer need to check for link
+	 */
+	mac->get_link_status = false;
+
+out:
+	*link_up = !mac->get_link_status;
+	return 0;
+}
+
+/**
  *  ixgbevf_rlpml_set_vf - Set the maximum receive packet length
  *  @hw: pointer to the HW structure
  *  @max_size: value to assign to max frame size
@@ -670,6 +811,25 @@ void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size)
 }
 
 /**
+ *  ixgbevf_hv_rlpml_set_vf - Set the maximum receive packet length
+ *  @hw: pointer to the HW structure
+ *  @max_size: value to assign to max frame size
+ *  Hyper-V variant.
+ **/
+void ixgbevf_hv_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size)
+{
+	u32 reg;
+
+	/* If we are on Hyper-V, we implement
+	 * this functionality differently.
+	 */
+	reg =  IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(0));
+	/* CRC == 4 */
+	reg |= ((max_size + 4) | IXGBE_RXDCTL_RLPML_EN);
+	IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(0), reg);
+}
+
+/**
  *  ixgbevf_negotiate_api_version - Negotiate supported API version
  *  @hw: pointer to the HW structure
  *  @api: integer containing requested API version
@@ -703,6 +863,22 @@ int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api)
 	return err;
 }
 
+/**
+ *  ixgbevf_hv_negotiate_api_version - Negotiate supported API version
+ *  @hw: pointer to the HW structure
+ *  @api: integer containing requested API version
+ *  Hyper-V version - only ixgbe_mbox_api_10 supported.
+ **/
+int ixgbevf_hv_negotiate_api_version(struct ixgbe_hw *hw, int api)
+{
+	/* Hyper-V only supports api version ixgbe_mbox_api_10
+	 */
+	if (api != ixgbe_mbox_api_10)
+		return IXGBE_ERR_INVALID_ARGUMENT;
+
+	return 0;
+}
+
 int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
 		       unsigned int *default_tc)
 {
@@ -776,22 +952,62 @@ static const struct ixgbe_mac_operations ixgbevf_mac_ops = {
 	.set_vfta		= ixgbevf_set_vfta_vf,
 };
 
+static const struct ixgbe_mac_operations ixgbevf_hv_mac_ops = {
+	.init_hw		= ixgbevf_init_hw_vf,
+	.reset_hw		= ixgbevf_hv_reset_hw_vf,
+	.start_hw		= ixgbevf_start_hw_vf,
+	.get_mac_addr		= ixgbevf_get_mac_addr_vf,
+	.stop_adapter		= ixgbevf_stop_hw_vf,
+	.setup_link		= ixgbevf_setup_mac_link_vf,
+	.check_link		= ixgbevf_hv_check_mac_link_vf,
+	.set_rar		= ixgbevf_hv_set_rar_vf,
+	.update_mc_addr_list	= ixgbevf_hv_update_mc_addr_list_vf,
+	.update_xcast_mode	= ixgbevf_hv_update_xcast_mode,
+	.set_uc_addr		= ixgbevf_hv_set_uc_addr_vf,
+	.set_vfta		= ixgbevf_hv_set_vfta_vf,
+};
+
 const struct ixgbevf_info ixgbevf_82599_vf_info = {
 	.mac = ixgbe_mac_82599_vf,
 	.mac_ops = &ixgbevf_mac_ops,
 };
 
+const struct ixgbevf_info ixgbevf_82599_vf_hv_info = {
+	.mac = ixgbe_mac_82599_vf,
+	.mac_ops = &ixgbevf_hv_mac_ops,
+};
+
 const struct ixgbevf_info ixgbevf_X540_vf_info = {
 	.mac = ixgbe_mac_X540_vf,
 	.mac_ops = &ixgbevf_mac_ops,
 };
 
+const struct ixgbevf_info ixgbevf_X540_vf_hv_info = {
+	.mac = ixgbe_mac_X540_vf,
+	.mac_ops = &ixgbevf_hv_mac_ops,
+};
+
 const struct ixgbevf_info ixgbevf_X550_vf_info = {
 	.mac = ixgbe_mac_X550_vf,
 	.mac_ops = &ixgbevf_mac_ops,
 };
 
+const struct ixgbevf_info ixgbevf_X550_vf_hv_info = {
+	.mac = ixgbe_mac_X550_vf,
+	.mac_ops = &ixgbevf_hv_mac_ops,
+};
+
 const struct ixgbevf_info ixgbevf_X550EM_x_vf_info = {
 	.mac = ixgbe_mac_X550EM_x_vf,
 	.mac_ops = &ixgbevf_mac_ops,
 };
+
+const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info = {
+	.mac = ixgbe_mac_X550EM_x_vf,
+	.mac_ops = &ixgbevf_hv_mac_ops,
+};
+
+bool ixgbevf_on_hyperv(struct ixgbe_hw *hw)
+{
+	return hw->mbx.ops.check_for_msg == NULL;
+}
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
index ef9f773..658883e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -208,7 +208,9 @@ static inline u32 ixgbe_read_reg_array(struct ixgbe_hw *hw, u32 reg,
 #define IXGBE_READ_REG_ARRAY(h, r, o) ixgbe_read_reg_array(h, r, o)
 
 void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size);
+void ixgbevf_hv_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size);
 int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api);
+int ixgbevf_hv_negotiate_api_version(struct ixgbe_hw *hw, int api);
 int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
 		       unsigned int *default_tc);
 int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues);
-- 
1.7.4.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox