Netdev List
 help / color / mirror / Atom feed
* Transaction
From: Nora Stanley @ 2017-08-26 18:10 UTC (permalink / raw)


Attention:

Greetings from Miss Nora Stanley, A banker who is assigned by the
Togolaise ministry of finance and inheritance funds reconciliation
Forum to represent you in the release of your assigned inheritance
funds with the ORA BANK TOGO.

I want to inform you that the ministry of finance and the inheritance
funds reconciliation forum in conjuction with the ORA BANK TOGO has
agreed to wire USD$ 7,500.000.00 (Seven Million, Five Hundred Thousand
United States Dollars Only) get in touch with me by my private email
immediately: (norasexyone@gmail.com)for more details.

Warmest Regards

A Banker: Nora Stanley

^ permalink raw reply

* Re: UDP sockets oddities
From: Florian Fainelli @ 2017-08-26 18:56 UTC (permalink / raw)
  To: Eric Dumazet, David Miller; +Cc: netdev, pabeni, willemb
In-Reply-To: <1503751671.11498.25.camel@edumazet-glaptop3.roam.corp.google.com>



On 08/26/2017 05:47 AM, Eric Dumazet wrote:
> On Fri, 2017-08-25 at 21:19 -0700, David Miller wrote:
> 
>> Agreed, but the ARP resolution queue really needs to scale it's backlog
>> to the physical technology it is attached to.
> Yes, last time (in 2011) we increased the old limit of 3 packets :/
> 
> We probably should match sysctl_wmem_max so that a single socket
> provider would hit its sk_sndbuf limit

Before:
/proc/sys/net/ipv4/neigh/eth0/unres_qlen:34
/proc/sys/net/ipv4/neigh/eth0/unres_qlen_bytes:65536
/proc/sys/net/ipv4/neigh/gphy/unres_qlen:34
/proc/sys/net/ipv4/neigh/gphy/unres_qlen_bytes:65536

After:
/proc/sys/net/ipv4/neigh/eth0/unres_qlen:106
/proc/sys/net/ipv4/neigh/eth0/unres_qlen_bytes:229376
/proc/sys/net/ipv4/neigh/gphy/unres_qlen:106
/proc/sys/net/ipv4/neigh/gphy/unres_qlen_bytes:229376

and this does help a lot with the test case reported over an hour, only
2 packets lost:

# perf record -a -g -e skb:kfree_skb iperf -c 192.168.1.23 -b 900M -t
3600 -u
------------------------------------------------------------
Client connecting to 192.168.1.23, UDP port 5001
Sending 1470 byte datagrams, IPG target: 13.07 us (kalman adjust)
UDP buffer size:  224 KByte (default)
------------------------------------------------------------
[  4] local 192.168.1.66 port 48209 connected with 192.168.1.23 port 5001
write failed: Invalid argument
[ ID] Interval       Transfer     Bandwidth
[  4]  0.0-404.9 sec  4.51 GBytes  95.7 Mbits/sec
[  4] Sent 3294727 datagrams
[  4] Server Report:
[  4]  0.0-405.1 sec  4.51 GBytes  95.6 Mbits/sec  14.979 ms
2/3294728 (6.1e-05%)

Thanks Eric!

> 
> Something like :
> 
> diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
> index 6b0bc0f715346a097a6df46e2ba2771359abcd23..7777dceb78107c0019fb39d5b69be1959005b78e 100644
> --- a/Documentation/networking/ip-sysctl.txt
> +++ b/Documentation/networking/ip-sysctl.txt
> @@ -109,7 +109,8 @@ neigh/default/unres_qlen_bytes - INTEGER
>  	queued for each	unresolved address by other network layers.
>  	(added in linux 3.3)
>  	Setting negative value is meaningless and will return error.
> -	Default: 65536 Bytes(64KB)
> +	Default: SK_WMEM_MAX, enough to store 256 packets of medium size
> +		 (less than 256 bytes per packet)
>  
>  neigh/default/unres_qlen - INTEGER
>  	The maximum number of packets which may be queued for each
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 1c2912d433e81b10f3fdc87bcfcbb091570edc03..03a362568357acc7278a318423dd3873103f90ca 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2368,6 +2368,16 @@ bool sk_net_capable(const struct sock *sk, int cap);
>  
>  void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
>  
> +/* Take into consideration the size of the struct sk_buff overhead in the
> + * determination of these values, since that is non-constant across
> + * platforms.  This makes socket queueing behavior and performance
> + * not depend upon such differences.
> + */
> +#define _SK_MEM_PACKETS		256
> +#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
> +#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
> +#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
> +
>  extern __u32 sysctl_wmem_max;
>  extern __u32 sysctl_rmem_max;
>  
> diff --git a/net/core/sock.c b/net/core/sock.c
> index dfdd14cac775e9bfcee0085ee32ffcd0ab28b67b..9b7b6bbb2a23e7652a1f34a305f29d49de00bc8c 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX];
>  static struct lock_class_key af_elock_keys[AF_MAX];
>  static struct lock_class_key af_kern_callback_keys[AF_MAX];
>  
> -/* Take into consideration the size of the struct sk_buff overhead in the
> - * determination of these values, since that is non-constant across
> - * platforms.  This makes socket queueing behavior and performance
> - * not depend upon such differences.
> - */
> -#define _SK_MEM_PACKETS		256
> -#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
> -#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
> -#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
> -
>  /* Run time adjustable parameters. */
>  __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
>  EXPORT_SYMBOL(sysctl_wmem_max);
> diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
> index 21dedf6fd0f76dec22b2b3685beb89cfefea7ded..22bf0b95d6edc3c27ef3a99d27cb70a1551e3e0e 100644
> --- a/net/decnet/dn_neigh.c
> +++ b/net/decnet/dn_neigh.c
> @@ -94,7 +94,7 @@ struct neigh_table dn_neigh_table = {
>  			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
>  			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
>  			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
> -			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64*1024,
> +			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
>  			[NEIGH_VAR_PROXY_QLEN] = 0,
>  			[NEIGH_VAR_ANYCAST_DELAY] = 0,
>  			[NEIGH_VAR_PROXY_DELAY] = 0,
> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
> index 8b52179ddc6e54eabf6d3c2ed0132083228680bb..7c45b8896709815c5dde5972fd57cb5c3bcb2648 100644
> --- a/net/ipv4/arp.c
> +++ b/net/ipv4/arp.c
> @@ -171,7 +171,7 @@ struct neigh_table arp_tbl = {
>  			[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
>  			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
>  			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
> -			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
> +			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
>  			[NEIGH_VAR_PROXY_QLEN] = 64,
>  			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
>  			[NEIGH_VAR_PROXY_DELAY]	= (8 * HZ) / 10,
> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
> index 5e338eb89509b1df6ebd060f8bd19fcb4b86fe05..266a530414d7be4f1e7be922e465bbab46f7cbac 100644
> --- a/net/ipv6/ndisc.c
> +++ b/net/ipv6/ndisc.c
> @@ -127,7 +127,7 @@ struct neigh_table nd_tbl = {
>  			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
>  			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
>  			[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
> -			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
> +			[NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
>  			[NEIGH_VAR_PROXY_QLEN] = 64,
>  			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
>  			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
> 
> 

-- 
Florian

^ permalink raw reply

* Re: [PATCH net 0/4] xfrm_user info leaks
From: Mathias Krause @ 2017-08-26 19:56 UTC (permalink / raw)
  To: Joe Perches; +Cc: Steffen Klassert, David S. Miller, Herbert Xu, netdev
In-Reply-To: <1503763138.12569.45.camel@perches.com>

On 26 August 2017 at 17:58, Joe Perches <joe@perches.com> wrote:
> On Sat, 2017-08-26 at 17:08 +0200, Mathias Krause wrote:
>> Hi David, Steffen,
>>
>> the following series fixes a few info leaks due to missing padding byte
>> initialization in the xfrm_user netlink interface.
>
> Were these found by inspection or by some tool?
> If by tool, perhaps there are other _to_user cases?

I found the one in the offload API by manual inspection, looked around
a little and found the others. No tool involved.

I already looked at the xfrm_user API back in 2012 and fixed quite a
few info leaks but missed the ones in the netlink multicast
notification code :/


Regards,
Mathias

^ permalink raw reply

* [PATCH] net: ethernet: broadcom: Remove null check before kfree
From: Himanshu Jha @ 2017-08-26 20:17 UTC (permalink / raw)
  To: davem; +Cc: jarod, netdev, linux-kernel, Himanshu Jha

Kfree on NULL pointer is a no-op and therefore checking is redundant.

Signed-off-by: Himanshu Jha <himanshujha199640@gmail.com>
---
 drivers/net/ethernet/broadcom/sb1250-mac.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 16a0f19..ecdef42 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -1367,15 +1367,11 @@ static int sbmac_initctx(struct sbmac_softc *s)
 
 static void sbdma_uninitctx(struct sbmacdma *d)
 {
-	if (d->sbdma_dscrtable_unaligned) {
-		kfree(d->sbdma_dscrtable_unaligned);
-		d->sbdma_dscrtable_unaligned = d->sbdma_dscrtable = NULL;
-	}
+	kfree(d->sbdma_dscrtable_unaligned);
+	d->sbdma_dscrtable_unaligned = d->sbdma_dscrtable = NULL;
 
-	if (d->sbdma_ctxtable) {
-		kfree(d->sbdma_ctxtable);
-		d->sbdma_ctxtable = NULL;
-	}
+	kfree(d->sbdma_ctxtable);
+	d->sbdma_ctxtable = NULL;
 }
 
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH RFC WIP 5/5] net: dsa: Don't include CPU port when adding MDB to a port
From: Andrew Lunn @ 2017-08-26 20:56 UTC (permalink / raw)
  To: netdev
  Cc: Andrew Lunn, Florian Fainelli, Vivien Didelot, nikolay, roopa,
	bridge, jiri
In-Reply-To: <1503780970-10312-1-git-send-email-andrew@lunn.ch>

Now that the MDB are explicitly added to the CPU port when required,
don't add the CPU port adding an MDB to a switch port.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
---
 net/dsa/switch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 97e2e9c8cf3f..c178e2b86a9a 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -130,7 +130,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 	if (ds->index == info->sw_index)
 		set_bit(info->port, group);
 	for (port = 0; port < ds->num_ports; port++)
-		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+		if (dsa_is_dsa_port(ds, port))
 			set_bit(port, group);
 
 	if (switchdev_trans_ph_prepare(trans)) {
-- 
2.14.1

^ permalink raw reply related

* [PATCH RFC WIP 0/5] IGMP snooping for local traffic
From: Andrew Lunn @ 2017-08-26 20:56 UTC (permalink / raw)
  To: netdev
  Cc: Vivien Didelot, Florian Fainelli, nikolay, jiri, roopa, stephen,
	bridge, Andrew Lunn

This is a WIP patchset i would like comments on from bridge, switchdev
and hardware offload people.

The linux bridge supports IGMP snooping. It will listen to IGMP
reports on bridge ports and keep track of which groups have been
joined on an interface. It will then forward multicast based on this
group membership.

When the bridge adds or removed groups from an interface, it uses
switchdev to request the hardware add an mdb to a port, so the
hardware can perform the selective forwarding between ports.

What is not covered by the current bridge code, is IGMP joins/leaves
from the host on the brX interface. No such monitoring is
performed. With a pure software bridge, it is not required. All
mulitcast frames are passed to the brX interface, and the network
stack filters them, as it does for any interface. However, when
hardware offload is involved, things change. We should program the
hardware to only send multcast packets to the host when the host has
in interest in them.

Thus we need to perform IGMP snooping on the brX interface, just like
any other interface of the bridge. However, currently the brX
interface is missing all the needed data structures to do this. There
is no net_bridge_port structure for the brX interface. This strucuture
is created when an interface is added to the bridge. But the brX
interface is not a member of the bridge. So this patchset makes the
brX interface a first class member of the bridge. When the brX
interface is opened, the interface is added to the bridge. A
net_bridge_port is allocated for it, and IGMP snooping is performed as
usual.

There are some complexities here. Some assumptions are broken, like
the master interface of a port interface is the bridge interface. The
brX interface cannot be its own master. The use of
netdev_master_upper_dev_get() within the bridge code has been changed
to reflecit this. The bridge receive handler needs to not process
frames for the brX interface, etc.

The interface downward to the hardware is also an issue. The code
presented here is a hack and needs to change. But that is secondary
and can be solved once it is agreed how the bridge needs to change to
support this use case.

Comment welcome and wanted.

	Andrew

Andrew Lunn (5):
  net: rtnetlink: Handle bridge port without upper device
  net: bridge: Skip receive handler on brX interface
  net: bridge: Make the brX interface a member of the bridge
  net: dsa: HACK: Handle MDB add/remove for none-switch ports
  net: dsa: Don't include CPU port when adding MDB to a port

 include/linux/if_bridge.h |  1 +
 net/bridge/br_device.c    | 12 ++++++++++--
 net/bridge/br_if.c        | 37 ++++++++++++++++++++++++-------------
 net/bridge/br_input.c     |  4 ++++
 net/bridge/br_mdb.c       |  2 --
 net/bridge/br_multicast.c |  7 ++++---
 net/bridge/br_private.h   |  1 +
 net/core/rtnetlink.c      | 23 +++++++++++++++++++++--
 net/dsa/port.c            | 19 +++++++++++++++++--
 net/dsa/switch.c          |  2 +-
 10 files changed, 83 insertions(+), 25 deletions(-)

-- 
2.14.1

^ permalink raw reply

* [PATCH RFC WIP 1/5] net: rtnetlink: Handle bridge port without upper device
From: Andrew Lunn @ 2017-08-26 20:56 UTC (permalink / raw)
  To: netdev
  Cc: Vivien Didelot, Florian Fainelli, nikolay, jiri, roopa, stephen,
	bridge, Andrew Lunn
In-Reply-To: <1503780970-10312-1-git-send-email-andrew@lunn.ch>

The brX interface will with a following patch becomes a member of the
bridge. It however cannot be a slave interface, since it would have to
be a slave of itself. netdev_master_upper_dev_get() returns NULL as a
result. Handle this NULL, by knowing this bridge slave must also be
the master, i.e. what we are looking for.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
---
 net/core/rtnetlink.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9201e3621351..2673eb430b6f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3093,8 +3093,12 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
 	    (dev->priv_flags & IFF_BRIDGE_PORT)) {
 		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
-		const struct net_device_ops *ops = br_dev->netdev_ops;
+		const struct net_device_ops *ops;
 
+		if (!br_dev)
+			br_dev = dev;
+
+		ops = br_dev->netdev_ops;
 		err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid,
 				       nlh->nlmsg_flags);
 		if (err)
@@ -3197,7 +3201,12 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
 	    (dev->priv_flags & IFF_BRIDGE_PORT)) {
 		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
-		const struct net_device_ops *ops = br_dev->netdev_ops;
+		const struct net_device_ops *ops;
+
+		if (!br_dev)
+			br_dev = dev;
+
+		ops = br_dev->netdev_ops;
 
 		if (ops->ndo_fdb_del)
 			err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
@@ -3332,6 +3341,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			if (!br_idx) { /* user did not specify a specific bridge */
 				if (dev->priv_flags & IFF_BRIDGE_PORT) {
 					br_dev = netdev_master_upper_dev_get(dev);
+					if (!br_dev)
+						br_dev = dev;
 					cops = br_dev->netdev_ops;
 				}
 			} else {
@@ -3410,6 +3421,9 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 	struct net_device *br_dev = netdev_master_upper_dev_get(dev);
 	int err = 0;
 
+	if (!br_dev)
+		br_dev = dev;
+
 	nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
@@ -3647,6 +3661,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
 		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+		if (!br_dev)
+			br_dev = dev;
 
 		if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) {
 			err = -EOPNOTSUPP;
@@ -3723,6 +3739,9 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
 		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
 
+		if (!br_dev)
+			br_dev = dev;
+
 		if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) {
 			err = -EOPNOTSUPP;
 			goto out;
-- 
2.14.1

^ permalink raw reply related

* [PATCH RFC WIP 2/5] net: bridge: Skip receive handler on brX interface
From: Andrew Lunn @ 2017-08-26 20:56 UTC (permalink / raw)
  To: netdev
  Cc: Vivien Didelot, Florian Fainelli, nikolay, jiri, roopa, stephen,
	bridge, Andrew Lunn
In-Reply-To: <1503780970-10312-1-git-send-email-andrew@lunn.ch>

The brX interface will soon become a member of the bridge. As such, it
will get a receiver handler assigned. However, we don't want to handle
packets received on this soft interfaces. So detect the condition and
say all the packets pass.
---
 net/bridge/br_input.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7637f58c1226..38c2a41968f2 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -267,6 +267,10 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 		return RX_HANDLER_CONSUMED;
 
 	p = br_port_get_rcu(skb->dev);
+
+	if (p->dev == p->br->dev)
+		return RX_HANDLER_PASS;
+
 	if (p->flags & BR_VLAN_TUNNEL) {
 		if (br_handle_ingress_vlan_tunnel(skb, p,
 						  nbp_vlan_group_rcu(p)))
-- 
2.14.1

^ permalink raw reply related

* [PATCH RFC WIP 3/5] net: bridge: Make the brX interface a member of the bridge
From: Andrew Lunn @ 2017-08-26 20:56 UTC (permalink / raw)
  To: netdev
  Cc: Vivien Didelot, Florian Fainelli, nikolay, jiri, roopa, stephen,
	bridge, Andrew Lunn
In-Reply-To: <1503780970-10312-1-git-send-email-andrew@lunn.ch>

In order to perform IGMP snooping on the brX interface, it has to be
part of the bridge, so that the code snooping on normal bridge ports
keeps track of IGMP joins and leaves.

When the brX interface is opened, add the interface to the bridge.
When the brX interface is closed, remove it from the bridge.

This port does however need some special handling. So add a bridge
port flag, BR_SOFT_INTERFACE, indicating a port is the sort interface
of the bridge.

When the port is added to the bridge, the netdev for this port cannot
be linked to the master device, since it is the master device.
Similarly when removing the port, it cannot be unlinked from the
master device.

With the brX interface now being a member of the bridge, and having
all associated structures, we can process IGMP messages sent by the
interface. This is done by the br_multicast_rcv() function, which
takes the bridge_port structure as a parameter. This cannot be easily
found, so keep track of it in the net_bridge structure.
---
 include/linux/if_bridge.h |  1 +
 net/bridge/br_device.c    | 12 ++++++++++--
 net/bridge/br_if.c        | 37 ++++++++++++++++++++++++-------------
 net/bridge/br_mdb.c       |  2 --
 net/bridge/br_multicast.c |  7 ++++---
 net/bridge/br_private.h   |  1 +
 6 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 3cd18ac0697f..8a03821d1827 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -49,6 +49,7 @@ struct br_ip_list {
 #define BR_MULTICAST_TO_UNICAST	BIT(12)
 #define BR_VLAN_TUNNEL		BIT(13)
 #define BR_BCAST_FLOOD		BIT(14)
+#define BR_SOFT_INTERFACE	BIT(15)
 
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 861ae2a165f4..f27ca62fd4a5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -69,7 +69,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 			br_flood(br, skb, BR_PKT_MULTICAST, false, true);
 			goto out;
 		}
-		if (br_multicast_rcv(br, NULL, skb, vid)) {
+		if (br_multicast_rcv(br, br->local_port, skb, vid)) {
 			kfree_skb(skb);
 			goto out;
 		}
@@ -133,6 +133,14 @@ static void br_dev_uninit(struct net_device *dev)
 static int br_dev_open(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
+	int err;
+
+	err = br_add_if(br, br->dev);
+	if (err)
+		return err;
+
+	br->local_port = list_first_or_null_rcu(&br->port_list,
+						struct net_bridge_port, list);
 
 	netdev_update_features(dev);
 	netif_start_queue(dev);
@@ -161,7 +169,7 @@ static int br_dev_stop(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	return 0;
+	return br_del_if(br, br->dev);
 }
 
 static void br_get_stats64(struct net_device *dev,
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3aef22931ab..49208e774191 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -284,7 +284,8 @@ static void del_nbp(struct net_bridge_port *p)
 
 	nbp_update_port_count(br);
 
-	netdev_upper_dev_unlink(dev, br->dev);
+	if (!(p->flags & BR_SOFT_INTERFACE))
+		netdev_upper_dev_unlink(dev, br->dev);
 
 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
 
@@ -362,6 +363,8 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 	p->priority = 0x8000 >> BR_PORT_BITS;
 	p->port_no = index;
 	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
+	if (br->dev == dev)
+		p->flags |= BR_SOFT_INTERFACE;
 	br_init_port(p);
 	br_set_state(p, BR_STATE_DISABLED);
 	br_stp_port_timer_init(p);
@@ -500,8 +503,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		return -EINVAL;
 
 	/* No bridging of bridges */
-	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
-		return -ELOOP;
+	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
+		/* Unless it is our own soft interface */
+		if (br->dev != dev)
+			return -ELOOP;
+	}
 
 	/* Device is already being bridged */
 	if (br_port_exists(dev))
@@ -540,9 +546,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	dev->priv_flags |= IFF_BRIDGE_PORT;
 
-	err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
-	if (err)
-		goto err5;
+	if (!(p->flags & BR_SOFT_INTERFACE)) {
+		err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
+		if (err)
+			goto err5;
+	}
 
 	err = nbp_switchdev_mark_set(p);
 	if (err)
@@ -563,13 +571,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	else
 		netdev_set_rx_headroom(dev, br_hr);
 
-	if (br_fdb_insert(br, p, dev->dev_addr, 0))
-		netdev_err(dev, "failed insert local address bridge forwarding table\n");
+	if (!(p->flags & BR_SOFT_INTERFACE)) {
+		if (br_fdb_insert(br, p, dev->dev_addr, 0))
+			netdev_err(dev, "failed insert local address bridge forwarding table\n");
 
-	err = nbp_vlan_init(p);
-	if (err) {
-		netdev_err(dev, "failed to initialize vlan filtering on this port\n");
-		goto err7;
+		err = nbp_vlan_init(p);
+		if (err) {
+			netdev_err(dev, "failed to initialize vlan filtering on this port\n");
+			goto err7;
+		}
 	}
 
 	spin_lock_bh(&br->lock);
@@ -597,7 +607,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_fdb_delete_by_port(br, p, 0, 1);
 	nbp_update_port_count(br);
 err6:
-	netdev_upper_dev_unlink(dev, br->dev);
+	if (!(p->flags & BR_SOFT_INTERFACE))
+		netdev_upper_dev_unlink(dev, br->dev);
 err5:
 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
 	netdev_rx_handler_unregister(dev);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index a0b11e7d67d9..47f0d9b4221d 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -117,8 +117,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 				struct br_mdb_entry e;
 
 				port = p->port;
-				if (!port)
-					continue;
 
 				memset(&e, 0, sizeof(e));
 				e.ifindex = port->dev->ifindex;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index dae3af1f531a..f1bf9ec15de8 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -915,7 +915,7 @@ static void __br_multicast_send_query(struct net_bridge *br,
 	if (!skb)
 		return;
 
-	if (port) {
+	if (port && !(port->flags & BR_SOFT_INTERFACE)) {
 		skb->dev = port->dev;
 		br_multicast_count(br, port, skb, igmp_type,
 				   BR_MCAST_DIR_TX);
@@ -944,8 +944,9 @@ static void br_multicast_send_query(struct net_bridge *br,
 
 	memset(&br_group.u, 0, sizeof(br_group.u));
 
-	if (port ? (own_query == &port->ip4_own_query) :
-		   (own_query == &br->ip4_own_query)) {
+	if (port && !(port->flags & BR_SOFT_INTERFACE) ?
+	    (own_query == &port->ip4_own_query) :
+	    (own_query == &br->ip4_own_query)) {
 		other_query = &br->ip4_other_query;
 		br_group.proto = htons(ETH_P_IP);
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fd9ee73e0a6d..c4b99a35abb0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -296,6 +296,7 @@ struct net_bridge {
 	spinlock_t			lock;
 	spinlock_t			hash_lock;
 	struct list_head		port_list;
+	struct net_bridge_port		*local_port;
 	struct net_device		*dev;
 	struct pcpu_sw_netstats		__percpu *stats;
 	/* These fields are accessed on each packet */
-- 
2.14.1

^ permalink raw reply related

* [PATCH RFC WIP 4/5] net: dsa: HACK: Handle MDB add/remove for none-switch ports
From: Andrew Lunn @ 2017-08-26 20:56 UTC (permalink / raw)
  To: netdev
  Cc: Vivien Didelot, Florian Fainelli, nikolay, jiri, roopa, stephen,
	bridge, Andrew Lunn
In-Reply-To: <1503780970-10312-1-git-send-email-andrew@lunn.ch>

When there is a mdb added to a port which is not in the switch, we
need the switch to forward traffic for the group to the software
bridge, so it can forward it out the none-switch port.

The current implementation is a hack and will be replaced. Currently
only the bridge soft interface is supported. When there is a
join/leave on the soft interface, switchdev calls are made on the soft
interface device, brX. This does not have a switchdev ops structure
registered, so all lower interfaces of brX get there switchdev
function called. These are switch ports, and do have switchdev ops. By
comparing the original interface to the called interface, we can
determine this is not for a switch port, and add/remove the mdb to the
CPU port.
---
 net/dsa/port.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/dsa/port.c b/net/dsa/port.c
index d6e07176df3f..d8e4bfefd97d 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -194,8 +194,15 @@ int dsa_port_mdb_add(struct dsa_port *dp,
 		.mdb = mdb,
 	};
 
-	pr_info("dsa_port_mdb_add: %d %d", info.sw_index, info.port);
-
+	if (dp->netdev != mdb->obj.orig_dev) {
+		/* Not a port for this switch, so forward
+		 * multicast out the CPU port to the bridge.
+		 */
+		struct dsa_switch_tree *dst = dp->ds->dst;
+		struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
+		info.port = cpu_dp->index;
+		return dsa_port_notify(cpu_dp, DSA_NOTIFIER_MDB_ADD, &info);
+	}
 	return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
 }
 
@@ -208,6 +215,14 @@ int dsa_port_mdb_del(struct dsa_port *dp,
 		.mdb = mdb,
 	};
 
+	if (dp->netdev != mdb->obj.orig_dev) {
+		struct dsa_switch_tree *dst = dp->ds->dst;
+		struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
+
+		info.port = cpu_dp->index;
+		return dsa_port_notify(cpu_dp, DSA_NOTIFIER_MDB_DEL, &info);
+	}
+
 	return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
 }
 
-- 
2.14.1

^ permalink raw reply related

* Re: [PATCH v4 4/5] net: stmmac: dwmac-sun8i: choose internal PHY via phy-is-integrated
From: Andrew Lunn @ 2017-08-26 21:20 UTC (permalink / raw)
  To: Corentin Labbe
  Cc: robh+dt, mark.rutland, maxime.ripard, wens, linux,
	peppe.cavallaro, alexandre.torgue, f.fainelli, icenowy, netdev,
	devicetree, linux-arm-kernel, linux-kernel
In-Reply-To: <20170826073311.25612-5-clabbe.montjoie@gmail.com>

Hi Corentin

I think we have now all agreed this is an mdio-mux, plus it is also an
MII mux. We should represent that in device tree. This patchset does
this. However, as it is now, the mux structure in DT is ignored. All
it does is search for the phy-is-integrated flags and goes on that.

I made the comment that the device tree representation cannot be
implemented using an MDIO mux driver, because of driver loading
issues.  However, the core of the MDIO mux code is just a library,
symbols exported as GPL, free for anything to use.

What i think should happen is the mdio-mux is implemented inside the
MAC driver, using the mux-core as a library. The device tree structure
of a mix is then reflected within Linux. The mux switch callback is
implemented within the MAC driver. So it can reset the MAC when the
mux is switched. The 'phy-is-integrated' property is then no longer
needed.

I would suggest a binding something like:

emac: ethernet@1c0b000 {
        compatible = "allwinner,sun8i-h3-emac";
        syscon = <&syscon>;
        reg = <0x01c0b000 0x104>;
        interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
        interrupt-names = "macirq";
        resets = <&ccu RST_BUS_EMAC>;
        reset-names = "stmmaceth";
        clocks = <&ccu CLK_BUS_EMAC>;
        clock-names = "stmmaceth";
        #address-cells = <1>;
        #size-cells = <0>;

        phy-handle = <&int_mii_phy>;
        phy-mode = "mii";
        allwinner,leds-active-low;

        mdio: mdio {
                #address-cells = <1>;
                #size-cells = <0>;
	}

	mdio-mux {
                #address-cells = <1>;
                #size-cells = <0>;

		mdio@0 {
			reg = <0>;
                        #address-cells = <1>;
                        #size-cells = <0>;

                        int_mii_phy: ethernet-phy@1 {
                                reg = <1>;
                                clocks = <&ccu CLK_BUS_EPHY>;
                                resets = <&ccu RST_BUS_EPHY>;
                        };
                };
                ext_mdio: mdio@0 {
                        #address-cells = <1>;
                        #size-cells = <0>;

                        ext_rgmii_phy: ethernet-phy@1 {
                                reg = <1>;
                        };
                };
       };
};

	Andrew

^ permalink raw reply

* Re: [PATCH RFC WIP 0/5] IGMP snooping for local traffic
From: Nikolay Aleksandrov @ 2017-08-26 22:17 UTC (permalink / raw)
  To: Andrew Lunn, netdev
  Cc: Vivien Didelot, Florian Fainelli, jiri, roopa, stephen, bridge
In-Reply-To: <1503780970-10312-1-git-send-email-andrew@lunn.ch>

On 26/08/17 23:56, Andrew Lunn wrote:
> This is a WIP patchset i would like comments on from bridge, switchdev
> and hardware offload people.
> 
> The linux bridge supports IGMP snooping. It will listen to IGMP
> reports on bridge ports and keep track of which groups have been
> joined on an interface. It will then forward multicast based on this
> group membership.
> 
> When the bridge adds or removed groups from an interface, it uses
> switchdev to request the hardware add an mdb to a port, so the
> hardware can perform the selective forwarding between ports.
> 
> What is not covered by the current bridge code, is IGMP joins/leaves
> from the host on the brX interface. No such monitoring is

Hi Andrew,

Have you taken a look at mglist (the boolean, probably needs a rename) ? It is for
exactly that purpose, to track which groups the bridge is interested in.
I assume I'm forgetting or missing something here.

> performed. With a pure software bridge, it is not required. All
> mulitcast frames are passed to the brX interface, and the network

If mglist (again the boolean) is false then they won't be passed up.

> stack filters them, as it does for any interface. However, when
> hardware offload is involved, things change. We should program the
> hardware to only send multcast packets to the host when the host has
> in interest in them.

Granted the boolean mglist might need some changes (esp. with host group leave)
but I think it can be used to program switchdev for host join/leave, can't
we adjust its behaviour instead of introducing this complexity and avoid many
headaches ?

> 
> Thus we need to perform IGMP snooping on the brX interface, just like
> any other interface of the bridge. However, currently the brX
> interface is missing all the needed data structures to do this. There
> is no net_bridge_port structure for the brX interface. This strucuture
> is created when an interface is added to the bridge. But the brX
> interface is not a member of the bridge. So this patchset makes the
> brX interface a first class member of the bridge. When the brX
> interface is opened, the interface is added to the bridge. A
> net_bridge_port is allocated for it, and IGMP snooping is performed as
> usual.

I have actually discussed this idea long time ago with Vlad and it has very nice
upsides (most important one removing br/port checks everywhere) but it blows up
fast with special cases for the bridge and things look very similar. You'll need
to rework the whole bridge and turn every bridge special case into either a port 
generic one or again bridge-specific special case but with a check for the new flag.
I will not point out every bug that comes out of this, but registering the bridge
rx handler to itself is simply wrong on many levels and breaks many setups.

> 
> There are some complexities here. Some assumptions are broken, like
> the master interface of a port interface is the bridge interface. The
> brX interface cannot be its own master. The use of
> netdev_master_upper_dev_get() within the bridge code has been changed
> to reflecit this. The bridge receive handler needs to not process
> frames for the brX interface, etc.
> 
> The interface downward to the hardware is also an issue. The code
> presented here is a hack and needs to change. But that is secondary
> and can be solved once it is agreed how the bridge needs to change to
> support this use case.

Definitely agree with this statement. :-)

> 
> Comment welcome and wanted.
> 
> 	Andrew
> 
> Andrew Lunn (5):
>   net: rtnetlink: Handle bridge port without upper device
>   net: bridge: Skip receive handler on brX interface
>   net: bridge: Make the brX interface a member of the bridge
>   net: dsa: HACK: Handle MDB add/remove for none-switch ports
>   net: dsa: Don't include CPU port when adding MDB to a port
> 
>  include/linux/if_bridge.h |  1 +
>  net/bridge/br_device.c    | 12 ++++++++++--
>  net/bridge/br_if.c        | 37 ++++++++++++++++++++++++-------------
>  net/bridge/br_input.c     |  4 ++++
>  net/bridge/br_mdb.c       |  2 --
>  net/bridge/br_multicast.c |  7 ++++---
>  net/bridge/br_private.h   |  1 +
>  net/core/rtnetlink.c      | 23 +++++++++++++++++++++--
>  net/dsa/port.c            | 19 +++++++++++++++++--
>  net/dsa/switch.c          |  2 +-
>  10 files changed, 83 insertions(+), 25 deletions(-)
> 

^ permalink raw reply

* Re: [PATCH RFC WIP 0/5] IGMP snooping for local traffic
From: Nikolay Aleksandrov @ 2017-08-26 22:40 UTC (permalink / raw)
  To: Andrew Lunn, netdev; +Cc: Florian Fainelli, Vivien Didelot, roopa, bridge, jiri
In-Reply-To: <c3f5050f-7a38-c28c-31d0-df5909259fb1@cumulusnetworks.com>

On 27.08.2017 01:17, Nikolay Aleksandrov wrote:
> On 26/08/17 23:56, Andrew Lunn wrote:
>> This is a WIP patchset i would like comments on from bridge, switchdev
>> and hardware offload people.
>>
>> The linux bridge supports IGMP snooping. It will listen to IGMP
>> reports on bridge ports and keep track of which groups have been
>> joined on an interface. It will then forward multicast based on this
>> group membership.
>>
>> When the bridge adds or removed groups from an interface, it uses
>> switchdev to request the hardware add an mdb to a port, so the
>> hardware can perform the selective forwarding between ports.
>>
>> What is not covered by the current bridge code, is IGMP joins/leaves
>> from the host on the brX interface. No such monitoring is
> 
> Hi Andrew,
> 
> Have you taken a look at mglist (the boolean, probably needs a rename) ? It is for
> exactly that purpose, to track which groups the bridge is interested in.
> I assume I'm forgetting or missing something here.
> 
>> performed. With a pure software bridge, it is not required. All
>> mulitcast frames are passed to the brX interface, and the network
> 
> If mglist (again the boolean) is false then they won't be passed up.
> 
>> stack filters them, as it does for any interface. However, when
>> hardware offload is involved, things change. We should program the
>> hardware to only send multcast packets to the host when the host has
>> in interest in them.
> 
> Granted the boolean mglist might need some changes (esp. with host group leave)
> but I think it can be used to program switchdev for host join/leave, can't
> we adjust its behaviour instead of introducing this complexity and avoid many
> headaches ?
> 
>>
>> Thus we need to perform IGMP snooping on the brX interface, just like
>> any other interface of the bridge. However, currently the brX
>> interface is missing all the needed data structures to do this. There
>> is no net_bridge_port structure for the brX interface. This strucuture
>> is created when an interface is added to the bridge. But the brX
>> interface is not a member of the bridge. So this patchset makes the
>> brX interface a first class member of the bridge. When the brX
>> interface is opened, the interface is added to the bridge. A
>> net_bridge_port is allocated for it, and IGMP snooping is performed as
>> usual.
> 
> I have actually discussed this idea long time ago with Vlad and it has very nice
> upsides (most important one removing br/port checks everywhere) but it blows up
> fast with special cases for the bridge and things look very similar. You'll need
> to rework the whole bridge and turn every bridge special case into either a port 
> generic one or again bridge-specific special case but with a check for the new flag.
> I will not point out every bug that comes out of this, but registering the bridge
> rx handler to itself is simply wrong on many levels and breaks many setups.

This was a digression about making the bridge a proper port of itself
(e.g. port 0, linked and all), it is only tangential to this
implementation as it doesn't link the new port.

> 
>>
>> There are some complexities here. Some assumptions are broken, like
>> the master interface of a port interface is the bridge interface. The
>> brX interface cannot be its own master. The use of
>> netdev_master_upper_dev_get() within the bridge code has been changed
>> to reflecit this. The bridge receive handler needs to not process
>> frames for the brX interface, etc.
>>
>> The interface downward to the hardware is also an issue. The code
>> presented here is a hack and needs to change. But that is secondary
>> and can be solved once it is agreed how the bridge needs to change to
>> support this use case.
> 
> Definitely agree with this statement. :-)
> 
>>
>> Comment welcome and wanted.
>>
>> 	Andrew
>>
>> Andrew Lunn (5):
>>   net: rtnetlink: Handle bridge port without upper device
>>   net: bridge: Skip receive handler on brX interface
>>   net: bridge: Make the brX interface a member of the bridge
>>   net: dsa: HACK: Handle MDB add/remove for none-switch ports
>>   net: dsa: Don't include CPU port when adding MDB to a port
>>
>>  include/linux/if_bridge.h |  1 +
>>  net/bridge/br_device.c    | 12 ++++++++++--
>>  net/bridge/br_if.c        | 37 ++++++++++++++++++++++++-------------
>>  net/bridge/br_input.c     |  4 ++++
>>  net/bridge/br_mdb.c       |  2 --
>>  net/bridge/br_multicast.c |  7 ++++---
>>  net/bridge/br_private.h   |  1 +
>>  net/core/rtnetlink.c      | 23 +++++++++++++++++++++--
>>  net/dsa/port.c            | 19 +++++++++++++++++--
>>  net/dsa/switch.c          |  2 +-
>>  10 files changed, 83 insertions(+), 25 deletions(-)
>>
> 

^ permalink raw reply

* Re: [PATCH RFC WIP 0/5] IGMP snooping for local traffic
From: Andrew Lunn @ 2017-08-26 23:02 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: netdev, Vivien Didelot, Florian Fainelli, jiri, roopa, stephen,
	bridge
In-Reply-To: <c3f5050f-7a38-c28c-31d0-df5909259fb1@cumulusnetworks.com>

> Hi Andrew,
> 
> Have you taken a look at mglist (the boolean, probably needs a rename) ? It is for
> exactly that purpose, to track which groups the bridge is interested in.
> I assume I'm forgetting or missing something here.
> 
> > performed. With a pure software bridge, it is not required. All
> > mulitcast frames are passed to the brX interface, and the network
> 
> If mglist (again the boolean) is false then they won't be passed up.
> 
> > stack filters them, as it does for any interface. However, when
> > hardware offload is involved, things change. We should program the
> > hardware to only send multcast packets to the host when the host has
> > in interest in them.
> 
> Granted the boolean mglist might need some changes (esp. with host group leave)
> but I think it can be used to program switchdev for host join/leave, can't
> we adjust its behaviour instead of introducing this complexity and avoid many
> headaches ?

I would like to avoid this complexity as well. I will take a look at
mglist. Thanks for the hint.

	Andrew

^ permalink raw reply

* [PATCH net] bridge: check for null fdb->dst before notifying switchdev drivers
From: Roopa Prabhu @ 2017-08-27  4:13 UTC (permalink / raw)
  To: davem; +Cc: netdev, arkadis

From: Roopa Prabhu <roopa@cumulusnetworks.com>

current switchdev drivers dont seem to support offloading fdb
entries pointing to the bridge device which have fdb->dst
not set to any port. This patch adds a NULL fdb->dst check in
the switchdev notifier code.

This patch fixes the below NULL ptr dereference:
$bridge fdb add 00:02:00:00:00:33 dev br0 self

[   69.953374] BUG: unable to handle kernel NULL pointer dereference at
0000000000000008
[   69.954044] IP: br_switchdev_fdb_notify+0x29/0x80
[   69.954044] PGD 66527067
[   69.954044] P4D 66527067
[   69.954044] PUD 7899c067
[   69.954044] PMD 0
[   69.954044]
[   69.954044] Oops: 0000 [#1] SMP
[   69.954044] Modules linked in:
[   69.954044] CPU: 1 PID: 3074 Comm: bridge Not tainted 4.13.0-rc6+ #1
[   69.954044] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org
04/01/2014
[   69.954044] task: ffff88007b827140 task.stack: ffffc90001564000
[   69.954044] RIP: 0010:br_switchdev_fdb_notify+0x29/0x80
[   69.954044] RSP: 0018:ffffc90001567918 EFLAGS: 00010246
[   69.954044] RAX: 0000000000000000 RBX: ffff8800795e0880 RCX:
00000000000000c0
[   69.954044] RDX: ffffc90001567920 RSI: 000000000000001c RDI:
ffff8800795d0600
[   69.954044] RBP: ffffc90001567938 R08: ffff8800795d0600 R09:
0000000000000000
[   69.954044] R10: ffffc90001567a88 R11: ffff88007b849400 R12:
ffff8800795e0880
[   69.954044] R13: ffff8800795d0600 R14: ffffffff81ef8880 R15:
000000000000001c
[   69.954044] FS:  00007f93d3085700(0000) GS:ffff88007fd00000(0000)
knlGS:0000000000000000
[   69.954044] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   69.954044] CR2: 0000000000000008 CR3: 0000000066551000 CR4:
00000000000006e0
[   69.954044] Call Trace:
[   69.954044]  fdb_notify+0x3f/0xf0
[   69.954044]  __br_fdb_add.isra.12+0x1a7/0x370
[   69.954044]  br_fdb_add+0x178/0x280
[   69.954044]  rtnl_fdb_add+0x10a/0x200
[   69.954044]  rtnetlink_rcv_msg+0x1b4/0x240
[   69.954044]  ? skb_free_head+0x21/0x40
[   69.954044]  ? rtnl_calcit.isra.18+0xf0/0xf0
[   69.954044]  netlink_rcv_skb+0xed/0x120
[   69.954044]  rtnetlink_rcv+0x15/0x20
[   69.954044]  netlink_unicast+0x180/0x200
[   69.954044]  netlink_sendmsg+0x291/0x370
[   69.954044]  ___sys_sendmsg+0x180/0x2e0
[   69.954044]  ? filemap_map_pages+0x2db/0x370
[   69.954044]  ? do_wp_page+0x11d/0x420
[   69.954044]  ? __handle_mm_fault+0x794/0xd80
[   69.954044]  ? vma_link+0xcb/0xd0
[   69.954044]  __sys_sendmsg+0x4c/0x90
[   69.954044]  SyS_sendmsg+0x12/0x20
[   69.954044]  do_syscall_64+0x63/0xe0
[   69.954044]  entry_SYSCALL64_slow_path+0x25/0x25
[   69.954044] RIP: 0033:0x7f93d2bad690
[   69.954044] RSP: 002b:00007ffc7217a638 EFLAGS: 00000246 ORIG_RAX:
000000000000002e
[   69.954044] RAX: ffffffffffffffda RBX: 00007ffc72182eac RCX:
00007f93d2bad690
[   69.954044] RDX: 0000000000000000 RSI: 00007ffc7217a670 RDI:
0000000000000003
[   69.954044] RBP: 0000000059a1f7f8 R08: 0000000000000006 R09:
000000000000000a
[   69.954044] R10: 00007ffc7217a400 R11: 0000000000000246 R12:
00007ffc7217a670
[   69.954044] R13: 00007ffc72182a98 R14: 00000000006114c0 R15:
00007ffc72182aa0
[   69.954044] Code: 1f 00 66 66 66 66 90 55 48 89 e5 48 83 ec 20 f6 47
20 04 74 0a 83 fe 1c 74 09 83 fe 1d 74 2c c9 66 90 c3 48 8b 47 10 48 8d
55 e8 <48> 8b 70 08 0f b7 47 1e 48 83 c7 18 48 89 7d f0 bf 03 00 00 00
[   69.954044] RIP: br_switchdev_fdb_notify+0x29/0x80 RSP:
ffffc90001567918
[   69.954044] CR2: 0000000000000008
[   69.954044] ---[ end trace 03e9eec4a82c238b ]---

Fixes: 6b26b51b1d13 ("net: bridge: Add support for notifying devices about FDB add/del")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 net/bridge/br_switchdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 181a44d..f6b1c7d 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -115,7 +115,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
 void
 br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 {
-	if (!fdb->added_by_user)
+	if (!fdb->added_by_user || !fdb->dst)
 		return;
 
 	switch (type) {
-- 
2.1.4

^ permalink raw reply related

* [PATCH] igb: check memory allocation failure
From: Christophe JAILLET @ 2017-08-27  6:39 UTC (permalink / raw)
  To: jeffrey.t.kirsher
  Cc: intel-wired-lan, netdev, linux-kernel, kernel-janitors,
	Christophe JAILLET

Check memory allocation failures and return -ENOMEM in such cases, as
already done for other memory allocations in this function.

This avoids NULL pointers dereference.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index fd4a46b03cc8..837d9b46a390 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3162,6 +3162,8 @@ static int igb_sw_init(struct igb_adapter *adapter)
 	/* Setup and initialize a copy of the hw vlan table array */
 	adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
 				       GFP_ATOMIC);
+	if (!adapter->shadow_vfta)
+		return -ENOMEM;
 
 	/* This call may decrease the number of queues */
 	if (igb_init_interrupt_scheme(adapter, true)) {
-- 
2.11.0

^ permalink raw reply related

* [PATCH] be2net: Fix some u16 fields appropriately
From: Haishuang Yan @ 2017-08-27  7:24 UTC (permalink / raw)
  To: Sathya Perla, jit Khaparde, Sriharsha Basavapatna, Somnath Kotur
  Cc: netdev, linux-kernel, Haishuang Yan

In be_tx_compl_process, frag_index declared as u32, so it's better to
declare last_index as u32 also.

CC: Ajit Khaparde <ajit.khaparde@broadcom.com>
Fixes: b0fd2eb28bd4 ("be2net: Declare some u16 fields as u32 to improve
performance")
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
---
 drivers/net/ethernet/emulex/benet/be.h      | 2 +-
 drivers/net/ethernet/emulex/benet/be_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 674cf9d..2ba4d61 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -255,7 +255,7 @@ struct be_tx_stats {
 /* Structure to hold some data of interest obtained from a TX CQE */
 struct be_tx_compl_info {
 	u8 status;		/* Completion status */
-	u16 end_index;		/* Completed TXQ Index */
+	u32 end_index;		/* Completed TXQ Index */
 };
 
 struct be_tx_obj {
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 319eee3..3645344 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2606,7 +2606,7 @@ static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
 }
 
 static u16 be_tx_compl_process(struct be_adapter *adapter,
-			       struct be_tx_obj *txo, u16 last_index)
+			       struct be_tx_obj *txo, u32 last_index)
 {
 	struct sk_buff **sent_skbs = txo->sent_skb_list;
 	struct be_queue_info *txq = &txo->q;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH] pktgen: add a new sample script for 40G and above link testing
From: Tariq Toukan @ 2017-08-27  8:25 UTC (permalink / raw)
  To: Robert Hoo, davem, tariqt, brouer, kyle.leet; +Cc: netdev, robert.hu
In-Reply-To: <1503653196-64418-1-git-send-email-robert.hu@linux.intel.com>



On 25/08/2017 12:26 PM, Robert Hoo wrote:
> (Sorry for yesterday's wrong sending, I finally fixed my MTA and git
> send-email settings.)
> 
> It's hard to benchmark 40G+ network bandwidth using ordinary
> tools like iperf, netperf (see reference 1).
> Pktgen, packet generator from Kernel sapce, shall be a candidate.
> I then tried with pktgen multiqueue sample scripts, but still
> cannot reach line rate.

Try samples 03 and 04.

> I then derived this NUMA awared irq affinity sample script from
> multi-queue sample one, successfully benchmarked 40G link. I think this can
> also be useful for 100G reference, though I haven't got device to test yet.
> 
> This script simply does:
> Detect $DEV's NUMA node belonging.
> Bind each thread (processor from that NUMA node) with each $DEV queue's
> irq affinity, 1:1 mapping.
> How many '-t' threads input determines how many queues will be
> utilized.

I agree this is an essential capability.
This was the main reason I added support for the -f argument.
Using it, I could choose cores of local NUMA, especially for single 
thread, or when cores of the NUMA are sequential.

> 
> Tested with Intel XL710 NIC with Cisco 3172 switch.
> 
> It would be even slightly better if the irqbalance service is turned
> off outside.
> 
> Referrences:
> https://people.netfilter.org/hawk/presentations/LCA2015/net_stack_challenges_100G_LCA2015.pdf
> http://www.intel.cn/content/dam/www/public/us/en/documents/reference-guides/xl710-x710-performance-tuning-linux-guide.pdf
> 
> Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> ---

Regards,
Tariq Toukan

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH] e1000e: apply burst mode settings only on default
From: Neftin, Sasha @ 2017-08-27  8:30 UTC (permalink / raw)
  To: Willem de Bruijn, "jeffrey.t.kirsher
  Cc: netdev, Willem de Bruijn, intel-wired-lan
In-Reply-To: <20170825150626.2843-1-willemdebruijn.kernel@gmail.com>

On 8/25/2017 18:06, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Devices that support FLAG2_DMA_BURST have different default values
> for RDTR and RADV. Apply burst mode default settings only when no
> explicit value was passed at module load.
>
> The RDTR default is zero. If the module is loaded for low latency
> operation with RxIntDelay=0, do not override this value with a burst
> default of 32.
>
> Move the decision to apply burst values earlier, where explicitly
> initialized module variables can be distinguished from defaults.
>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>   drivers/net/ethernet/intel/e1000e/e1000.h  |  4 ----
>   drivers/net/ethernet/intel/e1000e/netdev.c |  8 --------
>   drivers/net/ethernet/intel/e1000e/param.c  | 16 +++++++++++++++-
>   3 files changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
> index 98e68888abb1..2311b31bdcac 100644
> --- a/drivers/net/ethernet/intel/e1000e/e1000.h
> +++ b/drivers/net/ethernet/intel/e1000e/e1000.h
> @@ -94,10 +94,6 @@ struct e1000_info;
>    */
>   #define E1000_CHECK_RESET_COUNT		25
>   
> -#define DEFAULT_RDTR			0
> -#define DEFAULT_RADV			8
> -#define BURST_RDTR			0x20
> -#define BURST_RADV			0x20
>   #define PCICFG_DESC_RING_STATUS		0xe4
>   #define FLUSH_DESC_REQUIRED		0x100
>   
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 327dfe5bedc0..47b89aac7969 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -3223,14 +3223,6 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
>   		 */
>   		ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE);
>   		ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE);
> -
> -		/* override the delay timers for enabling bursting, only if
> -		 * the value was not set by the user via module options
> -		 */
> -		if (adapter->rx_int_delay == DEFAULT_RDTR)
> -			adapter->rx_int_delay = BURST_RDTR;
> -		if (adapter->rx_abs_int_delay == DEFAULT_RADV)
> -			adapter->rx_abs_int_delay = BURST_RADV;
>   	}
>   
>   	/* set the Receive Delay Timer Register */
> diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
> index 6d8c39abee16..bb696c98f9b0 100644
> --- a/drivers/net/ethernet/intel/e1000e/param.c
> +++ b/drivers/net/ethernet/intel/e1000e/param.c
> @@ -73,17 +73,25 @@ E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
>   /* Receive Interrupt Delay in units of 1.024 microseconds
>    * hardware will likely hang if you set this to anything but zero.
>    *
> + * Burst variant is used as default if device has FLAG2_DMA_BURST.
> + *
>    * Valid Range: 0-65535
>    */
>   E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
> +#define DEFAULT_RDTR			0
> +#define BURST_RDTR			0x20
>   #define MAX_RXDELAY 0xFFFF
>   #define MIN_RXDELAY 0
>   
>   /* Receive Absolute Interrupt Delay in units of 1.024 microseconds
> + *
> + * Burst variant is used as default if device has FLAG2_DMA_BURST.
>    *
>    * Valid Range: 0-65535
>    */
>   E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
> +#define DEFAULT_RADV			8
> +#define BURST_RADV			0x20
>   #define MAX_RXABSDELAY 0xFFFF
>   #define MIN_RXABSDELAY 0
>   
> @@ -297,6 +305,9 @@ void e1000e_check_options(struct e1000_adapter *adapter)
>   					 .max = MAX_RXDELAY } }
>   		};
>   
> +		if (adapter->flags2 & FLAG2_DMA_BURST)
> +			opt.def = BURST_RDTR;
> +
>   		if (num_RxIntDelay > bd) {
>   			adapter->rx_int_delay = RxIntDelay[bd];
>   			e1000_validate_option(&adapter->rx_int_delay, &opt,
> @@ -307,7 +318,7 @@ void e1000e_check_options(struct e1000_adapter *adapter)
>   	}
>   	/* Receive Absolute Interrupt Delay */
>   	{
> -		static const struct e1000_option opt = {
> +		static struct e1000_option opt = {
>   			.type = range_option,
>   			.name = "Receive Absolute Interrupt Delay",
>   			.err  = "using default of "
> @@ -317,6 +328,9 @@ void e1000e_check_options(struct e1000_adapter *adapter)
>   					 .max = MAX_RXABSDELAY } }
>   		};
>   
> +		if (adapter->flags2 & FLAG2_DMA_BURST)
> +			opt.def = BURST_RADV;
> +
>   		if (num_RxAbsIntDelay > bd) {
>   			adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
>   			e1000_validate_option(&adapter->rx_abs_int_delay, &opt,

This patch looks good for me, but I would like hear second opinion.

^ permalink raw reply

* Re: [patch net-next 11/12] mlxsw: spectrum_dpipe: Add support for IPv4 host table dump
From: Arkadi Sharshevsky @ 2017-08-27  8:31 UTC (permalink / raw)
  To: David Ahern, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw
In-Reply-To: <a4335a01-a98b-573c-9747-b4039188340c@gmail.com>



On 08/25/2017 10:51 PM, David Ahern wrote:
> On 8/25/17 2:26 AM, Arkadi Sharshevsky wrote:
>>
>>
>> On 08/24/2017 10:26 PM, David Ahern wrote:
>>> On 8/23/17 11:40 PM, Jiri Pirko wrote:
>>>> +static int
>>>> +mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
>>>> +				      struct devlink_dpipe_entry *entry,
>>>> +				      bool counters_enabled,
>>>> +				      struct devlink_dpipe_dump_ctx *dump_ctx,
>>>> +				      int type)
>>>> +{
>>>> +	int rif_neigh_count = 0;
>>>> +	int rif_neigh_skip = 0;
>>>> +	int neigh_count = 0;
>>>> +	int rif_count;
>>>> +	int i, j;
>>>> +	int err;
>>>> +
>>>> +	rtnl_lock();
>>>
>>> Why does a h/w driver dumping its tables need the rtnl lock?
>>>
>>
>> This table represents the hw IPv4 arp table, and the
>> driver depends on rtnl to be held.
>>
> 
> Meaning mlxsw does not have its own locks protecting data structures --
> e.g., rif adds and deletes, so it is relying on rtnl?
> 
> Also, this dpipe capability seems to be just dumping data structures
> maintained by the driver. ie., you can compare the mlxsw view of
> networking state to IPv4 and IPv6 level tables. Any plans to offer a
> command that reads data from the h/w and passes that back to the user?
> i.e, a command to compare kernel tables to h/w state?
> 

So this infra should provide several things-

1) Reveal the interactions between various hardware tables
2) Counters for this tables
3) Debugabillity

The first two can be achieved right now. Regarding debugabillity, which
is a bit vague, the current assumption is that the drivers internal data
structures are synced with hardware (which is no always true), and maybe
are not synced with the kernel, so this can be achieved right now by
dumping the internal state of the driver. Furthermore, the counters are
dumped from the hardware and give the user additional indication.

I completely agree that the hardware should be dumped in order to
validate the internal data structures are really synced with HW. This
could be usable for observing data corruptions inside the ASIC and
various complex bugs.

In order to address that I though about maybe add a flag called
"validate_hw" so that during the dump the driver<-->hw state could be
validated.

What do you think about it?

Thanks,
Arkadi

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH] e1000e: apply burst mode settings only on default
From: Neftin, Sasha @ 2017-08-27  8:32 UTC (permalink / raw)
  To: Willem de Bruijn, "jeffrey.t.kirsher
  Cc: netdev, Willem de Bruijn, intel-wired-lan
In-Reply-To: <291b863f-552e-2f3b-f658-e812d0848949@intel.com>

On 8/27/2017 11:30, Neftin, Sasha wrote:
> On 8/25/2017 18:06, Willem de Bruijn wrote:
>> From: Willem de Bruijn <willemb@google.com>
>>
>> Devices that support FLAG2_DMA_BURST have different default values
>> for RDTR and RADV. Apply burst mode default settings only when no
>> explicit value was passed at module load.
>>
>> The RDTR default is zero. If the module is loaded for low latency
>> operation with RxIntDelay=0, do not override this value with a burst
>> default of 32.
>>
>> Move the decision to apply burst values earlier, where explicitly
>> initialized module variables can be distinguished from defaults.
>>
>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>> ---
>>   drivers/net/ethernet/intel/e1000e/e1000.h  |  4 ----
>>   drivers/net/ethernet/intel/e1000e/netdev.c |  8 --------
>>   drivers/net/ethernet/intel/e1000e/param.c  | 16 +++++++++++++++-
>>   3 files changed, 15 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h 
>> b/drivers/net/ethernet/intel/e1000e/e1000.h
>> index 98e68888abb1..2311b31bdcac 100644
>> --- a/drivers/net/ethernet/intel/e1000e/e1000.h
>> +++ b/drivers/net/ethernet/intel/e1000e/e1000.h
>> @@ -94,10 +94,6 @@ struct e1000_info;
>>    */
>>   #define E1000_CHECK_RESET_COUNT        25
>>   -#define DEFAULT_RDTR            0
>> -#define DEFAULT_RADV            8
>> -#define BURST_RDTR            0x20
>> -#define BURST_RADV            0x20
>>   #define PCICFG_DESC_RING_STATUS        0xe4
>>   #define FLUSH_DESC_REQUIRED        0x100
>>   diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
>> b/drivers/net/ethernet/intel/e1000e/netdev.c
>> index 327dfe5bedc0..47b89aac7969 100644
>> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
>> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
>> @@ -3223,14 +3223,6 @@ static void e1000_configure_rx(struct 
>> e1000_adapter *adapter)
>>            */
>>           ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE);
>>           ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE);
>> -
>> -        /* override the delay timers for enabling bursting, only if
>> -         * the value was not set by the user via module options
>> -         */
>> -        if (adapter->rx_int_delay == DEFAULT_RDTR)
>> -            adapter->rx_int_delay = BURST_RDTR;
>> -        if (adapter->rx_abs_int_delay == DEFAULT_RADV)
>> -            adapter->rx_abs_int_delay = BURST_RADV;
>>       }
>>         /* set the Receive Delay Timer Register */
>> diff --git a/drivers/net/ethernet/intel/e1000e/param.c 
>> b/drivers/net/ethernet/intel/e1000e/param.c
>> index 6d8c39abee16..bb696c98f9b0 100644
>> --- a/drivers/net/ethernet/intel/e1000e/param.c
>> +++ b/drivers/net/ethernet/intel/e1000e/param.c
>> @@ -73,17 +73,25 @@ E1000_PARAM(TxAbsIntDelay, "Transmit Absolute 
>> Interrupt Delay");
>>   /* Receive Interrupt Delay in units of 1.024 microseconds
>>    * hardware will likely hang if you set this to anything but zero.
>>    *
>> + * Burst variant is used as default if device has FLAG2_DMA_BURST.
>> + *
>>    * Valid Range: 0-65535
>>    */
>>   E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
>> +#define DEFAULT_RDTR            0
>> +#define BURST_RDTR            0x20
>>   #define MAX_RXDELAY 0xFFFF
>>   #define MIN_RXDELAY 0
>>     /* Receive Absolute Interrupt Delay in units of 1.024 microseconds
>> + *
>> + * Burst variant is used as default if device has FLAG2_DMA_BURST.
>>    *
>>    * Valid Range: 0-65535
>>    */
>>   E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
>> +#define DEFAULT_RADV            8
>> +#define BURST_RADV            0x20
>>   #define MAX_RXABSDELAY 0xFFFF
>>   #define MIN_RXABSDELAY 0
>>   @@ -297,6 +305,9 @@ void e1000e_check_options(struct e1000_adapter 
>> *adapter)
>>                        .max = MAX_RXDELAY } }
>>           };
>>   +        if (adapter->flags2 & FLAG2_DMA_BURST)
>> +            opt.def = BURST_RDTR;
>> +
>>           if (num_RxIntDelay > bd) {
>>               adapter->rx_int_delay = RxIntDelay[bd];
>> e1000_validate_option(&adapter->rx_int_delay, &opt,
>> @@ -307,7 +318,7 @@ void e1000e_check_options(struct e1000_adapter 
>> *adapter)
>>       }
>>       /* Receive Absolute Interrupt Delay */
>>       {
>> -        static const struct e1000_option opt = {
>> +        static struct e1000_option opt = {
>>               .type = range_option,
>>               .name = "Receive Absolute Interrupt Delay",
>>               .err  = "using default of "
>> @@ -317,6 +328,9 @@ void e1000e_check_options(struct e1000_adapter 
>> *adapter)
>>                        .max = MAX_RXABSDELAY } }
>>           };
>>   +        if (adapter->flags2 & FLAG2_DMA_BURST)
>> +            opt.def = BURST_RADV;
>> +
>>           if (num_RxAbsIntDelay > bd) {
>>               adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
>> e1000_validate_option(&adapter->rx_abs_int_delay, &opt,
>
> This patch looks good for me, but I would like hear second opinion.
>
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH] e1000e: apply burst mode settings only on default
From: Neftin, Sasha @ 2017-08-27  8:34 UTC (permalink / raw)
  To: Willem de Bruijn, jeffrey.t.kirsher, alexander.h.duyck,
	raanan.avargil, dima.ruinskiy
  Cc: netdev, Willem de Bruijn, intel-wired-lan
In-Reply-To: <e589e146-3536-9ef3-486c-f5d115eb83cf@intel.com>

On 8/27/2017 11:32, Neftin, Sasha wrote:
> On 8/27/2017 11:30, Neftin, Sasha wrote:
>> On 8/25/2017 18:06, Willem de Bruijn wrote:
>>> From: Willem de Bruijn <willemb@google.com>
>>>
>>> Devices that support FLAG2_DMA_BURST have different default values
>>> for RDTR and RADV. Apply burst mode default settings only when no
>>> explicit value was passed at module load.
>>>
>>> The RDTR default is zero. If the module is loaded for low latency
>>> operation with RxIntDelay=0, do not override this value with a burst
>>> default of 32.
>>>
>>> Move the decision to apply burst values earlier, where explicitly
>>> initialized module variables can be distinguished from defaults.
>>>
>>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>>> ---
>>>   drivers/net/ethernet/intel/e1000e/e1000.h  |  4 ----
>>>   drivers/net/ethernet/intel/e1000e/netdev.c |  8 --------
>>>   drivers/net/ethernet/intel/e1000e/param.c  | 16 +++++++++++++++-
>>>   3 files changed, 15 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h 
>>> b/drivers/net/ethernet/intel/e1000e/e1000.h
>>> index 98e68888abb1..2311b31bdcac 100644
>>> --- a/drivers/net/ethernet/intel/e1000e/e1000.h
>>> +++ b/drivers/net/ethernet/intel/e1000e/e1000.h
>>> @@ -94,10 +94,6 @@ struct e1000_info;
>>>    */
>>>   #define E1000_CHECK_RESET_COUNT        25
>>>   -#define DEFAULT_RDTR            0
>>> -#define DEFAULT_RADV            8
>>> -#define BURST_RDTR            0x20
>>> -#define BURST_RADV            0x20
>>>   #define PCICFG_DESC_RING_STATUS        0xe4
>>>   #define FLUSH_DESC_REQUIRED        0x100
>>>   diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
>>> b/drivers/net/ethernet/intel/e1000e/netdev.c
>>> index 327dfe5bedc0..47b89aac7969 100644
>>> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
>>> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
>>> @@ -3223,14 +3223,6 @@ static void e1000_configure_rx(struct 
>>> e1000_adapter *adapter)
>>>            */
>>>           ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE);
>>>           ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE);
>>> -
>>> -        /* override the delay timers for enabling bursting, only if
>>> -         * the value was not set by the user via module options
>>> -         */
>>> -        if (adapter->rx_int_delay == DEFAULT_RDTR)
>>> -            adapter->rx_int_delay = BURST_RDTR;
>>> -        if (adapter->rx_abs_int_delay == DEFAULT_RADV)
>>> -            adapter->rx_abs_int_delay = BURST_RADV;
>>>       }
>>>         /* set the Receive Delay Timer Register */
>>> diff --git a/drivers/net/ethernet/intel/e1000e/param.c 
>>> b/drivers/net/ethernet/intel/e1000e/param.c
>>> index 6d8c39abee16..bb696c98f9b0 100644
>>> --- a/drivers/net/ethernet/intel/e1000e/param.c
>>> +++ b/drivers/net/ethernet/intel/e1000e/param.c
>>> @@ -73,17 +73,25 @@ E1000_PARAM(TxAbsIntDelay, "Transmit Absolute 
>>> Interrupt Delay");
>>>   /* Receive Interrupt Delay in units of 1.024 microseconds
>>>    * hardware will likely hang if you set this to anything but zero.
>>>    *
>>> + * Burst variant is used as default if device has FLAG2_DMA_BURST.
>>> + *
>>>    * Valid Range: 0-65535
>>>    */
>>>   E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
>>> +#define DEFAULT_RDTR            0
>>> +#define BURST_RDTR            0x20
>>>   #define MAX_RXDELAY 0xFFFF
>>>   #define MIN_RXDELAY 0
>>>     /* Receive Absolute Interrupt Delay in units of 1.024 microseconds
>>> + *
>>> + * Burst variant is used as default if device has FLAG2_DMA_BURST.
>>>    *
>>>    * Valid Range: 0-65535
>>>    */
>>>   E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
>>> +#define DEFAULT_RADV            8
>>> +#define BURST_RADV            0x20
>>>   #define MAX_RXABSDELAY 0xFFFF
>>>   #define MIN_RXABSDELAY 0
>>>   @@ -297,6 +305,9 @@ void e1000e_check_options(struct e1000_adapter 
>>> *adapter)
>>>                        .max = MAX_RXDELAY } }
>>>           };
>>>   +        if (adapter->flags2 & FLAG2_DMA_BURST)
>>> +            opt.def = BURST_RDTR;
>>> +
>>>           if (num_RxIntDelay > bd) {
>>>               adapter->rx_int_delay = RxIntDelay[bd];
>>> e1000_validate_option(&adapter->rx_int_delay, &opt,
>>> @@ -307,7 +318,7 @@ void e1000e_check_options(struct e1000_adapter 
>>> *adapter)
>>>       }
>>>       /* Receive Absolute Interrupt Delay */
>>>       {
>>> -        static const struct e1000_option opt = {
>>> +        static struct e1000_option opt = {
>>>               .type = range_option,
>>>               .name = "Receive Absolute Interrupt Delay",
>>>               .err  = "using default of "
>>> @@ -317,6 +328,9 @@ void e1000e_check_options(struct e1000_adapter 
>>> *adapter)
>>>                        .max = MAX_RXABSDELAY } }
>>>           };
>>>   +        if (adapter->flags2 & FLAG2_DMA_BURST)
>>> +            opt.def = BURST_RADV;
>>> +
>>>           if (num_RxAbsIntDelay > bd) {
>>>               adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
>>> e1000_validate_option(&adapter->rx_abs_int_delay, &opt,
>>
>> This patch looks good for me, but I would like hear second opinion.
>>
>> _______________________________________________
>> Intel-wired-lan mailing list
>> Intel-wired-lan@osuosl.org
>> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
>
>
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan

^ permalink raw reply

* Re: [PATCH net] bridge: check for null fdb->dst before notifying switchdev drivers
From: Arkadi Sharshevsky @ 2017-08-27  8:46 UTC (permalink / raw)
  To: Roopa Prabhu, davem; +Cc: netdev
In-Reply-To: <1503807228-16281-1-git-send-email-roopa@cumulusnetworks.com>



On 08/27/2017 07:13 AM, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
> 
> current switchdev drivers dont seem to support offloading fdb
> entries pointing to the bridge device which have fdb->dst
> not set to any port. This patch adds a NULL fdb->dst check in
> the switchdev notifier code.
> 
> This patch fixes the below NULL ptr dereference:
> $bridge fdb add 00:02:00:00:00:33 dev br0 self
> 
> [   69.953374] BUG: unable to handle kernel NULL pointer dereference at
> 0000000000000008
> [   69.954044] IP: br_switchdev_fdb_notify+0x29/0x80
> [   69.954044] PGD 66527067
> [   69.954044] P4D 66527067
> [   69.954044] PUD 7899c067
> [   69.954044] PMD 0
> [   69.954044]
> [   69.954044] Oops: 0000 [#1] SMP
> [   69.954044] Modules linked in:
> [   69.954044] CPU: 1 PID: 3074 Comm: bridge Not tainted 4.13.0-rc6+ #1
> [   69.954044] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org
> 04/01/2014
> [   69.954044] task: ffff88007b827140 task.stack: ffffc90001564000
> [   69.954044] RIP: 0010:br_switchdev_fdb_notify+0x29/0x80
> [   69.954044] RSP: 0018:ffffc90001567918 EFLAGS: 00010246
> [   69.954044] RAX: 0000000000000000 RBX: ffff8800795e0880 RCX:
> 00000000000000c0
> [   69.954044] RDX: ffffc90001567920 RSI: 000000000000001c RDI:
> ffff8800795d0600
> [   69.954044] RBP: ffffc90001567938 R08: ffff8800795d0600 R09:
> 0000000000000000
> [   69.954044] R10: ffffc90001567a88 R11: ffff88007b849400 R12:
> ffff8800795e0880
> [   69.954044] R13: ffff8800795d0600 R14: ffffffff81ef8880 R15:
> 000000000000001c
> [   69.954044] FS:  00007f93d3085700(0000) GS:ffff88007fd00000(0000)
> knlGS:0000000000000000
> [   69.954044] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   69.954044] CR2: 0000000000000008 CR3: 0000000066551000 CR4:
> 00000000000006e0
> [   69.954044] Call Trace:
> [   69.954044]  fdb_notify+0x3f/0xf0
> [   69.954044]  __br_fdb_add.isra.12+0x1a7/0x370
> [   69.954044]  br_fdb_add+0x178/0x280
> [   69.954044]  rtnl_fdb_add+0x10a/0x200
> [   69.954044]  rtnetlink_rcv_msg+0x1b4/0x240
> [   69.954044]  ? skb_free_head+0x21/0x40
> [   69.954044]  ? rtnl_calcit.isra.18+0xf0/0xf0
> [   69.954044]  netlink_rcv_skb+0xed/0x120
> [   69.954044]  rtnetlink_rcv+0x15/0x20
> [   69.954044]  netlink_unicast+0x180/0x200
> [   69.954044]  netlink_sendmsg+0x291/0x370
> [   69.954044]  ___sys_sendmsg+0x180/0x2e0
> [   69.954044]  ? filemap_map_pages+0x2db/0x370
> [   69.954044]  ? do_wp_page+0x11d/0x420
> [   69.954044]  ? __handle_mm_fault+0x794/0xd80
> [   69.954044]  ? vma_link+0xcb/0xd0
> [   69.954044]  __sys_sendmsg+0x4c/0x90
> [   69.954044]  SyS_sendmsg+0x12/0x20
> [   69.954044]  do_syscall_64+0x63/0xe0
> [   69.954044]  entry_SYSCALL64_slow_path+0x25/0x25
> [   69.954044] RIP: 0033:0x7f93d2bad690
> [   69.954044] RSP: 002b:00007ffc7217a638 EFLAGS: 00000246 ORIG_RAX:
> 000000000000002e
> [   69.954044] RAX: ffffffffffffffda RBX: 00007ffc72182eac RCX:
> 00007f93d2bad690
> [   69.954044] RDX: 0000000000000000 RSI: 00007ffc7217a670 RDI:
> 0000000000000003
> [   69.954044] RBP: 0000000059a1f7f8 R08: 0000000000000006 R09:
> 000000000000000a
> [   69.954044] R10: 00007ffc7217a400 R11: 0000000000000246 R12:
> 00007ffc7217a670
> [   69.954044] R13: 00007ffc72182a98 R14: 00000000006114c0 R15:
> 00007ffc72182aa0
> [   69.954044] Code: 1f 00 66 66 66 66 90 55 48 89 e5 48 83 ec 20 f6 47
> 20 04 74 0a 83 fe 1c 74 09 83 fe 1d 74 2c c9 66 90 c3 48 8b 47 10 48 8d
> 55 e8 <48> 8b 70 08 0f b7 47 1e 48 83 c7 18 48 89 7d f0 bf 03 00 00 00
> [   69.954044] RIP: br_switchdev_fdb_notify+0x29/0x80 RSP:
> ffffc90001567918
> [   69.954044] CR2: 0000000000000008
> [   69.954044] ---[ end trace 03e9eec4a82c238b ]---
> 
> Fixes: 6b26b51b1d13 ("net: bridge: Add support for notifying devices about FDB add/del")
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>  net/bridge/br_switchdev.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
> index 181a44d..f6b1c7d 100644
> --- a/net/bridge/br_switchdev.c
> +++ b/net/bridge/br_switchdev.c
> @@ -115,7 +115,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
>  void
>  br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
>  {
> -	if (!fdb->added_by_user)
> +	if (!fdb->added_by_user || !fdb->dst)
>  		return;
>  
>  	switch (type) {
> 

Thanks, missed that.
Arkadi

^ permalink raw reply

* (unknown), 
From: agar2000 @ 2017-08-27 10:55 UTC (permalink / raw)
  To: netdev

[-- Attachment #1: MAIL_34929959_netdev.zip --]
[-- Type: application/zip, Size: 72397 bytes --]

^ permalink raw reply

* [PATCH net-next 3/4] net/core: Add violation counters to VF statisctics
From: Saeed Mahameed @ 2017-08-27 11:06 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Eugenia Emantayev, Saeed Mahameed
In-Reply-To: <20170827110618.20599-1-saeedm@mellanox.com>

From: Eugenia Emantayev <eugenia@mellanox.com>

Add receive and transmit violation counters to be
displayed in iproute2 VF statistics.

Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/if_link.h      |  2 ++
 include/uapi/linux/if_link.h |  2 ++
 net/core/rtnetlink.c         | 10 +++++++++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index da70af27e42e..ebf3448acb5b 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -12,6 +12,8 @@ struct ifla_vf_stats {
 	__u64 tx_bytes;
 	__u64 broadcast;
 	__u64 multicast;
+	__u64 rx_dropped;
+	__u64 tx_dropped;
 };
 
 struct ifla_vf_info {
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 3aa895c5fbc1..68cd31b281a1 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -743,6 +743,8 @@ enum {
 	IFLA_VF_STATS_BROADCAST,
 	IFLA_VF_STATS_MULTICAST,
 	IFLA_VF_STATS_PAD,
+	IFLA_VF_STATS_RX_DROPPED,
+	IFLA_VF_STATS_TX_DROPPED,
 	__IFLA_VF_STATS_MAX,
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 56909f11d88e..1a653bb00d6e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -845,6 +845,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 			 nla_total_size_64bit(sizeof(__u64)) +
 			 /* IFLA_VF_STATS_MULTICAST */
 			 nla_total_size_64bit(sizeof(__u64)) +
+			 /* IFLA_VF_STATS_RX_DROPPED */
+			 nla_total_size_64bit(sizeof(__u64)) +
+			 /* IFLA_VF_STATS_TX_DROPPED */
+			 nla_total_size_64bit(sizeof(__u64)) +
 			 nla_total_size(sizeof(struct ifla_vf_trust)));
 		return size;
 	} else
@@ -1214,7 +1218,11 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 	    nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
 			      vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
-			      vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+			      vf_stats.multicast, IFLA_VF_STATS_PAD) ||
+	    nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
+			      vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
+	    nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
+			      vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
 		nla_nest_cancel(skb, vfstats);
 		goto nla_put_vf_failure;
 	}
-- 
2.13.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox