netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB
@ 2016-06-06 12:20 Toshiaki Makita
  2016-06-11  5:35 ` David Miller
  0 siblings, 1 reply; 8+ messages in thread
From: Toshiaki Makita @ 2016-06-06 12:20 UTC (permalink / raw)
  To: David S. Miller, Stephen Hemminger
  Cc: Nikolay Aleksandrov, netdev, bridge, Patrick Schaaf

Patrick Schaaf reported that flooding due to a missing fdb entry of
the address of macvlan on the bridge device caused high CPU
consumption of an openvpn process behind a tap bridge port.
Adding an fdb entry of the macvlan address can suppress flooding
and avoid this problem.

This change makes bridge able to synchronize unicast filtering with
fdb automatically so admin do not need to manually add an fdb entry.
This effectively supports IFF_UNICAST_FLT in bridge, thus adding an
macvlan device would not place bridge into promiscuous mode as well.

v2:
- Test vlan with br_vlan_should_use() in br_fdb_sync_uc() as per
  Nikolay Aleksandrov.

Reported-by: Patrick Schaaf <netdev@bof.de>
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
---
 net/bridge/br_device.c  |   7 +--
 net/bridge/br_fdb.c     | 119 ++++++++++++++++++++++++++++++++++++++++++++----
 net/bridge/br_private.h |   7 +--
 net/bridge/br_vlan.c    |  11 ++---
 4 files changed, 122 insertions(+), 22 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 2c8095a..9b56802 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -123,8 +123,9 @@ static int br_dev_open(struct net_device *dev)
 	return 0;
 }
 
-static void br_dev_set_multicast_list(struct net_device *dev)
+static void br_dev_set_rx_mode(struct net_device *dev)
 {
+	br_fdb_sync_uc(netdev_priv(dev));
 }
 
 static void br_dev_change_rx_flags(struct net_device *dev, int change)
@@ -329,7 +330,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_start_xmit		 = br_dev_xmit,
 	.ndo_get_stats64	 = br_get_stats64,
 	.ndo_set_mac_address	 = br_set_mac_address,
-	.ndo_set_rx_mode	 = br_dev_set_multicast_list,
+	.ndo_set_rx_mode	 = br_dev_set_rx_mode,
 	.ndo_change_rx_flags	 = br_dev_change_rx_flags,
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
@@ -373,7 +374,7 @@ void br_dev_setup(struct net_device *dev)
 	dev->destructor = br_dev_free;
 	dev->ethtool_ops = &br_ethtool_ops;
 	SET_NETDEV_DEVTYPE(dev, &br_type);
-	dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
+	dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE | IFF_UNICAST_FLT;
 
 	dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
 			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index dcea4f4..aef0eeb 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -184,28 +184,36 @@ static void fdb_delete_local(struct net_bridge *br,
 	vg = br_vlan_group(br);
 	v = br_vlan_find(vg, vid);
 	/* Maybe bridge device has same hw addr? */
-	if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
-	    (!vid || (v && br_vlan_should_use(v)))) {
-		f->dst = NULL;
-		f->added_by_user = 0;
-		return;
+	if (p && (!vid || (v && br_vlan_should_use(v)))) {
+		struct netdev_hw_addr *ha;
+
+		if (ether_addr_equal(br->dev->dev_addr, addr)) {
+			f->dst = NULL;
+			f->added_by_user = 0;
+			return;
+		}
+		netdev_for_each_uc_addr(ha, br->dev) {
+			if (ether_addr_equal(ha->addr, addr)) {
+				f->dst = NULL;
+				f->added_by_user = 0;
+				return;
+			}
+		}
 	}
 
 	fdb_delete(br, f);
 }
 
-void br_fdb_find_delete_local(struct net_bridge *br,
-			      const struct net_bridge_port *p,
-			      const unsigned char *addr, u16 vid)
+static void fdb_find_delete_local(struct net_bridge *br,
+				  const struct net_bridge_port *p,
+				  const unsigned char *addr, u16 vid)
 {
 	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *f;
 
-	spin_lock_bh(&br->hash_lock);
 	f = fdb_find(head, addr, vid);
 	if (f && f->is_local && !f->added_by_user && f->dst == p)
 		fdb_delete_local(br, p, f);
-	spin_unlock_bh(&br->hash_lock);
 }
 
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
@@ -288,6 +296,97 @@ out:
 	spin_unlock_bh(&br->hash_lock);
 }
 
+void br_fdb_sync_uc(struct net_bridge *br)
+{
+	struct net_bridge_vlan_group *vg;
+	struct netdev_hw_addr *ha;
+	int i;
+
+	spin_lock_bh(&br->hash_lock);
+
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		struct hlist_node *h;
+
+		hlist_for_each(h, &br->hash[i]) {
+			struct net_bridge_fdb_entry *f;
+
+			f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
+			if (!f->dst && f->is_local && !f->added_by_user &&
+			    !ether_addr_equal(f->addr.addr, br->dev->dev_addr)) {
+				/* delete old one */
+				fdb_delete_local(br, NULL, f);
+			}
+		}
+	}
+
+	vg = br_vlan_group(br);
+
+	/* insert new address,  may fail if invalid address or dup. */
+	netdev_for_each_uc_addr(ha, br->dev) {
+		struct net_bridge_vlan *v;
+
+		fdb_insert(br, NULL, ha->addr, 0);
+
+		if (!vg || !vg->num_vlans)
+			continue;
+
+		list_for_each_entry(v, &vg->vlan_list, vlist) {
+			if (br_vlan_should_use(v))
+				fdb_insert(br, NULL, ha->addr, v->vid);
+		}
+	}
+
+	spin_unlock_bh(&br->hash_lock);
+}
+
+int br_fdb_add_vlan(struct net_bridge *br, struct net_bridge_port *p, u16 vid)
+{
+	struct netdev_hw_addr *ha, *ha2;
+	int err = 0;
+
+	spin_lock_bh(&br->hash_lock);
+	if (p) {
+		err = fdb_insert(br, p, p->dev->dev_addr, vid);
+	} else {
+		netdev_for_each_uc_addr(ha, br->dev) {
+			err = fdb_insert(br, NULL, ha->addr, vid);
+			if (err)
+				goto undo;
+		}
+		err = fdb_insert(br, NULL, br->dev->dev_addr, vid);
+		if (err)
+			goto undo;
+	}
+out:
+	spin_unlock_bh(&br->hash_lock);
+
+	return err;
+
+undo:
+	netdev_for_each_uc_addr(ha2, br->dev) {
+		if (ha2 == ha)
+			break;
+		fdb_find_delete_local(br, NULL, ha2->addr, vid);
+	}
+	goto out;
+}
+
+void br_fdb_delete_vlan(struct net_bridge *br, struct net_bridge_port *p,
+			u16 vid)
+{
+	struct netdev_hw_addr *ha;
+
+	spin_lock_bh(&br->hash_lock);
+	if (p) {
+		fdb_find_delete_local(br, p, p->dev->dev_addr, vid);
+	} else {
+		netdev_for_each_uc_addr(ha, br->dev)
+			fdb_find_delete_local(br, NULL, ha->addr, vid);
+		fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid);
+	}
+	spin_unlock_bh(&br->hash_lock);
+}
+
 void br_fdb_cleanup(unsigned long _data)
 {
 	struct net_bridge *br = (struct net_bridge *)_data;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c7fb5d7..d07d24c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -463,11 +463,12 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
 int br_fdb_init(void);
 void br_fdb_fini(void);
 void br_fdb_flush(struct net_bridge *br);
-void br_fdb_find_delete_local(struct net_bridge *br,
-			      const struct net_bridge_port *p,
-			      const unsigned char *addr, u16 vid);
 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
 void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
+void br_fdb_sync_uc(struct net_bridge *br);
+int br_fdb_add_vlan(struct net_bridge *br, struct net_bridge_port *p, u16 vid);
+void br_fdb_delete_vlan(struct net_bridge *br, struct net_bridge_port *p,
+			u16 vid);
 void br_fdb_cleanup(unsigned long arg);
 void br_fdb_delete_by_port(struct net_bridge *br,
 			   const struct net_bridge_port *p, u16 vid, int do_all);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index b6de4f4..86ac873 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -246,7 +246,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
 
 	/* Add the dev mac and count the vlan only if it's usable */
 	if (br_vlan_should_use(v)) {
-		err = br_fdb_insert(br, p, dev->dev_addr, v->vid);
+		err = br_fdb_add_vlan(br, p, v->vid);
 		if (err) {
 			br_err(br, "failed insert local address into bridge forwarding table\n");
 			goto out_filt;
@@ -266,7 +266,7 @@ out:
 
 out_fdb_insert:
 	if (br_vlan_should_use(v)) {
-		br_fdb_find_delete_local(br, p, dev->dev_addr, v->vid);
+		br_fdb_delete_vlan(br, p, v->vid);
 		vg->num_vlans--;
 	}
 
@@ -557,8 +557,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
 			if (!(flags & BRIDGE_VLAN_INFO_BRENTRY))
 				return -EINVAL;
 			/* It was only kept for port vlans, now make it real */
-			ret = br_fdb_insert(br, NULL, br->dev->dev_addr,
-					    vlan->vid);
+			ret = br_fdb_add_vlan(br, NULL, vlan->vid);
 			if (ret) {
 				br_err(br, "failed insert local address into bridge forwarding table\n");
 				return ret;
@@ -610,7 +609,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid)
 	if (!v || !br_vlan_is_brentry(v))
 		return -ENOENT;
 
-	br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid);
+	br_fdb_delete_vlan(br, NULL, vid);
 	br_fdb_delete_by_port(br, NULL, vid, 0);
 
 	return __vlan_del(v);
@@ -1036,7 +1035,7 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
 	v = br_vlan_find(nbp_vlan_group(port), vid);
 	if (!v)
 		return -ENOENT;
-	br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
+	br_fdb_delete_vlan(port->br, port, vid);
 	br_fdb_delete_by_port(port->br, port, vid, 0);
 
 	return __vlan_del(v);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB
  2016-06-06 12:20 [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB Toshiaki Makita
@ 2016-06-11  5:35 ` David Miller
  2016-06-11 16:17   ` Nikolay Aleksandrov via Bridge
  0 siblings, 1 reply; 8+ messages in thread
From: David Miller @ 2016-06-11  5:35 UTC (permalink / raw)
  To: makita.toshiaki; +Cc: netdev, bridge, nikolay, netdev

From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Mon,  6 Jun 2016 21:20:13 +0900

> Patrick Schaaf reported that flooding due to a missing fdb entry of
> the address of macvlan on the bridge device caused high CPU
> consumption of an openvpn process behind a tap bridge port.
> Adding an fdb entry of the macvlan address can suppress flooding
> and avoid this problem.
> 
> This change makes bridge able to synchronize unicast filtering with
> fdb automatically so admin do not need to manually add an fdb entry.
> This effectively supports IFF_UNICAST_FLT in bridge, thus adding an
> macvlan device would not place bridge into promiscuous mode as well.
> 
> v2:
> - Test vlan with br_vlan_should_use() in br_fdb_sync_uc() as per
>   Nikolay Aleksandrov.
> 
> Reported-by: Patrick Schaaf <netdev@bof.de>
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

I really need bridging experts to review and ACK/NACK this.

Thanks.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB
  2016-06-11  5:35 ` David Miller
@ 2016-06-11 16:17   ` Nikolay Aleksandrov via Bridge
  2016-06-11 22:50     ` David Miller
  2016-06-12  6:35     ` [Bridge] " Toshiaki Makita
  0 siblings, 2 replies; 8+ messages in thread
From: Nikolay Aleksandrov via Bridge @ 2016-06-11 16:17 UTC (permalink / raw)
  To: David Miller, makita.toshiaki; +Cc: netdev, bridge, netdev

On 06/11/2016 07:35 AM, David Miller wrote:
> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> Date: Mon,  6 Jun 2016 21:20:13 +0900
> 
>> Patrick Schaaf reported that flooding due to a missing fdb entry of
>> the address of macvlan on the bridge device caused high CPU
>> consumption of an openvpn process behind a tap bridge port.
>> Adding an fdb entry of the macvlan address can suppress flooding
>> and avoid this problem.
>>
>> This change makes bridge able to synchronize unicast filtering with
>> fdb automatically so admin do not need to manually add an fdb entry.
>> This effectively supports IFF_UNICAST_FLT in bridge, thus adding an
>> macvlan device would not place bridge into promiscuous mode as well.
>>
>> v2:
>> - Test vlan with br_vlan_should_use() in br_fdb_sync_uc() as per
>>   Nikolay Aleksandrov.
>>
>> Reported-by: Patrick Schaaf <netdev@bof.de>
>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> 
> I really need bridging experts to review and ACK/NACK this.
> 
> Thanks.
> 

Oops, I almost missed the v2, sorry about that. So, technically it looks correct, but
I only fear the scalability impact of the change. If there're a large number of vlans
adding a macvlan (or any device that syncs uc addr) might become very slow and every
flag change will become very slow too without an option to revert to the original
behaviour so we'll have to wait for the entries to be added in order to delete them.
Another common scenario is having 8021q interfaces on top of the bridge with different
mac addresses for some of the configured vlans (or with macvlans on top of them for VRR),
that use case would suffer as well because their macs need to be local only for those vlans,
and not the 2000+ other vlans that might exist.
On every sync_uc() call all the fdb entries get deleted and added again, so even after deleting
some manually they can come back unexpectedly after some operation and also the message storm from
all the deletes and adds could be problematic as well.

E.g. 2000 br0 vlans, 25 macvlans on br0 (adding them took more than 5 minutes, 53k fdb entries):
$ bridge fdb del de:8e:9f:16:c5:71 dev br0 vlan 289
$ ip l set br0 multicast on
$ bridge fdb | grep 289 | grep de:8e:9f:16:c5:71
de:8e:9f:16:c5:71 dev br0 vlan 1289 master br0 permanent
de:8e:9f:16:c5:71 dev br0 vlan 289 master br0 permanent

In fact you can't escape the slow performance even if you delete all entries because on the
next flag change or interface add, they will be added back.

Cheers,
 Nik

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB
  2016-06-11 16:17   ` Nikolay Aleksandrov via Bridge
@ 2016-06-11 22:50     ` David Miller
  2016-06-12  6:35     ` [Bridge] " Toshiaki Makita
  1 sibling, 0 replies; 8+ messages in thread
From: David Miller @ 2016-06-11 22:50 UTC (permalink / raw)
  To: nikolay; +Cc: makita.toshiaki, stephen, netdev, bridge, netdev

From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Sat, 11 Jun 2016 18:17:53 +0200

> Oops, I almost missed the v2, sorry about that. So, technically it
> looks correct, but I only fear the scalability impact of the
> change. If there're a large number of vlans adding a macvlan (or any
> device that syncs uc addr) might become very slow and every flag
> change will become very slow too without an option to revert to the
> original behaviour so we'll have to wait for the entries to be added
> in order to delete them.  Another common scenario is having 8021q
> interfaces on top of the bridge with different mac addresses for
> some of the configured vlans (or with macvlans on top of them for
> VRR), that use case would suffer as well because their macs need to
> be local only for those vlans, and not the 2000+ other vlans that
> might exist.  On every sync_uc() call all the fdb entries get
> deleted and added again, so even after deleting some manually they
> can come back unexpectedly after some operation and also the message
> storm from all the deletes and adds could be problematic as well.
> 
> 
> E.g. 2000 br0 vlans, 25 macvlans on br0 (adding them took more than 5 minutes, 53k fdb entries):
> $ bridge fdb del de:8e:9f:16:c5:71 dev br0 vlan 289
> $ ip l set br0 multicast on
> $ bridge fdb | grep 289 | grep de:8e:9f:16:c5:71
> de:8e:9f:16:c5:71 dev br0 vlan 1289 master br0 permanent
> de:8e:9f:16:c5:71 dev br0 vlan 289 master br0 permanent
> 
> In fact you can't escape the slow performance even if you delete all
> entries because on the next flag change or interface add, they will
> be added back.

Yeah, I think the performance implications are too severe too, I'm
not applying this.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Bridge] [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB
  2016-06-11 16:17   ` Nikolay Aleksandrov via Bridge
  2016-06-11 22:50     ` David Miller
@ 2016-06-12  6:35     ` Toshiaki Makita
  2016-06-13 11:13       ` Toshiaki Makita
  1 sibling, 1 reply; 8+ messages in thread
From: Toshiaki Makita @ 2016-06-12  6:35 UTC (permalink / raw)
  To: Nikolay Aleksandrov, David Miller, makita.toshiaki; +Cc: netdev, bridge, netdev

On 16/06/12 (日) 1:17, Nikolay Aleksandrov via Bridge wrote:
> On 06/11/2016 07:35 AM, David Miller wrote:
>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>> Date: Mon,  6 Jun 2016 21:20:13 +0900
>>
>>> Patrick Schaaf reported that flooding due to a missing fdb entry of
>>> the address of macvlan on the bridge device caused high CPU
>>> consumption of an openvpn process behind a tap bridge port.
>>> Adding an fdb entry of the macvlan address can suppress flooding
>>> and avoid this problem.
>>>
>>> This change makes bridge able to synchronize unicast filtering with
>>> fdb automatically so admin do not need to manually add an fdb entry.
>>> This effectively supports IFF_UNICAST_FLT in bridge, thus adding an
>>> macvlan device would not place bridge into promiscuous mode as well.
>>>
>>> v2:
>>> - Test vlan with br_vlan_should_use() in br_fdb_sync_uc() as per
>>>    Nikolay Aleksandrov.
>>>
>>> Reported-by: Patrick Schaaf <netdev@bof.de>
>>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>
>> I really need bridging experts to review and ACK/NACK this.
>>
>> Thanks.
>>
>
> Oops, I almost missed the v2, sorry about that. So, technically it looks correct, but
> I only fear the scalability impact of the change. If there're a large number of vlans
> adding a macvlan (or any device that syncs uc addr) might become very slow and every
> flag change will become very slow too without an option to revert to the original
> behaviour so we'll have to wait for the entries to be added in order to delete them.
> Another common scenario is having 8021q interfaces on top of the bridge with different
> mac addresses for some of the configured vlans (or with macvlans on top of them for VRR),
> that use case would suffer as well because their macs need to be local only for those vlans,
> and not the 2000+ other vlans that might exist.
> On every sync_uc() call all the fdb entries get deleted and added again, so even after deleting
> some manually they can come back unexpectedly after some operation and also the message storm from
> all the deletes and adds could be problematic as well.
>
> E.g. 2000 br0 vlans, 25 macvlans on br0 (adding them took more than 5 minutes, 53k fdb entries):
> $ bridge fdb del de:8e:9f:16:c5:71 dev br0 vlan 289
> $ ip l set br0 multicast on
> $ bridge fdb | grep 289 | grep de:8e:9f:16:c5:71
> de:8e:9f:16:c5:71 dev br0 vlan 1289 master br0 permanent
> de:8e:9f:16:c5:71 dev br0 vlan 289 master br0 permanent
>
> In fact you can't escape the slow performance even if you delete all entries because on the
> next flag change or interface add, they will be added back.

I still think this auto-sync should be done, otherwise macvlan imposes 
promiscuous mode on bridge even if you manually add such fdb entries.
I believe most of your concern would disappear by making use of 
__dev_uc_sync() instead.
Indeed it seems that there is no easy way to propagate the combination 
of uc addr and vlan from upper device, so local entries for unneeded 
vlan can still be created even if using __dev_uc_sync(). In case you 
worry about those unneeded entries, I can add a knob to disable this 
feature.
Are you comfortable with this change?

Toshiaki Makita

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB
  2016-06-12  6:35     ` [Bridge] " Toshiaki Makita
@ 2016-06-13 11:13       ` Toshiaki Makita
  2016-06-13 11:23         ` [Bridge] " Nikolay Aleksandrov
  0 siblings, 1 reply; 8+ messages in thread
From: Toshiaki Makita @ 2016-06-13 11:13 UTC (permalink / raw)
  To: Nikolay Aleksandrov, David Miller; +Cc: netdev, netdev, bridge

On 2016/06/12 15:35, Toshiaki Makita wrote:
> On 16/06/12 (日) 1:17, Nikolay Aleksandrov via Bridge wrote:
>> On 06/11/2016 07:35 AM, David Miller wrote:
>>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>> Date: Mon,  6 Jun 2016 21:20:13 +0900
>>>
>>>> Patrick Schaaf reported that flooding due to a missing fdb entry of
>>>> the address of macvlan on the bridge device caused high CPU
>>>> consumption of an openvpn process behind a tap bridge port.
>>>> Adding an fdb entry of the macvlan address can suppress flooding
>>>> and avoid this problem.
>>>>
>>>> This change makes bridge able to synchronize unicast filtering with
>>>> fdb automatically so admin do not need to manually add an fdb entry.
>>>> This effectively supports IFF_UNICAST_FLT in bridge, thus adding an
>>>> macvlan device would not place bridge into promiscuous mode as well.
>>>>
>>>> v2:
>>>> - Test vlan with br_vlan_should_use() in br_fdb_sync_uc() as per
>>>>    Nikolay Aleksandrov.
>>>>
>>>> Reported-by: Patrick Schaaf <netdev@bof.de>
>>>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>>
>>> I really need bridging experts to review and ACK/NACK this.
>>>
>>> Thanks.
>>>
>>
>> Oops, I almost missed the v2, sorry about that. So, technically it
>> looks correct, but
>> I only fear the scalability impact of the change. If there're a large
>> number of vlans
>> adding a macvlan (or any device that syncs uc addr) might become very
>> slow and every
>> flag change will become very slow too without an option to revert to
>> the original
>> behaviour so we'll have to wait for the entries to be added in order
>> to delete them.
>> Another common scenario is having 8021q interfaces on top of the
>> bridge with different
>> mac addresses for some of the configured vlans (or with macvlans on
>> top of them for VRR),
>> that use case would suffer as well because their macs need to be local
>> only for those vlans,
>> and not the 2000+ other vlans that might exist.
>> On every sync_uc() call all the fdb entries get deleted and added
>> again, so even after deleting
>> some manually they can come back unexpectedly after some operation and
>> also the message storm from
>> all the deletes and adds could be problematic as well.
>>
>> E.g. 2000 br0 vlans, 25 macvlans on br0 (adding them took more than 5
>> minutes, 53k fdb entries):
>> $ bridge fdb del de:8e:9f:16:c5:71 dev br0 vlan 289
>> $ ip l set br0 multicast on
>> $ bridge fdb | grep 289 | grep de:8e:9f:16:c5:71
>> de:8e:9f:16:c5:71 dev br0 vlan 1289 master br0 permanent
>> de:8e:9f:16:c5:71 dev br0 vlan 289 master br0 permanent
>>
>> In fact you can't escape the slow performance even if you delete all
>> entries because on the
>> next flag change or interface add, they will be added back.
> 
> I still think this auto-sync should be done, otherwise macvlan imposes
> promiscuous mode on bridge even if you manually add such fdb entries.
> I believe most of your concern would disappear by making use of
> __dev_uc_sync() instead.
> Indeed it seems that there is no easy way to propagate the combination
> of uc addr and vlan from upper device, so local entries for unneeded
> vlan can still be created even if using __dev_uc_sync(). In case you
> worry about those unneeded entries, I can add a knob to disable this
> feature.
> Are you comfortable with this change?

Tested performance using __dev_uc_sync() and got expected results.
(Add 3000 br0 vlans, 50 macvlans on br0)

* Without patch
 1.8s

* Patch v2
 2m42s

* Patch using __dev_uc_sync()
 3.5s
 Also, a manually deleted entry is not restored by flag change.


Nikolay, David, I'd like to hear your feedback.
I'm thinking the performance implication now looks reasonable.
If you don't have objection, I'll work on v3 (using __dev_uc_sync() and
knob to disable the feature).

Thanks,
Toshiaki Makita

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Bridge] [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB
  2016-06-13 11:13       ` Toshiaki Makita
@ 2016-06-13 11:23         ` Nikolay Aleksandrov
  2016-06-13 14:49           ` Toshiaki Makita
  0 siblings, 1 reply; 8+ messages in thread
From: Nikolay Aleksandrov @ 2016-06-13 11:23 UTC (permalink / raw)
  To: Toshiaki Makita, David Miller; +Cc: Toshiaki Makita, netdev, bridge, netdev

On 13/06/16 13:13, Toshiaki Makita wrote:
> On 2016/06/12 15:35, Toshiaki Makita wrote:
>> On 16/06/12 (日) 1:17, Nikolay Aleksandrov via Bridge wrote:
>>> On 06/11/2016 07:35 AM, David Miller wrote:
>>>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>>> Date: Mon,  6 Jun 2016 21:20:13 +0900
>>>>
>>>>> Patrick Schaaf reported that flooding due to a missing fdb entry of
>>>>> the address of macvlan on the bridge device caused high CPU
>>>>> consumption of an openvpn process behind a tap bridge port.
>>>>> Adding an fdb entry of the macvlan address can suppress flooding
>>>>> and avoid this problem.
>>>>>
>>>>> This change makes bridge able to synchronize unicast filtering with
>>>>> fdb automatically so admin do not need to manually add an fdb entry.
>>>>> This effectively supports IFF_UNICAST_FLT in bridge, thus adding an
>>>>> macvlan device would not place bridge into promiscuous mode as well.
>>>>>
>>>>> v2:
>>>>> - Test vlan with br_vlan_should_use() in br_fdb_sync_uc() as per
>>>>>     Nikolay Aleksandrov.
>>>>>
>>>>> Reported-by: Patrick Schaaf <netdev@bof.de>
>>>>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>>>
>>>> I really need bridging experts to review and ACK/NACK this.
>>>>
>>>> Thanks.
>>>>
>>>
>>> Oops, I almost missed the v2, sorry about that. So, technically it
>>> looks correct, but
>>> I only fear the scalability impact of the change. If there're a large
>>> number of vlans
>>> adding a macvlan (or any device that syncs uc addr) might become very
>>> slow and every
>>> flag change will become very slow too without an option to revert to
>>> the original
>>> behaviour so we'll have to wait for the entries to be added in order
>>> to delete them.
>>> Another common scenario is having 8021q interfaces on top of the
>>> bridge with different
>>> mac addresses for some of the configured vlans (or with macvlans on
>>> top of them for VRR),
>>> that use case would suffer as well because their macs need to be local
>>> only for those vlans,
>>> and not the 2000+ other vlans that might exist.
>>> On every sync_uc() call all the fdb entries get deleted and added
>>> again, so even after deleting
>>> some manually they can come back unexpectedly after some operation and
>>> also the message storm from
>>> all the deletes and adds could be problematic as well.
>>>
>>> E.g. 2000 br0 vlans, 25 macvlans on br0 (adding them took more than 5
>>> minutes, 53k fdb entries):
>>> $ bridge fdb del de:8e:9f:16:c5:71 dev br0 vlan 289
>>> $ ip l set br0 multicast on
>>> $ bridge fdb | grep 289 | grep de:8e:9f:16:c5:71
>>> de:8e:9f:16:c5:71 dev br0 vlan 1289 master br0 permanent
>>> de:8e:9f:16:c5:71 dev br0 vlan 289 master br0 permanent
>>>
>>> In fact you can't escape the slow performance even if you delete all
>>> entries because on the
>>> next flag change or interface add, they will be added back.
>>
>> I still think this auto-sync should be done, otherwise macvlan imposes
>> promiscuous mode on bridge even if you manually add such fdb entries.
>> I believe most of your concern would disappear by making use of
>> __dev_uc_sync() instead.
>> Indeed it seems that there is no easy way to propagate the combination
>> of uc addr and vlan from upper device, so local entries for unneeded
>> vlan can still be created even if using __dev_uc_sync(). In case you
>> worry about those unneeded entries, I can add a knob to disable this
>> feature.
>> Are you comfortable with this change?
>
> Tested performance using __dev_uc_sync() and got expected results.
> (Add 3000 br0 vlans, 50 macvlans on br0)
>
> * Without patch
>   1.8s
>
> * Patch v2
>   2m42s
Your machine is much faster apparently. :-) It took me ~5 minutes for 25 macvlans
in my VM.

>
> * Patch using __dev_uc_sync()
>   3.5s
>   Also, a manually deleted entry is not restored by flag change.
>
>
> Nikolay, David, I'd like to hear your feedback.
> I'm thinking the performance implication now looks reasonable.
> If you don't have objection, I'll work on v3 (using __dev_uc_sync() and
> knob to disable the feature).
>
> Thanks,
> Toshiaki Makita
>

The numbers sound okay, but I'd have to see the patch to be able to comment
further. I wonder why the push for this change when this can be currently
"fixed" by adding the macs manually as local (pointing to the bridge) ?
Anyway I don't mind having it automated if the change doesn't break anything
or introduce any performance regressions.

Cheers,
  Nik

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Bridge] [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB
  2016-06-13 11:23         ` [Bridge] " Nikolay Aleksandrov
@ 2016-06-13 14:49           ` Toshiaki Makita
  0 siblings, 0 replies; 8+ messages in thread
From: Toshiaki Makita @ 2016-06-13 14:49 UTC (permalink / raw)
  To: Nikolay Aleksandrov, David Miller; +Cc: Toshiaki Makita, netdev, bridge, netdev

On 16/06/13 (月) 20:23, Nikolay Aleksandrov wrote:
> On 13/06/16 13:13, Toshiaki Makita wrote:
>> On 2016/06/12 15:35, Toshiaki Makita wrote:
>>> On 16/06/12 (日) 1:17, Nikolay Aleksandrov via Bridge wrote:
>>>> On 06/11/2016 07:35 AM, David Miller wrote:
>>>>> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>>>> Date: Mon,  6 Jun 2016 21:20:13 +0900
>>>>>
>>>>>> Patrick Schaaf reported that flooding due to a missing fdb entry of
>>>>>> the address of macvlan on the bridge device caused high CPU
>>>>>> consumption of an openvpn process behind a tap bridge port.
>>>>>> Adding an fdb entry of the macvlan address can suppress flooding
>>>>>> and avoid this problem.
>>>>>>
>>>>>> This change makes bridge able to synchronize unicast filtering with
>>>>>> fdb automatically so admin do not need to manually add an fdb entry.
>>>>>> This effectively supports IFF_UNICAST_FLT in bridge, thus adding an
>>>>>> macvlan device would not place bridge into promiscuous mode as well.
>>>>>>
>>>>>> v2:
>>>>>> - Test vlan with br_vlan_should_use() in br_fdb_sync_uc() as per
>>>>>>     Nikolay Aleksandrov.
>>>>>>
>>>>>> Reported-by: Patrick Schaaf <netdev@bof.de>
>>>>>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>>>>>
>>>>> I really need bridging experts to review and ACK/NACK this.
>>>>>
>>>>> Thanks.
>>>>>
>>>>
>>>> Oops, I almost missed the v2, sorry about that. So, technically it
>>>> looks correct, but
>>>> I only fear the scalability impact of the change. If there're a large
>>>> number of vlans
>>>> adding a macvlan (or any device that syncs uc addr) might become very
>>>> slow and every
>>>> flag change will become very slow too without an option to revert to
>>>> the original
>>>> behaviour so we'll have to wait for the entries to be added in order
>>>> to delete them.
>>>> Another common scenario is having 8021q interfaces on top of the
>>>> bridge with different
>>>> mac addresses for some of the configured vlans (or with macvlans on
>>>> top of them for VRR),
>>>> that use case would suffer as well because their macs need to be local
>>>> only for those vlans,
>>>> and not the 2000+ other vlans that might exist.
>>>> On every sync_uc() call all the fdb entries get deleted and added
>>>> again, so even after deleting
>>>> some manually they can come back unexpectedly after some operation and
>>>> also the message storm from
>>>> all the deletes and adds could be problematic as well.
>>>>
>>>> E.g. 2000 br0 vlans, 25 macvlans on br0 (adding them took more than 5
>>>> minutes, 53k fdb entries):
>>>> $ bridge fdb del de:8e:9f:16:c5:71 dev br0 vlan 289
>>>> $ ip l set br0 multicast on
>>>> $ bridge fdb | grep 289 | grep de:8e:9f:16:c5:71
>>>> de:8e:9f:16:c5:71 dev br0 vlan 1289 master br0 permanent
>>>> de:8e:9f:16:c5:71 dev br0 vlan 289 master br0 permanent
>>>>
>>>> In fact you can't escape the slow performance even if you delete all
>>>> entries because on the
>>>> next flag change or interface add, they will be added back.
>>>
>>> I still think this auto-sync should be done, otherwise macvlan imposes
>>> promiscuous mode on bridge even if you manually add such fdb entries.
>>> I believe most of your concern would disappear by making use of
>>> __dev_uc_sync() instead.
>>> Indeed it seems that there is no easy way to propagate the combination
>>> of uc addr and vlan from upper device, so local entries for unneeded
>>> vlan can still be created even if using __dev_uc_sync(). In case you
>>> worry about those unneeded entries, I can add a knob to disable this
>>> feature.
>>> Are you comfortable with this change?
>>
>> Tested performance using __dev_uc_sync() and got expected results.
>> (Add 3000 br0 vlans, 50 macvlans on br0)
>>
>> * Without patch
>>   1.8s
>>
>> * Patch v2
>>   2m42s
> Your machine is much faster apparently. :-) It took me ~5 minutes for 25
> macvlans
> in my VM.
>
>>
>> * Patch using __dev_uc_sync()
>>   3.5s
>>   Also, a manually deleted entry is not restored by flag change.
>>
>>
>> Nikolay, David, I'd like to hear your feedback.
>> I'm thinking the performance implication now looks reasonable.
>> If you don't have objection, I'll work on v3 (using __dev_uc_sync() and
>> knob to disable the feature).
>>
>> Thanks,
>> Toshiaki Makita
>>
>
> The numbers sound okay, but I'd have to see the patch to be able to comment
> further.

Sure.

 > I wonder why the push for this change when this can be currently
> "fixed" by adding the macs manually as local (pointing to the bridge) ?

Well, there are two problems.
One is what Patrick Schaaf reported and can be solved by manually adding 
local entries as well.
The other is that macvlans force the bridge to be promiscuous. That 
means bridge ports cannot be non-promiscuous if any macvlan exists. This 
is because bridge's non-promiscuous feature requires all necessary fdb 
entries to be configured in order to sync them to brport's uc list. 
Without sync_uc of the bridge device, bridge cannot know what addresses 
are needed for brport's uc filter so we cannot get around promiscuous 
mode with macvlans. This is what I feel is worth fixing with this patch. 
I'm thinking this auto non-promiscuous should be transparently done with 
macvlans...
Sorry about the lack of explanation.

Thanks,
Toshiaki Makita

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-06-13 14:49 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-06-06 12:20 [PATCH net-next v2] bridge: Synchronize unicast filtering with FDB Toshiaki Makita
2016-06-11  5:35 ` David Miller
2016-06-11 16:17   ` Nikolay Aleksandrov via Bridge
2016-06-11 22:50     ` David Miller
2016-06-12  6:35     ` [Bridge] " Toshiaki Makita
2016-06-13 11:13       ` Toshiaki Makita
2016-06-13 11:23         ` [Bridge] " Nikolay Aleksandrov
2016-06-13 14:49           ` Toshiaki Makita

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).