Ethernet Bridge development

Ethernet Bridge development
 help / color / mirror / Atom feed

* [Bridge] [PATCH net-next v2 4/4] net: bridge: mdb: allow add/delete for host-joined groups
From: Nikolay Aleksandrov @ 2019-08-14 17:05 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <20190814170501.1808-1-nikolay@cumulusnetworks.com>

Currently this is needed only for user-space compatibility, so that
add/delete requests for objects in the same form as the dumped ones
succeed. Later it can be used for L2 mcast MAC add/delete.

v2: don't send a notification when used from user-space, arm the group
    timer if no ports are left after host entry del

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 net/bridge/br_mdb.c       | 76 +++++++++++++++++++++++++++------------
 net/bridge/br_multicast.c | 30 ++++++++++++----
 net/bridge/br_private.h   |  2 ++
 3 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 985273425117..e0f789296920 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -616,6 +616,19 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 			return err;
 	}
 
+	/* host join */
+	if (!port) {
+		/* don't allow any flags for host-joined groups */
+		if (state)
+			return -EINVAL;
+		if (mp->host_joined)
+			return -EEXIST;
+
+		br_multicast_host_join(mp, false);
+
+		return 0;
+	}
+
 	for (pp = &mp->ports;
 	     (p = mlock_dereference(*pp, br)) != NULL;
 	     pp = &p->next) {
@@ -640,19 +653,21 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
 {
 	struct br_ip ip;
 	struct net_device *dev;
-	struct net_bridge_port *p;
+	struct net_bridge_port *p = NULL;
 	int ret;
 
 	if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return -EINVAL;
 
-	dev = __dev_get_by_index(net, entry->ifindex);
-	if (!dev)
-		return -ENODEV;
+	if (entry->ifindex != br->dev->ifindex) {
+		dev = __dev_get_by_index(net, entry->ifindex);
+		if (!dev)
+			return -ENODEV;
 
-	p = br_port_get_rtnl(dev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
+		p = br_port_get_rtnl(dev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+	}
 
 	__mdb_entry_to_br_ip(entry, &ip);
 
@@ -680,15 +695,19 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	br = netdev_priv(dev);
 
-	pdev = __dev_get_by_index(net, entry->ifindex);
-	if (!pdev)
-		return -ENODEV;
+	if (entry->ifindex != br->dev->ifindex) {
+		pdev = __dev_get_by_index(net, entry->ifindex);
+		if (!pdev)
+			return -ENODEV;
 
-	p = br_port_get_rtnl(pdev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
+		p = br_port_get_rtnl(pdev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+		vg = nbp_vlan_group(p);
+	} else {
+		vg = br_vlan_group(br);
+	}
 
-	vg = nbp_vlan_group(p);
 	/* If vlan filtering is enabled and VLAN is not specified
 	 * install mdb entry on all vlans configured on the port.
 	 */
@@ -727,6 +746,15 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 	if (!mp)
 		goto unlock;
 
+	/* host leave */
+	if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) {
+		br_multicast_host_leave(mp, false);
+		err = 0;
+		if (!mp->ports && netif_running(br->dev))
+			mod_timer(&mp->timer, jiffies);
+		goto unlock;
+	}
+
 	for (pp = &mp->ports;
 	     (p = mlock_dereference(*pp, br)) != NULL;
 	     pp = &p->next) {
@@ -759,9 +787,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct net *net = sock_net(skb->sk);
 	struct net_bridge_vlan_group *vg;
+	struct net_bridge_port *p = NULL;
 	struct net_device *dev, *pdev;
 	struct br_mdb_entry *entry;
-	struct net_bridge_port *p;
 	struct net_bridge_vlan *v;
 	struct net_bridge *br;
 	int err;
@@ -772,15 +800,19 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	br = netdev_priv(dev);
 
-	pdev = __dev_get_by_index(net, entry->ifindex);
-	if (!pdev)
-		return -ENODEV;
+	if (entry->ifindex != br->dev->ifindex) {
+		pdev = __dev_get_by_index(net, entry->ifindex);
+		if (!pdev)
+			return -ENODEV;
 
-	p = br_port_get_rtnl(pdev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
+		p = br_port_get_rtnl(pdev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+		vg = nbp_vlan_group(p);
+	} else {
+		vg = br_vlan_group(br);
+	}
 
-	vg = nbp_vlan_group(p);
 	/* If vlan filtering is enabled and VLAN is not specified
 	 * delete mdb entry on all vlans configured on the port.
 	 */
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9b379e110129..ad12fe3fca8c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -148,8 +148,7 @@ static void br_multicast_group_expired(struct timer_list *t)
 	if (!netif_running(br->dev) || timer_pending(&mp->timer))
 		goto out;
 
-	mp->host_joined = false;
-	br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+	br_multicast_host_leave(mp, true);
 
 	if (mp->ports)
 		goto out;
@@ -512,6 +511,27 @@ static bool br_port_group_equal(struct net_bridge_port_group *p,
 	return ether_addr_equal(src, p->eth_addr);
 }
 
+void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
+{
+	if (!mp->host_joined) {
+		mp->host_joined = true;
+		if (notify)
+			br_mdb_notify(mp->br->dev, NULL, &mp->addr,
+				      RTM_NEWMDB, 0);
+	}
+	mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
+}
+
+void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
+{
+	if (!mp->host_joined)
+		return;
+
+	mp->host_joined = false;
+	if (notify)
+		br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+}
+
 static int br_multicast_add_group(struct net_bridge *br,
 				  struct net_bridge_port *port,
 				  struct br_ip *group,
@@ -534,11 +554,7 @@ static int br_multicast_add_group(struct net_bridge *br,
 		goto err;
 
 	if (!port) {
-		if (!mp->host_joined) {
-			mp->host_joined = true;
-			br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
-		}
-		mod_timer(&mp->timer, now + br->multicast_membership_interval);
+		br_multicast_host_join(mp, true);
 		goto out;
 	}
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b7a4942ff1b3..ce2ab14ee605 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -702,6 +702,8 @@ void br_multicast_get_stats(const struct net_bridge *br,
 			    struct br_mcast_stats *dest);
 void br_mdb_init(void);
 void br_mdb_uninit(void);
+void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
 
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
-- 
2.21.0


^ permalink raw reply related

* [Bridge] [PATCH net-next v2 3/4] net: bridge: mdb: dump host-joined entries as well
From: Nikolay Aleksandrov @ 2019-08-14 17:05 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <20190814170501.1808-1-nikolay@cumulusnetworks.com>

Currently we dump only the port mdb entries, but we can have host-joined
entries on the bridge itself and they should be treated as normal temp
mdbs. They're already notified:
$ bridge monitor all
[MDB]dev br0 port br0 grp ff02::8 temp

Before this patch such a group is not shown in the bridge mdb output, even
though it takes 1 slot and is subject to timing out. If a group is only
host-joined then the mdb show output can even be empty.

After this patch we show the host-joined groups:
$ bridge mdb show
dev br0 port br0 grp ff02::8 temp

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 net/bridge/br_mdb.c | 41 +++++++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 77730983097e..985273425117 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -78,22 +78,35 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
 }
 
 static int __mdb_fill_info(struct sk_buff *skb,
+			   struct net_bridge_mdb_entry *mp,
 			   struct net_bridge_port_group *p)
 {
+	struct timer_list *mtimer;
 	struct nlattr *nest_ent;
 	struct br_mdb_entry e;
+	u8 flags = 0;
+	int ifindex;
 
 	memset(&e, 0, sizeof(e));
-	__mdb_entry_fill_flags(&e, p->flags);
-	e.ifindex = p->port->dev->ifindex;
-	e.vid = p->addr.vid;
-	if (p->addr.proto == htons(ETH_P_IP))
-		e.addr.u.ip4 = p->addr.u.ip4;
+	if (p) {
+		ifindex = p->port->dev->ifindex;
+		mtimer = &p->timer;
+		flags = p->flags;
+	} else {
+		ifindex = mp->br->dev->ifindex;
+		mtimer = &mp->timer;
+	}
+
+	__mdb_entry_fill_flags(&e, flags);
+	e.ifindex = ifindex;
+	e.vid = mp->addr.vid;
+	if (mp->addr.proto == htons(ETH_P_IP))
+		e.addr.u.ip4 = mp->addr.u.ip4;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (p->addr.proto == htons(ETH_P_IPV6))
-		e.addr.u.ip6 = p->addr.u.ip6;
+	if (mp->addr.proto == htons(ETH_P_IPV6))
+		e.addr.u.ip6 = mp->addr.u.ip6;
 #endif
-	e.addr.proto = p->addr.proto;
+	e.addr.proto = mp->addr.proto;
 	nest_ent = nla_nest_start_noflag(skb,
 					 MDBA_MDB_ENTRY_INFO);
 	if (!nest_ent)
@@ -102,7 +115,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
 	if (nla_put_nohdr(skb, sizeof(e), &e) ||
 	    nla_put_u32(skb,
 			MDBA_MDB_EATTR_TIMER,
-			br_timer_value(&p->timer))) {
+			br_timer_value(mtimer))) {
 		nla_nest_cancel(skb, nest_ent);
 		return -EMSGSIZE;
 	}
@@ -139,12 +152,20 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			break;
 		}
 
+		if (mp->host_joined) {
+			err = __mdb_fill_info(skb, mp, NULL);
+			if (err) {
+				nla_nest_cancel(skb, nest2);
+				break;
+			}
+		}
+
 		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
 		      pp = &p->next) {
 			if (!p->port)
 				continue;
 
-			err = __mdb_fill_info(skb, p);
+			err = __mdb_fill_info(skb, mp, p);
 			if (err) {
 				nla_nest_cancel(skb, nest2);
 				goto out;
-- 
2.21.0


^ permalink raw reply related

* [Bridge] [PATCH net-next v2 2/4] net: bridge: mdb: factor out mdb filling
From: Nikolay Aleksandrov @ 2019-08-14 17:04 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <20190814170501.1808-1-nikolay@cumulusnetworks.com>

We have to factor out the mdb fill portion in order to re-use it later for
the bridge mdb entries. No functional changes intended.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 net/bridge/br_mdb.c | 68 ++++++++++++++++++++++++---------------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index ee6208c6d946..77730983097e 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -77,6 +77,40 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
 #endif
 }
 
+static int __mdb_fill_info(struct sk_buff *skb,
+			   struct net_bridge_port_group *p)
+{
+	struct nlattr *nest_ent;
+	struct br_mdb_entry e;
+
+	memset(&e, 0, sizeof(e));
+	__mdb_entry_fill_flags(&e, p->flags);
+	e.ifindex = p->port->dev->ifindex;
+	e.vid = p->addr.vid;
+	if (p->addr.proto == htons(ETH_P_IP))
+		e.addr.u.ip4 = p->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+	if (p->addr.proto == htons(ETH_P_IPV6))
+		e.addr.u.ip6 = p->addr.u.ip6;
+#endif
+	e.addr.proto = p->addr.proto;
+	nest_ent = nla_nest_start_noflag(skb,
+					 MDBA_MDB_ENTRY_INFO);
+	if (!nest_ent)
+		return -EMSGSIZE;
+
+	if (nla_put_nohdr(skb, sizeof(e), &e) ||
+	    nla_put_u32(skb,
+			MDBA_MDB_EATTR_TIMER,
+			br_timer_value(&p->timer))) {
+		nla_nest_cancel(skb, nest_ent);
+		return -EMSGSIZE;
+	}
+	nla_nest_end(skb, nest_ent);
+
+	return 0;
+}
+
 static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct net_device *dev)
 {
@@ -95,7 +129,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 	hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
 		struct net_bridge_port_group *p;
 		struct net_bridge_port_group __rcu **pp;
-		struct net_bridge_port *port;
 
 		if (idx < s_idx)
 			goto skip;
@@ -108,41 +141,14 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 
 		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
 		      pp = &p->next) {
-			struct nlattr *nest_ent;
-			struct br_mdb_entry e;
-
-			port = p->port;
-			if (!port)
+			if (!p->port)
 				continue;
 
-			memset(&e, 0, sizeof(e));
-			e.ifindex = port->dev->ifindex;
-			e.vid = p->addr.vid;
-			__mdb_entry_fill_flags(&e, p->flags);
-			if (p->addr.proto == htons(ETH_P_IP))
-				e.addr.u.ip4 = p->addr.u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
-			if (p->addr.proto == htons(ETH_P_IPV6))
-				e.addr.u.ip6 = p->addr.u.ip6;
-#endif
-			e.addr.proto = p->addr.proto;
-			nest_ent = nla_nest_start_noflag(skb,
-							 MDBA_MDB_ENTRY_INFO);
-			if (!nest_ent) {
-				nla_nest_cancel(skb, nest2);
-				err = -EMSGSIZE;
-				goto out;
-			}
-			if (nla_put_nohdr(skb, sizeof(e), &e) ||
-			    nla_put_u32(skb,
-					MDBA_MDB_EATTR_TIMER,
-					br_timer_value(&p->timer))) {
-				nla_nest_cancel(skb, nest_ent);
+			err = __mdb_fill_info(skb, p);
+			if (err) {
 				nla_nest_cancel(skb, nest2);
-				err = -EMSGSIZE;
 				goto out;
 			}
-			nla_nest_end(skb, nest_ent);
 		}
 		nla_nest_end(skb, nest2);
 skip:
-- 
2.21.0


^ permalink raw reply related

* [Bridge] [PATCH net-next v2 1/4] net: bridge: mdb: move vlan comments
From: Nikolay Aleksandrov @ 2019-08-14 17:04 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <20190814170501.1808-1-nikolay@cumulusnetworks.com>

Trivial patch to move the vlan comments to their proper places above the
vid 0 checks.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 net/bridge/br_mdb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 428af1abf8cc..ee6208c6d946 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -653,9 +653,6 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	br = netdev_priv(dev);
 
-	/* If vlan filtering is enabled and VLAN is not specified
-	 * install mdb entry on all vlans configured on the port.
-	 */
 	pdev = __dev_get_by_index(net, entry->ifindex);
 	if (!pdev)
 		return -ENODEV;
@@ -665,6 +662,9 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	vg = nbp_vlan_group(p);
+	/* If vlan filtering is enabled and VLAN is not specified
+	 * install mdb entry on all vlans configured on the port.
+	 */
 	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
@@ -745,9 +745,6 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	br = netdev_priv(dev);
 
-	/* If vlan filtering is enabled and VLAN is not specified
-	 * delete mdb entry on all vlans configured on the port.
-	 */
 	pdev = __dev_get_by_index(net, entry->ifindex);
 	if (!pdev)
 		return -ENODEV;
@@ -757,6 +754,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	vg = nbp_vlan_group(p);
+	/* If vlan filtering is enabled and VLAN is not specified
+	 * delete mdb entry on all vlans configured on the port.
+	 */
 	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
-- 
2.21.0


^ permalink raw reply related

* [Bridge] [PATCH net-next v2 0/4] net: bridge: mdb: allow dump/add/del of host-joined entries
From: Nikolay Aleksandrov @ 2019-08-14 17:04 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <81258876-5f03-002c-5aa8-2d6d00e6d99e@cumulusnetworks.com>

Hi,
This set makes the bridge dump host-joined mdb entries. They should be
treated as normal entries since they take a slot and are aging out.
We already have notifications for them but we couldn't dump them until
now, so they remained hidden. We dump them similarly to how they're
notified. In order to keep user-space compatibility with the dumped
objects (e.g. iproute2 dumps mdbs in a format which can be fed into
add/del commands), we also allow host-joined groups to be added/deleted
via mdb commands. That can later be used for L2 mcast MAC manipulation as
was recently discussed. Note that iproute2 changes are not necessary;
this set will work with the current user-space mdb code.

Patch 01 - a trivial comment move
Patch 02 - factors out the mdb filling code so it can be
           re-used for the host-joined entries
Patch 03 - dumps host-joined entries
Patch 04 - allows manipulation of host-joined entries via standard mdb
           calls

v2: change patch 04 to avoid double notification and improve host group
    manual removal if no ports are present in the group

Thanks,
 Nik

Nikolay Aleksandrov (4):
  net: bridge: mdb: move vlan comments
  net: bridge: mdb: factor out mdb filling
  net: bridge: mdb: dump host-joined entries as well
  net: bridge: mdb: allow add/delete for host-joined groups

 net/bridge/br_mdb.c       | 173 +++++++++++++++++++++++++-------------
 net/bridge/br_multicast.c |  30 +++++--
 net/bridge/br_private.h   |   2 +
 3 files changed, 141 insertions(+), 64 deletions(-)

-- 
2.21.0

^ permalink raw reply

* Re: [Bridge] [PATCH net-next 0/4] net: bridge: mdb: allow dump/add/del of host-joined entries
From: Nikolay Aleksandrov @ 2019-08-14 16:01 UTC (permalink / raw)
  To: netdev; +Cc: roopa, bridge, davem
In-Reply-To: <20190814144024.9710-1-nikolay@cumulusnetworks.com>

On 8/14/19 5:40 PM, Nikolay Aleksandrov wrote:
> Hi,
> This set makes the bridge dump host-joined mdb entries, they should be
> treated as normal entries since they take a slot and are aging out.
> We already have notifications for them but we couldn't dump them until
> now so they remained hidden. We dump them similar to how they're
> notified, in order to keep user-space compatibility with the dumped
> objects (e.g. iproute2 dumps mdbs in a format which can be fed into
> add/del commands) we allow host-joined groups also to be added/deleted via
> mdb commands. That can later be used for L2 mcast MAC manipulation as
> was recently discussed. Note that iproute2 changes are not necessary,
> this set will work with the current user-space mdb code.
> 
> Patch 01 - a trivial comment move
> Patch 02 - factors out the mdb filling code so it can be
>            re-used for the host-joined entries
> Patch 03 - dumps host-joined entries
> Patch 04 - allows manipulation of host-joined entries via standard mdb
>            calls
> 
> Thanks,
>  Nik
> 
> Nikolay Aleksandrov (4):
>   net: bridge: mdb: move vlan comments
>   net: bridge: mdb: factor out mdb filling
>   net: bridge: mdb: dump host-joined entries as well
>   net: bridge: mdb: allow add/delete for host-joined groups
> 
>  net/bridge/br_mdb.c       | 171 +++++++++++++++++++++++++-------------
>  net/bridge/br_multicast.c |  24 ++++--
>  net/bridge/br_private.h   |   2 +
>  3 files changed, 133 insertions(+), 64 deletions(-)
> 

Self-NAK
There's a double notification sent for manual add/del of host groups.
It's a trivial fix, I'll spin v2 later after running more tests.


^ permalink raw reply

* [Bridge] [PATCH net-next 4/4] net: bridge: mdb: allow add/delete for host-joined groups
From: Nikolay Aleksandrov @ 2019-08-14 14:40 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <20190814144024.9710-1-nikolay@cumulusnetworks.com>

Currently this is needed only for user-space compatibility, so that
add/delete requests for objects in the same form as the dumped ones
succeed. Later it can be used for L2 mcast MAC add/delete.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 net/bridge/br_mdb.c       | 74 +++++++++++++++++++++++++++------------
 net/bridge/br_multicast.c | 24 +++++++++----
 net/bridge/br_private.h   |  2 ++
 3 files changed, 71 insertions(+), 29 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 985273425117..331a130b83b1 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -616,6 +616,19 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 			return err;
 	}
 
+	/* host join */
+	if (!port) {
+		/* don't allow any flags for host-joined groups */
+		if (state)
+			return -EINVAL;
+		if (mp->host_joined)
+			return -EEXIST;
+
+		br_multicast_host_join(mp);
+
+		return 0;
+	}
+
 	for (pp = &mp->ports;
 	     (p = mlock_dereference(*pp, br)) != NULL;
 	     pp = &p->next) {
@@ -640,19 +653,21 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
 {
 	struct br_ip ip;
 	struct net_device *dev;
-	struct net_bridge_port *p;
+	struct net_bridge_port *p = NULL;
 	int ret;
 
 	if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
 		return -EINVAL;
 
-	dev = __dev_get_by_index(net, entry->ifindex);
-	if (!dev)
-		return -ENODEV;
+	if (entry->ifindex != br->dev->ifindex) {
+		dev = __dev_get_by_index(net, entry->ifindex);
+		if (!dev)
+			return -ENODEV;
 
-	p = br_port_get_rtnl(dev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
+		p = br_port_get_rtnl(dev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+	}
 
 	__mdb_entry_to_br_ip(entry, &ip);
 
@@ -680,15 +695,19 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	br = netdev_priv(dev);
 
-	pdev = __dev_get_by_index(net, entry->ifindex);
-	if (!pdev)
-		return -ENODEV;
+	if (entry->ifindex != br->dev->ifindex) {
+		pdev = __dev_get_by_index(net, entry->ifindex);
+		if (!pdev)
+			return -ENODEV;
 
-	p = br_port_get_rtnl(pdev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
+		p = br_port_get_rtnl(pdev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+		vg = nbp_vlan_group(p);
+	} else {
+		vg = br_vlan_group(br);
+	}
 
-	vg = nbp_vlan_group(p);
 	/* If vlan filtering is enabled and VLAN is not specified
 	 * install mdb entry on all vlans configured on the port.
 	 */
@@ -727,6 +746,13 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 	if (!mp)
 		goto unlock;
 
+	/* host leave */
+	if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) {
+		br_multicast_host_leave(mp);
+		err = 0;
+		goto unlock;
+	}
+
 	for (pp = &mp->ports;
 	     (p = mlock_dereference(*pp, br)) != NULL;
 	     pp = &p->next) {
@@ -759,9 +785,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct net *net = sock_net(skb->sk);
 	struct net_bridge_vlan_group *vg;
+	struct net_bridge_port *p = NULL;
 	struct net_device *dev, *pdev;
 	struct br_mdb_entry *entry;
-	struct net_bridge_port *p;
 	struct net_bridge_vlan *v;
 	struct net_bridge *br;
 	int err;
@@ -772,15 +798,19 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	br = netdev_priv(dev);
 
-	pdev = __dev_get_by_index(net, entry->ifindex);
-	if (!pdev)
-		return -ENODEV;
+	if (entry->ifindex != br->dev->ifindex) {
+		pdev = __dev_get_by_index(net, entry->ifindex);
+		if (!pdev)
+			return -ENODEV;
 
-	p = br_port_get_rtnl(pdev);
-	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
-		return -EINVAL;
+		p = br_port_get_rtnl(pdev);
+		if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+			return -EINVAL;
+		vg = nbp_vlan_group(p);
+	} else {
+		vg = br_vlan_group(br);
+	}
 
-	vg = nbp_vlan_group(p);
 	/* If vlan filtering is enabled and VLAN is not specified
 	 * delete mdb entry on all vlans configured on the port.
 	 */
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9b379e110129..f92cb6751898 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -148,8 +148,7 @@ static void br_multicast_group_expired(struct timer_list *t)
 	if (!netif_running(br->dev) || timer_pending(&mp->timer))
 		goto out;
 
-	mp->host_joined = false;
-	br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+	br_multicast_host_leave(mp);
 
 	if (mp->ports)
 		goto out;
@@ -512,6 +511,21 @@ static bool br_port_group_equal(struct net_bridge_port_group *p,
 	return ether_addr_equal(src, p->eth_addr);
 }
 
+void br_multicast_host_join(struct net_bridge_mdb_entry *mp)
+{
+	if (!mp->host_joined) {
+		mp->host_joined = true;
+		br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
+	}
+	mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
+}
+
+void br_multicast_host_leave(struct net_bridge_mdb_entry *mp)
+{
+	mp->host_joined = false;
+	br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+}
+
 static int br_multicast_add_group(struct net_bridge *br,
 				  struct net_bridge_port *port,
 				  struct br_ip *group,
@@ -534,11 +548,7 @@ static int br_multicast_add_group(struct net_bridge *br,
 		goto err;
 
 	if (!port) {
-		if (!mp->host_joined) {
-			mp->host_joined = true;
-			br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0);
-		}
-		mod_timer(&mp->timer, now + br->multicast_membership_interval);
+		br_multicast_host_join(mp);
 		goto out;
 	}
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b7a4942ff1b3..a99dcbb9825c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -702,6 +702,8 @@ void br_multicast_get_stats(const struct net_bridge *br,
 			    struct br_mcast_stats *dest);
 void br_mdb_init(void);
 void br_mdb_uninit(void);
+void br_multicast_host_join(struct net_bridge_mdb_entry *mp);
+void br_multicast_host_leave(struct net_bridge_mdb_entry *mp);
 
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
-- 
2.21.0


^ permalink raw reply related

* [Bridge] [PATCH net-next 3/4] net: bridge: mdb: dump host-joined entries as well
From: Nikolay Aleksandrov @ 2019-08-14 14:40 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <20190814144024.9710-1-nikolay@cumulusnetworks.com>

Currently we dump only the port mdb entries, but we can have host-joined
entries on the bridge itself and they should be treated as normal temp
mdbs. They're already notified:
$ bridge monitor all
[MDB]dev br0 port br0 grp ff02::8 temp

Before this patch such a group is not shown in the bridge mdb output, even
though it takes 1 slot and is subject to timing out. If a group is only
host-joined then the mdb show output can even be empty.

After this patch we show the host-joined groups:
$ bridge mdb show
dev br0 port br0 grp ff02::8 temp

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 net/bridge/br_mdb.c | 41 +++++++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 77730983097e..985273425117 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -78,22 +78,35 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
 }
 
 static int __mdb_fill_info(struct sk_buff *skb,
+			   struct net_bridge_mdb_entry *mp,
 			   struct net_bridge_port_group *p)
 {
+	struct timer_list *mtimer;
 	struct nlattr *nest_ent;
 	struct br_mdb_entry e;
+	u8 flags = 0;
+	int ifindex;
 
 	memset(&e, 0, sizeof(e));
-	__mdb_entry_fill_flags(&e, p->flags);
-	e.ifindex = p->port->dev->ifindex;
-	e.vid = p->addr.vid;
-	if (p->addr.proto == htons(ETH_P_IP))
-		e.addr.u.ip4 = p->addr.u.ip4;
+	if (p) {
+		ifindex = p->port->dev->ifindex;
+		mtimer = &p->timer;
+		flags = p->flags;
+	} else {
+		ifindex = mp->br->dev->ifindex;
+		mtimer = &mp->timer;
+	}
+
+	__mdb_entry_fill_flags(&e, flags);
+	e.ifindex = ifindex;
+	e.vid = mp->addr.vid;
+	if (mp->addr.proto == htons(ETH_P_IP))
+		e.addr.u.ip4 = mp->addr.u.ip4;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (p->addr.proto == htons(ETH_P_IPV6))
-		e.addr.u.ip6 = p->addr.u.ip6;
+	if (mp->addr.proto == htons(ETH_P_IPV6))
+		e.addr.u.ip6 = mp->addr.u.ip6;
 #endif
-	e.addr.proto = p->addr.proto;
+	e.addr.proto = mp->addr.proto;
 	nest_ent = nla_nest_start_noflag(skb,
 					 MDBA_MDB_ENTRY_INFO);
 	if (!nest_ent)
@@ -102,7 +115,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
 	if (nla_put_nohdr(skb, sizeof(e), &e) ||
 	    nla_put_u32(skb,
 			MDBA_MDB_EATTR_TIMER,
-			br_timer_value(&p->timer))) {
+			br_timer_value(mtimer))) {
 		nla_nest_cancel(skb, nest_ent);
 		return -EMSGSIZE;
 	}
@@ -139,12 +152,20 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			break;
 		}
 
+		if (mp->host_joined) {
+			err = __mdb_fill_info(skb, mp, NULL);
+			if (err) {
+				nla_nest_cancel(skb, nest2);
+				break;
+			}
+		}
+
 		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
 		      pp = &p->next) {
 			if (!p->port)
 				continue;
 
-			err = __mdb_fill_info(skb, p);
+			err = __mdb_fill_info(skb, mp, p);
 			if (err) {
 				nla_nest_cancel(skb, nest2);
 				goto out;
-- 
2.21.0


^ permalink raw reply related

* [Bridge] [PATCH net-next 2/4] net: bridge: mdb: factor out mdb filling
From: Nikolay Aleksandrov @ 2019-08-14 14:40 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <20190814144024.9710-1-nikolay@cumulusnetworks.com>

We have to factor out the mdb fill portion in order to re-use it later for
the bridge mdb entries. No functional changes intended.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 net/bridge/br_mdb.c | 68 ++++++++++++++++++++++++---------------------
 1 file changed, 37 insertions(+), 31 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index ee6208c6d946..77730983097e 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -77,6 +77,40 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
 #endif
 }
 
+static int __mdb_fill_info(struct sk_buff *skb,
+			   struct net_bridge_port_group *p)
+{
+	struct nlattr *nest_ent;
+	struct br_mdb_entry e;
+
+	memset(&e, 0, sizeof(e));
+	__mdb_entry_fill_flags(&e, p->flags);
+	e.ifindex = p->port->dev->ifindex;
+	e.vid = p->addr.vid;
+	if (p->addr.proto == htons(ETH_P_IP))
+		e.addr.u.ip4 = p->addr.u.ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+	if (p->addr.proto == htons(ETH_P_IPV6))
+		e.addr.u.ip6 = p->addr.u.ip6;
+#endif
+	e.addr.proto = p->addr.proto;
+	nest_ent = nla_nest_start_noflag(skb,
+					 MDBA_MDB_ENTRY_INFO);
+	if (!nest_ent)
+		return -EMSGSIZE;
+
+	if (nla_put_nohdr(skb, sizeof(e), &e) ||
+	    nla_put_u32(skb,
+			MDBA_MDB_EATTR_TIMER,
+			br_timer_value(&p->timer))) {
+		nla_nest_cancel(skb, nest_ent);
+		return -EMSGSIZE;
+	}
+	nla_nest_end(skb, nest_ent);
+
+	return 0;
+}
+
 static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct net_device *dev)
 {
@@ -95,7 +129,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 	hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
 		struct net_bridge_port_group *p;
 		struct net_bridge_port_group __rcu **pp;
-		struct net_bridge_port *port;
 
 		if (idx < s_idx)
 			goto skip;
@@ -108,41 +141,14 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 
 		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
 		      pp = &p->next) {
-			struct nlattr *nest_ent;
-			struct br_mdb_entry e;
-
-			port = p->port;
-			if (!port)
+			if (!p->port)
 				continue;
 
-			memset(&e, 0, sizeof(e));
-			e.ifindex = port->dev->ifindex;
-			e.vid = p->addr.vid;
-			__mdb_entry_fill_flags(&e, p->flags);
-			if (p->addr.proto == htons(ETH_P_IP))
-				e.addr.u.ip4 = p->addr.u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
-			if (p->addr.proto == htons(ETH_P_IPV6))
-				e.addr.u.ip6 = p->addr.u.ip6;
-#endif
-			e.addr.proto = p->addr.proto;
-			nest_ent = nla_nest_start_noflag(skb,
-							 MDBA_MDB_ENTRY_INFO);
-			if (!nest_ent) {
-				nla_nest_cancel(skb, nest2);
-				err = -EMSGSIZE;
-				goto out;
-			}
-			if (nla_put_nohdr(skb, sizeof(e), &e) ||
-			    nla_put_u32(skb,
-					MDBA_MDB_EATTR_TIMER,
-					br_timer_value(&p->timer))) {
-				nla_nest_cancel(skb, nest_ent);
+			err = __mdb_fill_info(skb, p);
+			if (err) {
 				nla_nest_cancel(skb, nest2);
-				err = -EMSGSIZE;
 				goto out;
 			}
-			nla_nest_end(skb, nest_ent);
 		}
 		nla_nest_end(skb, nest2);
 skip:
-- 
2.21.0


^ permalink raw reply related

* [Bridge] [PATCH net-next 1/4] net: bridge: mdb: move vlan comments
From: Nikolay Aleksandrov @ 2019-08-14 14:40 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem
In-Reply-To: <20190814144024.9710-1-nikolay@cumulusnetworks.com>

Trivial patch to move the vlan comments in their proper places above the
vid 0 checks.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
---
 net/bridge/br_mdb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 428af1abf8cc..ee6208c6d946 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -653,9 +653,6 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	br = netdev_priv(dev);
 
-	/* If vlan filtering is enabled and VLAN is not specified
-	 * install mdb entry on all vlans configured on the port.
-	 */
 	pdev = __dev_get_by_index(net, entry->ifindex);
 	if (!pdev)
 		return -ENODEV;
@@ -665,6 +662,9 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	vg = nbp_vlan_group(p);
+	/* If vlan filtering is enabled and VLAN is not specified
+	 * install mdb entry on all vlans configured on the port.
+	 */
 	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
@@ -745,9 +745,6 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	br = netdev_priv(dev);
 
-	/* If vlan filtering is enabled and VLAN is not specified
-	 * delete mdb entry on all vlans configured on the port.
-	 */
 	pdev = __dev_get_by_index(net, entry->ifindex);
 	if (!pdev)
 		return -ENODEV;
@@ -757,6 +754,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 
 	vg = nbp_vlan_group(p);
+	/* If vlan filtering is enabled and VLAN is not specified
+	 * delete mdb entry on all vlans configured on the port.
+	 */
 	if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			entry->vid = v->vid;
-- 
2.21.0


^ permalink raw reply related

* [Bridge] [PATCH net-next 0/4] net: bridge: mdb: allow dump/add/del of host-joined entries
From: Nikolay Aleksandrov @ 2019-08-14 14:40 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov, roopa, bridge, davem

Hi,
This set makes the bridge dump host-joined mdb entries, they should be
treated as normal entries since they take a slot and are aging out.
We already have notifications for them but we couldn't dump them until
now so they remained hidden. We dump them similar to how they're
notified, in order to keep user-space compatibility with the dumped
objects (e.g. iproute2 dumps mdbs in a format which can be fed into
add/del commands) we allow host-joined groups also to be added/deleted via
mdb commands. That can later be used for L2 mcast MAC manipulation as
was recently discussed. Note that iproute2 changes are not necessary,
this set will work with the current user-space mdb code.

Patch 01 - a trivial comment move
Patch 02 - factors out the mdb filling code so it can be
           re-used for the host-joined entries
Patch 03 - dumps host-joined entries
Patch 04 - allows manipulation of host-joined entries via standard mdb
           calls

Thanks,
 Nik

Nikolay Aleksandrov (4):
  net: bridge: mdb: move vlan comments
  net: bridge: mdb: factor out mdb filling
  net: bridge: mdb: dump host-joined entries as well
  net: bridge: mdb: allow add/delete for host-joined groups

 net/bridge/br_mdb.c       | 171 +++++++++++++++++++++++++-------------
 net/bridge/br_multicast.c |  24 ++++--
 net/bridge/br_private.h   |   2 +
 3 files changed, 133 insertions(+), 64 deletions(-)

-- 
2.21.0

^ permalink raw reply

* [Bridge] linux bridge does not forward arp reply back packets in a vmware vm
From: Ben Shaw @ 2019-08-09  5:56 UTC (permalink / raw)
  To: bridge

[-- Attachment #1: Type: text/plain, Size: 1711 bytes --]

Hi,

there was a post on here with the same title in 2017 which seemed to
discuss the same issue I was seeing today on my ESXi environment with a
bridged Ubuntu VM. A host would not receive ARP replies because the Ubuntu
VM between it and the ARP responder would drop the replies.

My issues seemed to reflect the previous poster's issue identically and
after some testing I can see that what is happening is that the ARP request
was being forwarded by the Ubuntu VM and then by the ESXi vswitch out one
of the two physical uplinks it had to the physical switched environment.
Because ESXi vswitches don't participate in STP, and hence do not block ports,
that ARP request would be switched back to the same vSwitch via the second
uplink port and be forwarded back to the Ubuntu bridged VM.

This would cause the Ubuntu VM to see the host sending the ARP request to
be off the other interface so when the ARP reply was received the Ubuntu
host would drop the reply and not forward out the interface on the bridge
the intended recipient was actually on. This could be seen in the brctl
showmacs output with the MAC address in question appearing on the wrong
bridge member port as shown below where the first entry should actually be
off port 2.

LAB-SOH01:~$ brctl showmacs br0 | grep no
port no mac addr                is local?       ageing timer
  1     00:0c:29:e7:e4:37       no                12.58
  1     00:a0:c9:0f:02:01       no                 2.00
LAB-SOH01:~$

A work around so far is to disable one of the physical uplinks to the
physical network. I will have to think if there is a better solution which
maintains redundancy. Hopefully this may be of help to some others who see
the same issue.

Thanks

[-- Attachment #2: Type: text/html, Size: 1955 bytes --]

^ permalink raw reply

* Re: [Bridge] [PATCH v2 1/1] net: bridge: use mac_len in bridge forwarding
From: Nikolay Aleksandrov @ 2019-08-08  8:34 UTC (permalink / raw)
  To: Zahari Doychev
  Cc: makita.toshiaki, dsahern, jiri, netdev, roopa, bridge, jhs,
	simon.horman, xiyou.wangcong, johannes, alexei.starovoitov, ast
In-Reply-To: <20190808080428.o2eqqfdscl274sr5@tycho>

On 08/08/2019 11:04, Zahari Doychev wrote:
> On Wed, Aug 07, 2019 at 12:17:43PM +0300, Nikolay Aleksandrov wrote:
>> Hi Zahari,
>> On 05/08/2019 18:37, Zahari Doychev wrote:
>>> The bridge code cannot forward packets from various paths that set up the
>>> SKBs in different ways. Some of these packets get corrupted during the
>>> forwarding as not always is just ETH_HLEN pulled at the front. This happens
>>> e.g. when VLAN tags are pushed bu using tc act_vlan on ingress.
>> Overall the patch looks good, I think it shouldn't introduce any regressions
>> at least from the codepaths I was able to inspect, but please include more
>> details in here from the cover letter, in fact you don't need it just add all of
>> the details here so we have them, especially the test setup. Also please provide
>> some details how this patch was tested. It'd be great if you could provide a
>> selftest for it so we can make sure it's considered when doing future changes.
> 
> Hi Nik,
> 
> Thanks for the reply. I will do the suggested corrections and try creating a
> selftest. I assume it should go to the net/forwarding together with the other
> bridge tests as a separate patch.
> 
> Regards,
> Zahari
> 

Hi,
Yes, the selftest should target net-next and go to net/forwarding.

Thanks,
 Nik

>>
>> Thank you,
>>  Nik
>>
>>>
>>> The problem is fixed by using skb->mac_len instead of ETH_HLEN, which makes
>>> sure that the skb headers are correctly restored. This usually does not
>>> change anything, execpt the local bridge transmits which now need to set
>>> the skb->mac_len correctly in br_dev_xmit, as well as the broken case noted
>>> above.
>>>
>>> Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
>>> ---
>>>  net/bridge/br_device.c  | 3 ++-
>>>  net/bridge/br_forward.c | 4 ++--
>>>  net/bridge/br_vlan.c    | 3 ++-
>>>  3 files changed, 6 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
>>> index 681b72862c16..aeb77ff60311 100644
>>> --- a/net/bridge/br_device.c
>>> +++ b/net/bridge/br_device.c
>>> @@ -55,8 +55,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>>>  	BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
>>>  
>>>  	skb_reset_mac_header(skb);
>>> +	skb_reset_mac_len(skb);
>>>  	eth = eth_hdr(skb);
>>> -	skb_pull(skb, ETH_HLEN);
>>> +	skb_pull(skb, skb->mac_len);
>>>  
>>>  	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
>>>  		goto out;
>>> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
>>> index 86637000f275..edb4f3533f05 100644
>>> --- a/net/bridge/br_forward.c
>>> +++ b/net/bridge/br_forward.c
>>> @@ -32,7 +32,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
>>>  
>>>  int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
>>>  {
>>> -	skb_push(skb, ETH_HLEN);
>>> +	skb_push(skb, skb->mac_len);
>>>  	if (!is_skb_forwardable(skb->dev, skb))
>>>  		goto drop;
>>>  
>>> @@ -94,7 +94,7 @@ static void __br_forward(const struct net_bridge_port *to,
>>>  		net = dev_net(indev);
>>>  	} else {
>>>  		if (unlikely(netpoll_tx_running(to->br->dev))) {
>>> -			skb_push(skb, ETH_HLEN);
>>> +			skb_push(skb, skb->mac_len);
>>>  			if (!is_skb_forwardable(skb->dev, skb))
>>>  				kfree_skb(skb);
>>>  			else
>>> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
>>> index 021cc9f66804..88244c9cc653 100644
>>> --- a/net/bridge/br_vlan.c
>>> +++ b/net/bridge/br_vlan.c
>>> @@ -466,13 +466,14 @@ static bool __allowed_ingress(const struct net_bridge *br,
>>>  		/* Tagged frame */
>>>  		if (skb->vlan_proto != br->vlan_proto) {
>>>  			/* Protocol-mismatch, empty out vlan_tci for new tag */
>>> -			skb_push(skb, ETH_HLEN);
>>> +			skb_push(skb, skb->mac_len);
>>>  			skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
>>>  							skb_vlan_tag_get(skb));
>>>  			if (unlikely(!skb))
>>>  				return false;
>>>  
>>>  			skb_pull(skb, ETH_HLEN);
>>> +			skb_reset_network_header(skb);
>>>  			skb_reset_mac_len(skb);
>>>  			*vid = 0;
>>>  			tagged = false;
>>>
>>


^ permalink raw reply

* Re: [Bridge] [PATCH v2 1/1] net: bridge: use mac_len in bridge forwarding
From: Zahari Doychev @ 2019-08-08  8:04 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: makita.toshiaki, dsahern, jiri, netdev, roopa, bridge, jhs,
	simon.horman, xiyou.wangcong, johannes, alexei.starovoitov, ast
In-Reply-To: <48058179-9690-54e2-f60c-c372446bfde9@cumulusnetworks.com>

On Wed, Aug 07, 2019 at 12:17:43PM +0300, Nikolay Aleksandrov wrote:
> Hi Zahari,
> On 05/08/2019 18:37, Zahari Doychev wrote:
> > The bridge code cannot forward packets from various paths that set up the
> > SKBs in different ways. Some of these packets get corrupted during the
> > forwarding as not always is just ETH_HLEN pulled at the front. This happens
> > e.g. when VLAN tags are pushed bu using tc act_vlan on ingress.
> Overall the patch looks good, I think it shouldn't introduce any regressions
> at least from the codepaths I was able to inspect, but please include more
> details in here from the cover letter, in fact you don't need it just add all of
> the details here so we have them, especially the test setup. Also please provide
> some details how this patch was tested. It'd be great if you could provide a
> selftest for it so we can make sure it's considered when doing future changes.

Hi Nik,

Thanks for the reply. I will do the suggested corrections and try creating a
selftest. I assume it should go to the net/forwarding together with the other
bridge tests as a separate patch.

Regards,
Zahari

> 
> Thank you,
>  Nik
> 
> > 
> > The problem is fixed by using skb->mac_len instead of ETH_HLEN, which makes
> > sure that the skb headers are correctly restored. This usually does not
> > change anything, execpt the local bridge transmits which now need to set
> > the skb->mac_len correctly in br_dev_xmit, as well as the broken case noted
> > above.
> > 
> > Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
> > ---
> >  net/bridge/br_device.c  | 3 ++-
> >  net/bridge/br_forward.c | 4 ++--
> >  net/bridge/br_vlan.c    | 3 ++-
> >  3 files changed, 6 insertions(+), 4 deletions(-)
> > 
> > diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> > index 681b72862c16..aeb77ff60311 100644
> > --- a/net/bridge/br_device.c
> > +++ b/net/bridge/br_device.c
> > @@ -55,8 +55,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> >  	BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
> >  
> >  	skb_reset_mac_header(skb);
> > +	skb_reset_mac_len(skb);
> >  	eth = eth_hdr(skb);
> > -	skb_pull(skb, ETH_HLEN);
> > +	skb_pull(skb, skb->mac_len);
> >  
> >  	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
> >  		goto out;
> > diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
> > index 86637000f275..edb4f3533f05 100644
> > --- a/net/bridge/br_forward.c
> > +++ b/net/bridge/br_forward.c
> > @@ -32,7 +32,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
> >  
> >  int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
> >  {
> > -	skb_push(skb, ETH_HLEN);
> > +	skb_push(skb, skb->mac_len);
> >  	if (!is_skb_forwardable(skb->dev, skb))
> >  		goto drop;
> >  
> > @@ -94,7 +94,7 @@ static void __br_forward(const struct net_bridge_port *to,
> >  		net = dev_net(indev);
> >  	} else {
> >  		if (unlikely(netpoll_tx_running(to->br->dev))) {
> > -			skb_push(skb, ETH_HLEN);
> > +			skb_push(skb, skb->mac_len);
> >  			if (!is_skb_forwardable(skb->dev, skb))
> >  				kfree_skb(skb);
> >  			else
> > diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> > index 021cc9f66804..88244c9cc653 100644
> > --- a/net/bridge/br_vlan.c
> > +++ b/net/bridge/br_vlan.c
> > @@ -466,13 +466,14 @@ static bool __allowed_ingress(const struct net_bridge *br,
> >  		/* Tagged frame */
> >  		if (skb->vlan_proto != br->vlan_proto) {
> >  			/* Protocol-mismatch, empty out vlan_tci for new tag */
> > -			skb_push(skb, ETH_HLEN);
> > +			skb_push(skb, skb->mac_len);
> >  			skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
> >  							skb_vlan_tag_get(skb));
> >  			if (unlikely(!skb))
> >  				return false;
> >  
> >  			skb_pull(skb, ETH_HLEN);
> > +			skb_reset_network_header(skb);
> >  			skb_reset_mac_len(skb);
> >  			*vid = 0;
> >  			tagged = false;
> > 
> 

^ permalink raw reply

* Re: [Bridge] linux-next: Tree for Aug 7 (net/bridge/netfilter/nf_conntrack_bridge.c)
From: Jeremy Sowden @ 2019-08-07 15:57 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: bridge, Stephen Rothwell, Linux Next Mailing List,
	Linux Kernel Mailing List, netdev@vger.kernel.org
In-Reply-To: <f54391d9-6259-d08b-8b5f-c844093071d8@infradead.org>

[-- Attachment #1: Type: text/plain, Size: 846 bytes --]

On 2019-08-07, at 08:29:44 -0700, Randy Dunlap wrote:
> On 8/7/19 1:36 AM, Stephen Rothwell wrote:
> > Hi all,
> >
> > Changes since 20190806:
>
> on i386:
> when CONFIG_NF_TABLES is not set/enabled:
>
>   CC      net/bridge/netfilter/nf_conntrack_bridge.o
> In file included from
> ../net/bridge/netfilter/nf_conntrack_bridge.c:21:0:
> ../include/net/netfilter/nf_tables.h: In function
> ‘nft_gencursor_next’:
> ../include/net/netfilter/nf_tables.h:1224:14: error: ‘const struct
> net’ has no member named ‘nft’; did you mean ‘nf’?
>   return net->nft.gencursor + 1 == 1 ? 1 : 0;
>               ^~~

I've just posted a series of fixes for netfilter header compilation
failures, and I think it includes the fix for that:

  https://lore.kernel.org/netdev/20190807141705.4864-5-jeremy@azazel.net/T/#u

J.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [Bridge] linux-next: Tree for Aug 7 (net/bridge/netfilter/nf_conntrack_bridge.c)
From: Randy Dunlap @ 2019-08-07 15:29 UTC (permalink / raw)
  To: Stephen Rothwell, Linux Next Mailing List
  Cc: netdev@vger.kernel.org, bridge, Linux Kernel Mailing List
In-Reply-To: <20190807183606.372ca1a4@canb.auug.org.au>

On 8/7/19 1:36 AM, Stephen Rothwell wrote:
> Hi all,
> 
> Changes since 20190806:
> 

on i386:
when CONFIG_NF_TABLES is not set/enabled:

  CC      net/bridge/netfilter/nf_conntrack_bridge.o
In file included from ../net/bridge/netfilter/nf_conntrack_bridge.c:21:0:
../include/net/netfilter/nf_tables.h: In function ‘nft_gencursor_next’:
../include/net/netfilter/nf_tables.h:1224:14: error: ‘const struct net’ has no member named ‘nft’; did you mean ‘nf’?
  return net->nft.gencursor + 1 == 1 ? 1 : 0;
              ^~~
              nf
In file included from ../include/linux/export.h:45:0,
                 from ../include/linux/linkage.h:7,
                 from ../include/linux/kernel.h:8,
                 from ../include/linux/skbuff.h:13,
                 from ../include/linux/ip.h:16,
                 from ../net/bridge/netfilter/nf_conntrack_bridge.c:3:
../include/net/netfilter/nf_tables.h: In function ‘nft_genmask_cur’:
../include/net/netfilter/nf_tables.h:1235:29: error: ‘const struct net’ has no member named ‘nft’; did you mean ‘nf’?
  return 1 << READ_ONCE(net->nft.gencursor);
                             ^
../include/linux/compiler.h:261:17: note: in definition of macro ‘__READ_ONCE’
  union { typeof(x) __val; char __c[1]; } __u;   \
                 ^
../include/net/netfilter/nf_tables.h:1235:14: note: in expansion of macro ‘READ_ONCE’
  return 1 << READ_ONCE(net->nft.gencursor);
              ^~~~~~~~~
../include/net/netfilter/nf_tables.h:1235:29: error: ‘const struct net’ has no member named ‘nft’; did you mean ‘nf’?
  return 1 << READ_ONCE(net->nft.gencursor);
                             ^
../include/linux/compiler.h:263:22: note: in definition of macro ‘__READ_ONCE’
   __read_once_size(&(x), __u.__c, sizeof(x));  \
                      ^
../include/net/netfilter/nf_tables.h:1235:14: note: in expansion of macro ‘READ_ONCE’
  return 1 << READ_ONCE(net->nft.gencursor);
              ^~~~~~~~~
../include/net/netfilter/nf_tables.h:1235:29: error: ‘const struct net’ has no member named ‘nft’; did you mean ‘nf’?
  return 1 << READ_ONCE(net->nft.gencursor);
                             ^
../include/linux/compiler.h:263:42: note: in definition of macro ‘__READ_ONCE’
   __read_once_size(&(x), __u.__c, sizeof(x));  \
                                          ^
../include/net/netfilter/nf_tables.h:1235:14: note: in expansion of macro ‘READ_ONCE’
  return 1 << READ_ONCE(net->nft.gencursor);
              ^~~~~~~~~
../include/net/netfilter/nf_tables.h:1235:29: error: ‘const struct net’ has no member named ‘nft’; did you mean ‘nf’?
  return 1 << READ_ONCE(net->nft.gencursor);
                             ^
../include/linux/compiler.h:265:30: note: in definition of macro ‘__READ_ONCE’
   __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \
                              ^
../include/net/netfilter/nf_tables.h:1235:14: note: in expansion of macro ‘READ_ONCE’
  return 1 << READ_ONCE(net->nft.gencursor);
              ^~~~~~~~~
../include/net/netfilter/nf_tables.h:1235:29: error: ‘const struct net’ has no member named ‘nft’; did you mean ‘nf’?
  return 1 << READ_ONCE(net->nft.gencursor);
                             ^
../include/linux/compiler.h:265:50: note: in definition of macro ‘__READ_ONCE’
   __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \
                                                  ^
../include/net/netfilter/nf_tables.h:1235:14: note: in expansion of macro ‘READ_ONCE’
  return 1 << READ_ONCE(net->nft.gencursor);
              ^~~~~~~~~
make[4]: *** [../scripts/Makefile.build:273: net/bridge/netfilter/nf_conntrack_bridge.o] Error 1



-- 
~Randy

^ permalink raw reply

* Re: [Bridge] [PATCH v2 1/1] net: bridge: use mac_len in bridge forwarding
From: Nikolay Aleksandrov @ 2019-08-07  9:17 UTC (permalink / raw)
  To: Zahari Doychev, netdev
  Cc: dsahern, jiri, simon.horman, roopa, bridge, jhs, makita.toshiaki,
	xiyou.wangcong, johannes, ast
In-Reply-To: <20190805153740.29627-2-zahari.doychev@linux.com>

Hi Zahari,
On 05/08/2019 18:37, Zahari Doychev wrote:
> The bridge code cannot forward packets from various paths that set up the
> SKBs in different ways. Some of these packets get corrupted during the
> forwarding as not always is just ETH_HLEN pulled at the front. This happens
> e.g. when VLAN tags are pushed bu using tc act_vlan on ingress.
Overall the patch looks good, I think it shouldn't introduce any regressions
at least from the codepaths I was able to inspect, but please include more
details in here from the cover letter, in fact you don't need it just add all of
the details here so we have them, especially the test setup. Also please provide
some details how this patch was tested. It'd be great if you could provide a
selftest for it so we can make sure it's considered when doing future changes.

Thank you,
 Nik

> 
> The problem is fixed by using skb->mac_len instead of ETH_HLEN, which makes
> sure that the skb headers are correctly restored. This usually does not
> change anything, execpt the local bridge transmits which now need to set
> the skb->mac_len correctly in br_dev_xmit, as well as the broken case noted
> above.
> 
> Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
> ---
>  net/bridge/br_device.c  | 3 ++-
>  net/bridge/br_forward.c | 4 ++--
>  net/bridge/br_vlan.c    | 3 ++-
>  3 files changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 681b72862c16..aeb77ff60311 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -55,8 +55,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>  	BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
>  
>  	skb_reset_mac_header(skb);
> +	skb_reset_mac_len(skb);
>  	eth = eth_hdr(skb);
> -	skb_pull(skb, ETH_HLEN);
> +	skb_pull(skb, skb->mac_len);
>  
>  	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
>  		goto out;
> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
> index 86637000f275..edb4f3533f05 100644
> --- a/net/bridge/br_forward.c
> +++ b/net/bridge/br_forward.c
> @@ -32,7 +32,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
>  
>  int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
>  {
> -	skb_push(skb, ETH_HLEN);
> +	skb_push(skb, skb->mac_len);
>  	if (!is_skb_forwardable(skb->dev, skb))
>  		goto drop;
>  
> @@ -94,7 +94,7 @@ static void __br_forward(const struct net_bridge_port *to,
>  		net = dev_net(indev);
>  	} else {
>  		if (unlikely(netpoll_tx_running(to->br->dev))) {
> -			skb_push(skb, ETH_HLEN);
> +			skb_push(skb, skb->mac_len);
>  			if (!is_skb_forwardable(skb->dev, skb))
>  				kfree_skb(skb);
>  			else
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 021cc9f66804..88244c9cc653 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -466,13 +466,14 @@ static bool __allowed_ingress(const struct net_bridge *br,
>  		/* Tagged frame */
>  		if (skb->vlan_proto != br->vlan_proto) {
>  			/* Protocol-mismatch, empty out vlan_tci for new tag */
> -			skb_push(skb, ETH_HLEN);
> +			skb_push(skb, skb->mac_len);
>  			skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
>  							skb_vlan_tag_get(skb));
>  			if (unlikely(!skb))
>  				return false;
>  
>  			skb_pull(skb, ETH_HLEN);
> +			skb_reset_network_header(skb);
>  			skb_reset_mac_len(skb);
>  			*vid = 0;
>  			tagged = false;
> 


^ permalink raw reply

* Re: [Bridge] [PATCH net v4] net: bridge: move default pvid init/deinit to NETDEV_REGISTER/UNREGISTER
From: David Miller @ 2019-08-05 20:33 UTC (permalink / raw)
  To: nikolay; +Cc: netdev, roopa, bridge, michael-dev
In-Reply-To: <20190802105736.26767-1-nikolay@cumulusnetworks.com>

From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri,  2 Aug 2019 13:57:36 +0300

> Most of the bridge device's vlan init bugs come from the fact that its
> default pvid is created at the wrong time, way too early in ndo_init()
> before the device is even assigned an ifindex. It introduces a bug when the
> bridge's dev_addr is added as fdb during the initial default pvid creation
> the notification has ifindex/NDA_MASTER both equal to 0 (see example below)
> which really makes no sense for user-space[0] and is wrong.
> Usually user-space software would ignore such entries, but they are
> actually valid and will eventually have all necessary attributes.
> It makes much more sense to send a notification *after* the device has
> registered and has a proper ifindex allocated rather than before when
> there's a chance that the registration might still fail or to receive
> it with ifindex/NDA_MASTER == 0. Note that we can remove the fdb flush
> from br_vlan_flush() since that case can no longer happen. At
> NETDEV_REGISTER br->default_pvid is always == 1 as it's initialized by
> br_vlan_init() before that and at NETDEV_UNREGISTER it can be anything
> depending why it was called (if called due to NETDEV_REGISTER error
> it'll still be == 1, otherwise it could be any value changed during the
> device life time).
> 
> For the demonstration below a small change to iproute2 for printing all fdb
> notifications is added, because it contained a workaround not to show
> entries with ifindex == 0.
> Command executed while monitoring: $ ip l add br0 type bridge
> Before (both ifindex and master == 0):
> $ bridge monitor fdb
> 36:7e:8a:b3:56:ba dev * vlan 1 master * permanent
> 
> After (proper br0 ifindex):
> $ bridge monitor fdb
> e6:2a:ae:7a:b7:48 dev br0 vlan 1 master br0 permanent
> 
> v4: move only the default pvid init/deinit to NETDEV_REGISTER/UNREGISTER
> v3: send the correct v2 patch with all changes (stub should return 0)
> v2: on error in br_vlan_init set br->vlgrp to NULL and return 0 in
>     the br_vlan_bridge_event stub when bridge vlans are disabled
> 
> [0] https://bugzilla.kernel.org/show_bug.cgi?id=204389
> 
> Reported-by: michael-dev <michael-dev@fami-braun.de>
> Fixes: 5be5a2df40f0 ("bridge: Add filtering support for default_pvid")
> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* [Bridge] [PATCH v2 1/1] net: bridge: use mac_len in bridge forwarding
From: Zahari Doychev @ 2019-08-05 15:37 UTC (permalink / raw)
  To: netdev
  Cc: makita.toshiaki, jiri, nikolay, simon.horman, roopa, bridge,
	Zahari Doychev, jhs, dsahern, xiyou.wangcong, johannes, ast
In-Reply-To: <20190805153740.29627-1-zahari.doychev@linux.com>

The bridge code cannot forward packets from various paths that set up the
SKBs in different ways. Some of these packets get corrupted during the
forwarding as not always is just ETH_HLEN pulled at the front. This happens
e.g. when VLAN tags are pushed by using tc act_vlan on ingress.

The problem is fixed by using skb->mac_len instead of ETH_HLEN, which makes
sure that the skb headers are correctly restored. This usually does not
change anything, except the local bridge transmits which now need to set
the skb->mac_len correctly in br_dev_xmit, as well as the broken case noted
above.

Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
---
 net/bridge/br_device.c  | 3 ++-
 net/bridge/br_forward.c | 4 ++--
 net/bridge/br_vlan.c    | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 681b72862c16..aeb77ff60311 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -55,8 +55,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
 
 	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
 	eth = eth_hdr(skb);
-	skb_pull(skb, ETH_HLEN);
+	skb_pull(skb, skb->mac_len);
 
 	if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
 		goto out;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 86637000f275..edb4f3533f05 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -32,7 +32,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
 
 int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	skb_push(skb, ETH_HLEN);
+	skb_push(skb, skb->mac_len);
 	if (!is_skb_forwardable(skb->dev, skb))
 		goto drop;
 
@@ -94,7 +94,7 @@ static void __br_forward(const struct net_bridge_port *to,
 		net = dev_net(indev);
 	} else {
 		if (unlikely(netpoll_tx_running(to->br->dev))) {
-			skb_push(skb, ETH_HLEN);
+			skb_push(skb, skb->mac_len);
 			if (!is_skb_forwardable(skb->dev, skb))
 				kfree_skb(skb);
 			else
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 021cc9f66804..88244c9cc653 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -466,13 +466,14 @@ static bool __allowed_ingress(const struct net_bridge *br,
 		/* Tagged frame */
 		if (skb->vlan_proto != br->vlan_proto) {
 			/* Protocol-mismatch, empty out vlan_tci for new tag */
-			skb_push(skb, ETH_HLEN);
+			skb_push(skb, skb->mac_len);
 			skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
 							skb_vlan_tag_get(skb));
 			if (unlikely(!skb))
 				return false;
 
 			skb_pull(skb, ETH_HLEN);
+			skb_reset_network_header(skb);
 			skb_reset_mac_len(skb);
 			*vid = 0;
 			tagged = false;
-- 
2.22.0


^ permalink raw reply related

* [Bridge] [PATCH v2 0/1] Fix bridge mac_len handling
From: Zahari Doychev @ 2019-08-05 15:37 UTC (permalink / raw)
  To: netdev
  Cc: makita.toshiaki, jiri, nikolay, simon.horman, roopa, bridge,
	Zahari Doychev, jhs, dsahern, xiyou.wangcong, johannes, ast

After the last discussion about the possible solution of the problem, I have
decided to resend the patches making the discussed corrections. It seems that
the patches are still not the complete solution as there still can be problem
handling skb->data pointing after the VLAN tag but I got the impression that
all agreed that the bridge code should be able to handle mac_len correctly.

Here again is the description of how the problem can be reproduced.

The Linux bridge does not correctly forward double vlan tagged packets whose
tags were added using the tc act_vlan action. I am using a bridge with two
netdevs and on one of them I have the clsact qdisc with a tc flower rule
adding two vlan tags.

ip link add name br0 type bridge vlan_filtering 1
ip link set dev br0 up

ip link set dev net0 up
ip link set dev net0 master br0

ip link set dev net1 up
ip link set dev net1 master br0

bridge vlan add dev net0 vid 100 master
bridge vlan add dev br0 vid 100 self
bridge vlan add dev net1 vid 100 master

tc qdisc add dev net0 handle ffff: clsact
tc qdisc add dev net1 handle ffff: clsact

tc filter add dev net0 ingress pref 1 protocol all flower \
		  action vlan push id 10 pipe action vlan push id 100

tc filter add dev net0 egress pref 1 protocol 802.1q flower \
		  vlan_id 100 vlan_ethtype 802.1q cvlan_id 10 \
		  action vlan pop pipe action vlan pop

When using the setup above, the packets coming in on net0 get double tagged but
the MAC header gets corrupted when the packets go out of net1. The second vlan
header is not considered in br_dev_queue_push_xmit. The skb data pointer is
decremented only by the ETH_HLEN length. This later causes the function
validate_xmit_vlan to insert the outer vlan tag behind the inner vlan tag.
In this way the inner vlan tag becomes part of the source mac address.

The problem in the bridge forwarding is fixed by using the mac_len when using
skb_push before forwarding the packets which ensures that the skb->data is
set correctly on push/pull.

Changes from v1:

- reset mac_len in br_dev_xmit 

Zahari Doychev (1):
  net: bridge: use mac_len in bridge forwarding

 net/bridge/br_device.c  | 3 ++-
 net/bridge/br_forward.c | 4 ++--
 net/bridge/br_vlan.c    | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

-- 
2.22.0

^ permalink raw reply

* Re: [Bridge] Split path for packets, and bridge configuration problem.
From: Stephen Hemminger @ 2019-08-05 15:32 UTC (permalink / raw)
  To: John Clark via Bridge; +Cc: John Clark
In-Reply-To: <A23F6E83-0634-4BFA-AE82-DA947D3544CF@aim.com>

On Wed, 31 Jul 2019 11:55:37 -0700
John Clark via Bridge <bridge@lists.linux-foundation.org> wrote:

> I’m trying to configure a Linux system to bridge an ethernet link for packet ingress from a local network and combine that with a radio link for the reply packet egress.
> 
> I vaguely recall something like this used to be used for Satellite links, where the incoming packets from the outside world were via the Sat link, but outgoing packets were via a terrestrial link, phone modem or DSL, depending.
> 
> What I have working is the packet entering the bridge on the ethernet link, then I see the MAC address of the node in the ‘bridge show’ output, but even if I assign the MAC address to the ‘radio’ tap interface, I see both the ‘ethernet’ port as ‘master’, and the tap device as ‘self’, but no packets are sent to the tap device.
> 
> In the past I’ve just written user apps that basically implement tunnels and using collection of such apps and taps, gotten packets to where I need them to be.
> 
> I was trying to use more standard tools, and not write more custom code.
> 
> Thanks for any assistance.
> John Clark.
> 

Hacks like this are best done with netfilter.

^ permalink raw reply

* Re: [Bridge] [PATCH net v4] net: bridge: move default pvid init/deinit to NETDEV_REGISTER/UNREGISTER
From: Nikolay Aleksandrov @ 2019-08-02 15:41 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, roopa, bridge, davem, michael-dev
In-Reply-To: <20190802083519.71cb4fa2@hermes.lan>

On 02/08/2019 18:35, Stephen Hemminger wrote:
> On Fri,  2 Aug 2019 13:57:36 +0300
> Nikolay Aleksandrov <nikolay@cumulusnetworks.com> wrote:
> 
>> +int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr)
>>  {
>>  	struct netdev_notifier_changeupper_info *info;
>> -	struct net_bridge *br;
>> +	struct net_bridge *br = netdev_priv(dev);
>> +	bool changed;
>> +	int ret = 0;
>>  
>>  	switch (event) {
>> +	case NETDEV_REGISTER:
>> +		ret = br_vlan_add(br, br->default_pvid,
>> +				  BRIDGE_VLAN_INFO_PVID |
>> +				  BRIDGE_VLAN_INFO_UNTAGGED |
>> +				  BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL);
>> +		break;
> 
> Looks good.
> 
> As a minor optimization, br_vlan_add could ignore the changed pointer if it is NULL.
> This would save places where you don't care.
> 
> 
> Something like:
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 021cc9f66804..bacd3543b215 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -626,10 +626,11 @@ static int br_vlan_add_existing(struct net_bridge *br,
>  		refcount_inc(&vlan->refcnt);
>  		vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
>  		vg->num_vlans++;
> -		*changed = true;
> +		if (changed)
> +			*changed = true;
>  	}
>  
> -	if (__vlan_add_flags(vlan, flags))
> +	if (__vlan_add_flags(vlan, flags) && changed)
>  		*changed = true;
>  
>  	return 0;
> @@ -653,7 +654,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed,
>  
>  	ASSERT_RTNL();
>  
> -	*changed = false;
> +	if (changed)
> +		*changed = false;
>  	vg = br_vlan_group(br);
>  	vlan = br_vlan_find(vg, vid);
>  	if (vlan)
> @@ -679,7 +681,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed,
>  	if (ret) {
>  		free_percpu(vlan->stats);
>  		kfree(vlan);
> -	} else {
> +	} else if (changed) {
>  		*changed = true;
>  	}
>  
> 

sure, this is ok for net-next


^ permalink raw reply

* Re: [Bridge] [PATCH net v4] net: bridge: move default pvid init/deinit to NETDEV_REGISTER/UNREGISTER
From: Stephen Hemminger @ 2019-08-02 15:35 UTC (permalink / raw)
  To: Nikolay Aleksandrov; +Cc: netdev, roopa, bridge, davem, michael-dev
In-Reply-To: <20190802105736.26767-1-nikolay@cumulusnetworks.com>

On Fri,  2 Aug 2019 13:57:36 +0300
Nikolay Aleksandrov <nikolay@cumulusnetworks.com> wrote:

> +int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr)
>  {
>  	struct netdev_notifier_changeupper_info *info;
> -	struct net_bridge *br;
> +	struct net_bridge *br = netdev_priv(dev);
> +	bool changed;
> +	int ret = 0;
>  
>  	switch (event) {
> +	case NETDEV_REGISTER:
> +		ret = br_vlan_add(br, br->default_pvid,
> +				  BRIDGE_VLAN_INFO_PVID |
> +				  BRIDGE_VLAN_INFO_UNTAGGED |
> +				  BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL);
> +		break;

Looks good.

As a minor optimization, br_vlan_add could ignore the changed pointer if it is NULL.
This would save places where you don't care.


Something like:
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 021cc9f66804..bacd3543b215 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -626,10 +626,11 @@ static int br_vlan_add_existing(struct net_bridge *br,
 		refcount_inc(&vlan->refcnt);
 		vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
 		vg->num_vlans++;
-		*changed = true;
+		if (changed)
+			*changed = true;
 	}
 
-	if (__vlan_add_flags(vlan, flags))
+	if (__vlan_add_flags(vlan, flags) && changed)
 		*changed = true;
 
 	return 0;
@@ -653,7 +654,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed,
 
 	ASSERT_RTNL();
 
-	*changed = false;
+	if (changed)
+		*changed = false;
 	vg = br_vlan_group(br);
 	vlan = br_vlan_find(vg, vid);
 	if (vlan)
@@ -679,7 +681,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed,
 	if (ret) {
 		free_percpu(vlan->stats);
 		kfree(vlan);
-	} else {
+	} else if (changed) {
 		*changed = true;
 	}
 


^ permalink raw reply related

* Re: [Bridge] [PATCH net v4] net: bridge: move default pvid init/deinit to NETDEV_REGISTER/UNREGISTER
From: Roopa Prabhu @ 2019-08-02 14:29 UTC (permalink / raw)
  To: Nikolay Aleksandrov; +Cc: netdev, bridge, David Miller, michael-dev
In-Reply-To: <20190802105736.26767-1-nikolay@cumulusnetworks.com>

On Fri, Aug 2, 2019 at 3:57 AM Nikolay Aleksandrov
<nikolay@cumulusnetworks.com> wrote:
>
> Most of the bridge device's vlan init bugs come from the fact that its
> default pvid is created at the wrong time, way too early in ndo_init()
> before the device is even assigned an ifindex. It introduces a bug when the
> bridge's dev_addr is added as fdb during the initial default pvid creation
> the notification has ifindex/NDA_MASTER both equal to 0 (see example below)
> which really makes no sense for user-space[0] and is wrong.
> Usually user-space software would ignore such entries, but they are
> actually valid and will eventually have all necessary attributes.
> It makes much more sense to send a notification *after* the device has
> registered and has a proper ifindex allocated rather than before when
> there's a chance that the registration might still fail or to receive
> it with ifindex/NDA_MASTER == 0. Note that we can remove the fdb flush
> from br_vlan_flush() since that case can no longer happen. At
> NETDEV_REGISTER br->default_pvid is always == 1 as it's initialized by
> br_vlan_init() before that and at NETDEV_UNREGISTER it can be anything
> depending why it was called (if called due to NETDEV_REGISTER error
> it'll still be == 1, otherwise it could be any value changed during the
> device life time).
>
> For the demonstration below a small change to iproute2 for printing all fdb
> notifications is added, because it contained a workaround not to show
> entries with ifindex == 0.
> Command executed while monitoring: $ ip l add br0 type bridge
> Before (both ifindex and master == 0):
> $ bridge monitor fdb
> 36:7e:8a:b3:56:ba dev * vlan 1 master * permanent
>
> After (proper br0 ifindex):
> $ bridge monitor fdb
> e6:2a:ae:7a:b7:48 dev br0 vlan 1 master br0 permanent
>
> v4: move only the default pvid init/deinit to NETDEV_REGISTER/UNREGISTER
> v3: send the correct v2 patch with all changes (stub should return 0)
> v2: on error in br_vlan_init set br->vlgrp to NULL and return 0 in
>     the br_vlan_bridge_event stub when bridge vlans are disabled
>
> [0] https://bugzilla.kernel.org/show_bug.cgi?id=204389
>
> Reported-by: michael-dev <michael-dev@fami-braun.de>
> Fixes: 5be5a2df40f0 ("bridge: Add filtering support for default_pvid")
> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>

Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>

^ permalink raw reply

* Re: [Bridge] [net-next, rfc] net: bridge: mdb: Extend with multicast LLADDR
From: Allan W. Nielsen @ 2019-08-02 14:21 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: andrew, netdev, roopa, bridge, fw, linux-kernel, davem, idosch,
	tglx, Horatiu Vultur
In-Reply-To: <fcb0e526-778f-5451-9934-e6c2421e4eb3@cumulusnetworks.com>

The 08/02/2019 17:16, Nikolay Aleksandrov wrote:
> On 02/08/2019 17:07, Allan W. Nielsen wrote:
> > The 08/01/2019 17:07, Nikolay Aleksandrov wrote:
> >>> To create a group for two of the front ports the following entries can
> >>> be added:
> >>> bridge mdb add dev br0 port eth0 grp 01:00:00:00:00:04 permanent vid 1
> >>> bridge mdb add dev br0 port eth1 grp 01:00:00:00:00:04 permanent vid 1
> >>>
> >>> Now the entries will be display as following:
> >>> dev br0 port eth0 grp 01:00:00:00:00:04 permanent offload vid 1
> >>> dev br0 port eth1 grp 01:00:00:00:00:04 permanent offload vid 1
> >>>
> >>> This requires changes to iproute2 as well, see the following patch for that.
> >>>
> >>> Now suppose a frame with dmac '01:00:00:00:00:04' arrives at one of the front
> >>> ports. If we have HW offload support, then the frame will be forwarded by
> >>> the switch, and need not go to the CPU. In a pure SW world, the frame is
> >>> forwarded by the SW bridge, which will flood it only to the ports which are
> >>> part of the group.
> >>>
> >>> So far so good. This is an important part of the problem we wanted to solve.
> >>>
> >>> But there is one drawback of this approach. If you want to add two of the
> >>> front ports and br0 to receive the frame then I can't see a way of doing it
> >>> with the bridge mdb command. Doing that requires many more changes to
> >>> the existing code.
> >>>
> >>> Example:
> >>> bridge mdb add dev br0 port eth0 grp 01:00:00:00:00:04 permanent vid 1
> >>> bridge mdb add dev br0 port eth1 grp 01:00:00:00:00:04 permanent vid 1
> >>> bridge mdb add dev br0 port br0 grp 01:00:00:00:00:04 permanent vid 1 // This looks wrong.
> >>>
> >>> We believe we have come a long way by re-using the facilities in MDB (thanks
> >>> for convincing us to do this), but we are still not completely happy with
> >>> the result.
> >> Just add self argument for the bridge mdb command, no need to specify it twice.
> > Like this:
> > bridge mdb add dev br0 port eth1 grp 01:00:00:00:00:04 permanent vid self
> 
> What ?! No, that is not what I meant.
> bridge mdb add dev br0 grp 01:00:00:00:00:04 permanent vid self
> bridge mdb del dev br0 grp 01:00:00:00:00:04 permanent vid self
> 
> Similar to fdb. You don't need no-self..
> I don't mind a different approach, this was just a suggestion. But please
> without "no-self" :)

Good, then we are in sync on that one :-D

> > 
> > Then if I want to remove br0 from the group, should I then have a no-self, and
> > then it becomes even more strange what to write in the port.
> > 
> > bridge mdb add dev br0 port ?? grp 01:00:00:00:00:04 permanent vid no-self
> >                             ^^
> > And, what if it is a group with only br0 (the traffic should go to br0 and
> > not any of the slave interfaces)?
> > 
> > Also, the 'self' keyword has different meanings in the 'bridge vlan' and the
> > 'bridge fdb' commands where it refers to whether the offload rule should be
> > installed in HW - or only in the SW bridge.
> 
> No, it shouldn't. Self means act on the device, in this case act on the bridge,
> otherwise master is assumed.
> 
> > 
> > The proposed does not look pretty bad, but at least it will be possible to
> > configure the different scenarios:
> > 
> > bridge mdb add dev br0 port br0 grp 01:00:00:00:00:04 permanent vid 1
> > bridge mdb del dev br0 port br0 grp 01:00:00:00:00:04 permanent vid 1
> > 
> 
> That works too, but the "port" part is redundant.
> 
> > The more I look at the "bridge mdb { add | del } dev DEV port PORT" command, the
> > less I understand why do we have both 'dev' and 'port'? The implementation will
> > only allow this if 'port' has become enslaved to the switch represented by
> > 'dev'. Anyway, what is done is done, and we need to stay backwards compatible,
> > but we could make it optional, and then it looks a bit less strange to allow the
> > port to specify a br0.
> > 
> > Like this:
> > 
> > bridge mdb { add | del } [dev DEV] port PORT grp GROUP [ permanent | temp ] [ vid VID ]
> > 
> > bridge mdb add port eth0 grp 01:00:00:00:00:04 permanent vid 1
> > bridge mdb add port eth1 grp 01:00:00:00:00:04 permanent vid 1
> > bridge mdb add port br0  grp 01:00:00:00:00:04 permanent vid 1 // Add br0 to the group
> > bridge mdb del port br0  grp 01:00:00:00:00:04 permanent vid 1 // Delete it again
> > 
> 
> br0 is not a port, thus the "self" or just dev or whatever you choose..
Ahh, now I understand what you meant.

> > Alternative we could also make the port optional:
> > 
> > bridge mdb { add | del } dev DEV [port PORT] grp GROUP [ permanent | temp ] [ vid VID ]
> > 
> > bridge mdb add dev br0 port eth0 grp 01:00:00:00:00:04 permanent vid 1
> > bridge mdb add dev br0 port eth1 grp 01:00:00:00:00:04 permanent vid 1
> > bridge mdb add dev br0           grp 01:00:00:00:00:04 permanent vid 1 // Add br0 to the group
> > bridge mdb del dev br0           grp 01:00:00:00:00:04 permanent vid 1 // Delete it again
> > 
> 
> Right. I read this one later. :)
> 
> > Any preferences?
> Not really, up to you. Any of the above seem fine to me.
Perfect, I like this one the most:

bridge mdb { add | del } dev DEV [ port PORT ] grp GROUP [ permanent | temp ] [ vid VID ]

bridge mdb add dev br0 port eth0 grp 01:00:00:00:00:04 permanent vid 1
bridge mdb add dev br0 port eth1 grp 01:00:00:00:00:04 permanent vid 1
bridge mdb add dev br0           grp 01:00:00:00:00:04 permanent vid 1 // Add br0 to the group
bridge mdb del dev br0           grp 01:00:00:00:00:04 permanent vid 1 // Delete it again

/Allan

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox