netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] gre: transparent ethernet bridging
@ 2006-07-31 10:06 Philip Craig
  2006-07-31 16:14 ` Stephen Hemminger
  0 siblings, 1 reply; 17+ messages in thread
From: Philip Craig @ 2006-07-31 10:06 UTC (permalink / raw)
  To: netdev

This patch implements transparent ethernet bridging for gre tunnels.
There are a few outstanding issues.

There is no way for userspace to select the type of gre tunnel. The
#if 0 near the top of the patch forces all gre tunnels to be bridges.
The problem is that userspace uses an IPPROTO_ to select the type of
tunnel, but both types of gre tunnel are IPPROTO_GRE. I can't see
anything else in struct ip_tunnel_parm that could be used to select
this. One approach that I've seen mentioned in the archives is to add
a netlink interface to replace the tunnel ioctls.

Network loops are bad. See the comments at the top of ip_gre.c for
a description of how gre tunnels handle this normally. But for gre
bridges, we don't want to copy the ttl (it breaks routing protocols),
and we don't want to force DF (we want to bridge 1500 byte packets).
I couldn't think of any solution for this.

Some routers set LLC_SAP_BSPAN in the gre protocol field, and then
give the bpdu packet without any other ethernet/llc header. This patch
currently tries to fake the ethernet/llc header before passing the
packet up, but it is buggy (mac addresses are wrong at least). Maybe a
better approach is to call directly into the bridging code. I didn't try
that at first because it isn't modular, and may break other things that
want to see the packet.


--- linux-2.6.x/net/ipv4/ip_gre.c	18 Jun 2006 23:30:56 -0000	1.1.1.33
+++ linux-2.6.x/net/ipv4/ip_gre.c	31 Jul 2006 09:57:41 -0000
@@ -30,6 +30,8 @@
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+#include <linux/llc.h>

 #include <net/sock.h>
 #include <net/ip.h>
@@ -41,6 +43,8 @@
 #include <net/dsfield.h>
 #include <net/inet_ecn.h>
 #include <net/xfrm.h>
+#include <net/llc.h>
+#include <net/llc_pdu.h>

 #ifdef CONFIG_IPV6
 #include <net/ipv6.h>
@@ -119,6 +123,7 @@

 static int ipgre_tunnel_init(struct net_device *dev);
 static void ipgre_tunnel_setup(struct net_device *dev);
+static void ipgre_ether_tunnel_setup(struct net_device *dev);

 /* Fallback tunnel: no source, no destination, no key, no options */

@@ -274,7 +279,11 @@ static struct ip_tunnel * ipgre_tunnel_l
 			goto failed;
 	}

+#if 0
 	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
+#else
+	dev = alloc_netdev(sizeof(*t), name, ipgre_ether_tunnel_setup);
+#endif
 	if (!dev)
 	  return NULL;

@@ -550,6 +559,68 @@ ipgre_ecn_encapsulate(u8 tos, struct iph
 	return INET_ECN_encapsulate(tos, inner);
 }

+__be16 ipgre_type_trans(struct sk_buff *skb, int offset)
+{
+	u8     *h = skb->data;
+	__be16 flags = *(__be16*)h;
+	__be16 proto = *(__be16*)(h + 2);
+
+	/* WCCP version 1 and 2 protocol decoding.
+	 * - Change protocol to IP
+	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+	 */
+	if (flags == 0 &&
+	    proto == __constant_htons(ETH_P_WCCP)) {
+		proto = __constant_htons(ETH_P_IP);
+		if ((*(h + offset) & 0xF0) != 0x40)
+			offset += 4;
+	}
+
+	skb->mac.raw = skb->nh.raw;
+	skb->nh.raw = __pskb_pull(skb, offset);
+	skb_postpull_rcsum(skb, skb->h.raw, offset);
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+	if (MULTICAST(iph->daddr)) {
+		/* Looped back packet, drop it! */
+		if (((struct rtable*)skb->dst)->fl.iif == 0)
+			return 0;
+		/* tunnel->stat.multicast++; */
+		skb->pkt_type = PACKET_BROADCAST;
+	}
+#endif
+
+	return proto;
+}
+
+extern const u8 br_group_address[ETH_ALEN];
+
+__be16 ipgre_ether_type_trans(struct sk_buff *skb, struct net_device *dev,
+			      int offset)
+{
+	u8     *h = skb->data;
+	__be16 proto = *(__be16*)(h + 2);
+
+	if (proto == htons(ETH_P_BRIDGE)) {
+		if (!pskb_may_pull(skb, offset + ETH_HLEN))
+			return 0;
+		skb_pull_rcsum(skb, offset);
+		return eth_type_trans(skb, dev);
+	} else if (proto == htons(LLC_SAP_BSPAN)) {
+		skb_pull_rcsum(skb, offset);
+
+		llc_pdu_header_init(skb, LLC_PDU_TYPE_U, LLC_SAP_BSPAN,
+				    LLC_SAP_BSPAN, LLC_PDU_CMD);
+		llc_pdu_init_as_ui_cmd(skb);
+
+		llc_mac_hdr_init(skb, dev->dev_addr, dev->dev_addr);
+		skb_pull(skb, ETH_HLEN);
+
+		return htons(ETH_P_802_2);
+	}
+
+	return 0;
+}
+
 static int ipgre_rcv(struct sk_buff *skb)
 {
 	struct iphdr *iph;
@@ -603,32 +674,8 @@ static int ipgre_rcv(struct sk_buff *skb
 	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
 		secpath_reset(skb);

-		skb->protocol = *(u16*)(h + 2);
-		/* WCCP version 1 and 2 protocol decoding.
-		 * - Change protocol to IP
-		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
-		 */
-		if (flags == 0 &&
-		    skb->protocol == __constant_htons(ETH_P_WCCP)) {
-			skb->protocol = __constant_htons(ETH_P_IP);
-			if ((*(h + offset) & 0xF0) != 0x40)
-				offset += 4;
-		}
-
-		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = __pskb_pull(skb, offset);
-		skb_postpull_rcsum(skb, skb->h.raw, offset);
 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 		skb->pkt_type = PACKET_HOST;
-#ifdef CONFIG_NET_IPGRE_BROADCAST
-		if (MULTICAST(iph->daddr)) {
-			/* Looped back packet, drop it! */
-			if (((struct rtable*)skb->dst)->fl.iif == 0)
-				goto drop;
-			tunnel->stat.multicast++;
-			skb->pkt_type = PACKET_BROADCAST;
-		}
-#endif

 		if (((flags&GRE_CSUM) && csum) ||
 		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
@@ -645,6 +692,15 @@ static int ipgre_rcv(struct sk_buff *skb
 			}
 			tunnel->i_seqno = seqno + 1;
 		}
+		if (tunnel->dev->type == ARPHRD_ETHER)
+			skb->protocol = ipgre_ether_type_trans(skb, tunnel->dev,
+							       offset);
+		else
+			skb->protocol = ipgre_type_trans(skb, offset);
+		if (!skb->protocol) {
+			tunnel->stat.rx_errors++;
+			goto drop;
+		}
 		tunnel->stat.rx_packets++;
 		tunnel->stat.rx_bytes += skb->len;
 		skb->dev = tunnel->dev;
@@ -686,7 +742,10 @@ static int ipgre_tunnel_xmit(struct sk_b
 		goto tx_error;
 	}

-	if (dev->hard_header) {
+	if (dev->type == ARPHRD_ETHER) {
+		gre_hlen = tunnel->hlen - ETH_HLEN;
+		tiph = &tunnel->parms.iph;
+	} else if (dev->hard_header) {
 		gre_hlen = 0;
 		tiph = (struct iphdr*)skb->data;
 	} else {
@@ -767,7 +826,7 @@ static int ipgre_tunnel_xmit(struct sk_b
 	else
 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

-	if (skb->dst)
+	if (skb->dst && skb->dst->ops)
 		skb->dst->ops->update_pmtu(skb->dst, mtu);

 	if (skb->protocol == htons(ETH_P_IP)) {
@@ -849,7 +908,9 @@ static int ipgre_tunnel_xmit(struct sk_b
 	iph->saddr		=	rt->rt_src;

 	if ((iph->ttl = tiph->ttl) == 0) {
-		if (skb->protocol == htons(ETH_P_IP))
+		if (dev->type == ARPHRD_ETHER)
+			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
+		else if (skb->protocol == htons(ETH_P_IP))
 			iph->ttl = old_iph->ttl;
 #ifdef CONFIG_IPV6
 		else if (skb->protocol == htons(ETH_P_IPV6))
@@ -860,7 +921,10 @@ static int ipgre_tunnel_xmit(struct sk_b
 	}

 	((u16*)(iph+1))[0] = tunnel->parms.o_flags;
-	((u16*)(iph+1))[1] = skb->protocol;
+	if (dev->type == ARPHRD_ETHER)
+		((__be16*)(iph+1))[1] = htons(ETH_P_BRIDGE);
+	else
+		((__be16*)(iph+1))[1] = skb->protocol;

 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
 		u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4);
@@ -956,7 +1020,9 @@ ipgre_tunnel_ioctl (struct net_device *d

 				t = netdev_priv(dev);

-				if (MULTICAST(p.iph.daddr))
+				if (t->dev->type == ARPHRD_ETHER)
+					nflags = IFF_BROADCAST;
+				else if (MULTICAST(p.iph.daddr))
 					nflags = IFF_BROADCAST;
 				else if (p.iph.daddr)
 					nflags = IFF_POINTOPOINT;
@@ -1147,6 +1213,18 @@ static void ipgre_tunnel_setup(struct ne
 	dev->addr_len		= 4;
 }

+static void ipgre_ether_tunnel_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	SET_MODULE_OWNER(dev);
+	dev->uninit		= ipgre_tunnel_uninit;
+	dev->destructor 	= free_netdev;
+	dev->hard_start_xmit	= ipgre_tunnel_xmit;
+	dev->get_stats		= ipgre_tunnel_get_stats;
+	dev->do_ioctl		= ipgre_tunnel_ioctl;
+}
+
 static int ipgre_tunnel_init(struct net_device *dev)
 {
 	struct net_device *tdev = NULL;
@@ -1162,8 +1240,27 @@ static int ipgre_tunnel_init(struct net_
 	tunnel->dev = dev;
 	strcpy(tunnel->parms.name, dev->name);

-	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
-	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+	if (dev->type == ARPHRD_ETHER)
+		random_ether_addr(dev->dev_addr);
+	else {
+		memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
+		memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+	}
+
+	if (dev->type == ARPHRD_ETHER)
+		dev->flags |= IFF_BROADCAST;
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+	else if (MULTICAST(iph->daddr)) {
+		if (!iph->saddr)
+			return -EINVAL;
+		dev->flags = IFF_BROADCAST;
+		dev->hard_header = ipgre_header;
+		dev->open = ipgre_open;
+		dev->stop = ipgre_close;
+	}
+#endif
+	else if (iph->daddr)
+		dev->flags |= IFF_POINTOPOINT;

 	/* Guess output device to choose reasonable mtu and hard_header_len */

@@ -1179,19 +1276,6 @@ static int ipgre_tunnel_init(struct net_
 			tdev = rt->u.dst.dev;
 			ip_rt_put(rt);
 		}
-
-		dev->flags |= IFF_POINTOPOINT;
-
-#ifdef CONFIG_NET_IPGRE_BROADCAST
-		if (MULTICAST(iph->daddr)) {
-			if (!iph->saddr)
-				return -EINVAL;
-			dev->flags = IFF_BROADCAST;
-			dev->hard_header = ipgre_header;
-			dev->open = ipgre_open;
-			dev->stop = ipgre_close;
-		}
-#endif
 	}

 	if (!tdev && tunnel->parms.link)
@@ -1212,6 +1296,8 @@ static int ipgre_tunnel_init(struct net_
 		if (tunnel->parms.o_flags&GRE_SEQ)
 			addend += 4;
 	}
+	if (dev->type == ARPHRD_ETHER)
+		addend += ETH_HLEN;
 	dev->hard_header_len = hlen + addend;
 	dev->mtu = mtu - addend;
 	tunnel->hlen = addend;
--- linux-2.6.x/include/linux/if_ether.h	18 Jun 2006 23:30:44 -0000	1.1.1.11
+++ linux-2.6.x/include/linux/if_ether.h	31 Jul 2006 09:57:41 -0000
@@ -55,6 +55,7 @@
 #define ETH_P_DIAG      0x6005          /* DEC Diagnostics              */
 #define ETH_P_CUST      0x6006          /* DEC Customer use             */
 #define ETH_P_SCA       0x6007          /* DEC Systems Comms Arch       */
+#define ETH_P_BRIDGE    0x6558          /* Transparent Ethernet Bridging */
 #define ETH_P_RARP      0x8035		/* Reverse Addr Res packet	*/
 #define ETH_P_ATALK	0x809B		/* Appletalk DDP		*/
 #define ETH_P_AARP	0x80F3		/* Appletalk AARP		*/

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-07-31 10:06 [RFC] gre: transparent ethernet bridging Philip Craig
@ 2006-07-31 16:14 ` Stephen Hemminger
  2006-08-01  1:15   ` Philip Craig
  0 siblings, 1 reply; 17+ messages in thread
From: Stephen Hemminger @ 2006-07-31 16:14 UTC (permalink / raw)
  To: Philip Craig; +Cc: netdev

On Mon, 31 Jul 2006 20:06:41 +1000
Philip Craig <philipc@snapgear.com> wrote:

> This patch implements transparent ethernet bridging for gre tunnels.
> There are a few outstanding issues.

Why not use existing bridge code?

> There is no way for userspace to select the type of gre tunnel. The
> #if 0 near the top of the patch forces all gre tunnels to be bridges.
> The problem is that userspace uses an IPPROTO_ to select the type of
> tunnel, but both types of gre tunnel are IPPROTO_GRE. I can't see
> anything else in struct ip_tunnel_parm that could be used to select
> this. One approach that I've seen mentioned in the archives is to add
> a netlink interface to replace the tunnel ioctls.
> 
> Network loops are bad. See the comments at the top of ip_gre.c for
> a description of how gre tunnels handle this normally. But for gre
> bridges, we don't want to copy the ttl (it breaks routing protocols),
> and we don't want to force DF (we want to bridge 1500 byte packets).
> I couldn't think of any solution for this.
> 
> Some routers set LLC_SAP_BSPAN in the gre protocol field, and then
> give the bpdu packet without any other ethernet/llc header. This patch
> currently tries to fake the ethernet/llc header before passing the
> packet up, but it is buggy (mac addresses are wrong at least). Maybe a
> better approach is to call directly into the bridging code. I didn't try
> that at first because it isn't modular, and may break other things that
> want to see the packet.

Existing bridge code already has spanning tree.

> --- linux-2.6.x/net/ipv4/ip_gre.c	18 Jun 2006 23:30:56 -0000	1.1.1.33
> +++ linux-2.6.x/net/ipv4/ip_gre.c	31 Jul 2006 09:57:41 -0000
> @@ -30,6 +30,8 @@
>  #include <linux/igmp.h>
>  #include <linux/netfilter_ipv4.h>
>  #include <linux/if_ether.h>
> +#include <linux/etherdevice.h>
> +#include <linux/llc.h>
> 
>  #include <net/sock.h>
>  #include <net/ip.h>
> @@ -41,6 +43,8 @@
>  #include <net/dsfield.h>
>  #include <net/inet_ecn.h>
>  #include <net/xfrm.h>
> +#include <net/llc.h>
> +#include <net/llc_pdu.h>
> 
>  #ifdef CONFIG_IPV6
>  #include <net/ipv6.h>
> @@ -119,6 +123,7 @@
> 
>  static int ipgre_tunnel_init(struct net_device *dev);
>  static void ipgre_tunnel_setup(struct net_device *dev);
> +static void ipgre_ether_tunnel_setup(struct net_device *dev);
> 
>  /* Fallback tunnel: no source, no destination, no key, no options */
> 
> @@ -274,7 +279,11 @@ static struct ip_tunnel * ipgre_tunnel_l
>  			goto failed;
>  	}
> 
> +#if 0
>  	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
> +#else
> +	dev = alloc_netdev(sizeof(*t), name, ipgre_ether_tunnel_setup);
> +#endif

"Do, or do not there is no try"


> +__be16 ipgre_type_trans(struct sk_buff *skb, int offset)
> +{
> +	u8     *h = skb->data;
> +	__be16 flags = *(__be16*)h;
> +	__be16 proto = *(__be16*)(h + 2);
> +
> +	/* WCCP version 1 and 2 protocol decoding.
> +	 * - Change protocol to IP
> +	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
> +	 */
> +	if (flags == 0 &&
> +	    proto == __constant_htons(ETH_P_WCCP)) {
> +		proto = __constant_htons(ETH_P_IP);
> +		if ((*(h + offset) & 0xF0) != 0x40)
> +			offset += 4;
> +	}

Don't use __constant_htons() except in initializers and switch cases
(where gcc is too stupid to optimize the macro).

-- 
Stephen Hemminger <shemminger@osdl.org>
"And in the Packet there writ down that doome"

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-07-31 16:14 ` Stephen Hemminger
@ 2006-08-01  1:15   ` Philip Craig
  2006-08-01  5:08     ` Stephen Hemminger
  0 siblings, 1 reply; 17+ messages in thread
From: Philip Craig @ 2006-08-01  1:15 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev

Stephen Hemminger wrote:
> On Mon, 31 Jul 2006 20:06:41 +1000
> Philip Craig <philipc@snapgear.com> wrote:
> 
>> This patch implements transparent ethernet bridging for gre tunnels.
>> There are a few outstanding issues.
> 
> Why not use existing bridge code?

It does use the existing bridge code.  Perhaps the name is misleading.
All it does is encapsulate the full ethernet header in a gre packet,
rather than only layer 3.  That is, currently gre uses ARPHRD_IPGRE,
but bridging requires ARPHRD_ETHER.

>> Some routers set LLC_SAP_BSPAN in the gre protocol field, and then
>> give the bpdu packet without any other ethernet/llc header. This patch
>> currently tries to fake the ethernet/llc header before passing the
>> packet up, but it is buggy (mac addresses are wrong at least). Maybe a
>> better approach is to call directly into the bridging code. I didn't try
>> that at first because it isn't modular, and may break other things that
>> want to see the packet.
> 
> Existing bridge code already has spanning tree.

Yes, and I want to use that.  But this packet is a bit strange in
that it does not have the ethernet header on it.   So what is the
best way to pass it to existing code?  Either fake the ethernet
header, or pass it directly?

>> +#if 0
>>  	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
>> +#else
>> +	dev = alloc_netdev(sizeof(*t), name, ipgre_ether_tunnel_setup);
>> +#endif
> 
> "Do, or do not there is no try"

I am looking for comments as to whether adding a netlink interface
to control this is appropriate.

>> +__be16 ipgre_type_trans(struct sk_buff *skb, int offset)
>> +{
>> +	u8     *h = skb->data;
>> +	__be16 flags = *(__be16*)h;
>> +	__be16 proto = *(__be16*)(h + 2);
>> +
>> +	/* WCCP version 1 and 2 protocol decoding.
>> +	 * - Change protocol to IP
>> +	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
>> +	 */
>> +	if (flags == 0 &&
>> +	    proto == __constant_htons(ETH_P_WCCP)) {
>> +		proto = __constant_htons(ETH_P_IP);
>> +		if ((*(h + offset) & 0xF0) != 0x40)
>> +			offset += 4;
>> +	}
> 
> Don't use __constant_htons() except in initializers and switch cases
> (where gcc is too stupid to optimize the macro).
> 

This is a problem in the existing code, which I am simply moving
around.  Should I fix it at the same time?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-01  1:15   ` Philip Craig
@ 2006-08-01  5:08     ` Stephen Hemminger
  2006-08-01  9:29       ` Philip Craig
  2006-08-02  7:42       ` Lennert Buytenhek
  0 siblings, 2 replies; 17+ messages in thread
From: Stephen Hemminger @ 2006-08-01  5:08 UTC (permalink / raw)
  To: Philip Craig; +Cc: netdev

On Tue, 01 Aug 2006 11:15:29 +1000
Philip Craig <philipc@snapgear.com> wrote:

> Stephen Hemminger wrote:
> > On Mon, 31 Jul 2006 20:06:41 +1000
> > Philip Craig <philipc@snapgear.com> wrote:
> > 
> >> This patch implements transparent ethernet bridging for gre tunnels.
> >> There are a few outstanding issues.
> > 
> > Why not use existing bridge code?
> 
> It does use the existing bridge code.  Perhaps the name is misleading.
> All it does is encapsulate the full ethernet header in a gre packet,
> rather than only layer 3.  That is, currently gre uses ARPHRD_IPGRE,
> but bridging requires ARPHRD_ETHER.
> 

I am not against making the bridge code smarter to handle other
encapsulation.

> >> Some routers set LLC_SAP_BSPAN in the gre protocol field, and then
> >> give the bpdu packet without any other ethernet/llc header. This patch
> >> currently tries to fake the ethernet/llc header before passing the
> >> packet up, but it is buggy (mac addresses are wrong at least). Maybe a
> >> better approach is to call directly into the bridging code. I didn't try
> >> that at first because it isn't modular, and may break other things that
> >> want to see the packet.
> > 
> > Existing bridge code already has spanning tree.
> 
> Yes, and I want to use that.  But this packet is a bit strange in
> that it does not have the ethernet header on it.   So what is the
> best way to pass it to existing code?  Either fake the ethernet
> header, or pass it directly?

Likewise if the bridge STP bpdu input code was smarter, it could
deal with it maybe?

> 
> >> +#if 0
> >>  	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
> >> +#else
> >> +	dev = alloc_netdev(sizeof(*t), name, ipgre_ether_tunnel_setup);
> >> +#endif
> > 
> > "Do, or do not there is no try"
> 
> I am looking for comments as to whether adding a netlink interface
> to control this is appropriate.

If we make bridge code type aware, then the ipgre tunnel wouldn't have to change.

> >> +__be16 ipgre_type_trans(struct sk_buff *skb, int offset)
> >> +{
> >> +	u8     *h = skb->data;
> >> +	__be16 flags = *(__be16*)h;
> >> +	__be16 proto = *(__be16*)(h + 2);
> >> +
> >> +	/* WCCP version 1 and 2 protocol decoding.
> >> +	 * - Change protocol to IP
> >> +	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
> >> +	 */
> >> +	if (flags == 0 &&
> >> +	    proto == __constant_htons(ETH_P_WCCP)) {
> >> +		proto = __constant_htons(ETH_P_IP);
> >> +		if ((*(h + offset) & 0xF0) != 0x40)
> >> +			offset += 4;
> >> +	}
> > 
> > Don't use __constant_htons() except in initializers and switch cases
> > (where gcc is too stupid to optimize the macro).
> > 
> 
> This is a problem in the existing code, which I am simply moving
> around.  Should I fix it at the same time?

Usually if a diff touches some code, I try to make it use current practice.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-01  5:08     ` Stephen Hemminger
@ 2006-08-01  9:29       ` Philip Craig
  2006-08-02  6:17         ` Philip Craig
  2006-08-02  7:42       ` Lennert Buytenhek
  1 sibling, 1 reply; 17+ messages in thread
From: Philip Craig @ 2006-08-01  9:29 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev

Stephen Hemminger wrote:
> I am not against making the bridge code smarter to handle other
> encapsulation.

Do you mean something like this patch?

The only drawback I see for this approach is that it means you
can only encapsulate the ethernet header if the gre interface is
bridged.  That's not too bad a restriction though.

This patch only works for local packets so far, and doesn't
handle the LLC_SAP_BSPAN packets.

Also, if the gre interface is the only port on the bridge, then
we have no mac address.


--- linux-2.6.x/net/bridge/br_device.c	18 Jun 2006 23:30:55 -0000	1.1.1.14
+++ linux-2.6.x/net/bridge/br_device.c	1 Aug 2006 09:12:42 -0000
@@ -17,6 +17,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/if_arp.h>

 #include <asm/uaccess.h>
 #include "br_private.h"
@@ -95,7 +96,9 @@ static int br_set_mac_address(struct net

 	spin_lock_bh(&br->lock);
 	list_for_each_entry(port, &br->port_list, list) {
-		if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) {
+		if (port->dev->type == ARPHRD_ETHER &&
+				!compare_ether_addr(port->dev->dev_addr,
+						    addr->sa_data)) {
 			br_stp_change_bridge_id(br, addr->sa_data);
 			err = 0;
 			break;
--- linux-2.6.x/net/bridge/br_fdb.c	18 Jun 2006 23:30:55 -0000	1.1.1.13
+++ linux-2.6.x/net/bridge/br_fdb.c	1 Aug 2006 09:12:42 -0000
@@ -20,6 +20,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/jhash.h>
+#include <linux/if_arp.h>
 #include <asm/atomic.h>
 #include "br_private.h"

@@ -86,6 +87,7 @@ void br_fdb_changeaddr(struct net_bridge
 				struct net_bridge_port *op;
 				list_for_each_entry(op, &br->port_list, list) {
 					if (op != p &&
+					    op->dev->type == ARPHRD_ETHER &&
 					    !compare_ether_addr(op->dev->dev_addr,
 								f->addr.addr)) {
 						f->dst = op;
@@ -151,6 +153,7 @@ void br_fdb_delete_by_port(struct net_br
 				struct net_bridge_port *op;
 				list_for_each_entry(op, &br->port_list, list) {
 					if (op != p &&
+					    op->dev->type == ARPHRD_ETHER &&
 					    !compare_ether_addr(op->dev->dev_addr,
 								f->addr.addr)) {
 						f->dst = op;
--- linux-2.6.x/net/bridge/br_forward.c	18 Jun 2006 23:30:55 -0000	1.1.1.15
+++ linux-2.6.x/net/bridge/br_forward.c	1 Aug 2006 09:12:42 -0000
@@ -18,6 +18,7 @@
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
+#include <linux/if_arp.h>
 #include "br_private.h"

 static inline int should_deliver(const struct net_bridge_port *p,
@@ -46,6 +47,8 @@ int br_dev_queue_push_xmit(struct sk_buf
 		nf_bridge_maybe_copy_header(skb);
 #endif
 		skb_push(skb, ETH_HLEN);
+		if (skb->dev->type == ARPHRD_IPGRE)
+			skb->protocol = htons(ETH_P_BRIDGE);

 		dev_queue_xmit(skb);
 	}
--- linux-2.6.x/net/bridge/br_if.c	18 Jun 2006 23:30:55 -0000	1.1.1.23
+++ linux-2.6.x/net/bridge/br_if.c	1 Aug 2006 09:12:42 -0000
@@ -391,7 +391,10 @@ int br_add_if(struct net_bridge *br, str
 	struct net_bridge_port *p;
 	int err = 0;

-	if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
+	if (dev->flags & IFF_LOOPBACK)
+		return -EINVAL;
+
+	if (dev->type != ARPHRD_ETHER && dev->type != ARPHRD_IPGRE)
 		return -EINVAL;

 	if (dev->hard_start_xmit == br_dev_xmit)
@@ -408,9 +411,11 @@ int br_add_if(struct net_bridge *br, str
 	if (err)
 		goto err0;

- 	err = br_fdb_insert(br, p, dev->dev_addr);
-	if (err)
-		goto err1;
+	if (dev->type == ARPHRD_ETHER) {
+ 		err = br_fdb_insert(br, p, dev->dev_addr);
+		if (err)
+			goto err1;
+	}

 	err = br_sysfs_addif(p);
 	if (err)
--- linux-2.6.x/net/bridge/br_input.c	18 Jun 2006 23:30:55 -0000	1.1.1.18
+++ linux-2.6.x/net/bridge/br_input.c	1 Aug 2006 09:12:42 -0000
@@ -17,6 +17,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/netfilter_bridge.h>
+#include <linux/if_arp.h>
 #include "br_private.h"

 /* Bridge group multicast address 802.1d (pg 51). */
@@ -124,11 +125,22 @@ static inline int is_link_local(const un
 int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
-	const unsigned char *dest = eth_hdr(skb)->h_dest;
+	const unsigned char *dest;
+
+	if (skb->dev->type == ARPHRD_IPGRE) {
+		if (skb->protocol != htons(ETH_P_BRIDGE))
+			return 0;
+		if (!pskb_may_pull(skb, ETH_HLEN))
+			goto err;
+		skb->protocol = eth_type_trans(skb, p->br->dev);
+		skb_postpull_rcsum(skb, skb->mac.raw, ETH_HLEN);
+		skb->nh.raw += ETH_HLEN;
+	}

 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto err;

+	dest = eth_hdr(skb)->h_dest;
 	if (unlikely(is_link_local(dest))) {
 		skb->pkt_type = PACKET_HOST;
 		return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
--- linux-2.6.x/net/bridge/br_notify.c	21 Mar 2006 01:35:39 -0000	1.1.1.12
+++ linux-2.6.x/net/bridge/br_notify.c	1 Aug 2006 09:12:42 -0000
@@ -14,6 +14,7 @@
  */

 #include <linux/kernel.h>
+#include <linux/if_arp.h>

 #include "br_private.h"

@@ -48,8 +49,10 @@ static int br_device_event(struct notifi
 		break;

 	case NETDEV_CHANGEADDR:
-		br_fdb_changeaddr(p, dev->dev_addr);
-		br_stp_recalculate_bridge_id(br);
+		if (dev->type == ARPHRD_ETHER) {
+			br_fdb_changeaddr(p, dev->dev_addr);
+			br_stp_recalculate_bridge_id(br);
+		}
 		break;

 	case NETDEV_CHANGE:
--- linux-2.6.x/include/linux/if_ether.h	18 Jun 2006 23:30:44 -0000	1.1.1.11
+++ linux-2.6.x/include/linux/if_ether.h	1 Aug 2006 09:12:42 -0000
@@ -55,6 +55,7 @@
 #define ETH_P_DIAG      0x6005          /* DEC Diagnostics              */
 #define ETH_P_CUST      0x6006          /* DEC Customer use             */
 #define ETH_P_SCA       0x6007          /* DEC Systems Comms Arch       */
+#define ETH_P_BRIDGE    0x6558          /* Transparent Ethernet Bridging */
 #define ETH_P_RARP      0x8035		/* Reverse Addr Res packet	*/
 #define ETH_P_ATALK	0x809B		/* Appletalk DDP		*/
 #define ETH_P_AARP	0x80F3		/* Appletalk AARP		*/

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-01  9:29       ` Philip Craig
@ 2006-08-02  6:17         ` Philip Craig
  2006-08-02 17:23           ` Stephen Hemminger
  0 siblings, 1 reply; 17+ messages in thread
From: Philip Craig @ 2006-08-02  6:17 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev

> Stephen Hemminger wrote:
>> I am not against making the bridge code smarter to handle other
>> encapsulation.

Here's an updated patch that fixes all issues I am aware of.

It generates a random mac address for gre ports, and also stores
a copy of the mac address for ethernet ports, rather than checking
dev->type everywhere.

The LLC_SAP_BSPAN packets are handled by simply registering that
protocol with dev_add_pack().  This would have worked for my original
patch too.

I had to release __fake_rtable as part of br_nf_dev_queue_xmit(),
otherwise ip_gre.c paniced trying to call skb->dst->ops->update_ptmu.


--- linux-2.6.x/net/bridge/br.c	18 Jun 2006 23:30:55 -0000	1.1.1.17
+++ linux-2.6.x/net/bridge/br.c	2 Aug 2006 06:05:10 -0000
@@ -26,6 +26,11 @@

 int (*br_should_route_hook) (struct sk_buff **pskb) = NULL;

+static struct packet_type br_stp_packet_type = {
+	.type = __constant_htons(LLC_SAP_BSPAN),
+	.func = br_stp_packet_rcv,
+};
+
 static struct llc_sap *br_stp_sap;

 static int __init br_init(void)
@@ -36,6 +41,8 @@ static int __init br_init(void)
 		return -EBUSY;
 	}

+	dev_add_pack(&br_stp_packet_type);
+	
 	br_fdb_init();

 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -56,6 +63,7 @@ static int __init br_init(void)
 static void __exit br_deinit(void)
 {
 	rcu_assign_pointer(br_stp_sap->rcv_func, NULL);
+	dev_remove_pack(&br_stp_packet_type);

 #ifdef CONFIG_BRIDGE_NETFILTER
 	br_netfilter_fini();
--- linux-2.6.x/net/bridge/br_device.c	18 Jun 2006 23:30:55 -0000	1.1.1.14
+++ linux-2.6.x/net/bridge/br_device.c	2 Aug 2006 06:05:10 -0000
@@ -95,7 +95,7 @@ static int br_set_mac_address(struct net

 	spin_lock_bh(&br->lock);
 	list_for_each_entry(port, &br->port_list, list) {
-		if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) {
+		if (!compare_ether_addr(port->addr.addr, addr->sa_data)) {
 			br_stp_change_bridge_id(br, addr->sa_data);
 			err = 0;
 			break;
--- linux-2.6.x/net/bridge/br_fdb.c	18 Jun 2006 23:30:55 -0000	1.1.1.13
+++ linux-2.6.x/net/bridge/br_fdb.c	2 Aug 2006 06:05:10 -0000
@@ -24,8 +24,7 @@
 #include "br_private.h"

 static kmem_cache_t *br_fdb_cache __read_mostly;
-static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-		      const unsigned char *addr);
+static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source);

 void __init br_fdb_init(void)
 {
@@ -67,7 +66,7 @@ static __inline__ void fdb_delete(struct
 	br_fdb_put(f);
 }

-void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
+void br_fdb_changeaddr(struct net_bridge_port *p)
 {
 	struct net_bridge *br = p->br;
 	int i;
@@ -86,7 +85,7 @@ void br_fdb_changeaddr(struct net_bridge
 				struct net_bridge_port *op;
 				list_for_each_entry(op, &br->port_list, list) {
 					if (op != p &&
-					    !compare_ether_addr(op->dev->dev_addr,
+					    !compare_ether_addr(op->addr.addr,
 								f->addr.addr)) {
 						f->dst = op;
 						goto insert;
@@ -101,7 +100,7 @@ void br_fdb_changeaddr(struct net_bridge
 	}
  insert:
 	/* insert new address,  may fail if invalid address or dup. */
-	fdb_insert(br, p, newaddr);
+	fdb_insert(br, p);

 	spin_unlock_bh(&br->hash_lock);
 }
@@ -151,7 +150,7 @@ void br_fdb_delete_by_port(struct net_br
 				struct net_bridge_port *op;
 				list_for_each_entry(op, &br->port_list, list) {
 					if (op != p &&
-					    !compare_ether_addr(op->dev->dev_addr,
+					    !compare_ether_addr(op->addr.addr,
 								f->addr.addr)) {
 						f->dst = op;
 						goto skip_delete;
@@ -291,9 +290,9 @@ static struct net_bridge_fdb_entry *fdb_
 	return fdb;
 }

-static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-		  const unsigned char *addr)
+static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source)
 {
+	const unsigned char *addr = source->addr.addr;
 	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
 	struct net_bridge_fdb_entry *fdb;

@@ -320,13 +319,12 @@ static int fdb_insert(struct net_bridge
 	return 0;
 }

-int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
-		  const unsigned char *addr)
+int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source)
 {
 	int ret;

 	spin_lock_bh(&br->hash_lock);
-	ret = fdb_insert(br, source, addr);
+	ret = fdb_insert(br, source);
 	spin_unlock_bh(&br->hash_lock);
 	return ret;
 }
--- linux-2.6.x/net/bridge/br_forward.c	18 Jun 2006 23:30:55 -0000	1.1.1.15
+++ linux-2.6.x/net/bridge/br_forward.c	2 Aug 2006 06:05:10 -0000
@@ -18,6 +18,7 @@
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
+#include <linux/if_arp.h>
 #include "br_private.h"

 static inline int should_deliver(const struct net_bridge_port *p,
@@ -46,6 +47,8 @@ int br_dev_queue_push_xmit(struct sk_buf
 		nf_bridge_maybe_copy_header(skb);
 #endif
 		skb_push(skb, ETH_HLEN);
+		if (skb->dev->type == ARPHRD_IPGRE)
+			skb->protocol = htons(ETH_P_BRIDGE);

 		dev_queue_xmit(skb);
 	}
--- linux-2.6.x/net/bridge/br_if.c	18 Jun 2006 23:30:55 -0000	1.1.1.23
+++ linux-2.6.x/net/bridge/br_if.c	2 Aug 2006 06:05:10 -0000
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/rtnetlink.h>
 #include <linux/if_ether.h>
+#include <linux/etherdevice.h>
 #include <net/sock.h>

 #include "br_private.h"
@@ -391,7 +392,10 @@ int br_add_if(struct net_bridge *br, str
 	struct net_bridge_port *p;
 	int err = 0;

-	if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
+	if (dev->flags & IFF_LOOPBACK)
+		return -EINVAL;
+
+	if (dev->type != ARPHRD_ETHER && dev->type != ARPHRD_IPGRE)
 		return -EINVAL;

 	if (dev->hard_start_xmit == br_dev_xmit)
@@ -408,7 +412,12 @@ int br_add_if(struct net_bridge *br, str
 	if (err)
 		goto err0;

- 	err = br_fdb_insert(br, p, dev->dev_addr);
+	if (dev->type == ARPHRD_ETHER)
+		memcpy(p->addr.addr, dev->dev_addr, ETH_ALEN);
+	else
+		random_ether_addr(p->addr.addr);
+
+	err = br_fdb_insert(br, p);
 	if (err)
 		goto err1;

--- linux-2.6.x/net/bridge/br_input.c	18 Jun 2006 23:30:55 -0000	1.1.1.18
+++ linux-2.6.x/net/bridge/br_input.c	2 Aug 2006 06:05:10 -0000
@@ -17,6 +17,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/netfilter_bridge.h>
+#include <linux/if_arp.h>
 #include "br_private.h"

 /* Bridge group multicast address 802.1d (pg 51). */
@@ -124,11 +125,22 @@ static inline int is_link_local(const un
 int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
 {
 	struct sk_buff *skb = *pskb;
-	const unsigned char *dest = eth_hdr(skb)->h_dest;
+	const unsigned char *dest;
+
+	if (skb->dev->type == ARPHRD_IPGRE) {
+		if (skb->protocol != htons(ETH_P_BRIDGE))
+			return 0;
+		if (!pskb_may_pull(skb, ETH_HLEN))
+			goto err;
+		skb->protocol = eth_type_trans(skb, p->br->dev);
+		skb_postpull_rcsum(skb, skb->mac.raw, ETH_HLEN);
+		skb->nh.raw += ETH_HLEN;
+	}

 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto err;

+	dest = eth_hdr(skb)->h_dest;
 	if (unlikely(is_link_local(dest))) {
 		skb->pkt_type = PACKET_HOST;
 		return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
--- linux-2.6.x/net/bridge/br_netfilter.c	18 Jun 2006 23:30:55 -0000	1.1.1.25
+++ linux-2.6.x/net/bridge/br_netfilter.c	2 Aug 2006 06:05:10 -0000
@@ -765,14 +765,24 @@ out:
 	return NF_STOLEN;
 }

+static int __br_nf_dev_queue_xmit(struct sk_buff *skb)
+{
+	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
+		dst_release(skb->dst);
+		skb->dst = NULL;
+	}
+
+	return br_dev_queue_push_xmit(skb);
+}
+
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	if (skb->protocol == htons(ETH_P_IP) &&
 	    skb->len > skb->dev->mtu &&
 	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
-		return ip_fragment(skb, br_dev_queue_push_xmit);
+		return ip_fragment(skb, __br_nf_dev_queue_xmit);
 	else
-		return br_dev_queue_push_xmit(skb);
+		return __br_nf_dev_queue_xmit(skb);
 }

 /* PF_BRIDGE/POST_ROUTING ********************************************/
--- linux-2.6.x/net/bridge/br_notify.c	21 Mar 2006 01:35:39 -0000	1.1.1.12
+++ linux-2.6.x/net/bridge/br_notify.c	2 Aug 2006 06:05:10 -0000
@@ -14,6 +14,7 @@
  */

 #include <linux/kernel.h>
+#include <linux/if_arp.h>

 #include "br_private.h"

@@ -48,8 +49,11 @@ static int br_device_event(struct notifi
 		break;

 	case NETDEV_CHANGEADDR:
-		br_fdb_changeaddr(p, dev->dev_addr);
-		br_stp_recalculate_bridge_id(br);
+		if (dev->type == ARPHRD_ETHER) {
+			memcpy(p->addr.addr, dev->dev_addr, ETH_ALEN);
+			br_fdb_changeaddr(p);
+			br_stp_recalculate_bridge_id(br);
+		}
 		break;

 	case NETDEV_CHANGE:
--- linux-2.6.x/net/bridge/br_private.h	18 Jun 2006 23:30:55 -0000	1.1.1.16
+++ linux-2.6.x/net/bridge/br_private.h	2 Aug 2006 06:05:10 -0000
@@ -77,6 +77,7 @@ struct net_bridge_port
 	bridge_id			designated_bridge;
 	u32				path_cost;
 	u32				designated_cost;
+	mac_addr			addr;

 	struct timer_list		forward_delay_timer;
 	struct timer_list		hold_timer;
@@ -139,8 +140,7 @@ extern int br_dev_xmit(struct sk_buff *s
 /* br_fdb.c */
 extern void br_fdb_init(void);
 extern void br_fdb_fini(void);
-extern void br_fdb_changeaddr(struct net_bridge_port *p,
-			      const unsigned char *newaddr);
+extern void br_fdb_changeaddr(struct net_bridge_port *p);
 extern void br_fdb_cleanup(unsigned long arg);
 extern void br_fdb_delete_by_port(struct net_bridge *br,
 			   struct net_bridge_port *p);
@@ -152,8 +152,7 @@ extern void br_fdb_put(struct net_bridge
 extern int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 			  unsigned long count, unsigned long off);
 extern int br_fdb_insert(struct net_bridge *br,
-			 struct net_bridge_port *source,
-			 const unsigned char *addr);
+			 struct net_bridge_port *source);
 extern void br_fdb_update(struct net_bridge *br,
 			  struct net_bridge_port *source,
 			  const unsigned char *addr);
@@ -220,6 +219,9 @@ extern ssize_t br_show_bridge_id(char *b
 /* br_stp_bpdu.c */
 extern int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
 		      struct packet_type *pt, struct net_device *orig_dev);
+extern int br_stp_packet_rcv(struct sk_buff *skb, struct net_device *dev,
+			     struct packet_type *pt,
+			     struct net_device *orig_dev);

 /* br_stp_timer.c */
 extern void br_stp_timer_init(struct net_bridge *br);
--- linux-2.6.x/net/bridge/br_stp_bpdu.c	18 Jun 2006 23:30:55 -0000	1.1.1.9
+++ linux-2.6.x/net/bridge/br_stp_bpdu.c	2 Aug 2006 06:05:10 -0000
@@ -50,7 +50,7 @@ static void br_send_bpdu(struct net_brid
 			    LLC_SAP_BSPAN, LLC_PDU_CMD);
 	llc_pdu_init_as_ui_cmd(skb);

-	llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr);
+	llc_mac_hdr_init(skb, p->addr.addr, p->br->group_addr);

 	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 		dev_queue_xmit);
@@ -124,35 +124,23 @@ void br_send_tcn_bpdu(struct net_bridge_
 	br_send_bpdu(p, buf, 7);
 }

-/*
- * Called from llc.
- *
- * NO locks, but rcu_read_lock (preempt_disabled)
- */
-int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
-	       struct packet_type *pt, struct net_device *orig_dev)
+static void __br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
+			 const unsigned char *dest)
 {
-	const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
-	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	struct net_bridge_port *p = rcu_dereference(dev->br_port);
 	struct net_bridge *br;
 	const unsigned char *buf;

 	if (!p)
-		goto err;
-
-	if (pdu->ssap != LLC_SAP_BSPAN
-	    || pdu->dsap != LLC_SAP_BSPAN
-	    || pdu->ctrl_1 != LLC_PDU_TYPE_U)
-		goto err;
+		return;

 	if (!pskb_may_pull(skb, 4))
-		goto err;
+		return;

 	/* compare of protocol id and version */
 	buf = skb->data;
 	if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0)
-		goto err;
+		return;

 	br = p->br;
 	spin_lock(&br->lock);
@@ -162,7 +150,7 @@ int br_stp_rcv(struct sk_buff *skb, stru
 	    || !(br->dev->flags & IFF_UP))
 		goto out;

-	if (compare_ether_addr(dest, br->group_addr) != 0)
+	if (dest && compare_ether_addr(dest, br->group_addr) != 0)
 		goto out;

 	buf = skb_pull(skb, 3);
@@ -213,7 +201,39 @@ int br_stp_rcv(struct sk_buff *skb, stru
 	}
  out:
 	spin_unlock(&br->lock);
+}
+
+/*
+ * Called from llc.
+ *
+ * NO locks, but rcu_read_lock (preempt_disabled)
+ */
+int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
+	       struct packet_type *pt, struct net_device *orig_dev)
+{
+	const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+	const unsigned char *dest = eth_hdr(skb)->h_dest;
+
+	if (pdu->ssap != LLC_SAP_BSPAN
+	    || pdu->dsap != LLC_SAP_BSPAN
+	    || pdu->ctrl_1 != LLC_PDU_TYPE_U)
+		goto err;
+
+	__br_stp_rcv(skb, dev, dest);
  err:
 	kfree_skb(skb);
 	return 0;
 }
+
+/*
+ * Called from dev/core.c for protocol LLC_SAP_BSPAN.
+ * This isn't a real ethernet protocol value, but it can occur for bridging
+ * over gre, and its value is less than 1536 so there is no confusion.
+ */
+int br_stp_packet_rcv(struct sk_buff *skb, struct net_device *dev,
+		      struct packet_type *pt, struct net_device *orig_dev)
+{
+	__br_stp_rcv(skb, dev, NULL);
+	kfree_skb(skb);
+	return 0;
+}
--- linux-2.6.x/net/bridge/br_stp_if.c	21 Mar 2006 01:35:39 -0000	1.1.1.13
+++ linux-2.6.x/net/bridge/br_stp_if.c	2 Aug 2006 06:05:10 -0000
@@ -155,8 +155,8 @@ void br_stp_recalculate_bridge_id(struct

 	list_for_each_entry(p, &br->port_list, list) {
 		if (addr == br_mac_zero ||
-		    memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0)
-			addr = p->dev->dev_addr;
+		    memcmp(p->addr.addr, addr, ETH_ALEN) < 0)
+			addr = p->addr.addr;

 	}

--- linux-2.6.x/include/linux/if_ether.h	18 Jun 2006 23:30:44 -0000	1.1.1.11
+++ linux-2.6.x/include/linux/if_ether.h	2 Aug 2006 06:05:10 -0000
@@ -55,6 +55,7 @@
 #define ETH_P_DIAG      0x6005          /* DEC Diagnostics              */
 #define ETH_P_CUST      0x6006          /* DEC Customer use             */
 #define ETH_P_SCA       0x6007          /* DEC Systems Comms Arch       */
+#define ETH_P_BRIDGE    0x6558          /* Transparent Ethernet Bridging */
 #define ETH_P_RARP      0x8035		/* Reverse Addr Res packet	*/
 #define ETH_P_ATALK	0x809B		/* Appletalk DDP		*/
 #define ETH_P_AARP	0x80F3		/* Appletalk AARP		*/

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-01  5:08     ` Stephen Hemminger
  2006-08-01  9:29       ` Philip Craig
@ 2006-08-02  7:42       ` Lennert Buytenhek
  2006-08-03  1:33         ` Philip Craig
  1 sibling, 1 reply; 17+ messages in thread
From: Lennert Buytenhek @ 2006-08-02  7:42 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Philip Craig, netdev

On Mon, Jul 31, 2006 at 10:08:22PM -0700, Stephen Hemminger wrote:

> > > Why not use existing bridge code?
> > 
> > It does use the existing bridge code.  Perhaps the name is misleading.
> > All it does is encapsulate the full ethernet header in a gre packet,
> > rather than only layer 3.  That is, currently gre uses ARPHRD_IPGRE,
> > but bridging requires ARPHRD_ETHER.
> 
> I am not against making the bridge code smarter to handle other
> encapsulation.

What if you want to run ethernet directly over a GRE tunnel, without
using bridging?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-02  6:17         ` Philip Craig
@ 2006-08-02 17:23           ` Stephen Hemminger
  2006-08-03  1:08             ` Philip Craig
  0 siblings, 1 reply; 17+ messages in thread
From: Stephen Hemminger @ 2006-08-02 17:23 UTC (permalink / raw)
  To: Philip Craig; +Cc: netdev

On Wed, 02 Aug 2006 16:17:42 +1000
Philip Craig <philipc@snapgear.com> wrote:

> > Stephen Hemminger wrote:
> >> I am not against making the bridge code smarter to handle other
> >> encapsulation.
> 
> Here's an updated patch that fixes all issues I am aware of.
> 
> It generates a random mac address for gre ports, and also stores
> a copy of the mac address for ethernet ports, rather than checking
> dev->type everywhere.

That looks cleaner. I wonder if using a fixed OUI would be better
than random addresses but then choosing an OUI would be a problem.

You probably should add a comment about what this function is doing, 
and why.

> static int __br_nf_dev_queue_xmit(struct sk_buff *skb)
> +{
> +	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
> +		dst_release(skb->dst);
> +		skb->dst = NULL;
> +	}
> +
> +	return br_dev_queue_push_xmit(skb);
> +}
> +

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-02 17:23           ` Stephen Hemminger
@ 2006-08-03  1:08             ` Philip Craig
  0 siblings, 0 replies; 17+ messages in thread
From: Philip Craig @ 2006-08-03  1:08 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev

Stephen Hemminger wrote:
> On Wed, 02 Aug 2006 16:17:42 +1000
> Philip Craig <philipc@snapgear.com> wrote:
>> It generates a random mac address for gre ports, and also stores
>> a copy of the mac address for ethernet ports, rather than checking
>> dev->type everywhere.
>
> That looks cleaner. I wonder if using a fixed OUI would be better
> than random addresses but then choosing an OUI would be a problem.

random_ether_addr() sets the local assignment bit. This is what
various other virtual devices do (including tap devices, which can
also be bridged).

> You probably should add a comment about what this function is doing,
> and why.

Okay.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-02  7:42       ` Lennert Buytenhek
@ 2006-08-03  1:33         ` Philip Craig
  2006-08-03  7:33           ` Lennert Buytenhek
  0 siblings, 1 reply; 17+ messages in thread
From: Philip Craig @ 2006-08-03  1:33 UTC (permalink / raw)
  To: Lennert Buytenhek; +Cc: Stephen Hemminger, netdev

Lennert Buytenhek wrote:
> On Mon, Jul 31, 2006 at 10:08:22PM -0700, Stephen Hemminger wrote:
> 
>>>> Why not use existing bridge code?
>>> It does use the existing bridge code.  Perhaps the name is misleading.
>>> All it does is encapsulate the full ethernet header in a gre packet,
>>> rather than only layer 3.  That is, currently gre uses ARPHRD_IPGRE,
>>> but bridging requires ARPHRD_ETHER.
>> I am not against making the bridge code smarter to handle other
>> encapsulation.
> 
> What if you want to run ethernet directly over a GRE tunnel, without
> using bridging?

But on the other hand, this method allows you to send both ethernet
and non-ethernet traffic over the same GRE tunnel.  Is that useful?
Actually, this feature is what makes the handling of the LLC_SAP_BSPAN
packets simple.

The patch to bridging is a lot cleaner than the patch to GRE, and it
also sidesteps the userspace configuration issues, so I don't want to
go back to modifying the GRE device.

Both could be achieved by creating a new virtual device that sits
between GRE and bridging.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-03  1:33         ` Philip Craig
@ 2006-08-03  7:33           ` Lennert Buytenhek
  2006-08-03  9:14             ` Philip Craig
  0 siblings, 1 reply; 17+ messages in thread
From: Lennert Buytenhek @ 2006-08-03  7:33 UTC (permalink / raw)
  To: Philip Craig; +Cc: Stephen Hemminger, netdev

On Thu, Aug 03, 2006 at 11:33:25AM +1000, Philip Craig wrote:

> >>> All it does is encapsulate the full ethernet header in a gre packet,
> >>> rather than only layer 3.  That is, currently gre uses ARPHRD_IPGRE,
> >>> but bridging requires ARPHRD_ETHER.
> >>
> >> I am not against making the bridge code smarter to handle other
> >> encapsulation.
> > 
> > What if you want to run ethernet directly over a GRE tunnel, without
> > using bridging?
> 
> But on the other hand, this method allows you to send both ethernet
> and non-ethernet traffic over the same GRE tunnel.  Is that useful?
> Actually, this feature is what makes the handling of the LLC_SAP_BSPAN
> packets simple.
> 
> The patch to bridging is a lot cleaner than the patch to GRE, and it
> also sidesteps the userspace configuration issues, so I don't want to
> go back to modifying the GRE device.

So now you _need_ bridging in the middle to send ethernet traffic over
a GRE tunnel?  Ugh.

If you really want to send ethernet and non-ethernet traffic over the
same tunnel, can't you make multiple devices?

If GRE generally transmits BPDUs without ethernet header, the handling
for that ought to be in GRE, IMHO.


cheers,
Lennert

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-03  7:33           ` Lennert Buytenhek
@ 2006-08-03  9:14             ` Philip Craig
  2006-08-03 19:40               ` Lennert Buytenhek
  0 siblings, 1 reply; 17+ messages in thread
From: Philip Craig @ 2006-08-03  9:14 UTC (permalink / raw)
  To: Lennert Buytenhek; +Cc: Stephen Hemminger, netdev

Lennert Buytenhek wrote:
> So now you _need_ bridging in the middle to send ethernet traffic over
> a GRE tunnel?  Ugh.

Agreed that would not be nice.  What is the usage scenario for this?
At least one end of the tunnel will be bridged?

> If you really want to send ethernet and non-ethernet traffic over the
> same tunnel, can't you make multiple devices?

Do you mean make multiple GRE devices, where one has an ethernet mode set?
That would require more extensive changes to GRE than my original patch.
I don't think being able to send both is important enough to justify that.
Better to just use my original patch if that is acceptable.  There are
parts of it I can clean up more, but nothing major.  And it still needs
some userspace modifications.

Or create a new virtual device to sit between gre and bridging that
simply adds the ethernet header?  This is the most modular approach,
but it seems like overkill for such a simple thing.

> If GRE generally transmits BPDUs without ethernet header, the handling
> for that ought to be in GRE, IMHO.

I don't think this is normal behaviour.  At least, not all routers do it.
More likely some routers have done it as an optimisation.  It does appear
to be specific to GRE.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-03  9:14             ` Philip Craig
@ 2006-08-03 19:40               ` Lennert Buytenhek
  2006-08-04  1:00                 ` Philip Craig
  0 siblings, 1 reply; 17+ messages in thread
From: Lennert Buytenhek @ 2006-08-03 19:40 UTC (permalink / raw)
  To: Philip Craig; +Cc: Stephen Hemminger, netdev

On Thu, Aug 03, 2006 at 07:14:59PM +1000, Philip Craig wrote:

> > So now you _need_ bridging in the middle to send ethernet traffic over
> > a GRE tunnel?  Ugh.
> 
> Agreed that would not be nice.  What is the usage scenario for this?
> At least one end of the tunnel will be bridged?

For example for VPN purposes, I could imagine that you wouldn't want
to use bridging.


> > If you really want to send ethernet and non-ethernet traffic over the
> > same tunnel, can't you make multiple devices?
> 
> Do you mean make multiple GRE devices, where one has an ethernet mode set?

For example.  If you want to send ethernet-encapsulated and other-protocol-
encapsulated traffic over the same GRE tunnel, that would seem like the
cleanest solution to me.


> > If GRE generally transmits BPDUs without ethernet header, the handling
> > for that ought to be in GRE, IMHO.
> 
> I don't think this is normal behaviour.  At least, not all routers do it.
> More likely some routers have done it as an optimisation.  It does appear
> to be specific to GRE.

Hmm, so it depends on the device whether BPDUs are sent with or without
an ethernet header?  Do the devices that send BPDUs without ethernet
header at least accept a BPDU with an ethernet header (and vice versa),
or aren't the two modes compatible at all?


cheers,
Lennert

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-03 19:40               ` Lennert Buytenhek
@ 2006-08-04  1:00                 ` Philip Craig
  2006-08-04  8:02                   ` Lennert Buytenhek
  0 siblings, 1 reply; 17+ messages in thread
From: Philip Craig @ 2006-08-04  1:00 UTC (permalink / raw)
  To: Lennert Buytenhek; +Cc: Stephen Hemminger, netdev

Lennert Buytenhek wrote:
> On Thu, Aug 03, 2006 at 07:14:59PM +1000, Philip Craig wrote:
> 
>>> So now you _need_ bridging in the middle to send ethernet traffic over
>>> a GRE tunnel?  Ugh.
>> Agreed that would not be nice.  What is the usage scenario for this?
>> At least one end of the tunnel will be bridged?
> 
> For example for VPN purposes, I could imagine that you wouldn't want
> to use bridging.

What is the purpose of including the ethernet header if it only exists
on the tunnel?  I have used GRE tunnels for VPNs, but they have always
needed bridging.  If I don't need bridging, then the VPN is only passing
IP traffic and I just send that without GRE at all.

>>> If you really want to send ethernet and non-ethernet traffic over the
>>> same tunnel, can't you make multiple devices?
>> Do you mean make multiple GRE devices, where one has an ethernet mode set?
> 
> For example.  If you want to send ethernet-encapsulated and other-protocol-
> encapsulated traffic over the same GRE tunnel, that would seem like the
> cleanest solution to me.

It is fine for sending, but when receiving, which packets go to which
device?  Does a packet appear on only one device when sending, but both
devices when receiving?  Or does the operation of the non-ethernet device
change based on whether an ethernet device exists?  Modifying GRE to handle
multiple GRE tunnels that are actually the same does not seem clean.

This seems cleaner to me:
The GRE tunnel receives a packet of any protocol (ethernet included),
and adds the IP/GRE header.  Anything else is done above GRE.
Whatever needs the ethernet header should add it.

And then the decision is, how do we add the ethernet header?
- an option to GRE to always add the ethernet header
- a new virtual device
- as part of bridging

Any of these will work for me.

> Hmm, so it depends on the device whether BPDUs are sent with or without
> an ethernet header?  Do the devices that send BPDUs without ethernet
> header at least accept a BPDU with an ethernet header (and vice versa),
> or aren't the two modes compatible at all?

I don't have any to test with currently, but we only had complaints about
us not receiving them, and only at some sites.  So I assume that devices
can accept either format.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-04  1:00                 ` Philip Craig
@ 2006-08-04  8:02                   ` Lennert Buytenhek
  2006-08-07  1:55                     ` Philip Craig
  0 siblings, 1 reply; 17+ messages in thread
From: Lennert Buytenhek @ 2006-08-04  8:02 UTC (permalink / raw)
  To: Philip Craig; +Cc: Stephen Hemminger, netdev

On Fri, Aug 04, 2006 at 11:00:55AM +1000, Philip Craig wrote:

> >>> So now you _need_ bridging in the middle to send ethernet traffic over
> >>> a GRE tunnel?  Ugh.
> >> Agreed that would not be nice.  What is the usage scenario for this?
> >> At least one end of the tunnel will be bridged?
> > 
> > For example for VPN purposes, I could imagine that you wouldn't want
> > to use bridging.
> 
> What is the purpose of including the ethernet header if it only
> exists on the tunnel?

I have one machine at home that appears to be on my employer's network
via such a tunnel.  I don't use bridging, because I don't need any other
machine at home to access this tunnel.  I do want bridging, and not proxy
ARP, because it allows me to run arpwatch, and doesn't require me to
reconfigure something at the remote end if I, for example, want to add
another IP address to my home box.


> >>> If you really want to send ethernet and non-ethernet traffic over the
> >>> same tunnel, can't you make multiple devices?
> >> Do you mean make multiple GRE devices, where one has an ethernet mode set?
> > 
> > For example.  If you want to send ethernet-encapsulated and other-protocol-
> > encapsulated traffic over the same GRE tunnel, that would seem like the
> > cleanest solution to me.
> 
> It is fine for sending, but when receiving, which packets go to which
> device?  Does a packet appear on only one device when sending, but both
> devices when receiving?

No, just send it to the device that matches the lower-level protocol.
So if you receive an ethernet packet, make it be received on the ethernet
sub-device, if it's IP, make it be received on the IP sub-device, etc.

Note that I'm not saying that this is necessarily a good idea, it's just
what I would suggest if you want to run both 'bare IP' and ethernet
encapsulated traffic over the same GRE tunnel at the same time.  (Not sure
why you'd want that.)


> This seems cleaner to me:
> The GRE tunnel receives a packet of any protocol (ethernet included),
> and adds the IP/GRE header.  Anything else is done above GRE.
> Whatever needs the ethernet header should add it.
> 
> And then the decision is, how do we add the ethernet header?
> - an option to GRE to always add the ethernet header

I think I lost you here.  You surely can't just 'make up' any ethernet
header?


cheers,
Lennert

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-04  8:02                   ` Lennert Buytenhek
@ 2006-08-07  1:55                     ` Philip Craig
  2006-08-10 13:09                       ` Lennert Buytenhek
  0 siblings, 1 reply; 17+ messages in thread
From: Philip Craig @ 2006-08-07  1:55 UTC (permalink / raw)
  To: Lennert Buytenhek; +Cc: Stephen Hemminger, netdev

Lennert Buytenhek wrote:
> I have one machine at home that appears to be on my employer's network
> via such a tunnel.  I don't use bridging, because I don't need any other
> machine at home to access this tunnel.  I do want bridging, and not proxy
> ARP, because it allows me to run arpwatch, and doesn't require me to
> reconfigure something at the remote end if I, for example, want to add
> another IP address to my home box.

Okay.
If this is using Linux, do you have a patch that does this already?

> No, just send it to the device that matches the lower-level protocol.
> So if you receive an ethernet packet, make it be received on the ethernet
> sub-device, if it's IP, make it be received on the IP sub-device, etc.
> 
> Note that I'm not saying that this is necessarily a good idea, it's just
> what I would suggest if you want to run both 'bare IP' and ethernet
> encapsulated traffic over the same GRE tunnel at the same time.  (Not sure
> why you'd want that.)

I still don't understand what the IP sub-device is, or whether
this is different from the options I list below.
But I don't have a good reason for wanting this either.
So lets forget about this and talk about the three options below.

>> This seems cleaner to me:
>> The GRE tunnel receives a packet of any protocol (ethernet included),
>> and adds the IP/GRE header.  Anything else is done above GRE.
>> Whatever needs the ethernet header should add it.
>>
>> And then the decision is, how do we add the ethernet header?
>> - an option to GRE to always add the ethernet header
> 
> I think I lost you here.  You surely can't just 'make up' any ethernet
> header?

No, you need a device that looks like an ethernet device.
We can modify the GRE device to look like ethernet (as an option).
Or we can create a new ethernet device stacked on top of the GRE device.
Or we can use the bridge device, which already looks like ethernet.

My first patch in this thread did it by modifying the GRE device.
Look at that patch if my explanation still doesn't make sense.
The option part is controlled by the "#if 0" in that patch
(which needs fixing of course).

You have a valid use that doesn't need bridging, so IMO we can rule
out doing it in the bridging code.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [RFC] gre: transparent ethernet bridging
  2006-08-07  1:55                     ` Philip Craig
@ 2006-08-10 13:09                       ` Lennert Buytenhek
  0 siblings, 0 replies; 17+ messages in thread
From: Lennert Buytenhek @ 2006-08-10 13:09 UTC (permalink / raw)
  To: Philip Craig; +Cc: Stephen Hemminger, netdev

On Mon, Aug 07, 2006 at 11:55:14AM +1000, Philip Craig wrote:

> > I have one machine at home that appears to be on my employer's network
> > via such a tunnel.  I don't use bridging, because I don't need any other
> > machine at home to access this tunnel.  I do want bridging, and not proxy
> > ARP, because it allows me to run arpwatch, and doesn't require me to
> > reconfigure something at the remote end if I, for example, want to add
> > another IP address to my home box.
> 
> Okay.
> If this is using Linux, do you have a patch that does this already?

I use vtun:

	http://vtun.sourceforge.net/

But I would prefer using some in-kernel ethernet tunneling method with
ipsec instead.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2006-08-10 13:09 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-07-31 10:06 [RFC] gre: transparent ethernet bridging Philip Craig
2006-07-31 16:14 ` Stephen Hemminger
2006-08-01  1:15   ` Philip Craig
2006-08-01  5:08     ` Stephen Hemminger
2006-08-01  9:29       ` Philip Craig
2006-08-02  6:17         ` Philip Craig
2006-08-02 17:23           ` Stephen Hemminger
2006-08-03  1:08             ` Philip Craig
2006-08-02  7:42       ` Lennert Buytenhek
2006-08-03  1:33         ` Philip Craig
2006-08-03  7:33           ` Lennert Buytenhek
2006-08-03  9:14             ` Philip Craig
2006-08-03 19:40               ` Lennert Buytenhek
2006-08-04  1:00                 ` Philip Craig
2006-08-04  8:02                   ` Lennert Buytenhek
2006-08-07  1:55                     ` Philip Craig
2006-08-10 13:09                       ` Lennert Buytenhek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).