Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [RFC PATCH bridge 2/5] bridge: Add vlan to unicast fdb entries
From: Vlad Yasevich @ 2012-08-30 14:48 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev
In-Reply-To: <20120830143303.GA21630@redhat.com>

On 08/30/2012 10:33 AM, Michael S. Tsirkin wrote:
> On Thu, Aug 23, 2012 at 03:29:52PM -0400, Vlad Yasevich wrote:
>> This patch adds vlan to unicast fdb entries that are created for
>> learned addresses (not the manually configured ones).  It adds
>> vlan id into the hash mix and uses vlan as an addditional parameter
>> for an entry match.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>
> I wonder whether this conflicts with Eric's
> 05423b241311c9380b7280179295bac7794281b6
> which allowed null vid.
>
> Specifically this calls vlan_tx_tag_get without
> checking vlan_tx_tag_present. Why is this OK?

We ignore the PRIO bits and just look at vlan deeper in the functions.
So if the tci is set, even to NULL/0 vid, we are still OK as we'll use
just the VID bits.

Now, the other issue with this patch is that it doesn't work when VLANs 
aren't supported by the kernel and skb->vlan_tci is never set.
I am working to address that.

-vlad

>
>> ---
>>   include/linux/if_bridge.h |    1 +
>>   net/bridge/br_device.c    |    2 +-
>>   net/bridge/br_fdb.c       |   71 ++++++++++++++++++++++++++------------------
>>   net/bridge/br_input.c     |   14 ++++++---
>>   net/bridge/br_private.h   |    7 +++-
>>   5 files changed, 58 insertions(+), 37 deletions(-)
>>
>> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
>> index dd3f201..288ff10 100644
>> --- a/include/linux/if_bridge.h
>> +++ b/include/linux/if_bridge.h
>> @@ -95,6 +95,7 @@ struct __fdb_entry {
>>   	__u8 port_hi;
>>   	__u8 pad0;
>>   	__u16 unused;
>> +#define fdb_vid unused
>>   };
>>
>>   #ifdef __KERNEL__
>> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
>> index 3334845..e321b9d 100644
>> --- a/net/bridge/br_device.c
>> +++ b/net/bridge/br_device.c
>> @@ -66,7 +66,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>>   			br_multicast_deliver(mdst, skb);
>>   		else
>>   			br_flood_deliver(br, skb);
>> -	} else if ((dst = __br_fdb_get(br, dest)) != NULL)
>> +	} else if ((dst = __br_fdb_get(br, dest, vlan_tx_tag_get(skb))) != NULL)
>>   		br_deliver(dst->dst, skb);
>>   	else
>>   		br_flood_deliver(br, skb);
>> diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
>> index d21f323..478ca1d 100644
>> --- a/net/bridge/br_fdb.c
>> +++ b/net/bridge/br_fdb.c
>> @@ -23,6 +23,7 @@
>>   #include <linux/slab.h>
>>   #include <linux/atomic.h>
>>   #include <asm/unaligned.h>
>> +#include <linux/if_vlan.h>
>>   #include "br_private.h"
>>
>>   static struct kmem_cache *br_fdb_cache __read_mostly;
>> @@ -67,11 +68,12 @@ static inline int has_expired(const struct net_bridge *br,
>>   		time_before_eq(fdb->updated + hold_time(br), jiffies);
>>   }
>>
>> -static inline int br_mac_hash(const unsigned char *mac)
>> +static inline int br_mac_hash(const unsigned char *mac, __u16 vlan_tci)
>>   {
>> -	/* use 1 byte of OUI cnd 3 bytes of NIC */
>> +	/* use 1 byte of OUI and 3 bytes of NIC */
>>   	u32 key = get_unaligned((u32 *)(mac + 2));
>> -	return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
>> +	return jhash_2words(key, (vlan_tci & VLAN_VID_MASK),
>> +				fdb_salt) & (BR_HASH_SIZE - 1);
>>   }
>>
>>   static void fdb_rcu_free(struct rcu_head *head)
>> @@ -132,7 +134,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
>>   	struct net_bridge_fdb_entry *f;
>>
>>   	/* If old entry was unassociated with any port, then delete it. */
>> -	f = __br_fdb_get(br, br->dev->dev_addr);
>> +	f = __br_fdb_get(br, br->dev->dev_addr, 0);
>>   	if (f && f->is_local && !f->dst)
>>   		fdb_delete(br, f);
>>
>> @@ -231,13 +233,16 @@ void br_fdb_delete_by_port(struct net_bridge *br,
>>
>>   /* No locking or refcounting, assumes caller has rcu_read_lock */
>>   struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
>> -					  const unsigned char *addr)
>> +					  const unsigned char *addr,
>> +					  __u16 vlan_tci)
>>   {
>>   	struct hlist_node *h;
>>   	struct net_bridge_fdb_entry *fdb;
>>
>> -	hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) {
>> -		if (ether_addr_equal(fdb->addr.addr, addr)) {
>> +	hlist_for_each_entry_rcu(fdb, h,
>> +				&br->hash[br_mac_hash(addr, vlan_tci)], hlist) {
>> +		if (ether_addr_equal(fdb->addr.addr, addr) &&
>> +		    fdb->vlan_id == (vlan_tci & VLAN_VID_MASK) ) {
>>   			if (unlikely(has_expired(br, fdb)))
>>   				break;
>>   			return fdb;
>> @@ -261,7 +266,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
>>   	if (!port)
>>   		ret = 0;
>>   	else {
>> -		fdb = __br_fdb_get(port->br, addr);
>> +		fdb = __br_fdb_get(port->br, addr, 0);
>>   		ret = fdb && fdb->dst && fdb->dst->dev != dev &&
>>   			fdb->dst->state == BR_STATE_FORWARDING;
>>   	}
>> @@ -313,6 +318,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
>>   			fe->is_local = f->is_local;
>>   			if (!f->is_static)
>>   				fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated);
>> +			fe->fdb_vid = f->vlan_id;
>>   			++fe;
>>   			++num;
>>   		}
>> @@ -325,26 +331,30 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
>>   }
>>
>>   static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
>> -					     const unsigned char *addr)
>> +					     const unsigned char *addr,
>> +					     __u16 vlan_tci)
>>   {
>>   	struct hlist_node *h;
>>   	struct net_bridge_fdb_entry *fdb;
>>
>>   	hlist_for_each_entry(fdb, h, head, hlist) {
>> -		if (ether_addr_equal(fdb->addr.addr, addr))
>> +		if (ether_addr_equal(fdb->addr.addr, addr) &&
>> +		    fdb->vlan_id == (vlan_tci & VLAN_VID_MASK))
>>   			return fdb;
>>   	}
>>   	return NULL;
>>   }
>>
>>   static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
>> -						 const unsigned char *addr)
>> +						 const unsigned char *addr,
>> +						 __u16 vlan_tci)
>>   {
>>   	struct hlist_node *h;
>>   	struct net_bridge_fdb_entry *fdb;
>>
>>   	hlist_for_each_entry_rcu(fdb, h, head, hlist) {
>> -		if (ether_addr_equal(fdb->addr.addr, addr))
>> +		if (ether_addr_equal(fdb->addr.addr, addr) &&
>> +		    fdb->vlan_id == (vlan_tci & VLAN_VID_MASK))
>>   			return fdb;
>>   	}
>>   	return NULL;
>> @@ -352,7 +362,8 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
>>
>>   static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
>>   					       struct net_bridge_port *source,
>> -					       const unsigned char *addr)
>> +					       const unsigned char *addr,
>> +					       __u16 vlan_tci)
>>   {
>>   	struct net_bridge_fdb_entry *fdb;
>>
>> @@ -360,6 +371,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
>>   	if (fdb) {
>>   		memcpy(fdb->addr.addr, addr, ETH_ALEN);
>>   		fdb->dst = source;
>> +		fdb->vlan_id = (vlan_tci & VLAN_VID_MASK);
>>   		fdb->is_local = 0;
>>   		fdb->is_static = 0;
>>   		fdb->updated = fdb->used = jiffies;
>> @@ -371,13 +383,13 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
>>   static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
>>   		  const unsigned char *addr)
>>   {
>> -	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
>> +	struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)];
>>   	struct net_bridge_fdb_entry *fdb;
>>
>>   	if (!is_valid_ether_addr(addr))
>>   		return -EINVAL;
>>
>> -	fdb = fdb_find(head, addr);
>> +	fdb = fdb_find(head, addr, 0);
>>   	if (fdb) {
>>   		/* it is okay to have multiple ports with same
>>   		 * address, just use the first one.
>> @@ -390,7 +402,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
>>   		fdb_delete(br, fdb);
>>   	}
>>
>> -	fdb = fdb_create(head, source, addr);
>> +	fdb = fdb_create(head, source, addr, 0);
>>   	if (!fdb)
>>   		return -ENOMEM;
>>
>> @@ -412,9 +424,9 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
>>   }
>>
>>   void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
>> -		   const unsigned char *addr)
>> +		   const unsigned char *addr, __u16 vlan_tci)
>>   {
>> -	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
>> +	struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan_tci)];
>>   	struct net_bridge_fdb_entry *fdb;
>>
>>   	/* some users want to always flood. */
>> @@ -426,7 +438,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
>>   	      source->state == BR_STATE_FORWARDING))
>>   		return;
>>
>> -	fdb = fdb_find_rcu(head, addr);
>> +	fdb = fdb_find_rcu(head, addr, vlan_tci);
>>   	if (likely(fdb)) {
>>   		/* attempt to update an entry for a local interface */
>>   		if (unlikely(fdb->is_local)) {
>> @@ -441,8 +453,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
>>   		}
>>   	} else {
>>   		spin_lock(&br->hash_lock);
>> -		if (likely(!fdb_find(head, addr))) {
>> -			fdb = fdb_create(head, source, addr);
>> +		if (likely(!fdb_find(head, addr, vlan_tci))) {
>> +			fdb = fdb_create(head, source, addr, vlan_tci);
>>   			if (fdb)
>>   				fdb_notify(br, fdb, RTM_NEWNEIGH);
>>   		}
>> @@ -571,18 +583,18 @@ out:
>>
>>   /* Update (create or replace) forwarding database entry */
>>   static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
>> -			 __u16 state, __u16 flags)
>> +			 __u16 state, __u16 flags, __u16 vlan_tci)
>>   {
>>   	struct net_bridge *br = source->br;
>> -	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
>> +	struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan_tci)];
>>   	struct net_bridge_fdb_entry *fdb;
>>
>> -	fdb = fdb_find(head, addr);
>> +	fdb = fdb_find(head, addr, vlan_tci);
>>   	if (fdb == NULL) {
>>   		if (!(flags & NLM_F_CREATE))
>>   			return -ENOENT;
>>
>> -		fdb = fdb_create(head, source, addr);
>> +		fdb = fdb_create(head, source, addr, vlan_tci);
>>   		if (!fdb)
>>   			return -ENOMEM;
>>   		fdb_notify(br, fdb, RTM_NEWNEIGH);
>> @@ -628,11 +640,12 @@ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
>>
>>   	if (ndm->ndm_flags & NTF_USE) {
>>   		rcu_read_lock();
>> -		br_fdb_update(p->br, p, addr);
>> +		br_fdb_update(p->br, p, addr, 0);
>>   		rcu_read_unlock();
>>   	} else {
>>   		spin_lock_bh(&p->br->hash_lock);
>> -		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags);
>> +		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags,
>> +				0);
>>   		spin_unlock_bh(&p->br->hash_lock);
>>   	}
>>
>> @@ -642,10 +655,10 @@ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
>>   static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
>>   {
>>   	struct net_bridge *br = p->br;
>> -	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
>> +	struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)];
>>   	struct net_bridge_fdb_entry *fdb;
>>
>> -	fdb = fdb_find(head, addr);
>> +	fdb = fdb_find(head, addr, 0);
>>   	if (!fdb)
>>   		return -ENOENT;
>>
>> diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
>> index b7ca3b3..a4579ee 100644
>> --- a/net/bridge/br_input.c
>> +++ b/net/bridge/br_input.c
>> @@ -62,12 +62,15 @@ int br_handle_frame_finish(struct sk_buff *skb)
>>   	 * only traffic matching the VLAN filter.
>>   	 */
>>   	vlan_map = rcu_dereference(p->vlan_map);
>> -	if (vlan_map && !test_bit((skb->vlan_tci & VLAN_VID_MASK), vlan_map))
>> -		goto drop;
>> +	if (vlan_map && vlan_tx_tag_present(skb)) {
>> +		unsigned short vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK;
>> +		if (!test_bit(vid+1, vlan_map))
>> +			goto drop;
>> +	}
>>
>>   	/* insert into forwarding database after filtering to avoid spoofing */
>>   	br = p->br;
>> -	br_fdb_update(br, p, eth_hdr(skb)->h_source);
>> +	br_fdb_update(br, p, eth_hdr(skb)->h_source, vlan_tx_tag_get(skb));
>>
>>   	if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
>>   	    br_multicast_rcv(br, p, skb))
>> @@ -102,7 +105,8 @@ int br_handle_frame_finish(struct sk_buff *skb)
>>   			skb2 = skb;
>>
>>   		br->dev->stats.multicast++;
>> -	} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
>> +	} else if ((dst = __br_fdb_get(br, dest, vlan_tx_tag_get(skb))) &&
>> +			dst->is_local) {
>>   		skb2 = skb;
>>   		/* Do not forward the packet since it's local. */
>>   		skb = NULL;
>> @@ -131,7 +135,7 @@ static int br_handle_local_finish(struct sk_buff *skb)
>>   {
>>   	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
>>
>> -	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
>> +	br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vlan_tx_tag_get(skb));
>>   	return 0;	 /* process further */
>>   }
>>
>> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
>> index 8da90e8..921b927 100644
>> --- a/net/bridge/br_private.h
>> +++ b/net/bridge/br_private.h
>> @@ -72,6 +72,7 @@ struct net_bridge_fdb_entry
>>   	unsigned long			updated;
>>   	unsigned long			used;
>>   	mac_addr			addr;
>> +	__u16				vlan_id;
>>   	unsigned char			is_local;
>>   	unsigned char			is_static;
>>   };
>> @@ -352,7 +353,8 @@ extern void br_fdb_cleanup(unsigned long arg);
>>   extern void br_fdb_delete_by_port(struct net_bridge *br,
>>   				  const struct net_bridge_port *p, int do_all);
>>   extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
>> -						 const unsigned char *addr);
>> +						 const unsigned char *addr,
>> +						 __u16 vlan_tci);
>>   extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
>>   extern int br_fdb_fillbuf(struct net_bridge *br, void *buf,
>>   			  unsigned long count, unsigned long off);
>> @@ -361,7 +363,8 @@ extern int br_fdb_insert(struct net_bridge *br,
>>   			 const unsigned char *addr);
>>   extern void br_fdb_update(struct net_bridge *br,
>>   			  struct net_bridge_port *source,
>> -			  const unsigned char *addr);
>> +			  const unsigned char *addr,
>> +			  __u16 vlan_tci);
>>
>>   extern int br_fdb_delete(struct ndmsg *ndm,
>>   			 struct net_device *dev,
>> --
>> 1.7.7.6
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: [RFC PATCH bridge 0/5] Add basic VLAN support to bridges
From: Michael S. Tsirkin @ 2012-08-30 14:46 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <503F7B7C.7050608@redhat.com>

On Thu, Aug 30, 2012 at 10:41:00AM -0400, Vlad Yasevich wrote:
> On 08/30/2012 10:34 AM, Michael S. Tsirkin wrote:
> >On Thu, Aug 30, 2012 at 09:37:17AM -0400, Vlad Yasevich wrote:
> >>On 08/30/2012 08:37 AM, Michael S. Tsirkin wrote:
> >>>On Thu, Aug 23, 2012 at 03:29:50PM -0400, Vlad Yasevich wrote:
> >>>>This series of patches provides an ability to add VLAN IDs to the bridge
> >>>>ports.  This is similar to what can be found in most switches.  The bridge
> >>>>port may have any number of VLANs added to it including vlan 0 for untagged
> >>>>traffic.  When vlans are added to the port, only traffic tagged with particular
> >>>>vlan will forwarded over this port.  Additionally, vlan ids are added to FDB
> >>>>entries and become part of the lookup.  This way we correctly identify the FDB
> >>>>entry.
> >>>>
> >>>>There are still pieces missing.  I don't yet support adding a static fdb entry
> >>>>with a particular vlan.  There is no netlink support for carrying a vlan id.
> >>>>
> >>>>I'd like to hear thoughts of whether this is usufull and something we should
> >>>>persue.
> >>>>
> >>>>The default behavior ofthe bridge is unchanged if no vlans have been
> >>>>configured.
> >>>
> >>>Overall the feature looks good, I can think of some uses
> >>>for it - for example, it could become useful for VMs if
> >>>we add support to tap essentially stripping tags in Xmit but maybe you
> >>>could be more explicit about what you have in mind?
> >>>Do you plan to add tap support as well?
> >>
> >>Yes,  this is something I've thought of.  Not sure if it would be at tap
> >>or bridge itself.  Need to work out where best to do it.
> >
> >It's certainly much easier to do in tap.
> >A 20 line patch should do it.
> >Does stripping tags seem like something bridge should do?
> 
> I agree.  It would be easier in tap.  There also the other side of
> adding tags for outbound traffic.  This would allow auto-access like
> functionality where the guest itself doesn't know anything about
> vlans,
> but the bridge port will add/remove vlans as appropriate.  This is on
> the list of features I want to support.
> 
> -vlad

Looks like easier in tap too. You can only add 1 vlan :)

> >
> >>>Also - what tool support do you plan?
> >>
> >>the patchset includes brctl to configure, but that seems to be
> >>getting deprecated.  I am working on iproute2 to add capability to
> >>configure this.
> >>
> >>>
> >>>I also found some coding style issues and some bugs in
> >>>the patchset. Sent on list.
> >>
> >>Thanks
> >>-vlad
> >>
> >>>
> >>>Hope this helps.
> >>>

^ permalink raw reply

* Re: [RFC PATCH bridge 5/5] bridge: Add sysfs interface to display VLANS
From: Michael S. Tsirkin @ 2012-08-30 14:44 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <503F7A72.6070101@redhat.com>

On Thu, Aug 30, 2012 at 10:36:34AM -0400, Vlad Yasevich wrote:
> On 08/30/2012 10:26 AM, Michael S. Tsirkin wrote:
> >On Thu, Aug 30, 2012 at 10:05:16AM -0400, Vlad Yasevich wrote:
> >>On 08/30/2012 08:27 AM, Michael S. Tsirkin wrote:
> >>>On Thu, Aug 23, 2012 at 03:29:55PM -0400, Vlad Yasevich wrote:
> >>>>Add a binary sysfs file that will dump out vlans currently configured on the
> >>>>port.
> >>>>
> >>>>Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> >>>
> >>>So what's the format here? I could not tell.
> >>>List of vlans? Why binary? Why not make it text in that case?
> >>>This would also allow reporting "all" if filtering
> >>>is disabled and "untagged" for untagged packets.
> >>
> >>I decided to do binary because text may result in more then page
> >>worth of data.
> >
> >Well yes, you need 3 bytes to print a vid and 1 extra for a space.
> 
> 4 bytes per vid and 1 for space or NULL.

Why 4? It's 12 bit so you can print in hex: 'ABC ' 
Whatever :)

> > But
> >in binary, 2 bytes per VID and there are 4K valid VIDs so binary needs
> >more than 1 page too?
> >
> >If using binary I would go for bit per valid bit
> >this solves it nicely.
> 
> True.  The tool reads it in chunks.  I thought about dumping the
> bitmap, but that meant that made the tool more complex as it had to
> know the bitmap construction.

To me, it makes more sense than knowing that vids are
zero-extended to 16 bytes and packed in an array,
and that you need to read it in chunks.
Could be just me :)

> >We can have a separate field that shows whether untagged
> >packets are filtered.
> >
> >>The display tool will know how to display things
> >>properly.
> >>
> >>-vlad
> >
> >I think you know that one of the points of sysfs
> >is to allow use without tools.
> >Tools are happier using other interfaces such as
> >ioctl or netlink.
> >
> 
> I initially though of creating a sysfs object per vlan.  That would
> have made it easy to see which vlans are configured without any
> tools.
> But that could result in a lot of objects being created, so I abandoned it.
> 
> I did think about a text interface, but due to a page of output
> limitation, I didn't go that route.  The reason is that if someone
> cats the file, they may not see all the vlans configured.  So I
> decided on the binary interface, since a binary interface with a
> tool to read it could avoid the single page limitation.
> 
> -vlad

Maybe it's not needed in sysfs then - expose it to
brctl or whatever.

> >
> >
> >>>
> >>>>---
> >>>>  include/linux/if_bridge.h |    1 +
> >>>>  net/bridge/br_if.c        |   34 ++++++++++++++++++++++++++++++++++
> >>>>  net/bridge/br_private.h   |    2 ++
> >>>>  net/bridge/br_sysfs_if.c  |   28 ++++++++++++++++++++++++++++
> >>>>  4 files changed, 65 insertions(+), 0 deletions(-)
> >>>>
> >>>>diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> >>>>index ab750dd..d0f869b 100644
> >>>>--- a/include/linux/if_bridge.h
> >>>>+++ b/include/linux/if_bridge.h
> >>>>@@ -20,6 +20,7 @@
> >>>>  #define SYSFS_BRIDGE_PORT_SUBDIR "brif"
> >>>>  #define SYSFS_BRIDGE_PORT_ATTR	"brport"
> >>>>  #define SYSFS_BRIDGE_PORT_LINK	"bridge"
> >>>>+#define SYSFS_BRIDGE_PORT_VLANS "vlans"
> >>>>
> >>>>  #define BRCTL_VERSION 1
> >>>>
> >>>>diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> >>>>index 90c1038..3963748 100644
> >>>>--- a/net/bridge/br_if.c
> >>>>+++ b/net/bridge/br_if.c
> >>>>@@ -510,6 +510,40 @@ int br_del_port_vlan(struct net_bridge_port *p, unsigned long vlan)
> >>>>  	return 0;
> >>>>  }
> >>>>
> >>>>+size_t br_port_fill_vlans(struct net_bridge_port *p, char* buf,
> >>>>+			unsigned long max, unsigned long skip)
> >>>>+{
> >>>>+	unsigned long *map;
> >>>>+	unsigned short *vid = (unsigned short *)buf;
> >>>>+	unsigned short i;
> >>>>+	int num = 0;
> >>>>+
> >>>>+	if (skip > (VLAN_N_VID+1))
> >>>>+		return -EINVAL;
> >>>>+
> >>>>+	memset(buf, 0, max * sizeof(unsigned short));
> >>>
> >>>Isn't max is in bytes? why is this safe?
> >>>
> >>>>+
> >>>>+	rcu_read_lock();
> >>>>+	map = rcu_dereference(p->vlan_map);
> >>>>+	if (!map)
> >>>>+		goto out;
> >>>>+
> >>>>+	for (i = skip + 1; i < VLAN_N_VID + 1; i++) {
> >>>
> >>>Isn't skip in bytes too? Why do you compare it to i which is
> >>>in dwords?
> >>>
> >>>>+		if (test_bit(i, map)) {
> >>>>+			if (num > max)
> >>>>+				goto out;
> >>>>+
> >>>>+			*vid = i-1;
> >>>>+			vid++;
> >>>>+			num++;
> >>>>+		}
> >>>>+	}
> >>>>+out:
> >>>>+	rcu_read_unlock();
> >>>>+
> >>>>+	return num*sizeof(unsigned short);
> >>>>+}
> >>>>+
> >>>>  void __net_exit br_net_exit(struct net *net)
> >>>>  {
> >>>>  	struct net_device *dev;
> >>>>diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> >>>>index 5639c1c..cf95cd7 100644
> >>>>--- a/net/bridge/br_private.h
> >>>>+++ b/net/bridge/br_private.h
> >>>>@@ -404,6 +404,8 @@ extern netdev_features_t br_features_recompute(struct net_bridge *br,
> >>>>  	netdev_features_t features);
> >>>>  extern int br_set_port_vlan(struct net_bridge_port *p, unsigned long vid);
> >>>>  extern int br_del_port_vlan(struct net_bridge_port *p, unsigned long vid);
> >>>>+extern size_t br_port_fill_vlans(struct net_bridge_port *p, char *buf,
> >>>>+				unsigned long max, unsigned long skip);
> >>>>
> >>>>  /* br_input.c */
> >>>>  extern int br_handle_frame_finish(struct sk_buff *skb);
> >>>>diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
> >>>>index 13b36bd..a81e2ef 100644
> >>>>--- a/net/bridge/br_sysfs_if.c
> >>>>+++ b/net/bridge/br_sysfs_if.c
> >>>>@@ -234,6 +234,29 @@ const struct sysfs_ops brport_sysfs_ops = {
> >>>>  };
> >>>>
> >>>>  /*
> >>>>+ * Export the vlan table for a given port as a binary file.
> >>>>+ * The records are unsgined shorts.
> >>>>+ *
> >>>>+ * Returns the number of bytes read.
> >>>>+ */
> >>>>+static ssize_t brport_vlans_read(struct file *filp, struct kobject *kobj,
> >>>>+				struct bin_attribute *bin_attr,
> >>>>+				char *buf, loff_t off, size_t count)
> >>>>+{
> >>>>+	struct net_bridge_port *p = to_brport(kobj);
> >>>>+
> >>>>+	return br_port_fill_vlans(p, buf,
> >>>>+				count/sizeof(unsigned short),
> >>>>+				off/sizeof(unsigned short));
> >>>>+}
> >>>>+
> >>>>+static struct bin_attribute port_vlans = {
> >>>>+	.attr = { .name = SYSFS_BRIDGE_PORT_VLANS,
> >>>>+		  .mode = S_IRUGO, },
> >>>>+	.read = brport_vlans_read,
> >>>>+};
> >>>>+
> >>>>+/*
> >>>>   * Add sysfs entries to ethernet device added to a bridge.
> >>>>   * Creates a brport subdirectory with bridge attributes.
> >>>>   * Puts symlink in bridge's brif subdirectory
> >>>>@@ -255,6 +278,11 @@ int br_sysfs_addif(struct net_bridge_port *p)
> >>>>  			return err;
> >>>>  	}
> >>>>
> >>>>+	err = sysfs_create_bin_file(&p->kobj, &port_vlans);
> >>>>+	if (err) {
> >>>>+		return err;
> >>>>+	}
> >>>>+
> >>>>  	strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
> >>>>  	return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
> >>>>  }
> >>>>--
> >>>>1.7.7.6
> >>>>
> >>>>--
> >>>>To unsubscribe from this list: send the line "unsubscribe netdev" in
> >>>>the body of a message to majordomo@vger.kernel.org
> >>>>More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH bridge 0/5] Add basic VLAN support to bridges
From: Vlad Yasevich @ 2012-08-30 14:41 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev
In-Reply-To: <20120830143457.GA21646@redhat.com>

On 08/30/2012 10:34 AM, Michael S. Tsirkin wrote:
> On Thu, Aug 30, 2012 at 09:37:17AM -0400, Vlad Yasevich wrote:
>> On 08/30/2012 08:37 AM, Michael S. Tsirkin wrote:
>>> On Thu, Aug 23, 2012 at 03:29:50PM -0400, Vlad Yasevich wrote:
>>>> This series of patches provides an ability to add VLAN IDs to the bridge
>>>> ports.  This is similar to what can be found in most switches.  The bridge
>>>> port may have any number of VLANs added to it including vlan 0 for untagged
>>>> traffic.  When vlans are added to the port, only traffic tagged with particular
>>>> vlan will forwarded over this port.  Additionally, vlan ids are added to FDB
>>>> entries and become part of the lookup.  This way we correctly identify the FDB
>>>> entry.
>>>>
>>>> There are still pieces missing.  I don't yet support adding a static fdb entry
>>>> with a particular vlan.  There is no netlink support for carrying a vlan id.
>>>>
>>>> I'd like to hear thoughts of whether this is usufull and something we should
>>>> persue.
>>>>
>>>> The default behavior ofthe bridge is unchanged if no vlans have been
>>>> configured.
>>>
>>> Overall the feature looks good, I can think of some uses
>>> for it - for example, it could become useful for VMs if
>>> we add support to tap essentially stripping tags in Xmit but maybe you
>>> could be more explicit about what you have in mind?
>>> Do you plan to add tap support as well?
>>
>> Yes,  this is something I've thought of.  Not sure if it would be at tap
>> or bridge itself.  Need to work out where best to do it.
>
> It's certainly much easier to do in tap.
> A 20 line patch should do it.
> Does stripping tags seem like something bridge should do?

I agree.  It would be easier in tap.  There also the other side of 
adding tags for outbound traffic.  This would allow auto-access like 
functionality where the guest itself doesn't know anything about vlans,
but the bridge port will add/remove vlans as appropriate.  This is on
the list of features I want to support.

-vlad

>
>>> Also - what tool support do you plan?
>>
>> the patchset includes brctl to configure, but that seems to be
>> getting deprecated.  I am working on iproute2 to add capability to
>> configure this.
>>
>>>
>>> I also found some coding style issues and some bugs in
>>> the patchset. Sent on list.
>>
>> Thanks
>> -vlad
>>
>>>
>>> Hope this helps.
>>>

^ permalink raw reply

* Re: [RFC PATCH bridge 5/5] bridge: Add sysfs interface to display VLANS
From: Vlad Yasevich @ 2012-08-30 14:36 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev
In-Reply-To: <20120830142609.GE21132@redhat.com>

On 08/30/2012 10:26 AM, Michael S. Tsirkin wrote:
> On Thu, Aug 30, 2012 at 10:05:16AM -0400, Vlad Yasevich wrote:
>> On 08/30/2012 08:27 AM, Michael S. Tsirkin wrote:
>>> On Thu, Aug 23, 2012 at 03:29:55PM -0400, Vlad Yasevich wrote:
>>>> Add a binary sysfs file that will dump out vlans currently configured on the
>>>> port.
>>>>
>>>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>>>
>>> So what's the format here? I could not tell.
>>> List of vlans? Why binary? Why not make it text in that case?
>>> This would also allow reporting "all" if filtering
>>> is disabled and "untagged" for untagged packets.
>>
>> I decided to do binary because text may result in more then page
>> worth of data.
>
> Well yes, you need 3 bytes to print a vid and 1 extra for a space.

4 bytes per vid and 1 for space or NULL.

>  But
> in binary, 2 bytes per VID and there are 4K valid VIDs so binary needs
> more than 1 page too?
>
> If using binary I would go for bit per valid bit
> this solves it nicely.

True.  The tool reads it in chunks.  I thought about dumping the bitmap, 
but that meant that made the tool more complex as it had to know the 
bitmap construction.

> We can have a separate field that shows whether untagged
> packets are filtered.
>
>> The display tool will know how to display things
>> properly.
>>
>> -vlad
>
> I think you know that one of the points of sysfs
> is to allow use without tools.
> Tools are happier using other interfaces such as
> ioctl or netlink.
>

I initially though of creating a sysfs object per vlan.  That would have 
made it easy to see which vlans are configured without any tools.
But that could result in a lot of objects being created, so I abandoned it.

I did think about a text interface, but due to a page of output 
limitation, I didn't go that route.  The reason is that if someone cats 
the file, they may not see all the vlans configured.  So I decided on 
the binary interface, since a binary interface with a tool to read it 
could avoid the single page limitation.

-vlad
>
>
>>>
>>>> ---
>>>>   include/linux/if_bridge.h |    1 +
>>>>   net/bridge/br_if.c        |   34 ++++++++++++++++++++++++++++++++++
>>>>   net/bridge/br_private.h   |    2 ++
>>>>   net/bridge/br_sysfs_if.c  |   28 ++++++++++++++++++++++++++++
>>>>   4 files changed, 65 insertions(+), 0 deletions(-)
>>>>
>>>> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
>>>> index ab750dd..d0f869b 100644
>>>> --- a/include/linux/if_bridge.h
>>>> +++ b/include/linux/if_bridge.h
>>>> @@ -20,6 +20,7 @@
>>>>   #define SYSFS_BRIDGE_PORT_SUBDIR "brif"
>>>>   #define SYSFS_BRIDGE_PORT_ATTR	"brport"
>>>>   #define SYSFS_BRIDGE_PORT_LINK	"bridge"
>>>> +#define SYSFS_BRIDGE_PORT_VLANS "vlans"
>>>>
>>>>   #define BRCTL_VERSION 1
>>>>
>>>> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
>>>> index 90c1038..3963748 100644
>>>> --- a/net/bridge/br_if.c
>>>> +++ b/net/bridge/br_if.c
>>>> @@ -510,6 +510,40 @@ int br_del_port_vlan(struct net_bridge_port *p, unsigned long vlan)
>>>>   	return 0;
>>>>   }
>>>>
>>>> +size_t br_port_fill_vlans(struct net_bridge_port *p, char* buf,
>>>> +			unsigned long max, unsigned long skip)
>>>> +{
>>>> +	unsigned long *map;
>>>> +	unsigned short *vid = (unsigned short *)buf;
>>>> +	unsigned short i;
>>>> +	int num = 0;
>>>> +
>>>> +	if (skip > (VLAN_N_VID+1))
>>>> +		return -EINVAL;
>>>> +
>>>> +	memset(buf, 0, max * sizeof(unsigned short));
>>>
>>> Isn't max is in bytes? why is this safe?
>>>
>>>> +
>>>> +	rcu_read_lock();
>>>> +	map = rcu_dereference(p->vlan_map);
>>>> +	if (!map)
>>>> +		goto out;
>>>> +
>>>> +	for (i = skip + 1; i < VLAN_N_VID + 1; i++) {
>>>
>>> Isn't skip in bytes too? Why do you compare it to i which is
>>> in dwords?
>>>
>>>> +		if (test_bit(i, map)) {
>>>> +			if (num > max)
>>>> +				goto out;
>>>> +
>>>> +			*vid = i-1;
>>>> +			vid++;
>>>> +			num++;
>>>> +		}
>>>> +	}
>>>> +out:
>>>> +	rcu_read_unlock();
>>>> +
>>>> +	return num*sizeof(unsigned short);
>>>> +}
>>>> +
>>>>   void __net_exit br_net_exit(struct net *net)
>>>>   {
>>>>   	struct net_device *dev;
>>>> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
>>>> index 5639c1c..cf95cd7 100644
>>>> --- a/net/bridge/br_private.h
>>>> +++ b/net/bridge/br_private.h
>>>> @@ -404,6 +404,8 @@ extern netdev_features_t br_features_recompute(struct net_bridge *br,
>>>>   	netdev_features_t features);
>>>>   extern int br_set_port_vlan(struct net_bridge_port *p, unsigned long vid);
>>>>   extern int br_del_port_vlan(struct net_bridge_port *p, unsigned long vid);
>>>> +extern size_t br_port_fill_vlans(struct net_bridge_port *p, char *buf,
>>>> +				unsigned long max, unsigned long skip);
>>>>
>>>>   /* br_input.c */
>>>>   extern int br_handle_frame_finish(struct sk_buff *skb);
>>>> diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
>>>> index 13b36bd..a81e2ef 100644
>>>> --- a/net/bridge/br_sysfs_if.c
>>>> +++ b/net/bridge/br_sysfs_if.c
>>>> @@ -234,6 +234,29 @@ const struct sysfs_ops brport_sysfs_ops = {
>>>>   };
>>>>
>>>>   /*
>>>> + * Export the vlan table for a given port as a binary file.
>>>> + * The records are unsgined shorts.
>>>> + *
>>>> + * Returns the number of bytes read.
>>>> + */
>>>> +static ssize_t brport_vlans_read(struct file *filp, struct kobject *kobj,
>>>> +				struct bin_attribute *bin_attr,
>>>> +				char *buf, loff_t off, size_t count)
>>>> +{
>>>> +	struct net_bridge_port *p = to_brport(kobj);
>>>> +
>>>> +	return br_port_fill_vlans(p, buf,
>>>> +				count/sizeof(unsigned short),
>>>> +				off/sizeof(unsigned short));
>>>> +}
>>>> +
>>>> +static struct bin_attribute port_vlans = {
>>>> +	.attr = { .name = SYSFS_BRIDGE_PORT_VLANS,
>>>> +		  .mode = S_IRUGO, },
>>>> +	.read = brport_vlans_read,
>>>> +};
>>>> +
>>>> +/*
>>>>    * Add sysfs entries to ethernet device added to a bridge.
>>>>    * Creates a brport subdirectory with bridge attributes.
>>>>    * Puts symlink in bridge's brif subdirectory
>>>> @@ -255,6 +278,11 @@ int br_sysfs_addif(struct net_bridge_port *p)
>>>>   			return err;
>>>>   	}
>>>>
>>>> +	err = sysfs_create_bin_file(&p->kobj, &port_vlans);
>>>> +	if (err) {
>>>> +		return err;
>>>> +	}
>>>> +
>>>>   	strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
>>>>   	return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
>>>>   }
>>>> --
>>>> 1.7.7.6
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH bridge 0/5] Add basic VLAN support to bridges
From: Michael S. Tsirkin @ 2012-08-30 14:34 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <503F6C8D.9000402@redhat.com>

On Thu, Aug 30, 2012 at 09:37:17AM -0400, Vlad Yasevich wrote:
> On 08/30/2012 08:37 AM, Michael S. Tsirkin wrote:
> >On Thu, Aug 23, 2012 at 03:29:50PM -0400, Vlad Yasevich wrote:
> >>This series of patches provides an ability to add VLAN IDs to the bridge
> >>ports.  This is similar to what can be found in most switches.  The bridge
> >>port may have any number of VLANs added to it including vlan 0 for untagged
> >>traffic.  When vlans are added to the port, only traffic tagged with particular
> >>vlan will forwarded over this port.  Additionally, vlan ids are added to FDB
> >>entries and become part of the lookup.  This way we correctly identify the FDB
> >>entry.
> >>
> >>There are still pieces missing.  I don't yet support adding a static fdb entry
> >>with a particular vlan.  There is no netlink support for carrying a vlan id.
> >>
> >>I'd like to hear thoughts of whether this is usufull and something we should
> >>persue.
> >>
> >>The default behavior ofthe bridge is unchanged if no vlans have been
> >>configured.
> >
> >Overall the feature looks good, I can think of some uses
> >for it - for example, it could become useful for VMs if
> >we add support to tap essentially stripping tags in Xmit but maybe you
> >could be more explicit about what you have in mind?
> >Do you plan to add tap support as well?
> 
> Yes,  this is something I've thought of.  Not sure if it would be at tap
> or bridge itself.  Need to work out where best to do it.

It's certainly much easier to do in tap.
A 20 line patch should do it.
Does stripping tags seem like something bridge should do?

> >Also - what tool support do you plan?
> 
> the patchset includes brctl to configure, but that seems to be
> getting deprecated.  I am working on iproute2 to add capability to
> configure this.
> 
> >
> >I also found some coding style issues and some bugs in
> >the patchset. Sent on list.
> 
> Thanks
> -vlad
> 
> >
> >Hope this helps.
> >

^ permalink raw reply

* Re: [RFC PATCH bridge 2/5] bridge: Add vlan to unicast fdb entries
From: Michael S. Tsirkin @ 2012-08-30 14:33 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <1345750195-31598-3-git-send-email-vyasevic@redhat.com>

On Thu, Aug 23, 2012 at 03:29:52PM -0400, Vlad Yasevich wrote:
> This patch adds vlan to unicast fdb entries that are created for
> learned addresses (not the manually configured ones).  It adds
> vlan id into the hash mix and uses vlan as an addditional parameter
> for an entry match.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>

I wonder whether this conflicts with Eric's
05423b241311c9380b7280179295bac7794281b6
which allowed null vid.

Specifically this calls vlan_tx_tag_get without
checking vlan_tx_tag_present. Why is this OK?

> ---
>  include/linux/if_bridge.h |    1 +
>  net/bridge/br_device.c    |    2 +-
>  net/bridge/br_fdb.c       |   71 ++++++++++++++++++++++++++------------------
>  net/bridge/br_input.c     |   14 ++++++---
>  net/bridge/br_private.h   |    7 +++-
>  5 files changed, 58 insertions(+), 37 deletions(-)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index dd3f201..288ff10 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -95,6 +95,7 @@ struct __fdb_entry {
>  	__u8 port_hi;
>  	__u8 pad0;
>  	__u16 unused;
> +#define fdb_vid unused
>  };
>  
>  #ifdef __KERNEL__
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 3334845..e321b9d 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -66,7 +66,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>  			br_multicast_deliver(mdst, skb);
>  		else
>  			br_flood_deliver(br, skb);
> -	} else if ((dst = __br_fdb_get(br, dest)) != NULL)
> +	} else if ((dst = __br_fdb_get(br, dest, vlan_tx_tag_get(skb))) != NULL)
>  		br_deliver(dst->dst, skb);
>  	else
>  		br_flood_deliver(br, skb);
> diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
> index d21f323..478ca1d 100644
> --- a/net/bridge/br_fdb.c
> +++ b/net/bridge/br_fdb.c
> @@ -23,6 +23,7 @@
>  #include <linux/slab.h>
>  #include <linux/atomic.h>
>  #include <asm/unaligned.h>
> +#include <linux/if_vlan.h>
>  #include "br_private.h"
>  
>  static struct kmem_cache *br_fdb_cache __read_mostly;
> @@ -67,11 +68,12 @@ static inline int has_expired(const struct net_bridge *br,
>  		time_before_eq(fdb->updated + hold_time(br), jiffies);
>  }
>  
> -static inline int br_mac_hash(const unsigned char *mac)
> +static inline int br_mac_hash(const unsigned char *mac, __u16 vlan_tci)
>  {
> -	/* use 1 byte of OUI cnd 3 bytes of NIC */
> +	/* use 1 byte of OUI and 3 bytes of NIC */
>  	u32 key = get_unaligned((u32 *)(mac + 2));
> -	return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
> +	return jhash_2words(key, (vlan_tci & VLAN_VID_MASK),
> +				fdb_salt) & (BR_HASH_SIZE - 1);
>  }
>  
>  static void fdb_rcu_free(struct rcu_head *head)
> @@ -132,7 +134,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
>  	struct net_bridge_fdb_entry *f;
>  
>  	/* If old entry was unassociated with any port, then delete it. */
> -	f = __br_fdb_get(br, br->dev->dev_addr);
> +	f = __br_fdb_get(br, br->dev->dev_addr, 0);
>  	if (f && f->is_local && !f->dst)
>  		fdb_delete(br, f);
>  
> @@ -231,13 +233,16 @@ void br_fdb_delete_by_port(struct net_bridge *br,
>  
>  /* No locking or refcounting, assumes caller has rcu_read_lock */
>  struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
> -					  const unsigned char *addr)
> +					  const unsigned char *addr,
> +					  __u16 vlan_tci)
>  {
>  	struct hlist_node *h;
>  	struct net_bridge_fdb_entry *fdb;
>  
> -	hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) {
> -		if (ether_addr_equal(fdb->addr.addr, addr)) {
> +	hlist_for_each_entry_rcu(fdb, h,
> +				&br->hash[br_mac_hash(addr, vlan_tci)], hlist) {
> +		if (ether_addr_equal(fdb->addr.addr, addr) &&
> +		    fdb->vlan_id == (vlan_tci & VLAN_VID_MASK) ) {
>  			if (unlikely(has_expired(br, fdb)))
>  				break;
>  			return fdb;
> @@ -261,7 +266,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
>  	if (!port)
>  		ret = 0;
>  	else {
> -		fdb = __br_fdb_get(port->br, addr);
> +		fdb = __br_fdb_get(port->br, addr, 0);
>  		ret = fdb && fdb->dst && fdb->dst->dev != dev &&
>  			fdb->dst->state == BR_STATE_FORWARDING;
>  	}
> @@ -313,6 +318,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
>  			fe->is_local = f->is_local;
>  			if (!f->is_static)
>  				fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated);
> +			fe->fdb_vid = f->vlan_id;
>  			++fe;
>  			++num;
>  		}
> @@ -325,26 +331,30 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
>  }
>  
>  static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
> -					     const unsigned char *addr)
> +					     const unsigned char *addr,
> +					     __u16 vlan_tci)
>  {
>  	struct hlist_node *h;
>  	struct net_bridge_fdb_entry *fdb;
>  
>  	hlist_for_each_entry(fdb, h, head, hlist) {
> -		if (ether_addr_equal(fdb->addr.addr, addr))
> +		if (ether_addr_equal(fdb->addr.addr, addr) &&
> +		    fdb->vlan_id == (vlan_tci & VLAN_VID_MASK))
>  			return fdb;
>  	}
>  	return NULL;
>  }
>  
>  static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
> -						 const unsigned char *addr)
> +						 const unsigned char *addr,
> +						 __u16 vlan_tci)
>  {
>  	struct hlist_node *h;
>  	struct net_bridge_fdb_entry *fdb;
>  
>  	hlist_for_each_entry_rcu(fdb, h, head, hlist) {
> -		if (ether_addr_equal(fdb->addr.addr, addr))
> +		if (ether_addr_equal(fdb->addr.addr, addr) &&
> +		    fdb->vlan_id == (vlan_tci & VLAN_VID_MASK))
>  			return fdb;
>  	}
>  	return NULL;
> @@ -352,7 +362,8 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
>  
>  static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
>  					       struct net_bridge_port *source,
> -					       const unsigned char *addr)
> +					       const unsigned char *addr,
> +					       __u16 vlan_tci)
>  {
>  	struct net_bridge_fdb_entry *fdb;
>  
> @@ -360,6 +371,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
>  	if (fdb) {
>  		memcpy(fdb->addr.addr, addr, ETH_ALEN);
>  		fdb->dst = source;
> +		fdb->vlan_id = (vlan_tci & VLAN_VID_MASK);
>  		fdb->is_local = 0;
>  		fdb->is_static = 0;
>  		fdb->updated = fdb->used = jiffies;
> @@ -371,13 +383,13 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
>  static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
>  		  const unsigned char *addr)
>  {
> -	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
> +	struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)];
>  	struct net_bridge_fdb_entry *fdb;
>  
>  	if (!is_valid_ether_addr(addr))
>  		return -EINVAL;
>  
> -	fdb = fdb_find(head, addr);
> +	fdb = fdb_find(head, addr, 0);
>  	if (fdb) {
>  		/* it is okay to have multiple ports with same
>  		 * address, just use the first one.
> @@ -390,7 +402,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
>  		fdb_delete(br, fdb);
>  	}
>  
> -	fdb = fdb_create(head, source, addr);
> +	fdb = fdb_create(head, source, addr, 0);
>  	if (!fdb)
>  		return -ENOMEM;
>  
> @@ -412,9 +424,9 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
>  }
>  
>  void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
> -		   const unsigned char *addr)
> +		   const unsigned char *addr, __u16 vlan_tci)
>  {
> -	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
> +	struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan_tci)];
>  	struct net_bridge_fdb_entry *fdb;
>  
>  	/* some users want to always flood. */
> @@ -426,7 +438,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
>  	      source->state == BR_STATE_FORWARDING))
>  		return;
>  
> -	fdb = fdb_find_rcu(head, addr);
> +	fdb = fdb_find_rcu(head, addr, vlan_tci);
>  	if (likely(fdb)) {
>  		/* attempt to update an entry for a local interface */
>  		if (unlikely(fdb->is_local)) {
> @@ -441,8 +453,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
>  		}
>  	} else {
>  		spin_lock(&br->hash_lock);
> -		if (likely(!fdb_find(head, addr))) {
> -			fdb = fdb_create(head, source, addr);
> +		if (likely(!fdb_find(head, addr, vlan_tci))) {
> +			fdb = fdb_create(head, source, addr, vlan_tci);
>  			if (fdb)
>  				fdb_notify(br, fdb, RTM_NEWNEIGH);
>  		}
> @@ -571,18 +583,18 @@ out:
>  
>  /* Update (create or replace) forwarding database entry */
>  static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
> -			 __u16 state, __u16 flags)
> +			 __u16 state, __u16 flags, __u16 vlan_tci)
>  {
>  	struct net_bridge *br = source->br;
> -	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
> +	struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan_tci)];
>  	struct net_bridge_fdb_entry *fdb;
>  
> -	fdb = fdb_find(head, addr);
> +	fdb = fdb_find(head, addr, vlan_tci);
>  	if (fdb == NULL) {
>  		if (!(flags & NLM_F_CREATE))
>  			return -ENOENT;
>  
> -		fdb = fdb_create(head, source, addr);
> +		fdb = fdb_create(head, source, addr, vlan_tci);
>  		if (!fdb)
>  			return -ENOMEM;
>  		fdb_notify(br, fdb, RTM_NEWNEIGH);
> @@ -628,11 +640,12 @@ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
>  
>  	if (ndm->ndm_flags & NTF_USE) {
>  		rcu_read_lock();
> -		br_fdb_update(p->br, p, addr);
> +		br_fdb_update(p->br, p, addr, 0);
>  		rcu_read_unlock();
>  	} else {
>  		spin_lock_bh(&p->br->hash_lock);
> -		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags);
> +		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags,
> +				0);
>  		spin_unlock_bh(&p->br->hash_lock);
>  	}
>  
> @@ -642,10 +655,10 @@ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
>  static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
>  {
>  	struct net_bridge *br = p->br;
> -	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
> +	struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)];
>  	struct net_bridge_fdb_entry *fdb;
>  
> -	fdb = fdb_find(head, addr);
> +	fdb = fdb_find(head, addr, 0);
>  	if (!fdb)
>  		return -ENOENT;
>  
> diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
> index b7ca3b3..a4579ee 100644
> --- a/net/bridge/br_input.c
> +++ b/net/bridge/br_input.c
> @@ -62,12 +62,15 @@ int br_handle_frame_finish(struct sk_buff *skb)
>  	 * only traffic matching the VLAN filter.
>  	 */
>  	vlan_map = rcu_dereference(p->vlan_map);
> -	if (vlan_map && !test_bit((skb->vlan_tci & VLAN_VID_MASK), vlan_map))
> -		goto drop;
> +	if (vlan_map && vlan_tx_tag_present(skb)) {
> +		unsigned short vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK;
> +		if (!test_bit(vid+1, vlan_map))
> +			goto drop;
> +	}
>  
>  	/* insert into forwarding database after filtering to avoid spoofing */
>  	br = p->br;
> -	br_fdb_update(br, p, eth_hdr(skb)->h_source);
> +	br_fdb_update(br, p, eth_hdr(skb)->h_source, vlan_tx_tag_get(skb));
>  
>  	if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
>  	    br_multicast_rcv(br, p, skb))
> @@ -102,7 +105,8 @@ int br_handle_frame_finish(struct sk_buff *skb)
>  			skb2 = skb;
>  
>  		br->dev->stats.multicast++;
> -	} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
> +	} else if ((dst = __br_fdb_get(br, dest, vlan_tx_tag_get(skb))) &&
> +			dst->is_local) {
>  		skb2 = skb;
>  		/* Do not forward the packet since it's local. */
>  		skb = NULL;
> @@ -131,7 +135,7 @@ static int br_handle_local_finish(struct sk_buff *skb)
>  {
>  	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
>  
> -	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
> +	br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vlan_tx_tag_get(skb));
>  	return 0;	 /* process further */
>  }
>  
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 8da90e8..921b927 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -72,6 +72,7 @@ struct net_bridge_fdb_entry
>  	unsigned long			updated;
>  	unsigned long			used;
>  	mac_addr			addr;
> +	__u16				vlan_id;
>  	unsigned char			is_local;
>  	unsigned char			is_static;
>  };
> @@ -352,7 +353,8 @@ extern void br_fdb_cleanup(unsigned long arg);
>  extern void br_fdb_delete_by_port(struct net_bridge *br,
>  				  const struct net_bridge_port *p, int do_all);
>  extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
> -						 const unsigned char *addr);
> +						 const unsigned char *addr,
> +						 __u16 vlan_tci);
>  extern int br_fdb_test_addr(struct net_device *dev, unsigned char *addr);
>  extern int br_fdb_fillbuf(struct net_bridge *br, void *buf,
>  			  unsigned long count, unsigned long off);
> @@ -361,7 +363,8 @@ extern int br_fdb_insert(struct net_bridge *br,
>  			 const unsigned char *addr);
>  extern void br_fdb_update(struct net_bridge *br,
>  			  struct net_bridge_port *source,
> -			  const unsigned char *addr);
> +			  const unsigned char *addr,
> +			  __u16 vlan_tci);
>  
>  extern int br_fdb_delete(struct ndmsg *ndm,
>  			 struct net_device *dev,
> -- 
> 1.7.7.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [RFC PATCH] ipv6: fix handling of blackhole and prohibit routes
From: Nicolas Dichtel @ 2012-08-30 14:29 UTC (permalink / raw)
  To: netdev

[-- Attachment #1: Type: text/plain, Size: 116 bytes --]

Hi,

enclosed is a patch to fix addition of blackhole and prohibit routes.

Comments are welcome.

Regards,
Nicolas

[-- Attachment #2: 0001-ipv6-fix-handling-of-blackhole-and-prohibit-routes.patch --]
[-- Type: text/x-patch, Size: 3867 bytes --]

>From 0131261ac3947631b96036ffafb30ee2e95604f2 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 30 Aug 2012 07:07:30 -0400
Subject: [PATCH] ipv6: fix handling of blackhole and prohibit routes

When adding a blackhole or a prohibit route, they were handling like classic
routes. Moreover, it was only possible to add this kind of routes by specifying
an interface.

Bug already reported here:
  http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=498498

Before the patch:
  $ ip route add blackhole 2001::1/128
  RTNETLINK answers: No such device
  $ ip route add blackhole 2001::1/128 dev eth0
  $ ip -6 route | grep 2001
  2001::1 dev eth0  metric 1024

After:
  $ ip route add blackhole 2001::1/128
  $ ip -6 route | grep 2001
  blackhole 2001::1 dev lo  metric 1024  error -22

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 include/linux/route.h |    2 ++
 net/ipv6/route.c      |   27 ++++++++++++++++++++++-----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/include/linux/route.h b/include/linux/route.h
index 6600708..166fb68 100644
--- a/include/linux/route.h
+++ b/include/linux/route.h
@@ -58,6 +58,8 @@ struct rtentry {
 #define RTF_WINDOW	0x0080		/* per route window clamping	*/
 #define RTF_IRTT	0x0100		/* Initial round trip time	*/
 #define RTF_REJECT	0x0200		/* Reject route			*/
+#define RTF_BLACKHOLE	0x0400		/* Blackhole route		*/
+#define RTF_PROHIBIT	0x0800		/* Prohibit route		*/
 
 /*
  *	<linux/ipv6_route.h> uses RTF values >= 64k
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8e80fd2..69369b0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -251,7 +251,7 @@ static struct rt6_info ip6_prohibit_entry_template = {
 		.input		= ip6_pkt_prohibit,
 		.output		= ip6_pkt_prohibit_out,
 	},
-	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP | RTF_PROHIBIT),
 	.rt6i_protocol  = RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
@@ -266,7 +266,7 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 		.input		= dst_discard,
 		.output		= dst_discard,
 	},
-	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
+	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP | RTF_BLACKHOLE),
 	.rt6i_protocol  = RTPROT_KERNEL,
 	.rt6i_metric	= ~(u32) 0,
 	.rt6i_ref	= ATOMIC_INIT(1),
@@ -1463,8 +1463,15 @@ int ip6_route_add(struct fib6_config *cfg)
 		}
 		rt->dst.output = ip6_pkt_discard_out;
 		rt->dst.input = ip6_pkt_discard;
-		rt->dst.error = -ENETUNREACH;
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+		if (cfg->fc_flags & RTF_BLACKHOLE) {
+			rt->dst.error = -EINVAL;
+			rt->rt6i_flags |= RTF_BLACKHOLE;
+		} else if (cfg->fc_flags & RTF_PROHIBIT) {
+			rt->dst.error = -EACCES;
+			rt->rt6i_flags |= RTF_PROHIBIT;
+		} else
+			rt->dst.error = -ENETUNREACH;
 		goto install_route;
 	}
 
@@ -2264,6 +2271,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (rtm->rtm_type == RTN_UNREACHABLE)
 		cfg->fc_flags |= RTF_REJECT;
+	if (rtm->rtm_type == RTN_BLACKHOLE)
+		cfg->fc_flags |= RTF_REJECT | RTF_BLACKHOLE;
+	if (rtm->rtm_type == RTN_PROHIBIT)
+		cfg->fc_flags |= RTF_REJECT | RTF_PROHIBIT;
 
 	if (rtm->rtm_type == RTN_LOCAL)
 		cfg->fc_flags |= RTF_LOCAL;
@@ -2391,8 +2402,14 @@ static int rt6_fill_node(struct net *net,
 	rtm->rtm_table = table;
 	if (nla_put_u32(skb, RTA_TABLE, table))
 		goto nla_put_failure;
-	if (rt->rt6i_flags & RTF_REJECT)
-		rtm->rtm_type = RTN_UNREACHABLE;
+	if (rt->rt6i_flags & RTF_REJECT) {
+		if (rt->rt6i_flags & RTF_BLACKHOLE)
+			rtm->rtm_type = RTN_BLACKHOLE;
+		else if (rt->rt6i_flags & RTF_PROHIBIT)
+			rtm->rtm_type = RTN_PROHIBIT;
+		else
+			rtm->rtm_type = RTN_UNREACHABLE;
+	}
 	else if (rt->rt6i_flags & RTF_LOCAL)
 		rtm->rtm_type = RTN_LOCAL;
 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
-- 
1.7.10.4


^ permalink raw reply related

* Re: [RFC PATCH bridge 5/5] bridge: Add sysfs interface to display VLANS
From: Michael S. Tsirkin @ 2012-08-30 14:26 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <503F731C.1070208@redhat.com>

On Thu, Aug 30, 2012 at 10:05:16AM -0400, Vlad Yasevich wrote:
> On 08/30/2012 08:27 AM, Michael S. Tsirkin wrote:
> >On Thu, Aug 23, 2012 at 03:29:55PM -0400, Vlad Yasevich wrote:
> >>Add a binary sysfs file that will dump out vlans currently configured on the
> >>port.
> >>
> >>Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> >
> >So what's the format here? I could not tell.
> >List of vlans? Why binary? Why not make it text in that case?
> >This would also allow reporting "all" if filtering
> >is disabled and "untagged" for untagged packets.
> 
> I decided to do binary because text may result in more then page
> worth of data.

Well yes, you need 3 bytes to print a vid and 1 extra for a space.  But
in binary, 2 bytes per VID and there are 4K valid VIDs so binary needs
more than 1 page too?

If using binary I would go for bit per valid bit
this solves it nicely.
We can have a separate field that shows whether untagged
packets are filtered.

> The display tool will know how to display things
> properly.
> 
> -vlad

I think you know that one of the points of sysfs
is to allow use without tools.
Tools are happier using other interfaces such as
ioctl or netlink.



> >
> >>---
> >>  include/linux/if_bridge.h |    1 +
> >>  net/bridge/br_if.c        |   34 ++++++++++++++++++++++++++++++++++
> >>  net/bridge/br_private.h   |    2 ++
> >>  net/bridge/br_sysfs_if.c  |   28 ++++++++++++++++++++++++++++
> >>  4 files changed, 65 insertions(+), 0 deletions(-)
> >>
> >>diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> >>index ab750dd..d0f869b 100644
> >>--- a/include/linux/if_bridge.h
> >>+++ b/include/linux/if_bridge.h
> >>@@ -20,6 +20,7 @@
> >>  #define SYSFS_BRIDGE_PORT_SUBDIR "brif"
> >>  #define SYSFS_BRIDGE_PORT_ATTR	"brport"
> >>  #define SYSFS_BRIDGE_PORT_LINK	"bridge"
> >>+#define SYSFS_BRIDGE_PORT_VLANS "vlans"
> >>
> >>  #define BRCTL_VERSION 1
> >>
> >>diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> >>index 90c1038..3963748 100644
> >>--- a/net/bridge/br_if.c
> >>+++ b/net/bridge/br_if.c
> >>@@ -510,6 +510,40 @@ int br_del_port_vlan(struct net_bridge_port *p, unsigned long vlan)
> >>  	return 0;
> >>  }
> >>
> >>+size_t br_port_fill_vlans(struct net_bridge_port *p, char* buf,
> >>+			unsigned long max, unsigned long skip)
> >>+{
> >>+	unsigned long *map;
> >>+	unsigned short *vid = (unsigned short *)buf;
> >>+	unsigned short i;
> >>+	int num = 0;
> >>+
> >>+	if (skip > (VLAN_N_VID+1))
> >>+		return -EINVAL;
> >>+
> >>+	memset(buf, 0, max * sizeof(unsigned short));
> >
> >Isn't max is in bytes? why is this safe?
> >
> >>+
> >>+	rcu_read_lock();
> >>+	map = rcu_dereference(p->vlan_map);
> >>+	if (!map)
> >>+		goto out;
> >>+
> >>+	for (i = skip + 1; i < VLAN_N_VID + 1; i++) {
> >
> >Isn't skip in bytes too? Why do you compare it to i which is
> >in dwords?
> >
> >>+		if (test_bit(i, map)) {
> >>+			if (num > max)
> >>+				goto out;
> >>+
> >>+			*vid = i-1;
> >>+			vid++;
> >>+			num++;
> >>+		}
> >>+	}
> >>+out:
> >>+	rcu_read_unlock();
> >>+
> >>+	return num*sizeof(unsigned short);
> >>+}
> >>+
> >>  void __net_exit br_net_exit(struct net *net)
> >>  {
> >>  	struct net_device *dev;
> >>diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> >>index 5639c1c..cf95cd7 100644
> >>--- a/net/bridge/br_private.h
> >>+++ b/net/bridge/br_private.h
> >>@@ -404,6 +404,8 @@ extern netdev_features_t br_features_recompute(struct net_bridge *br,
> >>  	netdev_features_t features);
> >>  extern int br_set_port_vlan(struct net_bridge_port *p, unsigned long vid);
> >>  extern int br_del_port_vlan(struct net_bridge_port *p, unsigned long vid);
> >>+extern size_t br_port_fill_vlans(struct net_bridge_port *p, char *buf,
> >>+				unsigned long max, unsigned long skip);
> >>
> >>  /* br_input.c */
> >>  extern int br_handle_frame_finish(struct sk_buff *skb);
> >>diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
> >>index 13b36bd..a81e2ef 100644
> >>--- a/net/bridge/br_sysfs_if.c
> >>+++ b/net/bridge/br_sysfs_if.c
> >>@@ -234,6 +234,29 @@ const struct sysfs_ops brport_sysfs_ops = {
> >>  };
> >>
> >>  /*
> >>+ * Export the vlan table for a given port as a binary file.
> >>+ * The records are unsgined shorts.
> >>+ *
> >>+ * Returns the number of bytes read.
> >>+ */
> >>+static ssize_t brport_vlans_read(struct file *filp, struct kobject *kobj,
> >>+				struct bin_attribute *bin_attr,
> >>+				char *buf, loff_t off, size_t count)
> >>+{
> >>+	struct net_bridge_port *p = to_brport(kobj);
> >>+
> >>+	return br_port_fill_vlans(p, buf,
> >>+				count/sizeof(unsigned short),
> >>+				off/sizeof(unsigned short));
> >>+}
> >>+
> >>+static struct bin_attribute port_vlans = {
> >>+	.attr = { .name = SYSFS_BRIDGE_PORT_VLANS,
> >>+		  .mode = S_IRUGO, },
> >>+	.read = brport_vlans_read,
> >>+};
> >>+
> >>+/*
> >>   * Add sysfs entries to ethernet device added to a bridge.
> >>   * Creates a brport subdirectory with bridge attributes.
> >>   * Puts symlink in bridge's brif subdirectory
> >>@@ -255,6 +278,11 @@ int br_sysfs_addif(struct net_bridge_port *p)
> >>  			return err;
> >>  	}
> >>
> >>+	err = sysfs_create_bin_file(&p->kobj, &port_vlans);
> >>+	if (err) {
> >>+		return err;
> >>+	}
> >>+
> >>  	strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
> >>  	return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
> >>  }
> >>--
> >>1.7.7.6
> >>
> >>--
> >>To unsubscribe from this list: send the line "unsubscribe netdev" in
> >>the body of a message to majordomo@vger.kernel.org
> >>More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH bridge 5/5] bridge: Add sysfs interface to display VLANS
From: Vlad Yasevich @ 2012-08-30 14:05 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev
In-Reply-To: <20120830122724.GC20228@redhat.com>

On 08/30/2012 08:27 AM, Michael S. Tsirkin wrote:
> On Thu, Aug 23, 2012 at 03:29:55PM -0400, Vlad Yasevich wrote:
>> Add a binary sysfs file that will dump out vlans currently configured on the
>> port.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>
> So what's the format here? I could not tell.
> List of vlans? Why binary? Why not make it text in that case?
> This would also allow reporting "all" if filtering
> is disabled and "untagged" for untagged packets.

I decided to do binary because text may result in more then page worth 
of data.  The display tool will know how to display things properly.

-vlad

>
>> ---
>>   include/linux/if_bridge.h |    1 +
>>   net/bridge/br_if.c        |   34 ++++++++++++++++++++++++++++++++++
>>   net/bridge/br_private.h   |    2 ++
>>   net/bridge/br_sysfs_if.c  |   28 ++++++++++++++++++++++++++++
>>   4 files changed, 65 insertions(+), 0 deletions(-)
>>
>> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
>> index ab750dd..d0f869b 100644
>> --- a/include/linux/if_bridge.h
>> +++ b/include/linux/if_bridge.h
>> @@ -20,6 +20,7 @@
>>   #define SYSFS_BRIDGE_PORT_SUBDIR "brif"
>>   #define SYSFS_BRIDGE_PORT_ATTR	"brport"
>>   #define SYSFS_BRIDGE_PORT_LINK	"bridge"
>> +#define SYSFS_BRIDGE_PORT_VLANS "vlans"
>>
>>   #define BRCTL_VERSION 1
>>
>> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
>> index 90c1038..3963748 100644
>> --- a/net/bridge/br_if.c
>> +++ b/net/bridge/br_if.c
>> @@ -510,6 +510,40 @@ int br_del_port_vlan(struct net_bridge_port *p, unsigned long vlan)
>>   	return 0;
>>   }
>>
>> +size_t br_port_fill_vlans(struct net_bridge_port *p, char* buf,
>> +			unsigned long max, unsigned long skip)
>> +{
>> +	unsigned long *map;
>> +	unsigned short *vid = (unsigned short *)buf;
>> +	unsigned short i;
>> +	int num = 0;
>> +
>> +	if (skip > (VLAN_N_VID+1))
>> +		return -EINVAL;
>> +
>> +	memset(buf, 0, max * sizeof(unsigned short));
>
> Isn't max is in bytes? why is this safe?
>
>> +
>> +	rcu_read_lock();
>> +	map = rcu_dereference(p->vlan_map);
>> +	if (!map)
>> +		goto out;
>> +
>> +	for (i = skip + 1; i < VLAN_N_VID + 1; i++) {
>
> Isn't skip in bytes too? Why do you compare it to i which is
> in dwords?
>
>> +		if (test_bit(i, map)) {
>> +			if (num > max)
>> +				goto out;
>> +
>> +			*vid = i-1;
>> +			vid++;
>> +			num++;
>> +		}
>> +	}
>> +out:
>> +	rcu_read_unlock();
>> +
>> +	return num*sizeof(unsigned short);
>> +}
>> +
>>   void __net_exit br_net_exit(struct net *net)
>>   {
>>   	struct net_device *dev;
>> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
>> index 5639c1c..cf95cd7 100644
>> --- a/net/bridge/br_private.h
>> +++ b/net/bridge/br_private.h
>> @@ -404,6 +404,8 @@ extern netdev_features_t br_features_recompute(struct net_bridge *br,
>>   	netdev_features_t features);
>>   extern int br_set_port_vlan(struct net_bridge_port *p, unsigned long vid);
>>   extern int br_del_port_vlan(struct net_bridge_port *p, unsigned long vid);
>> +extern size_t br_port_fill_vlans(struct net_bridge_port *p, char *buf,
>> +				unsigned long max, unsigned long skip);
>>
>>   /* br_input.c */
>>   extern int br_handle_frame_finish(struct sk_buff *skb);
>> diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
>> index 13b36bd..a81e2ef 100644
>> --- a/net/bridge/br_sysfs_if.c
>> +++ b/net/bridge/br_sysfs_if.c
>> @@ -234,6 +234,29 @@ const struct sysfs_ops brport_sysfs_ops = {
>>   };
>>
>>   /*
>> + * Export the vlan table for a given port as a binary file.
>> + * The records are unsgined shorts.
>> + *
>> + * Returns the number of bytes read.
>> + */
>> +static ssize_t brport_vlans_read(struct file *filp, struct kobject *kobj,
>> +				struct bin_attribute *bin_attr,
>> +				char *buf, loff_t off, size_t count)
>> +{
>> +	struct net_bridge_port *p = to_brport(kobj);
>> +
>> +	return br_port_fill_vlans(p, buf,
>> +				count/sizeof(unsigned short),
>> +				off/sizeof(unsigned short));
>> +}
>> +
>> +static struct bin_attribute port_vlans = {
>> +	.attr = { .name = SYSFS_BRIDGE_PORT_VLANS,
>> +		  .mode = S_IRUGO, },
>> +	.read = brport_vlans_read,
>> +};
>> +
>> +/*
>>    * Add sysfs entries to ethernet device added to a bridge.
>>    * Creates a brport subdirectory with bridge attributes.
>>    * Puts symlink in bridge's brif subdirectory
>> @@ -255,6 +278,11 @@ int br_sysfs_addif(struct net_bridge_port *p)
>>   			return err;
>>   	}
>>
>> +	err = sysfs_create_bin_file(&p->kobj, &port_vlans);
>> +	if (err) {
>> +		return err;
>> +	}
>> +
>>   	strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
>>   	return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
>>   }
>> --
>> 1.7.7.6
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH bridge 0/5] Add basic VLAN support to bridges
From: Vlad Yasevich @ 2012-08-30 13:37 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev
In-Reply-To: <20120830123724.GE20228@redhat.com>

On 08/30/2012 08:37 AM, Michael S. Tsirkin wrote:
> On Thu, Aug 23, 2012 at 03:29:50PM -0400, Vlad Yasevich wrote:
>> This series of patches provides an ability to add VLAN IDs to the bridge
>> ports.  This is similar to what can be found in most switches.  The bridge
>> port may have any number of VLANs added to it including vlan 0 for untagged
>> traffic.  When vlans are added to the port, only traffic tagged with particular
>> vlan will forwarded over this port.  Additionally, vlan ids are added to FDB
>> entries and become part of the lookup.  This way we correctly identify the FDB
>> entry.
>>
>> There are still pieces missing.  I don't yet support adding a static fdb entry
>> with a particular vlan.  There is no netlink support for carrying a vlan id.
>>
>> I'd like to hear thoughts of whether this is usufull and something we should
>> persue.
>>
>> The default behavior ofthe bridge is unchanged if no vlans have been
>> configured.
>
> Overall the feature looks good, I can think of some uses
> for it - for example, it could become useful for VMs if
> we add support to tap essentially stripping tags in Xmit but maybe you
> could be more explicit about what you have in mind?
> Do you plan to add tap support as well?

Yes,  this is something I've thought of.  Not sure if it would be at tap
or bridge itself.  Need to work out where best to do it.

> Also - what tool support do you plan?

the patchset includes brctl to configure, but that seems to be getting 
deprecated.  I am working on iproute2 to add capability to configure this.

>
> I also found some coding style issues and some bugs in
> the patchset. Sent on list.

Thanks
-vlad

>
> Hope this helps.
>

^ permalink raw reply

* Re: [PATCH 1/1] tcp: Wrong timeout for SYN segments
From: Eric Dumazet @ 2012-08-30 13:12 UTC (permalink / raw)
  To: H.K. Jerry Chu; +Cc: Alexander Bergmann, David Miller, netdev, linux-kernel
In-Reply-To: <CAFbMe2PG90X7s6s970+XK3X0Jvzx4p6vhvM+JQCwtULPvs1QLw@mail.gmail.com>

On Wed, 2012-08-29 at 10:25 -0700, H.K. Jerry Chu wrote:

> But it probably matter slightly more for TCP Fast Open (the server
> side patch has
> been completed and will be posted soon, after I finish breaking it up
> into smaller
> pieces for ease of review purpose), when a full socket will be created with data
> passed to the app upon a valid SYN+data. Dropping a fully functioning socket
> won't be the same as dropping a request_sock unknown to the app and letting
> the other side retransmitting SYN (w/o data this time).
> 
> >
> > Sure, RFC numbers are what they are, but in practice, I doubt someone
> > will really miss the extra SYNACK sent after ~32 seconds, since it would
> > matter only for the last SYN attempted.
> 
> I'd slightly prefer 1 extra retry plus longer wait time just to make
> TCP Fast Open
> a little more robust (even though the app protocol is required to be
> idempotent).
> But this is not a showstopper.

Thats very good points indeed, thanks.

Maybe we can increase SYNACK max retrans only if the FastOpen SYN cookie
was validated.

This way, we increase reliability without amplifying the effect of wild
SYN packets.

^ permalink raw reply

* Re: [RFC PATCH bridge 3/5] bridge: Add vlan id to multicast groups
From: Eric Dumazet @ 2012-08-30 12:55 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <1345750195-31598-4-git-send-email-vyasevic@redhat.com>

On Thu, 2012-08-23 at 15:29 -0400, Vlad Yasevich wrote:
> Add vlan_id to multicasts groups so that we know which vlan each group belongs
> to and can correctly forward to appropriate vlan.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---

>  #if IS_ENABLED(CONFIG_IPV6)
>  static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
> -				const struct in6_addr *ip)
> +				const struct in6_addr *ip,
> +				__u16 vid)
>  {
> -	return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1);
> +	u32 addr = *(__force u32 *)ip->s6_addr32;
> +	return jhash_2words(addr, vid, mdb->secret) & (mdb->max - 1);
>  }
>  #endif

It seems to me this is wrong.

Hashing only the first 32bits of the IPv6 address is not enough.

We know have ipv6_addr_hash()

^ permalink raw reply

* Re: [Qemu-devel] macvlan/macvtap: guest/host cannot communicate when network cable is unplugged
From: Stefan Hajnoczi @ 2012-08-30 12:53 UTC (permalink / raw)
  To: ching; +Cc: qemu-devel, kaber, Michael S. Tsirkin, netdev
In-Reply-To: <503F58D0.2010003@gmail.com>

On Thu, Aug 30, 2012 at 1:13 PM, ching <lsching17@gmail.com> wrote:
>
>> Can you try the same test with two macvlan interfaces on the host (no
>> macvtap)?  You may need to use the ping -I <interface-address>
>> argument to force the ping source address to a specific macvlan
>> interface.
>>
>> If you see the same problem, it may just be the macvlan design - it is
>> stacked on top of eth0 and might not work when eth0 is down.  CCing
>> macvlan/macvtap folks.
>>
>> Stefan
>>
>
> tested as below
>
> $ifconfig
>
>     eth0      Link encap:Ethernet  HWaddr f4:6d:xx:xx:xx:xx
>               inet6 addr: fe80::xx:xx:xx:xx/64 Scope:Link
>               UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
>               RX packets:86507 errors:0 dropped:0 overruns:0 frame:0
>               TX packets:55940 errors:0 dropped:0 overruns:0 carrier:0
>               collisions:0 txqueuelen:1000
>               RX bytes:126005746 (120.1 MiB)  TX bytes:4394225 (4.1 MiB)
>
>     macvtap0  Link encap:Ethernet  HWaddr 52:54:xx:xx:xx:xx
>               inet6 addr: fe80::xx:xx:xx:xx/64 Scope:Link
>               UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
>               RX packets:70 errors:0 dropped:0 overruns:0 frame:0
>               TX packets:84 errors:0 dropped:0 overruns:0 carrier:0
>               collisions:0 txqueuelen:500
>               RX bytes:9036 (8.8 KiB)  TX bytes:14734 (14.3 KiB)
>
>     znet0     Link encap:Ethernet  HWaddr 00:60:xx:xx:xx:92
>               inet addr:192.168.1.2  Bcast:192.168.1.255  Mask:255.255.255.0
>               inet6 addr: 2002:xx:xx:xx:xx/64 Scope:Global
>               inet6 addr: fe80:xx:xx:xx:xx/64 Scope:Link
>               UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
>               RX packets:4463190 errors:0 dropped:0 overruns:0 frame:0
>               TX packets:12527522 errors:0 dropped:0 overruns:0 carrier:0
>               collisions:0 txqueuelen:0
>               RX bytes:3959213697 (3.6 GiB)  TX bytes:18590336476 (17.3 GiB)
>
>    znet1     Link encap:Ethernet  HWaddr 00:60:xx:xx:xx:99
>               inet addr:192.168.1.177  Bcast:192.168.1.255  Mask:255.255.255.0
>               inet6 addr: 2002:xx:xx:xx:xx64 Scope:Global
>               inet6 addr: fe80:xx:xx:xx:xx/64 Scope:Link
>               UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
>               RX packets:8 errors:0 dropped:0 overruns:0 frame:0
>               TX packets:9 errors:0 dropped:0 overruns:0 carrier:0
>               collisions:0 txqueuelen:0
>               RX bytes:1399 (1.3 KiB)  TX bytes:1522 (1.4 KiB)
>
> $ ip -d link show
>
>     10: znet0@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT
>         link/ether 00:60:xx:xx:xx:92 brd ff:ff:ff:ff:ff:ff
>         macvlan  mode bridge
>     15: znet1@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN mode DEFAULT
>         link/ether 00:60:xx:xx:xx:99 brd ff:ff:ff:ff:ff:ff
>         macvlan  mode bridge
>
>
> the macvlan interface cannot ping each other no matter network cable is plugged or not
>
> $ ping -I 192.168.1.2 192.168.1.177
> PING 192.168.1.177 (192.168.1.177) from 192.168.1.2 : 56(84) bytes of data.
>
> --- 192.168.1.177 ping statistics ---
> 6 packets transmitted, 0 received, 100% packet loss, time 4999ms

In bridge mode I expected them to be able to communicate.

> I also perform an additional test: the guests (macvtap bridge mode) CAN communicate each other no matter network cable is plugged or not.

Strange.  I thought the original problem was that the macvtap guests
cannot communicate with each other when the network cable is
unplugged?

Hopefully someone else can help you, I'm not familiar enough with
macvlan/macvtap.

Stefan

^ permalink raw reply

* Re: [RFC PATCH bridge 0/5] Add basic VLAN support to bridges
From: Michael S. Tsirkin @ 2012-08-30 12:37 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <1345750195-31598-1-git-send-email-vyasevic@redhat.com>

On Thu, Aug 23, 2012 at 03:29:50PM -0400, Vlad Yasevich wrote:
> This series of patches provides an ability to add VLAN IDs to the bridge
> ports.  This is similar to what can be found in most switches.  The bridge
> port may have any number of VLANs added to it including vlan 0 for untagged
> traffic.  When vlans are added to the port, only traffic tagged with particular
> vlan will forwarded over this port.  Additionally, vlan ids are added to FDB
> entries and become part of the lookup.  This way we correctly identify the FDB
> entry.
> 
> There are still pieces missing.  I don't yet support adding a static fdb entry
> with a particular vlan.  There is no netlink support for carrying a vlan id.
> 
> I'd like to hear thoughts of whether this is usufull and something we should
> persue.
> 
> The default behavior ofthe bridge is unchanged if no vlans have been
> configured.

Overall the feature looks good, I can think of some uses
for it - for example, it could become useful for VMs if
we add support to tap essentially stripping tags in Xmit but maybe you
could be more explicit about what you have in mind?
Do you plan to add tap support as well?
Also - what tool support do you plan?

I also found some coding style issues and some bugs in
the patchset. Sent on list.

Hope this helps.

-- 
MST

^ permalink raw reply

* Re: [RFC PATCH bridge 3/5] bridge: Add vlan id to multicast groups
From: Michael S. Tsirkin @ 2012-08-30 12:30 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <1345750195-31598-4-git-send-email-vyasevic@redhat.com>

On Thu, Aug 23, 2012 at 03:29:53PM -0400, Vlad Yasevich wrote:
> Add vlan_id to multicasts groups so that we know which vlan each group belongs
> to and can correctly forward to appropriate vlan.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  net/bridge/br_multicast.c |   64 +++++++++++++++++++++++++++++++--------------
>  net/bridge/br_private.h   |    1 +
>  2 files changed, 45 insertions(+), 20 deletions(-)
> 
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 2417434..2976a2b 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -51,6 +51,8 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
>  {
>  	if (a->proto != b->proto)
>  		return 0;
> +	if (a->vid != b->vid)
> +		return 0;
>  	switch (a->proto) {
>  	case htons(ETH_P_IP):
>  		return a->u.ip4 == b->u.ip4;
> @@ -62,16 +64,19 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
>  	return 0;
>  }
>  
> -static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
> +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip,
> +				__u16 vid)
>  {
> -	return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
> +	return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1);
>  }
>  
>  #if IS_ENABLED(CONFIG_IPV6)
>  static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
> -				const struct in6_addr *ip)
> +				const struct in6_addr *ip,
> +				__u16 vid)
>  {
> -	return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1);
> +	u32 addr = *(__force u32 *)ip->s6_addr32;
> +	return jhash_2words(addr, vid, mdb->secret) & (mdb->max - 1);
>  }
>  #endif
>  
> @@ -80,10 +85,10 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
>  {
>  	switch (ip->proto) {
>  	case htons(ETH_P_IP):
> -		return __br_ip4_hash(mdb, ip->u.ip4);
> +		return __br_ip4_hash(mdb, ip->u.ip4, ip->vid);
>  #if IS_ENABLED(CONFIG_IPV6)
>  	case htons(ETH_P_IPV6):
> -		return __br_ip6_hash(mdb, &ip->u.ip6);
> +		return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid);
>  #endif
>  	}
>  	return 0;
> @@ -113,24 +118,27 @@ static struct net_bridge_mdb_entry *br_mdb_ip_get(
>  }
>  
>  static struct net_bridge_mdb_entry *br_mdb_ip4_get(
> -	struct net_bridge_mdb_htable *mdb, __be32 dst)
> +	struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vlan_tci)
>  {
>  	struct br_ip br_dst;
>  
>  	br_dst.u.ip4 = dst;
>  	br_dst.proto = htons(ETH_P_IP);
> +	br_dst.vid = (vlan_tci & VLAN_VID_MASK);

() around value not needed.
Same in all cases below, I am not repeating
this comment.

>  
>  	return br_mdb_ip_get(mdb, &br_dst);
>  }
>  
>  #if IS_ENABLED(CONFIG_IPV6)
>  static struct net_bridge_mdb_entry *br_mdb_ip6_get(
> -	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst)
> +	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst,
> +	__u16 vlan_tci)
>  {
>  	struct br_ip br_dst;
>  
>  	br_dst.u.ip6 = *dst;
>  	br_dst.proto = htons(ETH_P_IPV6);
> +	br_dst.vid = vlan_tci & VLAN_VID_MASK;
>  
>  	return br_mdb_ip_get(mdb, &br_dst);
>  }
> @@ -692,7 +700,8 @@ err:
>  
>  static int br_ip4_multicast_add_group(struct net_bridge *br,
>  				      struct net_bridge_port *port,
> -				      __be32 group)
> +				      __be32 group,
> +				      __u16 vlan_tci)
>  {
>  	struct br_ip br_group;
>  
> @@ -701,6 +710,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
>  
>  	br_group.u.ip4 = group;
>  	br_group.proto = htons(ETH_P_IP);
> +	br_group.vid = vlan_tci & VLAN_VID_MASK;
>  
>  	return br_multicast_add_group(br, port, &br_group);
>  }
> @@ -708,7 +718,8 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
>  #if IS_ENABLED(CONFIG_IPV6)
>  static int br_ip6_multicast_add_group(struct net_bridge *br,
>  				      struct net_bridge_port *port,
> -				      const struct in6_addr *group)
> +				      const struct in6_addr *group,
> +				      __u16 vlan_tci)
>  {
>  	struct br_ip br_group;
>  
> @@ -717,6 +728,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
>  
>  	br_group.u.ip6 = *group;
>  	br_group.proto = htons(ETH_P_IPV6);
> +	br_group.vid = vlan_tci & VLAN_VID_MASK;
>  
>  	return br_multicast_add_group(br, port, &br_group);
>  }
> @@ -928,7 +940,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
>  			continue;
>  		}
>  
> -		err = br_ip4_multicast_add_group(br, port, group);
> +		err = br_ip4_multicast_add_group(br, port, group,
> +						skb->vlan_tci);


Pls align continuation line at (, same as other
code in this file. Same in all cases below, I am not repeating
this comment.

>  		if (err)
>  			break;
>  	}
> @@ -988,7 +1001,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
>  			continue;
>  		}
>  
> -		err = br_ip6_multicast_add_group(br, port, &grec->grec_mca);
> +		err = br_ip6_multicast_add_group(br, port, &grec->grec_mca,
> +						skb->vlan_tci);
>  		if (!err)
>  			break;
>  	}
> @@ -1106,7 +1120,8 @@ static int br_ip4_multicast_query(struct net_bridge *br,
>  	if (!group)
>  		goto out;
>  
> -	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group);
> +	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group,
> +				skb->vlan_tci);
>  	if (!mp)
>  		goto out;
>  
> @@ -1178,7 +1193,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
>  	if (!group)
>  		goto out;
>  
> -	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group);
> +	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group,
> +			skb->vlan_tci);
>  	if (!mp)
>  		goto out;
>  
> @@ -1262,7 +1278,8 @@ out:
>  
>  static void br_ip4_multicast_leave_group(struct net_bridge *br,
>  					 struct net_bridge_port *port,
> -					 __be32 group)
> +					 __be32 group,
> +					 __u16 vlan_tci)
>  {
>  	struct br_ip br_group;
>  
> @@ -1271,6 +1288,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
>  
>  	br_group.u.ip4 = group;
>  	br_group.proto = htons(ETH_P_IP);
> +	br_group.vid = (vlan_tci & VLAN_VID_MASK);
>  
>  	br_multicast_leave_group(br, port, &br_group);
>  }
> @@ -1278,7 +1296,8 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
>  #if IS_ENABLED(CONFIG_IPV6)
>  static void br_ip6_multicast_leave_group(struct net_bridge *br,
>  					 struct net_bridge_port *port,
> -					 const struct in6_addr *group)
> +					 const struct in6_addr *group,
> +					 __u16 vlan_tci)
>  {
>  	struct br_ip br_group;
>  
> @@ -1287,6 +1306,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
>  
>  	br_group.u.ip6 = *group;
>  	br_group.proto = htons(ETH_P_IPV6);
> +	br_group.vid = (vlan_tci & VLAN_VID_MASK);
>  
>  	br_multicast_leave_group(br, port, &br_group);
>  }
> @@ -1369,7 +1389,8 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
>  	case IGMP_HOST_MEMBERSHIP_REPORT:
>  	case IGMPV2_HOST_MEMBERSHIP_REPORT:
>  		BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
> -		err = br_ip4_multicast_add_group(br, port, ih->group);
> +		err = br_ip4_multicast_add_group(br, port, ih->group,
> +						skb2->vlan_tci);
>  		break;
>  	case IGMPV3_HOST_MEMBERSHIP_REPORT:
>  		err = br_ip4_multicast_igmp3_report(br, port, skb2);
> @@ -1378,7 +1399,8 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
>  		err = br_ip4_multicast_query(br, port, skb2);
>  		break;
>  	case IGMP_HOST_LEAVE_MESSAGE:
> -		br_ip4_multicast_leave_group(br, port, ih->group);
> +		br_ip4_multicast_leave_group(br, port, ih->group,
> +						skb2->vlan_tci);
>  		break;
>  	}
>  
> @@ -1498,7 +1520,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
>  		}
>  		mld = (struct mld_msg *)skb_transport_header(skb2);
>  		BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
> -		err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
> +		err = br_ip6_multicast_add_group(br, port, &mld->mld_mca,
> +						 skb2->vlan_tci);
>  		break;
>  	    }
>  	case ICMPV6_MLD2_REPORT:
> @@ -1515,7 +1538,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
>  			goto out;
>  		}
>  		mld = (struct mld_msg *)skb_transport_header(skb2);
> -		br_ip6_multicast_leave_group(br, port, &mld->mld_mca);
> +		br_ip6_multicast_leave_group(br, port, &mld->mld_mca,
> +						skb2->vlan_tci);
>  	    }
>  	}
>  
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 921b927..b6c56ab 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -61,6 +61,7 @@ struct br_ip
>  #endif
>  	} u;
>  	__be16		proto;
> +	__u16		vid;
>  };
>  
>  struct net_bridge_fdb_entry
> -- 
> 1.7.7.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH bridge 5/5] bridge: Add sysfs interface to display VLANS
From: Michael S. Tsirkin @ 2012-08-30 12:27 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <1345750195-31598-6-git-send-email-vyasevic@redhat.com>

On Thu, Aug 23, 2012 at 03:29:55PM -0400, Vlad Yasevich wrote:
> Add a binary sysfs file that will dump out vlans currently configured on the
> port.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>

So what's the format here? I could not tell.
List of vlans? Why binary? Why not make it text in that case?
This would also allow reporting "all" if filtering
is disabled and "untagged" for untagged packets.

> ---
>  include/linux/if_bridge.h |    1 +
>  net/bridge/br_if.c        |   34 ++++++++++++++++++++++++++++++++++
>  net/bridge/br_private.h   |    2 ++
>  net/bridge/br_sysfs_if.c  |   28 ++++++++++++++++++++++++++++
>  4 files changed, 65 insertions(+), 0 deletions(-)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index ab750dd..d0f869b 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -20,6 +20,7 @@
>  #define SYSFS_BRIDGE_PORT_SUBDIR "brif"
>  #define SYSFS_BRIDGE_PORT_ATTR	"brport"
>  #define SYSFS_BRIDGE_PORT_LINK	"bridge"
> +#define SYSFS_BRIDGE_PORT_VLANS "vlans"
>  
>  #define BRCTL_VERSION 1
>  
> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> index 90c1038..3963748 100644
> --- a/net/bridge/br_if.c
> +++ b/net/bridge/br_if.c
> @@ -510,6 +510,40 @@ int br_del_port_vlan(struct net_bridge_port *p, unsigned long vlan)
>  	return 0;
>  }
>  
> +size_t br_port_fill_vlans(struct net_bridge_port *p, char* buf,
> +			unsigned long max, unsigned long skip)
> +{
> +	unsigned long *map;
> +	unsigned short *vid = (unsigned short *)buf;
> +	unsigned short i;
> +	int num = 0;
> +
> +	if (skip > (VLAN_N_VID+1))
> +		return -EINVAL;
> +
> +	memset(buf, 0, max * sizeof(unsigned short));

Isn't max is in bytes? why is this safe?

> +
> +	rcu_read_lock();
> +	map = rcu_dereference(p->vlan_map);
> +	if (!map)
> +		goto out;
> +
> +	for (i = skip + 1; i < VLAN_N_VID + 1; i++) {

Isn't skip in bytes too? Why do you compare it to i which is
in dwords?

> +		if (test_bit(i, map)) {
> +			if (num > max)
> +				goto out;
> +
> +			*vid = i-1;
> +			vid++;
> +			num++;
> +		}
> +	}
> +out:
> +	rcu_read_unlock();
> +
> +	return num*sizeof(unsigned short);
> +}
> +
>  void __net_exit br_net_exit(struct net *net)
>  {
>  	struct net_device *dev;
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 5639c1c..cf95cd7 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -404,6 +404,8 @@ extern netdev_features_t br_features_recompute(struct net_bridge *br,
>  	netdev_features_t features);
>  extern int br_set_port_vlan(struct net_bridge_port *p, unsigned long vid);
>  extern int br_del_port_vlan(struct net_bridge_port *p, unsigned long vid);
> +extern size_t br_port_fill_vlans(struct net_bridge_port *p, char *buf,
> +				unsigned long max, unsigned long skip);
>  
>  /* br_input.c */
>  extern int br_handle_frame_finish(struct sk_buff *skb);
> diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
> index 13b36bd..a81e2ef 100644
> --- a/net/bridge/br_sysfs_if.c
> +++ b/net/bridge/br_sysfs_if.c
> @@ -234,6 +234,29 @@ const struct sysfs_ops brport_sysfs_ops = {
>  };
>  
>  /*
> + * Export the vlan table for a given port as a binary file.
> + * The records are unsgined shorts.
> + *
> + * Returns the number of bytes read.
> + */
> +static ssize_t brport_vlans_read(struct file *filp, struct kobject *kobj,
> +				struct bin_attribute *bin_attr,
> +				char *buf, loff_t off, size_t count)
> +{
> +	struct net_bridge_port *p = to_brport(kobj);
> +
> +	return br_port_fill_vlans(p, buf,
> +				count/sizeof(unsigned short),
> +				off/sizeof(unsigned short));
> +}
> +
> +static struct bin_attribute port_vlans = {
> +	.attr = { .name = SYSFS_BRIDGE_PORT_VLANS,
> +		  .mode = S_IRUGO, },
> +	.read = brport_vlans_read,
> +};
> +
> +/*
>   * Add sysfs entries to ethernet device added to a bridge.
>   * Creates a brport subdirectory with bridge attributes.
>   * Puts symlink in bridge's brif subdirectory
> @@ -255,6 +278,11 @@ int br_sysfs_addif(struct net_bridge_port *p)
>  			return err;
>  	}
>  
> +	err = sysfs_create_bin_file(&p->kobj, &port_vlans);
> +	if (err) {
> +		return err;
> +	}
> +
>  	strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
>  	return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
>  }
> -- 
> 1.7.7.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH bridge 1/5] bridge: Add vlan check to forwarding path
From: Michael S. Tsirkin @ 2012-08-30 12:19 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <1345750195-31598-2-git-send-email-vyasevic@redhat.com>

On Thu, Aug 23, 2012 at 03:29:51PM -0400, Vlad Yasevich wrote:
> When forwarding packets make sure vlan matches any configured vlan for
> the port.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  net/bridge/br_forward.c |   17 ++++++++++++++++-
>  net/bridge/br_input.c   |    8 ++++++++
>  net/bridge/br_private.h |    2 ++
>  3 files changed, 26 insertions(+), 1 deletions(-)
> 
> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
> index e9466d4..ab81954 100644
> --- a/net/bridge/br_forward.c
> +++ b/net/bridge/br_forward.c
> @@ -26,11 +26,26 @@ static int deliver_clone(const struct net_bridge_port *prev,
>  			 void (*__packet_hook)(const struct net_bridge_port *p,
>  					       struct sk_buff *skb));
>  
> -/* Don't forward packets to originating port or forwarding diasabled */
> +/* check to see that the vlan is allowed to be forwarded on this interface */
> +static inline int vlan_match(const struct net_bridge_port *p,
> +			     const struct sk_buff *skb)
> +{
> +	unsigned long *vlan_map = rcu_dereference(p->vlan_map);
> +	unsigned short vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK;
> +
> +	/* The map keeps the vlans off by 1 so adjust for that */
> +	return (vlan_map && vlan_tx_tag_present(skb) &&
> +		test_bit(vid+1, vlan_map));

It looks better if you put spaces around +,* etc:
vid + 1 This applies to all patches and many places so I am not
noting it everywhere - could you fnd such instances and fix up?

Also, this off by 1 map logic is all over this patch,
one hopes we did not forget is somewhere.
Would be better to have a wrapper.

Also do not put return value in () please - it gets us nothing
at all. Again applies everywhere.

> +}
> +
> +/* Don't forward packets to originating port or forwarding diasabled.
> + */
>  static inline int should_deliver(const struct net_bridge_port *p,
>  				 const struct sk_buff *skb)
>  {
> +

extra empty line - no needed here.

>  	return (((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
> +		vlan_match(p, skb) &&
>  		p->state == BR_STATE_FORWARDING);
>  }
>  
> diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
> index 76f15fd..b7ca3b3 100644
> --- a/net/bridge/br_input.c
> +++ b/net/bridge/br_input.c
> @@ -53,10 +53,18 @@ int br_handle_frame_finish(struct sk_buff *skb)
>  	struct net_bridge_fdb_entry *dst;
>  	struct net_bridge_mdb_entry *mdst;
>  	struct sk_buff *skb2;
> +	unsigned long *vlan_map;
>  
>  	if (!p || p->state == BR_STATE_DISABLED)
>  		goto drop;
>  
> +	/* If VLAN filter is configured on the port, make sure we accept
> +	 * only traffic matching the VLAN filter.
> +	 */
> +	vlan_map = rcu_dereference(p->vlan_map);
> +	if (vlan_map && !test_bit((skb->vlan_tci & VLAN_VID_MASK), vlan_map))

() not needed here around parameters.

> +		goto drop;
> +
>  	/* insert into forwarding database after filtering to avoid spoofing */
>  	br = p->br;
>  	br_fdb_update(br, p, eth_hdr(skb)->h_source);
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index a768b24..8da90e8 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -152,6 +152,8 @@ struct net_bridge_port
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	struct netpoll			*np;
>  #endif
> +	unsigned long __rcu		*vlan_map;
> +

Empty line not needed.

>  };
>  
>  #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
> -- 
> 1.7.7.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH bridge 4/5] bridge: Add private ioctls to configure vlans on bridge ports
From: Michael S. Tsirkin @ 2012-08-30 12:17 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev
In-Reply-To: <1345750195-31598-5-git-send-email-vyasevic@redhat.com>

On Thu, Aug 23, 2012 at 03:29:54PM -0400, Vlad Yasevich wrote:
> Add a private ioctl to add and remove vlan configuration on bridge port.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  include/linux/if_bridge.h |    2 +
>  net/bridge/br_if.c        |   69 +++++++++++++++++++++++++++++++++++++++++++++
>  net/bridge/br_ioctl.c     |   31 ++++++++++++++++++++
>  net/bridge/br_private.h   |    2 +
>  4 files changed, 104 insertions(+), 0 deletions(-)
> 
> diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
> index 288ff10..ab750dd 100644
> --- a/include/linux/if_bridge.h
> +++ b/include/linux/if_bridge.h
> @@ -42,6 +42,8 @@
>  #define BRCTL_SET_PORT_PRIORITY 16
>  #define BRCTL_SET_PATH_COST 17
>  #define BRCTL_GET_FDB_ENTRIES 18
> +#define BRCTL_ADD_VLAN 19
> +#define BRCTL_DEL_VLAN 20
>  
>  #define BR_STATE_DISABLED 0
>  #define BR_STATE_LISTENING 1


That's not a lot of documentation especially
considering tricks such as using vlan 0
to mean untagged (an internal convention in kernel).

> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> index e1144e1..90c1038 100644
> --- a/net/bridge/br_if.c
> +++ b/net/bridge/br_if.c
> @@ -23,6 +23,7 @@
>  #include <linux/if_ether.h>
>  #include <linux/slab.h>
>  #include <net/sock.h>
> +#include <linux/if_vlan.h>
>  
>  #include "br_private.h"
>  
> @@ -441,6 +442,74 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
>  	return 0;
>  }
>  
> +/* Called with RTNL */
> +int br_set_port_vlan(struct net_bridge_port *p, unsigned long vlan)
> +{
> +	unsigned long table_size = (VLAN_N_VID/sizeof(unsigned long)) + 1;

Use DECLARE_BITMAP[VLAN_N_VID + 1]?

> +	unsigned long *vid_map = NULL;
> +	__u16 vid = (__u16) vlan + 1;

Don't put space after cast: (__u16)vlan.

In fact, cast is not needed here at all.
Applies to other places in this patch that do this.


> +
> +	/* We are under lock so we can check this without rcu.
> +	 * The vlan map is indexed by vid+1.  This way we can store
> +	 * vid 0 (untagged) into the map as well.
> +	 */

Would be better to put this documentation in struct
definition where vid_map is defined.

> +	if (!p->vlan_map) {
> +		vid_map = kzalloc(table_size, GFP_KERNEL);
> +		if (!vid_map)
> +			return -ENOMEM;
> +
> +		set_bit(vid, vid_map);

What prevents vid from being > table_size * BITS_PER_LONG?

> +		rcu_assign_pointer(p->vlan_map, vid_map);
> +
> +	} else {
> +		/* Map is already allocated */
> +		set_bit(vid, p->vlan_map);

This should call rcu_dereference_protected.

> +	}
> +
> +	return 0;
> +}
> +
> +
> +/* Called with RTNL */
> +int br_del_port_vlan(struct net_bridge_port *p, unsigned long vlan)
> +{
> +	unsigned long first_bit;
> +	unsigned long next_bit;
> +	__u16 vid = (__u16) vlan+1;
> +	unsigned long tbl_len = VLAN_N_VID+1;

Same as above.

> +
> +	if (!p->vlan_map)
> +		return -EINVAL;
> +
> +	if (!test_bit(vlan, p->vlan_map))
> +		return -EINVAL;
> +
> +	/* Check to see if any other vlans are in this table.  If this
> +	 * is the last vlan, delete the whole table.  If this is not the
> +	 * last vlan, just clear the bit.
> +	 */
> +	first_bit = find_first_bit(p->vlan_map, tbl_len);
> +	next_bit = find_next_bit(p->vlan_map, tbl_len, (tbl_len - vid));

() not needed around arguments.

> +
> +	if ((__u16)first_bit != vid || (__u16)next_bit < tbl_len) {

What are these type casts doing? How can you get a value > 0xffff?

> +		/* There are other vlans still configured.  We can simply
> +		 * clear our bit and be safe.
> +		 */
> +		clear_bit(vid, p->vlan_map);
> +	} else {
> +		/* This is the last vlan we are removing.  Replace the
> +		 * map with a NULL pointer and free the old map
> +		 */
> +		unsigned long *map = rcu_dereference(p->vlan_map);
> +
> +		rcu_assign_pointer(p->vlan_map, NULL);
> +		synchronize_net();
> +		kfree(map);
> +	}
> +
> +	return 0;
> +}
> +
>  void __net_exit br_net_exit(struct net *net)
>  {
>  	struct net_device *dev;
> diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
> index 7222fe1..3a5b1f9 100644
> --- a/net/bridge/br_ioctl.c
> +++ b/net/bridge/br_ioctl.c
> @@ -289,6 +289,37 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
>  	case BRCTL_GET_FDB_ENTRIES:
>  		return get_fdb_entries(br, (void __user *)args[1],
>  				       args[2], args[3]);
> +	case BRCTL_ADD_VLAN:
> +	{
> +		struct net_bridge_port *p;
> +
> +		if (!capable(CAP_NET_ADMIN))
> +			return -EPERM;
> +
> +		rcu_read_lock();
> +		if ((p = br_get_port(br, args[1])) == NULL) {
> +			rcu_read_unlock();
> +			return -EINVAL;
> +		}
> +		rcu_read_unlock();
> +		return br_set_port_vlan(p, args[2]);

So this sets vlan but does not synchronize RCU.
This means if this is the 1st vlan we set,
we return to userspace and afterwards
interfaces get packets with a wrong vlan.

I am guessing all these ioctls should synchronize_net.

> +	}
> +
> +	case BRCTL_DEL_VLAN:
> +	{
> +		struct net_bridge_port *p;
> +
> +		if (!capable(CAP_NET_ADMIN))
> +			return -EPERM;
> +
> +		rcu_read_lock();
> +		if ((p = br_get_port(br, args[1])) == NULL) {
> +			rcu_read_unlock();
> +			return -EINVAL;
> +		}
> +		rcu_read_unlock();
> +		br_set_port_vlan(p, args[2]);

Same problem as above as synchronize_net is not
invoked always.

> +	}
>  	}
>  
>  	return -EOPNOTSUPP;
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index b6c56ab..5639c1c 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -402,6 +402,8 @@ extern int br_del_if(struct net_bridge *br,
>  extern int br_min_mtu(const struct net_bridge *br);
>  extern netdev_features_t br_features_recompute(struct net_bridge *br,
>  	netdev_features_t features);
> +extern int br_set_port_vlan(struct net_bridge_port *p, unsigned long vid);
> +extern int br_del_port_vlan(struct net_bridge_port *p, unsigned long vid);
>  
>  /* br_input.c */
>  extern int br_handle_frame_finish(struct sk_buff *skb);
> -- 
> 1.7.7.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [Qemu-devel] macvlan/macvtap: guest/host cannot communicate when network cable is unplugged
From: ching @ 2012-08-30 12:13 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: qemu-devel, kaber, Michael S. Tsirkin, netdev
In-Reply-To: <CAJSP0QWV_RmEteRMUj4U2B9QRjGMkKL=AG3iyKyqEYiqGftFWg@mail.gmail.com>


> Can you try the same test with two macvlan interfaces on the host (no
> macvtap)?  You may need to use the ping -I <interface-address>
> argument to force the ping source address to a specific macvlan
> interface.
>
> If you see the same problem, it may just be the macvlan design - it is
> stacked on top of eth0 and might not work when eth0 is down.  CCing
> macvlan/macvtap folks.
>
> Stefan
>

tested as below

$ifconfig

    eth0      Link encap:Ethernet  HWaddr f4:6d:xx:xx:xx:xx
              inet6 addr: fe80::xx:xx:xx:xx/64 Scope:Link
              UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
              RX packets:86507 errors:0 dropped:0 overruns:0 frame:0
              TX packets:55940 errors:0 dropped:0 overruns:0 carrier:0
              collisions:0 txqueuelen:1000
              RX bytes:126005746 (120.1 MiB)  TX bytes:4394225 (4.1 MiB)

    macvtap0  Link encap:Ethernet  HWaddr 52:54:xx:xx:xx:xx
              inet6 addr: fe80::xx:xx:xx:xx/64 Scope:Link
              UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
              RX packets:70 errors:0 dropped:0 overruns:0 frame:0
              TX packets:84 errors:0 dropped:0 overruns:0 carrier:0
              collisions:0 txqueuelen:500
              RX bytes:9036 (8.8 KiB)  TX bytes:14734 (14.3 KiB)

    znet0     Link encap:Ethernet  HWaddr 00:60:xx:xx:xx:92
              inet addr:192.168.1.2  Bcast:192.168.1.255  Mask:255.255.255.0
              inet6 addr: 2002:xx:xx:xx:xx/64 Scope:Global
              inet6 addr: fe80:xx:xx:xx:xx/64 Scope:Link
              UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
              RX packets:4463190 errors:0 dropped:0 overruns:0 frame:0
              TX packets:12527522 errors:0 dropped:0 overruns:0 carrier:0
              collisions:0 txqueuelen:0
              RX bytes:3959213697 (3.6 GiB)  TX bytes:18590336476 (17.3 GiB)
             
   znet1     Link encap:Ethernet  HWaddr 00:60:xx:xx:xx:99 
              inet addr:192.168.1.177  Bcast:192.168.1.255  Mask:255.255.255.0
              inet6 addr: 2002:xx:xx:xx:xx64 Scope:Global
              inet6 addr: fe80:xx:xx:xx:xx/64 Scope:Link
              UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
              RX packets:8 errors:0 dropped:0 overruns:0 frame:0
              TX packets:9 errors:0 dropped:0 overruns:0 carrier:0
              collisions:0 txqueuelen:0
              RX bytes:1399 (1.3 KiB)  TX bytes:1522 (1.4 KiB)

$ ip -d link show
         
    10: znet0@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT
        link/ether 00:60:xx:xx:xx:92 brd ff:ff:ff:ff:ff:ff
        macvlan  mode bridge
    15: znet1@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN mode DEFAULT
        link/ether 00:60:xx:xx:xx:99 brd ff:ff:ff:ff:ff:ff
        macvlan  mode bridge


the macvlan interface cannot ping each other no matter network cable is plugged or not

$ ping -I 192.168.1.2 192.168.1.177
PING 192.168.1.177 (192.168.1.177) from 192.168.1.2 : 56(84) bytes of data.

--- 192.168.1.177 ping statistics ---
6 packets transmitted, 0 received, 100% packet loss, time 4999ms


I also perform an additional test: the guests (macvtap bridge mode) CAN communicate each other no matter network cable is plugged or not.

^ permalink raw reply

* [PATCH] ipv6: remove some deadcode
From: Sorin Dumitru @ 2012-08-30 12:01 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, jmorris, yoshfuji, kaber, Sorin Dumitru

__ipv6_regen_rndid no longer returns anything other than 0
so there's no point in verifying what it returns

Signed-off-by: Sorin Dumitru <sdumitru@ixiacom.com>
---
 net/ipv6/addrconf.c | 29 +++++++----------------------
 1 file changed, 7 insertions(+), 22 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6bc85f7..055627f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -127,8 +127,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
 #endif
 
 #ifdef CONFIG_IPV6_PRIVACY
-static int __ipv6_regen_rndid(struct inet6_dev *idev);
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
+static void __ipv6_regen_rndid(struct inet6_dev *idev);
+static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
 static void ipv6_regen_rndid(unsigned long data);
 #endif
 
@@ -852,16 +852,7 @@ retry:
 	}
 	in6_ifa_hold(ifp);
 	memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
-	if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
-		spin_unlock_bh(&ifp->lock);
-		write_unlock(&idev->lock);
-		pr_warn("%s: regeneration of randomized interface id failed\n",
-			__func__);
-		in6_ifa_put(ifp);
-		in6_dev_put(idev);
-		ret = -1;
-		goto out;
-	}
+	__ipv6_try_regen_rndid(idev, tmpaddr);
 	memcpy(&addr.s6_addr[8], idev->rndid, 8);
 	age = (now - ifp->tstamp) / HZ;
 	tmp_valid_lft = min_t(__u32,
@@ -1600,7 +1591,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
 
 #ifdef CONFIG_IPV6_PRIVACY
 /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
-static int __ipv6_regen_rndid(struct inet6_dev *idev)
+static void __ipv6_regen_rndid(struct inet6_dev *idev)
 {
 regen:
 	get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -1627,8 +1618,6 @@ regen:
 		if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
 			goto regen;
 	}
-
-	return 0;
 }
 
 static void ipv6_regen_rndid(unsigned long data)
@@ -1642,8 +1631,7 @@ static void ipv6_regen_rndid(unsigned long data)
 	if (idev->dead)
 		goto out;
 
-	if (__ipv6_regen_rndid(idev) < 0)
-		goto out;
+	__ipv6_regen_rndid(idev);
 
 	expires = jiffies +
 		idev->cnf.temp_prefered_lft * HZ -
@@ -1664,13 +1652,10 @@ out:
 	in6_dev_put(idev);
 }
 
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+static void  __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
 {
-	int ret = 0;
-
 	if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
-		ret = __ipv6_regen_rndid(idev);
-	return ret;
+		__ipv6_regen_rndid(idev);
 }
 #endif
 
-- 
1.7.12

^ permalink raw reply related

* [RFC] Move in6_dev_hold under CONFIG_IPV6_PRIVACY
From: David Marchand @ 2012-08-30  9:42 UTC (permalink / raw)
  To: netdev

Hello,

I am currently looking at a problem with in6 interface refcnt on a 
really old kernel and I have just noticed something I find suspicious in 
all kernels until now.

The comment at net/ipv6/addrconf.c:396 suggests that the call to 
in6_dev_hold is only for ipv6_regen_rndid.
As a consequence, if CONFIG_IPV6_PRIVACY is not set, then this 
in6_dev_hold will leak a refcnt.

Can someone look at this ?
I did not test this patch, yet it looks reasonable to me.

Thank you.

$ git diff
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6bc85f7..263fcf3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -393,11 +393,6 @@ static struct inet6_dev *ipv6_add_dev(struct 
net_device *dev)
                 return NULL;
         }

-       /* One reference from device.  We must do this before
-        * we invoke __ipv6_regen_rndid().
-        */
-       in6_dev_hold(ndev);
-
         if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
                 ndev->cnf.accept_dad = -1;

@@ -410,6 +405,12 @@ static struct inet6_dev *ipv6_add_dev(struct 
net_device *dev)

  #ifdef CONFIG_IPV6_PRIVACY
         INIT_LIST_HEAD(&ndev->tempaddr_list);
+
+       /* One reference from device.  We must do this before
+        * we invoke ipv6_regen_rndid().
+        */
+       in6_dev_hold(ndev);
+
         setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned 
long)ndev);
         if ((dev->flags&IFF_LOOPBACK) ||
             dev->type == ARPHRD_TUNNEL ||

-- 
David Marchand

This message has been scanned for viruses by BlackSpider MailControl. - www.blackspider.com

^ permalink raw reply related

* Re: Slow inbound traffic on macvtap interfaces
From: Richard Davies @ 2012-08-30  8:20 UTC (permalink / raw)
  To: Chris Webb; +Cc: netdev, qemu-devel, Jason Wang, Arnd Bergmann, Michael Tsirkin
In-Reply-To: <20120829175256.GC3529@arachsys.com>

Chris Webb wrote:
> I found that on my laptop, the single change of host kernel config
>
> -CONFIG_INTEL_IDLE=y
> +# CONFIG_INTEL_IDLE is not set
>
> is sufficient to turn transfers into guests from slow to full wire speed

I am not deep enough in this code to write a patch, but I wonder if
macvtap_forward in macvtap.c is missing a call to kill_fasync, which I
understand is used to signal to interested processes when data arrives?


Here is the end of macvtap_forward:

  skb_queue_tail(&q->sk.sk_receive_queue, skb);
  wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
  return NET_RX_SUCCESS;


Compared to this end of tun_net_xmit in tun.c:

  /* Enqueue packet */
  skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb);

  /* Notify and wake up reader process */
  if (tun->flags & TUN_FASYNC)
          kill_fasync(&tun->fasync, SIGIO, POLL_IN);
  wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
                             POLLRDNORM | POLLRDBAND);
  return NETDEV_TX_OK;


Richard.

^ permalink raw reply

* Business Proposal
From: Mrs. Helen Wong @ 2012-08-30  8:38 UTC (permalink / raw)


Greetings to you,

I am Mrs.Helen Wong, from Shanghai Banking Corporation Limited. (China)I
have a business proposal of USD$30,000,000 (Thirty Million United States
Dollars Only)for you to transact with me

Contact me via my email address: mrshelenwong001@yahoo.co.jp

Mrs. Helen Wong

^ permalink raw reply

* Re: Slow inbound traffic on macvtap interfaces
From: Michael S. Tsirkin @ 2012-08-30  8:44 UTC (permalink / raw)
  To: Richard Davies; +Cc: Jason Wang, netdev, Chris Webb, qemu-devel, Arnd Bergmann
In-Reply-To: <20120830082057.GA18072@alpha.arachsys.com>

On Thu, Aug 30, 2012 at 09:20:57AM +0100, Richard Davies wrote:
> Chris Webb wrote:
> > I found that on my laptop, the single change of host kernel config
> >
> > -CONFIG_INTEL_IDLE=y
> > +# CONFIG_INTEL_IDLE is not set
> >
> > is sufficient to turn transfers into guests from slow to full wire speed
> 
> I am not deep enough in this code to write a patch, but I wonder if
> macvtap_forward in macvtap.c is missing a call to kill_fasync, which I
> understand is used to signal to interested processes when data arrives?
> 

No, only if TUN_FASYNC is set. qemu does not seem to set it.

> Here is the end of macvtap_forward:
> 
>   skb_queue_tail(&q->sk.sk_receive_queue, skb);
>   wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
>   return NET_RX_SUCCESS;
> 
> 
> Compared to this end of tun_net_xmit in tun.c:
> 
>   /* Enqueue packet */
>   skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb);
> 
>   /* Notify and wake up reader process */
>   if (tun->flags & TUN_FASYNC)
>           kill_fasync(&tun->fasync, SIGIO, POLL_IN);
>   wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
>                              POLLRDNORM | POLLRDBAND);
>   return NETDEV_TX_OK;
> 
> 
> Richard.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox