Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH V2 09/12] bridge: Add the ability to configure untagged vlans
From: Michael S. Tsirkin @ 2012-12-18 23:04 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev, shemminger, davem, or.gerlitz, jhs
In-Reply-To: <1355857263-31197-10-git-send-email-vyasevic@redhat.com>

On Tue, Dec 18, 2012 at 02:01:00PM -0500, Vlad Yasevich wrote:
> A user may designate a certain vlan as untagged.  This means that
> any ingress frame is assigned to this vlan and any forwarding decisions
> are made with this vlan in mind.  On egress, any frames tagged/labeled
> with untagged vlan have the vlan tag removed and are send as regular
> ethernet frames.
> 
> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  include/uapi/linux/if_bridge.h |    3 +
>  net/bridge/br_if.c             |  146 +++++++++++++++++++++++++++++++++++++---
>  net/bridge/br_netlink.c        |    6 +-
>  net/bridge/br_private.h        |    2 +
>  4 files changed, 144 insertions(+), 13 deletions(-)
> 
> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
> index d0b4f5c..988d858 100644
> --- a/include/uapi/linux/if_bridge.h
> +++ b/include/uapi/linux/if_bridge.h
> @@ -127,6 +127,9 @@ enum {
>  	BR_VLAN_DEL,
>  };
>  
> +#define BRIDGE_VLAN_INFO_MASTER		1
> +#define BRIDGE_VLAN_INFO_UNTAGGED	2
> +
>  struct bridge_vlan_info {
>  	u16 op_code;
>  	u16 flags;
> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> index 57bbb35..14563fb 100644
> --- a/net/bridge/br_if.c
> +++ b/net/bridge/br_if.c
> @@ -108,6 +108,34 @@ static void br_vlan_put(struct net_bridge_vlan *vlan)
>  		br_vlan_destroy(vlan);
>  }
>  
> +/* Must be protected by RTNL */
> +static void br_vlan_add_untagged(struct net_bridge *br,
> +				 struct net_bridge_vlan *vlan)
> +{
> +	ASSERT_RTNL();
> +	if (br->untagged == vlan)
> +		return;
> +	else if (br->untagged) {
> +		/* Untagged vlan is already set on the master,
> +		 * so drop the ref since we'll be replacing it.
> +		 */
> +		br_vlan_put(br->untagged);
> +	}
> +	br_vlan_hold(vlan);
> +	rcu_assign_pointer(br->untagged, vlan);
> +}
> +
> +/* Must be protected by RTNL */
> +static void br_vlan_del_untagged(struct net_bridge *br,
> +				 struct net_bridge_vlan *vlan)
> +{
> +	ASSERT_RTNL();
> +	if (br->untagged == vlan) {
> +		br_vlan_put(vlan);
> +		rcu_assign_pointer(br->untagged, NULL);
> +	}
> +}
> +
>  struct net_bridge_vlan *br_vlan_find(struct net_bridge *br, u16 vid)
>  {
>  	struct net_bridge_vlan *vlan;
> @@ -132,7 +160,7 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>  
>  	vlan = br_vlan_find(br, vid);
>  	if (vlan)
> -		return vlan;
> +		goto untagged;
>  
>  	vlan = kzalloc(sizeof(struct net_bridge_vlan), GFP_KERNEL);
>  	if (!vlan)
> @@ -141,7 +169,7 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>  	vlan->vid = vid;
>  	atomic_set(&vlan->refcnt, 1);
>  
> -	if (flags & BRIDGE_FLAGS_SELF) {
> +	if (flags & BRIDGE_VLAN_INFO_MASTER) {
>  		/* Set bit 0 that is associated with the bridge master
>  		 * device.  Port numbers start with 1.
>  		 */
> @@ -149,15 +177,24 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>  	}
>  
>  	hlist_add_head_rcu(&vlan->hlist, &br->vlan_hlist[br_vlan_hash(vid)]);
> +
> +untagged:
> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
> +		br_vlan_add_untagged(br, vlan);
> +
>  	return vlan;
>  }
>  
>  /* Must be protected by RTNL */
> -static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
> +static void br_vlan_del(struct net_bridge *br, struct net_bridge_vlan *vlan,
> +			u16 flags)
>  {
>  	ASSERT_RTNL();
>  
> -	if (flags & BRIDGE_FLAGS_SELF) {
> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
> +		br_vlan_del_untagged(br, vlan);
> +
> +	if (flags & BRIDGE_VLAN_INFO_MASTER) {
>  		/* Clear bit 0 that is associated with the bridge master
>  		 * device.
>  		 */
> @@ -172,6 +209,14 @@ static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
>  
>  	vlan->vid = BR_INVALID_VID;
>  
> +	/* If, for whatever reason, bridge still has a ref on this vlan
> +	 * through the @untagged pointer, drop that ref and clear untagged.
> +	 */
> +	if (br->untagged == vlan) {
> +		br_vlan_put(vlan);
> +		rcu_assign_pointer(br->untagged, NULL);

Is something doing an rcu sync after this point?
If yes maybe add a comment saying where it is, if not - some rcu read
side could still be using it through this pointer.

> +	}
> +
>  	/* Drop the self-ref to trigger descrution. */
>  	br_vlan_put(vlan);
>  }
> @@ -187,7 +232,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid, u16 flags)
>  	if (!vlan)
>  		return -ENOENT;
>  
> -	br_vlan_del(vlan, flags);
> +	br_vlan_del(br, vlan, flags);
>  	return 0;
>  }
>  
> @@ -204,7 +249,9 @@ static void br_vlan_flush(struct net_bridge *br)
>  	for (i = 0; i < BR_VID_HASH_SIZE; i++) {
>  		hlist_for_each_entry_safe(vlan, node, tmp,
>  					  &br->vlan_hlist[i], hlist) {
> -			br_vlan_del(vlan, BRIDGE_FLAGS_SELF);
> +			br_vlan_del(br, vlan,
> +				    (BRIDGE_VLAN_INFO_MASTER |
> +				     BRIDGE_VLAN_INFO_UNTAGGED));
>  		}
>  	}
>  }
> @@ -224,10 +271,70 @@ struct net_port_vlan *nbp_vlan_find(const struct net_bridge_port *p, u16 vid)
>  	return NULL;
>  }
>  
> +static int nbp_vlan_add_untagged(struct net_bridge_port *p,
> +			  struct net_bridge_vlan *vlan,
> +			  u16 flags)
> +{
> +	struct net_device *dev = p->dev;
> +
> +	if (p->untagged) {
> +		/* Port already has untagged vlan set.  Drop the ref
> +		 * to the old one since we'll be replace it.
> +		 */
> +		br_vlan_put(p->untagged);
> +	} else {
> +		int err;
> +
> +		/* Add vid 0 to filter if filter is available. */
> +		if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
> +		    dev->netdev_ops->ndo_vlan_rx_add_vid &&
> +		    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
> +			err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
> +			if (err)
> +				return err;
> +		}
> +	}
> +
> +	/* This VLAN is handled as untagged/native. Save an
> +	 * additional ref.
> +	 */
> +	br_vlan_hold(vlan);
> +	rcu_assign_pointer(p->untagged, vlan);
> +
> +	return 0;
> +}
> +
> +static void nbp_vlan_delete_untagged(struct net_bridge_port *p,
> +				     struct net_bridge_vlan *vlan)
> +{
> +	if (p->untagged != vlan)
> +		return;
> +
> +	/* Remove VLAN from the device filter if it is supported. */
> +	if ((p->dev->features & NETIF_F_HW_VLAN_FILTER) &&
> +	    p->dev->netdev_ops->ndo_vlan_rx_kill_vid) {
> +		int err;
> +
> +		err = p->dev->netdev_ops->ndo_vlan_rx_kill_vid(p->dev, 0);
> +		if (err) {
> +			pr_warn("failed to kill vid %d for device %s\n",
> +				vlan->vid, p->dev->name);
> +		}
> +	}
> +
> +	/* If this VLAN is currently functioning as untagged, clear it.
> +	 * It's safe to drop the refcount, since the vlan is still held
> +	 * by the port.
> +	 */
> +	br_vlan_put(vlan);
> +	rcu_assign_pointer(p->untagged, NULL);
> +
> +}
> +
>  /* Must be protected by RTNL */
>  int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
>  {
> -	struct net_port_vlan *pve;
> +	struct net_port_vlan *pve = NULL;
>  	struct net_bridge_vlan *vlan;
>  	struct net_device *dev = p->dev;
>  	int err;
> @@ -275,11 +382,21 @@ int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
>  	set_bit(p->port_no, vlan->port_bitmap);
>  
>  	list_add_tail_rcu(&pve->list, &p->vlan_list);
> +
> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
> +		err = nbp_vlan_add_untagged(p, vlan, flags);
> +		if (err)
> +			goto del_vlan;
> +	}
> +
>  	return 0;
>  
>  clean_up:
>  	kfree(pve);
> -	br_vlan_del(vlan, flags);
> +	br_vlan_del(p->br, vlan, flags);
> +	return err;
> +del_vlan:
> +	nbp_vlan_delete(p, vid, flags);
>  	return err;
>  }
>  
> @@ -296,6 +413,9 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>  	if (!pve)
>  		return -ENOENT;
>  
> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
> +		nbp_vlan_delete_untagged(p, pve->vlan);
> +
>  	/* Remove VLAN from the device filter if it is supported. */
>  	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
>  	    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
> @@ -306,6 +426,7 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>  			pr_warn("failed to kill vid %d for device %s\n",
>  				vid, dev->name);
>  	}
> +
>  	pve->vid = BR_INVALID_VID;
>  
>  	vlan = pve->vlan;
> @@ -316,7 +437,7 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>  	list_del_rcu(&pve->list);
>  	kfree_rcu(pve, rcu);
>  
> -	br_vlan_del(vlan, flags);
> +	br_vlan_del(p->br, vlan, flags);
>  
>  	return 0;
>  }
> @@ -328,8 +449,11 @@ static void nbp_vlan_flush(struct net_bridge_port *p)
>  
>  	ASSERT_RTNL();
>  
> -	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)
> -		nbp_vlan_delete(p, pve->vid, BRIDGE_FLAGS_SELF);
> +	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)  {
> +		nbp_vlan_delete(p, pve->vid,
> +				(BRIDGE_VLAN_INFO_MASTER |
> +				 BRIDGE_VLAN_INFO_UNTAGGED));
> +	}
>  }
>  
>  static void release_nbp(struct kobject *kobj)
> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> index 9cf2879..1b302ce 100644
> --- a/net/bridge/br_netlink.c
> +++ b/net/bridge/br_netlink.c
> @@ -199,7 +199,8 @@ static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
>  			if (p)
>  				err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
>  			else {
> -				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
> +				u16 flags = vinfo->flags |
> +					    BRIDGE_VLAN_INFO_MASTER;
>  				if (!br_vlan_add(br, vinfo->vid, flags))
>  					err = -ENOMEM;
>  			}
> @@ -210,7 +211,8 @@ static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
>  				err = nbp_vlan_delete(p, vinfo->vid,
>  						      vinfo->flags);
>  			else {
> -				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
> +				u16 flags = vinfo->flags |
> +					    BRIDGE_VLAN_INFO_MASTER;
>  				err = br_vlan_delete(br, vinfo->vid, flags);
>  			}
>  			break;
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index cc75212..9328463 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -179,6 +179,7 @@ struct net_bridge_port
>  	struct netpoll			*np;
>  #endif
>  	struct list_head		vlan_list;
> +	struct net_bridge_vlan __rcu	*untagged;
>  };
>  
>  #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
> @@ -298,6 +299,7 @@ struct net_bridge
>  	struct timer_list		gc_timer;
>  	struct kobject			*ifobj;
>  	struct hlist_head		vlan_hlist[BR_VID_HASH_SIZE];
> +	struct net_bridge_vlan __rcu	*untagged;
>  };
>  
>  struct br_input_skb_cb {
> -- 
> 1.7.7.6

^ permalink raw reply

* Re: [patch net-next 1/4] net: add change_carrier netdev op
From: Dan Williams @ 2012-12-18 23:02 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: netdev, davem, edumazet, bhutchings, mirqus, shemminger, greearb,
	fbl, john.r.fastabend
In-Reply-To: <1355868858-1713-2-git-send-email-jiri@resnulli.us>

On Tue, 2012-12-18 at 23:14 +0100, Jiri Pirko wrote:
> This allows a driver to register change_carrier callback which will be
> called whenever user will like to change carrier state. This is useful
> for devices like dummy, gre, team and so on.
> 
> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
> ---
>  include/linux/netdevice.h |  9 +++++++++
>  net/core/dev.c            | 19 +++++++++++++++++++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 02e0f6b..8047330 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -891,6 +891,11 @@ struct netdev_fcoe_hbainfo {
>   * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh)
>   * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
>   *			     struct net_device *dev)
> + *
> + * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
> + *	Called to update device carrier. Soft-devices which do not manage
> + *	real hardware like dummy, team, etc. can define this to provide
> + *	possibility to set their carrier state.

How about something like:

---
Called to change device carrier.  Virtual devices (like dummy, team,
etc) which do not represent real hardware may define this to allow their
userspace components to manage their virtual carrier state.  Devices
that determine carrier state from physical hardware properties (eg
network cables) or protocol-dependent mechanisms (eg
USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
---

I'm just worried that without some clearer wording, somebody will end up
implementing the function for an actual hardware driver down the road
and expect things to work when they change the carrier to "on" but the
protocol isn't set up or cable isn't plugged in.  I'm also worried that
it might be used to simply work around bugs in the drivers that should
be fixed in the drivers instead.

Dan

>  struct net_device_ops {
>  	int			(*ndo_init)(struct net_device *dev);
> @@ -1008,6 +1013,8 @@ struct net_device_ops {
>  	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
>  						      u32 pid, u32 seq,
>  						      struct net_device *dev);
> +	int			(*ndo_change_carrier)(struct net_device *dev,
> +						      bool new_carrier);
>  };
>  
>  /*
> @@ -2194,6 +2201,8 @@ extern int		dev_set_mtu(struct net_device *, int);
>  extern void		dev_set_group(struct net_device *, int);
>  extern int		dev_set_mac_address(struct net_device *,
>  					    struct sockaddr *);
> +extern int		dev_change_carrier(struct net_device *,
> +					   bool new_carrier);
>  extern int		dev_hard_start_xmit(struct sk_buff *skb,
>  					    struct net_device *dev,
>  					    struct netdev_queue *txq);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index d0cbc93..268a714 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -5027,6 +5027,25 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
>  }
>  EXPORT_SYMBOL(dev_set_mac_address);
>  
> +/**
> + *	dev_change_carrier - Change device carrier
> + *	@dev: device
> + *	@new_carries: new value
> + *
> + *	Change device carrier
> + */
> +int dev_change_carrier(struct net_device *dev, bool new_carrier)
> +{
> +	const struct net_device_ops *ops = dev->netdev_ops;
> +
> +	if (!ops->ndo_change_carrier)
> +		return -EOPNOTSUPP;
> +	if (!netif_device_present(dev))
> +		return -ENODEV;
> +	return ops->ndo_change_carrier(dev, new_carrier);
> +}
> +EXPORT_SYMBOL(dev_change_carrier);
> +
>  /*
>   *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
>   */

^ permalink raw reply

* Re: [PATCH V2 09/12] bridge: Add the ability to configure untagged vlans
From: Vlad Yasevich @ 2012-12-18 23:03 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, shemminger, davem, or.gerlitz, jhs
In-Reply-To: <20121218230107.GA1135@redhat.com>

On 12/18/2012 06:01 PM, Michael S. Tsirkin wrote:
> On Tue, Dec 18, 2012 at 02:01:00PM -0500, Vlad Yasevich wrote:
>> A user may designate a certain vlan as untagged.  This means that
>> any ingress frame is assigned to this vlan and any forwarding decisions
>> are made with this vlan in mind.  On egress, any frames tagged/labeled
>> with untagged vlan have the vlan tag removed and are send as regular
>> ethernet frames.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>> ---
>>   include/uapi/linux/if_bridge.h |    3 +
>>   net/bridge/br_if.c             |  146 +++++++++++++++++++++++++++++++++++++---
>>   net/bridge/br_netlink.c        |    6 +-
>>   net/bridge/br_private.h        |    2 +
>>   4 files changed, 144 insertions(+), 13 deletions(-)
>>
>> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
>> index d0b4f5c..988d858 100644
>> --- a/include/uapi/linux/if_bridge.h
>> +++ b/include/uapi/linux/if_bridge.h
>> @@ -127,6 +127,9 @@ enum {
>>   	BR_VLAN_DEL,
>>   };
>>
>> +#define BRIDGE_VLAN_INFO_MASTER		1
>> +#define BRIDGE_VLAN_INFO_UNTAGGED	2
>> +
>>   struct bridge_vlan_info {
>>   	u16 op_code;
>>   	u16 flags;
>> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
>> index 57bbb35..14563fb 100644
>> --- a/net/bridge/br_if.c
>> +++ b/net/bridge/br_if.c
>> @@ -108,6 +108,34 @@ static void br_vlan_put(struct net_bridge_vlan *vlan)
>>   		br_vlan_destroy(vlan);
>>   }
>>
>> +/* Must be protected by RTNL */
>> +static void br_vlan_add_untagged(struct net_bridge *br,
>> +				 struct net_bridge_vlan *vlan)
>> +{
>> +	ASSERT_RTNL();
>> +	if (br->untagged == vlan)
>> +		return;
>> +	else if (br->untagged) {
>> +		/* Untagged vlan is already set on the master,
>> +		 * so drop the ref since we'll be replacing it.
>> +		 */
>> +		br_vlan_put(br->untagged);
>> +	}
>> +	br_vlan_hold(vlan);
>> +	rcu_assign_pointer(br->untagged, vlan);
>
> Is there a reason for rcu here but not else where? If all users are under
> rtnl you can just assign in a simple way.
> If not then rcu_dereference_protected would be more appropriate.

Everywhere that the pointer changes rcu_assign_pointer is used.

Now, if we hold an RTNL, we can technically read the pointer with rcu 
since it's guaranteed not to change since it only changes under RTNL.
I'll check that this is consistent.

If I access the pointer without rtnl, it's always inside rcu critical 
section and with rcu_dereference().

I thought those were the basic rules of rcu.  Did that change?

-vlad

>
>> +}
>> +
>> +/* Must be protected by RTNL */
>> +static void br_vlan_del_untagged(struct net_bridge *br,
>> +				 struct net_bridge_vlan *vlan)
>> +{
>> +	ASSERT_RTNL();
>> +	if (br->untagged == vlan) {
>> +		br_vlan_put(vlan);
>> +		rcu_assign_pointer(br->untagged, NULL);
>> +	}
>> +}
>> +
>>   struct net_bridge_vlan *br_vlan_find(struct net_bridge *br, u16 vid)
>>   {
>>   	struct net_bridge_vlan *vlan;
>> @@ -132,7 +160,7 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>>
>>   	vlan = br_vlan_find(br, vid);
>>   	if (vlan)
>> -		return vlan;
>> +		goto untagged;
>>
>>   	vlan = kzalloc(sizeof(struct net_bridge_vlan), GFP_KERNEL);
>>   	if (!vlan)
>> @@ -141,7 +169,7 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>>   	vlan->vid = vid;
>>   	atomic_set(&vlan->refcnt, 1);
>>
>> -	if (flags & BRIDGE_FLAGS_SELF) {
>> +	if (flags & BRIDGE_VLAN_INFO_MASTER) {
>>   		/* Set bit 0 that is associated with the bridge master
>>   		 * device.  Port numbers start with 1.
>>   		 */
>> @@ -149,15 +177,24 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>>   	}
>>
>>   	hlist_add_head_rcu(&vlan->hlist, &br->vlan_hlist[br_vlan_hash(vid)]);
>> +
>> +untagged:
>> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
>> +		br_vlan_add_untagged(br, vlan);
>> +
>>   	return vlan;
>>   }
>>
>>   /* Must be protected by RTNL */
>> -static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
>> +static void br_vlan_del(struct net_bridge *br, struct net_bridge_vlan *vlan,
>> +			u16 flags)
>>   {
>>   	ASSERT_RTNL();
>>
>> -	if (flags & BRIDGE_FLAGS_SELF) {
>> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
>> +		br_vlan_del_untagged(br, vlan);
>> +
>> +	if (flags & BRIDGE_VLAN_INFO_MASTER) {
>>   		/* Clear bit 0 that is associated with the bridge master
>>   		 * device.
>>   		 */
>> @@ -172,6 +209,14 @@ static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
>>
>>   	vlan->vid = BR_INVALID_VID;
>>
>> +	/* If, for whatever reason, bridge still has a ref on this vlan
>> +	 * through the @untagged pointer, drop that ref and clear untagged.
>> +	 */
>> +	if (br->untagged == vlan) {
>> +		br_vlan_put(vlan);
>> +		rcu_assign_pointer(br->untagged, NULL);
>> +	}
>> +
>>   	/* Drop the self-ref to trigger descrution. */
>>   	br_vlan_put(vlan);
>>   }
>> @@ -187,7 +232,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid, u16 flags)
>>   	if (!vlan)
>>   		return -ENOENT;
>>
>> -	br_vlan_del(vlan, flags);
>> +	br_vlan_del(br, vlan, flags);
>>   	return 0;
>>   }
>>
>> @@ -204,7 +249,9 @@ static void br_vlan_flush(struct net_bridge *br)
>>   	for (i = 0; i < BR_VID_HASH_SIZE; i++) {
>>   		hlist_for_each_entry_safe(vlan, node, tmp,
>>   					  &br->vlan_hlist[i], hlist) {
>> -			br_vlan_del(vlan, BRIDGE_FLAGS_SELF);
>> +			br_vlan_del(br, vlan,
>> +				    (BRIDGE_VLAN_INFO_MASTER |
>> +				     BRIDGE_VLAN_INFO_UNTAGGED));
>>   		}
>>   	}
>>   }
>> @@ -224,10 +271,70 @@ struct net_port_vlan *nbp_vlan_find(const struct net_bridge_port *p, u16 vid)
>>   	return NULL;
>>   }
>>
>> +static int nbp_vlan_add_untagged(struct net_bridge_port *p,
>> +			  struct net_bridge_vlan *vlan,
>> +			  u16 flags)
>> +{
>> +	struct net_device *dev = p->dev;
>> +
>> +	if (p->untagged) {
>> +		/* Port already has untagged vlan set.  Drop the ref
>> +		 * to the old one since we'll be replace it.
>> +		 */
>> +		br_vlan_put(p->untagged);
>> +	} else {
>> +		int err;
>> +
>> +		/* Add vid 0 to filter if filter is available. */
>> +		if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
>> +		    dev->netdev_ops->ndo_vlan_rx_add_vid &&
>> +		    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
>> +			err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
>> +			if (err)
>> +				return err;
>> +		}
>> +	}
>> +
>> +	/* This VLAN is handled as untagged/native. Save an
>> +	 * additional ref.
>> +	 */
>> +	br_vlan_hold(vlan);
>> +	rcu_assign_pointer(p->untagged, vlan);
>> +
>> +	return 0;
>> +}
>> +
>> +static void nbp_vlan_delete_untagged(struct net_bridge_port *p,
>> +				     struct net_bridge_vlan *vlan)
>> +{
>> +	if (p->untagged != vlan)
>> +		return;
>> +
>> +	/* Remove VLAN from the device filter if it is supported. */
>> +	if ((p->dev->features & NETIF_F_HW_VLAN_FILTER) &&
>> +	    p->dev->netdev_ops->ndo_vlan_rx_kill_vid) {
>> +		int err;
>> +
>> +		err = p->dev->netdev_ops->ndo_vlan_rx_kill_vid(p->dev, 0);
>> +		if (err) {
>> +			pr_warn("failed to kill vid %d for device %s\n",
>> +				vlan->vid, p->dev->name);
>> +		}
>> +	}
>> +
>> +	/* If this VLAN is currently functioning as untagged, clear it.
>> +	 * It's safe to drop the refcount, since the vlan is still held
>> +	 * by the port.
>> +	 */
>> +	br_vlan_put(vlan);
>> +	rcu_assign_pointer(p->untagged, NULL);
>> +
>> +}
>> +
>>   /* Must be protected by RTNL */
>>   int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
>>   {
>> -	struct net_port_vlan *pve;
>> +	struct net_port_vlan *pve = NULL;
>>   	struct net_bridge_vlan *vlan;
>>   	struct net_device *dev = p->dev;
>>   	int err;
>> @@ -275,11 +382,21 @@ int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
>>   	set_bit(p->port_no, vlan->port_bitmap);
>>
>>   	list_add_tail_rcu(&pve->list, &p->vlan_list);
>> +
>> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
>> +		err = nbp_vlan_add_untagged(p, vlan, flags);
>> +		if (err)
>> +			goto del_vlan;
>> +	}
>> +
>>   	return 0;
>>
>>   clean_up:
>>   	kfree(pve);
>> -	br_vlan_del(vlan, flags);
>> +	br_vlan_del(p->br, vlan, flags);
>> +	return err;
>> +del_vlan:
>> +	nbp_vlan_delete(p, vid, flags);
>>   	return err;
>>   }
>>
>> @@ -296,6 +413,9 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>>   	if (!pve)
>>   		return -ENOENT;
>>
>> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
>> +		nbp_vlan_delete_untagged(p, pve->vlan);
>> +
>>   	/* Remove VLAN from the device filter if it is supported. */
>>   	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
>>   	    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
>> @@ -306,6 +426,7 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>>   			pr_warn("failed to kill vid %d for device %s\n",
>>   				vid, dev->name);
>>   	}
>> +
>>   	pve->vid = BR_INVALID_VID;
>>
>>   	vlan = pve->vlan;
>> @@ -316,7 +437,7 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>>   	list_del_rcu(&pve->list);
>>   	kfree_rcu(pve, rcu);
>>
>> -	br_vlan_del(vlan, flags);
>> +	br_vlan_del(p->br, vlan, flags);
>>
>>   	return 0;
>>   }
>> @@ -328,8 +449,11 @@ static void nbp_vlan_flush(struct net_bridge_port *p)
>>
>>   	ASSERT_RTNL();
>>
>> -	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)
>> -		nbp_vlan_delete(p, pve->vid, BRIDGE_FLAGS_SELF);
>> +	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)  {
>> +		nbp_vlan_delete(p, pve->vid,
>> +				(BRIDGE_VLAN_INFO_MASTER |
>> +				 BRIDGE_VLAN_INFO_UNTAGGED));
>> +	}
>>   }
>>
>>   static void release_nbp(struct kobject *kobj)
>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>> index 9cf2879..1b302ce 100644
>> --- a/net/bridge/br_netlink.c
>> +++ b/net/bridge/br_netlink.c
>> @@ -199,7 +199,8 @@ static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
>>   			if (p)
>>   				err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
>>   			else {
>> -				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
>> +				u16 flags = vinfo->flags |
>> +					    BRIDGE_VLAN_INFO_MASTER;
>>   				if (!br_vlan_add(br, vinfo->vid, flags))
>>   					err = -ENOMEM;
>>   			}
>> @@ -210,7 +211,8 @@ static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
>>   				err = nbp_vlan_delete(p, vinfo->vid,
>>   						      vinfo->flags);
>>   			else {
>> -				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
>> +				u16 flags = vinfo->flags |
>> +					    BRIDGE_VLAN_INFO_MASTER;
>>   				err = br_vlan_delete(br, vinfo->vid, flags);
>>   			}
>>   			break;
>> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
>> index cc75212..9328463 100644
>> --- a/net/bridge/br_private.h
>> +++ b/net/bridge/br_private.h
>> @@ -179,6 +179,7 @@ struct net_bridge_port
>>   	struct netpoll			*np;
>>   #endif
>>   	struct list_head		vlan_list;
>> +	struct net_bridge_vlan __rcu	*untagged;
>>   };
>>
>>   #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
>> @@ -298,6 +299,7 @@ struct net_bridge
>>   	struct timer_list		gc_timer;
>>   	struct kobject			*ifobj;
>>   	struct hlist_head		vlan_hlist[BR_VID_HASH_SIZE];
>> +	struct net_bridge_vlan __rcu	*untagged;
>>   };
>>
>>   struct br_input_skb_cb {
>> --
>> 1.7.7.6

^ permalink raw reply

* Re: [PATCH V2 09/12] bridge: Add the ability to configure untagged vlans
From: Michael S. Tsirkin @ 2012-12-18 23:10 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev, shemminger, davem, or.gerlitz, jhs
In-Reply-To: <50D0F63D.9090807@redhat.com>

On Tue, Dec 18, 2012 at 06:03:25PM -0500, Vlad Yasevich wrote:
> On 12/18/2012 06:01 PM, Michael S. Tsirkin wrote:
> >On Tue, Dec 18, 2012 at 02:01:00PM -0500, Vlad Yasevich wrote:
> >>A user may designate a certain vlan as untagged.  This means that
> >>any ingress frame is assigned to this vlan and any forwarding decisions
> >>are made with this vlan in mind.  On egress, any frames tagged/labeled
> >>with untagged vlan have the vlan tag removed and are send as regular
> >>ethernet frames.
> >>
> >>Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
> >>---
> >>  include/uapi/linux/if_bridge.h |    3 +
> >>  net/bridge/br_if.c             |  146 +++++++++++++++++++++++++++++++++++++---
> >>  net/bridge/br_netlink.c        |    6 +-
> >>  net/bridge/br_private.h        |    2 +
> >>  4 files changed, 144 insertions(+), 13 deletions(-)
> >>
> >>diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
> >>index d0b4f5c..988d858 100644
> >>--- a/include/uapi/linux/if_bridge.h
> >>+++ b/include/uapi/linux/if_bridge.h
> >>@@ -127,6 +127,9 @@ enum {
> >>  	BR_VLAN_DEL,
> >>  };
> >>
> >>+#define BRIDGE_VLAN_INFO_MASTER		1
> >>+#define BRIDGE_VLAN_INFO_UNTAGGED	2
> >>+
> >>  struct bridge_vlan_info {
> >>  	u16 op_code;
> >>  	u16 flags;
> >>diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
> >>index 57bbb35..14563fb 100644
> >>--- a/net/bridge/br_if.c
> >>+++ b/net/bridge/br_if.c
> >>@@ -108,6 +108,34 @@ static void br_vlan_put(struct net_bridge_vlan *vlan)
> >>  		br_vlan_destroy(vlan);
> >>  }
> >>
> >>+/* Must be protected by RTNL */
> >>+static void br_vlan_add_untagged(struct net_bridge *br,
> >>+				 struct net_bridge_vlan *vlan)
> >>+{
> >>+	ASSERT_RTNL();
> >>+	if (br->untagged == vlan)
> >>+		return;
> >>+	else if (br->untagged) {
> >>+		/* Untagged vlan is already set on the master,
> >>+		 * so drop the ref since we'll be replacing it.
> >>+		 */
> >>+		br_vlan_put(br->untagged);
> >>+	}
> >>+	br_vlan_hold(vlan);
> >>+	rcu_assign_pointer(br->untagged, vlan);
> >
> >Is there a reason for rcu here but not else where? If all users are under
> >rtnl you can just assign in a simple way.
> >If not then rcu_dereference_protected would be more appropriate.
> 
> Everywhere that the pointer changes rcu_assign_pointer is used.
> 
> Now, if we hold an RTNL, we can technically read the pointer with
> rcu since it's guaranteed not to change since it only changes under
> RTNL.
> I'll check that this is consistent.

Check what rcu_dereference_protected does. It's really just
an explicit way to say "this is accessed without rcu because I have
this lock".

> If I access the pointer without rtnl, it's always inside rcu
> critical section and with rcu_dereference().
> 
> I thought those were the basic rules of rcu.  Did that change?
> 
> -vlad



> >
> >>+}
> >>+
> >>+/* Must be protected by RTNL */
> >>+static void br_vlan_del_untagged(struct net_bridge *br,
> >>+				 struct net_bridge_vlan *vlan)
> >>+{
> >>+	ASSERT_RTNL();
> >>+	if (br->untagged == vlan) {
> >>+		br_vlan_put(vlan);
> >>+		rcu_assign_pointer(br->untagged, NULL);
> >>+	}
> >>+}
> >>+
> >>  struct net_bridge_vlan *br_vlan_find(struct net_bridge *br, u16 vid)
> >>  {
> >>  	struct net_bridge_vlan *vlan;
> >>@@ -132,7 +160,7 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
> >>
> >>  	vlan = br_vlan_find(br, vid);
> >>  	if (vlan)
> >>-		return vlan;
> >>+		goto untagged;
> >>
> >>  	vlan = kzalloc(sizeof(struct net_bridge_vlan), GFP_KERNEL);
> >>  	if (!vlan)
> >>@@ -141,7 +169,7 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
> >>  	vlan->vid = vid;
> >>  	atomic_set(&vlan->refcnt, 1);
> >>
> >>-	if (flags & BRIDGE_FLAGS_SELF) {
> >>+	if (flags & BRIDGE_VLAN_INFO_MASTER) {
> >>  		/* Set bit 0 that is associated with the bridge master
> >>  		 * device.  Port numbers start with 1.
> >>  		 */
> >>@@ -149,15 +177,24 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
> >>  	}
> >>
> >>  	hlist_add_head_rcu(&vlan->hlist, &br->vlan_hlist[br_vlan_hash(vid)]);
> >>+
> >>+untagged:
> >>+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
> >>+		br_vlan_add_untagged(br, vlan);
> >>+
> >>  	return vlan;
> >>  }
> >>
> >>  /* Must be protected by RTNL */
> >>-static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
> >>+static void br_vlan_del(struct net_bridge *br, struct net_bridge_vlan *vlan,
> >>+			u16 flags)
> >>  {
> >>  	ASSERT_RTNL();
> >>
> >>-	if (flags & BRIDGE_FLAGS_SELF) {
> >>+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
> >>+		br_vlan_del_untagged(br, vlan);
> >>+
> >>+	if (flags & BRIDGE_VLAN_INFO_MASTER) {
> >>  		/* Clear bit 0 that is associated with the bridge master
> >>  		 * device.
> >>  		 */
> >>@@ -172,6 +209,14 @@ static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
> >>
> >>  	vlan->vid = BR_INVALID_VID;
> >>
> >>+	/* If, for whatever reason, bridge still has a ref on this vlan
> >>+	 * through the @untagged pointer, drop that ref and clear untagged.
> >>+	 */
> >>+	if (br->untagged == vlan) {
> >>+		br_vlan_put(vlan);
> >>+		rcu_assign_pointer(br->untagged, NULL);
> >>+	}
> >>+
> >>  	/* Drop the self-ref to trigger descrution. */
> >>  	br_vlan_put(vlan);
> >>  }
> >>@@ -187,7 +232,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid, u16 flags)
> >>  	if (!vlan)
> >>  		return -ENOENT;
> >>
> >>-	br_vlan_del(vlan, flags);
> >>+	br_vlan_del(br, vlan, flags);
> >>  	return 0;
> >>  }
> >>
> >>@@ -204,7 +249,9 @@ static void br_vlan_flush(struct net_bridge *br)
> >>  	for (i = 0; i < BR_VID_HASH_SIZE; i++) {
> >>  		hlist_for_each_entry_safe(vlan, node, tmp,
> >>  					  &br->vlan_hlist[i], hlist) {
> >>-			br_vlan_del(vlan, BRIDGE_FLAGS_SELF);
> >>+			br_vlan_del(br, vlan,
> >>+				    (BRIDGE_VLAN_INFO_MASTER |
> >>+				     BRIDGE_VLAN_INFO_UNTAGGED));
> >>  		}
> >>  	}
> >>  }
> >>@@ -224,10 +271,70 @@ struct net_port_vlan *nbp_vlan_find(const struct net_bridge_port *p, u16 vid)
> >>  	return NULL;
> >>  }
> >>
> >>+static int nbp_vlan_add_untagged(struct net_bridge_port *p,
> >>+			  struct net_bridge_vlan *vlan,
> >>+			  u16 flags)
> >>+{
> >>+	struct net_device *dev = p->dev;
> >>+
> >>+	if (p->untagged) {
> >>+		/* Port already has untagged vlan set.  Drop the ref
> >>+		 * to the old one since we'll be replace it.
> >>+		 */
> >>+		br_vlan_put(p->untagged);
> >>+	} else {
> >>+		int err;
> >>+
> >>+		/* Add vid 0 to filter if filter is available. */
> >>+		if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
> >>+		    dev->netdev_ops->ndo_vlan_rx_add_vid &&
> >>+		    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
> >>+			err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
> >>+			if (err)
> >>+				return err;
> >>+		}
> >>+	}
> >>+
> >>+	/* This VLAN is handled as untagged/native. Save an
> >>+	 * additional ref.
> >>+	 */
> >>+	br_vlan_hold(vlan);
> >>+	rcu_assign_pointer(p->untagged, vlan);
> >>+
> >>+	return 0;
> >>+}
> >>+
> >>+static void nbp_vlan_delete_untagged(struct net_bridge_port *p,
> >>+				     struct net_bridge_vlan *vlan)
> >>+{
> >>+	if (p->untagged != vlan)
> >>+		return;
> >>+
> >>+	/* Remove VLAN from the device filter if it is supported. */
> >>+	if ((p->dev->features & NETIF_F_HW_VLAN_FILTER) &&
> >>+	    p->dev->netdev_ops->ndo_vlan_rx_kill_vid) {
> >>+		int err;
> >>+
> >>+		err = p->dev->netdev_ops->ndo_vlan_rx_kill_vid(p->dev, 0);
> >>+		if (err) {
> >>+			pr_warn("failed to kill vid %d for device %s\n",
> >>+				vlan->vid, p->dev->name);
> >>+		}
> >>+	}
> >>+
> >>+	/* If this VLAN is currently functioning as untagged, clear it.
> >>+	 * It's safe to drop the refcount, since the vlan is still held
> >>+	 * by the port.
> >>+	 */
> >>+	br_vlan_put(vlan);
> >>+	rcu_assign_pointer(p->untagged, NULL);
> >>+
> >>+}
> >>+
> >>  /* Must be protected by RTNL */
> >>  int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
> >>  {
> >>-	struct net_port_vlan *pve;
> >>+	struct net_port_vlan *pve = NULL;
> >>  	struct net_bridge_vlan *vlan;
> >>  	struct net_device *dev = p->dev;
> >>  	int err;
> >>@@ -275,11 +382,21 @@ int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
> >>  	set_bit(p->port_no, vlan->port_bitmap);
> >>
> >>  	list_add_tail_rcu(&pve->list, &p->vlan_list);
> >>+
> >>+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
> >>+		err = nbp_vlan_add_untagged(p, vlan, flags);
> >>+		if (err)
> >>+			goto del_vlan;
> >>+	}
> >>+
> >>  	return 0;
> >>
> >>  clean_up:
> >>  	kfree(pve);
> >>-	br_vlan_del(vlan, flags);
> >>+	br_vlan_del(p->br, vlan, flags);
> >>+	return err;
> >>+del_vlan:
> >>+	nbp_vlan_delete(p, vid, flags);
> >>  	return err;
> >>  }
> >>
> >>@@ -296,6 +413,9 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
> >>  	if (!pve)
> >>  		return -ENOENT;
> >>
> >>+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
> >>+		nbp_vlan_delete_untagged(p, pve->vlan);
> >>+
> >>  	/* Remove VLAN from the device filter if it is supported. */
> >>  	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
> >>  	    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
> >>@@ -306,6 +426,7 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
> >>  			pr_warn("failed to kill vid %d for device %s\n",
> >>  				vid, dev->name);
> >>  	}
> >>+
> >>  	pve->vid = BR_INVALID_VID;
> >>
> >>  	vlan = pve->vlan;
> >>@@ -316,7 +437,7 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
> >>  	list_del_rcu(&pve->list);
> >>  	kfree_rcu(pve, rcu);
> >>
> >>-	br_vlan_del(vlan, flags);
> >>+	br_vlan_del(p->br, vlan, flags);
> >>
> >>  	return 0;
> >>  }
> >>@@ -328,8 +449,11 @@ static void nbp_vlan_flush(struct net_bridge_port *p)
> >>
> >>  	ASSERT_RTNL();
> >>
> >>-	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)
> >>-		nbp_vlan_delete(p, pve->vid, BRIDGE_FLAGS_SELF);
> >>+	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)  {
> >>+		nbp_vlan_delete(p, pve->vid,
> >>+				(BRIDGE_VLAN_INFO_MASTER |
> >>+				 BRIDGE_VLAN_INFO_UNTAGGED));
> >>+	}
> >>  }
> >>
> >>  static void release_nbp(struct kobject *kobj)
> >>diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> >>index 9cf2879..1b302ce 100644
> >>--- a/net/bridge/br_netlink.c
> >>+++ b/net/bridge/br_netlink.c
> >>@@ -199,7 +199,8 @@ static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
> >>  			if (p)
> >>  				err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
> >>  			else {
> >>-				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
> >>+				u16 flags = vinfo->flags |
> >>+					    BRIDGE_VLAN_INFO_MASTER;
> >>  				if (!br_vlan_add(br, vinfo->vid, flags))
> >>  					err = -ENOMEM;
> >>  			}
> >>@@ -210,7 +211,8 @@ static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
> >>  				err = nbp_vlan_delete(p, vinfo->vid,
> >>  						      vinfo->flags);
> >>  			else {
> >>-				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
> >>+				u16 flags = vinfo->flags |
> >>+					    BRIDGE_VLAN_INFO_MASTER;
> >>  				err = br_vlan_delete(br, vinfo->vid, flags);
> >>  			}
> >>  			break;
> >>diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> >>index cc75212..9328463 100644
> >>--- a/net/bridge/br_private.h
> >>+++ b/net/bridge/br_private.h
> >>@@ -179,6 +179,7 @@ struct net_bridge_port
> >>  	struct netpoll			*np;
> >>  #endif
> >>  	struct list_head		vlan_list;
> >>+	struct net_bridge_vlan __rcu	*untagged;
> >>  };
> >>
> >>  #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
> >>@@ -298,6 +299,7 @@ struct net_bridge
> >>  	struct timer_list		gc_timer;
> >>  	struct kobject			*ifobj;
> >>  	struct hlist_head		vlan_hlist[BR_VID_HASH_SIZE];
> >>+	struct net_bridge_vlan __rcu	*untagged;
> >>  };
> >>
> >>  struct br_input_skb_cb {
> >>--
> >>1.7.7.6

^ permalink raw reply

* Re: [RFC PATCH v3 2/2] tun: fix LSM/SELinux labeling of tun/tap devices
From: Michael S. Tsirkin @ 2012-12-18 23:08 UTC (permalink / raw)
  To: Paul Moore; +Cc: netdev, linux-security-module, selinux, jasowang
In-Reply-To: <20121218225351.16104.48513.stgit@localhost>

On Tue, Dec 18, 2012 at 05:53:52PM -0500, Paul Moore wrote:
> This patch corrects some problems with LSM/SELinux that were introduced
> with the multiqueue patchset.  The problem stems from the fact that the
> multiqueue work changed the relationship between the tun device and its
> associated socket; before the socket persisted for the life of the
> device, however after the multiqueue changes the socket only persisted
> for the life of the userspace connection (fd open).  For non-persistent
> devices this is not an issue, but for persistent devices this can cause
> the tun device to lose its SELinux label.
> 
> We correct this problem by adding an opaque LSM security blob to the
> tun device struct which allows us to have the LSM security state, e.g.
> SELinux labeling information, persist for the lifetime of the tun
> device.  In the process we tweak the LSM hooks to work with this new
> approach to TUN device/socket labeling and introduce a new LSM hook,
> security_tun_dev_attach_queue(), to approve requests to attach to a
> TUN queue via TUNSETQUEUE.
> 
> The SELinux code has been adjusted to match the new LSM hooks, the
> other LSMs do not make use of the LSM TUN controls.  This patch makes
> use of the recently added "tun_socket:attach_queue" permission to
> restrict access to the TUNSETQUEUE operation.  On older SELinux
> policies which do not define the "tun_socket:attach_queue" permission
> the access control decision for TUNSETQUEUE will be handled according
> to the SELinux policy's unknown permission setting.
> 
> Signed-off-by: Paul Moore <pmoore@redhat.com>


Looks good to me. A comment not directly related to this patch, below.

> ---
>  drivers/net/tun.c                 |   27 +++++++++++++----
>  include/linux/security.h          |   59 +++++++++++++++++++++++++++++--------
>  security/capability.c             |   24 +++++++++++++--
>  security/security.c               |   28 ++++++++++++++----
>  security/selinux/hooks.c          |   50 ++++++++++++++++++++++++-------
>  security/selinux/include/objsec.h |    4 +++
>  6 files changed, 154 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 504f7f1..4b7754c 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -186,6 +186,7 @@ struct tun_struct {
>  	unsigned long ageing_time;
>  	unsigned int numdisabled;
>  	struct list_head disabled;
> +	void *security;
>  };
>  
>  static inline u32 tun_hashfn(u32 rxhash)
> @@ -496,6 +497,10 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
>  	struct tun_file *tfile = file->private_data;
>  	int err;
>  
> +	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
> +	if (err < 0)
> +		goto out;
> +
>  	err = -EINVAL;
>  	if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
>  		goto out;
> @@ -1389,6 +1394,7 @@ static void tun_free_netdev(struct net_device *dev)
>  
>  	BUG_ON(!(list_empty(&tun->disabled)));
>  	tun_flow_uninit(tun);
> +	security_tun_dev_free_security(tun->security);
>  	free_netdev(dev);
>  }
>  
> @@ -1575,7 +1581,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  		if (tun_not_capable(tun))
>  			return -EPERM;
> -		err = security_tun_dev_attach(tfile->socket.sk);
> +		err = security_tun_dev_open(tun->security);
>  		if (err < 0)
>  			return err;
>  
> @@ -1632,7 +1638,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  		spin_lock_init(&tun->lock);
>  
> -		security_tun_dev_post_create(&tfile->sk);
> +		err = security_tun_dev_alloc_security(&tun->security);
> +		if (err < 0)
> +			goto err_free_dev;
>  
>  		tun_net_init(dev);
>  
> @@ -1805,12 +1813,18 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
>  
>  	if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
>  		tun = tfile->detached;
> -		if (!tun)
> +		if (!tun) {
>  			ret = -EINVAL;
> -		else if (tun_not_capable(tun))
> +			goto unlock;
> +		}
> +		if (tun_not_capable(tun)) {

By the way I don't think we need to limit set_queue
by tun_not_capable. It simply makes a queue active/inactive
which seems harmless. Jason?

>  			ret = -EPERM;
> -		else
> -			ret = tun_attach(tun, file);
> +			goto unlock;
> +		}
> +		ret = security_tun_dev_attach_queue(tun->security);
> +		if (ret < 0)
> +			goto unlock;
> +		ret = tun_attach(tun, file);
>  	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
>  		tun = rcu_dereference_protected(tfile->tun,
>  						lockdep_rtnl_is_held());
> @@ -1821,6 +1835,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
>  	} else
>  		ret = -EINVAL;
>  
> +unlock:
>  	rtnl_unlock();
>  	return ret;
>  }
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 05e88bd..e09a87b 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -983,17 +983,29 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
>   *	tells the LSM to decrement the number of secmark labeling rules loaded
>   * @req_classify_flow:
>   *	Sets the flow's sid to the openreq sid.
> + * @tun_dev_alloc_security:
> + *	This hook allows a module to allocate a security structure for a TUN
> + *	device.
> + *	@security pointer to a security structure pointer.
> + *	Returns a zero on success, negative values on failure.
> + * @tun_dev_free_security:
> + *	This hook allows a module to free the security structure for a TUN
> + *	device.
> + *	@security pointer to the TUN device's security structure
>   * @tun_dev_create:
>   *	Check permissions prior to creating a new TUN device.
> - * @tun_dev_post_create:
> - *	This hook allows a module to update or allocate a per-socket security
> - *	structure.
> - *	@sk contains the newly created sock structure.
> + * @tun_dev_attach_queue:
> + *	Check permissions prior to attaching to a TUN device queue.
> + *	@security pointer to the TUN device's security structure.
>   * @tun_dev_attach:
> - *	Check permissions prior to attaching to a persistent TUN device.  This
> - *	hook can also be used by the module to update any security state
> + *	This hook can be used by the module to update any security state
>   *	associated with the TUN device's sock structure.
>   *	@sk contains the existing sock structure.
> + *	@security pointer to the TUN device's security structure.
> + * @tun_dev_open:
> + *	This hook can be used by the module to update any security state
> + *	associated with the TUN device's security structure.
> + *	@security pointer to the TUN devices's security structure.
>   *
>   * Security hooks for XFRM operations.
>   *
> @@ -1613,9 +1625,12 @@ struct security_operations {
>  	void (*secmark_refcount_inc) (void);
>  	void (*secmark_refcount_dec) (void);
>  	void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
> -	int (*tun_dev_create)(void);
> -	void (*tun_dev_post_create)(struct sock *sk);
> -	int (*tun_dev_attach)(struct sock *sk);
> +	int (*tun_dev_alloc_security) (void **security);
> +	void (*tun_dev_free_security) (void *security);
> +	int (*tun_dev_create) (void);
> +	int (*tun_dev_attach_queue) (void *security);
> +	int (*tun_dev_attach) (struct sock *sk, void *security);
> +	int (*tun_dev_open) (void *security);
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> @@ -2553,9 +2568,12 @@ void security_inet_conn_established(struct sock *sk,
>  int security_secmark_relabel_packet(u32 secid);
>  void security_secmark_refcount_inc(void);
>  void security_secmark_refcount_dec(void);
> +int security_tun_dev_alloc_security(void **security);
> +void security_tun_dev_free_security(void *security);
>  int security_tun_dev_create(void);
> -void security_tun_dev_post_create(struct sock *sk);
> -int security_tun_dev_attach(struct sock *sk);
> +int security_tun_dev_attach_queue(void *security);
> +int security_tun_dev_attach(struct sock *sk, void *security);
> +int security_tun_dev_open(void *security);
>  
>  #else	/* CONFIG_SECURITY_NETWORK */
>  static inline int security_unix_stream_connect(struct sock *sock,
> @@ -2720,16 +2738,31 @@ static inline void security_secmark_refcount_dec(void)
>  {
>  }
>  
> +static inline int security_tun_dev_alloc_security(void **security)
> +{
> +	return 0;
> +}
> +
> +static inline void security_tun_dev_free_security(void *security)
> +{
> +}
> +
>  static inline int security_tun_dev_create(void)
>  {
>  	return 0;
>  }
>  
> -static inline void security_tun_dev_post_create(struct sock *sk)
> +static inline int security_tun_dev_attach_queue(void *security)
> +{
> +	return 0;
> +}
> +
> +static inline int security_tun_dev_attach(struct sock *sk, void *security)
>  {
> +	return 0;
>  }
>  
> -static inline int security_tun_dev_attach(struct sock *sk)
> +static inline int security_tun_dev_open(void *security)
>  {
>  	return 0;
>  }
> diff --git a/security/capability.c b/security/capability.c
> index b14a30c..76c1dc9 100644
> --- a/security/capability.c
> +++ b/security/capability.c
> @@ -704,16 +704,31 @@ static void cap_req_classify_flow(const struct request_sock *req,
>  {
>  }
>  
> +static int cap_tun_dev_alloc_security(void **security)
> +{
> +	return 0;
> +}
> +
> +static void cap_tun_dev_free_security(void *security)
> +{
> +}
> +
>  static int cap_tun_dev_create(void)
>  {
>  	return 0;
>  }
>  
> -static void cap_tun_dev_post_create(struct sock *sk)
> +static int cap_tun_dev_attach_queue(void *security)
> +{
> +	return 0;
> +}
> +
> +static int cap_tun_dev_attach(struct sock *sk, void *security)
>  {
> +	return 0;
>  }
>  
> -static int cap_tun_dev_attach(struct sock *sk)
> +static int cap_tun_dev_open(void *security)
>  {
>  	return 0;
>  }
> @@ -1044,8 +1059,11 @@ void __init security_fixup_ops(struct security_operations *ops)
>  	set_to_cap_if_null(ops, secmark_refcount_inc);
>  	set_to_cap_if_null(ops, secmark_refcount_dec);
>  	set_to_cap_if_null(ops, req_classify_flow);
> +	set_to_cap_if_null(ops, tun_dev_alloc_security);
> +	set_to_cap_if_null(ops, tun_dev_free_security);
>  	set_to_cap_if_null(ops, tun_dev_create);
> -	set_to_cap_if_null(ops, tun_dev_post_create);
> +	set_to_cap_if_null(ops, tun_dev_open);
> +	set_to_cap_if_null(ops, tun_dev_attach_queue);
>  	set_to_cap_if_null(ops, tun_dev_attach);
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> diff --git a/security/security.c b/security/security.c
> index 8dcd4ae..a271ed4 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -1244,24 +1244,42 @@ void security_secmark_refcount_dec(void)
>  }
>  EXPORT_SYMBOL(security_secmark_refcount_dec);
>  
> +int security_tun_dev_alloc_security(void **security)
> +{
> +	return security_ops->tun_dev_alloc_security(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_alloc_security);
> +
> +void security_tun_dev_free_security(void *security)
> +{
> +	security_ops->tun_dev_free_security(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_free_security);
> +
>  int security_tun_dev_create(void)
>  {
>  	return security_ops->tun_dev_create();
>  }
>  EXPORT_SYMBOL(security_tun_dev_create);
>  
> -void security_tun_dev_post_create(struct sock *sk)
> +int security_tun_dev_attach_queue(void *security)
>  {
> -	return security_ops->tun_dev_post_create(sk);
> +	return security_ops->tun_dev_attach_queue(security);
>  }
> -EXPORT_SYMBOL(security_tun_dev_post_create);
> +EXPORT_SYMBOL(security_tun_dev_attach_queue);
>  
> -int security_tun_dev_attach(struct sock *sk)
> +int security_tun_dev_attach(struct sock *sk, void *security)
>  {
> -	return security_ops->tun_dev_attach(sk);
> +	return security_ops->tun_dev_attach(sk, security);
>  }
>  EXPORT_SYMBOL(security_tun_dev_attach);
>  
> +int security_tun_dev_open(void *security)
> +{
> +	return security_ops->tun_dev_open(security);
> +}
> +EXPORT_SYMBOL(security_tun_dev_open);
> +
>  #endif	/* CONFIG_SECURITY_NETWORK */
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 61a5336..ef26e96 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -4399,6 +4399,24 @@ static void selinux_req_classify_flow(const struct request_sock *req,
>  	fl->flowi_secid = req->secid;
>  }
>  
> +static int selinux_tun_dev_alloc_security(void **security)
> +{
> +	struct tun_security_struct *tunsec;
> +
> +	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
> +	if (!tunsec)
> +		return -ENOMEM;
> +	tunsec->sid = current_sid();
> +
> +	*security = tunsec;
> +	return 0;
> +}
> +
> +static void selinux_tun_dev_free_security(void *security)
> +{
> +	kfree(security);
> +}
> +
>  static int selinux_tun_dev_create(void)
>  {
>  	u32 sid = current_sid();
> @@ -4414,8 +4432,17 @@ static int selinux_tun_dev_create(void)
>  			    NULL);
>  }
>  
> -static void selinux_tun_dev_post_create(struct sock *sk)
> +static int selinux_tun_dev_attach_queue(void *security)
>  {
> +	struct tun_security_struct *tunsec = security;
> +
> +	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
> +			    TUN_SOCKET__ATTACH_QUEUE, NULL);
> +}
> +
> +static int selinux_tun_dev_attach(struct sock *sk, void *security)
> +{
> +	struct tun_security_struct *tunsec = security;
>  	struct sk_security_struct *sksec = sk->sk_security;
>  
>  	/* we don't currently perform any NetLabel based labeling here and it
> @@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct sock *sk)
>  	 * cause confusion to the TUN user that had no idea network labeling
>  	 * protocols were being used */
>  
> -	/* see the comments in selinux_tun_dev_create() about why we don't use
> -	 * the sockcreate SID here */
> -
> -	sksec->sid = current_sid();
> +	sksec->sid = tunsec->sid;
>  	sksec->sclass = SECCLASS_TUN_SOCKET;
> +
> +	return 0;
>  }
>  
> -static int selinux_tun_dev_attach(struct sock *sk)
> +static int selinux_tun_dev_open(void *security)
>  {
> -	struct sk_security_struct *sksec = sk->sk_security;
> +	struct tun_security_struct *tunsec = security;
>  	u32 sid = current_sid();
>  	int err;
>  
> -	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
> +	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
>  			   TUN_SOCKET__RELABELFROM, NULL);
>  	if (err)
>  		return err;
> @@ -4446,8 +4472,7 @@ static int selinux_tun_dev_attach(struct sock *sk)
>  			   TUN_SOCKET__RELABELTO, NULL);
>  	if (err)
>  		return err;
> -
> -	sksec->sid = sid;
> +	tunsec->sid = sid;
>  
>  	return 0;
>  }
> @@ -5642,9 +5667,12 @@ static struct security_operations selinux_ops = {
>  	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
>  	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
>  	.req_classify_flow =		selinux_req_classify_flow,
> +	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
> +	.tun_dev_free_security =	selinux_tun_dev_free_security,
>  	.tun_dev_create =		selinux_tun_dev_create,
> -	.tun_dev_post_create = 		selinux_tun_dev_post_create,
> +	.tun_dev_attach_queue =		selinux_tun_dev_attach_queue,
>  	.tun_dev_attach =		selinux_tun_dev_attach,
> +	.tun_dev_open =			selinux_tun_dev_open,
>  
>  #ifdef CONFIG_SECURITY_NETWORK_XFRM
>  	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
> diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
> index 26c7eee..aa47bca 100644
> --- a/security/selinux/include/objsec.h
> +++ b/security/selinux/include/objsec.h
> @@ -110,6 +110,10 @@ struct sk_security_struct {
>  	u16 sclass;			/* sock security class */
>  };
>  
> +struct tun_security_struct {
> +	u32 sid;			/* SID for the tun device sockets */
> +};
> +
>  struct key_security_struct {
>  	u32 sid;	/* SID of key */
>  };

^ permalink raw reply

* Re: [PATCH net-next] xfrm: removes a superfluous check and add a statistic
From: David Miller @ 2012-12-18 23:38 UTC (permalink / raw)
  To: roy.qing.li; +Cc: netdev
In-Reply-To: <1355819949-30429-1-git-send-email-roy.qing.li@gmail.com>

From: roy.qing.li@gmail.com
Date: Tue, 18 Dec 2012 16:39:09 +0800

> From: Li RongQing <roy.qing.li@gmail.com>
> 
> Remove the check if x->km.state equal to XFRM_STATE_VALID in
> xfrm_state_check_expire(), which will be done before call
> xfrm_state_check_expire().
> 
> add a LINUX_MIB_XFRMOUTSTATEINVALID statistic to record the
> outbound error due to invalid xfrm state.
> 
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>

Please add new statistic SNMP mib numbers to the end of the
enumeration, rather than in the middle.

Thanks.

^ permalink raw reply

* Re: [PATCH 1/2] be2net: fix be_close() to ensure all events are ack'ed
From: David Miller @ 2012-12-19  0:19 UTC (permalink / raw)
  To: sathya.perla; +Cc: netdev
In-Reply-To: <f1923deb-9df8-4c1e-97be-944fd119e2ce@CMEXHTCAS2.ad.emulex.com>

From: Sathya Perla <sathya.perla@emulex.com>
Date: Tue, 18 Dec 2012 11:08:50 +0530

> In be_close(), be_eq_clean() must be called after all RX/TX/MCC queues
> have been cleaned to ensure that any events caused while cleaning up
> completions are notified/acked. Not clearing all events can cause
> upredictable behaviour when RX rings are re-created in the subsequent
> be_open().
> 
> Signed-off-by: Sathya Perla <sathya.perla@emulex.com>

Applied.

^ permalink raw reply

* Re: [PATCH 2/2] be2net: fix wrong frag_idx reported by RX CQ
From: David Miller @ 2012-12-19  0:19 UTC (permalink / raw)
  To: sathya.perla; +Cc: netdev
In-Reply-To: <42b4ddf1-1024-4762-b86c-66c033afc219@CMEXHTCAS2.ad.emulex.com>

From: Sathya Perla <sathya.perla@emulex.com>
Date: Tue, 18 Dec 2012 11:08:51 +0530

> The RX CQ can report completions with invalid frag_idx when the RXQ that
> was *previously* using it, was not cleaned up properly. This hits
> a BUG_ON() in be2net.
> 
> When completion coalescing is enabled on a CQ, an explicit CQ-notify
> (with rearm) is needed for each compl, to flush partially coalesced CQ
> entries that are pending DMA.
> 
> In be_close(), this fix now notifies CQ for each compl, waits explicitly
> for the flush compl to arrive and complains if it doesn't arrive.
> 
> Also renaming be_crit_error() to be_hw_error() as it's the more
> appropriate name and to convey that we don't wait for the flush compl
> only when a HW error has occurred.
> 
> Signed-off-by: Sathya Perla <sathya.perla@emulex.com>

Applied.

^ permalink raw reply

* Re: [GIT PULL net-next 01/17] ndisc: Fix size calculation for headers.
From: David Miller @ 2012-12-19  0:23 UTC (permalink / raw)
  To: yoshfuji; +Cc: netdev
In-Reply-To: <50D04AD4.8010908@linux-ipv6.org>

From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 18 Dec 2012 19:52:04 +0900

> We used to allocate MAX_HEADER bytes more than needed but
> reserved hlen only (not MAX_HEADER + hlen) and the MAX_HEADER
> was left behind.
> 
> Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>

This change is wrong.

The MAX_HEADER is being used in order to accomodate any
possible encapsulation that may occur.

I'm really disappointed that the very first patch in this series is
buggy, and you didn't even bother to repost the absolutely required
"00/17" email for this series which is where you're supposed to give a
top-level overview of your changes as well as any GIT pull request.

^ permalink raw reply

* Re: [GIT PULL net-next 01/17] ndisc: Fix size calculation for headers.
From: David Miller @ 2012-12-19  0:24 UTC (permalink / raw)
  To: yoshfuji; +Cc: netdev
In-Reply-To: <20121218.162338.1594192050394527214.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Tue, 18 Dec 2012 16:23:38 -0800 (PST)

> I'm really disappointed that the very first patch in this series is
> buggy, and you didn't even bother to repost the absolutely required
> "00/17" email for this series which is where you're supposed to give a
> top-level overview of your changes as well as any GIT pull request.

Also, right now the net-next tree is not open, so these changes
aren't even appropriate to be submitting right now.

^ permalink raw reply

* Re: [PATCH 0/2] smsc95xx enhancements
From: David Miller @ 2012-12-19  0:25 UTC (permalink / raw)
  To: steve.glendinning; +Cc: netdev, ming.lei, oneukum, gregkh
In-Reply-To: <1355827574-15164-1-git-send-email-steve.glendinning@shawell.net>

From: Steve Glendinning <steve.glendinning@shawell.net>
Date: Tue, 18 Dec 2012 10:46:12 +0000

> Two driver enhancements for net-next.  There's ongoing discussion around
> the best way to improve autosuspend moving forwards but in the meantime
> the second patch in this set enables autosuspend for supported parts in
> most situations.

Please do not submit net-next changes until the net-next tree is
actually open and available for submissions.

^ permalink raw reply

* Re: [PATCH] net: fec: forbid FEC_PTP on SoCs that do not support
From: David Miller @ 2012-12-19  0:26 UTC (permalink / raw)
  To: shawn.guo; +Cc: netdev, s.hauer, richardcochran
In-Reply-To: <1355836004-22067-1-git-send-email-shawn.guo@linaro.org>

From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 18 Dec 2012 21:06:44 +0800

> Beside imx6q, the kernel built from imx_v6_v7_defconfig is also
> supposed to be running on other IMX SoCs that do not have the PTP
> block.  Before fec driver gets fixed to run-time detect target hardware
> rather than conditional compiling with #ifdef CONFIG_FEC_PTP, let's
> give it a quick fix in Kconfig to forbid FEC_PTP on those IMX SoCs that
> do not support PTP.
> 
> Reported-by: Sascha Hauer <s.hauer@pengutronix.de>
> Signed-off-by: Shawn Guo <shawn.guo@linaro.org>

This uglyness is exactly why this needs to be detected at run
time.

Anyways, applied.

^ permalink raw reply

* Re: [PATCH V2 09/12] bridge: Add the ability to configure untagged vlans
From: Vlad Yasevich @ 2012-12-19  1:06 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, shemminger, davem, or.gerlitz, jhs
In-Reply-To: <20121218230407.GB1135@redhat.com>

On 12/18/2012 06:04 PM, Michael S. Tsirkin wrote:
> On Tue, Dec 18, 2012 at 02:01:00PM -0500, Vlad Yasevich wrote:
>> A user may designate a certain vlan as untagged.  This means that
>> any ingress frame is assigned to this vlan and any forwarding decisions
>> are made with this vlan in mind.  On egress, any frames tagged/labeled
>> with untagged vlan have the vlan tag removed and are send as regular
>> ethernet frames.
>>
>> Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
>> ---
>>   include/uapi/linux/if_bridge.h |    3 +
>>   net/bridge/br_if.c             |  146 +++++++++++++++++++++++++++++++++++++---
>>   net/bridge/br_netlink.c        |    6 +-
>>   net/bridge/br_private.h        |    2 +
>>   4 files changed, 144 insertions(+), 13 deletions(-)
>>
>> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
>> index d0b4f5c..988d858 100644
>> --- a/include/uapi/linux/if_bridge.h
>> +++ b/include/uapi/linux/if_bridge.h
>> @@ -127,6 +127,9 @@ enum {
>>   	BR_VLAN_DEL,
>>   };
>>
>> +#define BRIDGE_VLAN_INFO_MASTER		1
>> +#define BRIDGE_VLAN_INFO_UNTAGGED	2
>> +
>>   struct bridge_vlan_info {
>>   	u16 op_code;
>>   	u16 flags;
>> diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
>> index 57bbb35..14563fb 100644
>> --- a/net/bridge/br_if.c
>> +++ b/net/bridge/br_if.c
>> @@ -108,6 +108,34 @@ static void br_vlan_put(struct net_bridge_vlan *vlan)
>>   		br_vlan_destroy(vlan);
>>   }
>>
>> +/* Must be protected by RTNL */
>> +static void br_vlan_add_untagged(struct net_bridge *br,
>> +				 struct net_bridge_vlan *vlan)
>> +{
>> +	ASSERT_RTNL();
>> +	if (br->untagged == vlan)
>> +		return;
>> +	else if (br->untagged) {
>> +		/* Untagged vlan is already set on the master,
>> +		 * so drop the ref since we'll be replacing it.
>> +		 */
>> +		br_vlan_put(br->untagged);
>> +	}
>> +	br_vlan_hold(vlan);
>> +	rcu_assign_pointer(br->untagged, vlan);
>> +}
>> +
>> +/* Must be protected by RTNL */
>> +static void br_vlan_del_untagged(struct net_bridge *br,
>> +				 struct net_bridge_vlan *vlan)
>> +{
>> +	ASSERT_RTNL();
>> +	if (br->untagged == vlan) {
>> +		br_vlan_put(vlan);
>> +		rcu_assign_pointer(br->untagged, NULL);
>> +	}
>> +}
>> +
>>   struct net_bridge_vlan *br_vlan_find(struct net_bridge *br, u16 vid)
>>   {
>>   	struct net_bridge_vlan *vlan;
>> @@ -132,7 +160,7 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>>
>>   	vlan = br_vlan_find(br, vid);
>>   	if (vlan)
>> -		return vlan;
>> +		goto untagged;
>>
>>   	vlan = kzalloc(sizeof(struct net_bridge_vlan), GFP_KERNEL);
>>   	if (!vlan)
>> @@ -141,7 +169,7 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>>   	vlan->vid = vid;
>>   	atomic_set(&vlan->refcnt, 1);
>>
>> -	if (flags & BRIDGE_FLAGS_SELF) {
>> +	if (flags & BRIDGE_VLAN_INFO_MASTER) {
>>   		/* Set bit 0 that is associated with the bridge master
>>   		 * device.  Port numbers start with 1.
>>   		 */
>> @@ -149,15 +177,24 @@ struct net_bridge_vlan *br_vlan_add(struct net_bridge *br, u16 vid,
>>   	}
>>
>>   	hlist_add_head_rcu(&vlan->hlist, &br->vlan_hlist[br_vlan_hash(vid)]);
>> +
>> +untagged:
>> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
>> +		br_vlan_add_untagged(br, vlan);
>> +
>>   	return vlan;
>>   }
>>
>>   /* Must be protected by RTNL */
>> -static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
>> +static void br_vlan_del(struct net_bridge *br, struct net_bridge_vlan *vlan,
>> +			u16 flags)
>>   {
>>   	ASSERT_RTNL();
>>
>> -	if (flags & BRIDGE_FLAGS_SELF) {
>> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
>> +		br_vlan_del_untagged(br, vlan);
>> +
>> +	if (flags & BRIDGE_VLAN_INFO_MASTER) {
>>   		/* Clear bit 0 that is associated with the bridge master
>>   		 * device.
>>   		 */
>> @@ -172,6 +209,14 @@ static void br_vlan_del(struct net_bridge_vlan *vlan, u16 flags)
>>
>>   	vlan->vid = BR_INVALID_VID;
>>
>> +	/* If, for whatever reason, bridge still has a ref on this vlan
>> +	 * through the @untagged pointer, drop that ref and clear untagged.
>> +	 */
>> +	if (br->untagged == vlan) {
>> +		br_vlan_put(vlan);
>> +		rcu_assign_pointer(br->untagged, NULL);
>
> Is something doing an rcu sync after this point?
> If yes maybe add a comment saying where it is, if not - some rcu read
> side could still be using it through this pointer.

yes, at this point, all references from the ports have been dropped and
we are trying to destroy the element.  The put below will trigger the
destruction path which will do kfree_rcu().  Thus there is a grace
period...

-vlad

>
>> +	}
>> +
>>   	/* Drop the self-ref to trigger descrution. */
>>   	br_vlan_put(vlan);
>>   }
>> @@ -187,7 +232,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid, u16 flags)
>>   	if (!vlan)
>>   		return -ENOENT;
>>
>> -	br_vlan_del(vlan, flags);
>> +	br_vlan_del(br, vlan, flags);
>>   	return 0;
>>   }
>>
>> @@ -204,7 +249,9 @@ static void br_vlan_flush(struct net_bridge *br)
>>   	for (i = 0; i < BR_VID_HASH_SIZE; i++) {
>>   		hlist_for_each_entry_safe(vlan, node, tmp,
>>   					  &br->vlan_hlist[i], hlist) {
>> -			br_vlan_del(vlan, BRIDGE_FLAGS_SELF);
>> +			br_vlan_del(br, vlan,
>> +				    (BRIDGE_VLAN_INFO_MASTER |
>> +				     BRIDGE_VLAN_INFO_UNTAGGED));
>>   		}
>>   	}
>>   }
>> @@ -224,10 +271,70 @@ struct net_port_vlan *nbp_vlan_find(const struct net_bridge_port *p, u16 vid)
>>   	return NULL;
>>   }
>>
>> +static int nbp_vlan_add_untagged(struct net_bridge_port *p,
>> +			  struct net_bridge_vlan *vlan,
>> +			  u16 flags)
>> +{
>> +	struct net_device *dev = p->dev;
>> +
>> +	if (p->untagged) {
>> +		/* Port already has untagged vlan set.  Drop the ref
>> +		 * to the old one since we'll be replace it.
>> +		 */
>> +		br_vlan_put(p->untagged);
>> +	} else {
>> +		int err;
>> +
>> +		/* Add vid 0 to filter if filter is available. */
>> +		if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
>> +		    dev->netdev_ops->ndo_vlan_rx_add_vid &&
>> +		    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
>> +			err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
>> +			if (err)
>> +				return err;
>> +		}
>> +	}
>> +
>> +	/* This VLAN is handled as untagged/native. Save an
>> +	 * additional ref.
>> +	 */
>> +	br_vlan_hold(vlan);
>> +	rcu_assign_pointer(p->untagged, vlan);
>> +
>> +	return 0;
>> +}
>> +
>> +static void nbp_vlan_delete_untagged(struct net_bridge_port *p,
>> +				     struct net_bridge_vlan *vlan)
>> +{
>> +	if (p->untagged != vlan)
>> +		return;
>> +
>> +	/* Remove VLAN from the device filter if it is supported. */
>> +	if ((p->dev->features & NETIF_F_HW_VLAN_FILTER) &&
>> +	    p->dev->netdev_ops->ndo_vlan_rx_kill_vid) {
>> +		int err;
>> +
>> +		err = p->dev->netdev_ops->ndo_vlan_rx_kill_vid(p->dev, 0);
>> +		if (err) {
>> +			pr_warn("failed to kill vid %d for device %s\n",
>> +				vlan->vid, p->dev->name);
>> +		}
>> +	}
>> +
>> +	/* If this VLAN is currently functioning as untagged, clear it.
>> +	 * It's safe to drop the refcount, since the vlan is still held
>> +	 * by the port.
>> +	 */
>> +	br_vlan_put(vlan);
>> +	rcu_assign_pointer(p->untagged, NULL);
>> +
>> +}
>> +
>>   /* Must be protected by RTNL */
>>   int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
>>   {
>> -	struct net_port_vlan *pve;
>> +	struct net_port_vlan *pve = NULL;
>>   	struct net_bridge_vlan *vlan;
>>   	struct net_device *dev = p->dev;
>>   	int err;
>> @@ -275,11 +382,21 @@ int nbp_vlan_add(struct net_bridge_port *p, u16 vid, u16 flags)
>>   	set_bit(p->port_no, vlan->port_bitmap);
>>
>>   	list_add_tail_rcu(&pve->list, &p->vlan_list);
>> +
>> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
>> +		err = nbp_vlan_add_untagged(p, vlan, flags);
>> +		if (err)
>> +			goto del_vlan;
>> +	}
>> +
>>   	return 0;
>>
>>   clean_up:
>>   	kfree(pve);
>> -	br_vlan_del(vlan, flags);
>> +	br_vlan_del(p->br, vlan, flags);
>> +	return err;
>> +del_vlan:
>> +	nbp_vlan_delete(p, vid, flags);
>>   	return err;
>>   }
>>
>> @@ -296,6 +413,9 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>>   	if (!pve)
>>   		return -ENOENT;
>>
>> +	if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
>> +		nbp_vlan_delete_untagged(p, pve->vlan);
>> +
>>   	/* Remove VLAN from the device filter if it is supported. */
>>   	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
>>   	    dev->netdev_ops->ndo_vlan_rx_kill_vid) {
>> @@ -306,6 +426,7 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>>   			pr_warn("failed to kill vid %d for device %s\n",
>>   				vid, dev->name);
>>   	}
>> +
>>   	pve->vid = BR_INVALID_VID;
>>
>>   	vlan = pve->vlan;
>> @@ -316,7 +437,7 @@ int nbp_vlan_delete(struct net_bridge_port *p, u16 vid, u16 flags)
>>   	list_del_rcu(&pve->list);
>>   	kfree_rcu(pve, rcu);
>>
>> -	br_vlan_del(vlan, flags);
>> +	br_vlan_del(p->br, vlan, flags);
>>
>>   	return 0;
>>   }
>> @@ -328,8 +449,11 @@ static void nbp_vlan_flush(struct net_bridge_port *p)
>>
>>   	ASSERT_RTNL();
>>
>> -	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)
>> -		nbp_vlan_delete(p, pve->vid, BRIDGE_FLAGS_SELF);
>> +	list_for_each_entry_safe(pve, tmp, &p->vlan_list, list)  {
>> +		nbp_vlan_delete(p, pve->vid,
>> +				(BRIDGE_VLAN_INFO_MASTER |
>> +				 BRIDGE_VLAN_INFO_UNTAGGED));
>> +	}
>>   }
>>
>>   static void release_nbp(struct kobject *kobj)
>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>> index 9cf2879..1b302ce 100644
>> --- a/net/bridge/br_netlink.c
>> +++ b/net/bridge/br_netlink.c
>> @@ -199,7 +199,8 @@ static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
>>   			if (p)
>>   				err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
>>   			else {
>> -				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
>> +				u16 flags = vinfo->flags |
>> +					    BRIDGE_VLAN_INFO_MASTER;
>>   				if (!br_vlan_add(br, vinfo->vid, flags))
>>   					err = -ENOMEM;
>>   			}
>> @@ -210,7 +211,8 @@ static int br_afspec(struct net_bridge *br, struct net_bridge_port *p,
>>   				err = nbp_vlan_delete(p, vinfo->vid,
>>   						      vinfo->flags);
>>   			else {
>> -				u16 flags = vinfo->flags | BRIDGE_FLAGS_SELF;
>> +				u16 flags = vinfo->flags |
>> +					    BRIDGE_VLAN_INFO_MASTER;
>>   				err = br_vlan_delete(br, vinfo->vid, flags);
>>   			}
>>   			break;
>> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
>> index cc75212..9328463 100644
>> --- a/net/bridge/br_private.h
>> +++ b/net/bridge/br_private.h
>> @@ -179,6 +179,7 @@ struct net_bridge_port
>>   	struct netpoll			*np;
>>   #endif
>>   	struct list_head		vlan_list;
>> +	struct net_bridge_vlan __rcu	*untagged;
>>   };
>>
>>   #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
>> @@ -298,6 +299,7 @@ struct net_bridge
>>   	struct timer_list		gc_timer;
>>   	struct kobject			*ifobj;
>>   	struct hlist_head		vlan_hlist[BR_VID_HASH_SIZE];
>> +	struct net_bridge_vlan __rcu	*untagged;
>>   };
>>
>>   struct br_input_skb_cb {
>> --
>> 1.7.7.6
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Good day!!!
From: Mr Kale mills @ 2012-12-19  1:14 UTC (permalink / raw)




My client Mrs.Alicia Robinson,93 years old and a philanthropist has  
willed to you $5.5 million,Email:kale_mills@live.com(strictly with  
this email address)

^ permalink raw reply

* Re: [PATCHv3 2/2] smsc95xx: enable dynamic autosuspend
From: Ming Lei @ 2012-12-19  2:23 UTC (permalink / raw)
  To: Steve Glendinning; +Cc: netdev, oneukum, gregkh
In-Reply-To: <1355827574-15164-3-git-send-email-steve.glendinning@shawell.net>

On Tue, Dec 18, 2012 at 6:46 PM, Steve Glendinning
<steve.glendinning@shawell.net> wrote:
> This patch enables USB dynamic autosuspend for LAN9500A.  This
> saves very little power in itself, but it allows power saving

Putting device into suspend1 after link being off does save power, so
please update the commit log.

> in upstream hubs/hosts.
>
> The earlier devices in this family (LAN9500/9512/9514) do not
> support this feature.
>
> Signed-off-by: Steve Glendinning <steve.glendinning@shawell.net>
> ---
>  drivers/net/usb/smsc95xx.c |  151 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 150 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
> index 124e67f..6a74a68 100644
> --- a/drivers/net/usb/smsc95xx.c
> +++ b/drivers/net/usb/smsc95xx.c
> @@ -55,6 +55,13 @@
>  #define FEATURE_PHY_NLP_CROSSOVER      (0x02)
>  #define FEATURE_AUTOSUSPEND            (0x04)
>
> +#define SUSPEND_SUSPEND0               (0x01)
> +#define SUSPEND_SUSPEND1               (0x02)
> +#define SUSPEND_SUSPEND2               (0x04)
> +#define SUSPEND_SUSPEND3               (0x08)
> +#define SUSPEND_ALLMODES               (SUSPEND_SUSPEND0 | SUSPEND_SUSPEND1 | \
> +                                        SUSPEND_SUSPEND2 | SUSPEND_SUSPEND3)
> +
>  struct smsc95xx_priv {
>         u32 mac_cr;
>         u32 hash_hi;
> @@ -62,6 +69,7 @@ struct smsc95xx_priv {
>         u32 wolopts;
>         spinlock_t mac_cr_lock;
>         u8 features;
> +       u8 suspend_flags;
>  };
>
>  static bool turbo_mode = true;
> @@ -1242,6 +1250,8 @@ static int smsc95xx_enter_suspend0(struct usbnet *dev)
>         /* read back PM_CTRL */
>         ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
>
> +       pdata->suspend_flags |= SUSPEND_SUSPEND0;
> +
>         return ret;
>  }
>
> @@ -1286,11 +1296,14 @@ static int smsc95xx_enter_suspend1(struct usbnet *dev)
>
>         ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
>
> +       pdata->suspend_flags |= SUSPEND_SUSPEND1;
> +
>         return ret;
>  }
>
>  static int smsc95xx_enter_suspend2(struct usbnet *dev)
>  {
> +       struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
>         u32 val;
>         int ret;
>
> @@ -1303,9 +1316,97 @@ static int smsc95xx_enter_suspend2(struct usbnet *dev)
>
>         ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
>
> +       pdata->suspend_flags |= SUSPEND_SUSPEND2;
> +
>         return ret;
>  }
>
> +static int smsc95xx_enter_suspend3(struct usbnet *dev)
> +{
> +       struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
> +       u32 val;
> +       int ret;
> +
> +       ret = smsc95xx_read_reg_nopm(dev, RX_FIFO_INF, &val);
> +       if (ret < 0)
> +               return ret;
> +
> +       if (val & 0xFFFF) {
> +               netdev_info(dev->net, "rx fifo not empty in autosuspend\n");
> +               return -EBUSY;
> +       }
> +
> +       ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
> +       if (ret < 0)
> +               return ret;
> +
> +       val &= ~(PM_CTL_SUS_MODE_ | PM_CTL_WUPS_ | PM_CTL_PHY_RST_);
> +       val |= PM_CTL_SUS_MODE_3 | PM_CTL_RES_CLR_WKP_STS;
> +
> +       ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
> +       if (ret < 0)
> +               return ret;
> +
> +       /* clear wol status */
> +       val &= ~PM_CTL_WUPS_;
> +       val |= PM_CTL_WUPS_WOL_;
> +
> +       ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
> +       if (ret < 0)
> +               return ret;
> +
> +       pdata->suspend_flags |= SUSPEND_SUSPEND3;
> +
> +       return 0;
> +}
> +
> +static int smsc95xx_autosuspend(struct usbnet *dev, u32 link_up)
> +{
> +       struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
> +       int ret;
> +
> +       if (!netif_running(dev->net)) {
> +               /* interface is ifconfig down so fully power down hw */
> +               netdev_dbg(dev->net, "autosuspend entering SUSPEND2\n");
> +               return smsc95xx_enter_suspend2(dev);
> +       }
> +
> +       if (!link_up) {
> +               /* link is down so enter EDPD mode, but only if device can
> +                * reliably resume from it.  This check should be redundant
> +                * as current FEATURE_AUTOSUSPEND parts also support
> +                * FEATURE_PHY_NLP_CROSSOVER but it's included for clarity */
> +               if (!(pdata->features & FEATURE_PHY_NLP_CROSSOVER)) {
> +                       netdev_warn(dev->net, "EDPD not supported\n");
> +                       return -EBUSY;
> +               }
> +
> +               netdev_dbg(dev->net, "autosuspend entering SUSPEND1\n");
> +
> +               /* enable PHY wakeup events for if cable is attached */
> +               ret = smsc95xx_enable_phy_wakeup_interrupts(dev,
> +                       PHY_INT_MASK_ANEG_COMP_);
> +               if (ret < 0) {
> +                       netdev_warn(dev->net, "error enabling PHY wakeup ints\n");
> +                       return ret;
> +               }
> +
> +               netdev_info(dev->net, "entering SUSPEND1 mode\n");
> +               return smsc95xx_enter_suspend1(dev);
> +       }
> +
> +       /* enable PHY wakeup events so we remote wakeup if cable is pulled */
> +       ret = smsc95xx_enable_phy_wakeup_interrupts(dev,
> +               PHY_INT_MASK_LINK_DOWN_);
> +       if (ret < 0) {
> +               netdev_warn(dev->net, "error enabling PHY wakeup ints\n");
> +               return ret;
> +       }
> +
> +       netdev_dbg(dev->net, "autosuspend entering SUSPEND3\n");
> +       return smsc95xx_enter_suspend3(dev);
> +}
> +
>  static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
>  {
>         struct usbnet *dev = usb_get_intfdata(intf);
> @@ -1313,15 +1414,35 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
>         u32 val, link_up;
>         int ret;
>
> +       /* TODO: don't indicate this feature to usb framework if
> +        * our current hardware doesn't have the capability
> +        */
> +       if ((message.event == PM_EVENT_AUTO_SUSPEND) &&
> +           (!(pdata->features & FEATURE_AUTOSUSPEND))) {
> +               netdev_warn(dev->net, "autosuspend not supported\n");
> +               return -EBUSY;
> +       }

The above is wrong, sorry for not mentioning it in the previous review.

Firstly, the flag FEATURE_AUTOSUSPEND is misleading, and it only
means that the device can't generate remote wakeup, and it shouldn't
mean that the device can't be put into auto-suspend. So a better name
might be FEATURE_REMOTE_WAKEUP.

Secondly, the device should and can be put into auto suspend when
the network interface is down, but the above change makes that
impossible.

> +
>         ret = usbnet_suspend(intf, message);
>         if (ret < 0) {
>                 netdev_warn(dev->net, "usbnet_suspend error\n");
>                 return ret;
>         }
>
> +       if (pdata->suspend_flags) {
> +               netdev_warn(dev->net, "error during last resume\n");
> +               pdata->suspend_flags = 0;
> +       }
> +
>         /* determine if link is up using only _nopm functions */
>         link_up = smsc95xx_link_ok_nopm(dev);
>
> +       if (message.event == PM_EVENT_AUTO_SUSPEND) {
> +               ret = smsc95xx_autosuspend(dev, link_up);
> +               goto done;
> +       }
> +
> +       /* if we get this far we're not autosuspending */
>         /* if no wol options set, or if link is down and we're not waking on
>          * PHY activity, enter lowest power SUSPEND2 mode
>          */
> @@ -1552,12 +1673,18 @@ static int smsc95xx_resume(struct usb_interface *intf)
>  {
>         struct usbnet *dev = usb_get_intfdata(intf);
>         struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
> +       u8 suspend_flags = pdata->suspend_flags;
>         int ret;
>         u32 val;
>
>         BUG_ON(!dev);
>
> -       if (pdata->wolopts) {
> +       netdev_dbg(dev->net, "resume suspend_flags=0x%02x\n", suspend_flags);
> +
> +       /* do this first to ensure it's cleared even in error case */
> +       pdata->suspend_flags = 0;
> +
> +       if (suspend_flags & SUSPEND_ALLMODES) {
>                 /* clear wake-up sources */
>                 ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
>                 if (ret < 0)
> @@ -1741,6 +1868,26 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
>         return skb;
>  }
>
> +static int smsc95xx_manage_power(struct usbnet *dev, int on)
> +{
> +       struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
> +
> +       dev->intf->needs_remote_wakeup = on;
> +
> +       if (pdata->features & FEATURE_AUTOSUSPEND)
> +               return 0;
> +
> +       /* this chip revision doesn't support autosuspend */
> +       netdev_info(dev->net, "hardware doesn't support USB autosuspend\n");

Both the comment and the log are misleading.

> +
> +       if (on)
> +               usb_autopm_get_interface_no_resume(dev->intf);
> +       else
> +               usb_autopm_put_interface(dev->intf);

As mentioned previously, playing the get/put trick is not good, why
can't we set .manage_power as null for these devices?

> +       return 0;
> +}
> +
>  static const struct driver_info smsc95xx_info = {
>         .description    = "smsc95xx USB 2.0 Ethernet",
>         .bind           = smsc95xx_bind,
> @@ -1750,6 +1897,7 @@ static const struct driver_info smsc95xx_info = {
>         .rx_fixup       = smsc95xx_rx_fixup,
>         .tx_fixup       = smsc95xx_tx_fixup,
>         .status         = smsc95xx_status,
> +       .manage_power   = smsc95xx_manage_power,
>         .flags          = FLAG_ETHER | FLAG_SEND_ZLP | FLAG_LINK_INTR,
>  };
>
> @@ -1857,6 +2005,7 @@ static struct usb_driver smsc95xx_driver = {
>         .reset_resume   = smsc95xx_resume,
>         .disconnect     = usbnet_disconnect,
>         .disable_hub_initiated_lpm = 1,
> +       .supports_autosuspend = 1,
>  };
>
>  module_usb_driver(smsc95xx_driver);
> --
> 1.7.10.4
>

Thanks,
--
Ming Lei

^ permalink raw reply

* Re: [GIT PULL net-next 01/17] ndisc: Fix size calculation for headers.
From: YOSHIFUJI Hideaki @ 2012-12-19  3:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20121218.162430.512853714678597525.davem@davemloft.net>

David Miller wrote:
> From: David Miller <davem@davemloft.net>
> Date: Tue, 18 Dec 2012 16:23:38 -0800 (PST)
> 
>> I'm really disappointed that the very first patch in this series is
>> buggy, and you didn't even bother to repost the absolutely required
>> "00/17" email for this series which is where you're supposed to give a
>> top-level overview of your changes as well as any GIT pull request.
> 
> Also, right now the net-next tree is not open, so these changes
> aren't even appropriate to be submitting right now.

Sorry about this, and I'll repost later.

--yoshfuji

^ permalink raw reply

* Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
From: Joe Jin @ 2012-12-19  3:04 UTC (permalink / raw)
  To: Fujinaka, Todd
  Cc: Ethan Zhao, Ben Hutchings, Mary Mcgrath, netdev@vger.kernel.org,
	e1000-devel@lists.sf.net, linux-kernel@vger.kernel.org, linux-pci
In-Reply-To: <9B4A1B1917080E46B64F07F2989DADD638C60A9B@ORSMSX102.amr.corp.intel.com>

Hi all,

I backported mps commits and ask customer pass "pci=pcie_bus_peer2pee" to kernel
to limited MPS to 128 and issue disappeared, sound like this is a BIOS bug.

Thanks all of your help.

Best Regards,
Joe

On 11/29/12 23:52, Fujinaka, Todd wrote:
> Someone else pointed this out to me locally. If you have a non-client BIOS, you should be able to set the MaxPayloadSize using setpci. You have to make sure that you're being consistent throughout all the associated links.
> 
> Todd Fujinaka
> Technical Marketing Engineer
> LAN Access Division (LAD)
> Intel Corporation
> todd.fujinaka@intel.com
> (503) 712-4565
> 
> 
> -----Original Message-----
> From: Ethan Zhao [mailto:ethan.kernel@gmail.com] 
> Sent: Wednesday, November 28, 2012 7:10 PM
> To: Fujinaka, Todd
> Cc: Joe Jin; Ben Hutchings; Mary Mcgrath; netdev@vger.kernel.org; e1000-devel@lists.sf.net; linux-kernel@vger.kernel.org; linux-pci
> Subject: Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
> 
> Joe,
>     Possibly your customer is running a kernel without source code on a platform whose vendor wouldn't like to fix BIOS issue( Is that a HP/Dell server ?).
>     Anyway, to see if is a payload issue or,  you could change the payload size with setpci tool to those devices and set the link retrain bit to trigger the link retraining to debug the issue and identity the root cause.  I thinks it is much easier than modify the BIOS or  eeprom of NIC.
> 
>     e.g.
>    set device control register to 0f 00   (128 bytes payload size)
>    #   setpci -v -s 00:02.0 98.w=000f
>    set device link control register to 60h (retrain the link)
>    #  setpci -v -s 00:02.0 a0.b=60
> 
>   Hope it works,  Just my 2 cents.
> 
> Ethan.zhao@oracle.com
> 
> On Wed, Nov 28, 2012 at 11:53 PM, Fujinaka, Todd <todd.fujinaka@intel.com> wrote:
>> The only EEPROM I know about or can speak to is the one attached to the 82571 and it doesn't set the MaxPayloadSize. That's done by the BIOS.
>>
>> Todd Fujinaka
>> Technical Marketing Engineer
>> LAN Access Division (LAD)
>> Intel Corporation
>> todd.fujinaka@intel.com
>> (503) 712-4565
>>
>>
>> -----Original Message-----
>> From: Joe Jin [mailto:joe.jin@oracle.com]
>> Sent: Wednesday, November 28, 2012 12:31 AM
>> To: Ben Hutchings
>> Cc: Fujinaka, Todd; Mary Mcgrath; netdev@vger.kernel.org; 
>> e1000-devel@lists.sf.net; linux-kernel@vger.kernel.org; linux-pci
>> Subject: Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
>>
>> On 11/28/12 02:10, Ben Hutchings wrote:
>>> On Tue, 2012-11-27 at 17:32 +0000, Fujinaka, Todd wrote:
>>>> Forgive me if I'm being too repetitious as I think some of this has 
>>>> been mentioned in the past.
>>>>
>>>> We (and by we I mean the Ethernet part and driver) can only change 
>>>> the advertised availability of a larger MaxPayloadSize. The size is 
>>>> negotiated by both sides of the link when the link is established.
>>>> The driver should not change the size of the link as it would be 
>>>> poking at registers outside of its scope and is controlled by the 
>>>> upstream bridge (not us).
>>> [...]
>>>
>>> MaxPayloadSize (MPS) is not negotiated between devices but is 
>>> programmed by the system firmware (at least for devices present at 
>>> boot - the kernel may be responsible in case of hotplug).  You can 
>>> use the kernel parameter 'pci=pcie_bus_perf' (or one of several 
>>> others) to set a policy that overrides this, but no policy will allow 
>>> setting MPS above the device's MaxPayloadSizeSupported (MPSS).
>>>
>>
>> Ben,
>>
>> Unfortunately I'm using 3.0.x kernel and this is not included in the kernel.
>> So I'm trying to use ethtool modify it from eeprom to see if help or no.
>>
>>
>> Todd, I'll review all MaxPayload for all devices, but need to say if it mismatch, customer could not modify it from BIOS for there was not entry at there, to test it, we have to find how to verify if this is the root cause, so still need to find the offset in eeprom.
>>
>> Thanks in advance,
>> Joe
>>


-- 
Oracle <http://www.oracle.com>
Joe Jin | Software Development Senior Manager | +8610.6106.5624
ORACLE | Linux and Virtualization
No. 24 Zhongguancun Software Park, Haidian District | 100193 Beijing 

^ permalink raw reply

* Re: [GIT PULL net-next 04/17] ndisc: Introduce ndisc_fill_redirect_hdr_option().
From: YOSHIFUJI Hideaki @ 2012-12-19  3:08 UTC (permalink / raw)
  To: 'netdev@vger.kernel.org', David Miller; +Cc: YOSHIFUJI Hideaki
In-Reply-To: <50D04B4B.7060002@linux-ipv6.org>

YOSHIFUJI Hideaki wrote:
> Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
> ---
>  net/ipv6/ndisc.c |   21 +++++++++++++++------
>  1 file changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
> index a181113..0a4f3a9 100644
> --- a/net/ipv6/ndisc.c
> +++ b/net/ipv6/ndisc.c
> @@ -1332,6 +1332,19 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
>  	icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
>  }
>  
> +static u8 *ndisc_fill_redirect_hdr_option(u8 *opt, struct sk_buff *orig_skb,
> +					  int rd_len)
> +{
> +	memset(opt, 0, 8);
> +	*(opt++) = ND_OPT_REDIRECT_HDR;
> +	*(opt++) = (rd_len >> 3);
> +	opt += 6;
> +
> +	memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
> +
> +	return opt;
> +}
> +
>  void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
>  {
>  	struct net_device *dev = skb->dev;

By the way, is it really safe to use memcpy here?
Should we use skb_copy_bits() instead?

--yoshfuji

^ permalink raw reply

* Re: [GIT PULL net-next 01/17] ndisc: Fix size calculation for headers.
From: YOSHIFUJI Hideaki @ 2012-12-19  3:08 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, YOSHIFUJI Hideaki
In-Reply-To: <20121218.162338.1594192050394527214.davem@davemloft.net>

David Miller wrote:
> From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
> Date: Tue, 18 Dec 2012 19:52:04 +0900
> 
>> We used to allocate MAX_HEADER bytes more than needed but
>> reserved hlen only (not MAX_HEADER + hlen) and the MAX_HEADER
>> was left behind.
>>
>> Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
> 
> This change is wrong.
> 
> The MAX_HEADER is being used in order to accomodate any
> possible encapsulation that may occur.

As I tried to explain in the commit message, ndisc_build_skb() does tI would say his:

|        int hlen = LL_RESERVED_SPACE(dev);
:
|        skb = sock_alloc_send_skb(sk,
|                                  (MAX_HEADER + sizeof(struct ipv6hdr) +
|                                   len + hlen + tlen),
|                                  1, &err);
|        if (!skb) {
|                ND_PRINTK(0, err, "ND: %s failed to allocate an skb, err=%d\n",
|                          __func__, err);
|                return NULL;
|        }
|
|        skb_reserve(skb, hlen);

This means, MAX_HEADER has been placed at the TAIL of
the buffer, instead of the HEAD of the buffer, like this.

head        data                   tail                        end
+--------------------------------------------------------------+
+           |                      |          |                |
+--------------------------------------------------------------+
|<- hlen  ->|<---ipv6 packet------>|<--tlen-->|<--MAX_HEADER-->|
|  = LL_
     RESERVED_
     SPACE(dev)

MAX_HEADER will not be used at all and I would say it is waste of
memory.

Or do you expect something like this?

+--------------------------------------------------------------+
+                |           |                      |          |
+--------------------------------------------------------------+
|<--MAX_HEADER-->|<---hlen-->|<---ipv6 packet------>|<--tlen-->|
head                       data                   tail       end

or like this:

+--------------------------------------------------+
+                |                      |          |
+--------------------------------------------------+
|<--MAX_HEADER-->|<---ipv6 packet------>|<--tlen-->|
head           data                   tail       end

If so, we should (re)visit almost all users of
sock_alloc_send_skb() and friends, anyway, I think.

--yoshfuji

^ permalink raw reply

* Re: [RFC PATCH v3 2/2] tun: fix LSM/SELinux labeling of tun/tap devices
From: Jason Wang @ 2012-12-19  5:46 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Paul Moore, netdev, linux-security-module, selinux
In-Reply-To: <20121218230808.GC1135@redhat.com>

On 12/19/2012 07:08 AM, Michael S. Tsirkin wrote:
> On Tue, Dec 18, 2012 at 05:53:52PM -0500, Paul Moore wrote:
>> This patch corrects some problems with LSM/SELinux that were introduced
>> with the multiqueue patchset.  The problem stems from the fact that the
>> multiqueue work changed the relationship between the tun device and its
>> associated socket; before the socket persisted for the life of the
>> device, however after the multiqueue changes the socket only persisted
>> for the life of the userspace connection (fd open).  For non-persistent
>> devices this is not an issue, but for persistent devices this can cause
>> the tun device to lose its SELinux label.
>>
>> We correct this problem by adding an opaque LSM security blob to the
>> tun device struct which allows us to have the LSM security state, e.g.
>> SELinux labeling information, persist for the lifetime of the tun
>> device.  In the process we tweak the LSM hooks to work with this new
>> approach to TUN device/socket labeling and introduce a new LSM hook,
>> security_tun_dev_attach_queue(), to approve requests to attach to a
>> TUN queue via TUNSETQUEUE.
>>
>> The SELinux code has been adjusted to match the new LSM hooks, the
>> other LSMs do not make use of the LSM TUN controls.  This patch makes
>> use of the recently added "tun_socket:attach_queue" permission to
>> restrict access to the TUNSETQUEUE operation.  On older SELinux
>> policies which do not define the "tun_socket:attach_queue" permission
>> the access control decision for TUNSETQUEUE will be handled according
>> to the SELinux policy's unknown permission setting.
>>
>> Signed-off-by: Paul Moore <pmoore@redhat.com>
>
> Looks good to me. A comment not directly related to this patch, below.

Good to me too, will do some test on this.
>
>> ---
>>  drivers/net/tun.c                 |   27 +++++++++++++----
>>  include/linux/security.h          |   59 +++++++++++++++++++++++++++++--------
>>  security/capability.c             |   24 +++++++++++++--
>>  security/security.c               |   28 ++++++++++++++----
>>  security/selinux/hooks.c          |   50 ++++++++++++++++++++++++-------
>>  security/selinux/include/objsec.h |    4 +++
>>  6 files changed, 154 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 504f7f1..4b7754c 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -186,6 +186,7 @@ struct tun_struct {
>>  	unsigned long ageing_time;
>>  	unsigned int numdisabled;
>>  	struct list_head disabled;
>> +	void *security;
>>  };
>>  
[...]
>>  
>> @@ -1805,12 +1813,18 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
>>  
>>  	if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
>>  		tun = tfile->detached;
>> -		if (!tun)
>> +		if (!tun) {
>>  			ret = -EINVAL;
>> -		else if (tun_not_capable(tun))
>> +			goto unlock;
>> +		}
>> +		if (tun_not_capable(tun)) {
> By the way I don't think we need to limit set_queue
> by tun_not_capable. It simply makes a queue active/inactive
> which seems harmless. Jason?

Maybe we should keep this, when a tun has a owner it would not expected
any other user except the one has CAP_NET_ADMIN to active or de-active a
queue.
>
>>  			ret = -EPERM;
>> -		else
[...]

^ permalink raw reply

* Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
From: Yijing Wang @ 2012-12-19  5:52 UTC (permalink / raw)
  To: Joe Jin
  Cc: Fujinaka, Todd, Ethan Zhao, Ben Hutchings, Mary Mcgrath,
	netdev@vger.kernel.org, e1000-devel@lists.sf.net,
	linux-kernel@vger.kernel.org, linux-pci, Jon Mason, Bjorn Helgaas
In-Reply-To: <50D12EA2.3030907@oracle.com>

On 2012/12/19 11:04, Joe Jin wrote:
> Hi all,
> 
> I backported mps commits and ask customer pass "pci=pcie_bus_peer2pee" to kernel
> to limited MPS to 128 and issue disappeared, sound like this is a BIOS bug.
> 

Hi Joe,
   I found similar problem when I do pci hotplug, discussion is here:http://marc.info/?l=linux-pci&m=134810569924220&w=2.
We try to improve Linux kernel to debug this problem easily based Bjorn's suggestion. Jon sent out the first version patch http://marc.info/?l=linux-pci&m=135002016005274&w=2.
I think we can do further here, http://marc.info/?l=linux-pci&m=135115581307869&w=2. I hope this information can help you.

Thanks!
Yijing.

> Thanks all of your help.
> 
> Best Regards,
> Joe
> 
> On 11/29/12 23:52, Fujinaka, Todd wrote:
>> Someone else pointed this out to me locally. If you have a non-client BIOS, you should be able to set the MaxPayloadSize using setpci. You have to make sure that you're being consistent throughout all the associated links.
>>
>> Todd Fujinaka
>> Technical Marketing Engineer
>> LAN Access Division (LAD)
>> Intel Corporation
>> todd.fujinaka@intel.com
>> (503) 712-4565
>>
>>
>> -----Original Message-----
>> From: Ethan Zhao [mailto:ethan.kernel@gmail.com] 
>> Sent: Wednesday, November 28, 2012 7:10 PM
>> To: Fujinaka, Todd
>> Cc: Joe Jin; Ben Hutchings; Mary Mcgrath; netdev@vger.kernel.org; e1000-devel@lists.sf.net; linux-kernel@vger.kernel.org; linux-pci
>> Subject: Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
>>
>> Joe,
>>     Possibly your customer is running a kernel without source code on a platform whose vendor wouldn't like to fix BIOS issue( Is that a HP/Dell server ?).
>>     Anyway, to see if is a payload issue or,  you could change the payload size with setpci tool to those devices and set the link retrain bit to trigger the link retraining to debug the issue and identity the root cause.  I thinks it is much easier than modify the BIOS or  eeprom of NIC.
>>
>>     e.g.
>>    set device control register to 0f 00   (128 bytes payload size)
>>    #   setpci -v -s 00:02.0 98.w=000f
>>    set device link control register to 60h (retrain the link)
>>    #  setpci -v -s 00:02.0 a0.b=60
>>
>>   Hope it works,  Just my 2 cents.
>>
>> Ethan.zhao@oracle.com
>>
>> On Wed, Nov 28, 2012 at 11:53 PM, Fujinaka, Todd <todd.fujinaka@intel.com> wrote:
>>> The only EEPROM I know about or can speak to is the one attached to the 82571 and it doesn't set the MaxPayloadSize. That's done by the BIOS.
>>>
>>> Todd Fujinaka
>>> Technical Marketing Engineer
>>> LAN Access Division (LAD)
>>> Intel Corporation
>>> todd.fujinaka@intel.com
>>> (503) 712-4565
>>>
>>>
>>> -----Original Message-----
>>> From: Joe Jin [mailto:joe.jin@oracle.com]
>>> Sent: Wednesday, November 28, 2012 12:31 AM
>>> To: Ben Hutchings
>>> Cc: Fujinaka, Todd; Mary Mcgrath; netdev@vger.kernel.org; 
>>> e1000-devel@lists.sf.net; linux-kernel@vger.kernel.org; linux-pci
>>> Subject: Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
>>>
>>> On 11/28/12 02:10, Ben Hutchings wrote:
>>>> On Tue, 2012-11-27 at 17:32 +0000, Fujinaka, Todd wrote:
>>>>> Forgive me if I'm being too repetitious as I think some of this has 
>>>>> been mentioned in the past.
>>>>>
>>>>> We (and by we I mean the Ethernet part and driver) can only change 
>>>>> the advertised availability of a larger MaxPayloadSize. The size is 
>>>>> negotiated by both sides of the link when the link is established.
>>>>> The driver should not change the size of the link as it would be 
>>>>> poking at registers outside of its scope and is controlled by the 
>>>>> upstream bridge (not us).
>>>> [...]
>>>>
>>>> MaxPayloadSize (MPS) is not negotiated between devices but is 
>>>> programmed by the system firmware (at least for devices present at 
>>>> boot - the kernel may be responsible in case of hotplug).  You can 
>>>> use the kernel parameter 'pci=pcie_bus_perf' (or one of several 
>>>> others) to set a policy that overrides this, but no policy will allow 
>>>> setting MPS above the device's MaxPayloadSizeSupported (MPSS).
>>>>
>>>
>>> Ben,
>>>
>>> Unfortunately I'm using 3.0.x kernel and this is not included in the kernel.
>>> So I'm trying to use ethtool modify it from eeprom to see if help or no.
>>>
>>>
>>> Todd, I'll review all MaxPayload for all devices, but need to say if it mismatch, customer could not modify it from BIOS for there was not entry at there, to test it, we have to find how to verify if this is the root cause, so still need to find the offset in eeprom.
>>>
>>> Thanks in advance,
>>> Joe
>>>
> 
> 


-- 
Thanks!
Yijing

^ permalink raw reply

* [PATCH net-next v2] xfrm: removes a superfluous check and add a statistic
From: roy.qing.li @ 2012-12-19  5:26 UTC (permalink / raw)
  To: netdev

From: Li RongQing <roy.qing.li@gmail.com>

Remove the check if x->km.state equal to XFRM_STATE_VALID in
xfrm_state_check_expire(), which will be done before call
xfrm_state_check_expire().

add a LINUX_MIB_XFRMOUTSTATEINVALID statistic to record the
outbound error due to invalid xfrm state.

v2: move newly adding statistic LINUX_MIB_XFRMOUTSTATEINVALID
to the end of the enumeration

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
---
 include/uapi/linux/snmp.h |    1 +
 net/xfrm/xfrm_output.c    |    6 ++++++
 net/xfrm/xfrm_proc.c      |    1 +
 net/xfrm/xfrm_state.c     |    3 ---
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index fdfba23..b49eab8 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -278,6 +278,7 @@ enum
 	LINUX_MIB_XFRMOUTPOLDEAD,		/* XfrmOutPolDead */
 	LINUX_MIB_XFRMOUTPOLERROR,		/* XfrmOutPolError */
 	LINUX_MIB_XFRMFWDHDRERROR,		/* XfrmFwdHdrError*/
+	LINUX_MIB_XFRMOUTSTATEINVALID,		/* XfrmOutStateInvalid */
 	__LINUX_MIB_XFRMMAX
 };
 
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 95a338c..3670526 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -61,6 +61,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 		}
 
 		spin_lock_bh(&x->lock);
+
+		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID);
+			goto error_nolock;
+		}
+
 		err = xfrm_state_check_expire(x);
 		if (err) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED);
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index d0a1af8..e4cd441 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -39,6 +39,7 @@ static const struct snmp_mib xfrm_mib_list[] = {
 	SNMP_MIB_ITEM("XfrmOutStateModeError", LINUX_MIB_XFRMOUTSTATEMODEERROR),
 	SNMP_MIB_ITEM("XfrmOutStateSeqError", LINUX_MIB_XFRMOUTSTATESEQERROR),
 	SNMP_MIB_ITEM("XfrmOutStateExpired", LINUX_MIB_XFRMOUTSTATEEXPIRED),
+	SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID),
 	SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK),
 	SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD),
 	SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3459692..05db236 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1370,9 +1370,6 @@ int xfrm_state_check_expire(struct xfrm_state *x)
 	if (!x->curlft.use_time)
 		x->curlft.use_time = get_seconds();
 
-	if (x->km.state != XFRM_STATE_VALID)
-		return -EINVAL;
-
 	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
 	    x->curlft.packets >= x->lft.hard_packet_limit) {
 		x->km.state = XFRM_STATE_EXPIRED;
-- 
1.7.10.4

^ permalink raw reply related

* Re: 82571EB: Detected Hardware Unit Hang
From: Joe Jin @ 2012-12-19  6:13 UTC (permalink / raw)
  To: Yijing Wang
  Cc: netdev@vger.kernel.org, Jon Mason, linux-kernel@vger.kernel.org,
	Bjorn Helgaas, e1000-devel@lists.sf.net, Mary Mcgrath, linux-pci,
	Ben Hutchings, Ethan Zhao
In-Reply-To: <50D15600.6060001@huawei.com>

Hi Yijing,

Thanks for your reference, the patch looks good for me, but I have no chance
to test it on customer's env.

Best Regards,
Joe

On 12/19/12 13:52, Yijing Wang wrote:
> On 2012/12/19 11:04, Joe Jin wrote:
>> Hi all,
>>
>> I backported mps commits and ask customer pass "pci=pcie_bus_peer2pee" to kernel
>> to limited MPS to 128 and issue disappeared, sound like this is a BIOS bug.
>>
> 
> Hi Joe,
>    I found similar problem when I do pci hotplug, discussion is here:http://marc.info/?l=linux-pci&m=134810569924220&w=2.
> We try to improve Linux kernel to debug this problem easily based Bjorn's suggestion. Jon sent out the first version patch http://marc.info/?l=linux-pci&m=135002016005274&w=2.
> I think we can do further here, http://marc.info/?l=linux-pci&m=135115581307869&w=2. I hope this information can help you.
> 
> Thanks!
> Yijing.
> 
>> Thanks all of your help.
>>
>> Best Regards,
>> Joe
>>
>> On 11/29/12 23:52, Fujinaka, Todd wrote:
>>> Someone else pointed this out to me locally. If you have a non-client BIOS, you should be able to set the MaxPayloadSize using setpci. You have to make sure that you're being consistent throughout all the associated links.
>>>
>>> Todd Fujinaka
>>> Technical Marketing Engineer
>>> LAN Access Division (LAD)
>>> Intel Corporation
>>> todd.fujinaka@intel.com
>>> (503) 712-4565
>>>
>>>
>>> -----Original Message-----
>>> From: Ethan Zhao [mailto:ethan.kernel@gmail.com] 
>>> Sent: Wednesday, November 28, 2012 7:10 PM
>>> To: Fujinaka, Todd
>>> Cc: Joe Jin; Ben Hutchings; Mary Mcgrath; netdev@vger.kernel.org; e1000-devel@lists.sf.net; linux-kernel@vger.kernel.org; linux-pci
>>> Subject: Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
>>>
>>> Joe,
>>>     Possibly your customer is running a kernel without source code on a platform whose vendor wouldn't like to fix BIOS issue( Is that a HP/Dell server ?).
>>>     Anyway, to see if is a payload issue or,  you could change the payload size with setpci tool to those devices and set the link retrain bit to trigger the link retraining to debug the issue and identity the root cause.  I thinks it is much easier than modify the BIOS or  eeprom of NIC.
>>>
>>>     e.g.
>>>    set device control register to 0f 00   (128 bytes payload size)
>>>    #   setpci -v -s 00:02.0 98.w=000f
>>>    set device link control register to 60h (retrain the link)
>>>    #  setpci -v -s 00:02.0 a0.b=60
>>>
>>>   Hope it works,  Just my 2 cents.
>>>
>>> Ethan.zhao@oracle.com
>>>
>>> On Wed, Nov 28, 2012 at 11:53 PM, Fujinaka, Todd <todd.fujinaka@intel.com> wrote:
>>>> The only EEPROM I know about or can speak to is the one attached to the 82571 and it doesn't set the MaxPayloadSize. That's done by the BIOS.
>>>>
>>>> Todd Fujinaka
>>>> Technical Marketing Engineer
>>>> LAN Access Division (LAD)
>>>> Intel Corporation
>>>> todd.fujinaka@intel.com
>>>> (503) 712-4565
>>>>
>>>>
>>>> -----Original Message-----
>>>> From: Joe Jin [mailto:joe.jin@oracle.com]
>>>> Sent: Wednesday, November 28, 2012 12:31 AM
>>>> To: Ben Hutchings
>>>> Cc: Fujinaka, Todd; Mary Mcgrath; netdev@vger.kernel.org; 
>>>> e1000-devel@lists.sf.net; linux-kernel@vger.kernel.org; linux-pci
>>>> Subject: Re: [E1000-devel] 82571EB: Detected Hardware Unit Hang
>>>>
>>>> On 11/28/12 02:10, Ben Hutchings wrote:
>>>>> On Tue, 2012-11-27 at 17:32 +0000, Fujinaka, Todd wrote:
>>>>>> Forgive me if I'm being too repetitious as I think some of this has 
>>>>>> been mentioned in the past.
>>>>>>
>>>>>> We (and by we I mean the Ethernet part and driver) can only change 
>>>>>> the advertised availability of a larger MaxPayloadSize. The size is 
>>>>>> negotiated by both sides of the link when the link is established.
>>>>>> The driver should not change the size of the link as it would be 
>>>>>> poking at registers outside of its scope and is controlled by the 
>>>>>> upstream bridge (not us).
>>>>> [...]
>>>>>
>>>>> MaxPayloadSize (MPS) is not negotiated between devices but is 
>>>>> programmed by the system firmware (at least for devices present at 
>>>>> boot - the kernel may be responsible in case of hotplug).  You can 
>>>>> use the kernel parameter 'pci=pcie_bus_perf' (or one of several 
>>>>> others) to set a policy that overrides this, but no policy will allow 
>>>>> setting MPS above the device's MaxPayloadSizeSupported (MPSS).
>>>>>
>>>>
>>>> Ben,
>>>>
>>>> Unfortunately I'm using 3.0.x kernel and this is not included in the kernel.
>>>> So I'm trying to use ethtool modify it from eeprom to see if help or no.
>>>>
>>>>
>>>> Todd, I'll review all MaxPayload for all devices, but need to say if it mismatch, customer could not modify it from BIOS for there was not entry at there, to test it, we have to find how to verify if this is the root cause, so still need to find the offset in eeprom.
>>>>
>>>> Thanks in advance,
>>>> Joe
>>>>
>>
>>
> 
> 


-- 
Oracle <http://www.oracle.com>
Joe Jin | Software Development Senior Manager | +8610.6106.5624
ORACLE | Linux and Virtualization
No. 24 Zhongguancun Software Park, Haidian District | 100193 Beijing 

------------------------------------------------------------------------------
LogMeIn Rescue: Anywhere, Anytime Remote support for IT. Free Trial
Remotely access PCs and mobile devices and provide instant support
Improve your efficiency, and focus on delivering more value-add services
Discover what IT Professionals Know. Rescue delivers
http://p.sf.net/sfu/logmein_12329d2d
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* [RFC PATCH] fix IP_ECN_set_ce
From: roy.qing.li @ 2012-12-19  6:21 UTC (permalink / raw)
  To: netdev

From: Li RongQing <roy.qing.li@gmail.com>

1. ECN uses the two least significant (right-most) bits of the DiffServ
field in the IPv4, so it should be in iph->tos, not in (iph->tos+1)

2. When setting CE, we should check if ECN Capable Transport supports,
both 10 and 01 mean ECN Capable Transport, so only check 10 is not enough
    00: Non ECN-Capable Transport — Non-ECT
    10: ECN Capable Transport — ECT(0)
    01: ECN Capable Transport — ECT(1)
    11: Congestion Encountered — CE

3. Remove the misunderstand comment

4. fix the checksum computation

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
---
 include/net/inet_ecn.h |   22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index aab7375..545a683 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -73,27 +73,13 @@ static inline void INET_ECN_dontxmit(struct sock *sk)
 
 static inline int IP_ECN_set_ce(struct iphdr *iph)
 {
-	u32 check = (__force u32)iph->check;
-	u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
-
-	/*
-	 * After the last operation we have (in binary):
-	 * INET_ECN_NOT_ECT => 01
-	 * INET_ECN_ECT_1   => 10
-	 * INET_ECN_ECT_0   => 11
-	 * INET_ECN_CE      => 00
-	 */
-	if (!(ecn & 2))
+	u32 ecn = iph->tos & INET_ECN_MASK;
+
+	if (INET_ECN_is_ce(ecn) || INET_ECN_is_not_ect(ecn))
 		return !ecn;
 
-	/*
-	 * The following gives us:
-	 * INET_ECN_ECT_1 => check += htons(0xFFFD)
-	 * INET_ECN_ECT_0 => check += htons(0xFFFE)
-	 */
-	check += (__force u16)htons(0xFFFB) + (__force u16)htons(ecn);
+	csum_replace2(&iph->check, iph->tos, iph->tos|INET_ECN_CE);
 
-	iph->check = (__force __sum16)(check + (check>=0xFFFF));
 	iph->tos |= INET_ECN_CE;
 	return 1;
 }
-- 
1.7.10.4

^ permalink raw reply related

* Re: TCP delayed ACK heuristic
From: Cong Wang @ 2012-12-19  6:54 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, Ben Greear, David Miller, Stephen Hemminger, Rick Jones,
	Thomas Graf
In-Reply-To: <1355848224.9380.30.camel@edumazet-glaptop>

On Tue, 2012-12-18 at 08:30 -0800, Eric Dumazet wrote:
>  
> 
> ACKS might also be delayed because of bidirectional traffic, and is more
> controlled by the application response time. TCP stack can not easily
> estimate it.

So we still need a knob?

> 
> If you focus on bulk receive, LRO/GRO should already lower number of
> ACKS to an acceptable level and without major disruption.

Indeed.

> 
> Stretch acks are not only the receiver concern, there are issues for the
> sender that you cannot always control/change.
> 
> I recommend reading RFC2525 2.13
> 

Very helpful information!

On the sender's side, it needs to "notify" the receiver not to send
stretch acks when it is in slow-start. But I think the receiver can
detect slow-start too on its own side (based one the window size?).

Thanks!

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox