Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next] net: treewide use of RCU_INIT_POINTER
From: Paul E. McKenney @ 2011-11-29  1:44 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1322068172.17693.61.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On Wed, Nov 23, 2011 at 06:09:32PM +0100, Eric Dumazet wrote:
> rcu_assign_pointer(ptr, NULL) can be safely replaced by
> RCU_INIT_POINTER(ptr, NULL)
> 
> (old rcu_assign_pointer() macro was testing the NULL value and could
> omit the smp_wmb(), but this had to be removed because of compiler
> warnings)
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

This was probably the one lost in the USA Thanksgiving turkey, but...

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> ---
>  drivers/net/ethernet/broadcom/bnx2.c               |    2 -
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c   |    2 -
>  drivers/net/ethernet/broadcom/cnic.c               |    6 ++---
>  drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c |    4 +--
>  drivers/net/macvtap.c                              |    8 +++----
>  drivers/net/ppp/pptp.c                             |    2 -
>  drivers/net/team/team_mode_activebackup.c          |    2 -
>  drivers/net/wireless/ath/carl9170/main.c           |   12 +++++------
>  net/core/netprio_cgroup.c                          |    4 +--
>  9 files changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
> index 83d8cef..d573169 100644
> --- a/drivers/net/ethernet/broadcom/bnx2.c
> +++ b/drivers/net/ethernet/broadcom/bnx2.c
> @@ -409,7 +409,7 @@ static int bnx2_unregister_cnic(struct net_device *dev)
>  	mutex_lock(&bp->cnic_lock);
>  	cp->drv_state = 0;
>  	bnapi->cnic_present = 0;
> -	rcu_assign_pointer(bp->cnic_ops, NULL);
> +	RCU_INIT_POINTER(bp->cnic_ops, NULL);
>  	mutex_unlock(&bp->cnic_lock);
>  	synchronize_rcu();
>  	return 0;
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> index 83481e2..0cdbb70 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
> @@ -11587,7 +11587,7 @@ static int bnx2x_unregister_cnic(struct net_device *dev)
> 
>  	mutex_lock(&bp->cnic_mutex);
>  	cp->drv_state = 0;
> -	rcu_assign_pointer(bp->cnic_ops, NULL);
> +	RCU_INIT_POINTER(bp->cnic_ops, NULL);
>  	mutex_unlock(&bp->cnic_mutex);
>  	synchronize_rcu();
>  	kfree(bp->cnic_kwq);
> diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
> index 099f41d..b336e55 100644
> --- a/drivers/net/ethernet/broadcom/cnic.c
> +++ b/drivers/net/ethernet/broadcom/cnic.c
> @@ -506,7 +506,7 @@ int cnic_unregister_driver(int ulp_type)
>  	}
>  	read_unlock(&cnic_dev_lock);
> 
> -	rcu_assign_pointer(cnic_ulp_tbl[ulp_type], NULL);
> +	RCU_INIT_POINTER(cnic_ulp_tbl[ulp_type], NULL);
> 
>  	mutex_unlock(&cnic_lock);
>  	synchronize_rcu();
> @@ -579,7 +579,7 @@ static int cnic_unregister_device(struct cnic_dev *dev, int ulp_type)
>  	}
>  	mutex_lock(&cnic_lock);
>  	if (rcu_dereference(cp->ulp_ops[ulp_type])) {
> -		rcu_assign_pointer(cp->ulp_ops[ulp_type], NULL);
> +		RCU_INIT_POINTER(cp->ulp_ops[ulp_type], NULL);
>  		cnic_put(dev);
>  	} else {
>  		pr_err("%s: device not registered to this ulp type %d\n",
> @@ -5134,7 +5134,7 @@ static void cnic_stop_hw(struct cnic_dev *dev)
>  		}
>  		cnic_shutdown_rings(dev);
>  		clear_bit(CNIC_F_CNIC_UP, &dev->flags);
> -		rcu_assign_pointer(cp->ulp_ops[CNIC_ULP_L4], NULL);
> +		RCU_INIT_POINTER(cp->ulp_ops[CNIC_ULP_L4], NULL);
>  		synchronize_rcu();
>  		cnic_cm_shutdown(dev);
>  		cp->stop_hw(dev);
> diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
> index 90ff131..7f7882d 100644
> --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
> +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
> @@ -1301,7 +1301,7 @@ int cxgb3_offload_activate(struct adapter *adapter)
> 
>  out_free_l2t:
>  	t3_free_l2t(L2DATA(dev));
> -	rcu_assign_pointer(dev->l2opt, NULL);
> +	RCU_INIT_POINTER(dev->l2opt, NULL);
>  out_free:
>  	kfree(t);
>  	return err;
> @@ -1329,7 +1329,7 @@ void cxgb3_offload_deactivate(struct adapter *adapter)
>  	rcu_read_lock();
>  	d = L2DATA(tdev);
>  	rcu_read_unlock();
> -	rcu_assign_pointer(tdev->l2opt, NULL);
> +	RCU_INIT_POINTER(tdev->l2opt, NULL);
>  	call_rcu(&d->rcu_head, clean_l2_data);
>  	if (t->nofail_skb)
>  		kfree_skb(t->nofail_skb);
> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> index 1b7082d..7c88d13 100644
> --- a/drivers/net/macvtap.c
> +++ b/drivers/net/macvtap.c
> @@ -145,8 +145,8 @@ static void macvtap_put_queue(struct macvtap_queue *q)
>  	if (vlan) {
>  		int index = get_slot(vlan, q);
> 
> -		rcu_assign_pointer(vlan->taps[index], NULL);
> -		rcu_assign_pointer(q->vlan, NULL);
> +		RCU_INIT_POINTER(vlan->taps[index], NULL);
> +		RCU_INIT_POINTER(q->vlan, NULL);
>  		sock_put(&q->sk);
>  		--vlan->numvtaps;
>  	}
> @@ -223,8 +223,8 @@ static void macvtap_del_queues(struct net_device *dev)
>  					      lockdep_is_held(&macvtap_lock));
>  		if (q) {
>  			qlist[j++] = q;
> -			rcu_assign_pointer(vlan->taps[i], NULL);
> -			rcu_assign_pointer(q->vlan, NULL);
> +			RCU_INIT_POINTER(vlan->taps[i], NULL);
> +			RCU_INIT_POINTER(q->vlan, NULL);
>  			vlan->numvtaps--;
>  		}
>  	}
> diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
> index 89f829f..ede899c 100644
> --- a/drivers/net/ppp/pptp.c
> +++ b/drivers/net/ppp/pptp.c
> @@ -162,7 +162,7 @@ static void del_chan(struct pppox_sock *sock)
>  {
>  	spin_lock(&chan_lock);
>  	clear_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap);
> -	rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], NULL);
> +	RCU_INIT_POINTER(callid_sock[sock->proto.pptp.src_addr.call_id], NULL);
>  	spin_unlock(&chan_lock);
>  	synchronize_rcu();
>  }
> diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
> index b344275..f4d960e 100644
> --- a/drivers/net/team/team_mode_activebackup.c
> +++ b/drivers/net/team/team_mode_activebackup.c
> @@ -56,7 +56,7 @@ drop:
>  static void ab_port_leave(struct team *team, struct team_port *port)
>  {
>  	if (ab_priv(team)->active_port == port)
> -		rcu_assign_pointer(ab_priv(team)->active_port, NULL);
> +		RCU_INIT_POINTER(ab_priv(team)->active_port, NULL);
>  }
> 
>  static int ab_active_port_get(struct team *team, void *arg)
> diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
> index f06e069..5518592 100644
> --- a/drivers/net/wireless/ath/carl9170/main.c
> +++ b/drivers/net/wireless/ath/carl9170/main.c
> @@ -446,7 +446,7 @@ static void carl9170_op_stop(struct ieee80211_hw *hw)
> 
>  	mutex_lock(&ar->mutex);
>  	if (IS_ACCEPTING_CMD(ar)) {
> -		rcu_assign_pointer(ar->beacon_iter, NULL);
> +		RCU_INIT_POINTER(ar->beacon_iter, NULL);
> 
>  		carl9170_led_set_state(ar, 0);
> 
> @@ -678,7 +678,7 @@ unlock:
>  		vif_priv->active = false;
>  		bitmap_release_region(&ar->vif_bitmap, vif_id, 0);
>  		ar->vifs--;
> -		rcu_assign_pointer(ar->vif_priv[vif_id].vif, NULL);
> +		RCU_INIT_POINTER(ar->vif_priv[vif_id].vif, NULL);
>  		list_del_rcu(&vif_priv->list);
>  		mutex_unlock(&ar->mutex);
>  		synchronize_rcu();
> @@ -716,7 +716,7 @@ static void carl9170_op_remove_interface(struct ieee80211_hw *hw,
>  	WARN_ON(vif_priv->enable_beacon);
>  	vif_priv->enable_beacon = false;
>  	list_del_rcu(&vif_priv->list);
> -	rcu_assign_pointer(ar->vif_priv[id].vif, NULL);
> +	RCU_INIT_POINTER(ar->vif_priv[id].vif, NULL);
> 
>  	if (vif == main_vif) {
>  		rcu_read_unlock();
> @@ -1258,7 +1258,7 @@ static int carl9170_op_sta_add(struct ieee80211_hw *hw,
>  		}
> 
>  		for (i = 0; i < CARL9170_NUM_TID; i++)
> -			rcu_assign_pointer(sta_info->agg[i], NULL);
> +			RCU_INIT_POINTER(sta_info->agg[i], NULL);
> 
>  		sta_info->ampdu_max_len = 1 << (3 + sta->ht_cap.ampdu_factor);
>  		sta_info->ht_sta = true;
> @@ -1285,7 +1285,7 @@ static int carl9170_op_sta_remove(struct ieee80211_hw *hw,
>  			struct carl9170_sta_tid *tid_info;
> 
>  			tid_info = rcu_dereference(sta_info->agg[i]);
> -			rcu_assign_pointer(sta_info->agg[i], NULL);
> +			RCU_INIT_POINTER(sta_info->agg[i], NULL);
> 
>  			if (!tid_info)
>  				continue;
> @@ -1398,7 +1398,7 @@ static int carl9170_op_ampdu_action(struct ieee80211_hw *hw,
>  			spin_unlock_bh(&ar->tx_ampdu_list_lock);
>  		}
> 
> -		rcu_assign_pointer(sta_info->agg[tid], NULL);
> +		RCU_INIT_POINTER(sta_info->agg[tid], NULL);
>  		rcu_read_unlock();
> 
>  		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
> diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
> index 72ad0bc..3a9fd48 100644
> --- a/net/core/netprio_cgroup.c
> +++ b/net/core/netprio_cgroup.c
> @@ -285,7 +285,7 @@ static int netprio_device_event(struct notifier_block *unused,
>  		break;
>  	case NETDEV_UNREGISTER:
>  		old = rtnl_dereference(dev->priomap);
> -		rcu_assign_pointer(dev->priomap, NULL);
> +		RCU_INIT_POINTER(dev->priomap, NULL);
>  		if (old)
>  			kfree_rcu(old, rcu);
>  		break;
> @@ -332,7 +332,7 @@ static void __exit exit_cgroup_netprio(void)
>  	rtnl_lock();
>  	for_each_netdev(&init_net, dev) {
>  		old = rtnl_dereference(dev->priomap);
> -		rcu_assign_pointer(dev->priomap, NULL);
> +		RCU_INIT_POINTER(dev->priomap, NULL);
>  		if (old)
>  			kfree_rcu(old, rcu);
>  	}
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [PATCH 1/1] Adding examples in ip man page on basic use of the ip command.
From: Stephen Hemminger @ 2011-11-29  1:39 UTC (permalink / raw)
  To: Alex Juncu; +Cc: netdev
In-Reply-To: <4ECE31F6.4070002@ixiacom.com>

On Thu, 24 Nov 2011 14:00:54 +0200
Alex Juncu <ajuncu@ixiacom.com> wrote:

> ---
>   man/man8/ip.8 |   43 +++++++++++++++++++++++++++++++++++++++++++
>   1 files changed, 43 insertions(+), 0 deletions(-)
> 
> diff --git a/man/man8/ip.8 b/man/man8/ip.8
> index a20eca7..99c8953 100644
> --- a/man/man8/ip.8
> +++ b/man/man8/ip.8
> @@ -2855,6 +2855,49 @@ can be
>   .SS ip xfrm monitor - state monitoring for xfrm objects
>   The xfrm objects to monitor can be optionally specified.
> 
> +.SH EXAMPLES
> +.LP
> +To turn on an interface:
> +.RS
> +.nf
> +.B ip link set up dev eth0
> +.fi
> +.RE
> +.LP
> +To add an IPv4 address to an interface:
> +.RS
> +.nf
> +.B ip address add 141.85.37.42/24 dev eth0
> +.fi
> +.RE
> +.LP
> +To add a default route in the IPv4 routing table using a next hop IP 
> address:
> +.RS
> +.nf
> +.B ip route add  default via 141.85.37.1
> +.fi
> +.RE
> +.LP
> +Equivalent ways of listing address information for all the interfaces:
> +.RS
> +.nf
> +.B ip addr show
> +.fi
> +.RE
> +.LP
> +.RS
> +.nf
> +.B ip a s
> +.fi
> +.RE
> +.LP
> +.RS
> +.nf
> +.B ip a
> +.fi
> +.RE
> +.LP
> +


Simple examples at bottom of page are good, but it is unnecessary
to show how to abbreviate. That just confuses the newbie reading the examples.

Give one example of each of the basic normal tasks. See the git man pages
for good examples.

^ permalink raw reply

* Re: [PATCH net-next] tcp: avoid frag allocation for small frames
From: Vijay Subramanian @ 2011-11-29  1:04 UTC (permalink / raw)
  To: netdev
In-Reply-To: <1319286237.6180.49.camel@edumazet-laptop>

On 22 October 2011 05:23, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> tcp_sendmsg() uses select_size() helper to choose skb head size when a
> new skb must be allocated.
>
> If GSO is enabled for the socket, current strategy is to force all
> payload data to be outside of headroom, in PAGE fragments.
>
> This strategy is not welcome for small packets, wasting memory.
>
> Experiments show that best results are obtained when using 2048 bytes
> for skb head (This includes the skb overhead and various headers)
>
> This patch provides better len/truesize ratios for packets sent to
> loopback device, and reduce memory needs for in-flight loopback packets,
> particularly on arches with big pages.
>
> If a sender sends many 1-byte packets to an unresponsive application,
> receiver rmem_alloc will grow faster and will stop queuing these packets
> sooner, or will collapse its receive queue to free excess memory.
>
> netperf -t TCP_RR results are improved by ~4 %, and many workloads are
> improved as well (tbench, mysql...)
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  net/ipv4/tcp.c |    9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 704adad..cd45b44 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -897,9 +897,12 @@ static inline int select_size(const struct sock *sk, int sg)
>        int tmp = tp->mss_cache;
>
>        if (sg) {
> -               if (sk_can_gso(sk))
> -                       tmp = 0;
> -               else {
> +               if (sk_can_gso(sk)) {
> +                       /* Small frames wont use a full page:
> +                        * Payload will immediately follow tcp header.
> +                        */
> +                       tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
> +               } else {
>                        int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
>
>                        if (tmp >= pgbreak &&
>
>



This patch from Eric fixing select_size in tcp.c was queued to be
applied but does not seem to be in net-next tree. Was this somehow
overlooked or have I missed something?

Regards,
Vijay Subramanian

^ permalink raw reply

* Re: pull request: wireless-next 2011-11-28
From: David Miller @ 2011-11-29  0:55 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20111128200221.GD2681-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>

From: "John W. Linville" <linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Date: Mon, 28 Nov 2011 15:02:22 -0500

> Another big pull request -- my apologies!  I have a couple of
> sub-maintainers that had let their patch queues back-up a bit too
> long...
> 
> This pull request contains a lot of refactoring in the iwlegacy and
> ath6kl drivers.  mac80211 and rndis_wlan get some love as well.  Beyond
> that, there is a handful of small updates and minor fixes to some
> drivers and around the wireless stack.
> 
> Let me know if there are problems!

Pulled, but please push back on the ath6kl guys, they seem to have this
disease where they add tons of debugfs files for controlling things like
QoS from userspace instead of creating and using generic configuration
mechanisms via netlink or similar that any other driver could support too.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next 0/2] tg3: Quick fixes
From: David Miller @ 2011-11-29  0:20 UTC (permalink / raw)
  To: mcarlson; +Cc: netdev
In-Reply-To: <1322509264-19636-1-git-send-email-mcarlson@broadcom.com>

From: "Matt Carlson" <mcarlson@broadcom.com>
Date: Mon, 28 Nov 2011 11:41:02 -0800

> This patchset fixes two recently discovered bugs.

Applied, but please work on the PCI quirk as suggested by Ben
Hutchings.

^ permalink raw reply

* Re: [PATCH net-next 4/4] sch_sfb: use skb_flow_dissect()
From: David Miller @ 2011-11-29  0:10 UTC (permalink / raw)
  To: eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, dan-BZ4SNL/Vixll57MIdRCFDg,
	netdev-u79uwXL29TY76Z2rM5mHXA, fw-HFFVJYpyMKqzQB+pC5nmwQ,
	jhs-jkUAjuhPggJWk0Htik3J/w,
	john.r.fastabend-ral2JQCrhuEAvxtiuMwx3w,
	herbert-F6s6mLieUQo7FNHlEwC/lvQIK84fMopw,
	chrisw-H+wXaHxf7aLQT0dZR+AlfA, shemminger-ZtmgI6mnKB3QT0dZR+AlfA
In-Reply-To: <1322493902.2292.72.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Mon, 28 Nov 2011 16:25:02 +0100

> Current SFB double hashing is not fulfilling SFB theory, if two flows
> share same rxhash value.
> 
> Using skb_flow_dissect() permits to really have better hash dispersion,
> and get tunnelling support as well.
> 
> Double hashing point was mentioned by Florian Westphal
> 
> Signed-off-by: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 3/4] cls_flow: use skb_flow_dissect()
From: David Miller @ 2011-11-29  0:10 UTC (permalink / raw)
  To: eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w
  Cc: chrisw-H+wXaHxf7aLQT0dZR+AlfA, dev-yBygre7rU0TnMu66kgdUjQ,
	netdev-u79uwXL29TY76Z2rM5mHXA, fw-HFFVJYpyMKqzQB+pC5nmwQ,
	jhs-jkUAjuhPggJWk0Htik3J/w,
	john.r.fastabend-ral2JQCrhuEAvxtiuMwx3w,
	herbert-F6s6mLieUQo7FNHlEwC/lvQIK84fMopw,
	shemminger-ZtmgI6mnKB3QT0dZR+AlfA, dan-BZ4SNL/Vixll57MIdRCFDg
In-Reply-To: <1322493858.2292.71.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Mon, 28 Nov 2011 16:24:18 +0100

> Instead of using a custom flow dissector, use skb_flow_dissect() and
> benefit from tunnelling support.
> 
> This lack of tunnelling support was mentioned by Dan Siemon.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 2/4] net: use skb_flow_dissect() in __skb_get_rxhash()
From: David Miller @ 2011-11-29  0:09 UTC (permalink / raw)
  To: eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, chrisw-H+wXaHxf7aLQT0dZR+AlfA,
	netdev-u79uwXL29TY76Z2rM5mHXA, fw-HFFVJYpyMKqzQB+pC5nmwQ,
	jhs-jkUAjuhPggJWk0Htik3J/w,
	john.r.fastabend-ral2JQCrhuEAvxtiuMwx3w,
	herbert-F6s6mLieUQo7FNHlEwC/lvQIK84fMopw,
	shemminger-ZtmgI6mnKB3QT0dZR+AlfA, dan-BZ4SNL/Vixll57MIdRCFDg
In-Reply-To: <1322493803.2292.70.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Mon, 28 Nov 2011 16:23:23 +0100

> No functional changes.
> 
> This uses the code we factorized in skb_flow_dissect()
> 
> Signed-off-by: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 1/4] net: introduce skb_flow_dissect()
From: David Miller @ 2011-11-29  0:09 UTC (permalink / raw)
  To: eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, chrisw-H+wXaHxf7aLQT0dZR+AlfA,
	netdev-u79uwXL29TY76Z2rM5mHXA, fw-HFFVJYpyMKqzQB+pC5nmwQ,
	jhs-jkUAjuhPggJWk0Htik3J/w,
	john.r.fastabend-ral2JQCrhuEAvxtiuMwx3w,
	herbert-F6s6mLieUQo7FNHlEwC/lvQIK84fMopw,
	shemminger-ZtmgI6mnKB3QT0dZR+AlfA, dan-BZ4SNL/Vixll57MIdRCFDg
In-Reply-To: <1322493738.2292.69.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Mon, 28 Nov 2011 16:22:18 +0100

> We use at least two flow dissectors in network stack, with known
> limitations and code duplication.
> 
> Introduce skb_flow_dissect() to factorize this, highly inspired from
> existing dissector from __skb_get_rxhash()
> 
> Note : We extensively use skb_header_pointer(), this permits us to not
> touch skb at all.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Applied.

^ permalink raw reply

* Re: [PATCH v2 net-next 2/2] netem: add cell concept to simulate special MAC behavior
From: Hagen Paul Pfeifer @ 2011-11-29  0:07 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, Stephen Hemminger
In-Reply-To: <1322524125.2970.48.camel@edumazet-laptop>

* Eric Dumazet | 2011-11-29 00:48:45 [+0100]:

>Is it what is really needed ?

argl ... I will repost a final version - thank you Eric!

^ permalink raw reply

* Re: [PATCH net-next 1/4] net: introduce skb_flow_dissect()
From: David Miller @ 2011-11-29  0:06 UTC (permalink / raw)
  To: dm-ut6Up61K2wZBDgjK7y7TUQ
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, chrisw-H+wXaHxf7aLQT0dZR+AlfA,
	eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, fw-HFFVJYpyMKqzQB+pC5nmwQ,
	jhs-jkUAjuhPggJWk0Htik3J/w,
	john.r.fastabend-ral2JQCrhuEAvxtiuMwx3w,
	herbert-F6s6mLieUQo7FNHlEwC/lvQIK84fMopw,
	shemminger-ZtmgI6mnKB3QT0dZR+AlfA, dan-BZ4SNL/Vixll57MIdRCFDg
In-Reply-To: <4ED3B603.4010702-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>

From: Dimitris Michailidis <dm-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>
Date: Mon, 28 Nov 2011 08:25:39 -0800

>> +bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
>> *flow)
>> +{
>> +	int poff, nhoff = skb_network_offset(skb);
>> +	u8 ip_proto;
>> +	u16 proto = skb->protocol;
> 
> __be16 instead of u16 for proto?

I'll take care of this when I apply these patches.

^ permalink raw reply

* Re: [PATCH net-next] tcp: tcp_sendmsg() wrong access to sk_route_caps
From: David Miller @ 2011-11-28 23:58 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, mirq-linux
In-Reply-To: <1322476067.2292.12.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 28 Nov 2011 11:27:47 +0100

> Now that sk_route_caps is u64, it's dangerous to use an integer to store
> the result of an AND operator. It won't work if NETIF_F_SG is moved to the
> upper part of u64.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> CC: Michał Mirosław <mirq-linux@rere.qmqm.pl>

Applied, thanks Eric.

^ permalink raw reply

* Re: [PATCH v6 01/10] Basic kernel memory functionality for the Memory Controller
From: KAMEZAWA Hiroyuki @ 2011-11-28 23:55 UTC (permalink / raw)
  To: Glauber Costa
  Cc: linux-kernel, lizf, ebiederm, davem, paul, gthelen, netdev,
	linux-mm, kirill, avagin, devel, eric.dumazet, cgroups
In-Reply-To: <4ED36A6D.60900@parallels.com>

On Mon, 28 Nov 2011 09:03:09 -0200
Glauber Costa <glommer@parallels.com> wrote:

> On 11/28/2011 12:24 AM, KAMEZAWA Hiroyuki wrote:
> > On Fri, 25 Nov 2011 15:38:07 -0200
> > Glauber Costa<glommer@parallels.com>  wrote:
> >
> >> This patch lays down the foundation for the kernel memory component
> >> of the Memory Controller.
> >>
> >> As of today, I am only laying down the following files:
> >>
> >>   * memory.independent_kmem_limit
> >>   * memory.kmem.limit_in_bytes (currently ignored)
> >>   * memory.kmem.usage_in_bytes (always zero)
> >>
> >> Signed-off-by: Glauber Costa<glommer@parallels.com>
> >> Reviewed-by: Kirill A. Shutemov<kirill@shutemov.name>
> >> CC: Paul Menage<paul@paulmenage.org>
> >> CC: Greg Thelen<gthelen@google.com>
> >> ---
> >>   Documentation/cgroups/memory.txt |   36 ++++++++++++-
> >>   init/Kconfig                     |   14 +++++
> >>   mm/memcontrol.c                  |  107 ++++++++++++++++++++++++++++++++++++--
> >>   3 files changed, 150 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
> >> index 06eb6d9..bf00cd2 100644
> >> --- a/Documentation/cgroups/memory.txt
> >> +++ b/Documentation/cgroups/memory.txt
> >> @@ -44,8 +44,9 @@ Features:
> >>    - oom-killer disable knob and oom-notifier
> >>    - Root cgroup has no limit controls.
> >>
> >> - Kernel memory and Hugepages are not under control yet. We just manage
> >> - pages on LRU. To add more controls, we have to take care of performance.
> >> + Hugepages is not under control yet. We just manage pages on LRU. To add more
> >> + controls, we have to take care of performance. Kernel memory support is work
> >> + in progress, and the current version provides basically functionality.
> >>
> >>   Brief summary of control files.
> >>
> >> @@ -56,8 +57,11 @@ Brief summary of control files.
> >>   				 (See 5.5 for details)
> >>    memory.memsw.usage_in_bytes	 # show current res_counter usage for memory+Swap
> >>   				 (See 5.5 for details)
> >> + memory.kmem.usage_in_bytes	 # show current res_counter usage for kmem only.
> >> +				 (See 2.7 for details)
> >>    memory.limit_in_bytes		 # set/show limit of memory usage
> >>    memory.memsw.limit_in_bytes	 # set/show limit of memory+Swap usage
> >> + memory.kmem.limit_in_bytes	 # if allowed, set/show limit of kernel memory
> >>    memory.failcnt			 # show the number of memory usage hits limits
> >>    memory.memsw.failcnt		 # show the number of memory+Swap hits limits
> >>    memory.max_usage_in_bytes	 # show max memory usage recorded
> >> @@ -72,6 +76,9 @@ Brief summary of control files.
> >>    memory.oom_control		 # set/show oom controls.
> >>    memory.numa_stat		 # show the number of memory usage per numa node
> >>
> >> + memory.independent_kmem_limit	 # select whether or not kernel memory limits are
> >> +				   independent of user limits
> >> +
> >>   1. History
> >>
> >>   The memory controller has a long history. A request for comments for the memory
> >> @@ -255,6 +262,31 @@ When oom event notifier is registered, event will be delivered.
> >>     per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
> >>     zone->lru_lock, it has no lock of its own.
> >>
> >> +2.7 Kernel Memory Extension (CONFIG_CGROUP_MEM_RES_CTLR_KMEM)
> >> +
> >> + With the Kernel memory extension, the Memory Controller is able to limit
> >> +the amount of kernel memory used by the system. Kernel memory is fundamentally
> >> +different than user memory, since it can't be swapped out, which makes it
> >> +possible to DoS the system by consuming too much of this precious resource.
> >> +Kernel memory limits are not imposed for the root cgroup.
> >> +
> >> +Memory limits as specified by the standard Memory Controller may or may not
> >> +take kernel memory into consideration. This is achieved through the file
> >> +memory.independent_kmem_limit. A Value different than 0 will allow for kernel
> >> +memory to be controlled separately.
> >> +
> >> +When kernel memory limits are not independent, the limit values set in
> >> +memory.kmem files are ignored.
> >> +
> >> +Currently no soft limit is implemented for kernel memory. It is future work
> >> +to trigger slab reclaim when those limits are reached.
> >> +
> >> +CAUTION: As of this writing, the kmem extention may prevent tasks from moving
> >> +among cgroups. If a task has kmem accounting in a cgroup, the task cannot be
> >> +moved until the kmem resource is released. Also, until the resource is fully
> >> +released, the cgroup cannot be destroyed. So, please consider your use cases
> >> +and set kmem extention config option carefully.
> >> +
> >
> > This makes it seem that memcg 'has' a kernel memory limiting feature for all kinds of kmem.
> > Could you add a "list of currently controlled kmems" section ?
> > And update the list in later patches ?
> >
> > Thanks,
> > -Kame
> >
> >
> Hi Kame,
> 
> Thanks for your review.
> Since none of your comments are blockers, I'd prefer to send follow up 
> patches if you don't mind - assuming Dave won't have any restrictions 
> himself that would prevent him from picking this series. If I have to 
> resend it anyway, I'll be more than happy to address them all in my next 
> submission
> 

As you like. But please clarify whether my comments pointing out bugs in patches 02/10
and 06/10 are incorrect.

Thanks,
-Kame


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH 2/3] route: set iif and oif information in flowi struct
From: Julian Anastasov @ 2011-11-28 23:53 UTC (permalink / raw)
  To: Ulrich Weber; +Cc: netdev, davem
In-Reply-To: <1322511292-1413-3-git-send-email-ulrich.weber@sophos.com>


	Hello,

On Mon, 28 Nov 2011, Ulrich Weber wrote:

> Outgoing packets have loopback interface as incoming interface.
> 
> Signed-off-by: Ulrich Weber <ulrich.weber@sophos.com>
> ---
>  net/ipv4/route.c        |    4 ++++
>  net/ipv4/xfrm4_policy.c |   19 +++++++++++++++++--
>  net/ipv6/fib6_rules.c   |   10 ++++++++--
>  net/ipv6/xfrm6_policy.c |   18 ++++++++++++++++--
>  4 files changed, 45 insertions(+), 6 deletions(-)
> 
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index fb47c8f..1702ec0 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -2744,6 +2744,10 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
>  				flp4->saddr = rth->rt_src;
>  			if (!flp4->daddr)
>  				flp4->daddr = rth->rt_dst;
> +			if (!flp4->flowi4_iif)
> +				flp4->flowi4_iif = net->loopback_dev->ifindex;
> +			if (!flp4->flowi4_oif)
> +				flp4->flowi4_oif = rth->rt_iif;

	Maybe setting flowi4_oif unconditionally here is more
correct, because ip_route_output_slow fills flowi4_oif with
the selected oif; it can even change the provided original
oif in flowi4_oif. What about this?:

			flp4->flowi4_oif = rth->dst.dev->ifindex;

	OTOH, rt_iif has somewhat complex semantics: the original oif
or the selected oif. Maybe you prefer flowi4_oif to hold
the selected oif, right?

	I see one dangerous place that must be checked:
icmp_route_lookup. Before now __ip_route_output_key was
called after xfrm_decode_session_reverse with 0 in
flowi4_oif, i.e. no oif binding was used. But now when
decode_session sets flowi4_oif we will restrict the route
via this interface?

>  			return rth;
>  		}
>  		RT_CACHE_STAT_INC(out_hlist_search);

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* Re: [PATCH v2 net-next 2/2] netem: add cell concept to simulate special MAC behavior
From: Eric Dumazet @ 2011-11-28 23:48 UTC (permalink / raw)
  To: Hagen Paul Pfeifer; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20111128233004.GH3045@nuttenaction>

Le mardi 29 novembre 2011 à 00:30 +0100, Hagen Paul Pfeifer a écrit :
> * Eric Dumazet | 2011-11-29 00:01:07 [+0100]:
> 
> >> -static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
> >> +static psched_time_t packet_len_2_sched_time(unsigned int len,
> >> +					     struct netem_sched_data *q)
> >>  {
> >> -	return PSCHED_NS2TICKS((u64)len * NSEC_PER_SEC / rate);
> >> +	len += q->packet_overhead;
> >> +
> >> +	if (q->cell_size) {
> >> +		u32 carry = len % q->cell_size;
> >> +		len += carry;
> >
> >I dont understand this part  (len += carry;)
> 
> Say the original packet is 100 byte, cellsize is 40 byte: three full size link
> layer frames are required: 40 + 40 + 40 == 100 + 20. This is used for TDMA,
> ATM or slot schemes where the remainder cannot be used.
> 

I still don't understand.

Say you send 119 bytes

119 % 40 = 39

119 + 39 = 158

Is it what is really needed ?

> Later in code carry is reused if cell overhead is configured.
> 

In this example, cells will be :

158 / 40 = 3   + one (because carry is not 0)

len += 4 * cell_overhead

^ permalink raw reply

* Re: (repost) vhost tree for linux-next
From: Stephen Rothwell @ 2011-11-28 23:36 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: linux-next, linux-pci, netdev, linux-scsi
In-Reply-To: <20111127124816.GA23270@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 2220 bytes --]

Hi Michael,

On Sun, 27 Nov 2011 14:48:17 +0200 "Michael S. Tsirkin" <mst@redhat.com> wrote:
>
> I normally merge things through Dave, Rusty, Jesse or Jens,
> not directly to Linus. However, sometimes there's a large change
> (for example, my recent RFC for pci_iomap changes)
> where estimating how it interacts with other platforms and planned
> changes in the next kernel would be beneficial.
> 
> For this purpose, it would be helpful if my tree were added to linux-next.
> The tree is:
> git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
> 
> Could you do this please?

I have added this tree from today with just you as the contact.

Thanks for adding your subsystem tree as a participant of linux-next.  As
you may know, this is not a judgment of your code.  The purpose of
linux-next is for integration testing and to lower the impact of
conflicts between subsystems in the next merge window. 

You will need to ensure that the patches/commits in your tree/series have
been:
     * submitted under GPL v2 (or later) and include the Contributor's
	Signed-off-by,
     * posted to the relevant mailing list,
     * reviewed by you (or another maintainer of your subsystem tree),
     * successfully unit tested, and 
     * destined for the current or next Linux merge window.

Basically, this should be just what you would send to Linus (or ask him
to fetch).  It is allowed to be rebased if you deem it necessary.

-- 
Cheers,
Stephen Rothwell 
sfr@canb.auug.org.au

Legal Stuff:
By participating in linux-next, your subsystem tree contributions are
public and will be included in the linux-next trees.  You may be sent
e-mail messages indicating errors or other issues when the
patches/commits from your subsystem tree are merged and tested in
linux-next.  These messages may also be cross-posted to the linux-next
mailing list, the linux-kernel mailing list, etc.  The linux-next tree
project and IBM (my employer) make no warranties regarding the linux-next
project, the testing procedures, the results, the e-mails, etc.  If you
don't agree to these ground rules, let me know and I'll remove your tree
from participation in linux-next.

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH v2 net-next 2/2] netem: add cell concept to simulate special MAC behavior
From: Hagen Paul Pfeifer @ 2011-11-28 23:30 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, Stephen Hemminger
In-Reply-To: <1322521267.2970.43.camel@edumazet-laptop>

* Eric Dumazet | 2011-11-29 00:01:07 [+0100]:

>> -static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
>> +static psched_time_t packet_len_2_sched_time(unsigned int len,
>> +					     struct netem_sched_data *q)
>>  {
>> -	return PSCHED_NS2TICKS((u64)len * NSEC_PER_SEC / rate);
>> +	len += q->packet_overhead;
>> +
>> +	if (q->cell_size) {
>> +		u32 carry = len % q->cell_size;
>> +		len += carry;
>
>I dont understand this part  (len += carry;)

Say the original packet is 100 byte, cellsize is 40 byte: three full size link
layer frames are required: 40 + 40 + 40 == 100 + 20. This is used for TDMA,
ATM or slot schemes where the remainder cannot be used.

Later in code carry is reused if cell overhead is configured.

>Also you use a lot of divides... Probably OK for netem...

I know, but

1) the branch is not hot (not taken at all if rate is not called)
2) cell_size cannot be restricted to a power of two - so I saw no real
   optimization potential.


Hagen

^ permalink raw reply

* Re: [ovs-dev] [GIT PULL v2] Open vSwitch
From: Ben Pfaff @ 2011-11-28 23:14 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: Herbert Xu, dev, netdev, David Miller
In-Reply-To: <1322518873.6118.7.camel@mojatatu>

On Mon, Nov 28, 2011 at 05:21:13PM -0500, Jamal Hadi Salim wrote:
> On Mon, 2011-11-28 at 08:01 -0800, Ben Pfaff wrote:
> 
> > Regarding OpenFlow rate limiting, in addition to Martin's response, Open
> > vSwitch has implemented controller rate limiting since day one.  It is
> > documented in ovs-vswitchd.conf.db(5):
> 
> Ok, I think thats a good start. My experience says just rate limiting
> may not be sufficient - unless the rate limiting is adaptive in some
> form; or just use strict prio where you let the exception traffic
> rot if you have other work - maybe thats what Martin was talking 
> about.
> 
> The problem is more in the outbound towards the external controller.
> You dont have multiple queues (given a single TCP socket) and config,
> events, and exception packets are all shared in one queue.

I believe that Martin's point was that production controllers don't
usually get any packets to the controller at all, because they
configure the flow table to handle or drop all traffic.  Individual
flow table entries can direct traffic to the controller (subject
optionally to both Open vSwitch rate limiting of packets to the
controller and to any QoS policy for the controller connection), and
some controllers might use this feature to direct specific types of
traffic (e.g. LLDP) to the controller.

Open vSwitch doesn't limit a controller to a single OpenFlow TCP
connection.  A controller can set up multiple OpenFlow connections to
a single OVS bridge, use one of them for receiving packets, and use
the others for other purposes.  I don't know whether anyone does this,
because keeping the amount of traffic sent to the controller to a
minimum is effective in practice.

^ permalink raw reply

* Re: [PATCH] ipv6: Set mcast_hops to IPV6_DEFAULT_MCASTHOPS when -1 was given.
From: David Miller @ 2011-11-28 23:09 UTC (permalink / raw)
  To: lw; +Cc: netdev
In-Reply-To: <4ED3394E.7050602@cn.fujitsu.com>

From: Li Wei <lw@cn.fujitsu.com>
Date: Mon, 28 Nov 2011 15:33:34 +0800

> We need to set np->mcast_hops to it's default value at this moment
> otherwise when we use it and found it's value is -1, the logic to
> get default hop limit doesn't take multicast into account and will
> return wrong hop limit(IPV6_DEFAULT_HOPLIMIT) which is for unicast.
> 
> Signed-off-by: Li Wei <lw@cn.fujitsu.com>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH] net: Fix corruption in /proc/*/net/dev_mcast
From: David Miller @ 2011-11-28 23:08 UTC (permalink / raw)
  To: eric.dumazet
  Cc: anton, levinsasha928, mpm, cl, penberg, linux-mm, linux-kernel,
	netdev, mihai.maruseac
In-Reply-To: <1322474116.2292.5.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 28 Nov 2011 10:55:16 +0100

>> With slub debugging on I see red zone issues in /proc/*/net/dev_mcast:
>> 
>> =============================================================================
>> BUG kmalloc-8: Redzone overwritten
>> -----------------------------------------------------------------------------
 ...
>> dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates
>> sizeof(struct seq_net_private) of private data, whereas it expects
>> sizeof(struct dev_iter_state):
>> 
>> struct dev_iter_state {
>> 	struct seq_net_private p;
>> 	unsigned int pos; /* bucket << BUCKET_SPACE + offset */
>> };
>> 
>> Create dev_seq_open_ops and use it so we don't have to expose
>> struct dev_iter_state.
>> 
>> Signed-off-by: Anton Blanchard <anton@samba.org>
 ...
> Problem added by commit f04565ddf52e4 (dev: use name hash for
> dev_seq_ops)
> 
> 
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
> CC: Mihai Maruseac <mihai.maruseac@gmail.com>

Applied, thanks everyone.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v2 net-next 2/2] netem: add cell concept to simulate special MAC behavior
From: Eric Dumazet @ 2011-11-28 23:01 UTC (permalink / raw)
  To: Hagen Paul Pfeifer; +Cc: netdev, Stephen Hemminger
In-Reply-To: <1322187773-27768-2-git-send-email-hagen@jauu.net>

Le vendredi 25 novembre 2011 à 03:22 +0100, Hagen Paul Pfeifer a écrit :
> This extension can be used to simulate special link layer
> characteristics. Simulate because packet data is not modified, only the
> calculation base is changed to delay a packet based on the original
> packet size and artificial cell information.
> 
> packet_overhead can be used to simulate a link layer header compression
> scheme (e.g. set packet_overhead to -20) or with a positive
> packet_overhead value an additional MAC header can be simulated. It is
> also possible to "replace" the 14 byte Ethernet header with something
> else.
> 
> cell_size and cell_overhead can be used to simulate link layer schemes,
> based on cells, like some TDMA schemes. Another application area are MAC
> schemes using a link layer fragmentation with a (small) header each.
> Cell size is the maximum amount of data bytes within one cell. Cell
> overhead is an additional variable to change the per-cell-overhead (e.g.
> 5 byte header per fragment).
> 
> Example (5 kbit/s, 20 byte per packet overhead, cellsize 100 byte, per
> cell overhead 5 byte):
> 
> 	tc qdisc add dev eth0 root netem rate 5kbit 20 100 5
> 
> Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
> ---
>  include/linux/pkt_sched.h |    3 +++
>  net/sched/sch_netem.c     |   30 +++++++++++++++++++++++++++---
>  2 files changed, 30 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
> index 26c37ca..63845cf 100644
> --- a/include/linux/pkt_sched.h
> +++ b/include/linux/pkt_sched.h
> @@ -498,6 +498,9 @@ struct tc_netem_corrupt {
>  
>  struct tc_netem_rate {
>  	__u32	rate;	/* byte/s */
> +	__s32   packet_overhead;
> +	__u32   cell_size;
> +	__s32   cell_overhead;
>  };
>  
>  enum {
> diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
> index 9b7af9f..11ca527 100644
> --- a/net/sched/sch_netem.c
> +++ b/net/sched/sch_netem.c
> @@ -80,6 +80,9 @@ struct netem_sched_data {
>  	u32 reorder;
>  	u32 corrupt;
>  	u32 rate;
> +	s32 packet_overhead;
> +	u32 cell_size;
> +	s32 cell_overhead;
>  
>  	struct crndstate {
>  		u32 last;
> @@ -299,9 +302,24 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
>  	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
>  }
>  
> -static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
> +static psched_time_t packet_len_2_sched_time(unsigned int len,
> +					     struct netem_sched_data *q)
>  {
> -	return PSCHED_NS2TICKS((u64)len * NSEC_PER_SEC / rate);
> +	len += q->packet_overhead;
> +
> +	if (q->cell_size) {
> +		u32 carry = len % q->cell_size;
> +		len += carry;

I dont understand this part  (len += carry;)

Also you use a lot of divides... Probably OK for netem...

> +
> +		if (q->cell_overhead) {
> +			u32 cells = len / q->cell_size;
> +			if (carry)
> +				cells += 1;
> +			len += cells * q->cell_overhead;
> +		}
> +	}
> +
> +	return PSCHED_NS2TICKS((u64)len * NSEC_PER_SEC / q->rate);
>  }

^ permalink raw reply

* Re: Issues with openflow protocol WAS(RE: [GIT PULL v2] Open vSwitch
From: Jamal Hadi Salim @ 2011-11-28 22:55 UTC (permalink / raw)
  To: Justin Pettit
  Cc: Fischer, Anna, dev-yBygre7rU0TnMu66kgdUjQ@public.gmane.org,
	David Miller, Herbert Xu,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <20124540-D566-41B0-B86F-0BCA19B948AA-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>

On Mon, 2011-11-28 at 10:44 -0800, Justin Pettit wrote:

> I realize you chair an IETF standard with overlapping goals with
> OpenFlow (ForCES), so you may have strong opinions about its design.

Yes, I do have strong opinions not really related to ForCES more towards
Linux. If i was to put ForCES in Linux (it is purely user space driven)
it would work with zero or small changes. 
And my non-Linux opinions are driven by the fact that I have implemented
some of the things you folks are doing, so look at them as advice more than
anything else.

> However, that's not relevant to this discussion, since OpenFlow's design
> has nothing to do with the discussion being held here in regards to Open
> vSwitch.  OpenFlow is just a bullet point--although an important one--in
> a large set of features that Open vSwitch provides.  Its design is such
> that it should be fairly easy to include new control protocols; OpenFlow
> is just a library in Open vSwitch.  If you have issues with OpenFlow,
> those would be better directed to the ONF or one of the OpenFlow mailing
> lists.


I am not subscribed to any of those - and besides that the openflow hype
at the moment is so high on that wave nobody will listen ;->
The only reason i keep bringing up openflow is because your architecture
in the minimal evolved from there (the fact you deal with flows and
actions and switches). I could stop talking about it if it is
offensive ;->

cheers,
jamal

^ permalink raw reply

* Re: [PATCH net-next 2/2] tg3: Scale back code that modifies MRRS
From: Matt Carlson @ 2011-11-28 22:45 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Matthew Carlson, davem@davemloft.net, netdev@vger.kernel.org,
	Michael Chan
In-Reply-To: <1322510036.26733.12.camel@bwh-desktop>

On Mon, Nov 28, 2011 at 11:53:56AM -0800, Ben Hutchings wrote:
> On Mon, 2011-11-28 at 11:41 -0800, Matt Carlson wrote:
> > Tg3 normally gets a performance boost by increasing the PCI Maximum Read
> > Request Size (MRRS) to 4k.  Unfortunately, this is causing some problems
> > on particular hardware platforms.  This patch removes all code that
> > modifies the MRRS except for one case.
> > 
> > As part of a solution to fix an internal FIFO problem on the 5719, the
> > driver artificially capped the MRRS to 2k for the entire 5719, and later
> > 5720, ASIC revs.  This was overly aggressive and only really needed to
> > be done for the 5719 A0.  In the spirit of the rest of this patch, the
> > driver will only reprogram the MRRS for this device if the value exceeds
> > the 2k cap.
> [...]
> 
> It may be better to do this as a PCI quirk, so that the generic MPS/MRRS
> configuration code has this information.

If at all possible, I'd rather do that as a follow-on patch.  This patch
fixes a critical problem that is blocking a lot of testing.

^ permalink raw reply

* Re: Open vSwitch Design
From: Jamal Hadi Salim @ 2011-11-28 22:42 UTC (permalink / raw)
  To: Justin Pettit
  Cc: Stephen Hemminger, Jesse Gross, netdev, dev, David Miller,
	Chris Wright, Herbert Xu, Eric Dumazet, John Fastabend
In-Reply-To: <06FA64A0-9CCF-4621-9DE6-61A6B37B925F@nicira.com>

On Mon, 2011-11-28 at 10:34 -0800, Justin Pettit wrote:
> On Nov 25, 2011, at 5:11 PM, Jamal Hadi Salim wrote:

> 
> Are you talking about ASICs on NICs?  

I am indifferent - looking at it entirely from a control
perspective. i.e if i do "ip link blah down" on a port
i want that to work with zero changes to iproute2; the only
way you can achieve that is if you expose those ports as
netdevs.
This is what I said was a good thing the Intel folks were trying to
achieve (and what Lennert has done for the small Marvel switch chips).

> I was referring to integrating Open vSwitch into top-of-rack switches.  
> These typically have a 48x1G or 48x10G switching ASIC and a relatively 
> slow (~800MHz PPC-class) management CPU running an operating system like 
> Linux.  There's no way that these systems can have a standard CPU on the fastpath.

No, not the datapath; just control of the hardware.  If i run
"ip route add .." i want that to work on the ASIC.
Same with tc action/classification. I want to run those tools and
configure an ACL in the ASIC with no new learning curve.

> 
> I understood the original question to be: Can we make the interface to the 
> kernel look like a hardware switch?  My answer had two main parts.  First, 
> I don't think we could define a "standard" hardware interface, since they're
> all very different.  Second, even if we could, I think a software fastpath's
> strengths and weaknesses are such that the hardware model wouldn't be ideal.

Not talking about datapath - but control interface to those devices.
We cant define how the low levels look like. But if you expose things
using standard linux interfaces, then user space tools and APIs stay
unchanged.

Then i shouldnt care where the feature runs (hardware NIC, ASIC, pure
kernel level software etc).

> 
> The problem is that DRAM isn't going to cut it on the ACL tables--which are 
> typically used for flow-based matching--on a 48x10G (or even 48x1G) switch.

There are vendors who use DRAMs with specialized interfaces that
interleave requests behind the scenes. Maybe I can point you to one
offline.

> I've seen a couple of switching ASICs that support many 10s of thousands of
> ACL entries, but they require expensive external TCAMs for lookup and SRAM 
> for counters.  Most of the white box vendors that I've seen that use those 
> ASICs don't bother adding the external TCAM and SRAM to their designs.  
> Even when they are added, their matching capabilities are typically limited 
> in order to keep up with traffic.

I thought SRAM markets have dried up these days. Anyway, what you are
referring to above is generally true.

> > Justin - theres nothing new you need in the kernel to have that feature.
> > Let me rephrase that, that has not been a new feature for at least a
> > decade in Linux.
> > Add exact match filters with higher priority. Have the lowest priority
> > filter to redirect to user space. Let user space lookup some service
> > rule; have it download to the kernel one or more exact matches.
> > Let the packet proceed on its way down the kernel to its destination if
> > thats what is defined.
> 
> My point was that a software fastpath should look different than a hardware-based one.

And i was pointing  to what your datapath patches which in conjunction
with your user space code.

> > 
> > That bit sounds interesting - I will look at your spec.
> 
> Great!

I am sorry - been overloaded elsewhere havent looked. But i think above
I pretty much spelt out what my desires are.


> Yes, Open vSwitch has been ported to 24x10G ASICs running Linux on their management CPUs.  
> However, in these cases the datapath is handled by hardware and not the software forwarding 
> plane, obviously.

Of course.

> > Do the vendors agree to some common interface?
> 
> Yes, if you view ofproto (as described in the porting guide) as that interface.  Every merchant silicon vendor 
> I've seen views the interfaces to their ASICs as proprietary.  

Yes, the XAL agony (HALS and PALS that run on 26 other OSes).

> Someone (with the appropriate SDK and licenses) needs to write providers for those different hardware ports.  
> We've helped multiple vendors do this and know a few others that have done it on their own.

You know what would be really nice is if you achieved what i described
above.
Can I ifconfig an ethernet switch port?

> This really seems besides the point for this discussion, though.  
> We've written an ofproto provider for software switches called "dpif" 
> (this is also described in the porting guide). What we're proposing be 
> included in Linux is the kernel module that speaks to that dpif provider 
> over a well-defined, stable, netlink-based protocol.
> 
> Here's just a quick (somewhat simplified) summary of the different layers. 
> At the top, there are controllers and switches that communicate using OpenFlow.
> OpenFlow gives controller writers the ability to inspect and modify the switches' 
> flow tables and interfaces.  If a flow entry doesn't match an existing entry, the 
> packet is forwarded to the controller for further processing.  OpenFlow 1.0 was 
> pretty basic and exposed a single flow table.  OpenFlow 1.1 introduced a number 
> of new features including multiple table support.  The forthcoming OpenFlow 1.2 
> will include support for extensible matches, which means that new fields may be 
> added without requiring a full revision of the specification.  OpenFlow is defined 
> by the Open Networking Foundation and is not directly related to Open vSwitch.
> 
> The userspace in Open vSwitch has an OpenFlow library that interacts with the 
> controllers.  Userspace has its own classifier that supports wildcard entries 
> and multiple tables.  Many of the changes to the OpenFlow protocol only require 
> modifying that library and perhaps some of the glue code with the classifier.  
> (In theory, other software-defined networking protocols could be plugged in as well.)  
> The classifier interacts with the ofproto layer below it, which implements a fastpath.

Yes, when i looked at your code i can see that you have gone past
openflow.

> On a hardware switch, since it supports wildcarding, it essentially becomes a 
> passthrough that just calls the appropriate APIs for the ASIC.  

Are these APIs documented as well? Maybe thats all we need if you dont
have the standard linux tools working.

> On software, 
> as we've discussed, exact-match flows work better.
> 
> For that reason, we've defined the dpif layer, which is an ofproto provider.  
> It's primary purpose is to take high-level concepts like "treat this group of 
> interfaces as a LACP bond" or "support this set of wildcard flow entries" and 
> explode them into exact-match entries on-demand.  We've then implemented a 
> Linux dpif provider that takes the exact match entries created by the dpif 
> layer and converts them into netlink messages that the kernel module understands.  
> These messages are well-defined and not specific to Open vSwitch or OpenFlow.

Useful but that seems more like a service layer - I want just to be able
to ifconfig a port as a basic need.
In any case, I should look at your doc to get some clarity.

> This layering has allowed us to introduce new OpenFlow-like features such as multiple tables 
> and non-OpenFlow features such as port mirroring, STP, CCM, and new bonding modes without 
> changes to the kernel module.  In fact, the only changes that should necessitate a kernel 
> interface change are new matches or actions, such as would be required for handling MPLS.

I just need the basic cobbling blocks.
If you conform to what Linux already does and i can run standard tools,
we can have a lot of creative things that could be done.


> > Make these vendor switches work with plain Linux. The Intel folks are
> > producing interfaces with L2, ACLs, VIs and are putting some effort to
> > integrate them into plain Linux. I should be able to set the qos rules
> > with tc on an intel chip.
> > You guys can still take advantage of all that and still have your nice
> > control plane.
> 
> Once again, I think we are talking about different things.  I believe you are 
> discussing interfacing with NICs, which is quite different from a high fanout 
> switching ASIC.  As I previously mentioned, the point of my original 

> post was that I think it would be best not to model a high fanout switch in the interface to the kernel.
> 

I hope my clarification above makes more sense.

cheers,
jamal

^ permalink raw reply

* [PATCH net-next] bnx2x: handle iSCSI SD mode
From: Dmitry Kravkov @ 2011-11-28 22:31 UTC (permalink / raw)
  To: davem, netdev, mchan; +Cc: Dmitry Kravkov, Eilon Greenstein

In iSCSI SD mode, the bnx2x device is assigned a single MAC address,
which is supposed to be the iSCSI MAC. If this mode is recognized,
bnx2x will disable LRO, decrease the number of queues to 1 and the rx ring
size to the minimum allowed by FW, in order to minimize memory use.
It will transfer the MAC for iSCSI usage and zero the primary MAC of the netdev.

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      |   12 +++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |   21 ++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h  |   12 +++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |   50 +++++++++++++++++++---
 4 files changed, 86 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 0f7b7a4..6c7bd63 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2084,4 +2084,16 @@ static const u32 dmae_reg_go_c[] = {
 
 void bnx2x_set_ethtool_ops(struct net_device *netdev);
 void bnx2x_notify_link_changed(struct bnx2x *bp);
+
+
+#define BNX2X_MF_PROTOCOL(bp) \
+	((bp)->mf_config[BP_VN(bp)] & FUNC_MF_CFG_PROTOCOL_MASK)
+
+#ifdef BCM_CNIC
+#define BNX2X_IS_MF_PROTOCOL_ISCSI(bp) \
+	(BNX2X_MF_PROTOCOL(bp) == FUNC_MF_CFG_PROTOCOL_ISCSI)
+
+#define IS_MF_ISCSI_SD(bp) (IS_MF_SD(bp) && BNX2X_IS_MF_PROTOCOL_ISCSI(bp))
+#endif
+
 #endif /* bnx2x.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 8336c78..76c6253 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1427,6 +1427,11 @@ void bnx2x_set_num_queues(struct bnx2x *bp)
 		break;
 	}
 
+#ifdef BCM_CNIC
+	/* override in ISCSI SD mode */
+	if (IS_MF_ISCSI_SD(bp))
+		bp->num_queues = 1;
+#endif
 	/* Add special queues */
 	bp->num_queues += NON_ETH_CONTEXT_USE;
 }
@@ -2970,8 +2975,13 @@ int bnx2x_change_mac_addr(struct net_device *dev, void *p)
 	struct bnx2x *bp = netdev_priv(dev);
 	int rc = 0;
 
-	if (!is_valid_ether_addr((u8 *)(addr->sa_data)))
+	if (!bnx2x_is_valid_ether_addr(bp, addr->sa_data))
+		return -EINVAL;
+
+#ifdef BCM_CNIC
+	if (IS_MF_ISCSI_SD(bp) && !is_zero_ether_addr(addr->sa_data))
 		return -EINVAL;
+#endif
 
 	if (netif_running(dev))  {
 		rc = bnx2x_set_eth_mac(bp, false);
@@ -3087,7 +3097,12 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
 	u8 cos;
 	int rx_ring_size = 0;
 
-	/* if rx_ring_size specified - use it */
+#ifdef BCM_CNIC
+	if (IS_MF_ISCSI_SD(bp)) {
+		rx_ring_size = MIN_RX_SIZE_NONTPA;
+		bp->rx_ring_size = rx_ring_size;
+	} else
+#endif
 	if (!bp->rx_ring_size) {
 
 		rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
@@ -3097,7 +3112,7 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
 				     MIN_RX_SIZE_TPA, rx_ring_size);
 
 		bp->rx_ring_size = rx_ring_size;
-	} else
+	} else /* if rx_ring_size specified - use it */
 		rx_ring_size = bp->rx_ring_size;
 
 	/* Common */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 80c5ed0..2891cdc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -20,6 +20,7 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 
 
 #include "bnx2x.h"
@@ -1554,4 +1555,15 @@ static inline void bnx2x_update_drv_flags(struct bnx2x *bp, u32 flags, u32 set)
 	}
 }
 
+static inline bool bnx2x_is_valid_ether_addr(struct bnx2x *bp, u8 *addr)
+{
+	if (is_valid_ether_addr(addr))
+		return true;
+#ifdef BCM_CNIC
+	if (is_zero_ether_addr(addr) && IS_MF_ISCSI_SD(bp))
+		return true;
+#endif
+	return false;
+}
+
 #endif /* BNX2X_CMN_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 0cdbb70..2213e0b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -7017,6 +7017,13 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set)
 {
 	unsigned long ramrod_flags = 0;
 
+#ifdef BCM_CNIC
+	if (is_zero_ether_addr(bp->dev->dev_addr) && IS_MF_ISCSI_SD(bp)) {
+		DP(NETIF_MSG_IFUP, "Ignoring Zero MAC for iSCSI SD mode\n");
+		return 0;
+	}
+#endif
+
 	DP(NETIF_MSG_IFUP, "Adding Eth MAC\n");
 
 	__set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
@@ -9400,7 +9407,8 @@ static void __devinit bnx2x_get_mac_hwinfo(struct bnx2x *bp)
 			bnx2x_set_mac_buf(bp->dev->dev_addr, val, val2);
 
 #ifdef BCM_CNIC
-		/* iSCSI and FCoE NPAR MACs: if there is no either iSCSI or
+		/*
+		 * iSCSI and FCoE NPAR MACs: if there is no either iSCSI or
 		 * FCoE MAC then the appropriate feature should be disabled.
 		 */
 		if (IS_MF_SI(bp)) {
@@ -9422,11 +9430,22 @@ static void __devinit bnx2x_get_mac_hwinfo(struct bnx2x *bp)
 				val = MF_CFG_RD(bp, func_ext_config[func].
 						    fcoe_mac_addr_lower);
 				bnx2x_set_mac_buf(fip_mac, val, val2);
-				BNX2X_DEV_INFO("Read FCoE L2 MAC to %pM\n",
+				BNX2X_DEV_INFO("Read FCoE L2 MAC: %pM\n",
 					       fip_mac);
 
 			} else
 				bp->flags |= NO_FCOE_FLAG;
+		} else { /* SD mode */
+			if (BNX2X_IS_MF_PROTOCOL_ISCSI(bp)) {
+				/* use primary mac as iscsi mac */
+				memcpy(iscsi_mac, bp->dev->dev_addr, ETH_ALEN);
+				/* Zero primary MAC configuration */
+				memset(bp->dev->dev_addr, 0, ETH_ALEN);
+
+				BNX2X_DEV_INFO("SD ISCSI MODE\n");
+				BNX2X_DEV_INFO("Read iSCSI MAC: %pM\n",
+					       iscsi_mac);
+			}
 		}
 #endif
 	} else {
@@ -9475,7 +9494,7 @@ static void __devinit bnx2x_get_mac_hwinfo(struct bnx2x *bp)
 	}
 #endif
 
-	if (!is_valid_ether_addr(bp->dev->dev_addr))
+	if (!bnx2x_is_valid_ether_addr(bp, bp->dev->dev_addr))
 		dev_err(&bp->pdev->dev,
 			"bad Ethernet MAC address configuration: "
 			"%pM, change it manually before bringing up "
@@ -9866,15 +9885,20 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
 
 	bp->multi_mode = multi_mode;
 
+	bp->disable_tpa = disable_tpa;
+
+#ifdef BCM_CNIC
+	bp->disable_tpa |= IS_MF_ISCSI_SD(bp);
+#endif
+
 	/* Set TPA flags */
-	if (disable_tpa) {
+	if (bp->disable_tpa) {
 		bp->flags &= ~TPA_ENABLE_FLAG;
 		bp->dev->features &= ~NETIF_F_LRO;
 	} else {
 		bp->flags |= TPA_ENABLE_FLAG;
 		bp->dev->features |= NETIF_F_LRO;
 	}
-	bp->disable_tpa = disable_tpa;
 
 	if (CHIP_IS_E1(bp))
 		bp->dropless_fc = 0;
@@ -10145,6 +10169,11 @@ void bnx2x_set_rx_mode(struct net_device *dev)
 	}
 
 	bp->rx_mode = rx_mode;
+#ifdef BCM_CNIC
+	/* handle ISCSI SD mode */
+	if (IS_MF_ISCSI_SD(bp))
+		bp->rx_mode = BNX2X_RX_MODE_NONE;
+#endif
 
 	/* Schedule the rx_mode command */
 	if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state)) {
@@ -10224,6 +10253,15 @@ static void poll_bnx2x(struct net_device *dev)
 }
 #endif
 
+static int bnx2x_validate_addr(struct net_device *dev)
+{
+	struct bnx2x *bp = netdev_priv(dev);
+
+	if (!bnx2x_is_valid_ether_addr(bp, dev->dev_addr))
+		return -EADDRNOTAVAIL;
+	return 0;
+}
+
 static const struct net_device_ops bnx2x_netdev_ops = {
 	.ndo_open		= bnx2x_open,
 	.ndo_stop		= bnx2x_close,
@@ -10231,7 +10269,7 @@ static const struct net_device_ops bnx2x_netdev_ops = {
 	.ndo_select_queue	= bnx2x_select_queue,
 	.ndo_set_rx_mode	= bnx2x_set_rx_mode,
 	.ndo_set_mac_address	= bnx2x_change_mac_addr,
-	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_validate_addr	= bnx2x_validate_addr,
 	.ndo_do_ioctl		= bnx2x_ioctl,
 	.ndo_change_mtu		= bnx2x_change_mtu,
 	.ndo_fix_features	= bnx2x_fix_features,
-- 
1.7.7.2

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox