Netdev List
 help / color / mirror / Atom feed
* [net-2.6 PATCH 1/2] ixgbe: add support for 82599 Combined Backplane
From: Jeff Kirsher @ 2009-10-02 18:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Don Skidmore, Peter P Waskiewicz Jr, Jeff Kirsher

From: Don Skidmore <donald.c.skidmore@intel.com>

This patch will add support for the 82599 Dual port Backplane
device (0x10f8).  This device has the ability to link in serial (KR) and
parallel (KX4/KX) modes, depending on what the switch capabilities are in
the blade chassis.

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/ixgbe/ixgbe_82599.c |    1 +
 drivers/net/ixgbe/ixgbe_main.c  |    2 ++
 drivers/net/ixgbe/ixgbe_type.h  |    1 +
 3 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index 2ec58dc..bb87c43 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -330,6 +330,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw)
 
 	switch (hw->device_id) {
 	case IXGBE_DEV_ID_82599_KX4:
+	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
 	case IXGBE_DEV_ID_82599_XAUI_LOM:
 		/* Default device ID is mezzanine card KX/KX4 */
 		media_type = ixgbe_media_type_backplane;
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 1cbc6a3..dc4afa5 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -99,6 +99,8 @@ static struct pci_device_id ixgbe_pci_tbl[] = {
 	 board_82599 },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4),
 	 board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE),
+	 board_82599 },
 
 	/* required last entry */
 	{0, }
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index 7c93e92..a71f712 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -52,6 +52,7 @@
 #define IXGBE_DEV_ID_82599_CX4           0x10F9
 #define IXGBE_DEV_ID_82599_SFP           0x10FB
 #define IXGBE_DEV_ID_82599_XAUI_LOM      0x10FC
+#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8
 
 /* General Registers */
 #define IXGBE_CTRL      0x00000


^ permalink raw reply related

* [net-2.6 PATCH 2/2] ixgbe: add support for 82599 based X520 10G Dual KX4 Mezz card
From: Jeff Kirsher @ 2009-10-02 18:58 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Don Skidmore, Peter P Waskiewicz Jr, Jeff Kirsher
In-Reply-To: <20091002185803.8771.46655.stgit@localhost.localdomain>

From: Don Skidmore <donald.c.skidmore@intel.com>

This patch adds device support for the 82599 based X520 10GbE
Dual Port KX4 Mezzanine card.

Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Acked-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/ixgbe/ixgbe_82599.c |    1 +
 drivers/net/ixgbe/ixgbe_main.c  |    2 ++
 drivers/net/ixgbe/ixgbe_type.h  |    1 +
 3 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index bb87c43..34b0492 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -330,6 +330,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw)
 
 	switch (hw->device_id) {
 	case IXGBE_DEV_ID_82599_KX4:
+	case IXGBE_DEV_ID_82599_KX4_MEZZ:
 	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
 	case IXGBE_DEV_ID_82599_XAUI_LOM:
 		/* Default device ID is mezzanine card KX/KX4 */
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index dc4afa5..1a2eb79 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -97,6 +97,8 @@ static struct pci_device_id ixgbe_pci_tbl[] = {
 	 board_82599 },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP),
 	 board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ),
+	 board_82599 },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4),
 	 board_82599 },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE),
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index a71f712..ef4bdd5 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -49,6 +49,7 @@
 #define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM      0x10E1
 #define IXGBE_DEV_ID_82598EB_XF_LR       0x10F4
 #define IXGBE_DEV_ID_82599_KX4           0x10F7
+#define IXGBE_DEV_ID_82599_KX4_MEZZ      0x1514
 #define IXGBE_DEV_ID_82599_CX4           0x10F9
 #define IXGBE_DEV_ID_82599_SFP           0x10FB
 #define IXGBE_DEV_ID_82599_XAUI_LOM      0x10FC


^ permalink raw reply related

* Re: [PATCH v2] net: export device speed and duplex via sysfs
From: Andy Gospodarek @ 2009-10-02 19:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <4AC6432C.1020202@gmail.com>

On Fri, Oct 02, 2009 at 08:15:08PM +0200, Eric Dumazet wrote:
> Andy Gospodarek a écrit :
> > +static ssize_t show_speed(struct device *dev,
> > +			  struct device_attribute *attr, char *buf)
> > +{
> > +	struct net_device *netdev = to_net_dev(dev);
> > +	int ret = -EINVAL;
> > +
> > +	if (!rtnl_trylock())
> > +		return restart_syscall();
> > +
> > +	if (netif_running(netdev) && netdev->ethtool_ops->get_settings) {
> > +		struct ethtool_cmd cmd = { ETHTOOL_GSET };
> > +
> > +		if (netdev->ethtool_ops->get_settings(netdev, &cmd) < 0)
> 
> 		rtnl lock leak ?
> 
> 
> > +			return -EINVAL;
> > +		ret = sprintf(buf, fmt_dec, cmd.speed);
> > +	}
> > +	rtnl_unlock();
> > +	return ret;
> > +}
> > +
> > +static ssize_t show_duplex(struct device *dev,
> > +			   struct device_attribute *attr, char *buf)
> > +{
> > +	struct net_device *netdev = to_net_dev(dev);
> > +	int ret = -EINVAL;
> > +
> > +	if (!rtnl_trylock())
> > +		return restart_syscall();
> > +
> > +	if (netif_running(netdev) && netdev->ethtool_ops->get_settings) {
> > +		struct ethtool_cmd cmd = { ETHTOOL_GSET };
> > +
> > +		if (netdev->ethtool_ops->get_settings(netdev, &cmd) < 0)
> 
> 		rtnl lock leak ?
> 
> > +			return -EINVAL;
> > +		ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
> > +	}
> > +	rtnl_unlock();
> > +	return ret;
> > +}
> > +
> 

Thanks for spotting that, Eric.  Here's an updated (and tested) patch.
I also switched to using ethtool_cmd_speed so that the 'entire' link
speed is reported.

[PATCH] net: export device speed and duplex via sysfs

This patch exports the link-speed (in Mbps) and duplex of an interface
via sysfs.  This eliminates the need to use ethtool just to check the
link-speed.  Not requiring 'ethtool' and not relying on the SIOCETHTOOL
ioctl should be helpful in an embedded environment where space is at a
premium as well.

NOTE: This patch also intentionally allows non-root users to check the link
speed and duplex -- something not possible with ethtool.

Here's some sample output:

# cat /sys/class/net/eth0/speed 
100
# cat /sys/class/net/eth0/duplex
half
# ethtool eth0
Settings for eth0:
        Supported ports: [ TP ]
        Supported link modes:   10baseT/Half 10baseT/Full 
                                100baseT/Half 100baseT/Full 
                                1000baseT/Half 1000baseT/Full 
        Supports auto-negotiation: Yes
        Advertised link modes:  Not reported
        Advertised auto-negotiation: No
        Speed: 100Mb/s
        Duplex: Half
        Port: Twisted Pair
        PHYAD: 1
        Transceiver: internal
        Auto-negotiation: off
        Supports Wake-on: g
        Wake-on: g
        Current message level: 0x000000ff (255)
        Link detected: yes


---
 net/core/net-sysfs.c |   40 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3994680..133dbc4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -130,6 +130,44 @@ static ssize_t show_carrier(struct device *dev,
 	return -EINVAL;
 }
 
+static ssize_t show_speed(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	int ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (netif_running(netdev) && netdev->ethtool_ops->get_settings) {
+		struct ethtool_cmd cmd = { ETHTOOL_GSET };
+
+		if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
+			ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
+	}
+	rtnl_unlock();
+	return ret;
+}
+
+static ssize_t show_duplex(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	int ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (netif_running(netdev) && netdev->ethtool_ops->get_settings) {
+		struct ethtool_cmd cmd = { ETHTOOL_GSET };
+
+		if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
+			ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
+	}
+	rtnl_unlock();
+	return ret;
+}
+
 static ssize_t show_dormant(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -259,6 +297,8 @@ static struct device_attribute net_class_attributes[] = {
 	__ATTR(address, S_IRUGO, show_address, NULL),
 	__ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
 	__ATTR(carrier, S_IRUGO, show_carrier, NULL),
+	__ATTR(speed, S_IRUGO, show_speed, NULL),
+	__ATTR(duplex, S_IRUGO, show_duplex, NULL),
 	__ATTR(dormant, S_IRUGO, show_dormant, NULL),
 	__ATTR(operstate, S_IRUGO, show_operstate, NULL),
 	__ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),

^ permalink raw reply related

* New qdisc monitor
From: clownix @ 2009-10-02 16:41 UTC (permalink / raw)
  To: netdev

At http://clownix.net, there is a qdisc monitor based on a new sched
qdisc named spy and a module that periodically sends the
enqueues/dequeues/drops/queue-size/delays... to be plotted to the user
world through a netlink socket.
Note that the name of this software package is "clownix_spy", and not
cloonix_net which is another project on the same site.
Regards 
Vincent Perrier


^ permalink raw reply

* Re: [PATCH] net: export device speed and duplex via sysfs
From: Andy Gospodarek @ 2009-10-02 20:01 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev
In-Reply-To: <1254507554.8795.12.camel@achroite>

On Fri, Oct 02, 2009 at 07:19:14PM +0100, Ben Hutchings wrote:
> On Fri, 2009-10-02 at 14:07 -0400, Andy Gospodarek wrote:
> > This exports the link-speed (in Mbps) and duplex of an interface via
> > sysfs.  This eliminates the need to use ethtool just to check the
> > link-speed.  Not requiring 'ethtool' and not relying on the SIOCETHTOOL
> > ioctl should be helpful in an embedded environment where space is at a
> > premium as well.
> 
> It's trivial to write an ethtool-lite that does this.  That might be
> worth adding to busybox.
> 

It probably would be.  I was just using this as an example of another
use for it.  Embedded usage was not the primary purpose.

> > NOTE: This patch also intentionally allows non-root users to check the link
> > speed and duplex -- something not possible with ethtool.
> [...]
> 
> Assuming this is desirable (I'm not sure), wouldn't it make more
> sense to move the permissions check for SIOCETHTOOL so that get_settings
> is non-privileged?
> 

That could be done as well; I just chose to go in a slightly different
direction.

I took a look at /sys/class/net/ethX/ and felt like the information was
pretty complete with the exception of the link speed and duplex, so I
thought it would be a good place to add it.  I personally wouldn't mind
having most of the information presented in ethtool available via sysfs,
but I figured I would walk before running.


^ permalink raw reply

* Re: [RFC take2] pkt_sched: gen_estimator: Dont report fake rate estimators
From: Jarek Poplawski @ 2009-10-02 20:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, kaber, netdev
In-Reply-To: <4AC5F46B.6030308@gmail.com>

Eric Dumazet wrote, On 10/02/2009 02:39 PM:

> Jarek Poplawski a écrit :
> 
>> So you prefer the additional parameter version, but since these
>> _active tests are not needed e.g. for HTB classes, which got it
>> active by default, so maybe bstats == NULL would let skip such a test?
>>
>> ...
>>> --- a/include/net/gen_stats.h
>>> +++ b/include/net/gen_stats.h
>>> @@ -30,6 +30,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
>>>  extern int gnet_stats_copy_basic(struct gnet_dump *d,
>>>  				 struct gnet_stats_basic_packed *b);
>>>  extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
>>> +				    const struct gnet_stats_basic_packed *bstats,
>> It seems these *b/*bstats defs could look more consistent. Otherwise
>> it looks OK to me.
> 
> Agreed, here is the updated version, I added your Signoff if you dont mind :)


Hmm... So you made me do some "real" work here, and guess what?:
there is one serious checkpatch warning! ;-) Plus, this new parameter
should be added to the function description. Otherwise:
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>

Thanks,
Jarek P.

PS: I guess full "Don't" would show we really mean it...

> [RFC] pkt_sched: gen_estimator: Dont report fake rate estimators
> 
> We currently send TCA_STATS_RATE_EST elements to netlink users, even if no estimator
> is running.
> 
> # tc -s -d qdisc
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 112833764978 bytes 1495081739 pkt (dropped 0, overlimits 0 requeues 0)
>  rate 0bit 0pps backlog 0b 0p requeues 0
> 
> User has no way to tell if the "rate 0bit 0pps" is a real estimation, or a fake
> one (because no estimator is active)
> 
> After this patch, tc command output is :
> $ tc -s -d qdisc
> qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
>  Sent 561075 bytes 1196 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> 
> We add a parameter to gnet_stats_copy_rate_est() function so that
> it can use gen_estimator_active(bstats, r), as suggested by Jarek.
> 
> This parameter can be NULL if check is not necessary, (htb for
> example has a mandatory rate estimator)
> 
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
> ---
>  include/net/gen_stats.h |    1 +
>  net/core/gen_stats.c    |    7 ++++++-
>  net/sched/act_api.c     |    2 +-
>  net/sched/sch_api.c     |    2 +-
>  net/sched/sch_cbq.c     |    2 +-
>  net/sched/sch_drr.c     |    2 +-
>  net/sched/sch_hfsc.c    |    2 +-
>  net/sched/sch_htb.c     |    2 +-
>  8 files changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
> index c148855..eb87a14 100644
> --- a/include/net/gen_stats.h
> +++ b/include/net/gen_stats.h
> @@ -30,6 +30,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
>  extern int gnet_stats_copy_basic(struct gnet_dump *d,
>  				 struct gnet_stats_basic_packed *b);
>  extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
> +				    const struct gnet_stats_basic_packed *b,
>  				    struct gnet_stats_rate_est *r);
>  extern int gnet_stats_copy_queue(struct gnet_dump *d,
>  				 struct gnet_stats_queue *q);
> diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
> index 8569310..054a49c 100644
> --- a/net/core/gen_stats.c
> +++ b/net/core/gen_stats.c
> @@ -136,8 +136,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
>   * if the room in the socket buffer was not sufficient.
>   */
>  int
> -gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
> +gnet_stats_copy_rate_est(struct gnet_dump *d,
> +			 const struct gnet_stats_basic_packed *b,
> +			 struct gnet_stats_rate_est *r)
>  {
> +	if (b && !gen_estimator_active(b, r))
> +		return 0;
> +
>  	if (d->compat_tc_stats) {
>  		d->tc_stats.bps = r->bps;
>  		d->tc_stats.pps = r->pps;
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index 2dfb3e7..2b0d5ee 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -618,7 +618,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
>  			goto errout;
>  
>  	if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
> -	    gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
> +	    gnet_stats_copy_rate_est(&d, &h->tcf_bstats, &h->tcf_rate_est) < 0 ||
>  	    gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
>  		goto errout;
>  
> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
> index 903e418..1acfd29 100644
> --- a/net/sched/sch_api.c
> +++ b/net/sched/sch_api.c
> @@ -1179,7 +1179,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
>  		goto nla_put_failure;
>  
>  	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
> -	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
> +	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
>  	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
>  		goto nla_put_failure;
>  
> diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
> index 5b132c4..3846d65 100644
> --- a/net/sched/sch_cbq.c
> +++ b/net/sched/sch_cbq.c
> @@ -1609,7 +1609,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
>  		cl->xstats.undertime = cl->undertime - q->now;
>  
>  	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
> -	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
> +	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
>  	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
>  		return -1;
>  
> diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
> index 5a888af..a65604f 100644
> --- a/net/sched/sch_drr.c
> +++ b/net/sched/sch_drr.c
> @@ -280,7 +280,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
>  	}
>  
>  	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
> -	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
> +	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
>  	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
>  		return -1;
>  
> diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
> index 2c5c76b..b38b39c 100644
> --- a/net/sched/sch_hfsc.c
> +++ b/net/sched/sch_hfsc.c
> @@ -1375,7 +1375,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
>  	xstats.rtwork  = cl->cl_cumul;
>  
>  	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
> -	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
> +	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
>  	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
>  		return -1;
>  
> diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
> index 85acab9..2e38d1a 100644
> --- a/net/sched/sch_htb.c
> +++ b/net/sched/sch_htb.c
> @@ -1105,7 +1105,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
>  	cl->xstats.ctokens = cl->ctokens;
>  
>  	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
> -	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
> +	    gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
>  	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
>  		return -1;
>  
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 



^ permalink raw reply

* Re: [RFC take2] pkt_sched: gen_estimator: Dont report fake rate estimators
From: Eric Dumazet @ 2009-10-02 20:32 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: David Miller, kaber, netdev
In-Reply-To: <4AC65E6F.8050403@gmail.com>

Jarek Poplawski a écrit :
> 
> 
> Hmm... So you made me do some "real" work here, and guess what?:
> there is one serious checkpatch warning! ;-) Plus, this new parameter
> should be added to the function description. Otherwise:
> Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
> 
> Thanks,
> Jarek P.
> 
> PS: I guess full "Don't" would show we really mean it...

Okay :) Here is the last round, before the night !

Thanks again


[RFC] pkt_sched: gen_estimator: Don't report fake rate estimators

We currently send TCA_STATS_RATE_EST elements to netlink users, even if no estimator
is running.

# tc -s -d qdisc
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 112833764978 bytes 1495081739 pkt (dropped 0, overlimits 0 requeues 0)
 rate 0bit 0pps backlog 0b 0p requeues 0

User has no way to tell if the "rate 0bit 0pps" is a real estimation, or a fake
one (because no estimator is active)

After this patch, tc command output is :
$ tc -s -d qdisc
qdisc pfifo_fast 0: dev eth0 root bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
 Sent 561075 bytes 1196 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0

We add a parameter to gnet_stats_copy_rate_est() function so that
it can use gen_estimator_active(bstats, r), as suggested by Jarek.

This parameter can be NULL if check is not necessary, (htb for
example has a mandatory rate estimator)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
---
 include/net/gen_stats.h |    1 +
 net/core/gen_stats.c    |    8 +++++++-
 net/sched/act_api.c     |    3 ++-
 net/sched/sch_api.c     |    2 +-
 net/sched/sch_cbq.c     |    2 +-
 net/sched/sch_drr.c     |    2 +-
 net/sched/sch_hfsc.c    |    2 +-
 net/sched/sch_htb.c     |    2 +-
 8 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index c148855..eb87a14 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -30,6 +30,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
 extern int gnet_stats_copy_basic(struct gnet_dump *d,
 				 struct gnet_stats_basic_packed *b);
 extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
+				    const struct gnet_stats_basic_packed *b,
 				    struct gnet_stats_rate_est *r);
 extern int gnet_stats_copy_queue(struct gnet_dump *d,
 				 struct gnet_stats_queue *q);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 8569310..393b1d8 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -127,6 +127,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
 /**
  * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
  * @d: dumping handle
+ * @b: basic statistics
  * @r: rate estimator statistics
  *
  * Appends the rate estimator statistics to the top level TLV created by
@@ -136,8 +137,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
+gnet_stats_copy_rate_est(struct gnet_dump *d,
+			 const struct gnet_stats_basic_packed *b,
+			 struct gnet_stats_rate_est *r)
 {
+	if (b && !gen_estimator_active(b, r))
+		return 0;
+
 	if (d->compat_tc_stats) {
 		d->tc_stats.bps = r->bps;
 		d->tc_stats.pps = r->pps;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2dfb3e7..ca2e1fd 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -618,7 +618,8 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
 			goto errout;
 
 	if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
-	    gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(&d, &h->tcf_bstats,
+				     &h->tcf_rate_est) < 0 ||
 	    gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
 		goto errout;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 903e418..1acfd29 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1179,7 +1179,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		goto nla_put_failure;
 
 	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
-	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
 	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
 		goto nla_put_failure;
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 5b132c4..3846d65 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1609,7 +1609,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 		cl->xstats.undertime = cl->undertime - q->now;
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
-	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
 		return -1;
 
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 5a888af..a65604f 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -280,7 +280,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	}
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
-	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
 		return -1;
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 2c5c76b..b38b39c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1375,7 +1375,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	xstats.rtwork  = cl->cl_cumul;
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
-	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
 		return -1;
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 85acab9..2e38d1a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1105,7 +1105,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 	cl->xstats.ctokens = cl->ctokens;
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
-	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+	    gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
 		return -1;
 

^ permalink raw reply related

* Re: Messages are printed on screen
From: Ben Hutchings @ 2009-10-02 20:49 UTC (permalink / raw)
  To: Markus Feldmann; +Cc: netdev
In-Reply-To: <ha4q26$7mt$1@ger.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 1910 bytes --]

On Fri, 2009-10-02 at 14:01 +0200, Markus Feldmann wrote:
> Ben Hutchings schrieb:
> > On Fri, 2009-10-02 at 11:52 +0200, Markus Feldmann wrote:
> >
> >>
> >> As you see some of my IRQ-Lines are multiply in use, so my Server is 
> >> working hard at his limit.
> > 
> > IRQ sharing is normal on PCs without MSI support, but to see where
> > that's happening you need to look at /proc/interrupts and not the BIOS
> > setup program or wherever you got the above information from.
> Ok i did <cat /proc/interrupts> and got:
>             CPU0
>    0:     259603    XT-PIC-XT        timer
>    1:       1421    XT-PIC-XT        i8042
>    2:          0    XT-PIC-XT        cascade
>    4:     200000    XT-PIC-XT        ohci_hcd:usb3, pppp0

This number is a clue because after every 100,000 interrupts for a
particular IRQ the kernel checks how many of them were handled.  If this
is less than 100 then it disables the IRQ.  So I suspect one of these
devices is misbehaving, or its driver is not handling interrupts
correctly.  Is 'pppp0' actually an Ethernet device that you're using for
PPPoE?  If so, what model of network card is it?

>    5:          0    XT-PIC-XT        ehci_hcd:usb1, lan0
>    7:       6959    XT-PIC-XT        lan1
>    8:          2    XT-PIC-XT        rtc0
>    9:          0    XT-PIC-XT        acpi
>   11:      37697    XT-PIC-XT        ide2, ide3, ohci_hcd:usb2, lan2
>   14:          0    XT-PIC-XT        ide0
> NMI:          0   Non-maskable interrupts
> TRM:          0   Thermal event interrupts
> MCE:          0   Machine check exceptions
> MCP:         13   Machine check polls
> ERR:          2
> 
> How can i assigned IRQs during Boot?
[...]

They are assigned by the BIOS.  You may be able to configure this in
BIOS setup.

Ben.

-- 
Ben Hutchings
Who are all these weirdos? - David Bowie, about L-Space IRC channel #afp

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply

* Re: [PATCH] TCPCT-1: adding a sysctl
From: Andi Kleen @ 2009-10-02 21:06 UTC (permalink / raw)
  To: William Allen Simpson; +Cc: netdev
In-Reply-To: <4AC61505.8030701@gmail.com>

William Allen Simpson <william.allen.simpson@gmail.com> writes:
>
> Any suggestions for improvement?  Or general approval?

The patch seems incomplete, can't find callers for most of the new functions.

In general cookies fell a bit out of favour because they don't support window
scaling etc.  But you don't seem to fix that by putting that data into
the new option.

My immediate gut reaction is that it will be likely challenging to 
traverse many packet filters (which often have a tendency to drop
anything they don't know) with this option on. That is also what killed
ECN.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply

* Re: Performance questions using bridge and macvlan
From: Ira W. Snyder @ 2009-10-02 21:11 UTC (permalink / raw)
  To: netdev
In-Reply-To: <20091001212133.GD28963@ovro.caltech.edu>

On Thu, Oct 01, 2009 at 02:21:33PM -0700, Ira W. Snyder wrote:
> Hello all,
> 
> I've got an "interesting" network setup (using bridge), and I wonder if
> macvlan might help me get some more performance. Unfortunately, there
> isn't much documentation for macvlan, so I'm asking here.
> 
> I have a computer acting as an ethernet bridge. NIC A is on a network
> with a 1500 byte mtu. NIC B is a point-to-point ethernet device with a
> 64K mtu.
> 
> Adding both devices to a bridge using brctl works as expected. The
> computer attached to NIC B can send/recv normal ethernet traffic onto
> the outside network through NIC A.
> 
> Unfortunately, the bridge code does not fragment packets, and so the 64K
> mtu is reduced to a 1500 byte mtu. This kills performance by a factor of
> about 5x on the point-to-point device. All of my tests were using
> netperf/netserver running on the machine doing the bridging.
> 
> Without bridge, using full 64K mtu packets, netperf gives ~600mbit/sec.
> With bridge, using 1500 byte mtu packets, netperf gives ~120mbit/sec.
> 
> My question is this: is it possible to set up routing or macvlans such
> that any traffic from the bridge machine itself travels across the
> point-to-point link (at full 64K mtu), but any other traffic goes
> through the bridge (using 1500 byte mtu)?
> 
> I'm aware that either running NAT or routing will fragment packets and
> solve my problem, but this introduces some complexity in my network
> setup that I would like to avoid.
> 

I have solved my problem using a veth pair device instead of macvlan. My
solution is described below to help anyone else who comes across this
problem.

eth0: physical ethernet, 1500 byte mtu
eth2: point-to-point ethernet, 65522 byte mtu
veth0, veth1: a veth pair device
br0, br1: ethernet bridges

# create the veth devices, increase mtu
ip link add link eth0 type veth
ip link set veth0 mtu 65522
ip link set veth1 mtu 65522

# create the bridges
brctl addbr br0
brctl addbr br1

# setup bridge 0
brctl addif br0 eth0
brctl addif br0 veth0

# setup bridge 1
brctl addif br1 eth2
brctl addif br1 veth1

# bring everything up
ip link set eth0 up
ip link set eth2 up
ip link set veth0 up
ip link set veth1 up
ip link set br0 up
ip link set br1 up

At this point, I use dhcpcd on the br0 interface to get an address. In
order for routing to work properly, I had to get an address for the br1
interface as well. Without an address for br1, everything was still
slow.

My routing table now looks like this:
[root@mybox ~]# route -n
Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
192.168.17.0    0.0.0.0         255.255.255.0   U     0      0        0 br1
192.168.17.0    0.0.0.0         255.255.255.0   U     204    0        0 br0
0.0.0.0         192.168.17.1    0.0.0.0         UG    204    0        0 br0

I'm curious if I could run dhcpcd on br1 and still have everything work
correctly. I think the answer is yes, but it will take more testing to
confirm this.

Ira

^ permalink raw reply

* Re: New qdisc monitor
From: Andi Kleen @ 2009-10-02 21:21 UTC (permalink / raw)
  To: clownix; +Cc: netdev
In-Reply-To: <1254501684.2670.7.camel@localhost>

clownix <clownix@clownix.net> writes:

> At http://clownix.net, there is a qdisc monitor based on a new sched
> qdisc named spy and a module that periodically sends the
> enqueues/dequeues/drops/queue-size/delays... to be plotted to the user
> world through a netlink socket.
> Note that the name of this software package is "clownix_spy", and not
> cloonix_net which is another project on the same site.

Sounds like a cool idea, but could you consider submitting the qdisc
for mainline and getting it reviewed? Out of tree kernel patches for these 
kinds of things tend to be a pain to use.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply

* Re: [Bug #14261] e1000e jumbo frames no longer work: 'Unsupported MTU setting'
From: Rafael J. Wysocki @ 2009-10-02 21:31 UTC (permalink / raw)
  To: Nix
  Cc: Linux Kernel Mailing List, Kernel Testers List, Alexander Duyck,
	Network Development, Jeff Kirsher, Jesse Brandeburg,
	e1000-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <877hvd8rj5.fsf-AdTWujXS48Mg67Zj9sPl2A@public.gmane.org>

On Friday 02 October 2009, Nix wrote:
> On 1 Oct 2009, Rafael J. Wysocki stated:
> 
> > The following bug entry is on the current list of known regressions
> > introduced between 2.6.30 and 2.6.31.  Please verify if it still should
> > be listed and let me know (either way).
> 
> The patch fixes it.
> 
> > Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14261
> > Subject		: e1000e jumbo frames no longer work: 'Unsupported MTU setting'
> > Submitter	: Nix <nix-dKoSMcxRz+Te9xe1eoZjHA@public.gmane.org>
> > Date		: 2009-09-26 11:16 (6 days old)
> > References	: http://marc.info/?l=linux-kernel&m=125396433321342&w=4
> > Handled-By	: Alexander Duyck <alexander.duyck-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> > Patch		: http://patchwork.kernel.org/patch/50277/
> 
> (Possibly a stable candidate? It's not in 2.6.31.2-to-be, perhaps the
> only patch that isn't. ;) )

Most likely because it's not in the Linus' tree yet.

[e1000e maintainers, we have a regression fix to merge, please.]

Thanks,
Rafael

^ permalink raw reply

* Re: [PATCH] make TLLAO option for NA packets configurable
From: Octavian Purdila @ 2009-10-02 21:39 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David Miller, cratiu, netdev
In-Reply-To: <20091002105351.0fbde9c2@nehalam>

[-- Attachment #1: Type: text/plain, Size: 157 bytes --]

On Friday 02 October 2009 20:53:51 you wrote:

> This is good although I would have shortened the name.

Ah, I knew I forgot something :) Here is v4. 

tavi

[-- Attachment #2: 0001-ipv6-new-sysctl-for-sending-TLLAO-with-unicast-NAs.patch --]
[-- Type: text/x-patch, Size: 3838 bytes --]

From 24d96d825b9fa832b22878cc6c990d5711968734 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Fri, 2 Oct 2009 00:51:15 +0300
Subject: [PATCH] ipv6: new sysctl for sending TLLAO with unicast NAs

Neighbor advertisements responding to unicast neighbor solicitations
did not include the target link-layer address option. This patch adds
a new sysctl option (disabled by default) which controls whether this
option should be sent even with unicast NAs.

The need for this arose because certain routers expect the TLLAO in
some situations even as a response to unicast NS packets.

Moreover, RFC 2461 recommends sending this to avoid a race condition
(section 4.4, Target link-layer address)

Signed-off-by: Cosmin Ratiu <cratiu@ixiacom.com>
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 Documentation/networking/ip-sysctl.txt |   18 ++++++++++++++++++
 include/linux/ipv6.h                   |    1 +
 net/ipv6/addrconf.c                    |    8 ++++++++
 net/ipv6/ndisc.c                       |    1 +
 4 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fbe427a..a0e134d 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1086,6 +1086,24 @@ accept_dad - INTEGER
 	2: Enable DAD, and disable IPv6 operation if MAC-based duplicate
 	   link-local address has been found.
 
+force_tllao - BOOLEAN
+	Enable sending the target link-layer address option even when
+	responding to a unicast neighbor solicitation.
+	Default: FALSE
+
+	Quoting from RFC 2461, section 4.4, Target link-layer address:
+
+	"The option MUST be included for multicast solicitations in order to
+	avoid infinite Neighbor Solicitation "recursion" when the peer node
+	does not have a cache entry to return a Neighbor Advertisements
+	message.  When responding to unicast solicitations, the option can be
+	omitted since the sender of the solicitation has the correct link-
+	layer address; otherwise it would not have be able to send the unicast
+	solicitation in the first place. However, including the link-layer
+	address in this case adds little overhead and eliminates a potential
+	race condition where the sender deletes the cached link-layer address
+	prior to receiving a response to a previous solicitation."
+
 icmp/*:
 ratelimit - INTEGER
 	Limit the maximal rates for sending ICMPv6 packets.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index c662efa..ae74ede 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -167,6 +167,7 @@ struct ipv6_devconf {
 #endif
 	__s32		disable_ipv6;
 	__s32		accept_dad;
+	__s32		force_tllao;
 	void		*sysctl;
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1fd0a3d..bdcee69 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4353,6 +4353,14 @@ static struct addrconf_sysctl_table
 			.proc_handler	=	proc_dointvec,
 		},
 		{
+			.ctl_name       = CTL_UNNUMBERED,
+			.procname       = "force_tllao",
+			.data           = &ipv6_devconf.force_tllao,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec
+		},
+		{
 			.ctl_name	=	0,	/* sentinel */
 		}
 	},
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f74e4e2..3507cfe 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -598,6 +598,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	icmp6h.icmp6_solicited = solicited;
 	icmp6h.icmp6_override = override;
 
+	inc_opt |= ifp->idev->cnf.force_tllao;
 	__ndisc_send(dev, neigh, daddr, src_addr,
 		     &icmp6h, solicited_addr,
 		     inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
-- 
1.5.6.5


^ permalink raw reply related

* Re: [PATCH] TCPCT-1: adding a sysctl
From: William Allen Simpson @ 2009-10-02 21:46 UTC (permalink / raw)
  To: netdev
In-Reply-To: <877hvdbj55.fsf@basil.nowhere.org>

Andi Kleen wrote:
> William Allen Simpson <william.allen.simpson@gmail.com> writes:
>> Any suggestions for improvement?  Or general approval?
> 
> The patch seems incomplete, can't find callers for most of the new functions.
> 
Ummm, I was following the suggested practice of breaking it into smaller
pieces for review.  This is just the control functions and headers.  I've
actually completed most of the port, and am champing at the bit.

I was hoping for concrete suggestions from the experienced Linux coders,
before submitting the rest of the code.


> In general cookies fell a bit out of favour because they don't support window
> scaling etc.  But you don't seem to fix that by putting that data into
> the new option.
> 
You mean DJB's "optionless" SYN cookies?  They saved everybody's bacon
back in the day, but that was when there were fewer options.  In 1996,
we all thought it was a quick hack on the way to a better solution.  But
the hack solved enough of the problem that nobody finished the work.

This option fixes (obviates and eventually obsoletes) SYN cookies, and
passes other options just fine.  That's one reason for doing it!

There should be a paper explaining in December's Usenix Login.  This is
the running code to go with the paper.


> My immediate gut reaction is that it will be likely challenging to 
> traverse many packet filters (which often have a tendency to drop
> anything they don't know) with this option on. That is also what killed
> ECN.
> 
Too true.  Not much we can do about it, but the various research surveys
suggest that an unknown option passes better....


^ permalink raw reply

* Re: [PATCH] TCPCT-1: adding a sysctl
From: William Allen Simpson @ 2009-10-02 22:00 UTC (permalink / raw)
  To: netdev
In-Reply-To: <4AC63DD1.3030705@gmail.com>

William Allen Simpson wrote:
> William Allen Simpson wrote:
>> This is a straightforward re-implementation of an earlier patch, that no
>> longer applies cleanly, that was reviewed:
>>
>>   http://thread.gmane.org/gmane.linux.network/102586
>>
> In that thread, David Miller wrote:
> 
>   "This looks mostly fine to me.  I would even advocate not using a config
>   option for this."
> 
> It would make the code look cleaner, and with the sysctl instead, it
> would probably be fine.  But SYN cookies has both.
> 
> Before I go much further, I'd like guidance.
> 
Based on Andi's expressed desire for more complete code before reviewing, and
the general utility of using a sysctl instead, I'll remove the config #ifdefs.
Sorry, I was overly cautious....

^ permalink raw reply

* Re: [Bug #14261] e1000e jumbo frames no longer work: 'Unsupported MTU setting'
From: Jeff Kirsher @ 2009-10-02 22:13 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: e1000-devel, Network Development, Linux Kernel Mailing List,
	Alexander Duyck, Jesse Brandeburg, Nix, Kernel Testers List
In-Reply-To: <200910022331.35916.rjw@sisk.pl>

On Fri, Oct 2, 2009 at 14:31, Rafael J. Wysocki <rjw@sisk.pl> wrote:
> On Friday 02 October 2009, Nix wrote:
>> On 1 Oct 2009, Rafael J. Wysocki stated:
>>
>> > The following bug entry is on the current list of known regressions
>> > introduced between 2.6.30 and 2.6.31.  Please verify if it still should
>> > be listed and let me know (either way).
>>
>> The patch fixes it.
>>
>> > Bug-Entry   : http://bugzilla.kernel.org/show_bug.cgi?id=14261
>> > Subject             : e1000e jumbo frames no longer work: 'Unsupported MTU setting'
>> > Submitter   : Nix <nix@esperi.org.uk>
>> > Date                : 2009-09-26 11:16 (6 days old)
>> > References  : http://marc.info/?l=linux-kernel&m=125396433321342&w=4
>> > Handled-By  : Alexander Duyck <alexander.duyck@gmail.com>
>> > Patch               : http://patchwork.kernel.org/patch/50277/
>>
>> (Possibly a stable candidate? It's not in 2.6.31.2-to-be, perhaps the
>> only patch that isn't. ;) )
>
> Most likely because it's not in the Linus' tree yet.
>
> [e1000e maintainers, we have a regression fix to merge, please.]
>
> Thanks,
> Rafael

Sorry, I forgot to send this patch out last night.  I will send it now.

-- 
Cheers,
Jeff

------------------------------------------------------------------------------
Come build with us! The BlackBerry® Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay 
ahead of the curve. Join us from November 9-12, 2009. Register now!
http://p.sf.net/sfu/devconf
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel

^ permalink raw reply

* [net-2.6 PATCH] e1000e: swap max hw supported frame size between 82574 and 82583
From: Jeff Kirsher @ 2009-10-02 22:30 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, rjw, Alexander Duyck, Jeff Kirsher

From: Alexander Duyck <alexander.h.duyck@intel.com>

There appears to have been a mixup in the max supported jumbo frame size
between 82574 and 82583 which ended up disabling jumbo frames on the 82574
as a result.  This patch swaps the two so that this issue is resolved.

This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=14261

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/e1000e/82571.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c
index b53b40b..d1e0563 100644
--- a/drivers/net/e1000e/82571.c
+++ b/drivers/net/e1000e/82571.c
@@ -1803,7 +1803,7 @@ struct e1000_info e1000_82574_info = {
 				  | FLAG_HAS_AMT
 				  | FLAG_HAS_CTRLEXT_ON_LOAD,
 	.pba			= 20,
-	.max_hw_frame_size	= ETH_FRAME_LEN + ETH_FCS_LEN,
+	.max_hw_frame_size	= DEFAULT_JUMBO,
 	.get_variants		= e1000_get_variants_82571,
 	.mac_ops		= &e82571_mac_ops,
 	.phy_ops		= &e82_phy_ops_bm,
@@ -1820,7 +1820,7 @@ struct e1000_info e1000_82583_info = {
 				  | FLAG_HAS_AMT
 				  | FLAG_HAS_CTRLEXT_ON_LOAD,
 	.pba			= 20,
-	.max_hw_frame_size	= DEFAULT_JUMBO,
+	.max_hw_frame_size	= ETH_FRAME_LEN + ETH_FCS_LEN,
 	.get_variants		= e1000_get_variants_82571,
 	.mac_ops		= &e82571_mac_ops,
 	.phy_ops		= &e82_phy_ops_bm,


^ permalink raw reply related

* Re: [PATCH] TCPCT-1: adding a sysctl
From: David Miller @ 2009-10-02 22:48 UTC (permalink / raw)
  To: william.allen.simpson; +Cc: netdev
In-Reply-To: <4AC674A4.2040900@gmail.com>

From: William Allen Simpson <william.allen.simpson@gmail.com>
Date: Fri, 02 Oct 2009 17:46:12 -0400

> Andi Kleen wrote:
>> William Allen Simpson <william.allen.simpson@gmail.com> writes:
>>> Any suggestions for improvement?  Or general approval?
>> The patch seems incomplete, can't find callers for most of the new
>> functions.
>> 
> Ummm, I was following the suggested practice of breaking it into
> smaller
> pieces for review.  This is just the control functions and headers.
> I've
> actually completed most of the port, and am champing at the bit.

We can't review the helper functions and infrastructure properly until
we can see how they are actually used.

Seeing how they are used shows us how well they are designed.

Otherwise asking for a review is absolutely pointless as we have no context
in which to judge the code you're showing us.

^ permalink raw reply

* tc: indirect shaping of incoming traffic
From: Hauke Laging @ 2009-10-02 22:46 UTC (permalink / raw)
  To: netdev

Hello,

I am not a programmer thus I cannot try to solve this problem myself but I 
have thought about using traffic shaping for incoming traffic:

http://www.hauke-laging.de/ideen/shape-incoming-ip-traffic/index.en.html

Maybe somebody with the necessary capabilities finds this interesting 
enough to give it a try...


CU

Hauke

^ permalink raw reply

* [PATCH 1/1] net: mark net_proto_ops as const
From: Stephen Hemminger @ 2009-10-02 23:25 UTC (permalink / raw)
  To: David S. Miller
  Cc: Bernard Pidoux F6BVP, Dan Carpenter, Andy Grover, Qinghuang Feng,
	Christine Caulfield, Ursula Braun, James Morris, David Howells,
	Hendrik Brueckner, Matthias Urlichs, Denis V. Lunev,
	Stephen Hemminger, linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	Alan Cox, Harvey Harrison, Chas Williams,
	linux-s390-u79uwXL29TY76Z2rM5mHXA, Mark Smith,
	Pekka Savola (ipv6), Eric Dumazet, Huang Weiyi, James Chapman,
	Henner 
In-Reply-To: <20091002232520.925496630@vyatta.com>

[-- Attachment #1: cons-proto-family.patch --]
[-- Type: text/plain, Size: 16409 bytes --]

All users of structure net_proto_family should be declared const.

---
 drivers/isdn/mISDN/socket.c       |    3 +--
 drivers/net/pppox.c               |    2 +-
 include/net/bluetooth/bluetooth.h |    2 +-
 net/appletalk/ddp.c               |    2 +-
 net/atm/pvc.c                     |    2 +-
 net/atm/svc.c                     |    2 +-
 net/ax25/af_ax25.c                |    2 +-
 net/bluetooth/af_bluetooth.c      |    4 ++--
 net/bluetooth/bnep/sock.c         |    2 +-
 net/bluetooth/cmtp/sock.c         |    2 +-
 net/bluetooth/hci_sock.c          |    2 +-
 net/bluetooth/hidp/sock.c         |    2 +-
 net/bluetooth/l2cap.c             |    2 +-
 net/bluetooth/rfcomm/sock.c       |    2 +-
 net/bluetooth/sco.c               |    2 +-
 net/can/af_can.c                  |    2 +-
 net/decnet/af_decnet.c            |    2 +-
 net/econet/af_econet.c            |    2 +-
 net/ieee802154/af_ieee802154.c    |    2 +-
 net/ipv4/af_inet.c                |    2 +-
 net/ipv6/af_inet6.c               |    2 +-
 net/ipx/af_ipx.c                  |    2 +-
 net/irda/af_irda.c                |    2 +-
 net/iucv/af_iucv.c                |    2 +-
 net/key/af_key.c                  |    2 +-
 net/llc/af_llc.c                  |    2 +-
 net/netlink/af_netlink.c          |    2 +-
 net/netrom/af_netrom.c            |    2 +-
 net/packet/af_packet.c            |    2 +-
 net/phonet/af_phonet.c            |    2 +-
 net/rds/af_rds.c                  |    2 +-
 net/rose/af_rose.c                |    2 +-
 net/rxrpc/af_rxrpc.c              |    2 +-
 net/unix/af_unix.c                |    2 +-
 net/x25/af_x25.c                  |    2 +-
 35 files changed, 36 insertions(+), 37 deletions(-)

--- a/drivers/isdn/mISDN/socket.c	2009-10-02 16:20:02.320291655 -0700
+++ b/drivers/isdn/mISDN/socket.c	2009-10-02 16:20:43.210280312 -0700
@@ -808,8 +808,7 @@ mISDN_sock_create(struct net *net, struc
 	return err;
 }
 
-static struct
-net_proto_family mISDN_sock_family_ops = {
+static const struct net_proto_family mISDN_sock_family_ops = {
 	.owner  = THIS_MODULE,
 	.family = PF_ISDN,
 	.create = mISDN_sock_create,
--- a/drivers/net/pppox.c	2009-10-01 19:03:16.918349768 -0700
+++ b/drivers/net/pppox.c	2009-10-02 16:20:43.210280312 -0700
@@ -125,7 +125,7 @@ out:
 	return rc;
 }
 
-static struct net_proto_family pppox_proto_family = {
+static const struct net_proto_family pppox_proto_family = {
 	.family	= PF_PPPOX,
 	.create	= pppox_create,
 	.owner	= THIS_MODULE,
--- a/include/net/bluetooth/bluetooth.h	2009-10-01 19:03:17.038350923 -0700
+++ b/include/net/bluetooth/bluetooth.h	2009-10-02 16:20:43.210280312 -0700
@@ -121,7 +121,7 @@ struct bt_sock_list {
 	rwlock_t          lock;
 };
 
-int  bt_sock_register(int proto, struct net_proto_family *ops);
+int  bt_sock_register(int proto, const struct net_proto_family *ops);
 int  bt_sock_unregister(int proto);
 void bt_sock_link(struct bt_sock_list *l, struct sock *s);
 void bt_sock_unlink(struct bt_sock_list *l, struct sock *s);
--- a/net/appletalk/ddp.c	2009-10-01 19:03:16.634348458 -0700
+++ b/net/appletalk/ddp.c	2009-10-02 16:20:43.210280312 -0700
@@ -1821,7 +1821,7 @@ static int atalk_compat_ioctl(struct soc
 #endif
 
 
-static struct net_proto_family atalk_family_ops = {
+static const struct net_proto_family atalk_family_ops = {
 	.family		= PF_APPLETALK,
 	.create		= atalk_create,
 	.owner		= THIS_MODULE,
--- a/net/atm/pvc.c	2009-10-02 16:20:02.440301882 -0700
+++ b/net/atm/pvc.c	2009-10-02 16:20:43.210280312 -0700
@@ -137,7 +137,7 @@ static int pvc_create(struct net *net, s
 }
 
 
-static struct net_proto_family pvc_family_ops = {
+static const struct net_proto_family pvc_family_ops = {
 	.family = PF_ATMPVC,
 	.create = pvc_create,
 	.owner = THIS_MODULE,
--- a/net/atm/svc.c	2009-10-02 16:20:02.440301882 -0700
+++ b/net/atm/svc.c	2009-10-02 16:20:43.210280312 -0700
@@ -666,7 +666,7 @@ static int svc_create(struct net *net, s
 }
 
 
-static struct net_proto_family svc_family_ops = {
+static const struct net_proto_family svc_family_ops = {
 	.family = PF_ATMSVC,
 	.create = svc_create,
 	.owner = THIS_MODULE,
--- a/net/ax25/af_ax25.c	2009-10-02 16:20:02.440301882 -0700
+++ b/net/ax25/af_ax25.c	2009-10-02 16:20:43.210280312 -0700
@@ -1961,7 +1961,7 @@ static const struct file_operations ax25
 
 #endif
 
-static struct net_proto_family ax25_family_ops = {
+static const struct net_proto_family ax25_family_ops = {
 	.family =	PF_AX25,
 	.create =	ax25_create,
 	.owner	=	THIS_MODULE,
--- a/net/bluetooth/af_bluetooth.c	2009-10-01 19:03:16.854348407 -0700
+++ b/net/bluetooth/af_bluetooth.c	2009-10-02 16:20:43.210280312 -0700
@@ -45,7 +45,7 @@
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
-static struct net_proto_family *bt_proto[BT_MAX_PROTO];
+static const struct net_proto_family *bt_proto[BT_MAX_PROTO];
 static DEFINE_RWLOCK(bt_proto_lock);
 
 static struct lock_class_key bt_lock_key[BT_MAX_PROTO];
@@ -86,7 +86,7 @@ static inline void bt_sock_reclassify_lo
 				bt_key_strings[proto], &bt_lock_key[proto]);
 }
 
-int bt_sock_register(int proto, struct net_proto_family *ops)
+int bt_sock_register(int proto, const struct net_proto_family *ops)
 {
 	int err = 0;
 
--- a/net/bluetooth/bnep/sock.c	2009-10-01 19:03:16.898350369 -0700
+++ b/net/bluetooth/bnep/sock.c	2009-10-02 16:20:43.210280312 -0700
@@ -222,7 +222,7 @@ static int bnep_sock_create(struct net *
 	return 0;
 }
 
-static struct net_proto_family bnep_sock_family_ops = {
+static const struct net_proto_family bnep_sock_family_ops = {
 	.family = PF_BLUETOOTH,
 	.owner	= THIS_MODULE,
 	.create = bnep_sock_create
--- a/net/bluetooth/cmtp/sock.c	2009-10-01 19:03:16.874349134 -0700
+++ b/net/bluetooth/cmtp/sock.c	2009-10-02 16:20:43.220342414 -0700
@@ -217,7 +217,7 @@ static int cmtp_sock_create(struct net *
 	return 0;
 }
 
-static struct net_proto_family cmtp_sock_family_ops = {
+static const struct net_proto_family cmtp_sock_family_ops = {
 	.family	= PF_BLUETOOTH,
 	.owner	= THIS_MODULE,
 	.create	= cmtp_sock_create
--- a/net/bluetooth/hci_sock.c	2009-10-02 16:20:02.440301882 -0700
+++ b/net/bluetooth/hci_sock.c	2009-10-02 16:20:43.220342414 -0700
@@ -687,7 +687,7 @@ static int hci_sock_dev_event(struct not
 	return NOTIFY_DONE;
 }
 
-static struct net_proto_family hci_sock_family_ops = {
+static const struct net_proto_family hci_sock_family_ops = {
 	.family	= PF_BLUETOOTH,
 	.owner	= THIS_MODULE,
 	.create	= hci_sock_create,
--- a/net/bluetooth/hidp/sock.c	2009-10-01 19:03:16.794397698 -0700
+++ b/net/bluetooth/hidp/sock.c	2009-10-02 16:20:43.220342414 -0700
@@ -268,7 +268,7 @@ static int hidp_sock_create(struct net *
 	return 0;
 }
 
-static struct net_proto_family hidp_sock_family_ops = {
+static const struct net_proto_family hidp_sock_family_ops = {
 	.family	= PF_BLUETOOTH,
 	.owner	= THIS_MODULE,
 	.create	= hidp_sock_create
--- a/net/bluetooth/l2cap.c	2009-10-02 16:20:02.440301882 -0700
+++ b/net/bluetooth/l2cap.c	2009-10-02 16:20:43.220342414 -0700
@@ -3916,7 +3916,7 @@ static const struct proto_ops l2cap_sock
 	.getsockopt	= l2cap_sock_getsockopt
 };
 
-static struct net_proto_family l2cap_sock_family_ops = {
+static const struct net_proto_family l2cap_sock_family_ops = {
 	.family	= PF_BLUETOOTH,
 	.owner	= THIS_MODULE,
 	.create	= l2cap_sock_create,
--- a/net/bluetooth/rfcomm/sock.c	2009-10-02 16:20:02.440301882 -0700
+++ b/net/bluetooth/rfcomm/sock.c	2009-10-02 16:20:43.220342414 -0700
@@ -1101,7 +1101,7 @@ static const struct proto_ops rfcomm_soc
 	.mmap		= sock_no_mmap
 };
 
-static struct net_proto_family rfcomm_sock_family_ops = {
+static const struct net_proto_family rfcomm_sock_family_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.create		= rfcomm_sock_create
--- a/net/bluetooth/sco.c	2009-10-02 16:20:02.440301882 -0700
+++ b/net/bluetooth/sco.c	2009-10-02 16:20:43.220342414 -0700
@@ -993,7 +993,7 @@ static const struct proto_ops sco_sock_o
 	.getsockopt	= sco_sock_getsockopt
 };
 
-static struct net_proto_family sco_sock_family_ops = {
+static const struct net_proto_family sco_sock_family_ops = {
 	.family	= PF_BLUETOOTH,
 	.owner	= THIS_MODULE,
 	.create	= sco_sock_create,
--- a/net/can/af_can.c	2009-10-01 19:03:16.437350580 -0700
+++ b/net/can/af_can.c	2009-10-02 16:20:43.220342414 -0700
@@ -842,7 +842,7 @@ static struct packet_type can_packet __r
 	.func = can_rcv,
 };
 
-static struct net_proto_family can_family_ops __read_mostly = {
+static const struct net_proto_family can_family_ops __read_mostly = {
 	.family = PF_CAN,
 	.create = can_create,
 	.owner  = THIS_MODULE,
--- a/net/decnet/af_decnet.c	2009-10-02 16:20:02.450290721 -0700
+++ b/net/decnet/af_decnet.c	2009-10-02 16:20:43.220342414 -0700
@@ -2325,7 +2325,7 @@ static const struct file_operations dn_s
 };
 #endif
 
-static struct net_proto_family	dn_family_ops = {
+static const struct net_proto_family	dn_family_ops = {
 	.family =	AF_DECnet,
 	.create =	dn_create,
 	.owner	=	THIS_MODULE,
--- a/net/econet/af_econet.c	2009-10-01 19:03:16.381348558 -0700
+++ b/net/econet/af_econet.c	2009-10-02 16:20:43.220342414 -0700
@@ -742,7 +742,7 @@ static int econet_ioctl(struct socket *s
 	return 0;
 }
 
-static struct net_proto_family econet_family_ops = {
+static const struct net_proto_family econet_family_ops = {
 	.family =	PF_ECONET,
 	.create =	econet_create,
 	.owner	=	THIS_MODULE,
--- a/net/ieee802154/af_ieee802154.c	2009-10-01 19:03:16.534350765 -0700
+++ b/net/ieee802154/af_ieee802154.c	2009-10-02 16:20:43.220342414 -0700
@@ -285,7 +285,7 @@ out:
 	return rc;
 }
 
-static struct net_proto_family ieee802154_family_ops = {
+static const struct net_proto_family ieee802154_family_ops = {
 	.family		= PF_IEEE802154,
 	.create		= ieee802154_create,
 	.owner		= THIS_MODULE,
--- a/net/ipv4/af_inet.c	2009-10-02 16:20:02.450290721 -0700
+++ b/net/ipv4/af_inet.c	2009-10-02 16:20:43.220342414 -0700
@@ -931,7 +931,7 @@ static const struct proto_ops inet_sockr
 #endif
 };
 
-static struct net_proto_family inet_family_ops = {
+static const struct net_proto_family inet_family_ops = {
 	.family = PF_INET,
 	.create = inet_create,
 	.owner	= THIS_MODULE,
--- a/net/ipv6/af_inet6.c	2009-10-01 19:03:16.558349136 -0700
+++ b/net/ipv6/af_inet6.c	2009-10-02 16:20:43.220342414 -0700
@@ -552,7 +552,7 @@ const struct proto_ops inet6_dgram_ops =
 #endif
 };
 
-static struct net_proto_family inet6_family_ops = {
+static const struct net_proto_family inet6_family_ops = {
 	.family = PF_INET6,
 	.create = inet6_create,
 	.owner	= THIS_MODULE,
--- a/net/ipx/af_ipx.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/ipx/af_ipx.c	2009-10-02 16:20:43.220342414 -0700
@@ -1927,7 +1927,7 @@ static int ipx_compat_ioctl(struct socke
  * Socket family declarations
  */
 
-static struct net_proto_family ipx_family_ops = {
+static const struct net_proto_family ipx_family_ops = {
 	.family		= PF_IPX,
 	.create		= ipx_create,
 	.owner		= THIS_MODULE,
--- a/net/irda/af_irda.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/irda/af_irda.c	2009-10-02 16:20:43.220342414 -0700
@@ -2463,7 +2463,7 @@ bed:
 	return 0;
 }
 
-static struct net_proto_family irda_family_ops = {
+static const struct net_proto_family irda_family_ops = {
 	.family = PF_IRDA,
 	.create = irda_create,
 	.owner	= THIS_MODULE,
--- a/net/iucv/af_iucv.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/iucv/af_iucv.c	2009-10-02 16:20:43.220342414 -0700
@@ -1715,7 +1715,7 @@ static const struct proto_ops iucv_sock_
 	.getsockopt	= iucv_sock_getsockopt,
 };
 
-static struct net_proto_family iucv_sock_family_ops = {
+static const struct net_proto_family iucv_sock_family_ops = {
 	.family	= AF_IUCV,
 	.owner	= THIS_MODULE,
 	.create	= iucv_sock_create,
--- a/net/key/af_key.c	2009-10-01 19:03:16.477349034 -0700
+++ b/net/key/af_key.c	2009-10-02 16:20:43.230306460 -0700
@@ -3644,7 +3644,7 @@ static const struct proto_ops pfkey_ops 
 	.recvmsg	=	pfkey_recvmsg,
 };
 
-static struct net_proto_family pfkey_family_ops = {
+static const struct net_proto_family pfkey_family_ops = {
 	.family	=	PF_KEY,
 	.create	=	pfkey_create,
 	.owner	=	THIS_MODULE,
--- a/net/llc/af_llc.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/llc/af_llc.c	2009-10-02 16:20:43.230306460 -0700
@@ -1092,7 +1092,7 @@ out:
 	return rc;
 }
 
-static struct net_proto_family llc_ui_family_ops = {
+static const struct net_proto_family llc_ui_family_ops = {
 	.family = PF_LLC,
 	.create = llc_ui_create,
 	.owner	= THIS_MODULE,
--- a/net/netlink/af_netlink.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/netlink/af_netlink.c	2009-10-02 16:20:43.230306460 -0700
@@ -2050,7 +2050,7 @@ static const struct proto_ops netlink_op
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct net_proto_family netlink_family_ops = {
+static const struct net_proto_family netlink_family_ops = {
 	.family = PF_NETLINK,
 	.create = netlink_create,
 	.owner	= THIS_MODULE,	/* for consistency 8) */
--- a/net/netrom/af_netrom.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/netrom/af_netrom.c	2009-10-02 16:20:43.230306460 -0700
@@ -1372,7 +1372,7 @@ static const struct file_operations nr_i
 };
 #endif	/* CONFIG_PROC_FS */
 
-static struct net_proto_family nr_family_ops = {
+static const struct net_proto_family nr_family_ops = {
 	.family		=	PF_NETROM,
 	.create		=	nr_create,
 	.owner		=	THIS_MODULE,
--- a/net/packet/af_packet.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/packet/af_packet.c	2009-10-02 16:20:43.230306460 -0700
@@ -2363,7 +2363,7 @@ static const struct proto_ops packet_ops
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct net_proto_family packet_family_ops = {
+static const struct net_proto_family packet_family_ops = {
 	.family =	PF_PACKET,
 	.create =	packet_create,
 	.owner	=	THIS_MODULE,
--- a/net/phonet/af_phonet.c	2009-10-01 19:03:16.674395303 -0700
+++ b/net/phonet/af_phonet.c	2009-10-02 16:20:43.230306460 -0700
@@ -118,7 +118,7 @@ out:
 	return err;
 }
 
-static struct net_proto_family phonet_proto_family = {
+static const struct net_proto_family phonet_proto_family = {
 	.family = PF_PHONET,
 	.create = pn_socket_create,
 	.owner = THIS_MODULE,
--- a/net/rds/af_rds.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/rds/af_rds.c	2009-10-02 16:20:43.230306460 -0700
@@ -431,7 +431,7 @@ void rds_sock_put(struct rds_sock *rs)
 	sock_put(rds_rs_to_sk(rs));
 }
 
-static struct net_proto_family rds_family_ops = {
+static const struct net_proto_family rds_family_ops = {
 	.family =	AF_RDS,
 	.create =	rds_create,
 	.owner	=	THIS_MODULE,
--- a/net/rose/af_rose.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/rose/af_rose.c	2009-10-02 16:20:43.230306460 -0700
@@ -1509,7 +1509,7 @@ static const struct file_operations rose
 };
 #endif	/* CONFIG_PROC_FS */
 
-static struct net_proto_family rose_family_ops = {
+static const struct net_proto_family rose_family_ops = {
 	.family		=	PF_ROSE,
 	.create		=	rose_create,
 	.owner		=	THIS_MODULE,
--- a/net/rxrpc/af_rxrpc.c	2009-10-02 16:20:02.460301560 -0700
+++ b/net/rxrpc/af_rxrpc.c	2009-10-02 16:20:43.230306460 -0700
@@ -777,7 +777,7 @@ static struct proto rxrpc_proto = {
 	.max_header	= sizeof(struct rxrpc_header),
 };
 
-static struct net_proto_family rxrpc_family_ops = {
+static const struct net_proto_family rxrpc_family_ops = {
 	.family	= PF_RXRPC,
 	.create = rxrpc_create,
 	.owner	= THIS_MODULE,
--- a/net/unix/af_unix.c	2009-10-01 19:03:16.518348870 -0700
+++ b/net/unix/af_unix.c	2009-10-02 16:20:43.230306460 -0700
@@ -2214,7 +2214,7 @@ static const struct file_operations unix
 
 #endif
 
-static struct net_proto_family unix_family_ops = {
+static const struct net_proto_family unix_family_ops = {
 	.family = PF_UNIX,
 	.create = unix_create,
 	.owner	= THIS_MODULE,
--- a/net/x25/af_x25.c	2009-10-02 16:20:02.470281669 -0700
+++ b/net/x25/af_x25.c	2009-10-02 16:20:43.230306460 -0700
@@ -1476,7 +1476,7 @@ static int x25_ioctl(struct socket *sock
 	return rc;
 }
 
-static struct net_proto_family x25_family_ops = {
+static const struct net_proto_family x25_family_ops = {
 	.family =	AF_X25,
 	.create =	x25_create,
 	.owner	=	THIS_MODULE,

-- 


------------------------------------------------------------------------------
Come build with us! The BlackBerry® Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay
ahead of the curve. Join us from November 9-12, 2009. Register now!
http://p.sf.net/sfu/devconf

^ permalink raw reply

* Re: [PATCH 1/1] net: mark net_proto_ops as const
From: Alexey Dobriyan @ 2009-10-03  0:00 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Bernard Pidoux F6BVP, Dan Carpenter, Andy Grover, Henner Eisen,
	Christine Caulfield, Ursula Braun, James Morris, David Howells,
	Hendrik Brueckner, Matthias Urlichs, Denis V. Lunev,
	linux-afs-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Alan Cox,
	Harvey Harrison, Chas Williams, linux-s390-u79uwXL29TY76Z2rM5mHXA,
	Mark Smith, Pekka Savola (ipv6), Eric Dumazet, Huang Weiyi,
	James Chapman, Johan Hedberg, Arnaldo 
In-Reply-To: <20091002232721.385245919-ZtmgI6mnKB3QT0dZR+AlfA@public.gmane.org>

On Fri, Oct 02, 2009 at 04:25:21PM -0700, Stephen Hemminger wrote:
> --- a/net/can/af_can.c
> +++ b/net/can/af_can.c
> @@ -842,7 +842,7 @@ static struct packet_type can_packet __r
>  	.func = can_rcv,
>  };
>  
> -static struct net_proto_family can_family_ops __read_mostly = {
> +static const struct net_proto_family can_family_ops __read_mostly = {
						       ^^^^^^^^^^^^^
>  	.family = PF_CAN,
>  	.create = can_create,
>  	.owner  = THIS_MODULE,

ACK, except this chunk: const already means read-only.

------------------------------------------------------------------------------
Come build with us! The BlackBerry® Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay
ahead of the curve. Join us from November 9-12, 2009. Register now!
http://p.sf.net/sfu/devconf

^ permalink raw reply

* Re: [PATCH] TCPCT-1: adding a sysctl
From: William Allen Simpson @ 2009-10-03  0:32 UTC (permalink / raw)
  To: netdev
In-Reply-To: <20091002.154808.137771153.davem@davemloft.net>

David Miller wrote:
> From: William Allen Simpson <william.allen.simpson@gmail.com>
>> Ummm, I was following the suggested practice of breaking it into
>> smaller
>> pieces for review.  This is just the control functions and headers.
>> I've
>> actually completed most of the port, and am champing at the bit.
> 
> We can't review the helper functions and infrastructure properly until
> we can see how they are actually used.
> 
> Seeing how they are used shows us how well they are designed.
> 
> Otherwise asking for a review is absolutely pointless as we have no context
> in which to judge the code you're showing us.
> 
Thanks.  I'd hand-split my code into much smaller patches for review.
Now, I know there are patches that are *too* small....

I've merged the several things you've mentioned, and will post it soon
(after making sure it compiles and runs separately).

^ permalink raw reply

* Re: [PATCH 1/3] bonding: allow previous slave to be used when re-balancing traffic on tlb/alb interfaces
From: Jay Vosburgh @ 2009-10-03  1:13 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: netdev, David Miller
In-Reply-To: <1254269731-7341-2-git-send-email-fubar@us.ibm.com>

Jay Vosburgh <fubar@us.ibm.com> wrote:

>From: Andy Gospodarek <andy@greyhouse.net>
>
>When using tlb (mode 5) or alb (mode 6) bonding, a task runs every 10s
>and re-balances the output devices based on load.  I was trying to
>diagnose some connectivity issues and realized that a high-traffic host
>would often switch output interfaces every 10s.  I discovered this
>happened because the 'least loaded interface' was chosen as the next
>output interface for any given stream and quite often some lower load
>traffic would slip in and take the interface previously used by our
>stream.  This meant the 'least loaded interface' was no longer the one
>we used during the last interval.
>
>The switching of streams to another interface was not extremely helpful
>as it would force the destination host or router to update its ARP
>tables and produce some additional ARP traffic as the destination host
>verified that it was using the MAC address it expected.  Having the
>destination MAC for a given IP change every 10s seems undesirable.
>
>The decision was made to use the same slave during this interval if the
>current load on that interface was < 10.  A load of < 10 indicates that
>during the last 10s sample, roughly 100bytes were sent by all streams
>currently assigned to that interface.  This essentially means the
>interface is unloaded, but allows for a few frames that will probably
>have minimal impact to slip into the same interface we were using in the
>past.

	Andy, I've been doing some further testing with this patch, and
I'm seeing some panics that I believe are related to this patch.  It
appears that the last_slave isn't cleared (or isn't cleared soon enough)
when a slave is released, and concurrent transmit activity is getting
into tlb_get_best_slave() and finding a last_slave pointer that is stale
(points to no slave currently on the slave list).

	This seems to reproduce fairly consistently when I set up alb
mode with two slaves, change the active slave so that alb mode moves the
MACs around, then release the inactive slave.  I run a concurrent "ping
-f" of some remote host.

	I added some code to tlb_clear_slave to set last_slave to NULL if
save_load is 0, but the problem still happened.  I think the race is
that bond_alb_deinit_slave is called with the bond->lock released, but
the slave has already been detached in bond_release, and concurrent
transmit activity gets in and looks up last_slave.

	I'm out of time for today, so I'll look at this more on Monday
if I haven't heard back from you.

	-J

>Signed-off-by: Andy Gospodarek <andy@greyhouse.net>
>Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
>
>---
> drivers/net/bonding/bond_alb.c |   21 ++++++++++++++++++++-
> drivers/net/bonding/bond_alb.h |    4 ++++
> 2 files changed, 24 insertions(+), 1 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index 9b5936f..cf2842e 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -150,6 +150,7 @@ static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_
> 		entry->load_history = 1 + entry->tx_bytes /
> 				      BOND_TLB_REBALANCE_INTERVAL;
> 		entry->tx_bytes = 0;
>+		entry->last_slave = entry->tx_slave;
> 	}
>
> 	entry->tx_slave = NULL;
>@@ -270,6 +271,24 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
> 	return least_loaded;
> }
>
>+/* Caller must hold bond lock for read and hashtbl lock */
>+static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
>+{
>+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>+	struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
>+	struct slave *last_slave = tx_hash_table[hash_index].last_slave;
>+	struct slave *next_slave = NULL;
>+
>+	if (last_slave && SLAVE_IS_OK(last_slave)) {
>+		/* Use the last slave listed in the tx hashtbl if:
>+		   the last slave currently is essentially unloaded. */
>+		if (SLAVE_TLB_INFO(last_slave).load < 10)
>+			next_slave = last_slave;
>+	}
>+
>+	return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
>+}
>+
> /* Caller must hold bond lock for read */
> static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
> {
>@@ -282,7 +301,7 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
> 	hash_table = bond_info->tx_hashtbl;
> 	assigned_slave = hash_table[hash_index].tx_slave;
> 	if (!assigned_slave) {
>-		assigned_slave = tlb_get_least_loaded_slave(bond);
>+		assigned_slave = tlb_get_best_slave(bond, hash_index);
>
> 		if (assigned_slave) {
> 			struct tlb_slave_info *slave_info =
>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>index 50968f8..b65fd29 100644
>--- a/drivers/net/bonding/bond_alb.h
>+++ b/drivers/net/bonding/bond_alb.h
>@@ -36,6 +36,10 @@ struct tlb_client_info {
> 				 * packets to a Client that the Hash function
> 				 * gave this entry index.
> 				 */
>+	struct slave *last_slave; /* Pointer to last slave used for transmiting
>+				 * packets to a Client that the Hash function
>+				 * gave this entry index.
>+				 */
> 	u32 tx_bytes;		/* Each Client acumulates the BytesTx that
> 				 * were tranmitted to it, and after each
> 				 * CallBack the LoadHistory is devided
>-- 
>1.6.0.2
>
>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply

* clownix_spy: qdisc monitor + generic kernel variable plotter
From: clownix @ 2009-10-02 18:15 UTC (permalink / raw)
  To: netdev

At http://clownix.net, there is a qdisc monitor based on a sched
qdisc named "spy" and a module that periodically sends the
enqueues/dequeues/drops/queue-size/delays... to be gtk-plotted to the
user world (through a netlink socket).

With a few lines written in a module, any kernel variable can be
plotted.

Note that the name of this software package is "clownix_spy", and not
cloonix_net which is another project on the same site.
Regards 
Vincent Perrier


^ permalink raw reply

* [PATCH] pktgen: Fix multiqueue handling
From: Eric Dumazet @ 2009-10-03  6:24 UTC (permalink / raw)
  To: David S. Miller; +Cc: Robert Olsson, Linux Netdev List, Stephen Hemminger

Note: I could not really test this patch; I don't have multiqueue hardware yet.

I found this by code inspection, please double check, thanks

[PATCH] pktgen: Fix multiqueue handling

It is not currently possible to instruct pktgen to use one selected tx queue.

When Robert added multiqueue support in commit 45b270f8, he added
an interval (queue_map_min, queue_map_max), and his code doesn't take
into account the case of min = max, to select one tx queue exactly.

I suspect a high performance setup on an eight txqueue device wants
to use exactly eight cpus, and assign one tx queue to each sender.

This patch makes pktgen select the right tx queue, not the first one.

Also updates Documentation to reflect Robert's changes.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 Documentation/networking/pktgen.txt |    8 ++++++++
 net/core/pktgen.c                   |    2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index c6cf4a3..61bb645 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -90,6 +90,11 @@ Examples:
  pgset "dstmac 00:00:00:00:00:00"    sets MAC destination address
  pgset "srcmac 00:00:00:00:00:00"    sets MAC source address
 
+ pgset "queue_map_min 0" Sets the min value of tx queue interval
+ pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices
+                         To select queue 1 of a given device,
+                         use queue_map_min=1 and queue_map_max=1
+
  pgset "src_mac_count 1" Sets the number of MACs we'll range through.  
                          The 'minimum' MAC is what you set with srcmac.
 
@@ -101,6 +106,9 @@ Examples:
                               IPDST_RND, UDPSRC_RND,
                               UDPDST_RND, MACSRC_RND, MACDST_RND 
                               MPLS_RND, VID_RND, SVID_RND
+                              QUEUE_MAP_RND # queue map random
+                              QUEUE_MAP_CPU # queue map mirrors smp_processor_id()
+
 
  pgset "udp_src_min 9"   set UDP source port min, If < udp_src_max, then
                          cycle through the port range.
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b694552..421857c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2212,7 +2212,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->flags & F_QUEUE_MAP_CPU)
 		pkt_dev->cur_queue_map = smp_processor_id();
 
-	else if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+	else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) {
 		__u16 t;
 		if (pkt_dev->flags & F_QUEUE_MAP_RND) {
 			t = random32() %

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox