Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 2/4] bridge: multicast flood
From: Stephen Hemminger @ 2010-04-28  1:01 UTC (permalink / raw)
  To: David S. Miller, Herbert Xu; +Cc: netdev
In-Reply-To: <20100428010103.386761596@vyatta.com>

[-- Attachment #1: br-router-list-rcu.patch --]
[-- Type: text/plain, Size: 786 bytes --]

Fix unsafe usage of RCU. Would never work on Alpha SMP because
of lack of rcu_dereference()

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/net/bridge/br_forward.c	2010-04-27 17:11:52.008626080 -0700
+++ b/net/bridge/br_forward.c	2010-04-27 17:23:46.388602995 -0700
@@ -216,7 +216,7 @@ static void br_multicast_flood(struct ne
 
 	prev = NULL;
 
-	rp = br->router_list.first;
+	rp = rcu_dereference(br->router_list.first);
 	p = mdst ? mdst->ports : NULL;
 	while (p || rp) {
 		lport = p ? p->port : NULL;
@@ -233,7 +233,7 @@ static void br_multicast_flood(struct ne
 		if ((unsigned long)lport >= (unsigned long)port)
 			p = p->next;
 		if ((unsigned long)rport >= (unsigned long)port)
-			rp = rp->next;
+			rp = rcu_dereference(rp->next);
 	}
 
 	if (!prev)

-- 


^ permalink raw reply

* [PATCH net-next 3/4] bridge: multicast port group RCU fix
From: Stephen Hemminger @ 2010-04-28  1:01 UTC (permalink / raw)
  To: David S. Miller, Herbert Xu; +Cc: netdev
In-Reply-To: <20100428010103.386761596@vyatta.com>

[-- Attachment #1: br-portlist-rcu.patch --]
[-- Type: text/plain, Size: 1463 bytes --]

The recently introduced bridge multicast port group list was only
partially using RCU correctly. It was missing rcu_dereference()
and missing the necessary barrier on deletion.

The code should have used one of the standard list methods (list or hlist)
instead of open coding an RCU-based linked list.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/net/bridge/br_forward.c	2010-04-27 17:51:27.909588950 -0700
+++ b/net/bridge/br_forward.c	2010-04-27 17:53:18.790721091 -0700
@@ -217,7 +217,7 @@ static void br_multicast_flood(struct ne
 	prev = NULL;
 
 	rp = rcu_dereference(br->router_list.first);
-	p = mdst ? mdst->ports : NULL;
+	p = mdst ? rcu_dereference(mdst->ports) : NULL;
 	while (p || rp) {
 		lport = p ? p->port : NULL;
 		rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
@@ -231,7 +231,7 @@ static void br_multicast_flood(struct ne
 			goto out;
 
 		if ((unsigned long)lport >= (unsigned long)port)
-			p = p->next;
+			p = rcu_dereference(p->next);
 		if ((unsigned long)rport >= (unsigned long)port)
 			rp = rcu_dereference(rp->next);
 	}
--- a/net/bridge/br_multicast.c	2010-04-27 17:51:31.509593914 -0700
+++ b/net/bridge/br_multicast.c	2010-04-27 17:52:48.209243982 -0700
@@ -259,7 +259,7 @@ static void br_multicast_del_pg(struct n
 		if (p != pg)
 			continue;
 
-		*pp = p->next;
+		rcu_assign_pointer(*pp, p->next);
 		hlist_del_init(&p->mglist);
 		del_timer(&p->timer);
 		del_timer(&p->query_timer);

-- 


^ permalink raw reply

* [PATCH net-next 4/4] bridge: multicast_flood cleanup
From: Stephen Hemminger @ 2010-04-28  1:01 UTC (permalink / raw)
  To: David S. Miller, Herbert Xu; +Cc: netdev
In-Reply-To: <20100428010103.386761596@vyatta.com>

[-- Attachment #1: br-flood-clean.patch --]
[-- Type: text/plain, Size: 958 bytes --]

Move some declarations around to make it clearer which variables
are being used inside loop.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/net/bridge/br_forward.c	2010-04-27 17:58:25.739592056 -0700
+++ b/net/bridge/br_forward.c	2010-04-27 17:59:17.182654034 -0700
@@ -208,17 +208,15 @@ static void br_multicast_flood(struct ne
 {
 	struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_bridge_port *port;
-	struct net_bridge_port *lport, *rport;
-	struct net_bridge_port *prev;
+	struct net_bridge_port *prev = NULL;
 	struct net_bridge_port_group *p;
 	struct hlist_node *rp;
 
-	prev = NULL;
-
 	rp = rcu_dereference(br->router_list.first);
 	p = mdst ? rcu_dereference(mdst->ports) : NULL;
 	while (p || rp) {
+		struct net_bridge_port *port, *lport, *rport;
+
 		lport = p ? p->port : NULL;
 		rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
 			     NULL;

-- 


^ permalink raw reply

* Re: [PATCH v5] net/usb: add sierra_net.c driver
From: David Miller @ 2010-04-28  1:07 UTC (permalink / raw)
  To: epasheva; +Cc: dbrownell, rfiler, netdev, linux-usb
In-Reply-To: <1272415711.10975.3.camel@Linuxdev4-laptop>

From: Elina Pasheva <epasheva@sierrawireless.com>
Date: Tue, 27 Apr 2010 17:48:31 -0700

> Subject: [PATCH v5] net/usb: add sierra_net.c driver

Applied, although I had to manually remove the stray extra
newline at the end of drivers/net/usb/sierra_net.c which GIT
complains about when the patch is applied.

^ permalink raw reply

* Re: linux-next: build failure after merge of the final tree (net tree related)
From: Stephen Rothwell @ 2010-04-28  1:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-next, linux-kernel, yoshfuji
In-Reply-To: <20100427.101804.13765995.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 312 bytes --]

Hi Dave,

On Tue, 27 Apr 2010 10:18:04 -0700 (PDT) David Miller <davem@davemloft.net> wrote:
>
> I just committed the following for this:
> 
> bridge: Fix build of ipv6 multicast code.

Thanks.

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply

* Re: [PATCH v5] net/usb: add sierra_net.c driver
From: David Miller @ 2010-04-28  1:11 UTC (permalink / raw)
  To: epasheva; +Cc: dbrownell, rfiler, netdev, linux-usb
In-Reply-To: <20100427.180732.121248582.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Tue, 27 Apr 2010 18:07:32 -0700 (PDT)

> From: Elina Pasheva <epasheva@sierrawireless.com>
> Date: Tue, 27 Apr 2010 17:48:31 -0700
> 
>> Subject: [PATCH v5] net/usb: add sierra_net.c driver
> 
> Applied, although I had to manually remove the stray extra
> newline at the end of drivers/net/usb/sierra_net.c which GIT
> complains about when the patch is applied.

I also had to fix up the following warning during the build:

drivers/net/usb/sierra_net.c: In function ‘sierra_net_parse_lsi’:
drivers/net/usb/sierra_net.c:365: warning: format ‘%u’ expects type ‘unsigned int’, but argument 7 has type ‘long unsigned int’

"size_t" objects must use the "z" format specifier, so "%zu" is what
should have been used here.

^ permalink raw reply

* Re: [PATCH net-next 0/4] Bridge IGMP cleanup and fixes
From: David Miller @ 2010-04-28  1:14 UTC (permalink / raw)
  To: shemminger; +Cc: herbert, netdev
In-Reply-To: <20100428010103.386761596@vyatta.com>


All looks good, I'll apply to net-next-2.6 and build test.

Thanks Stephen.

^ permalink raw reply

* Re: [PATCH v5] net/usb: add sierra_net.c driver
From: Elina Pasheva @ 2010-04-28  1:19 UTC (permalink / raw)
  To: David Miller
  Cc: dbrownell-Rn4VEauK+AKRv+LV9MX5uipxlwaOVQ5f@public.gmane.org,
	Rory Filer, netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-usb-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <20100427.181149.179278301.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>

On Tue, 2010-04-27 at 18:11 -0700, David Miller wrote:
> From: David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
> Date: Tue, 27 Apr 2010 18:07:32 -0700 (PDT)
> 
> > From: Elina Pasheva <epasheva-ywE8TTl5eJHWpu6QEFMNjNBPR1lH4CV8@public.gmane.org>
> > Date: Tue, 27 Apr 2010 17:48:31 -0700
> > 
> >> Subject: [PATCH v5] net/usb: add sierra_net.c driver
> > 
> > Applied, although I had to manually remove the stray extra
> > newline at the end of drivers/net/usb/sierra_net.c which GIT
> > complains about when the patch is applied.
> 
> I also had to fix up the following warning during the build:
> 
> drivers/net/usb/sierra_net.c: In function ‘sierra_net_parse_lsi’:
> drivers/net/usb/sierra_net.c:365: warning: format ‘%u’ expects type ‘unsigned int’, but argument 7 has type ‘long unsigned int’
> 
> "size_t" objects must use the "z" format specifier, so "%zu" is what
> should have been used here.
Sorry, my mistake.
Thank you very much, David.
Elina

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next] bridge: multicast router list manipulation
From: Herbert Xu @ 2010-04-28  1:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David S. Miller, netdev
In-Reply-To: <20100427101311.2f445227@nehalam>

On Tue, Apr 27, 2010 at 10:13:11AM -0700, Stephen Hemminger wrote:
> I prefer that the hlist be only accessed through the hlist macro
> objects. Explicit twiddling of links (especially with RCU) exposes
> the code to future bugs.
> 
> Compile tested only.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> 
> --- a/net/bridge/br_multicast.c	2010-04-27 09:54:02.180531924 -0700
> +++ b/net/bridge/br_multicast.c	2010-04-27 10:07:19.188688664 -0700
> @@ -1041,21 +1041,21 @@ static int br_ip6_multicast_mld2_report(
>  static void br_multicast_add_router(struct net_bridge *br,
>  				    struct net_bridge_port *port)
>  {
> -	struct hlist_node *p;
> -	struct hlist_node **h;
> +	struct net_bridge_port *p;
> +	struct hlist_node *n, *last = NULL;
>  
> -	for (h = &br->router_list.first;
> -	     (p = *h) &&
> -	     (unsigned long)container_of(p, struct net_bridge_port, rlist) >
> -	     (unsigned long)port;
> -	     h = &p->next)
> -		;
> -
> -	port->rlist.pprev = h;
> -	port->rlist.next = p;
> -	rcu_assign_pointer(*h, &port->rlist);
> -	if (p)
> -		p->pprev = &port->rlist.next;
> +	hlist_for_each_entry(p, n, &br->router_list, rlist) {
> +		if ((unsigned long) port >= (unsigned long) p) {
> +			hlist_add_before_rcu(n, &port->rlist);
> +			return;
> +		}
> +		last = n;
> +	}
> +
> +	if (last)
> +		hlist_add_after_rcu(last, &port->rlist);
> +	else
> +		hlist_add_head_rcu(&port->rlist, &br->router_list);
>  }

Thanks Stephen, this looks good to me too.
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [RFC] random SYN drops causing connect() delays
From: David Miller @ 2010-04-28  1:56 UTC (permalink / raw)
  To: tgraf; +Cc: netdev
In-Reply-To: <20100412083903.GA19763@bombadil.infradead.org>

From: Thomas Graf <tgraf@infradead.org>
Date: Mon, 12 Apr 2010 04:39:03 -0400

> On Mon, Apr 12, 2010 at 04:06:33AM -0400, Thomas Graf wrote:
>>  - While the issue is appearing, the acceptq seems to be overflowing. Both
>>    LISTENOVERFLOWS and LISTENDROPS are increasing although not by the exact
>>    number of delay occurences. inetdiag reports sk_max_ack_backlog to be 0
>>    therefore one possibility that comes to mind is that sk_ack_backlog
>>    underflows due to a race.
> 
> Forget about the underflow thought, inetdiag was reporting falsely.
> sk_max_ack_backlog is set to 128 as it should and the listen overflow
> happens normally. Still the fact remains that while the issue is appearing
> listen overflows are counted.

I can't reproduce on my system even with sched_child_runs_first set to '1'.

Are you running identd or something like that which intercepts the connections
to port 22 before 'sshd' actually gets it?

^ permalink raw reply

* Re: [v4 Patch 3/3] bonding: make bonding support netpoll
From: Cong Wang @ 2010-04-28  2:08 UTC (permalink / raw)
  To: David Miller
  Cc: linux-kernel, mpm, netdev, bridge, gospo, nhorman, jmoyer,
	shemminger, bonding-devel, fubar
In-Reply-To: <20100427.152424.236240666.davem@davemloft.net>

David Miller wrote:
> From: Amerigo Wang <amwang@redhat.com>
> Date: Tue, 27 Apr 2010 03:56:09 -0400
> 
>> +		if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL)
>> +				|| !slave->dev->netdev_ops->ndo_poll_controller)
> 
> "|| on first line please, plus fix second line's indentation as per
> comments given in patch #1 and #2

Thanks, David!
I will fix all of these style problems.

^ permalink raw reply

* Re: [PATCH 2/4] [RFC] Add sock_create_kern_net()
From: Vlad Yasevich @ 2010-04-28  2:18 UTC (permalink / raw)
  To: David Miller; +Cc: danms, containers, netdev
In-Reply-To: <20100427.171844.77354120.davem@davemloft.net>



David Miller wrote:
> From: Dan Smith <danms@us.ibm.com>
> Date: Fri, 23 Apr 2010 07:55:37 -0700
> 
>> This helper allows kernel routines to create a socket in a given netns,
>> instead of forcing it to the initial or current one.
>>
>> I know this seems like it's violating the netns boundary.  The intended
>> use (as in the following patches) is specifically when talking to RTNETLINK
>> in another netns for the purposes of creating or examining resources there.
>> It is expected that this will be used for that sort of transient socket
>> creation only.  In other words:
> 
> If you can create netlink sockets in a remote NS you can also make
> changes there, and the whole point is to disallow changes.
> 
> So maybe you won't be making changes, but others will think about
> using this and doing so.
> 
> At a high level, I think this is a really bad idea, so I won't be
> applying this, sorry.

What if this is changed to be a socket option or ioctl or some other mechanism
that allows one to move an existing unbound, unconnected socket to another
namespace?

This way, we do not modify the namespace directly, but still have ability
to move sockets into it for required communication.

In my mind moving a socket is somewhat similar to moving an interface to a
namespace.  You are just moving a required resource.

Thanks
-vlad

> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* [net-next-2.6 PATCH v3] ixgbe: disable MSI-X by default on certain Cisco adapters
From: Jeff Kirsher @ 2010-04-28  2:45 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Nicholas Nunley, John Ronciak, Jeff Kirsher

From: Nick Nunley <nicholasx.d.nunley@intel.com>

Due to an erratum in 82598 parts, MSI-X needs to be disabled
in certain ixgbe devices designed to transfer peer-to-peer
traffic on the PCIe bus. This patch sets the default
interrupt type to MSI rather than MSI-X for specific Cisco
ixgbe adapters.

Signed-off-by: Nicholas Nunley <nicholasx.d.nunley@intel.com>
Acked-by: John Ronciak <john.ronciak@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/ixgbe/ixgbe.h      |    4 ++++
 drivers/net/ixgbe/ixgbe_main.c |   17 ++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index 79c35ae..ec6bcc0 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -104,6 +104,10 @@
 #define MAX_EMULATION_MAC_ADDRS         16
 #define VMDQ_P(p)   ((p) + adapter->num_vfs)
 
+#define IXGBE_SUBDEV_ID_82598AF_MEZZ		0x0049
+#define IXGBE_SUBDEV_ID_82598AF_MENLO_Q_MEZZ	0x004a
+#define IXGBE_SUBDEV_ID_82598AF_MENLO_E_MEZZ	0x004b
+
 struct vf_data_storage {
 	unsigned char vf_mac_addresses[ETH_ALEN];
 	u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 2ae5a51..ff59f88 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4314,6 +4314,9 @@ static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
 	int err = 0;
 	int vector, v_budget;
 
+	if (!(adapter->flags & IXGBE_FLAG_MSIX_CAPABLE))
+		goto try_msi;
+
 	/*
 	 * It's easy to be greedy for MSI-X vectors, but it really
 	 * doesn't do us much good if we have a lot more vectors
@@ -4345,7 +4348,7 @@ static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
 		if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
 			goto out;
 	}
-
+try_msi:
 	adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
 	adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED;
 	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
@@ -4626,6 +4629,18 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	adapter->ring_feature[RING_F_RSS].indices = rss;
 	adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
 	adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES;
+	adapter->flags |= IXGBE_FLAG_MSIX_CAPABLE;
+	if (adapter->hw.device_id == IXGBE_DEV_ID_82598AF_DUAL_PORT) {
+		switch (adapter->hw.subsystem_device_id) {
+		case IXGBE_SUBDEV_ID_82598AF_MEZZ:
+		case IXGBE_SUBDEV_ID_82598AF_MENLO_Q_MEZZ:
+		case IXGBE_SUBDEV_ID_82598AF_MENLO_E_MEZZ:
+			adapter->flags &= ~IXGBE_FLAG_MSIX_CAPABLE;
+			break;
+		default:
+			break;
+		}
+	}
 	if (hw->mac.type == ixgbe_mac_82598EB) {
 		if (hw->device_id == IXGBE_DEV_ID_82598AT)
 			adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;


^ permalink raw reply related

* Re: [net-next-2.6 PATCH v3] ixgbe: disable MSI-X by default on certain Cisco adapters
From: David Miller @ 2010-04-28  2:47 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, nicholasx.d.nunley, john.ronciak
In-Reply-To: <20100428024521.28991.37874.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Tue, 27 Apr 2010 19:45:26 -0700

> From: Nick Nunley <nicholasx.d.nunley@intel.com>
> 
> Due to an errata in 82598 parts MSI-X needs to be disabled
> in certain ixgbe devices designed to transfer peer-to-peer
> traffic on the PCIe bus. This patch sets the default
> interrupt type to MSI rather than MSI-X for specific Cisco
> ixgbe adapters.
> 
> Signed-off-by: Nicholas Nunley <nicholasx.d.nunley@intel.com>
> Acked-by: John Ronciak <john.ronciak@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 3/4] bridge: multicast port group RCU fix
From: Herbert Xu @ 2010-04-28  3:07 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David S. Miller, netdev
In-Reply-To: <20100428010336.237294971@vyatta.com>

On Tue, Apr 27, 2010 at 06:01:06PM -0700, Stephen Hemminger wrote:
> The recently introduced bridge mulitcast port group list was only
> partially using RCU correctly. It was missing rcu_dereference()
> and missing the necessary barrier on deletion.
> 
> The code should have used one of the standard list methods (list or hlist)
> instead of open coding a RCU based link list.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 
> --- a/net/bridge/br_forward.c	2010-04-27 17:51:27.909588950 -0700
> +++ b/net/bridge/br_forward.c	2010-04-27 17:53:18.790721091 -0700
> @@ -217,7 +217,7 @@ static void br_multicast_flood(struct ne
>  	prev = NULL;
>  
>  	rp = rcu_dereference(br->router_list.first);
> -	p = mdst ? mdst->ports : NULL;
> +	p = mdst ? rcu_dereference(mdst->ports) : NULL;
>  	while (p || rp) {
>  		lport = p ? p->port : NULL;
>  		rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
> @@ -231,7 +231,7 @@ static void br_multicast_flood(struct ne
>  			goto out;
>  
>  		if ((unsigned long)lport >= (unsigned long)port)
> -			p = p->next;
> +			p = rcu_dereference(p->next);
>  		if ((unsigned long)rport >= (unsigned long)port)
>  			rp = rcu_dereference(rp->next);
>  	}

Thanks for catching this!

> --- a/net/bridge/br_multicast.c	2010-04-27 17:51:31.509593914 -0700
> +++ b/net/bridge/br_multicast.c	2010-04-27 17:52:48.209243982 -0700
> @@ -259,7 +259,7 @@ static void br_multicast_del_pg(struct n
>  		if (p != pg)
>  			continue;
>  
> -		*pp = p->next;
> +		rcu_assign_pointer(*pp, p->next);

But this is bogus.  br_multicast_del_pg is removing an entry from
the RCU list.

You only need write barriers when you're putting a new entry in
it, and only if there is no other barrier between the code filling
in the new entry and the line adding it to the RCU list.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH net-next 3/4] bridge: multicast port group RCU fix
From: Stephen Hemminger @ 2010-04-28  3:47 UTC (permalink / raw)
  To: Herbert Xu; +Cc: David S. Miller, netdev
In-Reply-To: <20100428030709.GA12910@gondor.apana.org.au>

On Wed, 28 Apr 2010 11:07:09 +0800
Herbert Xu <herbert@gondor.apana.org.au> wrote:

> On Tue, Apr 27, 2010 at 06:01:06PM -0700, Stephen Hemminger wrote:
> > The recently introduced bridge mulitcast port group list was only
> > partially using RCU correctly. It was missing rcu_dereference()
> > and missing the necessary barrier on deletion.
> > 
> > The code should have used one of the standard list methods (list or hlist)
> > instead of open coding a RCU based link list.
> > 
> > Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> > 
> > --- a/net/bridge/br_forward.c	2010-04-27 17:51:27.909588950 -0700
> > +++ b/net/bridge/br_forward.c	2010-04-27 17:53:18.790721091 -0700
> > @@ -217,7 +217,7 @@ static void br_multicast_flood(struct ne
> >  	prev = NULL;
> >  
> >  	rp = rcu_dereference(br->router_list.first);
> > -	p = mdst ? mdst->ports : NULL;
> > +	p = mdst ? rcu_dereference(mdst->ports) : NULL;
> >  	while (p || rp) {
> >  		lport = p ? p->port : NULL;
> >  		rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
> > @@ -231,7 +231,7 @@ static void br_multicast_flood(struct ne
> >  			goto out;
> >  
> >  		if ((unsigned long)lport >= (unsigned long)port)
> > -			p = p->next;
> > +			p = rcu_dereference(p->next);
> >  		if ((unsigned long)rport >= (unsigned long)port)
> >  			rp = rcu_dereference(rp->next);
> >  	}
> 
> Thanks for catching this!
> 
> > --- a/net/bridge/br_multicast.c	2010-04-27 17:51:31.509593914 -0700
> > +++ b/net/bridge/br_multicast.c	2010-04-27 17:52:48.209243982 -0700
> > @@ -259,7 +259,7 @@ static void br_multicast_del_pg(struct n
> >  		if (p != pg)
> >  			continue;
> >  
> > -		*pp = p->next;
> > +		rcu_assign_pointer(*pp, p->next);
> 
> But this is bogus.  br_multicast_del_pg is removing an entry from
> the RCU list.
> 
> You only need write barriers when you're putting a new entry in
> it, and only if there is no other barrier between the code filling
> in the new entry and the line adding it to the RCU list.

Yeah, it is an extra barrier (one more reason to stick to hlist_del_rcu)




-- 

^ permalink raw reply

* Re: [v4 Patch 1/3] netpoll: add generic support for bridge and bonding devices
From: Dongdong Deng @ 2010-04-28  4:02 UTC (permalink / raw)
  To: Amerigo Wang
  Cc: linux-kernel, Matt Mackall, netdev, bridge, Andy Gospodarek,
	Neil Horman, Jeff Moyer, Stephen Hemminger, bonding-devel,
	Jay Vosburgh, David Miller
In-Reply-To: <20100427075937.4908.18468.sendpatchset@localhost.localdomain>

On Tue, Apr 27, 2010 at 3:55 PM, Amerigo Wang <amwang@redhat.com> wrote:
> V4:
> Use "unlikely" to mark netpoll call path, suggested by Stephen.
> Handle NETDEV_GOING_DOWN case.
>
> V3:
> Update to latest Linus' tree.
> Fix deadlocks when releasing slaves of bonding devices.
> Thanks to Andy.
>
> V2:
> Fix some bugs of previous version.
> Remove ->netpoll_setup and ->netpoll_xmit, they are not necessary.
> Don't poll all underlying devices, poll ->real_dev in struct netpoll.
> Thanks to David for suggesting above.
>
> ------------>
>
> This whole patchset is for adding netpoll support to bridge and bonding
> devices. I already tested it for bridge, bonding, bridge over bonding,
> and bonding over bridge. It looks fine now.
>
>
> To make bridge and bonding support netpoll, we need to adjust
> some netpoll generic code. This patch does the following things:
>
> 1) introduce two new priv_flags for struct net_device:
>   IFF_IN_NETPOLL which identifies we are processing a netpoll;
>   IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
>   at run-time;
>
> 2) introduce one new method for netdev_ops:
>   ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
>     removed.
>
> 3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
>   export netpoll_send_skb() and netpoll_poll_dev() which will be used later;
>
> 4) hide a pointer to struct netpoll in struct netpoll_info, ditto.
>
> 5) introduce ->real_dev for struct netpoll.
>
> 6) introduce a new status NETDEV_BONDING_DESLAVE, which is used to disable
>   netconsole before releasing a slave, to avoid deadlocks.
>
> Cc: David Miller <davem@davemloft.net>
> Cc: Neil Horman <nhorman@tuxdriver.com>
> Signed-off-by: WANG Cong <amwang@redhat.com>
>
> ---
>
> Index: linux-2.6/include/linux/if.h
> ===================================================================
> --- linux-2.6.orig/include/linux/if.h
> +++ linux-2.6/include/linux/if.h
> @@ -71,6 +71,8 @@
>                                         * release skb->dst
>                                         */
>  #define IFF_DONT_BRIDGE 0x800          /* disallow bridging this ether dev */
> +#define IFF_IN_NETPOLL 0x1000          /* whether we are processing netpoll */
> +#define IFF_DISABLE_NETPOLL    0x2000  /* disable netpoll at run-time */
>
>  #define IF_GET_IFACE   0x0001          /* for querying only */
>  #define IF_GET_PROTO   0x0002
> Index: linux-2.6/include/linux/netdevice.h
> ===================================================================
> --- linux-2.6.orig/include/linux/netdevice.h
> +++ linux-2.6/include/linux/netdevice.h
> @@ -667,6 +667,7 @@ struct net_device_ops {
>                                                        unsigned short vid);
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>        void                    (*ndo_poll_controller)(struct net_device *dev);
> +       void                    (*ndo_netpoll_cleanup)(struct net_device *dev);
>  #endif
>        int                     (*ndo_set_vf_mac)(struct net_device *dev,
>                                                  int queue, u8 *mac);
> Index: linux-2.6/include/linux/netpoll.h
> ===================================================================
> --- linux-2.6.orig/include/linux/netpoll.h
> +++ linux-2.6/include/linux/netpoll.h
> @@ -14,6 +14,7 @@
>
>  struct netpoll {
>        struct net_device *dev;
> +       struct net_device *real_dev;
>        char dev_name[IFNAMSIZ];
>        const char *name;
>        void (*rx_hook)(struct netpoll *, int, char *, int);
> @@ -36,8 +37,11 @@ struct netpoll_info {
>        struct sk_buff_head txq;
>
>        struct delayed_work tx_work;
> +
> +       struct netpoll *netpoll;
>  };
>
> +void netpoll_poll_dev(struct net_device *dev);
>  void netpoll_poll(struct netpoll *np);
>  void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
>  void netpoll_print_options(struct netpoll *np);
> @@ -47,6 +51,7 @@ int netpoll_trap(void);
>  void netpoll_set_trap(int trap);
>  void netpoll_cleanup(struct netpoll *np);
>  int __netpoll_rx(struct sk_buff *skb);
> +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
>
>
>  #ifdef CONFIG_NETPOLL
> Index: linux-2.6/net/core/netpoll.c
> ===================================================================
> --- linux-2.6.orig/net/core/netpoll.c
> +++ linux-2.6/net/core/netpoll.c
> @@ -179,9 +179,8 @@ static void service_arp_queue(struct net
>        }
>  }
>
> -void netpoll_poll(struct netpoll *np)
> +void netpoll_poll_dev(struct net_device *dev)
>  {
> -       struct net_device *dev = np->dev;
>        const struct net_device_ops *ops;
>
>        if (!dev || !netif_running(dev))
> @@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
>        zap_completion_queue();
>  }
>
> +void netpoll_poll(struct netpoll *np)
> +{
> +       netpoll_poll_dev(np->dev);
> +}
> +
>  static void refill_skbs(void)
>  {
>        struct sk_buff *skb;
> @@ -282,7 +286,7 @@ static int netpoll_owner_active(struct n
>        return 0;
>  }
>
> -static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
> +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
>  {
>        int status = NETDEV_TX_BUSY;
>        unsigned long tries;
> @@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netp
>                     tries > 0; --tries) {
>                        if (__netif_tx_trylock(txq)) {
>                                if (!netif_tx_queue_stopped(txq)) {
> +                                       dev->priv_flags |= IFF_IN_NETPOLL;
>                                        status = ops->ndo_start_xmit(skb, dev);
> +                                       dev->priv_flags &= ~IFF_IN_NETPOLL;
>                                        if (status == NETDEV_TX_OK)
>                                                txq_trans_update(txq);
>                                }
> @@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
>                atomic_inc(&npinfo->refcnt);
>        }
>
> -       if (!ndev->netdev_ops->ndo_poll_controller) {
> +       npinfo->netpoll = np;
> +
> +       if (ndev->priv_flags & IFF_DISABLE_NETPOLL
> +                       || !ndev->netdev_ops->ndo_poll_controller) {
>                printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
>                       np->name, np->dev_name);
>                err = -ENOTSUPP;
> @@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
>                        }
>
>                        if (atomic_dec_and_test(&npinfo->refcnt)) {
> +                               const struct net_device_ops *ops;
>                                skb_queue_purge(&npinfo->arp_tx);
>                                skb_queue_purge(&npinfo->txq);
>                                cancel_rearming_delayed_work(&npinfo->tx_work);
> @@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
>                                /* clean after last, unfinished work */
>                                __skb_queue_purge(&npinfo->txq);
>                                kfree(npinfo);
> -                               np->dev->npinfo = NULL;
> +                               ops = np->dev->netdev_ops;
> +                               if (ops->ndo_netpoll_cleanup)
> +                                       ops->ndo_netpoll_cleanup(np->dev);
> +                               else
> +                                       np->dev->npinfo = NULL;


+             if (ops->ndo_netpoll_cleanup)
+                                       ops->ndo_netpoll_cleanup(np->dev);
+             np->dev->npinfo = NULL;

I think it is good to set np->dev->npinfo to NULL even when we have
the ndo_netpoll_cleanup op.

Regards
Dongdong

>                        }
>                }
>
> @@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
>                atomic_dec(&trapped);
>  }
>
> +EXPORT_SYMBOL(netpoll_send_skb);
>  EXPORT_SYMBOL(netpoll_set_trap);
>  EXPORT_SYMBOL(netpoll_trap);
>  EXPORT_SYMBOL(netpoll_print_options);
> @@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
>  EXPORT_SYMBOL(netpoll_setup);
>  EXPORT_SYMBOL(netpoll_cleanup);
>  EXPORT_SYMBOL(netpoll_send_udp);
> +EXPORT_SYMBOL(netpoll_poll_dev);
>  EXPORT_SYMBOL(netpoll_poll);
> Index: linux-2.6/drivers/net/netconsole.c
> ===================================================================
> --- linux-2.6.orig/drivers/net/netconsole.c
> +++ linux-2.6/drivers/net/netconsole.c
> @@ -665,7 +665,8 @@ static int netconsole_netdev_event(struc
>        struct netconsole_target *nt;
>        struct net_device *dev = ptr;
>
> -       if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
> +       if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
> +             event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
>                goto done;
>
>        spin_lock_irqsave(&target_list_lock, flags);
> @@ -677,19 +678,21 @@ static int netconsole_netdev_event(struc
>                                strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
>                                break;
>                        case NETDEV_UNREGISTER:
> -                               if (!nt->enabled)
> -                                       break;
>                                netpoll_cleanup(&nt->np);
> +                               /* Fall through */
> +                       case NETDEV_GOING_DOWN:
> +                       case NETDEV_BONDING_DESLAVE:
>                                nt->enabled = 0;
> -                               printk(KERN_INFO "netconsole: network logging stopped"
> -                                       ", interface %s unregistered\n",
> -                                       dev->name);
>                                break;
>                        }
>                }
>                netconsole_target_put(nt);
>        }
>        spin_unlock_irqrestore(&target_list_lock, flags);
> +       if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)
> +               printk(KERN_INFO "netconsole: network logging stopped, "
> +                       "interface %s %s\n",  dev->name,
> +                       event == NETDEV_UNREGISTER ? "unregistered" : "released slaves");
>
>  done:
>        return NOTIFY_DONE;
> Index: linux-2.6/include/linux/notifier.h
> ===================================================================
> --- linux-2.6.orig/include/linux/notifier.h
> +++ linux-2.6/include/linux/notifier.h
> @@ -203,6 +203,7 @@ static inline int notifier_to_errno(int
>  #define NETDEV_BONDING_NEWTYPE  0x000F
>  #define NETDEV_POST_INIT       0x0010
>  #define NETDEV_UNREGISTER_BATCH 0x0011
> +#define NETDEV_BONDING_DESLAVE  0x0012
>
>  #define SYS_DOWN       0x0001  /* Notify of system down */
>  #define SYS_RESTART    SYS_DOWN
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply

* [net-next-2.6 PATCH 1/2] Add netdev port-profile support (take III, was iovnl)
From: Scott Feldman @ 2010-04-28  4:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd

From: Scott Feldman <scofeldm@cisco.com>

Add new netdev ops ndo_{set|get}_port_profile to allow setting of port-profile
on a netdev interface.  Extends RTM_SETLINK/RTM_GETLINK with new sub cmd called
IFLA_PORT_PROFILE (added to end of IFLA_cmd list).  The port-profile cmd
arguments are (as seen from modified iproute2 cmdline):

       ip link set DEVICE [ { up | down } ]
                          [ arp { on | off } ]
                          [ dynamic { on | off } ]
                          [ multicast { on | off } ]
                          ...
                          [ vf NUM [ mac LLADDR ]
                                   [ vlan VLANID [ qos VLAN-QOS ] ]
                                   [ rate TXRATE ] ] 
                          [ port_profile [ PORT-PROFILE
                                   [ mac LLADDR ]
                                   [ host_uuid HOST_UUID ]
                                   [ client_uuid CLIENT_UUID ]
                                   [ client_name CLIENT_NAME ] ] ]
       ip link show [ DEVICE ]


A port-profile is used to configure/enable the switch port backing the netdev
interface, not to configure the host-facing side of the netdev.  A port-
profile is an identifier known to the switch.  How port-profiles are installed
on the switch or how available port-profiles are made known to the host is
outside the scope of this patch.

The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg (more about that later)
communicates with the switch, and the switch port backing the host netdev
interface is configured/enabled based on the settings defined by the port-
profile.  What those settings comprise, and how those settings are managed is
again outside the scope of this patch, since this patch only deals with the
first step in the flow.

Since we're using netlink sockets, the receiver of the RTM_SETLINK msg can
be in kernel- or user-space.  For kernel-space recipient, rtnetlink.c, the
new ndo_set_port_profile netdev op is called to set the port-profile.
User-space recipients can decide how they propagate the msg to the switch.
There is also a RTM_GETLINK cmd to return the port-profile setting of an
interface and to also return the status of the last port-profile.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 include/linux/if_link.h   |   26 ++++++++++++++++++++++++++
 include/linux/netdevice.h |   10 ++++++++++
 net/core/rtnetlink.c      |   22 ++++++++++++++++++++++
 3 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index cfd420b..6f02398 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -116,6 +116,7 @@ enum {
 	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
 	IFLA_VFINFO,
 	IFLA_STATS64,
+	IFLA_PORT_PROFILE,
 	__IFLA_MAX
 };
 
@@ -259,4 +260,29 @@ struct ifla_vf_info {
 	__u32 qos;
 	__u32 tx_rate;
 };
+
+/* Port-profile management section */
+
+#define IFLA_PORT_PROFILE_MAX	40
+#define IFLA_PP_HOST_UUID_MAX	40
+#define IFLA_PP_CLIENT_UUID_MAX	40
+#define IFLA_PP_CLIENT_NAME_MAX	40
+
+enum ifla_port_profile_status {
+	IFLA_PORT_PROFILE_STATUS_UNKNOWN,
+	IFLA_PORT_PROFILE_STATUS_SUCCESS,
+	IFLA_PORT_PROFILE_STATUS_ERROR,
+	IFLA_PORT_PROFILE_STATUS_INPROGRESS,
+};
+
+struct ifla_port_profile {
+	__u8 status;
+	__u8 port_profile[IFLA_PORT_PROFILE_MAX];
+	__u8 mac[32]; /* MAX_ADDR_LEN */
+	__u8 host_uuid[IFLA_PP_HOST_UUID_MAX];
+		/* e.g. "CEEFD3B1-9E11-11DE-BDFD-000BAB01C0FB" */
+	__u8 client_uuid[IFLA_PP_CLIENT_UUID_MAX];
+	__u8 client_name[IFLA_PP_CLIENT_NAME_MAX]; /* e.g. "vm0-eth1" */
+};
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f..2962288 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -696,6 +696,12 @@ struct netdev_rx_queue {
  * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
+ *
+ *	Port-profile management functions.
+ * int (*ndo_set_port_profile)(struct net_device *dev,
+ *			       struct ifla_port_profile *ipp);
+ * int (*ndo_get_port_profile)(struct net_device *dev,
+ *			       struct ifla_port_profile *ipp);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -744,6 +750,10 @@ struct net_device_ops {
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
+	int			(*ndo_set_port_profile)(struct net_device *dev,
+					struct ifla_port_profile *ipp);
+	int			(*ndo_get_port_profile)(struct net_device *dev,
+					struct ifla_port_profile *ipp);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 78c8598..1d7e9a7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -758,6 +758,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi);
 		}
 	}
+
+	if (dev->netdev_ops->ndo_get_port_profile) {
+		struct ifla_port_profile ipp;
+
+		if (!dev->netdev_ops->ndo_get_port_profile(dev, &ipp))
+			NLA_PUT(skb, IFLA_PORT_PROFILE, sizeof(ipp), &ipp);
+	}
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;
@@ -824,6 +832,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 				    .len = sizeof(struct ifla_vf_vlan) },
 	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_tx_rate) },
+	[IFLA_PORT_PROFILE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_port_profile)},
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1028,6 +1038,18 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	}
 	err = 0;
 
+	if (tb[IFLA_PORT_PROFILE]) {
+		struct ifla_port_profile *ipp;
+		ipp = nla_data(tb[IFLA_PORT_PROFILE]);
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_port_profile)
+			err = ops->ndo_set_port_profile(dev, ipp);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+	err = 0;
+
 errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "


^ permalink raw reply related

* [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
From: Scott Feldman @ 2010-04-28  4:42 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd
In-Reply-To: <20100428044235.8646.61943.stgit@savbu-pc100.cisco.com>

From: Scott Feldman <scofeldm@cisco.com>

Add enic ndo_{set|get}_port_profile op to support setting port-profile for
dynamic vnics.  Enic dynamic vnics are just like normal enic eth vnics except
dynamic vnics require an extra configuration step to assign a port-profile
identifier to the interface before the interface is useable. Once assigned,
link comes up on the interface and is ready for I/O.  The port-profile is
used to configure the network port assigned to the interface.  The network
port configuration includes VLAN membership, QoS policies, and port security
settings typical of a data center network.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 drivers/net/enic/Makefile    |    2 -
 drivers/net/enic/enic.h      |    3 +
 drivers/net/enic/enic_main.c |  163 +++++++++++++++++++++++++++++++++++++++---
 drivers/net/enic/vnic_dev.c  |   50 +++++++++++++
 drivers/net/enic/vnic_dev.h  |    3 +
 drivers/net/enic/vnic_vic.c  |   73 +++++++++++++++++++
 drivers/net/enic/vnic_vic.h  |   59 +++++++++++++++
 7 files changed, 338 insertions(+), 15 deletions(-)

diff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile
index 391c3bc..e7b6c31 100644
--- a/drivers/net/enic/Makefile
+++ b/drivers/net/enic/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_ENIC) := enic.o
 
 enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \
-	enic_res.o vnic_dev.o vnic_rq.o
+	enic_res.o vnic_dev.o vnic_rq.o vnic_vic.o
 
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index 5fa56f1..b54e9eb 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -34,7 +34,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"1.3.1.1"
+#define DRV_VERSION		"1.3.1.1-iov"
 #define DRV_COPYRIGHT		"Copyright 2008-2009 Cisco Systems, Inc"
 #define PFX			DRV_NAME ": "
 
@@ -93,6 +93,7 @@ struct enic {
 	unsigned int mc_count;
 	int csum_rx_enabled;
 	u32 port_mtu;
+	struct ifla_port_profile port_profile;
 	u32 rx_coalesce_usecs;
 	u32 tx_coalesce_usecs;
 
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 1232887..394771d 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -29,6 +29,7 @@
 #include <linux/etherdevice.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/if_link.h>
 #include <linux/ethtool.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -40,6 +41,7 @@
 #include "vnic_dev.h"
 #include "vnic_intr.h"
 #include "vnic_stats.h"
+#include "vnic_vic.h"
 #include "enic_res.h"
 #include "enic.h"
 
@@ -49,10 +51,12 @@
 #define ENIC_DESC_MAX_SPLITS		(MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)
 
 #define PCI_DEVICE_ID_CISCO_VIC_ENET         0x0043  /* ethernet vnic */
+#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN     0x0044  /* enet dynamic vnic */
 
 /* Supported devices */
 static DEFINE_PCI_DEVICE_TABLE(enic_id_table) = {
 	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
+	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
 	{ 0, }	/* end of table */
 };
 
@@ -113,6 +117,11 @@ static const struct enic_stat enic_rx_stats[] = {
 static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
 static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
 
+static int enic_is_dynamic(struct enic *enic)
+{
+	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
+}
+
 static int enic_get_settings(struct net_device *netdev,
 	struct ethtool_cmd *ecmd)
 {
@@ -810,14 +819,24 @@ static void enic_reset_mcaddrs(struct enic *enic)
 
 static int enic_set_mac_addr(struct net_device *netdev, char *addr)
 {
-	if (!is_valid_ether_addr(addr))
-		return -EADDRNOTAVAIL;
+	struct enic *enic = netdev_priv(netdev);
 
-	memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	if (enic_is_dynamic(enic)) {
+		random_ether_addr(netdev->dev_addr);
+	} else {
+		if (!is_valid_ether_addr(addr))
+			return -EADDRNOTAVAIL;
+		memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	}
 
 	return 0;
 }
 
+static int enic_set_mac_address(struct net_device *netdev, void *p)
+{
+	return -EOPNOTSUPP;
+}
+
 /* netif_tx_lock held, BHs disabled */
 static void enic_set_multicast_list(struct net_device *netdev)
 {
@@ -922,6 +941,118 @@ static void enic_tx_timeout(struct net_device *netdev)
 	schedule_work(&enic->reset);
 }
 
+static int enic_vnic_dev_deinit(struct enic *enic)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_deinit(enic->vdev);
+	spin_unlock(&enic->devcmd_lock);
+	return err;
+}
+
+static int enic_dev_init_prov(struct enic *enic, struct vic_provinfo *vp)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_init_prov(enic->vdev, (u8 *)vp, vic_provinfo_size(vp));
+	spin_unlock(&enic->devcmd_lock);
+	return err;
+}
+
+static int enic_provinfo_add_tlv_str(struct vic_provinfo *vp, u16 type,
+	u16 max_length, char *str)
+{
+	if (!str)
+		return 0;
+
+	if (strlen(str) + 1 > max_length)
+		return 0;
+
+	return vic_provinfo_add_tlv(vp, type, strlen(str) + 1, str);
+}
+
+static int enic_set_port_profile(struct net_device *netdev,
+	struct ifla_port_profile *ipp)
+{
+	struct enic *enic = netdev_priv(netdev);
+	struct vic_provinfo *vp;
+	u8 oui[3] = VIC_PROVINFO_CISCO_OUI;
+	u8 *mac = ipp->mac;
+	int err;
+
+	memset(&enic->port_profile, 0, sizeof(enic->port_profile));
+
+	if (!enic_is_dynamic(enic))
+		return -EOPNOTSUPP;
+
+	enic_vnic_dev_deinit(enic);
+
+	if (strlen(ipp->port_profile) == 0)
+		return 0;
+
+	if (is_zero_ether_addr(mac))
+		mac = netdev->dev_addr;
+
+	if (!is_valid_ether_addr(mac))
+		return -EADDRNOTAVAIL;
+
+	vp = vic_provinfo_alloc(GFP_KERNEL, oui, VIC_PROVINFO_LINUX_TYPE);
+	if (!vp)
+		return -ENOMEM;
+
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR,
+		IFLA_PORT_PROFILE_MAX, ipp->port_profile);
+	vic_provinfo_add_tlv(vp, VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR,
+		ETH_ALEN, mac);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_HOST_UUID_STR,
+		IFLA_PP_HOST_UUID_MAX, ipp->host_uuid);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_UUID_STR,
+		IFLA_PP_CLIENT_UUID_MAX, ipp->client_uuid);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_NAME_STR,
+		IFLA_PP_CLIENT_NAME_MAX, ipp->client_name);
+
+	err = enic_dev_init_prov(enic, vp);
+	if (err)
+		goto err_out;
+
+	enic_set_multicast_list(netdev);
+
+	memcpy(&enic->port_profile, ipp, sizeof(enic->port_profile));
+
+err_out:
+	vic_provinfo_free(vp);
+
+	return err;
+}
+
+static int enic_get_port_profile(struct net_device *netdev,
+	struct ifla_port_profile *ipp)
+{
+	struct enic *enic = netdev_priv(netdev);
+	int done, err, error;
+
+	enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_UNKNOWN;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_init_done(enic->vdev, &done, &error);
+	spin_unlock(&enic->devcmd_lock);
+
+	if (err || error)
+		enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_ERROR;
+
+	if (!done)
+		enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_INPROGRESS;
+
+	if (!error)
+		enic->port_profile.status = IFLA_PORT_PROFILE_STATUS_SUCCESS;
+
+	memcpy(ipp, &enic->port_profile, sizeof(enic->port_profile));
+
+	return 0;
+}
+
 static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
 {
 	struct enic *enic = vnic_dev_priv(rq->vdev);
@@ -1440,10 +1571,12 @@ static int enic_open(struct net_device *netdev)
 	for (i = 0; i < enic->rq_count; i++)
 		vnic_rq_enable(&enic->rq[i]);
 
-	spin_lock(&enic->devcmd_lock);
-	enic_add_station_addr(enic);
-	spin_unlock(&enic->devcmd_lock);
-	enic_set_multicast_list(netdev);
+	if (!enic_is_dynamic(enic)) {
+		spin_lock(&enic->devcmd_lock);
+		enic_add_station_addr(enic);
+		spin_unlock(&enic->devcmd_lock);
+		enic_set_multicast_list(netdev);
+	}
 
 	netif_wake_queue(netdev);
 	napi_enable(&enic->napi);
@@ -1780,13 +1913,15 @@ static const struct net_device_ops enic_netdev_ops = {
 	.ndo_start_xmit		= enic_hard_start_xmit,
 	.ndo_get_stats		= enic_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_set_multicast_list	= enic_set_multicast_list,
+	.ndo_set_mac_address	= enic_set_mac_address,
 	.ndo_change_mtu		= enic_change_mtu,
 	.ndo_vlan_rx_register	= enic_vlan_rx_register,
 	.ndo_vlan_rx_add_vid	= enic_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= enic_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= enic_tx_timeout,
+	.ndo_set_port_profile	= enic_set_port_profile,
+	.ndo_get_port_profile	= enic_get_port_profile,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= enic_poll_controller,
 #endif
@@ -2010,11 +2145,13 @@ static int __devinit enic_probe(struct pci_dev *pdev,
 
 	netif_carrier_off(netdev);
 
-	err = vnic_dev_init(enic->vdev, 0);
-	if (err) {
-		printk(KERN_ERR PFX
-			"vNIC dev init failed, aborting.\n");
-		goto err_out_dev_close;
+	if (!enic_is_dynamic(enic)) {
+		err = vnic_dev_init(enic->vdev, 0);
+		if (err) {
+			printk(KERN_ERR PFX
+				"vNIC dev init failed, aborting.\n");
+			goto err_out_dev_close;
+		}
 	}
 
 	err = enic_dev_init(enic);
diff --git a/drivers/net/enic/vnic_dev.c b/drivers/net/enic/vnic_dev.c
index d43a9d4..e351b0f 100644
--- a/drivers/net/enic/vnic_dev.c
+++ b/drivers/net/enic/vnic_dev.c
@@ -682,6 +682,56 @@ int vnic_dev_init(struct vnic_dev *vdev, int arg)
 	return r;
 }
 
+int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	int ret;
+
+	*done = 0;
+
+	ret = vnic_dev_cmd(vdev, CMD_INIT_STATUS, &a0, &a1, wait);
+	if (ret)
+		return ret;
+
+	*done = (a0 == 0);
+
+	*err = (a0 == 0) ? a1 : 0;
+
+	return 0;
+}
+
+int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len)
+{
+	u64 a0, a1 = len;
+	int wait = 1000;
+	u64 prov_pa;
+	void *prov_buf;
+	int ret;
+
+	prov_buf = pci_alloc_consistent(vdev->pdev, len, &prov_pa);
+	if (!prov_buf)
+		return -ENOMEM;
+
+	memcpy(prov_buf, buf, len);
+
+	a0 = prov_pa;
+
+	ret = vnic_dev_cmd(vdev, CMD_INIT_PROV_INFO, &a0, &a1, wait);
+
+	pci_free_consistent(vdev->pdev, len, prov_buf, prov_pa);
+
+	return ret;
+}
+
+int vnic_dev_deinit(struct vnic_dev *vdev)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+
+	return vnic_dev_cmd(vdev, CMD_DEINIT, &a0, &a1, wait);
+}
+
 int vnic_dev_link_status(struct vnic_dev *vdev)
 {
 	if (vdev->linkstatus)
diff --git a/drivers/net/enic/vnic_dev.h b/drivers/net/enic/vnic_dev.h
index f5be640..27f5a5a 100644
--- a/drivers/net/enic/vnic_dev.h
+++ b/drivers/net/enic/vnic_dev.h
@@ -124,6 +124,9 @@ int vnic_dev_disable(struct vnic_dev *vdev);
 int vnic_dev_open(struct vnic_dev *vdev, int arg);
 int vnic_dev_open_done(struct vnic_dev *vdev, int *done);
 int vnic_dev_init(struct vnic_dev *vdev, int arg);
+int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err);
+int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len);
+int vnic_dev_deinit(struct vnic_dev *vdev);
 int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg);
 int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done);
 void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
diff --git a/drivers/net/enic/vnic_vic.c b/drivers/net/enic/vnic_vic.c
new file mode 100644
index 0000000..d769772
--- /dev/null
+++ b/drivers/net/enic/vnic_vic.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "vnic_vic.h"
+
+struct vic_provinfo *vic_provinfo_alloc(gfp_t flags, u8 *oui, u8 type)
+{
+	struct vic_provinfo *vp = kzalloc(VIC_PROVINFO_MAX_DATA, flags);
+
+	if (!vp || !oui)
+		return NULL;
+
+	memcpy(vp->oui, oui, sizeof(vp->oui));
+	vp->type = type;
+	vp->length = htonl(sizeof(vp->num_tlvs));
+
+	return vp;
+}
+
+void vic_provinfo_free(struct vic_provinfo *vp)
+{
+	kfree(vp);
+}
+
+int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
+	void *value)
+{
+	struct vic_provinfo_tlv *tlv;
+
+	if (!vp || !value)
+		return -EINVAL;
+
+	if (ntohl(vp->length) + sizeof(*tlv) + length >
+		VIC_PROVINFO_MAX_TLV_DATA)
+		return -ENOMEM;
+
+	tlv = (struct vic_provinfo_tlv *)((u8 *)vp->tlv +
+		ntohl(vp->length) - sizeof(vp->num_tlvs));
+
+	tlv->type = htons(type);
+	tlv->length = htons(length);
+	memcpy(tlv->value, value, length);
+
+	vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1);
+	vp->length = htonl(ntohl(vp->length) + sizeof(*tlv) + length);
+
+	return 0;
+}
+
+size_t vic_provinfo_size(struct vic_provinfo *vp)
+{
+	return vp ?  ntohl(vp->length) + sizeof(*vp) - sizeof(vp->num_tlvs) : 0;
+}
diff --git a/drivers/net/enic/vnic_vic.h b/drivers/net/enic/vnic_vic.h
new file mode 100644
index 0000000..085c2a2
--- /dev/null
+++ b/drivers/net/enic/vnic_vic.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _VNIC_VIC_H_
+#define _VNIC_VIC_H_
+
+/* Note: All integer fields in NETWORK byte order */
+
+/* Note: String field lengths include null char */
+
+#define VIC_PROVINFO_CISCO_OUI		{ 0x00, 0x00, 0x0c }
+#define VIC_PROVINFO_LINUX_TYPE		0x2
+
+enum vic_linux_prov_tlv_type {
+	VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR = 0,
+	VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR = 1,			/* u8[6] */
+	VIC_LINUX_PROV_TLV_CLIENT_NAME_STR = 2,
+	VIC_LINUX_PROV_TLV_HOST_UUID_STR = 8,
+	VIC_LINUX_PROV_TLV_CLIENT_UUID_STR = 9,
+};
+
+struct vic_provinfo {
+	u8 oui[3];		/* OUI of data provider */
+	u8 type;		/* provider-specific type */
+	u32 length;		/* length of data below */
+	u32 num_tlvs;		/* number of tlvs */
+	struct vic_provinfo_tlv {
+		u16 type;
+		u16 length;
+		u8 value[0];
+	} tlv[0];
+} __attribute__ ((packed));
+
+#define VIC_PROVINFO_MAX_DATA		1385
+#define VIC_PROVINFO_MAX_TLV_DATA (VIC_PROVINFO_MAX_DATA - \
+	sizeof(struct vic_provinfo))
+
+struct vic_provinfo *vic_provinfo_alloc(gfp_t flags, u8 *oui, u8 type);
+void vic_provinfo_free(struct vic_provinfo *vp);
+int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
+	void *value);
+size_t vic_provinfo_size(struct vic_provinfo *vp);
+
+#endif	/* _VNIC_VIC_H_ */


^ permalink raw reply related

* Re: [RFC] random SYN drops causing connect() delays
From: Thomas Graf @ 2010-04-28  4:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20100427.185602.221594952.davem@davemloft.net>

On Tue, Apr 27, 2010 at 06:56:02PM -0700, David Miller wrote:
> I can't reproduce on my system even with sched_child_runs_first set to '1'.
> 
> Are you running identd or something like that which intercepts the connections
> to port 22 before 'sshd' actually gets it?

No, sshd is listening on the port directly.

I am having more difficulties to reproduce the issue with the latest git tree. It
still happens but I sometimes have to run the test several times to get a single
delayed connection attempt.

^ permalink raw reply

* Re: [PATCH 0/3] [RFC] ptp: IEEE 1588 clock support
From: Richard Cochran @ 2010-04-28  5:47 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: netdev
In-Reply-To: <4BD70EC9.7080004@grandegger.com>

On Tue, Apr 27, 2010 at 06:20:25PM +0200, Wolfgang Grandegger wrote:
> Do you have also a patch adding support for hardware timestamping to ptpd?

Yes, I do:

   https://sourceforge.net/tracker/index.php?func=detail&aid=2992847&group_id=139814&atid=744634

I should have mentioned, you also need the gianfar HW time stamping
patches, recently posted to netdev by Manfred Rudigier.

Enjoy,

Richard

^ permalink raw reply

* Re: [RFC] random SYN drops causing connect() delays
From: Eric Dumazet @ 2010-04-28  5:52 UTC (permalink / raw)
  To: Thomas Graf; +Cc: David Miller, netdev
In-Reply-To: <20100428044443.GL19763@bombadil.infradead.org>

Le mercredi 28 avril 2010 à 00:44 -0400, Thomas Graf a écrit :
> On Tue, Apr 27, 2010 at 06:56:02PM -0700, David Miller wrote:
> > I can't reproduce on my system even with sched_child_runs_first set to '1'.
> > 
> > Are you running identd or something like that which intercepts the connections
> > to port 22 before 'sshd' actually gets it?
> 
> No, sshd is listening on the port directly.
> 
> I am having more difficulties to reproduce the issue with the latest git tree. It
> still happens but I sometimes have to run the test several times to get a single
> delayed connection attempt.

On machine/kernel reproducing the behavior, please send

grep . /proc/sys/net/ipv4/*




^ permalink raw reply

* Re: [PATCH 1/3] ptp: Added a brand new class driver for ptp clocks.
From: Richard Cochran @ 2010-04-28  6:08 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: netdev
In-Reply-To: <20100427153239.0977c1aa.randy.dunlap@oracle.com>

On Tue, Apr 27, 2010 at 03:32:39PM -0700, Randy Dunlap wrote:
> How do I use the testptp.mk file?

The makefile uses the KBUILD_OUTPUT environment variable to find the
kernel includes, with the new header. I do something like this:

  export ARCH=powerpc
  export KBUILD_OUTPUT=~/work/kernel/ptp_p2020
  mkdir -p $KBUILD_OUTPUT
  make mpc85xx_smp_defconfig
  make menuconfig
  make -j3 uImage
  make headers_install
  make -C Documentation/ptp -f testptp.mk

> Drop the ".ko".  We normally don't include that part of the module name.

Okay, can do. I just imitated what I saw in other Kbuild files.

> > diff --git a/include/linux/Kbuild b/include/linux/Kbuild
> > index 2fc8e14..2d616cb 100644
> > --- a/include/linux/Kbuild
> > +++ b/include/linux/Kbuild
> > @@ -318,6 +318,7 @@ unifdef-y += poll.h
> >  unifdef-y += ppp_defs.h
> >  unifdef-y += ppp-comp.h
> >  unifdef-y += pps.h
> > +unifdef-y += ptp_clock.h
> >  unifdef-y += ptrace.h
> >  unifdef-y += quota.h
> >  unifdef-y += random.h
> 
> I think that the Kbuild file also needs this line:
> header-y += ptp_clock.h
> 
> so that builds that use O=objdir will work, but even with that
> change, I couldn't get it to work.  (?)

Well, I am not sure what to do here. I followed the example of the PPS
code. That code only has the unifdef-y assignment. But now I see that
Documentation/kbuild/makefiles.txt says unifdef-y is deprecated.

Can someone clarify what is correct: is just header-y enough?

Thanks,
Richard

^ permalink raw reply

* Re: [RFC] random SYN drops causing connect() delays
From: Thomas Graf @ 2010-04-28  6:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1272433966.2343.34.camel@edumazet-laptop>

On Wed, Apr 28, 2010 at 07:52:46AM +0200, Eric Dumazet wrote:
> On machine/kernel reproducing the behavior, please send
> 
> grep . /proc/sys/net/ipv4/*

net.ipv4.route.gc_thresh = 262144
net.ipv4.route.max_size = 4194304
net.ipv4.route.gc_min_interval = 0
net.ipv4.route.gc_min_interval_ms = 500
net.ipv4.route.gc_timeout = 300
net.ipv4.route.gc_interval = 60
net.ipv4.route.redirect_load = 20
net.ipv4.route.redirect_number = 9
net.ipv4.route.redirect_silence = 20480
net.ipv4.route.error_cost = 1000
net.ipv4.route.error_burst = 5000
net.ipv4.route.gc_elasticity = 8
net.ipv4.route.mtu_expires = 600
net.ipv4.route.min_pmtu = 552
net.ipv4.route.min_adv_mss = 256
net.ipv4.route.secret_interval = 600
net.ipv4.neigh.default.mcast_solicit = 3
net.ipv4.neigh.default.ucast_solicit = 3
net.ipv4.neigh.default.app_solicit = 0
net.ipv4.neigh.default.retrans_time = 99
net.ipv4.neigh.default.base_reachable_time = 30
net.ipv4.neigh.default.delay_first_probe_time = 5
net.ipv4.neigh.default.gc_stale_time = 60
net.ipv4.neigh.default.unres_qlen = 3
net.ipv4.neigh.default.proxy_qlen = 64
net.ipv4.neigh.default.anycast_delay = 99
net.ipv4.neigh.default.proxy_delay = 79
net.ipv4.neigh.default.locktime = 99
net.ipv4.neigh.default.retrans_time_ms = 1000
net.ipv4.neigh.default.base_reachable_time_ms = 30000
net.ipv4.neigh.default.gc_interval = 30
net.ipv4.neigh.default.gc_thresh1 = 128
net.ipv4.neigh.default.gc_thresh2 = 512
net.ipv4.neigh.default.gc_thresh3 = 1024
net.ipv4.neigh.lo.mcast_solicit = 3
net.ipv4.neigh.lo.ucast_solicit = 3
net.ipv4.neigh.lo.app_solicit = 0
net.ipv4.neigh.lo.retrans_time = 99
net.ipv4.neigh.lo.base_reachable_time = 30
net.ipv4.neigh.lo.delay_first_probe_time = 5
net.ipv4.neigh.lo.gc_stale_time = 60
net.ipv4.neigh.lo.unres_qlen = 3
net.ipv4.neigh.lo.proxy_qlen = 64
net.ipv4.neigh.lo.anycast_delay = 99
net.ipv4.neigh.lo.proxy_delay = 79
net.ipv4.neigh.lo.locktime = 99
net.ipv4.neigh.lo.retrans_time_ms = 1000
net.ipv4.neigh.lo.base_reachable_time_ms = 30000
net.ipv4.neigh.eth0.mcast_solicit = 3
net.ipv4.neigh.eth0.ucast_solicit = 3
net.ipv4.neigh.eth0.app_solicit = 0
net.ipv4.neigh.eth0.retrans_time = 99
net.ipv4.neigh.eth0.base_reachable_time = 30
net.ipv4.neigh.eth0.delay_first_probe_time = 5
net.ipv4.neigh.eth0.gc_stale_time = 60
net.ipv4.neigh.eth0.unres_qlen = 3
net.ipv4.neigh.eth0.proxy_qlen = 64
net.ipv4.neigh.eth0.anycast_delay = 99
net.ipv4.neigh.eth0.proxy_delay = 79
net.ipv4.neigh.eth0.locktime = 99
net.ipv4.neigh.eth0.retrans_time_ms = 1000
net.ipv4.neigh.eth0.base_reachable_time_ms = 30000
net.ipv4.neigh.eth1.mcast_solicit = 3
net.ipv4.neigh.eth1.ucast_solicit = 3
net.ipv4.neigh.eth1.app_solicit = 0
net.ipv4.neigh.eth1.retrans_time = 99
net.ipv4.neigh.eth1.base_reachable_time = 30
net.ipv4.neigh.eth1.delay_first_probe_time = 5
net.ipv4.neigh.eth1.gc_stale_time = 60
net.ipv4.neigh.eth1.unres_qlen = 3
net.ipv4.neigh.eth1.proxy_qlen = 64
net.ipv4.neigh.eth1.anycast_delay = 99
net.ipv4.neigh.eth1.proxy_delay = 79
net.ipv4.neigh.eth1.locktime = 99
net.ipv4.neigh.eth1.retrans_time_ms = 1000
net.ipv4.neigh.eth1.base_reachable_time_ms = 30000
net.ipv4.neigh.pan0.mcast_solicit = 3
net.ipv4.neigh.pan0.ucast_solicit = 3
net.ipv4.neigh.pan0.app_solicit = 0
net.ipv4.neigh.pan0.retrans_time = 99
net.ipv4.neigh.pan0.base_reachable_time = 30
net.ipv4.neigh.pan0.delay_first_probe_time = 5
net.ipv4.neigh.pan0.gc_stale_time = 60
net.ipv4.neigh.pan0.unres_qlen = 3
net.ipv4.neigh.pan0.proxy_qlen = 64
net.ipv4.neigh.pan0.anycast_delay = 99
net.ipv4.neigh.pan0.proxy_delay = 79
net.ipv4.neigh.pan0.locktime = 99
net.ipv4.neigh.pan0.retrans_time_ms = 1000
net.ipv4.neigh.pan0.base_reachable_time_ms = 30000
net.ipv4.neigh.virbr0.mcast_solicit = 3
net.ipv4.neigh.virbr0.ucast_solicit = 3
net.ipv4.neigh.virbr0.app_solicit = 0
net.ipv4.neigh.virbr0.retrans_time = 99
net.ipv4.neigh.virbr0.base_reachable_time = 30
net.ipv4.neigh.virbr0.delay_first_probe_time = 5
net.ipv4.neigh.virbr0.gc_stale_time = 60
net.ipv4.neigh.virbr0.unres_qlen = 3
net.ipv4.neigh.virbr0.proxy_qlen = 64
net.ipv4.neigh.virbr0.anycast_delay = 99
net.ipv4.neigh.virbr0.proxy_delay = 79
net.ipv4.neigh.virbr0.locktime = 99
net.ipv4.neigh.virbr0.retrans_time_ms = 1000
net.ipv4.neigh.virbr0.base_reachable_time_ms = 30000
net.ipv4.neigh.virbr1.mcast_solicit = 3
net.ipv4.neigh.virbr1.ucast_solicit = 3
net.ipv4.neigh.virbr1.app_solicit = 0
net.ipv4.neigh.virbr1.retrans_time = 99
net.ipv4.neigh.virbr1.base_reachable_time = 30
net.ipv4.neigh.virbr1.delay_first_probe_time = 5
net.ipv4.neigh.virbr1.gc_stale_time = 60
net.ipv4.neigh.virbr1.unres_qlen = 3
net.ipv4.neigh.virbr1.proxy_qlen = 64
net.ipv4.neigh.virbr1.anycast_delay = 99
net.ipv4.neigh.virbr1.proxy_delay = 79
net.ipv4.neigh.virbr1.locktime = 99
net.ipv4.neigh.virbr1.retrans_time_ms = 1000
net.ipv4.neigh.virbr1.base_reachable_time_ms = 30000
net.ipv4.neigh.virbr2.mcast_solicit = 3
net.ipv4.neigh.virbr2.ucast_solicit = 3
net.ipv4.neigh.virbr2.app_solicit = 0
net.ipv4.neigh.virbr2.retrans_time = 99
net.ipv4.neigh.virbr2.base_reachable_time = 30
net.ipv4.neigh.virbr2.delay_first_probe_time = 5
net.ipv4.neigh.virbr2.gc_stale_time = 60
net.ipv4.neigh.virbr2.unres_qlen = 3
net.ipv4.neigh.virbr2.proxy_qlen = 64
net.ipv4.neigh.virbr2.anycast_delay = 99
net.ipv4.neigh.virbr2.proxy_delay = 79
net.ipv4.neigh.virbr2.locktime = 99
net.ipv4.neigh.virbr2.retrans_time_ms = 1000
net.ipv4.neigh.virbr2.base_reachable_time_ms = 30000
net.ipv4.neigh.tun0.mcast_solicit = 3
net.ipv4.neigh.tun0.ucast_solicit = 3
net.ipv4.neigh.tun0.app_solicit = 0
net.ipv4.neigh.tun0.retrans_time = 99
net.ipv4.neigh.tun0.base_reachable_time = 30
net.ipv4.neigh.tun0.delay_first_probe_time = 5
net.ipv4.neigh.tun0.gc_stale_time = 60
net.ipv4.neigh.tun0.unres_qlen = 3
net.ipv4.neigh.tun0.proxy_qlen = 64
net.ipv4.neigh.tun0.anycast_delay = 99
net.ipv4.neigh.tun0.proxy_delay = 79
net.ipv4.neigh.tun0.locktime = 99
net.ipv4.neigh.tun0.retrans_time_ms = 1000
net.ipv4.neigh.tun0.base_reachable_time_ms = 30000
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_sack = 1
net.ipv4.tcp_retrans_collapse = 1
net.ipv4.ip_default_ttl = 64
net.ipv4.ip_no_pmtu_disc = 0
net.ipv4.ip_nonlocal_bind = 0
net.ipv4.tcp_syn_retries = 5
net.ipv4.tcp_synack_retries = 5
net.ipv4.tcp_max_orphans = 65536
net.ipv4.tcp_max_tw_buckets = 180000
net.ipv4.ip_dynaddr = 0
net.ipv4.tcp_keepalive_time = 7200
net.ipv4.tcp_keepalive_probes = 9
net.ipv4.tcp_keepalive_intvl = 75
net.ipv4.tcp_retries1 = 3
net.ipv4.tcp_retries2 = 15
net.ipv4.tcp_fin_timeout = 60
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_recycle = 0
net.ipv4.tcp_abort_on_overflow = 0
net.ipv4.tcp_stdurg = 0
net.ipv4.tcp_rfc1337 = 0
net.ipv4.tcp_max_syn_backlog = 1024
net.ipv4.ip_local_port_range = 32768	61000
net.ipv4.igmp_max_memberships = 20
net.ipv4.igmp_max_msf = 10
net.ipv4.inet_peer_threshold = 65664
net.ipv4.inet_peer_minttl = 120
net.ipv4.inet_peer_maxttl = 600
net.ipv4.inet_peer_gc_mintime = 10
net.ipv4.inet_peer_gc_maxtime = 120
net.ipv4.tcp_orphan_retries = 0
net.ipv4.tcp_fack = 1
net.ipv4.tcp_reordering = 3
net.ipv4.tcp_ecn = 2
net.ipv4.tcp_dsack = 1
net.ipv4.tcp_mem = 573408	764544	1146816
net.ipv4.tcp_wmem = 4096	16384	4194304
net.ipv4.tcp_rmem = 4096	87380	4194304
net.ipv4.tcp_app_win = 31
net.ipv4.tcp_adv_win_scale = 2
net.ipv4.tcp_tw_reuse = 0
net.ipv4.tcp_frto = 2
net.ipv4.tcp_frto_response = 0
net.ipv4.tcp_low_latency = 0
net.ipv4.tcp_no_metrics_save = 0
net.ipv4.tcp_moderate_rcvbuf = 1
net.ipv4.tcp_tso_win_divisor = 3
net.ipv4.tcp_congestion_control = cubic
net.ipv4.tcp_abc = 0
net.ipv4.tcp_mtu_probing = 0
net.ipv4.tcp_base_mss = 512
net.ipv4.tcp_workaround_signed_windows = 0
net.ipv4.tcp_dma_copybreak = 4096
net.ipv4.tcp_slow_start_after_idle = 1
net.ipv4.cipso_cache_enable = 1
net.ipv4.cipso_cache_bucket_size = 10
net.ipv4.cipso_rbm_optfmt = 0
net.ipv4.cipso_rbm_strictvalid = 1
net.ipv4.tcp_available_congestion_control = cubic reno
net.ipv4.tcp_allowed_congestion_control = cubic reno
net.ipv4.tcp_max_ssthresh = 0
net.ipv4.tcp_cookie_size = 0
net.ipv4.tcp_thin_linear_timeouts = 0
net.ipv4.tcp_thin_dupack = 0
net.ipv4.udp_mem = 573408	764544	1146816
net.ipv4.udp_rmem_min = 4096
net.ipv4.udp_wmem_min = 4096
net.ipv4.conf.all.forwarding = 1
net.ipv4.conf.all.mc_forwarding = 0
net.ipv4.conf.all.accept_redirects = 0
net.ipv4.conf.all.secure_redirects = 1
net.ipv4.conf.all.shared_media = 1
net.ipv4.conf.all.rp_filter = 0
net.ipv4.conf.all.send_redirects = 1
net.ipv4.conf.all.accept_source_route = 0
net.ipv4.conf.all.accept_local = 0
net.ipv4.conf.all.src_valid_mark = 0
net.ipv4.conf.all.proxy_arp = 0
net.ipv4.conf.all.medium_id = 0
net.ipv4.conf.all.bootp_relay = 0
net.ipv4.conf.all.log_martians = 0
net.ipv4.conf.all.tag = 0
net.ipv4.conf.all.arp_filter = 0
net.ipv4.conf.all.arp_announce = 0
net.ipv4.conf.all.arp_ignore = 0
net.ipv4.conf.all.arp_accept = 0
net.ipv4.conf.all.arp_notify = 0
net.ipv4.conf.all.proxy_arp_pvlan = 0
net.ipv4.conf.all.disable_xfrm = 0
net.ipv4.conf.all.disable_policy = 0
net.ipv4.conf.all.force_igmp_version = 0
net.ipv4.conf.all.promote_secondaries = 0
net.ipv4.conf.default.forwarding = 1
net.ipv4.conf.default.mc_forwarding = 0
net.ipv4.conf.default.accept_redirects = 1
net.ipv4.conf.default.secure_redirects = 1
net.ipv4.conf.default.shared_media = 1
net.ipv4.conf.default.rp_filter = 1
net.ipv4.conf.default.send_redirects = 1
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.conf.default.accept_local = 0
net.ipv4.conf.default.src_valid_mark = 0
net.ipv4.conf.default.proxy_arp = 0
net.ipv4.conf.default.medium_id = 0
net.ipv4.conf.default.bootp_relay = 0
net.ipv4.conf.default.log_martians = 0
net.ipv4.conf.default.tag = 0
net.ipv4.conf.default.arp_filter = 0
net.ipv4.conf.default.arp_announce = 0
net.ipv4.conf.default.arp_ignore = 0
net.ipv4.conf.default.arp_accept = 0
net.ipv4.conf.default.arp_notify = 0
net.ipv4.conf.default.proxy_arp_pvlan = 0
net.ipv4.conf.default.disable_xfrm = 0
net.ipv4.conf.default.disable_policy = 0
net.ipv4.conf.default.force_igmp_version = 0
net.ipv4.conf.default.promote_secondaries = 0
net.ipv4.conf.lo.forwarding = 1
net.ipv4.conf.lo.mc_forwarding = 0
net.ipv4.conf.lo.accept_redirects = 1
net.ipv4.conf.lo.secure_redirects = 1
net.ipv4.conf.lo.shared_media = 1
net.ipv4.conf.lo.rp_filter = 1
net.ipv4.conf.lo.send_redirects = 1
net.ipv4.conf.lo.accept_source_route = 0
net.ipv4.conf.lo.accept_local = 0
net.ipv4.conf.lo.src_valid_mark = 0
net.ipv4.conf.lo.proxy_arp = 0
net.ipv4.conf.lo.medium_id = 0
net.ipv4.conf.lo.bootp_relay = 0
net.ipv4.conf.lo.log_martians = 0
net.ipv4.conf.lo.tag = 0
net.ipv4.conf.lo.arp_filter = 0
net.ipv4.conf.lo.arp_announce = 0
net.ipv4.conf.lo.arp_ignore = 0
net.ipv4.conf.lo.arp_accept = 0
net.ipv4.conf.lo.arp_notify = 0
net.ipv4.conf.lo.proxy_arp_pvlan = 0
net.ipv4.conf.lo.disable_xfrm = 1
net.ipv4.conf.lo.disable_policy = 1
net.ipv4.conf.lo.force_igmp_version = 0
net.ipv4.conf.lo.promote_secondaries = 0
net.ipv4.conf.eth0.forwarding = 1
net.ipv4.conf.eth0.mc_forwarding = 0
net.ipv4.conf.eth0.accept_redirects = 1
net.ipv4.conf.eth0.secure_redirects = 1
net.ipv4.conf.eth0.shared_media = 1
net.ipv4.conf.eth0.rp_filter = 1
net.ipv4.conf.eth0.send_redirects = 1
net.ipv4.conf.eth0.accept_source_route = 0
net.ipv4.conf.eth0.accept_local = 0
net.ipv4.conf.eth0.src_valid_mark = 0
net.ipv4.conf.eth0.proxy_arp = 0
net.ipv4.conf.eth0.medium_id = 0
net.ipv4.conf.eth0.bootp_relay = 0
net.ipv4.conf.eth0.log_martians = 0
net.ipv4.conf.eth0.tag = 0
net.ipv4.conf.eth0.arp_filter = 0
net.ipv4.conf.eth0.arp_announce = 0
net.ipv4.conf.eth0.arp_ignore = 0
net.ipv4.conf.eth0.arp_accept = 0
net.ipv4.conf.eth0.arp_notify = 0
net.ipv4.conf.eth0.proxy_arp_pvlan = 0
net.ipv4.conf.eth0.disable_xfrm = 0
net.ipv4.conf.eth0.disable_policy = 0
net.ipv4.conf.eth0.force_igmp_version = 0
net.ipv4.conf.eth0.promote_secondaries = 0
net.ipv4.conf.eth1.forwarding = 1
net.ipv4.conf.eth1.mc_forwarding = 0
net.ipv4.conf.eth1.accept_redirects = 1
net.ipv4.conf.eth1.secure_redirects = 1
net.ipv4.conf.eth1.shared_media = 1
net.ipv4.conf.eth1.rp_filter = 1
net.ipv4.conf.eth1.send_redirects = 1
net.ipv4.conf.eth1.accept_source_route = 0
net.ipv4.conf.eth1.accept_local = 0
net.ipv4.conf.eth1.src_valid_mark = 0
net.ipv4.conf.eth1.proxy_arp = 0
net.ipv4.conf.eth1.medium_id = 0
net.ipv4.conf.eth1.bootp_relay = 0
net.ipv4.conf.eth1.log_martians = 0
net.ipv4.conf.eth1.tag = 0
net.ipv4.conf.eth1.arp_filter = 0
net.ipv4.conf.eth1.arp_announce = 0
net.ipv4.conf.eth1.arp_ignore = 0
net.ipv4.conf.eth1.arp_accept = 0
net.ipv4.conf.eth1.arp_notify = 0
net.ipv4.conf.eth1.proxy_arp_pvlan = 0
net.ipv4.conf.eth1.disable_xfrm = 0
net.ipv4.conf.eth1.disable_policy = 0
net.ipv4.conf.eth1.force_igmp_version = 0
net.ipv4.conf.eth1.promote_secondaries = 0
net.ipv4.conf.pan0.forwarding = 1
net.ipv4.conf.pan0.mc_forwarding = 0
net.ipv4.conf.pan0.accept_redirects = 1
net.ipv4.conf.pan0.secure_redirects = 1
net.ipv4.conf.pan0.shared_media = 1
net.ipv4.conf.pan0.rp_filter = 1
net.ipv4.conf.pan0.send_redirects = 1
net.ipv4.conf.pan0.accept_source_route = 0
net.ipv4.conf.pan0.accept_local = 0
net.ipv4.conf.pan0.src_valid_mark = 0
net.ipv4.conf.pan0.proxy_arp = 0
net.ipv4.conf.pan0.medium_id = 0
net.ipv4.conf.pan0.bootp_relay = 0
net.ipv4.conf.pan0.log_martians = 0
net.ipv4.conf.pan0.tag = 0
net.ipv4.conf.pan0.arp_filter = 0
net.ipv4.conf.pan0.arp_announce = 0
net.ipv4.conf.pan0.arp_ignore = 0
net.ipv4.conf.pan0.arp_accept = 0
net.ipv4.conf.pan0.arp_notify = 0
net.ipv4.conf.pan0.proxy_arp_pvlan = 0
net.ipv4.conf.pan0.disable_xfrm = 0
net.ipv4.conf.pan0.disable_policy = 0
net.ipv4.conf.pan0.force_igmp_version = 0
net.ipv4.conf.pan0.promote_secondaries = 0
net.ipv4.conf.virbr0.forwarding = 1
net.ipv4.conf.virbr0.mc_forwarding = 0
net.ipv4.conf.virbr0.accept_redirects = 1
net.ipv4.conf.virbr0.secure_redirects = 1
net.ipv4.conf.virbr0.shared_media = 1
net.ipv4.conf.virbr0.rp_filter = 1
net.ipv4.conf.virbr0.send_redirects = 1
net.ipv4.conf.virbr0.accept_source_route = 0
net.ipv4.conf.virbr0.accept_local = 0
net.ipv4.conf.virbr0.src_valid_mark = 0
net.ipv4.conf.virbr0.proxy_arp = 0
net.ipv4.conf.virbr0.medium_id = 0
net.ipv4.conf.virbr0.bootp_relay = 0
net.ipv4.conf.virbr0.log_martians = 0
net.ipv4.conf.virbr0.tag = 0
net.ipv4.conf.virbr0.arp_filter = 0
net.ipv4.conf.virbr0.arp_announce = 0
net.ipv4.conf.virbr0.arp_ignore = 0
net.ipv4.conf.virbr0.arp_accept = 0
net.ipv4.conf.virbr0.arp_notify = 0
net.ipv4.conf.virbr0.proxy_arp_pvlan = 0
net.ipv4.conf.virbr0.disable_xfrm = 0
net.ipv4.conf.virbr0.disable_policy = 0
net.ipv4.conf.virbr0.force_igmp_version = 0
net.ipv4.conf.virbr0.promote_secondaries = 0
net.ipv4.conf.virbr1.forwarding = 1
net.ipv4.conf.virbr1.mc_forwarding = 0
net.ipv4.conf.virbr1.accept_redirects = 1
net.ipv4.conf.virbr1.secure_redirects = 1
net.ipv4.conf.virbr1.shared_media = 1
net.ipv4.conf.virbr1.rp_filter = 1
net.ipv4.conf.virbr1.send_redirects = 1
net.ipv4.conf.virbr1.accept_source_route = 0
net.ipv4.conf.virbr1.accept_local = 0
net.ipv4.conf.virbr1.src_valid_mark = 0
net.ipv4.conf.virbr1.proxy_arp = 0
net.ipv4.conf.virbr1.medium_id = 0
net.ipv4.conf.virbr1.bootp_relay = 0
net.ipv4.conf.virbr1.log_martians = 0
net.ipv4.conf.virbr1.tag = 0
net.ipv4.conf.virbr1.arp_filter = 0
net.ipv4.conf.virbr1.arp_announce = 0
net.ipv4.conf.virbr1.arp_ignore = 0
net.ipv4.conf.virbr1.arp_accept = 0
net.ipv4.conf.virbr1.arp_notify = 0
net.ipv4.conf.virbr1.proxy_arp_pvlan = 0
net.ipv4.conf.virbr1.disable_xfrm = 0
net.ipv4.conf.virbr1.disable_policy = 0
net.ipv4.conf.virbr1.force_igmp_version = 0
net.ipv4.conf.virbr1.promote_secondaries = 0
net.ipv4.conf.virbr2.forwarding = 1
net.ipv4.conf.virbr2.mc_forwarding = 0
net.ipv4.conf.virbr2.accept_redirects = 1
net.ipv4.conf.virbr2.secure_redirects = 1
net.ipv4.conf.virbr2.shared_media = 1
net.ipv4.conf.virbr2.rp_filter = 1
net.ipv4.conf.virbr2.send_redirects = 1
net.ipv4.conf.virbr2.accept_source_route = 0
net.ipv4.conf.virbr2.accept_local = 0
net.ipv4.conf.virbr2.src_valid_mark = 0
net.ipv4.conf.virbr2.proxy_arp = 0
net.ipv4.conf.virbr2.medium_id = 0
net.ipv4.conf.virbr2.bootp_relay = 0
net.ipv4.conf.virbr2.log_martians = 0
net.ipv4.conf.virbr2.tag = 0
net.ipv4.conf.virbr2.arp_filter = 0
net.ipv4.conf.virbr2.arp_announce = 0
net.ipv4.conf.virbr2.arp_ignore = 0
net.ipv4.conf.virbr2.arp_accept = 0
net.ipv4.conf.virbr2.arp_notify = 0
net.ipv4.conf.virbr2.proxy_arp_pvlan = 0
net.ipv4.conf.virbr2.disable_xfrm = 0
net.ipv4.conf.virbr2.disable_policy = 0
net.ipv4.conf.virbr2.force_igmp_version = 0
net.ipv4.conf.virbr2.promote_secondaries = 0
net.ipv4.conf.tun0.forwarding = 1
net.ipv4.conf.tun0.mc_forwarding = 0
net.ipv4.conf.tun0.accept_redirects = 1
net.ipv4.conf.tun0.secure_redirects = 1
net.ipv4.conf.tun0.shared_media = 1
net.ipv4.conf.tun0.rp_filter = 1
net.ipv4.conf.tun0.send_redirects = 1
net.ipv4.conf.tun0.accept_source_route = 0
net.ipv4.conf.tun0.accept_local = 0
net.ipv4.conf.tun0.src_valid_mark = 0
net.ipv4.conf.tun0.proxy_arp = 0
net.ipv4.conf.tun0.medium_id = 0
net.ipv4.conf.tun0.bootp_relay = 0
net.ipv4.conf.tun0.log_martians = 0
net.ipv4.conf.tun0.tag = 0
net.ipv4.conf.tun0.arp_filter = 0
net.ipv4.conf.tun0.arp_announce = 0
net.ipv4.conf.tun0.arp_ignore = 0
net.ipv4.conf.tun0.arp_accept = 0
net.ipv4.conf.tun0.arp_notify = 0
net.ipv4.conf.tun0.proxy_arp_pvlan = 0
net.ipv4.conf.tun0.disable_xfrm = 0
net.ipv4.conf.tun0.disable_policy = 0
net.ipv4.conf.tun0.force_igmp_version = 0
net.ipv4.conf.tun0.promote_secondaries = 0
net.ipv4.ip_forward = 1
net.ipv4.xfrm4_gc_thresh = 2097152
net.ipv4.ipfrag_high_thresh = 262144
net.ipv4.ipfrag_low_thresh = 196608
net.ipv4.ipfrag_time = 30
net.ipv4.icmp_echo_ignore_all = 0
net.ipv4.icmp_echo_ignore_broadcasts = 1
net.ipv4.icmp_ignore_bogus_error_responses = 1
net.ipv4.icmp_errors_use_inbound_ifaddr = 0
net.ipv4.icmp_ratelimit = 1000
net.ipv4.icmp_ratemask = 6168
net.ipv4.rt_cache_rebuild_count = 4
net.ipv4.ipfrag_secret_interval = 600
net.ipv4.ipfrag_max_dist = 64

^ permalink raw reply

* [PATCH] forcedeth: Stay in NAPI as long as there's work
From: Tom Herbert @ 2010-04-28  6:36 UTC (permalink / raw)
  To: netdev, aabdulla, davem

Add loop in NAPI poll routine to keep processing RX and TX as long as
there is more work to do.  This is similar to what tg3 and some other
drivers do.

This modification seems to improve performance (maximum pps).  Running
500 instances of the netperf TCP_RR test with one-byte sizes between
two sixteen-core AMD machines (RPS enabled) gives:

Before patch: 186715 tps
With patch: 400949 tps

Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index a1c0e7b..1e4de7b 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -3736,6 +3736,23 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
 }
 
 #ifdef CONFIG_FORCEDETH_NAPI
+static inline int nv_has_work(struct fe_priv *np)
+{
+	if (nv_optimized(np)) {
+		return (
+		    ((np->get_rx.ex != np->put_rx.ex) &&
+		     !(le32_to_cpu(np->get_rx.ex->flaglen) & NV_RX2_AVAIL)) ||
+		    ((np->get_tx.ex != np->put_tx.ex) &&
+		     !(le32_to_cpu(np->get_tx.ex->flaglen) & NV_TX_VALID)));
+	} else {
+		return (
+		    ((np->get_rx.orig != np->put_rx.orig) &&
+		     !(le32_to_cpu(np->get_rx.orig->flaglen) & NV_RX_AVAIL)) ||
+		    ((np->get_tx.orig != np->put_tx.orig) &&
+		     !(le32_to_cpu(np->get_tx.orig->flaglen) & NV_TX_VALID)));
+	}
+}
+
 static int nv_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct fe_priv *np = container_of(napi, struct fe_priv, napi);
@@ -3743,30 +3760,33 @@ static int nv_napi_poll(struct napi_struct *napi, int budget)
 	u8 __iomem *base = get_hwbase(dev);
 	unsigned long flags;
 	int retcode;
-	int tx_work, rx_work;
+	int tx_work = 0, rx_work = 0;
 
-	if (!nv_optimized(np)) {
-		spin_lock_irqsave(&np->lock, flags);
-		tx_work = nv_tx_done(dev, np->tx_ring_size);
-		spin_unlock_irqrestore(&np->lock, flags);
+	do {
+		if (!nv_optimized(np)) {
+			spin_lock_irqsave(&np->lock, flags);
+			tx_work += nv_tx_done(dev, np->tx_ring_size);
+			spin_unlock_irqrestore(&np->lock, flags);
 
-		rx_work = nv_rx_process(dev, budget);
-		retcode = nv_alloc_rx(dev);
-	} else {
-		spin_lock_irqsave(&np->lock, flags);
-		tx_work = nv_tx_done_optimized(dev, np->tx_ring_size);
-		spin_unlock_irqrestore(&np->lock, flags);
+			rx_work += nv_rx_process(dev, budget);
+			retcode = nv_alloc_rx(dev);
+		} else {
+			spin_lock_irqsave(&np->lock, flags);
+			tx_work += nv_tx_done_optimized(dev, np->tx_ring_size);
+			spin_unlock_irqrestore(&np->lock, flags);
 
-		rx_work = nv_rx_process_optimized(dev, budget);
-		retcode = nv_alloc_rx_optimized(dev);
-	}
+			rx_work += nv_rx_process_optimized(dev, budget);
+			retcode = nv_alloc_rx_optimized(dev);
+		}
 
-	if (retcode) {
-		spin_lock_irqsave(&np->lock, flags);
-		if (!np->in_shutdown)
-			mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
-		spin_unlock_irqrestore(&np->lock, flags);
-	}
+		if (unlikely(retcode)) {
+			spin_lock_irqsave(&np->lock, flags);
+			if (!np->in_shutdown)
+				mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
+			spin_unlock_irqrestore(&np->lock, flags);
+			break;
+		}
+	} while (rx_work < budget && nv_has_work(np));
 
 	nv_change_interrupt_mode(dev, tx_work + rx_work);
 

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox