Netdev List
 help / color / mirror / Atom feed
* Re: [patch] iwl: cleanup: remove unneeded error handling
From: Zhu Yi @ 2010-04-29  2:12 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Ortiz, Samuel, Intel Linux Wireless, John W. Linville,
	Andrew Morton, Alexey Dobriyan, linux-wireless@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	kernel-janitors@vger.kernel.org
In-Reply-To: <20100428090115.GF29093@bicker>

On Wed, 2010-04-28 at 17:01 +0800, Dan Carpenter wrote:
> This is just a cleanup and doesn't change how the code works.
> 
> debugfs_create_dir() and debugfs_create_file() return an error pointer 
> (-ENODEV) if CONFIG_DEBUG_FS is not enabled, otherwise if an error occurs
> they return NULL.  This is how they are implemented and what it says in 
> the DebugFS documentation.  DebugFS can not be compiled as a module.  
> 
> As a result, we only need to check for error pointers and particularly 
> -ENODEV one time to know that DebugFS is enabled.  This patch keeps the 
> first check for error pointers and removes the rest. 
> 
> The other reason for this patch, is that it silences some Smatch warnings.
> Smatch sees the condition "(result != -ENODEV)" and assumes that it's 
> possible for "result" to equal -ENODEV.  If it were possible it would lead
> to an error pointer dereference.  But since it's not, we can just remove
> the check.
> 
> Signed-off-by: Dan Carpenter <error27@gmail.com>

Thanks. But looks like you missed the one in if_sdio_debugfs_init().

I don't think we even need to check -ENODEV ourselves because if
DEBUG_FS is not compiled in, all the debugfs utility functions will
become no-op.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>

diff --git a/drivers/net/wireless/iwmc3200wifi/bus.h b/drivers/net/wireless/iwmc3200wifi/bus.h
index 836663e..62edd58 100644
--- a/drivers/net/wireless/iwmc3200wifi/bus.h
+++ b/drivers/net/wireless/iwmc3200wifi/bus.h
@@ -31,7 +31,7 @@ struct iwm_if_ops {
 	int (*disable)(struct iwm_priv *iwm);
 	int (*send_chunk)(struct iwm_priv *iwm, u8* buf, int count);
 
-	int (*debugfs_init)(struct iwm_priv *iwm, struct dentry *parent_dir);
+	void (*debugfs_init)(struct iwm_priv *iwm, struct dentry *parent_dir);
 	void (*debugfs_exit)(struct iwm_priv *iwm);
 
 	const char *umac_name;
diff --git a/drivers/net/wireless/iwmc3200wifi/debug.h b/drivers/net/wireless/iwmc3200wifi/debug.h
index e35c9b6..f98bf12 100644
--- a/drivers/net/wireless/iwmc3200wifi/debug.h
+++ b/drivers/net/wireless/iwmc3200wifi/debug.h
@@ -113,10 +113,10 @@ struct iwm_debugfs {
 };
 
 #ifdef CONFIG_IWM_DEBUG
-int iwm_debugfs_init(struct iwm_priv *iwm);
+void iwm_debugfs_init(struct iwm_priv *iwm);
 void iwm_debugfs_exit(struct iwm_priv *iwm);
 #else
-static inline int iwm_debugfs_init(struct iwm_priv *iwm)
+static inline void iwm_debugfs_init(struct iwm_priv *iwm)
 {
 	return 0;
 }
diff --git a/drivers/net/wireless/iwmc3200wifi/debugfs.c b/drivers/net/wireless/iwmc3200wifi/debugfs.c
index 7244413..53b0b77 100644
--- a/drivers/net/wireless/iwmc3200wifi/debugfs.c
+++ b/drivers/net/wireless/iwmc3200wifi/debugfs.c
@@ -48,12 +48,11 @@ static struct {
 
 #define add_dbg_module(dbg, name, id, initlevel) 	\
 do {							\
-	struct dentry *d;				\
 	dbg.dbg_module[id] = (initlevel);		\
-	d = debugfs_create_x8(name, 0600, dbg.dbgdir,	\
-			     &(dbg.dbg_module[id]));	\
-	if (!IS_ERR(d))					\
-		dbg.dbg_module_dentries[id] = d;        \
+	dbg.dbg_module_dentries[id] =			\
+		debugfs_create_x8(name, 0600,		\
+				dbg.dbgdir,		\
+				&(dbg.dbg_module[id]));	\
 } while (0)
 
 static int iwm_debugfs_u32_read(void *data, u64 *val)
@@ -423,89 +422,29 @@ static const struct file_operations iwm_debugfs_fw_err_fops = {
 	.read =		iwm_debugfs_fw_err_read,
 };
 
-int iwm_debugfs_init(struct iwm_priv *iwm)
+void iwm_debugfs_init(struct iwm_priv *iwm)
 {
-	int i, result;
-	char devdir[16];
+	int i;
 
 	iwm->dbg.rootdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
-	result = PTR_ERR(iwm->dbg.rootdir);
-	if (!result || IS_ERR(iwm->dbg.rootdir)) {
-		if (result == -ENODEV) {
-			IWM_ERR(iwm, "DebugFS (CONFIG_DEBUG_FS) not "
-				"enabled in kernel config\n");
-			result = 0;	/* No debugfs support */
-		}
-		IWM_ERR(iwm, "Couldn't create rootdir: %d\n", result);
-		goto error;
-	}
-
-	snprintf(devdir, sizeof(devdir), "%s", wiphy_name(iwm_to_wiphy(iwm)));
-
-	iwm->dbg.devdir = debugfs_create_dir(devdir, iwm->dbg.rootdir);
-	result = PTR_ERR(iwm->dbg.devdir);
-	if (IS_ERR(iwm->dbg.devdir) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create devdir: %d\n", result);
-		goto error;
-	}
-
+	iwm->dbg.devdir = debugfs_create_dir(wiphy_name(iwm_to_wiphy(iwm)),
+					     iwm->dbg.rootdir);
 	iwm->dbg.dbgdir = debugfs_create_dir("debug", iwm->dbg.devdir);
-	result = PTR_ERR(iwm->dbg.dbgdir);
-	if (IS_ERR(iwm->dbg.dbgdir) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create dbgdir: %d\n", result);
-		goto error;
-	}
-
 	iwm->dbg.rxdir = debugfs_create_dir("rx", iwm->dbg.devdir);
-	result = PTR_ERR(iwm->dbg.rxdir);
-	if (IS_ERR(iwm->dbg.rxdir) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create rx dir: %d\n", result);
-		goto error;
-	}
-
 	iwm->dbg.txdir = debugfs_create_dir("tx", iwm->dbg.devdir);
-	result = PTR_ERR(iwm->dbg.txdir);
-	if (IS_ERR(iwm->dbg.txdir) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create tx dir: %d\n", result);
-		goto error;
-	}
-
 	iwm->dbg.busdir = debugfs_create_dir("bus", iwm->dbg.devdir);
-	result = PTR_ERR(iwm->dbg.busdir);
-	if (IS_ERR(iwm->dbg.busdir) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create bus dir: %d\n", result);
-		goto error;
-	}
-
-	if (iwm->bus_ops->debugfs_init) {
-		result = iwm->bus_ops->debugfs_init(iwm, iwm->dbg.busdir);
-		if (result < 0) {
-			IWM_ERR(iwm, "Couldn't create bus entry: %d\n", result);
-			goto error;
-		}
-	}
-
+	if (iwm->bus_ops->debugfs_init)
+		iwm->bus_ops->debugfs_init(iwm, iwm->dbg.busdir);
 
 	iwm->dbg.dbg_level = IWM_DL_NONE;
 	iwm->dbg.dbg_level_dentry =
 		debugfs_create_file("level", 0200, iwm->dbg.dbgdir, iwm,
 				    &fops_iwm_dbg_level);
-	result = PTR_ERR(iwm->dbg.dbg_level_dentry);
-	if (IS_ERR(iwm->dbg.dbg_level_dentry) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create dbg_level: %d\n", result);
-		goto error;
-	}
-
 
 	iwm->dbg.dbg_modules = IWM_DM_DEFAULT;
 	iwm->dbg.dbg_modules_dentry =
 		debugfs_create_file("modules", 0200, iwm->dbg.dbgdir, iwm,
 				    &fops_iwm_dbg_modules);
-	result = PTR_ERR(iwm->dbg.dbg_modules_dentry);
-	if (IS_ERR(iwm->dbg.dbg_modules_dentry) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create dbg_modules: %d\n", result);
-		goto error;
-	}
 
 	for (i = 0; i < __IWM_DM_NR; i++)
 		add_dbg_module(iwm->dbg, iwm_debug_module[i].name,
@@ -514,44 +453,15 @@ int iwm_debugfs_init(struct iwm_priv *iwm)
 	iwm->dbg.txq_dentry = debugfs_create_file("queues", 0200,
 						  iwm->dbg.txdir, iwm,
 						  &iwm_debugfs_txq_fops);
-	result = PTR_ERR(iwm->dbg.txq_dentry);
-	if (IS_ERR(iwm->dbg.txq_dentry) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create tx queue: %d\n", result);
-		goto error;
-	}
-
 	iwm->dbg.tx_credit_dentry = debugfs_create_file("credits", 0200,
 						   iwm->dbg.txdir, iwm,
 						   &iwm_debugfs_tx_credit_fops);
-	result = PTR_ERR(iwm->dbg.tx_credit_dentry);
-	if (IS_ERR(iwm->dbg.tx_credit_dentry) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create tx credit: %d\n", result);
-		goto error;
-	}
-
 	iwm->dbg.rx_ticket_dentry = debugfs_create_file("tickets", 0200,
 						  iwm->dbg.rxdir, iwm,
 						  &iwm_debugfs_rx_ticket_fops);
-	result = PTR_ERR(iwm->dbg.rx_ticket_dentry);
-	if (IS_ERR(iwm->dbg.rx_ticket_dentry) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create rx ticket: %d\n", result);
-		goto error;
-	}
-
 	iwm->dbg.fw_err_dentry = debugfs_create_file("last_fw_err", 0200,
 						     iwm->dbg.dbgdir, iwm,
 						     &iwm_debugfs_fw_err_fops);
-	result = PTR_ERR(iwm->dbg.fw_err_dentry);
-	if (IS_ERR(iwm->dbg.fw_err_dentry) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create last FW err: %d\n", result);
-		goto error;
-	}
-
-
-	return 0;
-
- error:
-	return result;
 }
 
 void iwm_debugfs_exit(struct iwm_priv *iwm)
diff --git a/drivers/net/wireless/iwmc3200wifi/sdio.c b/drivers/net/wireless/iwmc3200wifi/sdio.c
index 1eafd6d..1acea37 100644
--- a/drivers/net/wireless/iwmc3200wifi/sdio.c
+++ b/drivers/net/wireless/iwmc3200wifi/sdio.c
@@ -366,21 +366,13 @@ static const struct file_operations iwm_debugfs_sdio_fops = {
 	.read =		iwm_debugfs_sdio_read,
 };
 
-static int if_sdio_debugfs_init(struct iwm_priv *iwm, struct dentry *parent_dir)
+static void if_sdio_debugfs_init(struct iwm_priv *iwm, struct dentry *parent_dir)
 {
-	int result;
 	struct iwm_sdio_priv *hw = iwm_to_if_sdio(iwm);
 
 	hw->cccr_dentry = debugfs_create_file("cccr", 0200,
 					      parent_dir, iwm,
 					      &iwm_debugfs_sdio_fops);
-	result = PTR_ERR(hw->cccr_dentry);
-	if (IS_ERR(hw->cccr_dentry) && (result != -ENODEV)) {
-		IWM_ERR(iwm, "Couldn't create CCCR entry: %d\n", result);
-		return result;
-	}
-
-	return 0;
 }
 
 static void if_sdio_debugfs_exit(struct iwm_priv *iwm)
@@ -440,11 +432,7 @@ static int iwm_sdio_probe(struct sdio_func *func,
 	hw = iwm_private(iwm);
 	hw->iwm = iwm;
 
-	ret = iwm_debugfs_init(iwm);
-	if (ret < 0) {
-		IWM_ERR(iwm, "Debugfs registration failed\n");
-		goto if_free;
-	}
+	iwm_debugfs_init(iwm);
 
 	sdio_set_drvdata(func, hw);
 
@@ -473,7 +461,6 @@ static int iwm_sdio_probe(struct sdio_func *func,
 	destroy_workqueue(hw->isr_wq);
  debugfs_exit:
 	iwm_debugfs_exit(iwm);
- if_free:
 	iwm_if_free(iwm);
 	return ret;
 }

^ permalink raw reply related

* Re: TCP MD5 issue
From: Bijay Singh @ 2010-04-29  1:49 UTC (permalink / raw)
  To: netdev@vger.kernel.org
In-Reply-To: <F7E69C56-3340-476E-8C5B-415E4B2E8DF4@guavus.com>



> Hi,
> 
> I have hit upon a bug in the TCP MD5 implementation. The bug shows up under load conditions. I haven't been able to identify the issue exactly, but it is easily reproducible.
> 
> I run multiple instances (44 pairs) of servers and clients. All servers running on ubuntu 2.6.31 and all clients running on 2.6.26.
> 
> I observe that a few messages from either end have invalid MD5 hash signatures.
> 
> I browsed thru the code and did not find anything suspicious.
> 
> I hacked the code (2.6.26) to call the .calc_md5_hash function twice at the same point and printk'ed the hashes from the .calc_md5_hash function. I noticed that sometimes the 1st call gives the incorrect value and sometimes the 2nd call gives the incorrect value, which essentially leads me to believe that the inputs to the function are not getting modified.
> 
> I short-circuited the crypto and called the md5 functions directly and allocated the md5 context on the stack, to remove any kind of sharing violation that may be happening. (The context is otherwise saved per CPU in the md5_sig_pool, and the execution of the hash generation code is made non-preemptible to prevent sharing violations.) BTW, what will happen if there is an interrupt?
> 
> After this change the code is running much more smoothly; however, I did manage to get 6 errors in 4 hours. Earlier I was seeing invalid checksum errors within seconds of starting the load.
> 
> I have rerun the test and haven't observed any errors in the last 2 hours.
> 
> I need to fix this issue and am heavily dependent on you to provide me with some clues to proceed further. Pls. let me know if you need any more data.
> 
> Looking forward to your response.
> 
> Thanks
> BIjay


^ permalink raw reply

* RE: [RFC][PATCH v4 05/18] Add a function to indicate if device use external buffer.
From: Xin, Xiaohui @ 2010-04-29  1:38 UTC (permalink / raw)
  To: Changli Gao
  Cc: netdev@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, mst@redhat.com, mingo@elte.hu,
	davem@davemloft.net, jdike@linux.intel.com
In-Reply-To: <t2z412e6f7f1004250235ib1002b3doe604d786dadefaa5@mail.gmail.com>

> +static int dev_is_mpassthru(struct net_device *dev)
>
>bool return value should be better here.
>
>-- 
>Regards,
>Changli Gao(xiaosuo@gmail.com)

Thanks, would fix that.

Thanks
Xiaohui

^ permalink raw reply

* RE: [RFC][PATCH v4 00/18] Provide a zero-copy method on KVM virtio-net.
From: Xin, Xiaohui @ 2010-04-29  1:33 UTC (permalink / raw)
  To: Michael S. Tsirkin, David Miller
  Cc: netdev@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, mingo@elte.hu,
	jdike@linux.intel.com
In-Reply-To: <20100425104604.GA10238@redhat.com>

> > The idea is simple, just to pin the guest VM user space and then let
> > host NIC driver has the chance to directly DMA to it.
> >
> >Isn't it much easier to map the RX ring of the network device into the
> >guest's address space, have DMA map calls translate guest addresses to
> >physical/DMA addresses as well as do all of this crazy page pinning
> >stuff, and provide the translations and protections via the IOMMU?

>This means we need guest know how the specific network device works.
>So we won't be able to, for example, move guest between different hosts.
>There are other problems: many physical systems do not have an iommu,
>some guest OS-es do not support DMA map calls, doing VM exit
>on each DMA map call might turn out to be very slow. And so on.

This solution is what we can currently think of to implement zero-copy. Some
modifications are made to the net core to try to avoid network device driver
changes.  The major change is to __alloc_skb(), to which we added a dev
parameter to indicate whether the device will DMA to/from the guest/user buffer
that is pointed to by the host skb->data. We also modified skb_release_data() and
skb_reserve(). We now have it working with the ixgbe driver with PS mode disabled,
and got some performance data with it.
  
	using netperf with GSO/TSO disabled, 10G NIC, 
	disabled packet split mode, with raw socket case compared to vhost.

	bandwidth will be from 1.1Gbps to 1.7Gbps
	CPU % from 120%-140% to 140%-160%

We are now trying to get decent performance data with advanced features.
Do you have any other concerns with this solution? 

>> What's being proposed here looks a bit over-engineered.

>This is an attempt to reduce overhead for virtio (paravirtualization).
>'Don't use PV' is kind of an alternative, but I do not
>think it's a simpler one.

-- 
MST

^ permalink raw reply

* Re: [PATCH]: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4)
From: Eugene Teo @ 2010-04-29  0:25 UTC (permalink / raw)
  To: Neil Horman
  Cc: David Miller, vladislav.yasevich, sri, linux-sctp, netdev,
	security
In-Reply-To: <20100428215045.GA25146@hmsreliant.think-freely.org>

On 04/29/2010 05:50 AM, Neil Horman wrote:
> On Wed, Apr 28, 2010 at 02:23:39PM -0700, David Miller wrote:
>> From: Vlad Yasevich<vladislav.yasevich@hp.com>
>> Date: Wed, 28 Apr 2010 16:37:04 -0400
>>
>>>
>>> Looks good.
>>>
>>> Acked-by: Vlad Yasevich<vladislav.yasevich@hp.com>
>>
>> Applied, thanks Neil and Vlad.
>>
> Thanks!

Thanks :)

Eugene

^ permalink raw reply

* Re: [PATCH net-next-2.6] net: speedup udp receive path
From: jamal @ 2010-04-29  0:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, xiaosuo, therbert, shemminger, netdev,
	Eilon Greenstein, Brian Bloniarz
In-Reply-To: <1272498293.4258.121.camel@bigi>

On Wed, 2010-04-28 at 19:45 -0400, jamal wrote:

> Your patch has improved the performance of rps relative to what is in
> net-next very lightly; but it has also improved the performance of
> non-rps;->

Correction: Last part of sentence not true (obvious if you look at
results i attached)

cheers,
jamal



^ permalink raw reply

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
From: Tom Herbert @ 2010-04-28 23:56 UTC (permalink / raw)
  To: David Miller; +Cc: shemminger, joe, netdev, aabdulla
In-Reply-To: <20100428.142534.57451492.davem@davemloft.net>

On Wed, Apr 28, 2010 at 2:25 PM, David Miller <davem@davemloft.net> wrote:
> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Wed, 28 Apr 2010 11:25:28 -0700
>
>> The following does the same thing without the extra overhead
>> of testing all the registers. It also handles the out of memory
>> case.
>>
>> Compile tested only...
>>
>> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
>
> Tom can you test this version?
>

Looks good.  406038 tps in my quick test which still is showing the
benefits.  Thanks for cleaning this up Stephen!

Tom

^ permalink raw reply

* Re: [PATCH net-next-2.6] net: speedup udp receive path
From: jamal @ 2010-04-28 23:44 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, xiaosuo, therbert, shemminger, netdev,
	Eilon Greenstein, Brian Bloniarz
In-Reply-To: <1272463605.2267.70.camel@edumazet-laptop>

[-- Attachment #1: Type: text/plain, Size: 1188 bytes --]

On Wed, 2010-04-28 at 16:06 +0200, Eric Dumazet wrote:

> Here it is ;)

Sorry - things got a little hectic with TheMan.

I am afraid i dont have good news.
Actually, I should say i dont have good news in regards to rps.
For my sample app, two things seem to be happening:
a) The overall performance has gotten better for both rps
and non-rps.
b) non-rps is now performing relatively better

This is just what i see in net-next not related to your patch.
It seems the kernels i tested prior to April 23 showed rps better.
The one i tested on Apr23 showed rps being about the same as non-rps.
As i stated in my last result posting, I thought i didnt test properly
but i did again today and saw the same thing. And now non-rps is
_consistently_ better.
So some regression is going on...

Your patch has improved the performance of rps relative to what is in
net-next very lightly; but it has also improved the performance of
non-rps;->
My traces look different for the app cpu than yours - likely because of
the apps being different.

At the moment i dont have time to dig deeper into code, but i could
test as cycles show up.

I am attaching the profile traces and results.

cheers,
jamal

[-- Attachment #2: sum-apr23and28.txt --]
[-- Type: text/plain, Size: 1469 bytes --]


April 23 net-next

kernel           sink    cpu all     cpuint       cpuapp
---------------------------------------------------------
nn              93.95%   84.5%        99.8%        79.8%
nn-rps          96.41%   85.4%        95.5%        82.5%
nn-cl           97.29%   84.0%        99.9%        79.6%
nn-cl-rps       97.76%   86.5%        96.5%        84.8%

nn: Basic net-next from Apr23
nn-rps: Basic net-next from Apr23 with rps mask ee and irq affinity to cpu0
nn-cl: Basic net-next from Apr23 + Changli patch
nn-cl-rps: Basic net-next from Apr23 + Changli patch + rps mask ee,irq aff cpu0
sink: the amount of traffic the system was able to sink in.
cpu all: avg % system cpu consumed in test
cpuint: avg %cpu consumed by the cpu where interrupts happened
cpuapp: avg %cpu consumed by a sample cpu which did app processing

Now repeat with Erics changes and kernel from Apr-28

kernel         sink      cpu all     cpuint       cpuapp
---------------------------------------------------------
nn2              98.78%   83.6%        100.0%       82.8%
nn2-rps          94.43%   84.2%        98.1%        82.0%
nn2-ed           98.74%   83.2%        99.9%        81.6%
nn2-ed-rps       95.15%   84.5%        97.3%        82.1%


nn2: Basic net-next from Apr28
nn2-rps: Basic net-next from Apr28 with rps mask ee and irq affinity to cpu0
nn2-ed: Basic net-next from Apr28 + Eric patch
nn2-ed-rps: Basic net-next from Apr28 + Eric patch + rps mask ee,irq aff cpu0

[-- Attachment #3: nn-apr28-summary.txt --]
[-- Type: text/plain, Size: 78977 bytes --]


I: net-next

Average udp sink: 98.78%

--------------------------------------------------------------------------------------------------
   PerfTop:    3632 irqs/sec  kernel:83.7% [1000Hz cycles],  (all, 8 CPUs)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ____________________

             2738.00  9.8% sky2_poll                   [sky2]              
             1543.00  5.5% _raw_spin_lock_irqsave      [kernel]            
             1019.00  3.7% system_call                 [kernel]            
              740.00  2.7% copy_user_generic_string    [kernel]            
              687.00  2.5% fget                        [kernel]            
              640.00  2.3% _raw_spin_unlock_irqrestore [kernel]            
              634.00  2.3% sys_epoll_ctl               [kernel]            
              613.00  2.2% datagram_poll               [kernel]            
              553.00  2.0% _raw_spin_lock_bh           [kernel]            
              530.00  1.9% kmem_cache_free             [kernel]            
              522.00  1.9% schedule                    [kernel]            
              487.00  1.7% vread_tsc                   [kernel].vsyscall_fn
              467.00  1.7% _raw_spin_lock              [kernel]            
              432.00  1.5% udp_recvmsg                 [kernel]            
              426.00  1.5% kmem_cache_alloc            [kernel]            
              418.00  1.5% __udp4_lib_lookup           [kernel]            
              417.00  1.5% sys_epoll_wait              [kernel]            
              376.00  1.3% fput                        [kernel]            
              361.00  1.3% ip_route_input              [kernel]            
              344.00  1.2% local_bh_enable_ip          [kernel]            
              326.00  1.2% ip_rcv                      [kernel]            
              321.00  1.2% first_packet_length         [kernel]            
              307.00  1.1% ep_remove                   [kernel]            
              303.00  1.1% dst_release                 [kernel]            
              301.00  1.1% skb_copy_datagram_iovec     [kernel]            
              297.00  1.1% mutex_lock                  [kernel]            



--------------------------------------------------------------------------------------------------
   PerfTop:    4018 irqs/sec  kernel:83.3% [1000Hz cycles],  (all, 8 CPUs)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ______________________

             4274.00  9.7% sky2_poll                   [sky2]                
             2473.00  5.6% _raw_spin_lock_irqsave      [kernel]              
             1585.00  3.6% system_call                 [kernel]              
             1179.00  2.7% copy_user_generic_string    [kernel]              
             1089.00  2.5% fget                        [kernel]              
             1019.00  2.3% _raw_spin_unlock_irqrestore [kernel]              
             1011.00  2.3% sys_epoll_ctl               [kernel]              
              965.00  2.2% datagram_poll               [kernel]              
              902.00  2.0% kmem_cache_free             [kernel]              
              841.00  1.9% _raw_spin_lock_bh           [kernel]              
              837.00  1.9% schedule                    [kernel]              
              735.00  1.7% vread_tsc                   [kernel].vsyscall_fn  
              730.00  1.7% udp_recvmsg                 [kernel]              
              729.00  1.7% _raw_spin_lock              [kernel]              
              678.00  1.5% kmem_cache_alloc            [kernel]              
              651.00  1.5% sys_epoll_wait              [kernel]              
              635.00  1.4% __udp4_lib_lookup           [kernel]              
              595.00  1.3% fput                        [kernel]              
              568.00  1.3% local_bh_enable_ip          [kernel]              
              562.00  1.3% ip_route_input              [kernel]              
              516.00  1.2% dst_release                 [kernel]              
              502.00  1.1% ep_remove                   [kernel]              
              485.00  1.1% skb_copy_datagram_iovec     [kernel]              
              484.00  1.1% first_packet_length         [kernel]              
              476.00  1.1% ip_rcv                      [kernel]              
              470.00  1.1% __alloc_skb                 [kernel]              
              459.00  1.0% epoll_ctl                   /lib/libc-2.7.so      
              458.00  1.0% mutex_lock                  [kernel]              


--------------------------------------------------------------------------------------------------
   PerfTop:    1000 irqs/sec  kernel:100.0% [1000Hz cycles],  (all, cpu: 0)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ________

             3534.00 34.7% sky2_poll                   [sky2]  
              545.00  5.3% __udp4_lib_lookup           [kernel]
              537.00  5.3% ip_route_input              [kernel]
              427.00  4.2% _raw_spin_lock_irqsave      [kernel]
              401.00  3.9% __alloc_skb                 [kernel]
              360.00  3.5% ip_rcv                      [kernel]
              332.00  3.3% _raw_spin_lock              [kernel]
              292.00  2.9% sock_queue_rcv_skb          [kernel]
              291.00  2.9% __udp4_lib_rcv              [kernel]
              273.00  2.7% sock_def_readable           [kernel]
              269.00  2.6% __netif_receive_skb         [kernel]
              209.00  2.1% __wake_up_common            [kernel]
              196.00  1.9% __kmalloc                   [kernel]
              164.00  1.6% _raw_read_lock              [kernel]
              157.00  1.5% kmem_cache_alloc            [kernel]
              157.00  1.5% ep_poll_callback            [kernel]
              133.00  1.3% resched_task                [kernel]
              128.00  1.3% task_rq_lock                [kernel]
              120.00  1.2% swiotlb_sync_single         [kernel]
              120.00  1.2% sky2_rx_submit              [sky2]  
              117.00  1.1% udp_queue_rcv_skb           [kernel]
              108.00  1.1% ip_local_deliver            [kernel]
              104.00  1.0% try_to_wake_up              [kernel]
              102.00  1.0% _raw_spin_unlock_irqrestore [kernel]
               98.00  1.0% select_task_rq_fair         [kernel]



--------------------------------------------------------------------------------------------------
   PerfTop:    1000 irqs/sec  kernel:100.0% [1000Hz cycles],  (all, cpu: 0)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ________

             4601.00 34.0% sky2_poll                   [sky2]  
              732.00  5.4% __udp4_lib_lookup           [kernel]
              724.00  5.3% ip_route_input              [kernel]
              527.00  3.9% _raw_spin_lock_irqsave      [kernel]
              520.00  3.8% __alloc_skb                 [kernel]
              483.00  3.6% ip_rcv                      [kernel]
              441.00  3.3% _raw_spin_lock              [kernel]
              401.00  3.0% sock_queue_rcv_skb          [kernel]
              373.00  2.8% __udp4_lib_rcv              [kernel]
              365.00  2.7% sock_def_readable           [kernel]
              353.00  2.6% __netif_receive_skb         [kernel]
              285.00  2.1% __wake_up_common            [kernel]
              273.00  2.0% __kmalloc                   [kernel]
              230.00  1.7% _raw_read_lock              [kernel]
              208.00  1.5% ep_poll_callback            [kernel]
              199.00  1.5% kmem_cache_alloc            [kernel]
              180.00  1.3% task_rq_lock                [kernel]
              172.00  1.3% sky2_rx_submit              [sky2]  
              171.00  1.3% resched_task                [kernel]
              165.00  1.2% ip_local_deliver            [kernel]
              162.00  1.2% udp_queue_rcv_skb           [kernel]
              158.00  1.2% _raw_spin_unlock_irqrestore [kernel]
              148.00  1.1% select_task_rq_fair         [kernel]
              144.00  1.1% try_to_wake_up              [kernel]
              142.00  1.0% sky2_remove                 [sky2]  
              140.00  1.0% swiotlb_sync_single         [kernel]
               95.00  0.7% cache_alloc_refill          [kernel]
               92.00  0.7% dev_gro_receive             [kernel]
               82.00  0.6% is_swiotlb_buffer           [kernel]


--------------------------------------------------------------------------------------------------
   PerfTop:     622 irqs/sec  kernel:74.9% [1000Hz cycles],  (all, cpu: 2)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ _____________________________________

              113.00  6.5% _raw_spin_lock_irqsave      /lib/modules/2.6.34-rc5/build/vmlinux
              105.00  6.0% system_call                 /lib/modules/2.6.34-rc5/build/vmlinux
               69.00  3.9% fget                        /lib/modules/2.6.34-rc5/build/vmlinux
               64.00  3.7% datagram_poll               /lib/modules/2.6.34-rc5/build/vmlinux
               56.00  3.2% copy_user_generic_string    /lib/modules/2.6.34-rc5/build/vmlinux
               55.00  3.1% sys_epoll_ctl               /lib/modules/2.6.34-rc5/build/vmlinux
               53.00  3.0% _raw_spin_unlock_irqrestore /lib/modules/2.6.34-rc5/build/vmlinux
               46.00  2.6% _raw_spin_lock_bh           /lib/modules/2.6.34-rc5/build/vmlinux
               42.00  2.4% kmem_cache_free             /lib/modules/2.6.34-rc5/build/vmlinux
               37.00  2.1% dst_release                 /lib/modules/2.6.34-rc5/build/vmlinux
               37.00  2.1% schedule                    /lib/modules/2.6.34-rc5/build/vmlinux
               35.00  2.0% mutex_lock                  /lib/modules/2.6.34-rc5/build/vmlinux
               35.00  2.0% vread_tsc                   [kernel].vsyscall_fn                 
               35.00  2.0% udp_recvmsg                 /lib/modules/2.6.34-rc5/build/vmlinux
               34.00  1.9% sys_epoll_wait              /lib/modules/2.6.34-rc5/build/vmlinux
               31.00  1.8% local_bh_enable_ip          /lib/modules/2.6.34-rc5/build/vmlinux
               29.00  1.7% ep_remove                   /lib/modules/2.6.34-rc5/build/vmlinux
               28.00  1.6% kmem_cache_alloc            /lib/modules/2.6.34-rc5/build/vmlinux
               27.00  1.5% process_recv                /home/hadi/udp_sink/mcpudp           
               25.00  1.4% mutex_unlock                /lib/modules/2.6.34-rc5/build/vmlinux
               24.00  1.4% ep_send_events_proc         /lib/modules/2.6.34-rc5/build/vmlinux
               24.00  1.4% clock_gettime               /lib/librt-2.7.so                    
               23.00  1.3% fput                        /lib/modules/2.6.34-rc5/build/vmlinux
               23.00  1.3% skb_copy_datagram_iovec     /lib/modules/2.6.34-rc5/build/vmlinux
               20.00  1.1% sock_recv_ts_and_drops      /lib/modules/2.6.34-rc5/build/vmlinux
               20.00  1.1% inet_recvmsg                /lib/modules/2.6.34-rc5/build/vmlinux
               19.00  1.1% epoll_dispatch              /usr/lib/libevent-1.3e.so.1.0.3      
               19.00  1.1% first_packet_length         /lib/modules/2.6.34-rc5/build/vmlinux



--------------------------------------------------------------------------------------------------
   PerfTop:     625 irqs/sec  kernel:83.0% [1000Hz cycles],  (all, cpu: 2)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ _____________________________________

              315.00  6.8% _raw_spin_lock_irqsave      /lib/modules/2.6.34-rc5/build/vmlinux
              232.00  5.0% system_call                 /lib/modules/2.6.34-rc5/build/vmlinux
              175.00  3.8% fget                        /lib/modules/2.6.34-rc5/build/vmlinux
              174.00  3.8% datagram_poll               /lib/modules/2.6.34-rc5/build/vmlinux
              168.00  3.6% sys_epoll_ctl               /lib/modules/2.6.34-rc5/build/vmlinux
              155.00  3.4% copy_user_generic_string    /lib/modules/2.6.34-rc5/build/vmlinux
              144.00  3.1% kmem_cache_free             /lib/modules/2.6.34-rc5/build/vmlinux
              133.00  2.9% _raw_spin_lock_bh           /lib/modules/2.6.34-rc5/build/vmlinux
              126.00  2.7% _raw_spin_unlock_irqrestore /lib/modules/2.6.34-rc5/build/vmlinux
              113.00  2.4% vread_tsc                   [kernel].vsyscall_fn                 
              110.00  2.4% _raw_spin_unlock_bh         /lib/modules/2.6.34-rc5/build/vmlinux
              106.00  2.3% schedule                    /lib/modules/2.6.34-rc5/build/vmlinux
              103.00  2.2% local_bh_enable_ip          /lib/modules/2.6.34-rc5/build/vmlinux
              101.00  2.2% udp_recvmsg                 /lib/modules/2.6.34-rc5/build/vmlinux
               97.00  2.1% sys_epoll_wait              /lib/modules/2.6.34-rc5/build/vmlinux
               84.00  1.8% dst_release                 /lib/modules/2.6.34-rc5/build/vmlinux
               78.00  1.7% fput                        /lib/modules/2.6.34-rc5/build/vmlinux
               75.00  1.6% first_packet_length         /lib/modules/2.6.34-rc5/build/vmlinux
               74.00  1.6% kmem_cache_alloc            /lib/modules/2.6.34-rc5/build/vmlinux
               71.00  1.5% ep_remove                   /lib/modules/2.6.34-rc5/build/vmlinux
               69.00  1.5% epoll_ctl                   /lib/libc-2.7.so                     
               67.00  1.5% mutex_lock                  /lib/modules/2.6.34-rc5/build/vmlinux
               65.00  1.4% sock_recv_ts_and_drops      /lib/modules/2.6.34-rc5/build/vmlinux
               65.00  1.4% inet_recvmsg                /lib/modules/2.6.34-rc5/build/vmlinux
               64.00  1.4% process_recv                /home/hadi/udp_sink/mcpudp           
               62.00  1.3% skb_copy_datagram_iovec     /lib/modules/2.6.34-rc5/build/vmlinux
               60.00  1.3% clock_gettime               /lib/librt-2.7.so                    


--------------------------------------------------------------------------------------------------
   PerfTop:     700 irqs/sec  kernel:84.3% [1000Hz cycles],  (all, cpu: 2)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ _____________________________________

              489.00  6.4% _raw_spin_lock_irqsave      /lib/modules/2.6.34-rc5/build/vmlinux
              376.00  4.9% system_call                 /lib/modules/2.6.34-rc5/build/vmlinux
              308.00  4.0% fget                        /lib/modules/2.6.34-rc5/build/vmlinux
              302.00  3.9% copy_user_generic_string    /lib/modules/2.6.34-rc5/build/vmlinux
              280.00  3.6% sys_epoll_ctl               /lib/modules/2.6.34-rc5/build/vmlinux
              274.00  3.6% datagram_poll               /lib/modules/2.6.34-rc5/build/vmlinux
              249.00  3.2% kmem_cache_free             /lib/modules/2.6.34-rc5/build/vmlinux
              223.00  2.9% _raw_spin_unlock_irqrestore /lib/modules/2.6.34-rc5/build/vmlinux
              221.00  2.9% _raw_spin_unlock_bh         /lib/modules/2.6.34-rc5/build/vmlinux
              221.00  2.9% local_bh_enable_ip          /lib/modules/2.6.34-rc5/build/vmlinux
              208.00  2.7% vread_tsc                   [kernel].vsyscall_fn                 
              200.00  2.6% _raw_spin_lock_bh           /lib/modules/2.6.34-rc5/build/vmlinux
              191.00  2.5% schedule                    /lib/modules/2.6.34-rc5/build/vmlinux
              188.00  2.4% sys_epoll_wait              /lib/modules/2.6.34-rc5/build/vmlinux
              177.00  2.3% udp_recvmsg                 /lib/modules/2.6.34-rc5/build/vmlinux
              141.00  1.8% fput                        /lib/modules/2.6.34-rc5/build/vmlinux
              140.00  1.8% first_packet_length         /lib/modules/2.6.34-rc5/build/vmlinux
              128.00  1.7% kmem_cache_alloc            /lib/modules/2.6.34-rc5/build/vmlinux
              119.00  1.5% dst_release                 /lib/modules/2.6.34-rc5/build/vmlinux
              105.00  1.4% ep_remove                   /lib/modules/2.6.34-rc5/build/vmlinux
              104.00  1.4% epoll_ctl                   /lib/libc-2.7.so                     
              102.00  1.3% skb_copy_datagram_iovec     /lib/modules/2.6.34-rc5/build/vmlinux
              100.00  1.3% mutex_lock                  /lib/modules/2.6.34-rc5/build/vmlinux
               95.00  1.2% mutex_unlock                /lib/modules/2.6.34-rc5/build/vmlinux
               94.00  1.2% sock_recv_ts_and_drops      /lib/modules/2.6.34-rc5/build/vmlinux
               92.00  1.2% ep_send_events_proc         /lib/modules/2.6.34-rc5/build/vmlinux
               92.00  1.2% clock_gettime               /lib/librt-2.7.so                    
               92.00  1.2% __skb_recv_datagram         /lib/modules/2.6.34-rc5/build/vmlinux
               91.00  1.2% process_recv                /home/hadi/udp_sink/mcpudp           
               88.00  1.1% kfree                       /lib/modules/2.6.34-rc5/build/vmlinux
               86.00  1.1% _raw_spin_lock              /lib/modules/2.6.34-rc5/build/vmlinux



II: net-next with rps = ee

Avg udp packets sunk: 94.43%
--------------



--------------------------------------------------------------------------------------------------
   PerfTop:    4328 irqs/sec  kernel:84.0% [1000Hz cycles],  (all, 8 CPUs)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                       DSO
             _______ _____ ______________________________ ______________________

             3908.00 17.1% sky2_poll                      [sky2]                
              694.00  3.0% _raw_spin_lock_irqsave         [kernel]              
              584.00  2.6% sky2_intr                      [sky2]                
              557.00  2.4% system_call                    [kernel]              
              490.00  2.1% _raw_spin_unlock_irqrestore    [kernel]              
              488.00  2.1% fget                           [kernel]              
              425.00  1.9% ip_rcv                         [kernel]              
              405.00  1.8% sys_epoll_ctl                  [kernel]              
              398.00  1.7% __netif_receive_skb            [kernel]              
              375.00  1.6% _raw_spin_lock                 [kernel]              
              365.00  1.6% copy_user_generic_string       [kernel]              
              363.00  1.6% ip_route_input                 [kernel]              
              350.00  1.5% kmem_cache_free                [kernel]              
              346.00  1.5% schedule                       [kernel]              
              319.00  1.4% call_function_single_interrupt [kernel]              
              295.00  1.3% vread_tsc                      [kernel].vsyscall_fn  
              270.00  1.2% __udp4_lib_lookup              [kernel]              
              264.00  1.2% kmem_cache_alloc               [kernel]              
              235.00  1.0% fput                           [kernel]              
              219.00  1.0% datagram_poll                  [kernel]              


--------------------------------------------------------------------------------------------------
   PerfTop:    3791 irqs/sec  kernel:84.4% [1000Hz cycles],  (all, 8 CPUs)
--------------------------------------------------------------------------------------------------

             samples  pcnt function                       DSO
             _______ _____ ______________________________ ______________________

             6274.00 17.2% sky2_poll                      [sky2]                
             1139.00  3.1% _raw_spin_lock_irqsave         [kernel]              
              953.00  2.6% system_call                    [kernel]              
              942.00  2.6% sky2_intr                      [sky2]                
              785.00  2.2% _raw_spin_unlock_irqrestore    [kernel]              
              745.00  2.0% fget                           [kernel]              
              695.00  1.9% ip_rcv                         [kernel]              
              653.00  1.8% sys_epoll_ctl                  [kernel]              
              609.00  1.7% ip_route_input                 [kernel]              
              606.00  1.7% __netif_receive_skb            [kernel]              
              583.00  1.6% _raw_spin_lock                 [kernel]              
              569.00  1.6% kmem_cache_free                [kernel]              
              564.00  1.5% copy_user_generic_string       [kernel]              
              554.00  1.5% schedule                       [kernel]              
              510.00  1.4% call_function_single_interrupt [kernel]              
              488.00  1.3% vread_tsc                      [kernel].vsyscall_fn  
              459.00  1.3% kmem_cache_alloc               [kernel]              
              417.00  1.1% __udp4_lib_lookup              [kernel]              
              387.00  1.1% fput                           [kernel]              
              358.00  1.0% __udp4_lib_rcv                 [kernel]              
              347.00  1.0% event_base_loop                libevent-1.3e.so.1.0.3

-----------------------------------------------------------------------------------------------
   PerfTop:     997 irqs/sec  kernel:98.2% [1000Hz cycles],  (all, cpu: 0)
-----------------------------------------------------------------------------------------------

             samples  pcnt function                            DSO
             _______ _____ ___________________________________ ________

             3926.00 61.0% sky2_poll                           [sky2]  
              671.00 10.4% sky2_intr                           [sky2]  
              192.00  3.0% __alloc_skb                         [kernel]
              126.00  2.0% get_rps_cpu                         [kernel]
              111.00  1.7% __kmalloc                           [kernel]
               97.00  1.5% enqueue_to_backlog                  [kernel]
               95.00  1.5% _raw_spin_lock_irqsave              [kernel]
               93.00  1.4% _raw_spin_lock                      [kernel]
               79.00  1.2% kmem_cache_alloc                    [kernel]
               63.00  1.0% sky2_rx_submit                      [sky2]  

-----------------------------------------------------------------------------------------------
   PerfTop:     980 irqs/sec  kernel:98.0% [1000Hz cycles],  (all, cpu: 0)
-----------------------------------------------------------------------------------------------

             samples  pcnt function                            DSO
             _______ _____ ___________________________________ ____________________

             6945.00 61.4% sky2_poll                           [sky2]              
             1219.00 10.8% sky2_intr                           [sky2]              
              323.00  2.9% __alloc_skb                         [kernel]            
              243.00  2.1% get_rps_cpu                         [kernel]            
              195.00  1.7% __kmalloc                           [kernel]            
              161.00  1.4% _raw_spin_lock_irqsave              [kernel]            
              149.00  1.3% enqueue_to_backlog                  [kernel]            
              139.00  1.2% _raw_spin_lock                      [kernel]            
              136.00  1.2% kmem_cache_alloc                    [kernel]            
              135.00  1.2% irq_entries_start                   [kernel]            
              108.00  1.0% sky2_rx_submit                      [sky2]              


-----------------------------------------------------------------------------------------------
   PerfTop:     458 irqs/sec  kernel:80.8% [1000Hz cycles],  (all, cpu: 2)
-----------------------------------------------------------------------------------------------

             samples  pcnt function                       DSO
             _______ _____ ______________________________ _____________________________________

              130.00  4.7% _raw_spin_lock_irqsave         /lib/modules/2.6.34-rc5/build/vmlinux
              114.00  4.1% system_call                    /lib/modules/2.6.34-rc5/build/vmlinux
               91.00  3.3% ip_rcv                         /lib/modules/2.6.34-rc5/build/vmlinux
               82.00  3.0% _raw_spin_unlock_irqrestore    /lib/modules/2.6.34-rc5/build/vmlinux
               74.00  2.7% call_function_single_interrupt /lib/modules/2.6.34-rc5/build/vmlinux
               74.00  2.7% fget                           /lib/modules/2.6.34-rc5/build/vmlinux
               71.00  2.6% __netif_receive_skb            /lib/modules/2.6.34-rc5/build/vmlinux
               69.00  2.5% ip_route_input                 /lib/modules/2.6.34-rc5/build/vmlinux
               66.00  2.4% schedule                       /lib/modules/2.6.34-rc5/build/vmlinux
               63.00  2.3% kmem_cache_free                /lib/modules/2.6.34-rc5/build/vmlinux
               61.00  2.2% sys_epoll_ctl                  /lib/modules/2.6.34-rc5/build/vmlinux
               61.00  2.2% __udp4_lib_lookup              /lib/modules/2.6.34-rc5/build/vmlinux
               57.00  2.1% copy_user_generic_string       /lib/modules/2.6.34-rc5/build/vmlinux
               49.00  1.8% vread_tsc                      [kernel].vsyscall_fn                 
               49.00  1.8% _raw_spin_lock                 /lib/modules/2.6.34-rc5/build/vmlinux
               47.00  1.7% ep_remove                      /lib/modules/2.6.34-rc5/build/vmlinux
               45.00  1.6% fput                           /lib/modules/2.6.34-rc5/build/vmlinux
               44.00  1.6% sys_epoll_wait                 /lib/modules/2.6.34-rc5/build/vmlinux
               40.00  1.4% kmem_cache_alloc               /lib/modules/2.6.34-rc5/build/vmlinux
               40.00  1.4% local_bh_enable_ip             /lib/modules/2.6.34-rc5/build/vmlinux
               38.00  1.4% sock_recv_ts_and_drops         /lib/modules/2.6.34-rc5/build/vmlinux
               35.00  1.3% process_recv                   /home/hadi/udp_sink/mcpudp           
               34.00  1.2% mutex_unlock                   /lib/modules/2.6.34-rc5/build/vmlinux
               31.00  1.1% _raw_spin_unlock_bh            /lib/modules/2.6.34-rc5/build/vmlinux
               31.00  1.1% event_base_loop                /usr/lib/libevent-1.3e.so.1.0.3      


-----------------------------------------------------------------------------------------------
   PerfTop:     552 irqs/sec  kernel:82.4% [1000Hz cycles],  (all, cpu: 2)
-----------------------------------------------------------------------------------------------

             samples  pcnt function                       DSO
             _______ _____ ______________________________ _____________________________________

              204.00  4.7% _raw_spin_lock_irqsave         /lib/modules/2.6.34-rc5/build/vmlinux
              169.00  3.9% system_call                    /lib/modules/2.6.34-rc5/build/vmlinux
              151.00  3.5% _raw_spin_unlock_irqrestore    /lib/modules/2.6.34-rc5/build/vmlinux
              132.00  3.0% ip_rcv                         /lib/modules/2.6.34-rc5/build/vmlinux
              129.00  3.0% fget                           /lib/modules/2.6.34-rc5/build/vmlinux
              123.00  2.8% __netif_receive_skb            /lib/modules/2.6.34-rc5/build/vmlinux
              115.00  2.6% ip_route_input                 /lib/modules/2.6.34-rc5/build/vmlinux
              112.00  2.6% call_function_single_interrupt /lib/modules/2.6.34-rc5/build/vmlinux
              112.00  2.6% sys_epoll_ctl                  /lib/modules/2.6.34-rc5/build/vmlinux
              103.00  2.4% schedule                       /lib/modules/2.6.34-rc5/build/vmlinux
               94.00  2.2% kmem_cache_free                /lib/modules/2.6.34-rc5/build/vmlinux
               89.00  2.0% copy_user_generic_string       /lib/modules/2.6.34-rc5/build/vmlinux
               86.00  2.0% _raw_spin_lock                 /lib/modules/2.6.34-rc5/build/vmlinux
               83.00  1.9% __udp4_lib_lookup              /lib/modules/2.6.34-rc5/build/vmlinux
               76.00  1.7% vread_tsc                      [kernel].vsyscall_fn                 
               68.00  1.6% ep_remove                      /lib/modules/2.6.34-rc5/build/vmlinux
               67.00  1.5% fput                           /lib/modules/2.6.34-rc5/build/vmlinux
               64.00  1.5% kmem_cache_alloc               /lib/modules/2.6.34-rc5/build/vmlinux
               62.00  1.4% sys_epoll_wait                 /lib/modules/2.6.34-rc5/build/vmlinux
               60.00  1.4% dst_release                    /lib/modules/2.6.34-rc5/build/vmlinux
               60.00  1.4% sock_recv_ts_and_drops         /lib/modules/2.6.34-rc5/build/vmlinux
               56.00  1.3% _raw_spin_lock_bh              /lib/modules/2.6.34-rc5/build/vmlinux
               53.00  1.2% event_base_loop                /usr/lib/libevent-1.3e.so.1.0.3      
               51.00  1.2% datagram_poll                  /lib/modules/2.6.34-rc5/build/vmlinux
               48.00  1.1% epoll_ctl                      /lib/libc-2.7.so                     
               48.00  1.1% kfree                          /lib/modules/2.6.34-rc5/build/vmlinux
               47.00  1.1% _raw_spin_unlock_bh            /lib/modules/2.6.34-rc5/build/vmlinux
               47.00  1.1% mutex_unlock                   /lib/modules/2.6.34-rc5/build/vmlinux
               45.00  1.0% __udp4_lib_rcv                 /lib/modules/2.6.34-rc5/build/vmlinux
               45.00  1.0% tick_nohz_stop_sched_tick      /lib/modules/2.6.34-rc5/build/vmlinux

-----------------------------------------------------------------------------------------------
   PerfTop:     408 irqs/sec  kernel:82.1% [1000Hz cycles],  (all, cpu: 2)
-----------------------------------------------------------------------------------------------

             samples  pcnt function                       DSO
             _______ _____ ______________________________ _____________________________________

              240.00  4.8% _raw_spin_lock_irqsave         /lib/modules/2.6.34-rc5/build/vmlinux
              200.00  4.0% system_call                    /lib/modules/2.6.34-rc5/build/vmlinux
              165.00  3.3% _raw_spin_unlock_irqrestore    /lib/modules/2.6.34-rc5/build/vmlinux
              161.00  3.2% ip_rcv                         /lib/modules/2.6.34-rc5/build/vmlinux
              158.00  3.1% fget                           /lib/modules/2.6.34-rc5/build/vmlinux
              150.00  3.0% sys_epoll_ctl                  /lib/modules/2.6.34-rc5/build/vmlinux
              135.00  2.7% __netif_receive_skb            /lib/modules/2.6.34-rc5/build/vmlinux
              122.00  2.4% ip_route_input                 /lib/modules/2.6.34-rc5/build/vmlinux
              117.00  2.3% call_function_single_interrupt /lib/modules/2.6.34-rc5/build/vmlinux
              114.00  2.3% schedule                       /lib/modules/2.6.34-rc5/build/vmlinux
              110.00  2.2% _raw_spin_lock                 /lib/modules/2.6.34-rc5/build/vmlinux
              108.00  2.1% copy_user_generic_string       /lib/modules/2.6.34-rc5/build/vmlinux
              101.00  2.0% kmem_cache_free                /lib/modules/2.6.34-rc5/build/vmlinux
               94.00  1.9% vread_tsc                      [kernel].vsyscall_fn                 
               90.00  1.8% __udp4_lib_lookup              /lib/modules/2.6.34-rc5/build/vmlinux
               85.00  1.7% fput                           /lib/modules/2.6.34-rc5/build/vmlinux
               78.00  1.5% dst_release                    /lib/modules/2.6.34-rc5/build/vmlinux
               77.00  1.5% ep_remove                      /lib/modules/2.6.34-rc5/build/vmlinux
               75.00  1.5% kmem_cache_alloc               /lib/modules/2.6.34-rc5/build/vmlinux
               74.00  1.5% _raw_spin_lock_bh              /lib/modules/2.6.34-rc5/build/vmlinux
               69.00  1.4% sys_epoll_wait                 /lib/modules/2.6.34-rc5/build/vmlinux
               68.00  1.3% event_base_loop                /usr/lib/libevent-1.3e.so.1.0.3      
               68.00  1.3% sock_recv_ts_and_drops         /lib/modules/2.6.34-rc5/build/vmlinux
               62.00  1.2% _raw_spin_unlock_bh            /lib/modules/2.6.34-rc5/build/vmlinux
               62.00  1.2% datagram_poll                  /lib/modules/2.6.34-rc5/build/vmlinux
               55.00  1.1% epoll_ctl                      /lib/libc-2.7.so                     
               53.00  1.1% local_bh_enable_ip             /lib/modules/2.6.34-rc5/build/vmlinux
               53.00  1.1% tick_nohz_stop_sched_tick      /lib/modules/2.6.34-rc5/build/vmlinux
               52.00  1.0% mutex_unlock                   /lib/modules/2.6.34-rc5/build/vmlinux

-----------------------------------------------------------------------------------------------
   PerfTop:     440 irqs/sec  kernel:85.0% [1000Hz cycles],  (all, cpu: 2)
-----------------------------------------------------------------------------------------------

             samples  pcnt function                       DSO
             _______ _____ ______________________________ _____________________________________

              226.00  4.6% _raw_spin_lock_irqsave         /lib/modules/2.6.34-rc5/build/vmlinux
              213.00  4.3% system_call                    /lib/modules/2.6.34-rc5/build/vmlinux
              154.00  3.1% _raw_spin_unlock_irqrestore    /lib/modules/2.6.34-rc5/build/vmlinux
              148.00  3.0% ip_rcv                         /lib/modules/2.6.34-rc5/build/vmlinux
              143.00  2.9% fget                           /lib/modules/2.6.34-rc5/build/vmlinux
              143.00  2.9% ip_route_input                 /lib/modules/2.6.34-rc5/build/vmlinux
              140.00  2.8% __netif_receive_skb            /lib/modules/2.6.34-rc5/build/vmlinux
              124.00  2.5% call_function_single_interrupt /lib/modules/2.6.34-rc5/build/vmlinux
              124.00  2.5% sys_epoll_ctl                  /lib/modules/2.6.34-rc5/build/vmlinux
              104.00  2.1% copy_user_generic_string       /lib/modules/2.6.34-rc5/build/vmlinux
              103.00  2.1% vread_tsc                      [kernel].vsyscall_fn                 
              101.00  2.0% schedule                       /lib/modules/2.6.34-rc5/build/vmlinux
              100.00  2.0% kmem_cache_free                /lib/modules/2.6.34-rc5/build/vmlinux
               99.00  2.0% _raw_spin_lock                 /lib/modules/2.6.34-rc5/build/vmlinux
               93.00  1.9% __udp4_lib_lookup              /lib/modules/2.6.34-rc5/build/vmlinux
               80.00  1.6% fput                           /lib/modules/2.6.34-rc5/build/vmlinux
               76.00  1.5% kmem_cache_alloc               /lib/modules/2.6.34-rc5/build/vmlinux
               75.00  1.5% sock_recv_ts_and_drops         /lib/modules/2.6.34-rc5/build/vmlinux
               73.00  1.5% dst_release                    /lib/modules/2.6.34-rc5/build/vmlinux
               70.00  1.4% sys_epoll_wait                 /lib/modules/2.6.34-rc5/build/vmlinux
               69.00  1.4% datagram_poll                  /lib/modules/2.6.34-rc5/build/vmlinux
               65.00  1.3% event_base_loop                /usr/lib/libevent-1.3e.so.1.0.3      
               65.00  1.3% ep_remove                      /lib/modules/2.6.34-rc5/build/vmlinux



III: Kernel compiled with Eric's patch, rps mask 00

Avg udp packets sunk: 98.74%

-------------------------------------------------------------------------------
   PerfTop:    4202 irqs/sec  kernel:82.5% [1000Hz cycles],  (all, 8 CPUs)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ______________________

             1639.00  9.0% sky2_poll                   [sky2]                
             1051.00  5.8% _raw_spin_lock_irqsave      [kernel]              
              665.00  3.7% system_call                 [kernel]              
              578.00  3.2% fget                        [kernel]              
              476.00  2.6% _raw_spin_unlock_irqrestore [kernel]              
              457.00  2.5% copy_user_generic_string    [kernel]              
              427.00  2.4% sys_epoll_ctl               [kernel]              
              401.00  2.2% datagram_poll               [kernel]              
              391.00  2.2% kmem_cache_free             [kernel]              
              349.00  1.9% schedule                    [kernel]              
              339.00  1.9% vread_tsc                   [kernel].vsyscall_fn  
              323.00  1.8% udp_recvmsg                 [kernel]              
              292.00  1.6% kmem_cache_alloc            [kernel]              
              285.00  1.6% _raw_spin_lock              [kernel]              
              272.00  1.5% _raw_spin_lock_bh           [kernel]              
              268.00  1.5% sys_epoll_wait              [kernel]              
              260.00  1.4% fput                        [kernel]              
              234.00  1.3% ip_route_input              [kernel]              
              221.00  1.2% __udp4_lib_lookup           [kernel]              
              212.00  1.2% dst_release                 [kernel]              
              209.00  1.2% ip_rcv                      [kernel]              
              203.00  1.1% ep_remove                   [kernel]              
              202.00  1.1% first_packet_length         [kernel]              


-------------------------------------------------------------------------------
   PerfTop:    3999 irqs/sec  kernel:82.3% [1000Hz cycles],  (all, 8 CPUs)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ______________________

             3452.00  9.3% sky2_poll                   [sky2]                
             2212.00  5.9% _raw_spin_lock_irqsave      [kernel]              
             1350.00  3.6% system_call                 [kernel]              
             1187.00  3.2% fget                        [kernel]              
             1010.00  2.7% copy_user_generic_string    [kernel]              
              965.00  2.6% _raw_spin_unlock_irqrestore [kernel]              
              842.00  2.3% sys_epoll_ctl               [kernel]              
              833.00  2.2% datagram_poll               [kernel]              
              770.00  2.1% kmem_cache_free             [kernel]              
              710.00  1.9% vread_tsc                   [kernel].vsyscall_fn  
              688.00  1.8% schedule                    [kernel]              
              651.00  1.7% udp_recvmsg                 [kernel]              
              603.00  1.6% _raw_spin_lock_bh           [kernel]              
              599.00  1.6% _raw_spin_lock              [kernel]              
              597.00  1.6% sys_epoll_wait              [kernel]              
              594.00  1.6% kmem_cache_alloc            [kernel]              
              553.00  1.5% ip_route_input              [kernel]              
              528.00  1.4% fput                        [kernel]              
              496.00  1.3% __udp4_lib_lookup           [kernel]              
              444.00  1.2% dst_release                 [kernel]              
              433.00  1.2% ip_rcv                      [kernel]              
              408.00  1.1% first_packet_length         [kernel]              

-------------------------------------------------------------------------------
   PerfTop:    3765 irqs/sec  kernel:83.7% [1000Hz cycles],  (all, 8 CPUs)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ______________________

             4275.00  9.5% sky2_poll                   [sky2]                
             2684.00  6.0% _raw_spin_lock_irqsave      [kernel]              
             1654.00  3.7% system_call                 [kernel]              
             1447.00  3.2% fget                        [kernel]              
             1223.00  2.7% copy_user_generic_string    [kernel]              
             1146.00  2.5% _raw_spin_unlock_irqrestore [kernel]              
             1036.00  2.3% sys_epoll_ctl               [kernel]              
             1019.00  2.3% datagram_poll               [kernel]              
              974.00  2.2% kmem_cache_free             [kernel]              
              843.00  1.9% vread_tsc                   [kernel].vsyscall_fn  
              799.00  1.8% schedule                    [kernel]              
              761.00  1.7% udp_recvmsg                 [kernel]              
              736.00  1.6% kmem_cache_alloc            [kernel]              
              719.00  1.6% _raw_spin_lock_bh           [kernel]              
              716.00  1.6% _raw_spin_lock              [kernel]              
              696.00  1.5% sys_epoll_wait              [kernel]              
              680.00  1.5% ip_route_input              [kernel]              
              657.00  1.5% fput                        [kernel]              
              613.00  1.4% __udp4_lib_lookup           [kernel]              
              552.00  1.2% dst_release                 [kernel]              
              507.00  1.1% ip_rcv                      [kernel]            


-------------------------------------------------------------------------------
   PerfTop:    1001 irqs/sec  kernel:99.9% [1000Hz cycles],  (all, cpu: 0)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ________

              669.00 32.2% sky2_poll                   [sky2]  
              128.00  6.2% ip_route_input              [kernel]
              106.00  5.1% ip_rcv                      [kernel]
              105.00  5.1% __udp4_lib_lookup           [kernel]
               86.00  4.1% _raw_spin_lock              [kernel]
               85.00  4.1% _raw_spin_lock_irqsave      [kernel]
               82.00  3.9% __alloc_skb                 [kernel]
               78.00  3.8% sock_queue_rcv_skb          [kernel]
               57.00  2.7% __netif_receive_skb         [kernel]
               53.00  2.6% __wake_up_common            [kernel]
               47.00  2.3% __udp4_lib_rcv              [kernel]
               42.00  2.0% sock_def_readable           [kernel]
               37.00  1.8% kmem_cache_alloc            [kernel]
               34.00  1.6% ep_poll_callback            [kernel]
               34.00  1.6% __kmalloc                   [kernel]
               34.00  1.6% select_task_rq_fair         [kernel]
               30.00  1.4% _raw_read_lock              [kernel]
               27.00  1.3% _raw_spin_unlock_irqrestore [kernel]
               24.00  1.2% sky2_rx_submit              [sky2]  
               22.00  1.1% udp_queue_rcv_skb           [kernel]
               21.00  1.0% try_to_wake_up              [kernel]


-------------------------------------------------------------------------------
   PerfTop:    1000 irqs/sec  kernel:100.0% [1000Hz cycles],  (all, cpu: 0)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ________

             3061.00 31.9% sky2_poll                   [sky2]  
              529.00  5.5% ip_route_input              [kernel]
              518.00  5.4% __udp4_lib_lookup           [kernel]
              424.00  4.4% ip_rcv                      [kernel]
              390.00  4.1% _raw_spin_lock_irqsave      [kernel]
              389.00  4.1% __alloc_skb                 [kernel]
              365.00  3.8% _raw_spin_lock              [kernel]
              326.00  3.4% sock_queue_rcv_skb          [kernel]
              297.00  3.1% __netif_receive_skb         [kernel]
              273.00  2.8% __udp4_lib_rcv              [kernel]
              223.00  2.3% sock_def_readable           [kernel]
              205.00  2.1% __wake_up_common            [kernel]
              181.00  1.9% __kmalloc                   [kernel]
              151.00  1.6% kmem_cache_alloc            [kernel]
              147.00  1.5% _raw_read_lock              [kernel]
              143.00  1.5% ep_poll_callback            [kernel]
              136.00  1.4% sky2_rx_submit              [sky2]  
              123.00  1.3% task_rq_lock                [kernel]
              118.00  1.2% _raw_spin_unlock_irqrestore [kernel]
              114.00  1.2% select_task_rq_fair         [kernel]
              104.00  1.1% resched_task                [kernel]
              104.00  1.1% sky2_remove                 [sky2]  
              102.00  1.1% udp_queue_rcv_skb           [kernel]


-------------------------------------------------------------------------------
   PerfTop:    1001 irqs/sec  kernel:100.0% [1000Hz cycles],  (all, cpu: 0)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ________

             3898.00 31.0% sky2_poll                   [sky2]  
              715.00  5.7% ip_route_input              [kernel]
              651.00  5.2% __udp4_lib_lookup           [kernel]
              576.00  4.6% ip_rcv                      [kernel]
              534.00  4.2% __alloc_skb                 [kernel]
              518.00  4.1% _raw_spin_lock_irqsave      [kernel]
              441.00  3.5% sock_queue_rcv_skb          [kernel]
              439.00  3.5% _raw_spin_lock              [kernel]
              396.00  3.1% __netif_receive_skb         [kernel]
              351.00  2.8% __udp4_lib_rcv              [kernel]
              300.00  2.4% sock_def_readable           [kernel]
              264.00  2.1% __wake_up_common            [kernel]
              260.00  2.1% __kmalloc                   [kernel]
              198.00  1.6% kmem_cache_alloc            [kernel]
              193.00  1.5% ep_poll_callback            [kernel]
              192.00  1.5% _raw_read_lock              [kernel]
              168.00  1.3% sky2_rx_submit              [sky2]  
              167.00  1.3% task_rq_lock                [kernel]
              153.00  1.2% udp_queue_rcv_skb           [kernel]
              149.00  1.2% _raw_spin_unlock_irqrestore [kernel]
              147.00  1.2% ip_local_deliver            [kernel]
              144.00  1.1% resched_task                [kernel]
              137.00  1.1% sky2_remove                 [sky2]  


-------------------------------------------------------------------------------
   PerfTop:     663 irqs/sec  kernel:81.9% [1000Hz cycles],  (all, cpu: 2)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ____________________

              129.00  7.0% _raw_spin_lock_irqsave      [kernel]            
               84.00  4.5% fget                        [kernel]            
               83.00  4.5% system_call                 [kernel]            
               82.00  4.4% copy_user_generic_string    [kernel]            
               67.00  3.6% _raw_spin_unlock_irqrestore [kernel]            
               63.00  3.4% datagram_poll               [kernel]            
               57.00  3.1% udp_recvmsg                 [kernel]            
               55.00  3.0% sys_epoll_ctl               [kernel]            
               55.00  3.0% vread_tsc                   [kernel].vsyscall_fn
               43.00  2.3% sys_epoll_wait              [kernel]            
               43.00  2.3% _raw_spin_lock_bh           [kernel]            
               41.00  2.2% first_packet_length         [kernel]            
               40.00  2.2% dst_release                 [kernel]            
               37.00  2.0% fput                        [kernel]            
               37.00  2.0% kmem_cache_free             [kernel]            
               36.00  1.9% mutex_unlock                [kernel]            
               35.00  1.9% schedule                    [kernel]            
               34.00  1.8% skb_copy_datagram_iovec     [kernel]            
               34.00  1.8% ep_remove                   [kernel]            
               29.00  1.6% mutex_lock                  [kernel]            
               29.00  1.6% _raw_spin_lock              [kernel]            
               28.00  1.5% __skb_recv_datagram         [kernel]            
               25.00  1.4% epoll_ctl                   /lib/libc-2.7.so    
               25.00  1.4% tick_nohz_stop_sched_tick   [kernel]            


-------------------------------------------------------------------------------
   PerfTop:     629 irqs/sec  kernel:81.1% [1000Hz cycles],  (all, cpu: 2)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ______________________

              351.00  7.9% _raw_spin_lock_irqsave      [kernel]              
              248.00  5.6% system_call                 [kernel]              
              219.00  5.0% fget                        [kernel]              
              194.00  4.4% copy_user_generic_string    [kernel]              
              184.00  4.2% datagram_poll               [kernel]              
              162.00  3.7% sys_epoll_ctl               [kernel]              
              159.00  3.6% _raw_spin_unlock_irqrestore [kernel]              
              129.00  2.9% udp_recvmsg                 [kernel]              
              129.00  2.9% kmem_cache_free             [kernel]              
              123.00  2.8% vread_tsc                   [kernel].vsyscall_fn  
              108.00  2.4% schedule                    [kernel]              
              107.00  2.4% _raw_spin_lock_bh           [kernel]              
              104.00  2.4% sys_epoll_wait              [kernel]              
              100.00  2.3% fput                        [kernel]              
               94.00  2.1% dst_release                 [kernel]              
               78.00  1.8% first_packet_length         [kernel]              
               73.00  1.7% ep_remove                   [kernel]              
               69.00  1.6% epoll_ctl                   /lib/libc-2.7.so      
               66.00  1.5% skb_copy_datagram_iovec     [kernel]              
               66.00  1.5% mutex_unlock                [kernel]              
               64.00  1.4% __skb_recv_datagram         [kernel]              
               64.00  1.4% mutex_lock                  [kernel]              
               57.00  1.3% sock_recv_ts_and_drops      [kernel]              
               51.00  1.2% kmem_cache_alloc            [kernel]              
               49.00  1.1% ep_send_events_proc         [kernel]              

-------------------------------------------------------------------------------
   PerfTop:     457 irqs/sec  kernel:72.0% [1000Hz cycles],  (all, cpu: 2)
-------------------------------------------------------------------------------

             samples  pcnt function                    DSO
             _______ _____ ___________________________ ______________________

              411.00  7.8% _raw_spin_lock_irqsave      [kernel]              
              280.00  5.3% system_call                 [kernel]              
              269.00  5.1% fget                        [kernel]              
              239.00  4.5% copy_user_generic_string    [kernel]              
              232.00  4.4% datagram_poll               [kernel]              
              175.00  3.3% _raw_spin_unlock_irqrestore [kernel]              
              170.00  3.2% sys_epoll_ctl               [kernel]              
              169.00  3.2% kmem_cache_free             [kernel]              
              149.00  2.8% udp_recvmsg                 [kernel]              
              144.00  2.7% vread_tsc                   [kernel].vsyscall_fn  
              129.00  2.4% sys_epoll_wait              [kernel]              
              128.00  2.4% _raw_spin_lock_bh           [kernel]              
              115.00  2.2% fput                        [kernel]              
              112.00  2.1% schedule                    [kernel]              
              108.00  2.0% dst_release                 [kernel]              
               88.00  1.7% first_packet_length         [kernel]              
               86.00  1.6% ep_remove                   [kernel]              
               83.00  1.6% mutex_lock                  [kernel]              
               79.00  1.5% skb_copy_datagram_iovec     [kernel]              
               76.00  1.4% mutex_unlock                [kernel]              
               75.00  1.4% epoll_ctl                   /lib/libc-2.7.so      
               73.00  1.4% sock_recv_ts_and_drops      [kernel]              
               67.00  1.3% __skb_recv_datagram         [kernel]              
               65.00  1.2% tick_nohz_stop_sched_tick   [kernel]              


Interesting stuff; check the cache-miss contributions - wow, look how low eth_type_trans is,
and yet we keep optimizing that!

-------------------------------------------------------------------------------
   PerfTop:    1021 irqs/sec  kernel:98.8% [1000Hz cache-misses],  (all, 8 CPUs)
-------------------------------------------------------------------------------

             samples  pcnt function                        DSO
             _______ _____ _______________________________ ________

             5271.00 77.8% sky2_poll                       [sky2]  
              706.00 10.4% kmem_cache_alloc                [kernel]
              154.00  2.3% dev_gro_receive                 [kernel]
              149.00  2.2% __napi_gro_receive              [kernel]
              128.00  1.9% napi_gro_receive                [kernel]
              106.00  1.6% __alloc_skb                     [kernel]
               57.00  0.8% eth_type_trans                  [kernel]
               45.00  0.7% skb_gro_reset_offset            [kernel]
               26.00  0.4% drain_array                     [kernel]
               23.00  0.3% perf_session__mmap_read_counter perf    
               10.00  0.1% cache_alloc_refill              [kernel]
                9.00  0.1% __netdev_alloc_skb              [kernel]
                9.00  0.1% event__preprocess_sample        perf    


-------------------------------------------------------------------------------
   PerfTop:     997 irqs/sec  kernel:100.0% [1000Hz cache-misses],  (all, cpu: 0)
-------------------------------------------------------------------------------

             samples  pcnt function             DSO
             _______ _____ ____________________ ________

             3019.00 79.4% sky2_poll            [sky2]  
              360.00  9.5% kmem_cache_alloc     [kernel]
               91.00  2.4% dev_gro_receive      [kernel]
               86.00  2.3% __alloc_skb          [kernel]
               83.00  2.2% __napi_gro_receive   [kernel]
               69.00  1.8% napi_gro_receive     [kernel]
               45.00  1.2% eth_type_trans       [kernel]
               25.00  0.7% skb_gro_reset_offset [kernel]
                9.00  0.2% __netdev_alloc_skb   [kernel]
                5.00  0.1% cache_alloc_refill   [kernel]
                5.00  0.1% skb_pull             [kernel]


-------------------------------------------------------------------------------
   PerfTop:     997 irqs/sec  kernel:100.0% [1000Hz cache-misses],  (all, cpu: 0)
-------------------------------------------------------------------------------

             samples  pcnt function             DSO
             _______ _____ ____________________ ________

             8887.00 79.8% sky2_poll            [sky2]  
             1138.00 10.2% kmem_cache_alloc     [kernel]
              273.00  2.5% __napi_gro_receive   [kernel]
              246.00  2.2% dev_gro_receive      [kernel]
              189.00  1.7% napi_gro_receive     [kernel]
              159.00  1.4% __alloc_skb          [kernel]
              119.00  1.1% eth_type_trans       [kernel]
               86.00  0.8% skb_gro_reset_offset [kernel]
               13.00  0.1% __netdev_alloc_skb   [kernel]
                8.00  0.1% skb_pull             [kernel]
                7.00  0.1% cache_alloc_refill   [kernel]


Not much is going on on the other CPUs, i.e. hardly anything shows up in
the profile.

IV: rps with ee and irq affinity to cpu0

Avg udp packets sunk: 95.15%


-------------------------------------------------------------------------------
   PerfTop:    3558 irqs/sec  kernel:84.6% [1000Hz cycles],  (all, 8 CPUs)
-------------------------------------------------------------------------------

             samples  pcnt function                      DSO
             _______ _____ _____________________________ ______________________

             3096.00 17.1% sky2_poll                     [sky2]                
              645.00  3.6% _raw_spin_lock_irqsave        [kernel]              
              493.00  2.7% system_call                   [kernel]              
              462.00  2.6% sky2_intr                     [sky2]                
              416.00  2.3% _raw_spin_unlock_irqrestore   [kernel]              
              382.00  2.1% fget                          [kernel]              
              361.00  2.0% __netif_receive_skb           [kernel]              
              342.00  1.9% ip_rcv                        [kernel]              
              334.00  1.8% _raw_spin_lock                [kernel]              
              320.00  1.8% sys_epoll_ctl                 [kernel]              
              298.00  1.6% copy_user_generic_string      [kernel]              
              288.00  1.6% call_function_single_interrup [kernel]              
              277.00  1.5% load_balance                  [kernel]              
              271.00  1.5% ip_route_input                [kernel]              
              270.00  1.5% vread_tsc                     [kernel].vsyscall_fn  
              256.00  1.4% kmem_cache_free               [kernel]              
              222.00  1.2% __udp4_lib_lookup             [kernel]              
              222.00  1.2% schedule                      [kernel]              
              194.00  1.1% fput                          [kernel]              
              189.00  1.0% kmem_cache_alloc              [kernel]              
              171.00  0.9% sys_epoll_wait                [kernel]              
              164.00  0.9% ep_remove                     [kernel]          

-------------------------------------------------------------------------------
   PerfTop:    3452 irqs/sec  kernel:84.3% [1000Hz cycles],  (all, 8 CPUs)
-------------------------------------------------------------------------------

             samples  pcnt function                      DSO
             _______ _____ _____________________________ ______________________

             5033.00 16.2% sky2_poll                     [sky2]                
             1147.00  3.7% _raw_spin_lock_irqsave        [kernel]              
              888.00  2.9% system_call                   [kernel]              
              774.00  2.5% sky2_intr                     [sky2]                
              757.00  2.4% _raw_spin_unlock_irqrestore   [kernel]              
              702.00  2.3% fget                          [kernel]              
              630.00  2.0% __netif_receive_skb           [kernel]              
              609.00  2.0% _raw_spin_lock                [kernel]              
              607.00  2.0% ip_rcv                        [kernel]              
              553.00  1.8% sys_epoll_ctl                 [kernel]              
              514.00  1.7% ip_route_input                [kernel]              
              508.00  1.6% call_function_single_interrup [kernel]              
              504.00  1.6% copy_user_generic_string      [kernel]              
              466.00  1.5% kmem_cache_free               [kernel]              
              452.00  1.5% schedule                      [kernel]              
              450.00  1.4% vread_tsc                     [kernel].vsyscall_fn  
              390.00  1.3% load_balance                  [kernel]              
              377.00  1.2% fput                          [kernel]              
              364.00  1.2% __udp4_lib_lookup             [kernel]              
              329.00  1.1% kmem_cache_alloc              [kernel]              
              314.00  1.0% ep_remove                     [kernel]              
              289.00  0.9% dst_release                   [kernel]              
              276.00  0.9% sys_epoll_wait                [kernel]              
              265.00  0.9% datagram_poll                 [kernel]              

-------------------------------------------------------------------------------
   PerfTop:    3328 irqs/sec  kernel:85.7% [1000Hz cycles],  (all, 8 CPUs)
-------------------------------------------------------------------------------

             samples  pcnt function                      DSO
             _______ _____ _____________________________ ______________________

             6788.00 17.5% sky2_poll                     [sky2]                
             1413.00  3.6% _raw_spin_lock_irqsave        [kernel]              
             1042.00  2.7% system_call                   [kernel]              
              997.00  2.6% sky2_intr                     [sky2]                
              903.00  2.3% _raw_spin_unlock_irqrestore   [kernel]              
              837.00  2.2% fget                          [kernel]              
              740.00  1.9% _raw_spin_lock                [kernel]              
              725.00  1.9% __netif_receive_skb           [kernel]              
              722.00  1.9% ip_rcv                        [kernel]              
              651.00  1.7% sys_epoll_ctl                 [kernel]              
              609.00  1.6% call_function_single_interrup [kernel]              
              604.00  1.6% ip_route_input                [kernel]              
              601.00  1.5% copy_user_generic_string      [kernel]              
              573.00  1.5% schedule                      [kernel]              
              561.00  1.4% kmem_cache_free               [kernel]              
              538.00  1.4% load_balance                  [kernel]              
              515.00  1.3% vread_tsc                     [kernel].vsyscall_fn  
              480.00  1.2% fput                          [kernel]              
              421.00  1.1% kmem_cache_alloc              [kernel]              
              418.00  1.1% __udp4_lib_lookup             [kernel]              
              377.00  1.0% ep_remove                     [kernel]              
              347.00  0.9% datagram_poll                 [kernel]              
              335.00  0.9% dst_release                   [kernel]              

-------------------------------------------------------------------------------
   PerfTop:    1000 irqs/sec  kernel:96.2% [1000Hz cycles],  (all, cpu: 0)
-------------------------------------------------------------------------------

             samples  pcnt function                      DSO
             _______ _____ _____________________________ ______________________

             2109.00 61.3% sky2_poll                     [sky2]                
              366.00 10.6% sky2_intr                     [sky2]                
               84.00  2.4% __alloc_skb                   [kernel]              
               57.00  1.7% _raw_spin_lock_irqsave        [kernel]              
               56.00  1.6% get_rps_cpu                   [kernel]              
               52.00  1.5% __kmalloc                     [kernel]              
               39.00  1.1% irq_entries_start             [kernel]              
               39.00  1.1% enqueue_to_backlog            [kernel]              
               34.00  1.0% kmem_cache_alloc              [kernel]              
               33.00  1.0% default_send_IPI_mask_sequenc [kernel]              
               32.00  0.9% sky2_rx_submit                [sky2]                
               30.00  0.9% swiotlb_sync_single           [kernel]              
               28.00  0.8% _raw_spin_lock                [kernel]              
               23.00  0.7% sky2_remove                   [sky2]                
               22.00  0.6% __smp_call_function_single    [kernel]              
               19.00  0.6% system_call                   [kernel]              
               18.00  0.5% sys_epoll_ctl                 [kernel]              
               18.00  0.5% fget                          [kernel]              
               17.00  0.5% cache_alloc_refill            [kernel]              
               16.00  0.5% copy_user_generic_string      [kernel]              
               16.00  0.5% _raw_spin_unlock_irqrestore   [kernel]              
               15.00  0.4% dev_gro_receive               [kernel]              
               14.00  0.4% net_rx_action                 [kernel]             

-------------------------------------------------------------------------------
   PerfTop:    1000 irqs/sec  kernel:97.9% [1000Hz cycles],  (all, cpu: 0)
-------------------------------------------------------------------------------

             samples  pcnt function                        DSO
             _______ _____ _______________________________ ____________________

             4479.00 60.9% sky2_poll                       [sky2]              
              849.00 11.5% sky2_intr                       [sky2]              
              163.00  2.2% __alloc_skb                     [kernel]            
              155.00  2.1% get_rps_cpu                     [kernel]            
              121.00  1.6% _raw_spin_lock_irqsave          [kernel]            
               92.00  1.3% __kmalloc                       [kernel]            
               89.00  1.2% _raw_spin_lock                  [kernel]            
               83.00  1.1% enqueue_to_backlog              [kernel]            
               79.00  1.1% irq_entries_start               [kernel]            
               78.00  1.1% kmem_cache_alloc                [kernel]            
               69.00  0.9% sky2_rx_submit                  [sky2]              
               65.00  0.9% swiotlb_sync_single             [kernel]            
               58.00  0.8% default_send_IPI_mask_sequence_ [kernel]            
               50.00  0.7% system_call                     [kernel]            
               45.00  0.6% fget                            [kernel]            
               40.00  0.5% sky2_remove                     [sky2]              
               37.00  0.5% __smp_call_function_single      [kernel]            
               36.00  0.5% datagram_poll                   [kernel]            
               36.00  0.5% _raw_spin_unlock_irqrestore     [kernel]            
               34.00  0.5% cache_alloc_refill              [kernel]            
               31.00  0.4% net_rx_action                   [kernel]            
               28.00  0.4% kmem_cache_free                 [kernel]            
               27.00  0.4% _raw_spin_lock_bh               [kernel]            
               27.00  0.4% copy_user_generic_string        [kernel]            
               25.00  0.3% dev_gro_receive                 [kernel]            


-------------------------------------------------------------------------------
   PerfTop:     980 irqs/sec  kernel:97.3% [1000Hz cycles],  (all, cpu: 0)
-------------------------------------------------------------------------------

             samples  pcnt function                        DSO
             _______ _____ _______________________________ ____________________

             6544.00 61.6% sky2_poll                       [sky2]              
             1098.00 10.3% sky2_intr                       [sky2]              
              248.00  2.3% __alloc_skb                     [kernel]            
              198.00  1.9% get_rps_cpu                     [kernel]            
              182.00  1.7% _raw_spin_lock_irqsave          [kernel]            
              144.00  1.4% __kmalloc                       [kernel]            
              138.00  1.3% _raw_spin_lock                  [kernel]            
              127.00  1.2% kmem_cache_alloc                [kernel]            
              125.00  1.2% irq_entries_start               [kernel]            
              119.00  1.1% enqueue_to_backlog              [kernel]            
               93.00  0.9% sky2_rx_submit                  [sky2]              
               91.00  0.9% swiotlb_sync_single             [kernel]            
               83.00  0.8% default_send_IPI_mask_sequence_ [kernel]            
               82.00  0.8% system_call                     [kernel]            
               64.00  0.6% sky2_remove                     [sky2]              
               60.00  0.6% fget                            [kernel]            
               58.00  0.5% cache_alloc_refill              [kernel]            
               57.00  0.5% _raw_spin_unlock_irqrestore     [kernel]            
               51.00  0.5% datagram_poll                   [kernel]            
               47.00  0.4% copy_user_generic_string        [kernel]            


-------------------------------------------------------------------------------
   PerfTop:     315 irqs/sec  kernel:81.0% [1000Hz cycles],  (all, cpu: 2)
-------------------------------------------------------------------------------

             samples  pcnt function                      DSO
             _______ _____ _____________________________ ______________________

              114.00  4.5% system_call                   [kernel]              
               98.00  3.9% _raw_spin_lock_irqsave        [kernel]              
               89.00  3.5% _raw_spin_unlock_irqrestore   [kernel]              
               89.00  3.5% ip_rcv                        [kernel]              
               83.00  3.3% call_function_single_interrup [kernel]              
               76.00  3.0% __netif_receive_skb           [kernel]              
               67.00  2.6% fget                          [kernel]              
               62.00  2.4% ip_route_input                [kernel]              
               59.00  2.3% vread_tsc                     [kernel].vsyscall_fn  
               54.00  2.1% kmem_cache_free               [kernel]              
               54.00  2.1% sys_epoll_ctl                 [kernel]              
               51.00  2.0% schedule                      [kernel]              
               49.00  1.9% _raw_spin_lock                [kernel]              
               49.00  1.9% __udp4_lib_lookup             [kernel]              
               44.00  1.7% ep_remove                     [kernel]              
               44.00  1.7% copy_user_generic_string      [kernel]              
               41.00  1.6% fput                          [kernel]              
               38.00  1.5% sys_epoll_wait                [kernel]              
               37.00  1.5% tick_nohz_stop_sched_tick     [kernel]              
               36.00  1.4% kmem_cache_alloc              [kernel]              
               34.00  1.3% datagram_poll                 [kernel]              
               33.00  1.3% __udp4_lib_rcv                [kernel]              
               31.00  1.2% process_recv                  mcpudp               

-------------------------------------------------------------------------------
   PerfTop:     292 irqs/sec  kernel:82.9% [1000Hz cycles],  (all, cpu: 2)
-------------------------------------------------------------------------------

             samples  pcnt function                      DSO
             _______ _____ _____________________________ ______________________

              154.00  4.7% _raw_spin_lock_irqsave        [kernel]              
              140.00  4.2% system_call                   [kernel]              
              111.00  3.4% ip_rcv                        [kernel]              
              106.00  3.2% _raw_spin_unlock_irqrestore   [kernel]              
               96.00  2.9% call_function_single_interrup [kernel]              
               95.00  2.9% fget                          [kernel]              
               90.00  2.7% __netif_receive_skb           [kernel]              
               89.00  2.7% sys_epoll_ctl                 [kernel]              
               77.00  2.3% copy_user_generic_string      [kernel]              
               77.00  2.3% ip_route_input                [kernel]              
               76.00  2.3% kmem_cache_free               [kernel]              
               74.00  2.2% _raw_spin_lock                [kernel]              
               71.00  2.1% schedule                      [kernel]              
               69.00  2.1% vread_tsc                     [kernel].vsyscall_fn  
               58.00  1.8% __udp4_lib_lookup             [kernel]              
               52.00  1.6% __udp4_lib_rcv                [kernel]              
               51.00  1.5% fput                          [kernel]              
               47.00  1.4% ep_remove                     [kernel]              
               47.00  1.4% event_base_loop               libevent-1.3e.so.1.0.3
               39.00  1.2% process_recv                  mcpudp                
               39.00  1.2% sys_epoll_wait                [kernel]              
               38.00  1.2% udp_recvmsg                   [kernel]              
               38.00  1.2% sock_recv_ts_and_drops        [kernel]              
               37.00  1.1% __switch_to                   [kernel]              

-------------------------------------------------------------------------------
   PerfTop:     290 irqs/sec  kernel:82.1% [1000Hz cycles],  (all, cpu: 2)
-------------------------------------------------------------------------------

             samples  pcnt function                      DSO
             _______ _____ _____________________________ ______________________

              175.00  4.7% _raw_spin_lock_irqsave        [kernel]              
              153.00  4.2% system_call                   [kernel]              
              122.00  3.3% ip_rcv                        [kernel]              
              114.00  3.1% _raw_spin_unlock_irqrestore   [kernel]              
              114.00  3.1% fget                          [kernel]              
              105.00  2.8% __netif_receive_skb           [kernel]              
              101.00  2.7% sys_epoll_ctl                 [kernel]              
              100.00  2.7% call_function_single_interrup [kernel]              
               90.00  2.4% copy_user_generic_string      [kernel]              
               84.00  2.3% schedule                      [kernel]              
               76.00  2.1% kmem_cache_free               [kernel]              
               76.00  2.1% _raw_spin_lock                [kernel]              
               72.00  2.0% ip_route_input                [kernel]              
               70.00  1.9% vread_tsc                     [kernel].vsyscall_fn  
               68.00  1.8% __udp4_lib_lookup             [kernel]              
               68.00  1.8% __udp4_lib_rcv                [kernel]              
               57.00  1.5% ep_remove                     [kernel]              
               57.00  1.5% fput                          [kernel]              
               55.00  1.5% kmem_cache_alloc              [kernel]              
               51.00  1.4% process_recv                  mcpudp           




^ permalink raw reply

* [PATCH 1/1] net/usb: initiate sync sequence in sierra_net.c driver
From: Elina Pasheva @ 2010-04-28 23:28 UTC (permalink / raw)
  To: dbrownell-Rn4VEauK+AKRv+LV9MX5uipxlwaOVQ5f,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: rfiler-ywE8TTl5eJHWpu6QEFMNjNBPR1lH4CV8,
	epasheva-ywE8TTl5eJHWpu6QEFMNjNBPR1lH4CV8,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA

Subject: [PATCH 1/1] net/usb: initiate sync sequence in sierra_net.c driver
From: Elina Pasheva <epasheva-ywE8TTl5eJHWpu6QEFMNjNBPR1lH4CV8@public.gmane.org>

The following patch adds the initiation of the sync sequence to
"sierra_net_bind()". If this step is omitted, the modem will never sync up
with the host and it will not be possible to establish a data connection.
This is a high priority patch.

This patch has been checked against the net-2.6 tree.
Signed-off-by: Elina Pasheva <epasheva-ywE8TTl5eJHWpu6QEFMNjNBPR1lH4CV8@public.gmane.org>
Signed-off-by: Rory Filer <rfiler-ywE8TTl5eJHWpu6QEFMNjNBPR1lH4CV8@public.gmane.org>
Tested-by: Elina Pasheva <epasheva-ywE8TTl5eJHWpu6QEFMNjNBPR1lH4CV8@public.gmane.org>
---
 drivers/net/usb/sierra_net.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index a44f9e0..f1942d6 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -789,6 +789,9 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf)
 	/* prepare sync message from template */
 	memcpy(priv->sync_msg, sync_tmplate, sizeof(priv->sync_msg));
 
+	/* initiate the sync sequence */
+	sierra_net_dosync(dev);
+
 	return 0;
 }
 
-- 
1.5.4.3



--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [net-next-2.6 PATCH 2/2] add ndo_set_port_profile op support for enic dynamic vnics
From: Scott Feldman @ 2010-04-28 22:38 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: davem, netdev, chrisw
In-Reply-To: <201004282116.14601.arnd@arndb.de>

On 4/28/10 12:16 PM, "Arnd Bergmann" <arnd@arndb.de> wrote:

> On Wednesday 28 April 2010, Scott Feldman wrote:

>>> Passing just the slave device however would not work in the general case, as
>>> I tried to point out in the mail you replied to. If the slave interface is
>>> owned by a guest using PCI passthrough, or it sits below a stack of nested
>>> interfaces (vlan, bridge, tap, vhost, ...), it's impossible to know what
>>> interface is responsible for setting up the slave.
>> 
>> For port-profile, we want to pass the device that is to be "plugged-in" to
>> the network based on port-profile association.  This is the device that
>> gives basic connectivity to the guest interface, regardless of how the guest
>> interface is wired to the device.  It could be direct PCI pass-thru, macvtap
>> stack, some yet-to-be-invented kernel-bypass stack, etc.
> 
> But if the device is already passed to the guest using pass-thru or
> containers, you would no longer be able to query or change the port profile,
> because it is no longer visible in the host, right?

Drats, I made a mistake here.  You're right, in the pass-thru case the host
lost control of the device, so we need another device to proxy the
port-profile for the pass-thru device.  I had this in the second patch
submission where I was trying to extend the SR-IOV if_link cmds to include
port-profile, but that got mired down in the VF discussions.
  
>>> Note that you cannot perform the association
>>> through the slave interface itself because the remote switch would discard
>>> any traffic originating from an unassociated interface.
>> 
>> That's not a limitation of our device/switch.
> 
> This seems to contradict what you write above, at least when you drop the
> assumption that the protocol is implemented in the NIC firmware.
> The switch obviously does not care about the interface name in Linux or
> any of its data structures. What it cares about instead is the traffic on
> the wire and which of its ports this takes place on.
> 
> When you create a new dynamic enic device or a macvtap port but do not associate
> it, the switch cannot allow this device to send or receive any traffic itself,
> as you write above (not 'plugged in'). The application (or firmware, for that
> matter) therefore needs to talk to the switch over an interface that is
> already
> associated. With VDP, this is the base device that a VEPA port is created
> from,
> i.e. the one that talks LLDP to the switch, i.e. the one that comes up at boot
> time when you have no virtualization and plug into a dumb switch.
> 
> I assumed that this was a specific PF in your NIC,  but it now sounds like it
> could be an internal device that is only visible in your firmware and not
> exposed
> as a network interface in Linux, right?

Yes, that's right.  Without going into implementation details, assume any
enic has firmware with private mgmt channel to switch to do the equivalent
of your base device->LLDP->switch.

> Your firmware can obviously find out the right communication channel for
> associating a dynamic interface with the switch, but when this is implemented
> in software, we cannot generally know that and rely on getting access to the
> interface that lets us talk to the switch. The information which interface
> is getting associated however is completely useless to an implementation like
> this.

So we're kind of back to where we were with iovnl.  We need to specify both
devices, the base device that has access to the switch and the target device
to associate the port-profile with.  Something like:

   ip port_profile set DEVICE [ base DEVICE ] [ { pre_associate |
                                                  pre_associate_rr } ]
                              { name PORT-PROFILE | vsi MGR:VTID:VER }
                              mac LLADDR
                              [ vlan VID ]
                              [ host_uuid HOST_UUID ]
                              [ client_uuid CLIENT_UUID ]
                              [ client_name CLIENT_NAME ]
   ip port_profile del DEVICE [ base DEVICE ] [ mac LLADDR [ vlan VID ] ]
   ip port_profile show DEVICE [ base DEVICE ]

The netdev ops are (when netlink msg handled in kernel):

    ndo_set_port_profile(netdev *target, ...)
    ndo_get_port_profile(netdev *target, ...)
    ndo_del_port_profile(netdev *target, ...)

Base device is optional.  If base device is not given, then target device
gets netdev ops.  If base device is given, then base device gets netdev ops
and *target refers to target device.  This covers the following cases:

1. Current enic where base == target since target can communicate directly
with switch to associate port-profile.  This will not work for the enic
pass-thru case as noted earlier.  We get:

    ip port_profile set eth0 name joes-garage ...

And

    eth0:ndo_set_port_profile(NULL, ...)

2. Future enic for pass-thru case where base != target.  We get:

    ip port_profile set eth1 base eth0 name joes-garage ...

And

    eth0:ndo_set_port_profile(eth1, ...)

3. Future VEPA, we get:

    ip port_profile set eth11 base eth10 vsi 1:23456:7

And (here netlink msg handled in user-space):

    VDP msg sent on eth10 to set port-profile on eth11 using vsi tuple
    

Does this work?  I want to get agreement before coding up patch attempt #4.

-scott



^ permalink raw reply

* Re: [PATCH net-next-2.6] net: ip_queue_rcv_skb() helper
From: David Miller @ 2010-04-28 22:39 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1272493364.2201.67.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 29 Apr 2010 00:22:44 +0200

> Next patch will try to not touch dst refcount in input path (previously
> attempted in July 2009)
> Ref : http://kerneltrap.org/mailarchive/linux-netdev/2009/7/22/6248753

Yes, I remember this.

> [PATCH net-next-2.6] net: ip_queue_rcv_skb() helper
> 
> When queueing a skb to socket, we can immediately release its dst if
> target socket does not use IP_CMSG_PKTINFO.
> 
> tcp_data_queue() can drop dst too.
> 
> This to benefit from a hot cache line and avoid the receiver, possibly
> on another cpu, to dirty this cache line himself.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Pretty soon the whole receive path will be "read mostly" :-)

Applied, thanks Eric.

^ permalink raw reply

* [PATCH net-next-2.6] net: ip_queue_rcv_skb() helper
From: Eric Dumazet @ 2010-04-28 22:22 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20100428.143610.232922250.davem@davemloft.net>

Le mercredi 28 avril 2010 à 14:36 -0700, David Miller a écrit :

> 
> Clever, let's see what this breaks :-)
> 
> Applied, thanks Eric.

Thanks ;)

Let's respin an old work about dst, with a first small work unit :

Next patch will try to not touch dst refcount in input path (previously
attempted in July 2009)
Ref : http://kerneltrap.org/mailarchive/linux-netdev/2009/7/22/6248753


[PATCH net-next-2.6] net: ip_queue_rcv_skb() helper

When queueing a skb to socket, we can immediately release its dst if
target socket does not use IP_CMSG_PKTINFO.

tcp_data_queue() can drop dst too.

This to benefit from a hot cache line and avoid the receiver, possibly
on another cpu, to dirty this cache line himself.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/ip.h       |    1 +
 net/ipv4/ip_sockglue.c |   16 ++++++++++++++++
 net/ipv4/raw.c         |    2 +-
 net/ipv4/tcp_input.c   |    1 +
 net/ipv4/udp.c         |    2 +-
 net/ipv6/raw.c         |    2 +-
 net/ipv6/udp.c         |    2 +-
 7 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index a84ceb6..8149b77 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -393,6 +393,7 @@ extern int ip_options_rcv_srr(struct sk_buff *skb);
  *	Functions provided by ip_sockglue.c
  */
 
+extern int	ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 extern void	ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb);
 extern int	ip_cmsg_send(struct net *net,
 			     struct msghdr *msg, struct ipcm_cookie *ipc);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b0aa054..ce23178 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -954,6 +954,22 @@ e_inval:
 	return -EINVAL;
 }
 
+/**
+ * ip_queue_rcv_skb - Queue an skb into sock receive queue
+ * @sk: socket
+ * @skb: buffer
+ *
+ * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
+ * is not set, we drop skb dst entry now, while dst cache line is hot.
+ */
+int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
+		skb_dst_drop(skb);
+	return sock_queue_rcv_skb(sk, skb);
+}
+EXPORT_SYMBOL(ip_queue_rcv_skb);
+
 int ip_setsockopt(struct sock *sk, int level,
 		int optname, char __user *optval, unsigned int optlen)
 {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index cc6f097..52ef5af 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -290,7 +290,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
 	/* Charge it to the socket. */
 
-	if (sock_queue_rcv_skb(sk, skb) < 0) {
+	if (ip_queue_rcv_skb(sk, skb) < 0) {
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ae3ec15..e82162c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4367,6 +4367,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
 		goto drop;
 
+	skb_dst_drop(skb);
 	__skb_pull(skb, th->doff * 4);
 
 	TCP_ECN_accept_cwr(tp, skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 63eb56b..8591398 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1264,7 +1264,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (inet_sk(sk)->inet_daddr)
 		sock_rps_save_rxhash(sk, skb->rxhash);
 
-	rc = sock_queue_rcv_skb(sk, skb);
+	rc = ip_queue_rcv_skb(sk, skb);
 	if (rc < 0) {
 		int is_udplite = IS_UDPLITE(sk);
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8562738..0e3d2dd 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -381,7 +381,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 	}
 
 	/* Charge it to the socket. */
-	if (sock_queue_rcv_skb(sk, skb) < 0) {
+	if (ip_queue_rcv_skb(sk, skb) < 0) {
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3ead20a..aa0e47a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -514,7 +514,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
+	if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
 		if (rc == -ENOMEM)
 			UDP6_INC_STATS_BH(sock_net(sk),



^ permalink raw reply related

* Re: [PATCH]: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4)
From: Neil Horman @ 2010-04-28 21:50 UTC (permalink / raw)
  To: David Miller; +Cc: vladislav.yasevich, sri, linux-sctp, eteo, netdev, security
In-Reply-To: <20100428.142339.45883821.davem@davemloft.net>

On Wed, Apr 28, 2010 at 02:23:39PM -0700, David Miller wrote:
> From: Vlad Yasevich <vladislav.yasevich@hp.com>
> Date: Wed, 28 Apr 2010 16:37:04 -0400
> 
> > 
> > Looks good.
> > 
> > Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
> 
> Applied, thanks Neil and Vlad.
> 
Thanks!
Neil


^ permalink raw reply

* Re: [PATCH] RCU: don't turn off lockdep when find suspicious rcu_dereference_check() usage
From: Paul E. McKenney @ 2010-04-28 21:37 UTC (permalink / raw)
  To: Miles Lane
  Cc: Vivek Goyal, Eric Paris, Lai Jiangshan, Ingo Molnar,
	Peter Zijlstra, LKML, nauman, eric.dumazet, netdev, Jens Axboe,
	Gui Jianfeng, Li Zefan, Johannes Berg
In-Reply-To: <p2ka44ae5cd1004281358n86ce29d2tbece16b2fb974dab@mail.gmail.com>

On Wed, Apr 28, 2010 at 04:58:54PM -0400, Miles Lane wrote:
> On Sat, Apr 24, 2010 at 10:34 PM, Paul E. McKenney <
> paulmck@linux.vnet.ibm.com> wrote:
> On Fri, Apr 23, 2010 at 06:59:12PM -0400, Miles Lane wrote:

[ . . . ]

> > commit 0868dd631def762ba00c2f0f397a53c5cdf24ae2
> > Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > Date:   Sat Apr 24 19:23:30 2010 -0700
> >
> >    block-cgroup: fix RCU-lockdep splat in blkiocg_add_blkio_group()
> >
> >    It is necessary to be in an RCU read-side critical section when invoking
> >    css_id(), so this patch adds one to blkiocg_add_blkio_group().  This is
> >    actually a false positive, because this is called at initialization
> > time,
> >    and hence always refers to the root cgroup, which cannot go away.
> >
> >    Located-by: Miles Lane <miles.lane@gmail.com>
> >    Suggested-by: Vivek Goyal <vgoyal@redhat.com>
> >    Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> >
> > diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> > index 5fe03de..55c8c73 100644
> > --- a/block/blk-cgroup.c
> > +++ b/block/blk-cgroup.c
> > @@ -71,7 +71,9 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> >
> >        spin_lock_irqsave(&blkcg->lock, flags);
> >        rcu_assign_pointer(blkg->key, key);
> > +       rcu_read_lock();
> >        blkg->blkcg_id = css_id(&blkcg->css);
> > +       rcu_read_unlock();
> >        hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
> >        spin_unlock_irqrestore(&blkcg->lock, flags);
> >  #ifdef CONFIG_DEBUG_BLK_CGROUP
> >
> 
> Hi Paul,
> Did this patch make it into your patch set?  Has your patch set gone into
> the Linus tree?
> I just tested 2.6.34-rc5-git8 and hit one of the issues again.  I think this
> patch is intended to correct this issue?

I replaced the above with an improved patch from Vivek Goyal, which has
not yet reached mainline.  I will resend my patch stack.

							Thanx, Paul

> [    2.289598] [ INFO: suspicious rcu_dereference_check() usage. ]
> [    2.289604] ---------------------------------------------------
> [    2.289610] kernel/cgroup.c:4438 invoked rcu_dereference_check() without
> protection!
> [    2.289615]
> [    2.289617] other info that might help us debug this:
> [    2.289619]
> [    2.289624]
> [    2.289626] rcu_scheduler_active = 1, debug_locks = 1
> [    2.289632] 2 locks held by async/1/575:
> [    2.289637]  #0:  (&shost->scan_mutex){+.+.+.}, at: [<c121063d>]
> __scsi_add_device+0x5b/0xab
> [    2.289660]  #1:  (&(&blkcg->lock)->rlock){......}, at: [<c1143acb>]
> blkiocg_add_blkio_group+0x1a/0x73
> [    2.289678]
> [    2.289680] stack backtrace:
> [    2.289688] Pid: 575, comm: async/1 Not tainted 2.6.34-rc5-git8 #17
> [    2.289693] Call Trace:
> [    2.289704]  [<c12ee273>] ? printk+0xf/0x14
> [    2.289715]  [<c1050fbd>] lockdep_rcu_dereference+0x74/0x7d
> [    2.289725]  [<c106227d>] css_id+0x37/0x46
> [    2.289734]  [<c1143adc>] blkiocg_add_blkio_group+0x2b/0x73
> [    2.289744]  [<c1146a19>] cfq_init_queue+0xd6/0x2a3
> [    2.289755]  [<c120d657>] ? scsi_request_fn+0x0/0x3ea
> [    2.289764]  [<c113a316>] elevator_init+0xa1/0xd5
> [    2.289774]  [<c113bc3f>] blk_init_queue_node+0x103/0x109
> [    2.289783]  [<c113bc50>] blk_init_queue+0xb/0xd
> [    2.289792]  [<c120da58>] __scsi_alloc_queue+0x17/0xef
> [    2.289802]  [<c120db40>] scsi_alloc_queue+0x10/0x49
> [    2.289811]  [<c120f381>] scsi_alloc_sdev+0x14f/0x1ef
> [    2.289821]  [<c120f617>] scsi_probe_and_add_lun+0xb5/0x7ed
> [    2.289831]  [<c10517dc>] ? trace_hardirqs_on_caller+0x119/0x141
> [    2.289843]  [<c1210669>] __scsi_add_device+0x87/0xab
> [    2.289854]  [<c1232dfe>] ata_scsi_scan_host+0x64/0x136
> [    2.289865]  [<c12312c3>] async_port_probe+0x9e/0xa4
> [    2.289876]  [<c10479c8>] async_thread+0xf0/0x1d4
> [    2.289887]  [<c102b474>] ? default_wake_function+0x0/0xd
> [    2.289896]  [<c10478d8>] ? async_thread+0x0/0x1d4
> [    2.289906]  [<c1041a2a>] kthread+0x6a/0x6f
> [    2.289916]  [<c10419c0>] ? kthread+0x0/0x6f
> [    2.289926]  [<c1003742>] kernel_thread_helper+0x6/0x1a
> [    2.289934]
> [    2.289935] ===================================================
> [    2.289941] [ INFO: suspicious rcu_dereference_check() usage. ]
> [    2.289946] ---------------------------------------------------
> [    2.289951] kernel/cgroup.c:1651 invoked rcu_dereference_check() without
> protection!
> [    2.289957]
> [    2.289958] other info that might help us debug this:
> [    2.289961]
> [    2.289966]
> [    2.289967] rcu_scheduler_active = 1, debug_locks = 1
> [    2.289973] 1 lock held by async/1/575:
> [    2.289978]  #0:  (&shost->scan_mutex){+.+.+.}, at: [<c121063d>]
> __scsi_add_device+0x5b/0xab
> [    2.289995]
> [    2.289996] stack backtrace:
> [    2.290003] Pid: 575, comm: async/1 Not tainted 2.6.34-rc5-git8 #17
> [    2.290008] Call Trace:
> [    2.290016]  [<c12ee273>] ? printk+0xf/0x14
> [    2.290025]  [<c1050fbd>] lockdep_rcu_dereference+0x74/0x7d
> [    2.290035]  [<c106423a>] cgroup_path+0x4a/0x110
> [    2.290045]  [<c1143b12>] blkiocg_add_blkio_group+0x61/0x73
> [    2.290055]  [<c1146a19>] cfq_init_queue+0xd6/0x2a3
> [    2.290065]  [<c120d657>] ? scsi_request_fn+0x0/0x3ea
> [    2.290074]  [<c113a316>] elevator_init+0xa1/0xd5
> [    2.290083]  [<c113bc3f>] blk_init_queue_node+0x103/0x109
> [    2.290093]  [<c113bc50>] blk_init_queue+0xb/0xd
> [    2.290102]  [<c120da58>] __scsi_alloc_queue+0x17/0xef
> [    2.290111]  [<c120db40>] scsi_alloc_queue+0x10/0x49
> [    2.290120]  [<c120f381>] scsi_alloc_sdev+0x14f/0x1ef
> [    2.290131]  [<c120f617>] scsi_probe_and_add_lun+0xb5/0x7ed
> [    2.290140]  [<c10517dc>] ? trace_hardirqs_on_caller+0x119/0x141
> [    2.290152]  [<c1210669>] __scsi_add_device+0x87/0xab
> [    2.290162]  [<c1232dfe>] ata_scsi_scan_host+0x64/0x136
> [    2.290172]  [<c12312c3>] async_port_probe+0x9e/0xa4
> [    2.290182]  [<c10479c8>] async_thread+0xf0/0x1d4
> [    2.290192]  [<c102b474>] ? default_wake_function+0x0/0xd
> [    2.290202]  [<c10478d8>] ? async_thread+0x0/0x1d4
> [    2.290211]  [<c1041a2a>] kthread+0x6a/0x6f
> [    2.290221]  [<c10419c0>] ? kthread+0x0/0x6f
> [    2.290230]  [<c1003742>] kernel_thread_helper+0x6/0x1a

^ permalink raw reply

* Re: [PATCH net-next-2.6] net: speedup udp receive path
From: David Miller @ 2010-04-28 21:36 UTC (permalink / raw)
  To: eric.dumazet; +Cc: hadi, xiaosuo, therbert, shemminger, netdev, eilong, bmb
In-Reply-To: <1272463605.2267.70.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 28 Apr 2010 16:06:45 +0200

> [PATCH net-next-2.6] net: speedup udp receive path
> 
> Since commit 95766fff ([UDP]: Add memory accounting.), 
> each received packet needs one extra lock_sock()/release_sock() pair.
> 
> This added latency because of possible backlog handling. Then later,
> ticket spinlocks added yet another latency source in case of DDOS.
> 
> This patch introduces lock_sock_bh() and unlock_sock_bh()
> synchronization primitives, avoiding one atomic operation and backlog
> processing.
> 
> skb_free_datagram_locked() uses them instead of full blown
> lock_sock()/release_sock(). skb is orphaned inside locked section for
> proper socket memory reclaim, and finally freed outside of it.
> 
> UDP receive path now take the socket spinlock only once.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Clever, let's see what this breaks :-)

Applied, thanks Eric.

^ permalink raw reply

* Re: [PATCH v2] net/sb1250: register mdio bus in probe
From: David Miller @ 2010-04-28 21:32 UTC (permalink / raw)
  To: sebastian; +Cc: ralf, netdev
In-Reply-To: <20100428195701.GA3461@Chamillionaire.breakpoint.cc>

From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Wed, 28 Apr 2010 21:57:01 +0200

> "ifconfig eth0 up && ifconfig eth0 down" triggers:
 ...
> mdiobus_register() calls device_register() which initializes the kobj of
> the device. mdiobus_unregister() calls only device_del() so we have one
> reference left. That one is leaving with mdiobus_free() which is only
> called on remove.
> Since I don't see any reason why mdiobus_register()/mdiobus_unregister()
> should happen in ->open()/->close() I move them to probe & exit.
> 
> Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>

This looks a lot better, applied, thanks Sebastian!

^ permalink raw reply

* Re: [net-next-2.6 PATCH] igb: Clean up left over prototype of igb_get_hw_dev_name()
From: David Miller @ 2010-04-28 21:26 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, emil.s.tantilov
In-Reply-To: <20100428205949.30209.69599.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Wed, 28 Apr 2010 13:59:53 -0700

> From: Emil Tantilov <emil.s.tantilov@intel.com>
> 
> Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH] forcedeth: Stay in NAPI as long as there's work
From: David Miller @ 2010-04-28 21:25 UTC (permalink / raw)
  To: shemminger; +Cc: joe, therbert, netdev, aabdulla
In-Reply-To: <20100428112528.01277670@nehalam>

From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 28 Apr 2010 11:25:28 -0700

> The following does the same thing without the extra overhead
> of testing all the registers. It also handles the out of memory
> case.
> 
> Compile tested only...
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Tom can you test this version?

^ permalink raw reply

* Re: [PATCH]: sctp: Fix skb_over_panic resulting from multiple invalid parameter errors (CVE-2010-1173) (v4)
From: David Miller @ 2010-04-28 21:23 UTC (permalink / raw)
  To: vladislav.yasevich; +Cc: nhorman, sri, linux-sctp, eteo, netdev, security
In-Reply-To: <4BD89C70.6080406@hp.com>

From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 28 Apr 2010 16:37:04 -0400

> 
> Looks good.
> 
> Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>

Applied, thanks Neil and Vlad.

^ permalink raw reply

* Re: pull request: wireless-2.6 2010-04-15
From: David Miller @ 2010-04-28 21:23 UTC (permalink / raw)
  To: hauke-5/S+JYg5SzeELgA04lAiVw
  Cc: linville-2XuSBdqkA4R54TAoqtyWWQ,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <4BD8A6B4.20603-5/S+JYg5SzeELgA04lAiVw@public.gmane.org>

From: Hauke Mehrtens <hauke-5/S+JYg5SzeELgA04lAiVw@public.gmane.org>
Date: Wed, 28 Apr 2010 23:20:52 +0200

> Hi David,
> 
> in your merge in 5c01d5669356e13f0fb468944c1dd4c6a7e978ad you added "int
> i;" into wl1271_main.c which is unused in that function.
> 
> This patch fixes the merge problem:
> 
> Signed-off-by: Hauke Mehrtens <hauke-5/S+JYg5SzeELgA04lAiVw@public.gmane.org>

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: pull request: wireless-2.6 2010-04-15
From: Hauke Mehrtens @ 2010-04-28 21:20 UTC (permalink / raw)
  To: David Miller
  Cc: linville-2XuSBdqkA4R54TAoqtyWWQ,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100415.142907.68448693.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>

Hi David,

in your merge in 5c01d5669356e13f0fb468944c1dd4c6a7e978ad you added "int
i;" into wl1271_main.c which is unused in that function.

This patch fixes the merge problem:

Signed-off-by: Hauke Mehrtens <hauke-5/S+JYg5SzeELgA04lAiVw@public.gmane.org>

--- a/drivers/net/wireless/wl12xx/wl1271_main.c
+++ b/drivers/net/wireless/wl12xx/wl1271_main.c
@@ -1311,7 +1311,6 @@
 	struct wl1271_filter_params *fp;
 	struct netdev_hw_addr *ha;
 	struct wl1271 *wl = hw->priv;
-	int i;

 	if (unlikely(wl->state == WL1271_STATE_OFF))
 		return 0;

David Miller wrote:
> From: "John W. Linville" <linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
> Date: Thu, 15 Apr 2010 16:03:31 -0400
> 
>> Another fix intended for 2.6.34...without it some firmware weirdness can
>> induce the driver into hanging the box... :-(
>>
>> Please let me know if there are problems!
> 
> Pulled, thanks.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: vlan performance issue on outgoing traffic (solved)
From: R. Weinedel @ 2010-04-28 21:13 UTC (permalink / raw)
  To: Brandeburg, Jesse; +Cc: netdev@vger.kernel.org
In-Reply-To: <alpine.WNT.2.00.1004271110300.4368@jbrandeb-desk1.amr.corp.intel.com>

I ran some further tests with kernel 2.6.26 and noticed that my cpu
is really very busy (90-100 % sw-irq's) during offloads.

After that I built and installed a 2.6.33 kernel and the problem seems to
be solved. Now I get 946 Mbit/s offload (iperf)!

Ralf Weinedel


^ permalink raw reply

* [net-next-2.6 PATCH] igb: Clean up left over prototype of igb_get_hw_dev_name()
From: Jeff Kirsher @ 2010-04-28 20:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, Emil Tantilov, Jeff Kirsher

From: Emil Tantilov <emil.s.tantilov@intel.com>

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb.h |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 096a526..735ede9 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -338,7 +338,6 @@ enum igb_boards {
 extern char igb_driver_name[];
 extern char igb_driver_version[];
 
-extern char *igb_get_hw_dev_name(struct e1000_hw *hw);
 extern int igb_up(struct igb_adapter *);
 extern void igb_down(struct igb_adapter *);
 extern void igb_reinit_locked(struct igb_adapter *);


^ permalink raw reply related

* [PATCHv7] add mergeable buffers support to vhost_net
From: David L Stevens @ 2010-04-28 20:57 UTC (permalink / raw)
  To: mst; +Cc: netdev, kvm, virtualization

This patch adds mergeable receive buffer support to vhost_net.

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>

diff -ruNp net-next-v0/drivers/vhost/net.c net-next-v7/drivers/vhost/net.c
--- net-next-v0/drivers/vhost/net.c	2010-04-24 21:36:54.000000000 -0700
+++ net-next-v7/drivers/vhost/net.c	2010-04-28 12:26:18.000000000 -0700
@@ -74,6 +74,23 @@ static int move_iovec_hdr(struct iovec *
 	}
 	return seg;
 }
+/* Copy iovec entries for len bytes from iovec. Return segments used. */
+static int copy_iovec_hdr(const struct iovec *from, struct iovec *to,
+			  size_t len, int iovcount)
+{
+	int seg = 0;
+	size_t size;
+	while (len && seg < iovcount) {
+		size = min(from->iov_len, len);
+		to->iov_base = from->iov_base;
+		to->iov_len = size;
+		len -= size;
+		++from;
+		++to;
+		++seg;
+	}
+	return seg;
+}
 
 /* Caller must have TX VQ lock */
 static void tx_poll_stop(struct vhost_net *net)
@@ -109,7 +126,7 @@ static void handle_tx(struct vhost_net *
 	};
 	size_t len, total_len = 0;
 	int err, wmem;
-	size_t hdr_size;
+	size_t vhost_hlen;
 	struct socket *sock = rcu_dereference(vq->private_data);
 	if (!sock)
 		return;
@@ -128,13 +145,13 @@ static void handle_tx(struct vhost_net *
 
 	if (wmem < sock->sk->sk_sndbuf / 2)
 		tx_poll_stop(net);
-	hdr_size = vq->hdr_size;
+	vhost_hlen = vq->vhost_hlen;
 
 	for (;;) {
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 NULL, NULL);
+		head = vhost_get_desc(&net->dev, vq, vq->iov,
+				      ARRAY_SIZE(vq->iov),
+				      &out, &in,
+				      NULL, NULL);
 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
 		if (head == vq->num) {
 			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
@@ -155,20 +172,20 @@ static void handle_tx(struct vhost_net *
 			break;
 		}
 		/* Skip header. TODO: support TSO. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
+		s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, out);
 		msg.msg_iovlen = out;
 		len = iov_length(vq->iov, out);
 		/* Sanity check */
 		if (!len) {
 			vq_err(vq, "Unexpected header len for TX: "
 			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
+			       iov_length(vq->hdr, s), vhost_hlen);
 			break;
 		}
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(NULL, sock, &msg, len);
 		if (unlikely(err < 0)) {
-			vhost_discard_vq_desc(vq);
+			vhost_discard_desc(vq, 1);
 			tx_poll_start(net, sock);
 			break;
 		}
@@ -187,12 +204,25 @@ static void handle_tx(struct vhost_net *
 	unuse_mm(net->dev.mm);
 }
 
+static int vhost_head_len(struct vhost_virtqueue *vq, struct sock *sk)
+{
+	struct sk_buff *head;
+	int len = 0;
+
+	lock_sock(sk);
+	head = skb_peek(&sk->sk_receive_queue);
+	if (head)
+		len = head->len + vq->sock_hlen;
+	release_sock(sk);
+	return len;
+}
+
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
-	unsigned head, out, in, log, s;
+	unsigned in, log, s;
 	struct vhost_log *vq_log;
 	struct msghdr msg = {
 		.msg_name = NULL,
@@ -203,14 +233,14 @@ static void handle_rx(struct vhost_net *
 		.msg_flags = MSG_DONTWAIT,
 	};
 
-	struct virtio_net_hdr hdr = {
-		.flags = 0,
-		.gso_type = VIRTIO_NET_HDR_GSO_NONE
+	struct virtio_net_hdr_mrg_rxbuf hdr = {
+		.hdr.flags = 0,
+		.hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
 	};
 
 	size_t len, total_len = 0;
-	int err;
-	size_t hdr_size;
+	int err, headcount, datalen;
+	size_t vhost_hlen;
 	struct socket *sock = rcu_dereference(vq->private_data);
 	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
 		return;
@@ -218,18 +248,19 @@ static void handle_rx(struct vhost_net *
 	use_mm(net->dev.mm);
 	mutex_lock(&vq->mutex);
 	vhost_disable_notify(vq);
-	hdr_size = vq->hdr_size;
+	vhost_hlen = vq->vhost_hlen;
 
 	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
 		vq->log : NULL;
 
-	for (;;) {
-		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
-					 ARRAY_SIZE(vq->iov),
-					 &out, &in,
-					 vq_log, &log);
+	while ((datalen = vhost_head_len(vq, sock->sk))) {
+		headcount = vhost_get_desc_n(vq, vq->heads,
+					     datalen + vhost_hlen,
+					     &in, vq_log, &log);
+		if (headcount < 0)
+			break;
 		/* OK, now we need to know about added descriptors. */
-		if (head == vq->num) {
+		if (!headcount) {
 			if (unlikely(vhost_enable_notify(vq))) {
 				/* They have slipped one in as we were
 				 * doing that: check again. */
@@ -241,46 +272,53 @@ static void handle_rx(struct vhost_net *
 			break;
 		}
 		/* We don't need to be notified again. */
-		if (out) {
-			vq_err(vq, "Unexpected descriptor format for RX: "
-			       "out %d, int %d\n",
-			       out, in);
-			break;
-		}
-		/* Skip header. TODO: support TSO/mergeable rx buffers. */
-		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
+		if (vhost_hlen)
+			/* Skip header. TODO: support TSO. */
+			s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
+		else
+			s = copy_iovec_hdr(vq->iov, vq->hdr, vq->sock_hlen, in);
 		msg.msg_iovlen = in;
 		len = iov_length(vq->iov, in);
 		/* Sanity check */
 		if (!len) {
 			vq_err(vq, "Unexpected header len for RX: "
 			       "%zd expected %zd\n",
-			       iov_length(vq->hdr, s), hdr_size);
+			       iov_length(vq->hdr, s), vhost_hlen);
 			break;
 		}
 		err = sock->ops->recvmsg(NULL, sock, &msg,
 					 len, MSG_DONTWAIT | MSG_TRUNC);
 		/* TODO: Check specific error and bomb out unless EAGAIN? */
 		if (err < 0) {
-			vhost_discard_vq_desc(vq);
+			vhost_discard_desc(vq, headcount);
 			break;
 		}
-		/* TODO: Should check and handle checksum. */
-		if (err > len) {
-			pr_err("Discarded truncated rx packet: "
-			       " len %d > %zd\n", err, len);
-			vhost_discard_vq_desc(vq);
+		if (err != datalen) {
+			pr_err("Discarded rx packet: "
+			       " len %d, expected %zd\n", err, datalen);
+			vhost_discard_desc(vq, headcount);
 			continue;
 		}
 		len = err;
-		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
-		if (err) {
-			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
-			       vq->iov->iov_base, err);
+		if (vhost_hlen &&
+		    memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0,
+				      vhost_hlen)) {
+			vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
+			       vq->iov->iov_base);
 			break;
 		}
-		len += hdr_size;
-		vhost_add_used_and_signal(&net->dev, vq, head, len);
+		/* TODO: Should check and handle checksum. */
+		if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
+		    memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
+				      offsetof(typeof(hdr), num_buffers),
+				      sizeof(hdr.num_buffers))) {
+			vq_err(vq, "Failed num_buffers write");
+			vhost_discard_desc(vq, headcount);
+			break;
+		}
+		len += vhost_hlen;
+		vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
+					    headcount);
 		if (unlikely(vq_log))
 			vhost_log_write(vq, vq_log, log, len);
 		total_len += len;
@@ -561,9 +599,21 @@ done:
 
 static int vhost_net_set_features(struct vhost_net *n, u64 features)
 {
-	size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ?
-		sizeof(struct virtio_net_hdr) : 0;
+	size_t vhost_hlen, sock_hlen, hdr_len;
 	int i;
+
+	hdr_len = (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ?
+			sizeof(struct virtio_net_hdr_mrg_rxbuf) :
+			sizeof(struct virtio_net_hdr);
+	if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
+		/* vhost provides vnet_hdr */
+		vhost_hlen = hdr_len;
+		sock_hlen = 0;
+	} else {
+		/* socket provides vnet_hdr */
+		vhost_hlen = 0;
+		sock_hlen = hdr_len;
+	}
 	mutex_lock(&n->dev.mutex);
 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
 	    !vhost_log_access_ok(&n->dev)) {
@@ -574,7 +624,8 @@ static int vhost_net_set_features(struct
 	smp_wmb();
 	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
 		mutex_lock(&n->vqs[i].mutex);
-		n->vqs[i].hdr_size = hdr_size;
+		n->vqs[i].vhost_hlen = vhost_hlen;
+		n->vqs[i].sock_hlen = sock_hlen;
 		mutex_unlock(&n->vqs[i].mutex);
 	}
 	vhost_net_flush(n);
diff -ruNp net-next-v0/drivers/vhost/vhost.c net-next-v7/drivers/vhost/vhost.c
--- net-next-v0/drivers/vhost/vhost.c	2010-04-22 11:31:57.000000000 -0700
+++ net-next-v7/drivers/vhost/vhost.c	2010-04-28 11:16:13.000000000 -0700
@@ -114,7 +114,8 @@ static void vhost_vq_reset(struct vhost_
 	vq->used_flags = 0;
 	vq->log_used = false;
 	vq->log_addr = -1ull;
-	vq->hdr_size = 0;
+	vq->vhost_hlen = 0;
+	vq->sock_hlen = 0;
 	vq->private_data = NULL;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
@@ -861,6 +862,53 @@ static unsigned get_indirect(struct vhos
 	return 0;
 }
 
+/* This is a multi-buffer version of vhost_get_desc
+ * @vq		- the relevant virtqueue
+ * datalen	- data length we'll be reading
+ * @iovcount	- returned count of io vectors we fill
+ * @log		- vhost log
+ * @log_num	- log offset
+ *	returns number of buffer heads allocated, negative on error
+ */
+int vhost_get_desc_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+		     int datalen, unsigned *iovcount, struct vhost_log *log,
+		     unsigned int *log_num)
+{
+	unsigned int out, in;
+	int seg = 0;
+	int headcount = 0;
+	int r;
+
+	while (datalen > 0) {
+		if (headcount >= VHOST_NET_MAX_SG) {
+			r = -ENOBUFS;
+			goto err;
+		}
+		heads[headcount].id = vhost_get_desc(vq->dev, vq, vq->iov + seg,
+					      ARRAY_SIZE(vq->iov) - seg, &out,
+					      &in, log, log_num);
+		if (heads[headcount].id == vq->num) {
+			r = 0;
+			goto err;
+		}
+		if (out || in <= 0) {
+			vq_err(vq, "unexpected descriptor format for RX: "
+				"out %d, in %d\n", out, in);
+			r = -EINVAL;
+			goto err;
+		}
+		heads[headcount].len = iov_length(vq->iov + seg, in);
+		datalen -= heads[headcount].len;
+		++headcount;
+		seg += in;
+	}
+	*iovcount = seg;
+	return headcount;
+err:
+	vhost_discard_desc(vq, headcount);
+	return r;
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
@@ -868,7 +916,7 @@ static unsigned get_indirect(struct vhos
  *
  * This function returns the descriptor number found, or vq->num (which
  * is never a valid descriptor number) if none was found. */
-unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+unsigned vhost_get_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 			   struct iovec iov[], unsigned int iov_size,
 			   unsigned int *out_num, unsigned int *in_num,
 			   struct vhost_log *log, unsigned int *log_num)
@@ -986,9 +1034,9 @@ unsigned vhost_get_vq_desc(struct vhost_
 }
 
 /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+void vhost_discard_desc(struct vhost_virtqueue *vq, int n)
 {
-	vq->last_avail_idx--;
+	vq->last_avail_idx -= n;
 }
 
 /* After we've used one of their buffers, we tell them about it.  We'll then
@@ -1033,6 +1081,68 @@ int vhost_add_used(struct vhost_virtqueu
 	return 0;
 }
 
+static void vhost_log_used(struct vhost_virtqueue *vq,
+			   struct vring_used_elem __user *used)
+{
+	/* Make sure data is seen before log. */
+	smp_wmb();
+	/* Log used ring entry write. */
+	log_write(vq->log_base,
+		  vq->log_addr +
+		   ((void __user *)used - (void __user *)vq->used),
+		  sizeof *used);
+	/* Log used index update. */
+	log_write(vq->log_base,
+		  vq->log_addr + offsetof(struct vring_used, idx),
+		  sizeof vq->used->idx);
+	if (vq->log_ctx)
+		eventfd_signal(vq->log_ctx, 1);
+}
+
+static int __vhost_add_used_n(struct vhost_virtqueue *vq,
+			    struct vring_used_elem *heads,
+			    unsigned count)
+{
+	struct vring_used_elem __user *used;
+	int start;
+
+	start = vq->last_used_idx % vq->num;
+	used = vq->used->ring + start;
+	if (copy_to_user(used, heads, count * sizeof *used)) {
+		vq_err(vq, "Failed to write used");
+		return -EFAULT;
+	}
+	/* Make sure buffer is written before we update index. */
+	smp_wmb();
+	if (put_user(vq->last_used_idx + count, &vq->used->idx)) {
+		vq_err(vq, "Failed to increment used idx");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used))
+		vhost_log_used(vq, used);
+	vq->last_used_idx += count;
+	return 0;
+}
+
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
+		     unsigned count)
+{
+	int start, n, r;
+
+	start = vq->last_used_idx % vq->num;
+	n = vq->num - start;
+	if (n < count) {
+		r = __vhost_add_used_n(vq, heads, n);
+		if (r < 0)
+			return r;
+		heads += n;
+		count -= n;
+	}
+	return __vhost_add_used_n(vq, heads, count);
+}
+
 /* This actually signals the guest, using eventfd. */
 void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 {
@@ -1062,6 +1172,15 @@ void vhost_add_used_and_signal(struct vh
 	vhost_signal(dev, vq);
 }
 
+/* multi-buffer version of vhost_add_used_and_signal */
+void vhost_add_used_and_signal_n(struct vhost_dev *dev,
+				 struct vhost_virtqueue *vq,
+				 struct vring_used_elem *heads, unsigned count)
+{
+	vhost_add_used_n(vq, heads, count);
+	vhost_signal(dev, vq);
+}
+
 /* OK, now we need to know about added descriptors. */
 bool vhost_enable_notify(struct vhost_virtqueue *vq)
 {
@@ -1086,7 +1205,7 @@ bool vhost_enable_notify(struct vhost_vi
 		return false;
 	}
 
-	return avail_idx != vq->last_avail_idx;
+	return avail_idx != vq->avail_idx;
 }
 
 /* We don't need to be notified again. */
diff -ruNp net-next-v0/drivers/vhost/vhost.h net-next-v7/drivers/vhost/vhost.h
--- net-next-v0/drivers/vhost/vhost.h	2010-04-24 21:37:41.000000000 -0700
+++ net-next-v7/drivers/vhost/vhost.h	2010-04-26 10:35:25.000000000 -0700
@@ -84,7 +84,9 @@ struct vhost_virtqueue {
 	struct iovec indirect[VHOST_NET_MAX_SG];
 	struct iovec iov[VHOST_NET_MAX_SG];
 	struct iovec hdr[VHOST_NET_MAX_SG];
-	size_t hdr_size;
+	size_t vhost_hlen;
+	size_t sock_hlen;
+	struct vring_used_elem heads[VHOST_NET_MAX_SG];
 	/* We use a kind of RCU to access private pointer.
 	 * All readers access it from workqueue, which makes it possible to
 	 * flush the workqueue instead of synchronize_rcu. Therefore readers do
@@ -120,16 +122,23 @@ long vhost_dev_ioctl(struct vhost_dev *,
 int vhost_vq_access_ok(struct vhost_virtqueue *vq);
 int vhost_log_access_ok(struct vhost_dev *);
 
-unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+int vhost_get_desc_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
+		     int datalen, unsigned int *iovcount, struct vhost_log *log,
+		     unsigned int *log_num);
+unsigned vhost_get_desc(struct vhost_dev *, struct vhost_virtqueue *,
 			   struct iovec iov[], unsigned int iov_count,
 			   unsigned int *out_num, unsigned int *in_num,
 			   struct vhost_log *log, unsigned int *log_num);
-void vhost_discard_vq_desc(struct vhost_virtqueue *);
+void vhost_discard_desc(struct vhost_virtqueue *, int);
 
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
-void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
+int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
+		     unsigned count);
 void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
-			       unsigned int head, int len);
+			       unsigned int id, int len);
+void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
+			       struct vring_used_elem *heads, unsigned count);
+void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
 void vhost_disable_notify(struct vhost_virtqueue *);
 bool vhost_enable_notify(struct vhost_virtqueue *);
 
@@ -149,7 +158,8 @@ enum {
 	VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
 			 (1 << VIRTIO_RING_F_INDIRECT_DESC) |
 			 (1 << VHOST_F_LOG_ALL) |
-			 (1 << VHOST_NET_F_VIRTIO_NET_HDR),
+			 (1 << VHOST_NET_F_VIRTIO_NET_HDR) |
+			 (1 << VIRTIO_NET_F_MRG_RXBUF),
 };
 
 static inline int vhost_has_feature(struct vhost_dev *dev, int bit)



^ permalink raw reply

* Re: 2.6.34-rc5-git7 (plus all patches) -- another suspicious rcu_dereference_check() usage.
From: Paul E. McKenney @ 2010-04-28 20:44 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Miles Lane, Vivek Goyal, Eric Paris, Lai Jiangshan, Ingo Molnar,
	Peter Zijlstra, LKML, nauman, netdev, Jens Axboe, Gui Jianfeng,
	Li Zefan, Johannes Berg, shemminger
In-Reply-To: <1272485923.2201.22.camel@edumazet-laptop>

On Wed, Apr 28, 2010 at 10:18:43PM +0200, Eric Dumazet wrote:
> Le mercredi 28 avril 2010 à 13:09 -0700, Paul E. McKenney a écrit :
> > On Wed, Apr 28, 2010 at 09:38:11PM +0200, Eric Dumazet wrote:
> > > Le mercredi 28 avril 2010 à 10:54 -0700, Paul E. McKenney a écrit :
> > > > On Mon, Apr 26, 2010 at 08:51:06PM -0400, Miles Lane wrote:
> > > > > This one occurred during the wakeup from suspend to RAM.
> > > > > 
> > > > > [  984.724697] [ INFO: suspicious rcu_dereference_check() usage. ]
> > > > > [  984.724700] ---------------------------------------------------
> > > > > [  984.724703] include/linux/fdtable.h:88 invoked
> > > > > rcu_dereference_check() without protection!
> > > > > [  984.724706]
> > > > > [  984.724707] other info that might help us debug this:
> > > > > [  984.724708]
> > > > > [  984.724711]
> > > > > [  984.724711] rcu_scheduler_active = 1, debug_locks = 1
> > > > > [  984.724714] no locks held by dbus-daemon/4680.
> > > > > [  984.724717]
> > > > > [  984.724717] stack backtrace:
> > > > > [  984.724721] Pid: 4680, comm: dbus-daemon Not tainted 2.6.34-rc5-git7 #33
> > > > > [  984.724724] Call Trace:
> > > > > [  984.724734]  [<ffffffff81074556>] lockdep_rcu_dereference+0x9d/0xa6
> > > > > [  984.724740]  [<ffffffff810fc785>] fcheck_files+0xb1/0xc9
> > > > > [  984.724745]  [<ffffffff810fc7f5>] fget_light+0x35/0xab
> > > > > [  984.724751]  [<ffffffff81433e1b>] ? sock_poll_wait+0x13/0x18
> > > > > [  984.724755]  [<ffffffff81433e39>] ? unix_poll+0x19/0x95
> > > > > [  984.724762]  [<ffffffff8110aa95>] do_sys_poll+0x1ff/0x3e5
> > > > > [  984.724766]  [<ffffffff8110a19e>] ? __pollwait+0x0/0xc7
> > > > > [  984.724771]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724776]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724780]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724784]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724788]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724793]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724797]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724802]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724806]  [<ffffffff8110a265>] ? pollwake+0x0/0x4f
> > > > > [  984.724812]  [<ffffffff8110ae0f>] sys_poll+0x50/0xbb
> > > > > [  984.724818]  [<ffffffff81009d82>] system_call_fastpath+0x16/0x1b
> > > > 
> > > > Hmmm...  I am not convinced that this is a false positive.  Couldn't
> > > > there be a multi-threaded process where one thread is invoking poll()
> > > > on a UNIX socket just as another thread is calling close() on it?
> > > > 
> > > > The current fcheck_files() logic requires that the caller either (1) be in
> > > > an RCU read-side critical section, (2) hold ->file_lock, or (3) pass
> > > > in a files_struct with ->count equal to 1 (initialization or cleanup).
> > > > 
> > > > So I don't feel comfortable just slapping an RCU read-side critical
> > > > section around this one, at least not unless someone who understands
> > > > the locking says that doing so is OK.
> > > > 
> > > > 		
> > > 
> > > It's a single-threaded program.
> > > 
> > > So fget_light() calls fcheck_files(files, fd); without rcu lock,
> > > but some /proc/pid/fd/... user temporarily raised files->count just
> > > before we perform the condition check.
> > 
> > So I should add a single-threaded check.  My first thought was to use
> > current_is_single_threaded(), but the bit about scanning the full list
> > of processes does give me pause.  However, thread_group_empty() looks
> > like a much lighter-weight alternative.
> > 
> > I believe that it is possible for a pair of single-threaded processes
> > to share a file descriptor, but that should not be a problem, as both
> > of them would need to close it for it to go away.
> > 
> > But what happens if someone does a clone() with CLONE_FILES, as some
> > of the AIO stuff seems to do?  Won't that allow one of the resulting
> > processes to close the file for both of them, even though both are
> > otherwise single-threaded?  And the ->count seems to be the only
> > distinction between these two cases.
> > 
> > And AIO does CLONE_VM as well as CLONE_FILES, but that seems to mean that
> > the check must scan the processes with current_is_single_threaded().
> > Besides which, a user could invoke clone() with only CLONE_FILES
> > specified, right?
> > 
> > Or am I just confused here?
> > 
> > 							Thanx, Paul
> 
> If a program is single-threaded, and doing a fget_light() syscall, it
> cannot possibly do a clone() in parallel ;)

The sequence of events that I am worried about is as follows:

1.	Single-threaded process does clone(CLONE_FILES).  The
	result is a pair of single-threaded processes that share
	file descriptors.

2.	One of these processes does files_fdtable(i) at the same
	time as the other process closes file descriptor i.

So, clone and -then- do fget_light().

> If we want to be picky, we could add a user provided condition, aka "we
> are sure we are allowed to do this because we are the owner of the files
> struct".

But yes, if I understand your trick below, the race conditions from
the above sequence of events would simply force the processes off
of the fget_light() path, which should be OK.

						Thanx, Paul

> diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
> index 6da962c..027f5e1 100644
> --- a/drivers/char/tty_io.c
> +++ b/drivers/char/tty_io.c
> @@ -2694,7 +2694,7 @@ void __do_SAK(struct tty_struct *tty)
>  			spin_lock(&p->files->file_lock);
>  			fdt = files_fdtable(p->files);
>  			for (i = 0; i < fdt->max_fds; i++) {
> -				filp = fcheck_files(p->files, i);
> +				filp = fcheck_files(p->files, i, false);
>  				if (!filp)
>  					continue;
>  				if (filp->f_op->read == tty_read &&
> diff --git a/fs/fcntl.c b/fs/fcntl.c
> index 452d02f..dabf4d8 100644
> --- a/fs/fcntl.c
> +++ b/fs/fcntl.c
> @@ -119,7 +119,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
>  		int retval = oldfd;
> 
>  		rcu_read_lock();
> -		if (!fcheck_files(files, oldfd))
> +		if (!fcheck_files(files, oldfd, false))
>  			retval = -EBADF;
>  		rcu_read_unlock();
>  		return retval;
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 32d12b7..2865f72 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -274,7 +274,7 @@ struct file *fget(unsigned int fd)
>  	struct files_struct *files = current->files;
> 
>  	rcu_read_lock();
> -	file = fcheck_files(files, fd);
> +	file = fcheck_files(files, fd, false);
>  	if (file) {
>  		if (!atomic_long_inc_not_zero(&file->f_count)) {
>  			/* File object ref couldn't be taken */
> @@ -303,10 +303,10 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
> 
>  	*fput_needed = 0;
>  	if (likely((atomic_read(&files->count) == 1))) {
> -		file = fcheck_files(files, fd);
> +		file = fcheck_files(files, fd, true);
>  	} else {
>  		rcu_read_lock();
> -		file = fcheck_files(files, fd);
> +		file = fcheck_files(files, fd, false);
>  		if (file) {
>  			if (atomic_long_inc_not_zero(&file->f_count))
>  				*fput_needed = 1;
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 8418fcc..0e89448 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -1716,7 +1716,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
>  		 * hold ->file_lock.
>  		 */
>  		spin_lock(&files->file_lock);
> -		file = fcheck_files(files, fd);
> +		file = fcheck_files(files, fd, false);
>  		if (file) {
>  			if (path) {
>  				*path = file->f_path;
> @@ -1755,7 +1755,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
>  		files = get_files_struct(task);
>  		if (files) {
>  			rcu_read_lock();
> -			if (fcheck_files(files, fd)) {
> +			if (fcheck_files(files, fd, false)) {
>  				rcu_read_unlock();
>  				put_files_struct(files);
>  				if (task_dumpable(task)) {
> @@ -1813,7 +1813,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
>  	 * hold ->file_lock.
>  	 */
>  	spin_lock(&files->file_lock);
> -	file = fcheck_files(files, fd);
> +	file = fcheck_files(files, fd, false);
>  	if (!file)
>  		goto out_unlock;
>  	if (file->f_mode & FMODE_READ)
> @@ -1899,7 +1899,7 @@ static int proc_readfd_common(struct file * filp, void * dirent,
>  				char name[PROC_NUMBUF];
>  				int len;
> 
> -				if (!fcheck_files(files, fd))
> +				if (!fcheck_files(files, fd, false))
>  					continue;
>  				rcu_read_unlock();
> 
> diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
> index 013dc52..76423ad 100644
> --- a/include/linux/fdtable.h
> +++ b/include/linux/fdtable.h
> @@ -57,11 +57,12 @@ struct files_struct {
>  	struct file * fd_array[NR_OPEN_DEFAULT];
>  };
> 
> -#define rcu_dereference_check_fdtable(files, fdtfd) \
> +#define rcu_dereference_check_fdtable(files, fdtfd, cond) \
>  	(rcu_dereference_check((fdtfd), \
>  			       rcu_read_lock_held() || \
>  			       lockdep_is_held(&(files)->file_lock) || \
> -			       atomic_read(&(files)->count) == 1))
> +			       atomic_read(&(files)->count) == 1 || \
> +			       cond))
> 
>  #define files_fdtable(files) \
>  		(rcu_dereference_check_fdtable((files), (files)->fdt))
> @@ -79,13 +80,13 @@ static inline void free_fdtable(struct fdtable *fdt)
>  	call_rcu(&fdt->rcu, free_fdtable_rcu);
>  }
> 
> -static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
> +static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd, bool cond)
>  {
>  	struct file * file = NULL;
>  	struct fdtable *fdt = files_fdtable(files);
> 
>  	if (fd < fdt->max_fds)
> -		file = rcu_dereference_check_fdtable(files, fdt->fd[fd]);
> +		file = rcu_dereference_check_fdtable(files, fdt->fd[fd], cond);
>  	return file;
>  }
> 
> 
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox