Netdev List
 help / color / mirror / Atom feed
* [PATCH net-2.6 7/9] sfc: Stop masking out XGMII faults over reconfigures
From: Ben Hutchings @ 2010-04-23 22:27 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-net-drivers
In-Reply-To: <1272061459.5520.15.camel@achroite.uk.solarflarecom.com>

From: Steve Hodgson <shodgson@solarflare.com>

The aim of this code was to avoid a spurious XGMII fault over a MAC
reconfigure. It's less relevant now that the PHY reconfigure isn't
called from the MAC reconfigure.

After applying this patch, our link stress test passed 48 hours of
testing without ever resetting the PHY.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 drivers/net/sfc/falcon_xmac.c |   20 +++++---------------
 1 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c
index 8ccab2c..3d65abf 100644
--- a/drivers/net/sfc/falcon_xmac.c
+++ b/drivers/net/sfc/falcon_xmac.c
@@ -85,14 +85,14 @@ int falcon_reset_xaui(struct efx_nic *efx)
 	return -ETIMEDOUT;
 }
 
-static void falcon_mask_status_intr(struct efx_nic *efx, bool enable)
+static void falcon_ack_status_intr(struct efx_nic *efx)
 {
 	efx_oword_t reg;
 
 	if ((efx_nic_rev(efx) != EFX_REV_FALCON_B0) || LOOPBACK_INTERNAL(efx))
 		return;
 
-	/* We expect xgmii faults if the wireside link is up */
+	/* We expect xgmii faults if the wireside link is down */
 	if (!EFX_WORKAROUND_5147(efx) || !efx->link_state.up)
 		return;
 
@@ -101,14 +101,7 @@ static void falcon_mask_status_intr(struct efx_nic *efx, bool enable)
 	if (efx->xmac_poll_required)
 		return;
 
-	/* Flush the ISR */
-	if (enable)
-		efx_reado(efx, &reg, FR_AB_XM_MGT_INT_MSK);
-
-	EFX_POPULATE_OWORD_2(reg,
-			     FRF_AB_XM_MSK_RMTFLT, !enable,
-			     FRF_AB_XM_MSK_LCLFLT, !enable);
-	efx_writeo(efx, &reg, FR_AB_XM_MGT_INT_MASK);
+	efx_reado(efx, &reg, FR_AB_XM_MGT_INT_MSK);
 }
 
 static bool falcon_xgxs_link_ok(struct efx_nic *efx)
@@ -283,15 +276,13 @@ static bool falcon_xmac_check_fault(struct efx_nic *efx)
 
 static int falcon_reconfigure_xmac(struct efx_nic *efx)
 {
-	falcon_mask_status_intr(efx, false);
-
 	falcon_reconfigure_xgxs_core(efx);
 	falcon_reconfigure_xmac_core(efx);
 
 	falcon_reconfigure_mac_wrapper(efx);
 
 	efx->xmac_poll_required = !falcon_xmac_link_ok_retry(efx, 5);
-	falcon_mask_status_intr(efx, true);
+	falcon_ack_status_intr(efx);
 
 	return 0;
 }
@@ -362,9 +353,8 @@ void falcon_poll_xmac(struct efx_nic *efx)
 	    !efx->xmac_poll_required)
 		return;
 
-	falcon_mask_status_intr(efx, false);
 	efx->xmac_poll_required = !falcon_xmac_link_ok_retry(efx, 1);
-	falcon_mask_status_intr(efx, true);
+	falcon_ack_status_intr(efx);
 }
 
 struct efx_mac_operations falcon_xmac_operations = {
-- 
1.6.2.5


-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply related

* [PATCH net-2.6 8/9] sfc: Reconfigure the XAUI serdes after an EM reset
From: Ben Hutchings @ 2010-04-23 22:28 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-net-drivers
In-Reply-To: <1272061459.5520.15.camel@achroite.uk.solarflarecom.com>

From: Steve Hodgson <shodgson@solarflare.com>

Fix a regression introduced in d3245b28ef2a45ec4e115062a38100bd06229289
"sfc: Refactor link configuration".

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 drivers/net/sfc/falcon.c      |    3 +++
 drivers/net/sfc/falcon_xmac.c |    2 +-
 drivers/net/sfc/nic.h         |    1 +
 3 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c
index e783b0a..655b697 100644
--- a/drivers/net/sfc/falcon.c
+++ b/drivers/net/sfc/falcon.c
@@ -507,6 +507,9 @@ static void falcon_reset_macs(struct efx_nic *efx)
 	/* Ensure the correct MAC is selected before statistics
 	 * are re-enabled by the caller */
 	efx_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
+
+	/* This can run even when the GMAC is selected */
+	falcon_setup_xaui(efx);
 }
 
 void falcon_drain_tx_fifo(struct efx_nic *efx)
diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c
index 3d65abf..c84a2ce 100644
--- a/drivers/net/sfc/falcon_xmac.c
+++ b/drivers/net/sfc/falcon_xmac.c
@@ -26,7 +26,7 @@
  *************************************************************************/
 
 /* Configure the XAUI driver that is an output from Falcon */
-static void falcon_setup_xaui(struct efx_nic *efx)
+void falcon_setup_xaui(struct efx_nic *efx)
 {
 	efx_oword_t sdctl, txdrv;
 
diff --git a/drivers/net/sfc/nic.h b/drivers/net/sfc/nic.h
index 3166baf..bcf1ac4 100644
--- a/drivers/net/sfc/nic.h
+++ b/drivers/net/sfc/nic.h
@@ -203,6 +203,7 @@ extern void falcon_irq_ack_a1(struct efx_nic *efx);
 extern int efx_nic_flush_queues(struct efx_nic *efx);
 extern void falcon_start_nic_stats(struct efx_nic *efx);
 extern void falcon_stop_nic_stats(struct efx_nic *efx);
+extern void falcon_setup_xaui(struct efx_nic *efx);
 extern int falcon_reset_xaui(struct efx_nic *efx);
 extern void efx_nic_init_common(struct efx_nic *efx);
 
-- 
1.6.2.5


-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply related

* [PATCH net-2.6 9/9] sfc: Extend the legacy interrupt workarounds
From: Ben Hutchings @ 2010-04-23 22:28 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-net-drivers
In-Reply-To: <1272061459.5520.15.camel@achroite.uk.solarflarecom.com>

From: Steve Hodgson <shodgson@solarflare.com>

Siena has two problems with legacy interrupts:
  1. There is no synchronisation between the ISR read completion,
     and the interrupt deassert message.
  2. A downstream read at the "wrong" moment can return 0, and
     suppress generating the next interrupt.

Falcon should suffer from both of these, and it appears it does.
Enable EFX_WORKAROUND_15783 on Falcon as well.

Also, when we see queues == 0, ensure we always schedule or rearm
every event queue.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 drivers/net/sfc/nic.c         |   23 +++++++++--------------
 drivers/net/sfc/workarounds.h |    2 +-
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c
index 23738f8..b61674c 100644
--- a/drivers/net/sfc/nic.c
+++ b/drivers/net/sfc/nic.c
@@ -1356,33 +1356,28 @@ static irqreturn_t efx_legacy_interrupt(int irq, void *dev_id)
 		}
 		result = IRQ_HANDLED;
 
-	} else if (EFX_WORKAROUND_15783(efx) &&
-		   efx->irq_zero_count++ == 0) {
+	} else if (EFX_WORKAROUND_15783(efx)) {
 		efx_qword_t *event;
 
-		/* Ensure we rearm all event queues */
+		/* We can't return IRQ_HANDLED more than once on seeing ISR=0
+		 * because this might be a shared interrupt. */
+		if (efx->irq_zero_count++ == 0)
+			result = IRQ_HANDLED;
+
+		/* Ensure we schedule or rearm all event queues */
 		efx_for_each_channel(channel, efx) {
 			event = efx_event(channel, channel->eventq_read_ptr);
 			if (efx_event_present(event))
 				efx_schedule_channel(channel);
+			else
+				efx_nic_eventq_read_ack(channel);
 		}
-
-		result = IRQ_HANDLED;
 	}
 
 	if (result == IRQ_HANDLED) {
 		efx->last_irq_cpu = raw_smp_processor_id();
 		EFX_TRACE(efx, "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n",
 			  irq, raw_smp_processor_id(), EFX_DWORD_VAL(reg));
-	} else if (EFX_WORKAROUND_15783(efx)) {
-		/* We can't return IRQ_HANDLED more than once on seeing ISR0=0
-		 * because this might be a shared interrupt, but we do need to
-		 * check the channel every time and preemptively rearm it if
-		 * it's idle. */
-		efx_for_each_channel(channel, efx) {
-			if (!channel->work_pending)
-				efx_nic_eventq_read_ack(channel);
-		}
 	}
 
 	return result;
diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h
index acd9c73..518f7fc 100644
--- a/drivers/net/sfc/workarounds.h
+++ b/drivers/net/sfc/workarounds.h
@@ -37,7 +37,7 @@
 /* Truncated IPv4 packets can confuse the TX packet parser */
 #define EFX_WORKAROUND_15592 EFX_WORKAROUND_FALCON_AB
 /* Legacy ISR read can return zero once */
-#define EFX_WORKAROUND_15783 EFX_WORKAROUND_SIENA
+#define EFX_WORKAROUND_15783 EFX_WORKAROUND_ALWAYS
 /* Legacy interrupt storm when interrupt fifo fills */
 #define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA
 
-- 
1.6.2.5

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply related

* Re: [PATCH net-2.6 1/9] sfc: Wait at most 10ms for the MC to finish reading out MAC statistics
From: Ben Hutchings @ 2010-04-23 22:33 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, sf-linux-drivers
In-Reply-To: <1272061439.5520.14.camel@achroite.uk.solarflarecom.com>

All of these 9 patches should also be applicable to 2.6.33.y, except
that one hunk of "sfc: Consistently report short MCDI responses as EIO"
is not applicable and should be dropped.

Some of the bug fixes are applicable to 2.6.32.y and maybe to 2.6.27.y,
but the patches will need some adjustment.  I intend to send backported
patches to stable@kernel.org separately.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [PATCH net-2.6 1/9] sfc: Wait at most 10ms for the MC to finish reading out MAC statistics
From: David Miller @ 2010-04-23 22:36 UTC (permalink / raw)
  To: bhutchings; +Cc: netdev, linux-net-drivers
In-Reply-To: <1272062009.5520.27.camel@achroite.uk.solarflarecom.com>

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 23 Apr 2010 23:33:28 +0100

> All of these 9 patches should also be applicable to 2.6.33.y, except
> that one hunk of "sfc: Consistently report short MCDI responses as EIO"
> is not applicable and should be dropped.
> 
> Some of the bug fixes are applicable to 2.6.32.y and maybe to 2.6.27.y,
> but the patches will need some adjustment.  I intend to send backported
> patches to stable@kernel.org separately.

There is zero way I'm applying 9 patches this late in the RC
series.

If you want this stuff to go into net-2.6 and get backported
to -stable, pick a very small (2 or 3) set of the most important
fixes.

Consistent -EIO error code returns and junk like that are
not appropriate this late in the RC, and definitely not -stable
material.

^ permalink raw reply

* Re: [PATCH net-2.6 1/9] sfc: Wait at most 10ms for the MC to finish reading out MAC statistics
From: Ben Hutchings @ 2010-04-23 22:54 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-net-drivers
In-Reply-To: <20100423.153633.45897185.davem@davemloft.net>

On Fri, 2010-04-23 at 15:36 -0700, David Miller wrote:
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Fri, 23 Apr 2010 23:33:28 +0100
> 
> > All of these 9 patches should also be applicable to 2.6.33.y, except
> > that one hunk of "sfc: Consistently report short MCDI responses as EIO"
> > is not applicable and should be dropped.
> > 
> > Some of the bug fixes are applicable to 2.6.32.y and maybe to 2.6.27.y,
> > but the patches will need some adjustment.  I intend to send backported
> > patches to stable@kernel.org separately.
> 
> There is zero way I'm applying 9 patches this late in the RC
> series.
> 
> If you want this stuff to go into net-2.6 and get backported
> to -stable, pick a very small (2 or 3) set of the most important
> fixes.

This makes no sense.  You want to put a quota on bug fixes?  I could
arbitrarily pick some but I'm still going to want to get the other fixes
into distributions.

> Consistent -EIO error code returns and junk like that are
> not appropriate this late in the RC, and definitely not -stable
> material.

The important part of that change is that functions were returning 0 in
a failure case.  I should have made that the first sentence in the
commit message.  I didn't see the point in making a separate commit to
fix the wrong error codes, but I can split this up if you prefer.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [PATCH net-2.6 1/9] sfc: Wait at most 10ms for the MC to finish reading out MAC statistics
From: David Miller @ 2010-04-23 22:58 UTC (permalink / raw)
  To: bhutchings; +Cc: netdev, linux-net-drivers
In-Reply-To: <1272063270.5520.39.camel@achroite.uk.solarflarecom.com>

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 23 Apr 2010 23:54:30 +0100

> This makes no sense.  You want to put a quota on bug fixes?  I could
> arbitrarily pick some but I'm still going to want to get the other fixes
> into distributions.

It's not a quota.  It's a request that only the most catastrophic
bugs get fixed this late in the RC.

You don't have 9 catastrophic bugs to fix in your driver.

^ permalink raw reply

* Re: [PATCH] RCU: don't turn off lockdep when find suspicious rcu_dereference_check() usage
From: Miles Lane @ 2010-04-23 22:59 UTC (permalink / raw)
  To: paulmck
  Cc: Vivek Goyal, Eric Paris, Lai Jiangshan, Ingo Molnar,
	Peter Zijlstra, LKML, nauman, eric.dumazet, netdev, Jens Axboe,
	Gui Jianfeng, Li Zefan
In-Reply-To: <20100423194255.GE2589@linux.vnet.ibm.com>

On Fri, Apr 23, 2010 at 3:42 PM, Paul E. McKenney
<paulmck@linux.vnet.ibm.com> wrote:
> On Fri, Apr 23, 2010 at 08:50:59AM -0400, Miles Lane wrote:
>> Hi Paul,
>> There has been a bit of back and forth, and I am not sure what patches
>> I should test now.
>> Could you send me a bundle of whatever needs testing now?
>
> Hello, Miles,
>
> I am posting my set as replies to this message.  There are a couple
> of KVM fixes that are going up via Avi's tree, and a number of networking
> fixes that are going up via Dave Miller's tree -- a number of these
> are against quickly changing code, so it didn't make sense for me to
> keep them separately.
>
> I believe that the two splats below are addressed by this patch set
> carried in the networking tree:
>
>        https://patchwork.kernel.org/patch/90754/

With your twelve patches and the one linked to above applied to
2.6.34-rc5-git3, here are the warnings I see:

[    0.173969] [ INFO: suspicious rcu_dereference_check() usage. ]
[    0.174097] ---------------------------------------------------
[    0.174226] include/linux/cgroup.h:534 invoked
rcu_dereference_check() without protection!
[    0.174429]
[    0.174430] other info that might help us debug this:
[    0.174431]
[    0.174792]
[    0.174793] rcu_scheduler_active = 1, debug_locks = 1
[    0.175037] no locks held by watchdog/0/5.
[    0.175162]
[    0.175163] stack backtrace:
[    0.175405] Pid: 5, comm: watchdog/0 Not tainted 2.6.34-rc5-git3 #22
[    0.175534] Call Trace:
[    0.175666]  [<ffffffff81067fbe>] lockdep_rcu_dereference+0x9d/0xa5
[    0.175799]  [<ffffffff8102d678>] task_subsys_state+0x59/0x70
[    0.175931]  [<ffffffff810328fa>] __sched_setscheduler+0x19d/0x300
[    0.176064]  [<ffffffff8102b477>] ? need_resched+0x1e/0x28
[    0.176196]  [<ffffffff813cd401>] ? schedule+0x5c3/0x66e
[    0.176327]  [<ffffffff81091943>] ? watchdog+0x0/0x8c
[    0.176457]  [<ffffffff81032a78>] sched_setscheduler+0xe/0x10
[    0.176587]  [<ffffffff8109196d>] watchdog+0x2a/0x8c
[    0.176677]  [<ffffffff81091943>] ? watchdog+0x0/0x8c
[    0.176808]  [<ffffffff81057152>] kthread+0x89/0x91
[    0.176939]  [<ffffffff8106891e>] ? trace_hardirqs_on_caller+0x114/0x13f
[    0.177073]  [<ffffffff81003994>] kernel_thread_helper+0x4/0x10
[    0.177204]  [<ffffffff813cfc40>] ? restore_args+0x0/0x30
[    0.177334]  [<ffffffff810570c9>] ? kthread+0x0/0x91
[    0.177463]  [<ffffffff81003990>] ? kernel_thread_helper+0x0/0x10

[    3.173419] [ INFO: suspicious rcu_dereference_check() usage. ]
[    3.173419] ---------------------------------------------------
[    3.173419] kernel/cgroup.c:4438 invoked rcu_dereference_check()
without protection!
[    3.173419]
[    3.173419] other info that might help us debug this:
[    3.173419]
[    3.173419]
[    3.173419] rcu_scheduler_active = 1, debug_locks = 1
[    3.173419] 2 locks held by async/0/668:
[    3.173419]  #0:  (&shost->scan_mutex){+.+.+.}, at:
[<ffffffff812df020>] __scsi_add_device+0x83/0xe4
[    3.173419]  #1:  (&(&blkcg->lock)->rlock){......}, at:
[<ffffffff811f2df9>] blkiocg_add_blkio_group+0x29/0x7f
[    3.173419]
[    3.173419] stack backtrace:
[    3.173419] Pid: 668, comm: async/0 Not tainted 2.6.34-rc5-git3 #22
[    3.173419] Call Trace:
[    3.173419]  [<ffffffff81067fbe>] lockdep_rcu_dereference+0x9d/0xa5
[    3.173419]  [<ffffffff8107f9ad>] css_id+0x3f/0x51
[    3.173419]  [<ffffffff811f2e08>] blkiocg_add_blkio_group+0x38/0x7f
[    3.173419]  [<ffffffff811f4dd0>] cfq_init_queue+0xdf/0x2dc
[    3.173419]  [<ffffffff811e33b1>] elevator_init+0xba/0xf5
[    3.173419]  [<ffffffff812dbfaa>] ? scsi_request_fn+0x0/0x451
[    3.173419]  [<ffffffff811e68d7>] blk_init_queue_node+0x12f/0x135
[    3.173419]  [<ffffffff811e68e9>] blk_init_queue+0xc/0xe
[    3.173419]  [<ffffffff812dc41c>] __scsi_alloc_queue+0x21/0x111
[    3.173419]  [<ffffffff812dc524>] scsi_alloc_queue+0x18/0x64
[    3.173419]  [<ffffffff812de520>] scsi_alloc_sdev+0x19e/0x256
[    3.173419]  [<ffffffff812de6be>] scsi_probe_and_add_lun+0xe6/0x9c5
[    3.173419]  [<ffffffff8106891e>] ? trace_hardirqs_on_caller+0x114/0x13f
[    3.173419]  [<ffffffff813ce056>] ? __mutex_lock_common+0x3e4/0x43a
[    3.173419]  [<ffffffff812df020>] ? __scsi_add_device+0x83/0xe4
[    3.173419]  [<ffffffff812d09dc>] ? transport_setup_classdev+0x0/0x17
[    3.173419]  [<ffffffff812df020>] ? __scsi_add_device+0x83/0xe4
[    3.173419]  [<ffffffff812df055>] __scsi_add_device+0xb8/0xe4
[    3.173419]  [<ffffffff812ea945>] ata_scsi_scan_host+0x74/0x16e
[    3.173419]  [<ffffffff81057699>] ? autoremove_wake_function+0x0/0x34
[    3.173419]  [<ffffffff812e8de4>] async_port_probe+0xab/0xb7
[    3.173419]  [<ffffffff8105e1b1>] ? async_thread+0x0/0x1f4
[    3.173419]  [<ffffffff8105e2b6>] async_thread+0x105/0x1f4
[    3.173419]  [<ffffffff81033d8e>] ? default_wake_function+0x0/0xf
[    3.173419]  [<ffffffff8105e1b1>] ? async_thread+0x0/0x1f4
[    3.173419]  [<ffffffff81057152>] kthread+0x89/0x91
[    3.173419]  [<ffffffff8106891e>] ? trace_hardirqs_on_caller+0x114/0x13f
[    3.173419]  [<ffffffff81003994>] kernel_thread_helper+0x4/0x10
[    3.173419]  [<ffffffff813cfc40>] ? restore_args+0x0/0x30
[    3.173419]  [<ffffffff810570c9>] ? kthread+0x0/0x91
[    3.173419]  [<ffffffff81003990>] ? kernel_thread_helper+0x0/0x10

[   32.905446] [ INFO: suspicious rcu_dereference_check() usage. ]
[   32.905449] ---------------------------------------------------
[   32.905453] net/core/dev.c:1993 invoked rcu_dereference_check()
without protection!
[   32.905456]
[   32.905457] other info that might help us debug this:
[   32.905458]
[   32.905461]
[   32.905462] rcu_scheduler_active = 1, debug_locks = 1
[   32.905466] 2 locks held by canberra-gtk-pl/4182:
[   32.905469]  #0:  (sk_lock-AF_INET){+.+.+.}, at:
[<ffffffff81394f7d>] inet_stream_connect+0x3a/0x24d
[   32.905483]  #1:  (rcu_read_lock_bh){.+....}, at:
[<ffffffff8134a789>] dev_queue_xmit+0x14e/0x4b8
[   32.905495]
[   32.905496] stack backtrace:
[   32.905500] Pid: 4182, comm: canberra-gtk-pl Not tainted 2.6.34-rc5-git3 #22
[   32.905504] Call Trace:
[   32.905512]  [<ffffffff81067fbe>] lockdep_rcu_dereference+0x9d/0xa5
[   32.905518]  [<ffffffff8134a894>] dev_queue_xmit+0x259/0x4b8
[   32.905524]  [<ffffffff8134a789>] ? dev_queue_xmit+0x14e/0x4b8
[   32.905531]  [<ffffffff81041c66>] ? _local_bh_enable_ip+0xcd/0xda
[   32.905538]  [<ffffffff813536da>] neigh_resolve_output+0x234/0x285
[   32.905544]  [<ffffffff8136f69f>] ip_finish_output2+0x257/0x28c
[   32.905549]  [<ffffffff8136f73c>] ip_finish_output+0x68/0x6a
[   32.905554]  [<ffffffff81370433>] T.866+0x52/0x59
[   32.905559]  [<ffffffff8137067e>] ip_output+0xaa/0xb4
[   32.905565]  [<ffffffff8136eb38>] ip_local_out+0x20/0x24
[   32.905571]  [<ffffffff8136f184>] ip_queue_xmit+0x309/0x368
[   32.905578]  [<ffffffff810e4226>] ? __kmalloc_track_caller+0x111/0x155
[   32.905585]  [<ffffffff8138316f>] ? tcp_connect+0x223/0x3d3
[   32.905591]  [<ffffffff813818f1>] tcp_transmit_skb+0x707/0x745
[   32.905597]  [<ffffffff813832c2>] tcp_connect+0x376/0x3d3
[   32.905604]  [<ffffffff81268a43>] ? secure_tcp_sequence_number+0x55/0x6f
[   32.905610]  [<ffffffff81387270>] tcp_v4_connect+0x3df/0x455
[   32.905617]  [<ffffffff8133cb59>] ? lock_sock_nested+0xf3/0x102
[   32.905623]  [<ffffffff81394fe7>] inet_stream_connect+0xa4/0x24d
[   32.905629]  [<ffffffff8133b398>] sys_connect+0x90/0xd0
[   32.905636]  [<ffffffff81002b9c>] ? sysret_check+0x27/0x62
[   32.905642]  [<ffffffff8106891e>] ? trace_hardirqs_on_caller+0x114/0x13f
[   32.905649]  [<ffffffff813cec80>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[   32.905655]  [<ffffffff81002b6b>] system_call_fastpath+0x16/0x1b

[   51.912282] [ INFO: suspicious rcu_dereference_check() usage. ]
[   51.912285] ---------------------------------------------------
[   51.912289] net/mac80211/sta_info.c:886 invoked
rcu_dereference_check() without protection!
[   51.912293]
[   51.912293] other info that might help us debug this:
[   51.912295]
[   51.912298]
[   51.912298] rcu_scheduler_active = 1, debug_locks = 1
[   51.912302] no locks held by wpa_supplicant/3951.
[   51.912305]
[   51.912306] stack backtrace:
[   51.912310] Pid: 3951, comm: wpa_supplicant Not tainted 2.6.34-rc5-git3 #22
[   51.912314] Call Trace:
[   51.912317]  <IRQ>  [<ffffffff81067fbe>] lockdep_rcu_dereference+0x9d/0xa5
[   51.912345]  [<ffffffffa014f9ae>]
ieee80211_find_sta_by_hw+0x46/0x10f [mac80211]
[   51.912358]  [<ffffffffa014fa8e>] ieee80211_find_sta+0x17/0x19 [mac80211]
[   51.912373]  [<ffffffffa01e50f2>] iwl_tx_queue_reclaim+0xdb/0x1b1 [iwlcore]
[   51.912380]  [<ffffffff8106842b>] ? mark_lock+0x2d/0x235
[   51.912391]  [<ffffffffa0252f1c>] iwl5000_rx_reply_tx+0x4a9/0x556 [iwlagn]
[   51.912399]  [<ffffffff8120a353>] ? is_swiotlb_buffer+0x2e/0x3b
[   51.912407]  [<ffffffffa024bbf4>] iwl_rx_handle+0x163/0x2b5 [iwlagn]
[   51.912414]  [<ffffffff81068904>] ? trace_hardirqs_on_caller+0xfa/0x13f
[   51.912422]  [<ffffffffa024c3ac>] iwl_irq_tasklet+0x2bb/0x3c0 [iwlagn]
[   51.912429]  [<ffffffff810411f3>] tasklet_action+0xa7/0x10f
[   51.912435]  [<ffffffff81042205>] __do_softirq+0x144/0x252
[   51.912442]  [<ffffffff81003a8c>] call_softirq+0x1c/0x34
[   51.912447]  [<ffffffff810050e4>] do_softirq+0x38/0x80
[   51.912452]  [<ffffffff81041cd2>] irq_exit+0x45/0x94
[   51.912457]  [<ffffffff81004829>] do_IRQ+0xad/0xc4
[   51.912463]  [<ffffffff810cbbd3>] ? might_fault+0x63/0xb3
[   51.912470]  [<ffffffff813cfb93>] ret_from_intr+0x0/0xf
[   51.912474]  <EOI>  [<ffffffff810cbbd3>] ? might_fault+0x63/0xb3
[   51.912484]  [<ffffffff8106a75d>] ? lock_release+0x208/0x215
[   51.912490]  [<ffffffff810cbc1c>] might_fault+0xac/0xb3
[   51.912495]  [<ffffffff810cbbd3>] ? might_fault+0x63/0xb3
[   51.912501]  [<ffffffff812025e3>] __clear_user+0x15/0x59
[   51.912508]  [<ffffffff8100b2bc>] save_i387_xstate+0x9c/0x1bc
[   51.912515]  [<ffffffff81002276>] do_signal+0x240/0x686
[   51.912521]  [<ffffffff81002b9c>] ? sysret_check+0x27/0x62
[   51.912527]  [<ffffffff8106891e>] ? trace_hardirqs_on_caller+0x114/0x13f
[   51.912533]  [<ffffffff813cec80>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[   51.912539]  [<ffffffff810026e3>] do_notify_resume+0x27/0x5f
[   51.912545]  [<ffffffff813cec80>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[   51.912551]  [<ffffffff81002e86>] int_signal+0x12/0x17

[   51.929529] [ INFO: suspicious rcu_dereference_check() usage. ]
[   51.929532] ---------------------------------------------------
[   51.929536] net/mac80211/sta_info.c:886 invoked
rcu_dereference_check() without protection!
[   51.929540]
[   51.929541] other info that might help us debug this:
[   51.929542]
[   51.929545]
[   51.929546] rcu_scheduler_active = 1, debug_locks = 1
[   51.929550] 1 lock held by Xorg/4013:
[   51.929553]  #0:  (clock-AF_UNIX){++.+..}, at: [<ffffffff8133cebd>]
sock_def_readable+0x19/0x62
[   51.929567]
[   51.929568] stack backtrace:
[   51.929573] Pid: 4013, comm: Xorg Not tainted 2.6.34-rc5-git3 #22
[   51.929576] Call Trace:
[   51.929579]  <IRQ>  [<ffffffff81067fbe>] lockdep_rcu_dereference+0x9d/0xa5
[   51.929603]  [<ffffffffa014f9fe>]
ieee80211_find_sta_by_hw+0x96/0x10f [mac80211]
[   51.929615]  [<ffffffffa014fa8e>] ieee80211_find_sta+0x17/0x19 [mac80211]
[   51.929631]  [<ffffffffa01e50f2>] iwl_tx_queue_reclaim+0xdb/0x1b1 [iwlcore]
[   51.929642]  [<ffffffffa0252f1c>] iwl5000_rx_reply_tx+0x4a9/0x556 [iwlagn]
[   51.929649]  [<ffffffff81068685>] ? mark_held_locks+0x52/0x70
[   51.929656]  [<ffffffff813cf46c>] ? _raw_spin_unlock_irqrestore+0x3a/0x69
[   51.929662]  [<ffffffff8120a353>] ? is_swiotlb_buffer+0x2e/0x3b
[   51.929671]  [<ffffffffa024bbf4>] iwl_rx_handle+0x163/0x2b5 [iwlagn]
[   51.929680]  [<ffffffffa024c3ac>] iwl_irq_tasklet+0x2bb/0x3c0 [iwlagn]
[   51.929687]  [<ffffffff810411f3>] tasklet_action+0xa7/0x10f
[   51.929693]  [<ffffffff81042205>] __do_softirq+0x144/0x252
[   51.929700]  [<ffffffff81003a8c>] call_softirq+0x1c/0x34
[   51.929705]  [<ffffffff810050e4>] do_softirq+0x38/0x80
[   51.929711]  [<ffffffff81041cd2>] irq_exit+0x45/0x94
[   51.929717]  [<ffffffff81019b10>] smp_apic_timer_interrupt+0x87/0x95
[   51.929724]  [<ffffffff81003553>] apic_timer_interrupt+0x13/0x20
[   51.929727]  <EOI>  [<ffffffff813cf46e>] ?
_raw_spin_unlock_irqrestore+0x3c/0x69
[   51.929739]  [<ffffffff8102d3fb>] __wake_up_sync_key+0x49/0x52
[   51.929745]  [<ffffffff8133cee7>] sock_def_readable+0x43/0x62
[   51.929751]  [<ffffffff813b1c61>] unix_stream_sendmsg+0x243/0x2e2
[   51.929758]  [<ffffffff8133b912>] ? sock_aio_write+0x0/0xcf
[   51.929764]  [<ffffffff81339342>] __sock_sendmsg+0x59/0x64
[   51.929770]  [<ffffffff8133b9cd>] sock_aio_write+0xbb/0xcf
[   51.929777]  [<ffffffff810e9909>] do_sync_readv_writev+0xbc/0xfb
[   51.929785]  [<ffffffff811c1792>] ? selinux_file_permission+0xa2/0xaf
[   51.929790]  [<ffffffff810e9690>] ? copy_from_user+0x2a/0x2c
[   51.929797]  [<ffffffff811baff1>] ? security_file_permission+0x11/0x13
[   51.929804]  [<ffffffff810ea6a6>] do_readv_writev+0xa2/0x122
[   51.929810]  [<ffffffff810ead93>] ? fcheck_files+0x8f/0xc9
[   51.929816]  [<ffffffff810ea764>] vfs_writev+0x3e/0x49
[   51.929821]  [<ffffffff810ea84a>] sys_writev+0x45/0x8e
[   51.929828]  [<ffffffff81002b6b>] system_call_fastpath+0x16/0x1b

^ permalink raw reply

* Re: eSwitch management
From: Chris Wright @ 2010-04-23 23:04 UTC (permalink / raw)
  To: Anirban Chakraborty
  Cc: Chris Wright, Scott Feldman, David Miller, netdev@vger.kernel.org,
	Arnd Bergmann, Ameen Rahman, Amit Salecha, Rajesh Borundia,
	shemminger@vyatta.com
In-Reply-To: <193C9C72-488F-4543-9BC1-F9938F189E91@qlogic.com>

* Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
> 
> On Apr 23, 2010, at 12:44 PM, Chris Wright wrote:
> 
> > * Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
> >> On Apr 23, 2010, at 9:23 AM, Chris Wright wrote:
> >>> * Anirban Chakraborty (anirban.chakraborty@qlogic.com) wrote:
> >>>> It looks like ifla_vf_info does contain most of the data set. But if I use it, what NETLINK protocol family should I use in my driver to receive netlink messages? Do I need to create a private protocol family?
> >>> 
> >>> No, you don't need to use netlink in your driver.  You just need to fill
> >>> in the relevant net_device_ops in your driver init.  Specifically:
> >>> 
> >>> *      SR-IOV management functions.
> >>> * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
> >>> * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
> >>> * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
> >>> * int (*ndo_get_vf_config)(struct net_device *dev,
> >>> *                          int vf, struct ifla_vf_info *ivf);
> >>> 
> >>> These are all operating on a VF indexed internally w/in the driver, so it's
> >>> a little cumbersome to use from userspace.
> >> 
> >> These are all intended for VFs and are configurable from PF.
> > 
> > Yes, and while the set of callbacks can change, they are always tied to
> > some net_device (typically the PF) that knows how to make hardware
> > settings on behalf of a VF.
> > 
> >> However, in our case, there are multiple physical NIC functions on a
> >> port which are configurable by the eswitch.
> > 
> > Is there a PCI function that represents the switch?  Or a special PCI
> > NIC function that has VEB mgmt plane access?  And do you have examples
> > of configuration that you'll do here?
> 
> There is no PCI function that represents the switch. However, one
> of the NIC functions can act as a privileged function to configure the
> eswitch. Typically the first NIC function that is enumerated in the bus
> manages the eswitch. Typical configurations would be to set tx bandwidth,
> VLAN ID, MAC address, promiscuous mode setting for each of these ports
> at the start of the day. This is useful in virtualization scenario where
> we can do PCI passthru of the functions to the guest and these settings
> for the guest are configured via the driver in the host.

(btw, this is not uncommon, there are other adapters that have multiple
functions for a single physical port that is not SR-IOV based)

How does the privileged function identify the other functions?  IOW, the
existing SR-IOV ndo callbacks have most of the above (tx bw control, mac,
vlan id), and have an 'int vf' which is basically just a driver specific
identifier to a non-privileged function or set of hw resources.  It looks
like you can use the existing bits (just need to expand a little).

So far we have only:

- tx bw control
- set mac addr
- set vlan id

You've additionally identified:

- set promiscuous mode

I'm also aware of:

- setting port aggregation
- issuing a function reset
- setting port mirroring or bcast/mcast replication
- setting anti-spoofing (mac/vlan..)
- setting security/filtering
- getting port statistics
- ...whatever else I'm forgetting

> <snip>
> > 
> > One idea that has been discussed in the past is to create essentially
> > a pluggable set of bridge_ops.  The first step would be purely internal
> > shuffling, to make the existing sw bridge code go through the bridge_ops.
> > The second step would be making your driver for whichever PCI function
> > you have that supports managing the bridge create a net_device which is
> > a bridge during driver init.  And now normal brctl can call into your
> > VEB via the bridge_ops callbacks. </handwave>
> > 
> I liked the idea of iovnl as it works by utilizing port profile. That way the eswitch can be configured with the same port profile that a vswitch in a hypervisor has.

I don't quite follow you here.

thanks,
-chris

^ permalink raw reply

* Re: [PATCH 1/2] fsl_pq_mdio: Fix kernel oops during OF address translation
From: David Miller @ 2010-04-23 23:20 UTC (permalink / raw)
  To: avorontsov; +Cc: Sandeep.Kumar, netdev, linuxppc-dev
In-Reply-To: <20100423171235.GA2140@oksana.dev.rtsoft.ru>

From: Anton Vorontsov <avorontsov@mvista.com>
Date: Fri, 23 Apr 2010 21:12:35 +0400

> Old P1020RDB device trees were not specifying tbipa address for
> MDIO nodes, which is now causing this kernel oops:
> 
>  ...
>  eth2: TX BD ring size for Q[6]: 256
>  eth2: TX BD ring size for Q[7]: 256
>  Unable to handle kernel paging request for data at address 0x00000000
>  Faulting instruction address: 0xc0015504
>  Oops: Kernel access of bad area, sig: 11 [#1]
>  ...
>  NIP [c0015504] memcpy+0x3c/0x9c
>  LR [c000a9f8] __of_translate_address+0xfc/0x21c
>  Call Trace:
>  [df839e00] [c000a94c] __of_translate_address+0x50/0x21c (unreliable)
>  [df839e50] [c01a33e8] get_gfar_tbipa+0xb0/0xe0
>  ...
> 
> The old device trees are buggy, though having a dead ethernet is
> better than a dead kernel, so fix the issue by using of_iomap().
> 
> Also, a somewhat similar issue exists in the probe() routine, though
> there the oops is only a possibility. Nonetheless, fix it too.
> 
> Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>

Seems reasonable, applied to net-2.6 thanks!

^ permalink raw reply

* Re: [PATCH 2/2] gianfar: Fix potential oops during OF address translation
From: David Miller @ 2010-04-23 23:20 UTC (permalink / raw)
  To: avorontsov; +Cc: Sandeep.Kumar, netdev, linuxppc-dev
In-Reply-To: <20100423171244.GB2140@oksana.dev.rtsoft.ru>

From: Anton Vorontsov <avorontsov@mvista.com>
Date: Fri, 23 Apr 2010 21:12:44 +0400

> gianfar driver may pass NULL pointer to the of_translate_address(),
> which may lead to a kernel oops. Fix this by using of_iomap(), which
> is also much simpler and shorter.
> 
> Signed-off-by: Anton Vorontsov <avorontsov@mvista.com>

Also applied to net-2.6, thanks.

^ permalink raw reply

* Re: pull request: wireless-next-2.6 2010-04-23
From: David Miller @ 2010-04-23 23:22 UTC (permalink / raw)
  To: linville; +Cc: linux-wireless, netdev
In-Reply-To: <20100423190139.GA7276@tuxdriver.com>

From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 23 Apr 2010 15:01:40 -0400

> Yet another huge batch of updates intended for 2.6.35.  The ath9k driver
> in particular gets a lot of attention, and the iwlwifi team continues
> its usual strong showing.
> 
> Please let me know if there are problems!  Again, this is for the
> 'for-davem' branch where I have pre-resolved some merge conflicts.

Pulled, thanks John.

^ permalink raw reply

* Re: [PATCH] e100: Fix the TX workqueue race
From: David Miller @ 2010-04-23 23:31 UTC (permalink / raw)
  To: alan; +Cc: netdev, e1000-devel
In-Reply-To: <20100423143356.7092.45260.stgit@localhost.localdomain>

From: Alan Cox <alan@linux.intel.com>
Date: Fri, 23 Apr 2010 15:34:43 +0100

> I'd assumed someone would have picked up on this and fixed it using rtnl_lock
> as was suggested but it seems to have fallen through the cracks ?
> 
> Anyway this is I assume what was meant ?

I hope this doesn't deadlock with linkwatch, as that's generally
a problem we hit with trying to take RTNL from workqueues in
the networking.

Linkwatch takes the RTNL lock, and then can make calls into the driver
in its main work loop.

But since you don't hold any driver locks here (you can't, as if we did
we couldn't take the RTNL lock here at all) it should be OK.

I'll apply this to net-2.6, thanks Alan.

^ permalink raw reply

* Re: [PATCH] e100: Fix the TX workqueue race
From: David Miller @ 2010-04-23 23:35 UTC (permalink / raw)
  To: alan; +Cc: netdev, e1000-devel
In-Reply-To: <20100423.163127.62645049.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Fri, 23 Apr 2010 16:31:27 -0700 (PDT)

> I'll apply this to net-2.6, thanks Alan.

Nevermind...

Doesn't apply to net-2.6, but even when I fix that up it doesn't
even compile.  There is no 'dev' variable present etc.

You even use a combination of "dev" and "netdev" in the resulting
code block.

If it doesn't even build, I doubt it's been tested either.

Please resolve this and get some testing on it, thanks.

^ permalink raw reply

* Re: [PATCH net-next-2.6] l2tp: fix memory allocation
From: David Miller @ 2010-04-23 23:37 UTC (permalink / raw)
  To: jpirko; +Cc: netdev, kleptog, jchapman
In-Reply-To: <20100423105338.GA2853@psychotron.lab.eng.brq.redhat.com>

From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 23 Apr 2010 12:53:39 +0200

> Since .size is set properly in "struct pernet_operations l2tp_net_ops",
> allocating space for "struct l2tp_net" by hand is not correct, even causes
> memory leakage.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next-2.6] l2tp_eth: fix memory allocation
From: David Miller @ 2010-04-23 23:37 UTC (permalink / raw)
  To: jpirko; +Cc: netdev, kleptog, jchapman
In-Reply-To: <20100423110151.GB2853@psychotron.lab.eng.brq.redhat.com>

From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 23 Apr 2010 13:01:52 +0200

> Since .size is set properly in "struct pernet_operations l2tp_eth_net_ops",
> allocating space for "struct l2tp_eth_net" by hand is not correct, even causes
> memory leakage.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>

Also applied, thanks Jiri.

^ permalink raw reply

* [GIT] Networking
From: David Miller @ 2010-04-23 23:52 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, netdev, linux-kernel


1) Incorrect ERR_PTR handling in rtnetlink and rdma, from
   Dan Carpenter.

2) New IPHONE ipheth driver from Diego Giagio.

3) ipv6 ipsec grabs wrong device when bundling.  Fix from Nicolas Dichtel.

4) ipv6 TCP code fixes transport header of wrong SKB, from Herbert Xu.

5) Bridge igmp snooping code missing ntohs(), from Eric Dumazet.

6) TCP bind can croak when many ports are bound, effects are exacerbated
   when there are several local IP addresses.  Also from Eric Dumazet.

7) Fix cxgb3 link-up regression with certain chips, from Hiroshi Shimamoto.

8) ipv6 doesn't respect bind-to-device settings properly, fix from Jiri Olsa.

9) Races in KS8851 TX handling can result in an OOPS, from Abraham Arce.

10) Debugging build breaks due to typo in hex dump function name,
    fix from Alexander Kuznetsov.

11) gianfar/fsl_pq_mdio can OOPS due to buggy device trees which do
    exist in the real world, fix from Anton Vorontsov.

12) Fix RCU warning in dev_pick_tx(), from David Howells.

13) A few small wireless fixes (iwlwifi scanning races, bad
    aggregation handling, wrong regulatory bits checked in iwlwifi
    EEPROM) from Reinette Chatre, Johannes Berg, and Shanyu Zhao.

14) Fix mishandling of dead unaccepted sockets in x25, from Andrew
    Hendry.

Please pull, thanks a lot!

The following changes since commit 33eaf788345c0311ab48ae62673c05f59fb09bb3:
  Linus Torvalds (1):
        Merge branch 'for-linus' of git://git.kernel.org/.../tiwai/sound-2.6

are available in the git repository at:

  master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6.git master

Abraham Arce (1):
      KS8851: NULL pointer dereference if list is empty

Alexander Kurz (1):
      net: 3c574_cs fix stats.tx_bytes counter

Alexander Kuznetsov (1):
      8139too: Fix a typo in the function name.

Anton Vorontsov (2):
      fsl_pq_mdio: Fix kernel oops during OF address translation
      gianfar: Fix potential oops during OF address translation

Dan Carpenter (2):
      rtnetlink: potential ERR_PTR dereference
      rdma: potential ERR_PTR dereference

David Howells (1):
      net: Fix an RCU warning in dev_pick_tx()

David S. Miller (2):
      Merge branch 'master' of git://git.kernel.org/.../linville/wireless-2.6
      Merge branch 'master' of /home/davem/src/GIT/linux-2.6/

Diego Giagio (1):
      drivers/net/usb: Add new driver ipheth

Eric Dumazet (2):
      bridge: add a missing ntohs()
      tcp: bind() fix when many ports are bound

Hans J. Koch (1):
      can: Fix possible NULL pointer dereference in ems_usb.c

Herbert Xu (1):
      ipv6: Fix tcp_v6_send_response transport header setting.

Hiroshi Shimamoto (1):
      cxgb3: fix linkup issue

Jiri Olsa (1):
      net: ipv6 bind to device issue

Johannes Berg (2):
      iwlwifi: fix scan races
      mac80211: remove bogus TX agg state assignment

Nicolas Dichtel (1):
      xfrm6: ensure to use the same dev when building a bundle

Reinette Chatre (1):
      mac80211: pass HT changes to driver when off channel

Shan Wei (1):
      ipv6: allow to send packet after receiving ICMPv6 Too Big message with MTU field less than IPV6_MIN_MTU

Shanyu Zhao (1):
      iwlwifi: correct 6000 EEPROM regulatory address

andrew hendry (1):
      X25 fix dead unaccepted sockets

 drivers/net/8139too.c                     |    2 +-
 drivers/net/Makefile                      |    1 +
 drivers/net/can/usb/ems_usb.c             |    8 +-
 drivers/net/cxgb3/ael1002.c               |    2 +-
 drivers/net/fsl_pq_mdio.c                 |   20 +-
 drivers/net/gianfar.c                     |    6 +-
 drivers/net/ks8851.c                      |   12 +-
 drivers/net/pcmcia/3c574_cs.c             |    7 +-
 drivers/net/usb/Kconfig                   |   12 +
 drivers/net/usb/Makefile                  |    1 +
 drivers/net/usb/ipheth.c                  |  568 +++++++++++++++++++++++++++++
 drivers/net/wireless/iwlwifi/iwl-6000.c   |    4 +-
 drivers/net/wireless/iwlwifi/iwl-agn.c    |    1 +
 drivers/net/wireless/iwlwifi/iwl-core.c   |    1 -
 drivers/net/wireless/iwlwifi/iwl-core.h   |    2 +-
 drivers/net/wireless/iwlwifi/iwl-dev.h    |    1 +
 drivers/net/wireless/iwlwifi/iwl-eeprom.h |    4 +
 drivers/net/wireless/iwlwifi/iwl-scan.c   |   31 +-
 net/bridge/br_multicast.c                 |    2 +-
 net/core/dev.c                            |    2 +-
 net/core/rtnetlink.c                      |    5 +-
 net/ipv4/inet_connection_sock.c           |   16 +-
 net/ipv6/inet6_connection_sock.c          |   15 +-
 net/ipv6/ip6_output.c                     |    2 +-
 net/ipv6/route.c                          |    2 +-
 net/ipv6/tcp_ipv6.c                       |    2 +-
 net/ipv6/xfrm6_policy.c                   |    2 +-
 net/mac80211/agg-tx.c                     |    1 -
 net/mac80211/mlme.c                       |    2 +
 net/rds/rdma_transport.c                  |    2 +-
 net/x25/af_x25.c                          |    1 +
 31 files changed, 678 insertions(+), 59 deletions(-)
 create mode 100644 drivers/net/usb/ipheth.c

^ permalink raw reply

* [RFC net-next-2.6 PATCH] ixgbe: Example usage of new IRQ affinity_hints for FCoE
From: John Fastabend @ 2010-04-24  0:21 UTC (permalink / raw)
  To: bhutchings, peter.p.waskiewicz.jr, netdev, arjan, davem, tglx
  Cc: john.r.fastabend, linux-kernel

If the fcoe protocol handler fcoe_rcv() is already executing
on the correct CPU, SCSI-FCP frames can avoid context switching
from the NET_RX softirq to the receive processing thread.

To avoid this context switch this patch uses the affinity_hint
callback to align the interrupt with the FCoE receive processing
threads.

To properly align interrupts for FCoE knowledge of how the
FCoE receive processing threads are setup as well as which
rx rings are dedicated to FCoE and their associated vectors
is needed.  Additionally if the FCoE application TLV moves
the FCoE priority this alignment will also need to change.
Handling this in irqbalance alone would require irqbalance
to be aware of DCB, the tx/rx ring mapping for FCoE in
ixgbe and FCoE recv thread CPU mappings. For these reasons
allowing irqbalance to accept hints from ixgbe is ideal for
this case.

Some quick investigative performance numbers show that by
aligning the interrupt correctly an increase in ~50k IOPS
and a decrease in ~10-15% CPU usage can be seen from a
standard default configuration setup by today's irqbalance.

This is a test patch to illustrate how using irq
affinity_hints in ixgbe can benefit FCoE.  This patch does
not consider the case where multiple CPU threads can map
to the same queue; this case would need further work.
However, I think it does show the benefit of having
an interface to provide affinity_hints.

This patch applies on top of Peter P Waskiewicz Jr previous
series of two patches to implement the affinity hint callback
framework and sample implementation in ixgbe.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 drivers/net/ixgbe/ixgbe_main.c |   16 +++++++++++++++-
 1 files changed, 15 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index e4cff48..a680424 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1420,6 +1420,7 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
 	struct ixgbe_q_vector *q_vector;
 	int i, j, q_vectors, v_idx, r_idx;
 	u32 mask;
+	struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_FCOE];
 
 	q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
 
@@ -1463,9 +1464,22 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
 		/*
 		 * Allocate the affinity_hint cpumask, assign the mask for
 		 * this vector, and register our affinity_hint callback.
+		 *
+		 * JF: Add a check for FCoE enabled adapter to catch rings
+		 * that are enabled for FCoE and align them to their
+		 * corresponding FCoE recv processing thread.
 		 */
 		alloc_cpumask_var(&q_vector->affinity_mask, GFP_KERNEL);
-		cpumask_set_cpu(v_idx, q_vector->affinity_mask);
+
+		r_idx = find_first_bit(q_vector->rxr_idx,
+		                       adapter->num_rx_queues);
+		if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED &&
+		    (r_idx >= f->mask) && (r_idx < f->mask + f->indices))
+			cpumask_set_cpu(r_idx - f->mask,
+					q_vector->affinity_mask);
+		else
+			cpumask_set_cpu(v_idx, q_vector->affinity_mask);
+
 		irq_register_affinity_hint(adapter->msix_entries[v_idx].vector,
 		                           adapter,
 		                           &ixgbe_irq_affinity_callback);

^ permalink raw reply related

* [PATCH 1/4] e1000e: save skb counts in TX to avoid cache misses
From: Tom Herbert @ 2010-04-24  0:25 UTC (permalink / raw)
  To: davem, netdev

In e1000_tx_map, precompute number of segments and bytecounts which
are derived from fields in skb; these are stored in buffer_info.  When
cleaning tx in e1000_clean_tx_irq use the values in the associated
buffer_info for statistics counting, this eliminates cache misses
on skb fields.

Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index 12648a1..d6da75b 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -188,6 +188,8 @@ struct e1000_buffer {
 			unsigned long time_stamp;
 			u16 length;
 			u16 next_to_watch;
+			unsigned int segs;
+			unsigned int bytecount;
 			u16 mapped_as_page;
 		};
 		/* Rx */
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 5f70c43..4f5034a 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -646,14 +635,8 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
 			cleaned = (i == eop);
 
 			if (cleaned) {
-				struct sk_buff *skb = buffer_info->skb;
-				unsigned int segs, bytecount;
-				segs = skb_shinfo(skb)->gso_segs ?: 1;
-				/* multiply data chunks by size of headers */
-				bytecount = ((segs - 1) * skb_headlen(skb)) +
-					    skb->len;
-				total_tx_packets += segs;
-				total_tx_bytes += bytecount;
+				total_tx_packets += buffer_info->segs;
+				total_tx_bytes += buffer_info->bytecount;
 			}
 
 			e1000_put_txbuf(adapter, buffer_info);
@@ -3906,7 +3889,7 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 	struct e1000_buffer *buffer_info;
 	unsigned int len = skb_headlen(skb);
 	unsigned int offset = 0, size, count = 0, i;
-	unsigned int f;
+	unsigned int f, bytecount, segs;
 
 	i = tx_ring->next_to_use;
 
@@ -3965,7 +3948,13 @@ static int e1000_tx_map(struct e1000_adapter *adapter,
 		}
 	}
 
+	segs = skb_shinfo(skb)->gso_segs ?: 1;
+	/* multiply data chunks by size of headers */
+	bytecount = ((segs - 1) * skb_headlen(skb)) + skb->len;
+
 	tx_ring->buffer_info[i].skb = skb;
+	tx_ring->buffer_info[i].segs = segs;
+	tx_ring->buffer_info[i].bytecount = bytecount;
 	tx_ring->buffer_info[first].next_to_watch = i;
 
 	return count;

^ permalink raw reply related

* [PATCH 2/4] e1000e: reduce writes of RX producer ptr
From: Tom Herbert @ 2010-04-24  0:26 UTC (permalink / raw)
  To: davem, netdev

Reduce number of writes to RX producer pointer.   When alloc'ing RX
buffers, only write the RX producer pointer once every
E1000_RX_BUFFER_WRITE (16) buffers created.

Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 5f70c43..4f5034a 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -193,26 +193,23 @@ map_skb:
 		rx_desc = E1000_RX_DESC(*rx_ring, i);
 		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
 
+		if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) {
+			/*
+			 * Force memory writes to complete before letting h/w
+			 * know there are new descriptors to fetch.  (Only
+			 * applicable for weak-ordered memory model archs,
+			 * such as IA-64).
+			 */
+			wmb();
+			writel(i, adapter->hw.hw_addr + rx_ring->tail);
+		}
 		i++;
 		if (i == rx_ring->count)
 			i = 0;
 		buffer_info = &rx_ring->buffer_info[i];
 	}
 
-	if (rx_ring->next_to_use != i) {
-		rx_ring->next_to_use = i;
-		if (i-- == 0)
-			i = (rx_ring->count - 1);
-
-		/*
-		 * Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch.  (Only
-		 * applicable for weak-ordered memory model archs,
-		 * such as IA-64).
-		 */
-		wmb();
-		writel(i, adapter->hw.hw_addr + rx_ring->tail);
-	}
+	rx_ring->next_to_use = i;
 }
 
 /**
@@ -293,6 +290,17 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
 
 		rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
 
+		if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) {
+			/*
+			 * Force memory writes to complete before letting h/w
+			 * know there are new descriptors to fetch.  (Only
+			 * applicable for weak-ordered memory model archs,
+			 * such as IA-64).
+			 */
+			wmb();
+			writel(i<<1, adapter->hw.hw_addr + rx_ring->tail);
+		}
+
 		i++;
 		if (i == rx_ring->count)
 			i = 0;
@@ -300,26 +308,7 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
 	}
 
 no_buffers:
-	if (rx_ring->next_to_use != i) {
-		rx_ring->next_to_use = i;
-
-		if (!(i--))
-			i = (rx_ring->count - 1);
-
-		/*
-		 * Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch.  (Only
-		 * applicable for weak-ordered memory model archs,
-		 * such as IA-64).
-		 */
-		wmb();
-		/*
-		 * Hardware increments by 16 bytes, but packet split
-		 * descriptors are 32 bytes...so we increment tail
-		 * twice as much.
-		 */
-		writel(i<<1, adapter->hw.hw_addr + rx_ring->tail);
-	}
+	rx_ring->next_to_use = i;
 }
 
 /**

^ permalink raw reply related

* [PATCH 3/4] e1000e: Remove unnessary log message
From: Tom Herbert @ 2010-04-24  0:26 UTC (permalink / raw)
  To: davem, netdev

Remove e_info message printed whenever TSO is enabled or disabled.
This is not very useful and just clutters dmesg.

Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index 983493f..0deb2d1 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -412,7 +412,6 @@ static int e1000_set_tso(struct net_device *netdev, u32 data)
 		netdev->features &= ~NETIF_F_TSO6;
 	}
 
-	e_info("TSO is %s\n", data ? "Enabled" : "Disabled");
 	adapter->flags |= FLAG_TSO_FORCE;
 	return 0;
 }

^ permalink raw reply related

* [PATCH 4/4] e1000e: Save irq into netdev structure
From: Tom Herbert @ 2010-04-24  0:27 UTC (permalink / raw)
  To: davem, netdev

Set netdev->irq to pdev->irq.  This should be consistent with other
drivers.

Signed-off-by: Tom Herbert <therbert@google.com>
---
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 5f70c43..4f5034a 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -5073,6 +5062,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
+	netdev->irq = pdev->irq;
+
 	pci_set_drvdata(pdev, netdev);
 	adapter = netdev_priv(netdev);
 	hw = &adapter->hw;

^ permalink raw reply related

* [net-next-2.6 PATCH 1/2] Add ndo_set_vf_port_profile (was iovnl)
From: Scott Feldman @ 2010-04-24  0:35 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd

From: Scott Feldman <scofeldm@cisco.com>

(This is take #2 on the iovnl patches posted earlier based on feedback from
Chris Wright, Arnd Bergmann, and others.  Thanks guys!)

Add new netdev ops ndo_set_vf_port_profile to allow setting of port-profile
on VF, along the lines of existing ndo_set_vf_* ops.  Extends RTM_SETLINK
with new sub cmd called IFLA_VF_PORT_PROFILE (added to end of cmd list).  The
port-profile cmd arguments are (as seen from iproute2 cmdline):

       ip link set DEVICE [ { up | down } ]
                          ...
                          [ vf NUM [ mac LLADDR ]
                                   [ vlan VLANID [ qos VLAN-QOS ] ]
                                   [ rate TXRATE ] ] 
                                   [ port_profile [ PORT-PROFILE
                                           [ mac LLADDR ]
                                           [ host_uuid HOST_UUID ]
                                           [ client_uuid CLIENT_UUID ]
                                           [ client_name CLIENT_NAME ] ] ] ]


I took some liberties and s/SR-IOV/IOV in the code comments around the
ndo_set_vf_* cmds as they can apply to both SR-IOV and non-SR-IOV adapters,
as long as there is a PF:VF parent:child relationship.

A port-profile is used to configure/enable the network port backing the VF, not
to configure the host-facing side of the VF.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 include/linux/if_link.h   |   15 +++++++++++++--
 include/linux/netdevice.h |   11 ++++++++++-
 net/core/rtnetlink.c      |   20 ++++++++++++++++++++
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index cfd420b..2c5cc65 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -110,12 +110,13 @@ enum {
 #define IFLA_LINKINFO IFLA_LINKINFO
 	IFLA_NET_NS_PID,
 	IFLA_IFALIAS,
-	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
+	IFLA_NUM_VF,		/* Number of VFs if device is IOV PF */
 	IFLA_VF_MAC,		/* Hardware queue specific attributes */
 	IFLA_VF_VLAN,
 	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
 	IFLA_VFINFO,
 	IFLA_STATS64,
+	IFLA_VF_PORT_PROFILE,
 	__IFLA_MAX
 };
 
@@ -234,7 +235,7 @@ enum macvlan_mode {
 	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
 };
 
-/* SR-IOV virtual function managment section */
+/* IOV virtual function managment section */
 
 struct ifla_vf_mac {
 	__u32 vf;
@@ -259,4 +260,14 @@ struct ifla_vf_info {
 	__u32 qos;
 	__u32 tx_rate;
 };
+
+struct ifla_vf_port_profile {
+	__u32 vf;
+	__u8 port_profile[64];
+	__u8 mac[32];
+	__u8 host_uuid[64]; /* e.g. "CEEFD3B1-9E11-11DE-BDFD-000BAB01C0FB" */
+	__u8 client_uuid[64];
+	__u8 client_name[64]; /* e.g. "vm0-eth1" */
+};
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f..26dd4cb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -690,10 +690,13 @@ struct netdev_rx_queue {
  *
  * void (*ndo_poll_controller)(struct net_device *dev);
  *
- *	SR-IOV management functions.
+ *	IOV management functions.
  * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
  * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
  * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
+ * int (*ndo_set_vf_port_profile)(struct net_device *dev, int vf,
+ *				  u8 *port_profile, u8 *mac, u8 *host_uuid,
+ *				  u8 *client_uuid, u8 *client_name);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
  */
@@ -741,6 +744,12 @@ struct net_device_ops {
 						   int queue, u16 vlan, u8 qos);
 	int			(*ndo_set_vf_tx_rate)(struct net_device *dev,
 						      int vf, int rate);
+	int			(*ndo_set_vf_port_profile)(
+					struct net_device *dev, int vf,
+					u8 *port_profile, u8 *mac,
+					u8 *host_uuid,
+					u8 *client_uuid,
+					u8 *client_name);
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 78c8598..7268e8e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -824,6 +824,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 				    .len = sizeof(struct ifla_vf_vlan) },
 	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_tx_rate) },
+	[IFLA_VF_PORT_PROFILE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_port_profile)},
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1028,6 +1030,24 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	}
 	err = 0;
 
+	if (tb[IFLA_VF_PORT_PROFILE]) {
+		struct ifla_vf_port_profile *ivp;
+		ivp = nla_data(tb[IFLA_VF_PORT_PROFILE]);
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_port_profile)
+			ivp->port_profile[sizeof(ivp->port_profile)-1] = 0;
+			ivp->host_uuid[sizeof(ivp->host_uuid)-1] = 0;
+			ivp->client_uuid[sizeof(ivp->client_uuid)-1] = 0;
+			ivp->client_name[sizeof(ivp->client_name)-1] = 0;
+			err = ops->ndo_set_vf_port_profile(dev, ivp->vf,
+				ivp->port_profile, ivp->mac, ivp->host_uuid,
+				ivp->client_uuid, ivp->client_name);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+	err = 0;
+
 errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "


^ permalink raw reply related

* [net-next-2.6 PATCH 2/2] add enic ndo_vf_set_port_profile op support for dynamic vnics
From: Scott Feldman @ 2010-04-24  0:35 UTC (permalink / raw)
  To: davem; +Cc: netdev, chrisw, arnd
In-Reply-To: <20100424003540.12745.81403.stgit@savbu-pc100.cisco.com>

From: Scott Feldman <scofeldm@cisco.com>

Add enic ndo_set_vf_port_profile op to support setting port-profile for
dynamic vnics.  Enic dynamic vnics are just like normal enic eth vnics except
dynamic vnics require an extra configuration step to assign a port-profile
identifier to the interface before the interface is useable. Once assigned,
link comes up on the interface and is ready for I/O.  The port-profile is
used to configure the network port assigned to the interface.  The network
port configuration includes VLAN membership, QoS policies, and port security
settings typical of a data center network.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu<roprabhu@cisco.com>
---
 drivers/net/enic/Makefile    |    2 -
 drivers/net/enic/enic.h      |    2 -
 drivers/net/enic/enic_main.c |  144 +++++++++++++++++++++++++++++++++++-------
 drivers/net/enic/vnic_dev.c  |   50 +++++++++++++++
 drivers/net/enic/vnic_dev.h  |    3 +
 drivers/net/enic/vnic_vic.c  |   73 +++++++++++++++++++++
 drivers/net/enic/vnic_vic.h  |   59 +++++++++++++++++
 7 files changed, 306 insertions(+), 27 deletions(-)

diff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile
index 391c3bc..e7b6c31 100644
--- a/drivers/net/enic/Makefile
+++ b/drivers/net/enic/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_ENIC) := enic.o
 
 enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \
-	enic_res.o vnic_dev.o vnic_rq.o
+	enic_res.o vnic_dev.o vnic_rq.o vnic_vic.o
 
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index 5fa56f1..0956ca9 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -34,7 +34,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"1.3.1.1"
+#define DRV_VERSION		"1.3.1.1-iov"
 #define DRV_COPYRIGHT		"Copyright 2008-2009 Cisco Systems, Inc"
 #define PFX			DRV_NAME ": "
 
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 1232887..9a0305e 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -29,6 +29,7 @@
 #include <linux/etherdevice.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/if_link.h>
 #include <linux/ethtool.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -40,6 +41,7 @@
 #include "vnic_dev.h"
 #include "vnic_intr.h"
 #include "vnic_stats.h"
+#include "vnic_vic.h"
 #include "enic_res.h"
 #include "enic.h"
 
@@ -49,10 +51,12 @@
 #define ENIC_DESC_MAX_SPLITS		(MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)
 
 #define PCI_DEVICE_ID_CISCO_VIC_ENET         0x0043  /* ethernet vnic */
+#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN     0x0044  /* enet dynamic vnic */
 
 /* Supported devices */
 static DEFINE_PCI_DEVICE_TABLE(enic_id_table) = {
 	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
+	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
 	{ 0, }	/* end of table */
 };
 
@@ -113,6 +117,11 @@ static const struct enic_stat enic_rx_stats[] = {
 static const unsigned int enic_n_tx_stats = ARRAY_SIZE(enic_tx_stats);
 static const unsigned int enic_n_rx_stats = ARRAY_SIZE(enic_rx_stats);
 
+static int enic_is_dynamic(struct enic *enic)
+{
+	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
+}
+
 static int enic_get_settings(struct net_device *netdev,
 	struct ethtool_cmd *ecmd)
 {
@@ -810,14 +819,24 @@ static void enic_reset_mcaddrs(struct enic *enic)
 
 static int enic_set_mac_addr(struct net_device *netdev, char *addr)
 {
-	if (!is_valid_ether_addr(addr))
-		return -EADDRNOTAVAIL;
+	struct enic *enic = netdev_priv(netdev);
 
-	memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	if (enic_is_dynamic(enic)) {
+		random_ether_addr(netdev->dev_addr);
+	} else {
+		if (!is_valid_ether_addr(addr))
+			return -EADDRNOTAVAIL;
+		memcpy(netdev->dev_addr, addr, netdev->addr_len);
+	}
 
 	return 0;
 }
 
+static int enic_set_mac_address(struct net_device *netdev, void *p)
+{
+	return -EOPNOTSUPP;
+}
+
 /* netif_tx_lock held, BHs disabled */
 static void enic_set_multicast_list(struct net_device *netdev)
 {
@@ -922,6 +941,76 @@ static void enic_tx_timeout(struct net_device *netdev)
 	schedule_work(&enic->reset);
 }
 
+static int enic_vnic_dev_deinit(struct enic *enic)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_deinit(enic->vdev);
+	spin_unlock(&enic->devcmd_lock);
+	return err;
+}
+
+static int enic_dev_init_prov(struct enic *enic, struct vic_provinfo *vp)
+{
+	int err;
+
+	spin_lock(&enic->devcmd_lock);
+	err = vnic_dev_init_prov(enic->vdev, (u8 *)vp, vic_provinfo_size(vp));
+	spin_unlock(&enic->devcmd_lock);
+	return err;
+}
+
+static int enic_provinfo_add_tlv_str(struct vic_provinfo *vp, u16 type,
+	char *str)
+{
+	return str ? vic_provinfo_add_tlv(vp, type, strlen(str) + 1, str) : 0;
+}
+
+static int enic_set_vf_port_profile(struct net_device *netdev, int vf,
+	u8 *port_profile, u8 *mac, u8 *host_uuid, u8 *client_uuid,
+	u8 *client_name)
+{
+	struct enic *enic = netdev_priv(netdev);
+	struct vic_provinfo *vp;
+	u8 oui[3] = VIC_PROVINFO_CISCO_OUI;
+	int err;
+
+	if (!enic_is_dynamic(enic))
+		return -EOPNOTSUPP;
+
+	if (strlen(port_profile) == 0)
+		return enic_vnic_dev_deinit(enic);
+
+	vp = vic_provinfo_alloc(GFP_KERNEL, oui, VIC_PROVINFO_LINUX_TYPE);
+	if (!vp)
+		return -ENOMEM;
+
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR,
+		port_profile);
+	vic_provinfo_add_tlv(vp, VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR,
+		ETH_ALEN, mac);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_HOST_UUID_STR,
+		host_uuid);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_UUID_STR,
+		client_uuid);
+	enic_provinfo_add_tlv_str(vp, VIC_LINUX_PROV_TLV_CLIENT_NAME_STR,
+		client_name);
+
+	err = enic_vnic_dev_deinit(enic);
+	if (err)
+		goto err_out;
+
+	err = enic_dev_init_prov(enic, vp);
+
+err_out:
+	vic_provinfo_free(vp);
+
+	enic_set_multicast_list(netdev);
+
+	return err;
+}
+
 static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
 {
 	struct enic *enic = vnic_dev_priv(rq->vdev);
@@ -1440,10 +1529,12 @@ static int enic_open(struct net_device *netdev)
 	for (i = 0; i < enic->rq_count; i++)
 		vnic_rq_enable(&enic->rq[i]);
 
-	spin_lock(&enic->devcmd_lock);
-	enic_add_station_addr(enic);
-	spin_unlock(&enic->devcmd_lock);
-	enic_set_multicast_list(netdev);
+	if (!enic_is_dynamic(enic)) {
+		spin_lock(&enic->devcmd_lock);
+		enic_add_station_addr(enic);
+		spin_unlock(&enic->devcmd_lock);
+		enic_set_multicast_list(netdev);
+	}
 
 	netif_wake_queue(netdev);
 	napi_enable(&enic->napi);
@@ -1775,20 +1866,21 @@ static void enic_clear_intr_mode(struct enic *enic)
 }
 
 static const struct net_device_ops enic_netdev_ops = {
-	.ndo_open		= enic_open,
-	.ndo_stop		= enic_stop,
-	.ndo_start_xmit		= enic_hard_start_xmit,
-	.ndo_get_stats		= enic_get_stats,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_set_multicast_list	= enic_set_multicast_list,
-	.ndo_change_mtu		= enic_change_mtu,
-	.ndo_vlan_rx_register	= enic_vlan_rx_register,
-	.ndo_vlan_rx_add_vid	= enic_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid	= enic_vlan_rx_kill_vid,
-	.ndo_tx_timeout		= enic_tx_timeout,
+	.ndo_open			= enic_open,
+	.ndo_stop			= enic_stop,
+	.ndo_start_xmit			= enic_hard_start_xmit,
+	.ndo_get_stats			= enic_get_stats,
+	.ndo_validate_addr		= eth_validate_addr,
+	.ndo_set_multicast_list		= enic_set_multicast_list,
+	.ndo_set_mac_address		= enic_set_mac_address,
+	.ndo_change_mtu			= enic_change_mtu,
+	.ndo_vlan_rx_register		= enic_vlan_rx_register,
+	.ndo_vlan_rx_add_vid		= enic_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid		= enic_vlan_rx_kill_vid,
+	.ndo_tx_timeout			= enic_tx_timeout,
+	.ndo_set_vf_port_profile	= enic_set_vf_port_profile,
 #ifdef CONFIG_NET_POLL_CONTROLLER
-	.ndo_poll_controller	= enic_poll_controller,
+	.ndo_poll_controller		= enic_poll_controller,
 #endif
 };
 
@@ -2010,11 +2102,13 @@ static int __devinit enic_probe(struct pci_dev *pdev,
 
 	netif_carrier_off(netdev);
 
-	err = vnic_dev_init(enic->vdev, 0);
-	if (err) {
-		printk(KERN_ERR PFX
-			"vNIC dev init failed, aborting.\n");
-		goto err_out_dev_close;
+	if (!enic_is_dynamic(enic)) {
+		err = vnic_dev_init(enic->vdev, 0);
+		if (err) {
+			printk(KERN_ERR PFX
+				"vNIC dev init failed, aborting.\n");
+			goto err_out_dev_close;
+		}
 	}
 
 	err = enic_dev_init(enic);
diff --git a/drivers/net/enic/vnic_dev.c b/drivers/net/enic/vnic_dev.c
index d43a9d4..e351b0f 100644
--- a/drivers/net/enic/vnic_dev.c
+++ b/drivers/net/enic/vnic_dev.c
@@ -682,6 +682,56 @@ int vnic_dev_init(struct vnic_dev *vdev, int arg)
 	return r;
 }
 
+int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+	int ret;
+
+	*done = 0;
+
+	ret = vnic_dev_cmd(vdev, CMD_INIT_STATUS, &a0, &a1, wait);
+	if (ret)
+		return ret;
+
+	*done = (a0 == 0);
+
+	*err = (a0 == 0) ? a1 : 0;
+
+	return 0;
+}
+
+int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len)
+{
+	u64 a0, a1 = len;	/* devcmd args: buffer bus address, length */
+	int wait = 1000;
+	dma_addr_t prov_pa;	/* type pci_alloc_consistent() writes through */
+	void *prov_buf;
+	int ret;
+
+	prov_buf = pci_alloc_consistent(vdev->pdev, len, &prov_pa);
+	if (!prov_buf)
+		return -ENOMEM;
+
+	memcpy(prov_buf, buf, len);	/* stage caller data in the DMA buffer */
+
+	a0 = prov_pa;	/* widened to the u64 devcmd argument */
+
+	ret = vnic_dev_cmd(vdev, CMD_INIT_PROV_INFO, &a0, &a1, wait);
+
+	pci_free_consistent(vdev->pdev, len, prov_buf, prov_pa);
+
+	return ret;	/* -ENOMEM above, otherwise the devcmd result */
+}
+
+int vnic_dev_deinit(struct vnic_dev *vdev)
+{
+	u64 a0 = 0, a1 = 0;
+	int wait = 1000;
+
+	return vnic_dev_cmd(vdev, CMD_DEINIT, &a0, &a1, wait);
+}
+
 int vnic_dev_link_status(struct vnic_dev *vdev)
 {
 	if (vdev->linkstatus)
diff --git a/drivers/net/enic/vnic_dev.h b/drivers/net/enic/vnic_dev.h
index f5be640..27f5a5a 100644
--- a/drivers/net/enic/vnic_dev.h
+++ b/drivers/net/enic/vnic_dev.h
@@ -124,6 +124,9 @@ int vnic_dev_disable(struct vnic_dev *vdev);
 int vnic_dev_open(struct vnic_dev *vdev, int arg);
 int vnic_dev_open_done(struct vnic_dev *vdev, int *done);
 int vnic_dev_init(struct vnic_dev *vdev, int arg);
+int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err);
+int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len);
+int vnic_dev_deinit(struct vnic_dev *vdev);
 int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg);
 int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done);
 void vnic_dev_set_intr_mode(struct vnic_dev *vdev,
diff --git a/drivers/net/enic/vnic_vic.c b/drivers/net/enic/vnic_vic.c
new file mode 100644
index 0000000..d769772
--- /dev/null
+++ b/drivers/net/enic/vnic_vic.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "vnic_vic.h"
+
+struct vic_provinfo *vic_provinfo_alloc(gfp_t flags, u8 *oui, u8 type)
+{
+	struct vic_provinfo *vp;
+
+	/* Reject a NULL oui before allocating so the buffer is never leaked */
+	vp = oui ? kzalloc(VIC_PROVINFO_MAX_DATA, flags) : NULL;
+	if (!vp)
+		return NULL;
+	memcpy(vp->oui, oui, sizeof(vp->oui));
+	vp->type = type;
+	vp->length = htonl(sizeof(vp->num_tlvs));	/* no TLVs added yet */
+	return vp;	/* caller releases it with vic_provinfo_free() */
+}
+
+void vic_provinfo_free(struct vic_provinfo *vp)
+{
+	kfree(vp);
+}
+
+int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
+	void *value)
+{
+	struct vic_provinfo_tlv *tlv;
+
+	if (!vp || !value)
+		return -EINVAL;
+
+	if (ntohl(vp->length) + sizeof(*tlv) + length >
+		VIC_PROVINFO_MAX_TLV_DATA)
+		return -ENOMEM;	/* new TLV would exceed the size limit */
+
+	/* vp->length covers num_tlvs plus the TLVs appended so far */
+	tlv = (struct vic_provinfo_tlv *)((u8 *)vp->tlv +
+		ntohl(vp->length) - sizeof(vp->num_tlvs));
+
+	tlv->type = htons(type);	/* header fields in network order */
+	tlv->length = htons(length);
+	memcpy(tlv->value, value, length);
+
+	vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1);
+	vp->length = htonl(ntohl(vp->length) + sizeof(*tlv) + length);
+	/* 0 on success, -EINVAL or -ENOMEM on the checks above */
+	return 0;
+}
+
+size_t vic_provinfo_size(struct vic_provinfo *vp)
+{	/* header size plus length, counting num_tlvs once; 0 for NULL vp */
+	return vp ?  ntohl(vp->length) + sizeof(*vp) - sizeof(vp->num_tlvs) : 0;
+}
diff --git a/drivers/net/enic/vnic_vic.h b/drivers/net/enic/vnic_vic.h
new file mode 100644
index 0000000..085c2a2
--- /dev/null
+++ b/drivers/net/enic/vnic_vic.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _VNIC_VIC_H_
+#define _VNIC_VIC_H_
+
+/* Note: All integer fields in NETWORK byte order */
+
+/* Note: String field lengths include null char */
+
+#define VIC_PROVINFO_CISCO_OUI		{ 0x00, 0x00, 0x0c }
+#define VIC_PROVINFO_LINUX_TYPE		0x2
+
+enum vic_linux_prov_tlv_type {
+	VIC_LINUX_PROV_TLV_PORT_PROFILE_NAME_STR = 0,
+	VIC_LINUX_PROV_TLV_CLIENT_MAC_ADDR = 1,			/* u8[6] */
+	VIC_LINUX_PROV_TLV_CLIENT_NAME_STR = 2,
+	VIC_LINUX_PROV_TLV_HOST_UUID_STR = 8,
+	VIC_LINUX_PROV_TLV_CLIENT_UUID_STR = 9,
+};
+
+struct vic_provinfo {
+	u8 oui[3];		/* OUI of data provider */
+	u8 type;		/* provider-specific type */
+	u32 length;		/* length of data below */
+	u32 num_tlvs;		/* number of tlvs */
+	struct vic_provinfo_tlv {
+		u16 type;
+		u16 length;
+		u8 value[0];
+	} tlv[0];
+} __attribute__ ((packed));
+
+#define VIC_PROVINFO_MAX_DATA		1385
+#define VIC_PROVINFO_MAX_TLV_DATA (VIC_PROVINFO_MAX_DATA - \
+	sizeof(struct vic_provinfo))
+
+struct vic_provinfo *vic_provinfo_alloc(gfp_t flags, u8 *oui, u8 type);
+void vic_provinfo_free(struct vic_provinfo *vp);
+int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length,
+	void *value);
+size_t vic_provinfo_size(struct vic_provinfo *vp);
+
+#endif	/* _VNIC_VIC_H_ */


^ permalink raw reply related

* Re: [net-next-2.6 PATCH 1/2] Add ndo_set_vf_port_profile (was iovnl)
From: Chris Wright @ 2010-04-24  2:22 UTC (permalink / raw)
  To: Scott Feldman; +Cc: davem, netdev, chrisw, arnd
In-Reply-To: <20100424003540.12745.81403.stgit@savbu-pc100.cisco.com>

* Scott Feldman (scofeldm@cisco.com) wrote:
> From: Scott Feldman <scofeldm@cisco.com>
> 
> (This is take #2 on the iovnl patches posted earlier based on feedback from
> Chris Wright, Arnd Bergmann, and others.  Thanks guys!)
> 
> Add new netdev ops ndo_set_vf_port_profile to allow setting of port-profile
> on VF, along the lines of existing ndo_set_vf_* ops.  Extends RTM_SETLINK
> with new sub cmd called IFLA_VF_PORT_PROFILE (added to end of the cmd list).  The
> port-profile cmd arguments are (as seen from iproute2 cmdline):
> 
>        ip link set DEVICE [ { up | down } ]
>                           ...
>                           [ vf NUM [ mac LLADDR ]
>                                    [ vlan VLANID [ qos VLAN-QOS ] ]
>                                    [ rate TXRATE ] ] 
>                                    [ port_profile [ PORT-PROFILE
>                                            [ mac LLADDR ]
>                                            [ host_uuid HOST_UUID ]
>                                            [ client_uuid CLIENT_UUID ]
>                                            [ client_name CLIENT_NAME ] ] ] ]
> 
> 
> I took some liberties and s/SR-IOV/IOV in the code comments around the
> ndo_set_vf_* cmds as they can apply to both SR-IOV and non-SR-IOV adapters,
> as long as there is a PF:VF parent:child relationship.

For the enic case, which do you expect to use for net_dev and VF index?  Would
this be VF + index == 0 (meaning the degenerate case you described last
time where PF==VF)?

> A port-profile is used to configure/enable the network port backing the VF, not
> to configure the host-facing side of the VF.

How shall we do the lldpad case?

> Signed-off-by: Scott Feldman <scofeldm@cisco.com>
> Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
> ---
>  include/linux/if_link.h   |   15 +++++++++++++--
>  include/linux/netdevice.h |   11 ++++++++++-
>  net/core/rtnetlink.c      |   20 ++++++++++++++++++++
>  3 files changed, 43 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/if_link.h b/include/linux/if_link.h
> index cfd420b..2c5cc65 100644
> --- a/include/linux/if_link.h
> +++ b/include/linux/if_link.h
> @@ -110,12 +110,13 @@ enum {
>  #define IFLA_LINKINFO IFLA_LINKINFO
>  	IFLA_NET_NS_PID,
>  	IFLA_IFALIAS,
> -	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
> +	IFLA_NUM_VF,		/* Number of VFs if device is IOV PF */
>  	IFLA_VF_MAC,		/* Hardware queue specific attributes */
>  	IFLA_VF_VLAN,
>  	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
>  	IFLA_VFINFO,
>  	IFLA_STATS64,
> +	IFLA_VF_PORT_PROFILE,
>  	__IFLA_MAX
>  };
>  
> @@ -234,7 +235,7 @@ enum macvlan_mode {
>  	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
>  };
>  
> -/* SR-IOV virtual function managment section */
> +/* IOV virtual function management section */
>  
>  struct ifla_vf_mac {
>  	__u32 vf;
> @@ -259,4 +260,14 @@ struct ifla_vf_info {
>  	__u32 qos;
>  	__u32 tx_rate;
>  };
> +
> +struct ifla_vf_port_profile {
> +	__u32 vf;
> +	__u8 port_profile[64];
> +	__u8 mac[32];
> +	__u8 host_uuid[64]; /* e.g. "CEEFD3B1-9E11-11DE-BDFD-000BAB01C0FB" */
> +	__u8 client_uuid[64];
> +	__u8 client_name[64]; /* e.g. "vm0-eth1" */
> +};
> +
>  #endif /* _LINUX_IF_LINK_H */
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 3c5ed5f..26dd4cb 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -690,10 +690,13 @@ struct netdev_rx_queue {
>   *
>   * void (*ndo_poll_controller)(struct net_device *dev);
>   *
> - *	SR-IOV management functions.
> + *	IOV management functions.
>   * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
>   * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
>   * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
> + * int (*ndo_set_vf_port_profile)(struct net_device *dev, int vf,
> + *				  u8 *port_profile, u8 *mac, u8 *host_uuid,
> + *				  u8 *client_uuid, u8 *client_name);
>   * int (*ndo_get_vf_config)(struct net_device *dev,
>   *			    int vf, struct ifla_vf_info *ivf);
>   */
> @@ -741,6 +744,12 @@ struct net_device_ops {
>  						   int queue, u16 vlan, u8 qos);
>  	int			(*ndo_set_vf_tx_rate)(struct net_device *dev,
>  						      int vf, int rate);
> +	int			(*ndo_set_vf_port_profile)(
> +					struct net_device *dev, int vf,
> +					u8 *port_profile, u8 *mac,
> +					u8 *host_uuid,
> +					u8 *client_uuid,
> +					u8 *client_name);
>  	int			(*ndo_get_vf_config)(struct net_device *dev,
>  						     int vf,
>  						     struct ifla_vf_info *ivf);
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 78c8598..7268e8e 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -824,6 +824,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
>  				    .len = sizeof(struct ifla_vf_vlan) },
>  	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
>  				    .len = sizeof(struct ifla_vf_tx_rate) },
> +	[IFLA_VF_PORT_PROFILE]	= { .type = NLA_BINARY,
> +				    .len = sizeof(struct ifla_vf_port_profile)},
>  };
>  EXPORT_SYMBOL(ifla_policy);
>  
> @@ -1028,6 +1030,26 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
>  	}
>  	err = 0;
>  
> +	if (tb[IFLA_VF_PORT_PROFILE]) {
> +		struct ifla_vf_port_profile *ivp;
> +		ivp = nla_data(tb[IFLA_VF_PORT_PROFILE]);
> +		err = -EOPNOTSUPP;
> +		/* braces needed: the op must only be called when set */
> +		if (ops->ndo_set_vf_port_profile) {
> +			ivp->port_profile[sizeof(ivp->port_profile)-1] = 0;
> +			ivp->host_uuid[sizeof(ivp->host_uuid)-1] = 0;
> +			ivp->client_uuid[sizeof(ivp->client_uuid)-1] = 0;
> +			ivp->client_name[sizeof(ivp->client_name)-1] = 0;

Seems a little unusual to modify the buffer, add a kernel internal structure
that can be passed to ndo callback (where buffer lens can be known)?

> +			err = ops->ndo_set_vf_port_profile(dev, ivp->vf,
> +				ivp->port_profile, ivp->mac, ivp->host_uuid,
> +				ivp->client_uuid, ivp->client_name);
> +		}
> +		if (err < 0)
> +			goto errout;
> +		modified = 1;
> +	}
> +	err = 0;
> +
>  errout:
>  	if (err < 0 && modified && net_ratelimit())
>  		printk(KERN_WARNING "A link change request failed with "
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox