[PATCH 0/3] Staging: Improvments to Octeon Ethernet driver.

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 0/3] Staging: Improvments to Octeon Ethernet driver.
@ 2010-01-07  0:55 David Daney
  2010-01-07  0:57 ` [PATCH 1/3] MIPS: Octeon: Fix EIO handling David Daney
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: David Daney @ 2010-01-07  0:55 UTC (permalink / raw)
  To: Ralf Baechle, linux-mips, Netdev, Greg Kroah-Hartman

These patches attempt to cleanup and improve the octeon-ethernet
driver in staging.  The first patch fixes up octeon interrupt handling
in preparation for the following patches.  The second and third
patches convert to NAPI and enable scatter-gather as well is cleaning
up some ugly garbage.

Since Octeon is a MIPS based SOC, we might want to merge the entire
series via Ralf's linux-mips.org tree.

I will reply with the three patches.

David Daney (3):
   MIPS: Octeon: Fix EIO handling.
   Staging: Octeon Ethernet: Clean up and convert to NAPI.
   Staging: Octeon Ethernet: Enable scatter-gather.

  arch/mips/cavium-octeon/octeon-irq.c      |   40 +++-
  drivers/staging/octeon/Kconfig            |    1 +
  drivers/staging/octeon/ethernet-defines.h |   28 ---
  drivers/staging/octeon/ethernet-mem.c     |   89 +++-----
  drivers/staging/octeon/ethernet-rx.c      |  377 
++++++++++++++++------------
  drivers/staging/octeon/ethernet-rx.h      |   25 ++-
  drivers/staging/octeon/ethernet-tx.c      |  330 ++++++++++++-------------
  drivers/staging/octeon/ethernet-tx.h      |   27 +--
  drivers/staging/octeon/ethernet.c         |  157 +++++--------
  drivers/staging/octeon/octeon-ethernet.h  |   48 +---
  10 files changed, 532 insertions(+), 590 deletions(-)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] MIPS: Octeon: Fix EIO handling.
  2010-01-07  0:55 [PATCH 0/3] Staging: Improvments to Octeon Ethernet driver David Daney
@ 2010-01-07  0:57 ` David Daney
  2010-01-07  0:57 ` [PATCH 2/3] Staging: Octeon Ethernet: Clean up and convert to NAPI David Daney
  2010-01-07  0:57 ` [PATCH 3/3] Staging: Octeon Ethernet: Enable scatter-gather David Daney
  2 siblings, 0 replies; 5+ messages in thread
From: David Daney @ 2010-01-07  0:57 UTC (permalink / raw)
  To: ralf, linux-mips, netdev, gregkh; +Cc: David Daney

If an interrupt handler disables interrupts, the EOI function will
just reenable them.  This will put us in an endless loop when the
upcoming Ethernet driver patches are applied.

Only reenable the interrupt on EOI if it is not IRQ_DISABLED.  This
requires that the EIO function be separate from the ENABLE function.
We also rename the ACK functions to correspond with their function.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 arch/mips/cavium-octeon/octeon-irq.c |   40 ++++++++++++++++++++++++++++-----
 1 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 6f2acf0..1460d08 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -193,7 +193,7 @@ static void octeon_irq_ciu0_enable_v2(unsigned int irq)
  * Disable the irq on the current core for chips that have the EN*_W1{S,C}
  * registers.
  */
-static void octeon_irq_ciu0_disable_v2(unsigned int irq)
+static void octeon_irq_ciu0_ack_v2(unsigned int irq)
 {
 	int index = cvmx_get_core_num() * 2;
 	u64 mask = 1ull << (irq - OCTEON_IRQ_WORKQ0);
@@ -202,6 +202,20 @@ static void octeon_irq_ciu0_disable_v2(unsigned int irq)
 }
 
 /*
+ * Enable the irq on the current core for chips that have the EN*_W1{S,C}
+ * registers.
+ */
+static void octeon_irq_ciu0_eoi_v2(unsigned int irq)
+{
+	struct irq_desc *desc = irq_desc + irq;
+	int index = cvmx_get_core_num() * 2;
+	u64 mask = 1ull << (irq - OCTEON_IRQ_WORKQ0);
+
+	if ((desc->status & IRQ_DISABLED) == 0)
+		cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
+}
+
+/*
  * Disable the irq on the all cores for chips that have the EN*_W1{S,C}
  * registers.
  */
@@ -272,8 +286,8 @@ static struct irq_chip octeon_irq_chip_ciu0_v2 = {
 	.name = "CIU0",
 	.enable = octeon_irq_ciu0_enable_v2,
 	.disable = octeon_irq_ciu0_disable_all_v2,
-	.ack = octeon_irq_ciu0_disable_v2,
-	.eoi = octeon_irq_ciu0_enable_v2,
+	.ack = octeon_irq_ciu0_ack_v2,
+	.eoi = octeon_irq_ciu0_eoi_v2,
 #ifdef CONFIG_SMP
 	.set_affinity = octeon_irq_ciu0_set_affinity_v2,
 #endif
@@ -374,7 +388,7 @@ static void octeon_irq_ciu1_enable_v2(unsigned int irq)
  * Disable the irq on the current core for chips that have the EN*_W1{S,C}
  * registers.
  */
-static void octeon_irq_ciu1_disable_v2(unsigned int irq)
+static void octeon_irq_ciu1_ack_v2(unsigned int irq)
 {
 	int index = cvmx_get_core_num() * 2 + 1;
 	u64 mask = 1ull << (irq - OCTEON_IRQ_WDOG0);
@@ -383,6 +397,20 @@ static void octeon_irq_ciu1_disable_v2(unsigned int irq)
 }
 
 /*
+ * Enable the irq on the current core for chips that have the EN*_W1{S,C}
+ * registers.
+ */
+static void octeon_irq_ciu1_eoi_v2(unsigned int irq)
+{
+	struct irq_desc *desc = irq_desc + irq;
+	int index = cvmx_get_core_num() * 2 + 1;
+	u64 mask = 1ull << (irq - OCTEON_IRQ_WDOG0);
+
+	if ((desc->status & IRQ_DISABLED) == 0)
+		cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
+}
+
+/*
  * Disable the irq on the all cores for chips that have the EN*_W1{S,C}
  * registers.
  */
@@ -455,8 +483,8 @@ static struct irq_chip octeon_irq_chip_ciu1_v2 = {
 	.name = "CIU0",
 	.enable = octeon_irq_ciu1_enable_v2,
 	.disable = octeon_irq_ciu1_disable_all_v2,
-	.ack = octeon_irq_ciu1_disable_v2,
-	.eoi = octeon_irq_ciu1_enable_v2,
+	.ack = octeon_irq_ciu1_ack_v2,
+	.eoi = octeon_irq_ciu1_eoi_v2,
 #ifdef CONFIG_SMP
 	.set_affinity = octeon_irq_ciu1_set_affinity_v2,
 #endif
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] Staging: Octeon Ethernet: Clean up and convert to NAPI.
  2010-01-07  0:55 [PATCH 0/3] Staging: Improvments to Octeon Ethernet driver David Daney
  2010-01-07  0:57 ` [PATCH 1/3] MIPS: Octeon: Fix EIO handling David Daney
@ 2010-01-07  0:57 ` David Daney
  2010-01-07  1:01   ` David Miller
  2010-01-07  0:57 ` [PATCH 3/3] Staging: Octeon Ethernet: Enable scatter-gather David Daney
  2 siblings, 1 reply; 5+ messages in thread
From: David Daney @ 2010-01-07  0:57 UTC (permalink / raw)
  To: ralf, linux-mips, netdev, gregkh; +Cc: David Daney

Convert the driver to be a reasonably well behaved NAPI citizen.  Also
clean up memory allocation and tx buffer accounding code.  Remove some
configuration #defines and other unused code that are not applicable
to the in-tree driver.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 drivers/staging/octeon/Kconfig            |    1 +
 drivers/staging/octeon/ethernet-defines.h |   28 ---
 drivers/staging/octeon/ethernet-mem.c     |   89 +++-----
 drivers/staging/octeon/ethernet-rx.c      |  377 ++++++++++++++++------------
 drivers/staging/octeon/ethernet-rx.h      |   25 ++-
 drivers/staging/octeon/ethernet-tx.c      |  273 +++++++++------------
 drivers/staging/octeon/ethernet-tx.h      |   27 +--
 drivers/staging/octeon/ethernet.c         |  150 +++++-------
 drivers/staging/octeon/octeon-ethernet.h  |   48 +---
 9 files changed, 443 insertions(+), 575 deletions(-)

diff --git a/drivers/staging/octeon/Kconfig b/drivers/staging/octeon/Kconfig
index 638ad6b..579b8f1 100644
--- a/drivers/staging/octeon/Kconfig
+++ b/drivers/staging/octeon/Kconfig
@@ -3,6 +3,7 @@ config OCTEON_ETHERNET
 	depends on CPU_CAVIUM_OCTEON
 	select PHYLIB
 	select MDIO_OCTEON
+	select HIGH_RES_TIMERS
 	help
 	  This driver supports the builtin ethernet ports on Cavium
 	  Networks' products in the Octeon family. This driver supports the
diff --git a/drivers/staging/octeon/ethernet-defines.h b/drivers/staging/octeon/ethernet-defines.h
index f13131b..9c4910e 100644
--- a/drivers/staging/octeon/ethernet-defines.h
+++ b/drivers/staging/octeon/ethernet-defines.h
@@ -41,17 +41,10 @@
  *      Tells the driver to populate the packet buffers with kernel skbuffs.
  *      This allows the driver to receive packets without copying them. It also
  *      means that 32bit userspace can't access the packet buffers.
- *  USE_32BIT_SHARED
- *      This define tells the driver to allocate memory for buffers from the
- *      32bit sahred region instead of the kernel memory space.
  *  USE_HW_TCPUDP_CHECKSUM
  *      Controls if the Octeon TCP/UDP checksum engine is used for packet
  *      output. If this is zero, the kernel will perform the checksum in
  *      software.
- *  USE_MULTICORE_RECEIVE
- *      Process receive interrupts on multiple cores. This spreads the network
- *      load across the first 8 processors. If ths is zero, only one core
- *      processes incomming packets.
  *  USE_ASYNC_IOBDMA
  *      Use asynchronous IO access to hardware. This uses Octeon's asynchronous
  *      IOBDMAs to issue IO accesses without stalling. Set this to zero
@@ -75,29 +68,15 @@
 #define CONFIG_CAVIUM_RESERVE32 0
 #endif
 
-#if CONFIG_CAVIUM_RESERVE32
-#define USE_32BIT_SHARED            1
-#define USE_SKBUFFS_IN_HW           0
-#define REUSE_SKBUFFS_WITHOUT_FREE  0
-#else
-#define USE_32BIT_SHARED            0
 #define USE_SKBUFFS_IN_HW           1
 #ifdef CONFIG_NETFILTER
 #define REUSE_SKBUFFS_WITHOUT_FREE  0
 #else
 #define REUSE_SKBUFFS_WITHOUT_FREE  1
 #endif
-#endif
-
-/* Max interrupts per second per core */
-#define INTERRUPT_LIMIT             10000
 
-/* Don't limit the number of interrupts */
-/*#define INTERRUPT_LIMIT             0     */
 #define USE_HW_TCPUDP_CHECKSUM      1
 
-#define USE_MULTICORE_RECEIVE       1
-
 /* Enable Random Early Dropping under load */
 #define USE_RED                     1
 #define USE_ASYNC_IOBDMA            (CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0)
@@ -115,17 +94,10 @@
 /* Use this to not have FPA frees control L2 */
 /*#define DONT_WRITEBACK(x)         0   */
 
-/* Maximum number of packets to process per interrupt. */
-#define MAX_RX_PACKETS 120
 /* Maximum number of SKBs to try to free per xmit packet. */
 #define MAX_SKB_TO_FREE 10
 #define MAX_OUT_QUEUE_DEPTH 1000
 
-#ifndef CONFIG_SMP
-#undef USE_MULTICORE_RECEIVE
-#define USE_MULTICORE_RECEIVE 0
-#endif
-
 #define IP_PROTOCOL_TCP             6
 #define IP_PROTOCOL_UDP             0x11
 
diff --git a/drivers/staging/octeon/ethernet-mem.c b/drivers/staging/octeon/ethernet-mem.c
index b595903..b59cd16 100644
--- a/drivers/staging/octeon/ethernet-mem.c
+++ b/drivers/staging/octeon/ethernet-mem.c
@@ -26,8 +26,6 @@
 **********************************************************************/
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
-#include <linux/mii.h>
-#include <net/dst.h>
 
 #include <asm/octeon/octeon.h>
 
@@ -47,7 +45,7 @@ static int cvm_oct_fill_hw_skbuff(int pool, int size, int elements)
 	int freed = elements;
 	while (freed) {
 
-		struct sk_buff *skb = dev_alloc_skb(size + 128);
+		struct sk_buff *skb = dev_alloc_skb(size + 256);
 		if (unlikely(skb == NULL)) {
 			pr_warning
 			    ("Failed to allocate skb for hardware pool %d\n",
@@ -55,7 +53,7 @@ static int cvm_oct_fill_hw_skbuff(int pool, int size, int elements)
 			break;
 		}
 
-		skb_reserve(skb, 128 - (((unsigned long)skb->data) & 0x7f));
+		skb_reserve(skb, 256 - (((unsigned long)skb->data) & 0x7f));
 		*(struct sk_buff **)(skb->data - sizeof(void *)) = skb;
 		cvmx_fpa_free(skb->data, pool, DONT_WRITEBACK(size / 128));
 		freed--;
@@ -107,42 +105,15 @@ static int cvm_oct_fill_hw_memory(int pool, int size, int elements)
 	char *memory;
 	int freed = elements;
 
-	if (USE_32BIT_SHARED) {
-		extern uint64_t octeon_reserve32_memory;
-
-		memory =
-		    cvmx_bootmem_alloc_range(elements * size, 128,
-					     octeon_reserve32_memory,
-					     octeon_reserve32_memory +
-					     (CONFIG_CAVIUM_RESERVE32 << 20) -
-					     1);
-		if (memory == NULL)
-			panic("Unable to allocate %u bytes for FPA pool %d\n",
-			      elements * size, pool);
-
-		pr_notice("Memory range %p - %p reserved for "
-			  "hardware\n", memory,
-			  memory + elements * size - 1);
-
-		while (freed) {
-			cvmx_fpa_free(memory, pool, 0);
-			memory += size;
-			freed--;
-		}
-	} else {
-		while (freed) {
-			/* We need to force alignment to 128 bytes here */
-			memory = kmalloc(size + 127, GFP_ATOMIC);
-			if (unlikely(memory == NULL)) {
-				pr_warning("Unable to allocate %u bytes for "
-					   "FPA pool %d\n",
-				     elements * size, pool);
-				break;
-			}
-			memory = (char *)(((unsigned long)memory + 127) & -128);
-			cvmx_fpa_free(memory, pool, 0);
-			freed--;
+	while (freed) {
+		memory = kmalloc(size, GFP_ATOMIC);
+		if (unlikely(memory == NULL)) {
+			pr_warning("Unable to allocate %u bytes for FPA pool %d\n",
+				elements * size, pool);
+			break;
 		}
+		cvmx_fpa_free(memory, pool, 0);
+		freed--;
 	}
 	return elements - freed;
 }
@@ -156,33 +127,29 @@ static int cvm_oct_fill_hw_memory(int pool, int size, int elements)
  */
 static void cvm_oct_free_hw_memory(int pool, int size, int elements)
 {
-	if (USE_32BIT_SHARED) {
-		pr_warning("Warning: 32 shared memory is not freeable\n");
-	} else {
-		char *memory;
-		do {
-			memory = cvmx_fpa_alloc(pool);
-			if (memory) {
-				elements--;
-				kfree(phys_to_virt(cvmx_ptr_to_phys(memory)));
-			}
-		} while (memory);
+	char *memory;
+	do {
+		memory = cvmx_fpa_alloc(pool);
+		if (memory) {
+			elements--;
+			kfree(phys_to_virt(cvmx_ptr_to_phys(memory)));
+		}
+	} while (memory);
 
-		if (elements < 0)
-			pr_warning("Freeing of pool %u had too many "
-				   "buffers (%d)\n",
-			       pool, elements);
-		else if (elements > 0)
-			pr_warning("Warning: Freeing of pool %u is "
-				"missing %d buffers\n",
-			     pool, elements);
-	}
+	if (elements < 0)
+		pr_warning("Freeing of pool %u had too many "
+			"buffers (%d)\n",
+			pool, elements);
+	else if (elements > 0)
+		pr_warning("Warning: Freeing of pool %u is "
+			"missing %d buffers\n",
+			pool, elements);
 }
 
 int cvm_oct_mem_fill_fpa(int pool, int size, int elements)
 {
 	int freed;
-	if (USE_SKBUFFS_IN_HW)
+	if (USE_SKBUFFS_IN_HW && pool == CVMX_FPA_PACKET_POOL)
 		freed = cvm_oct_fill_hw_skbuff(pool, size, elements);
 	else
 		freed = cvm_oct_fill_hw_memory(pool, size, elements);
@@ -191,7 +158,7 @@ int cvm_oct_mem_fill_fpa(int pool, int size, int elements)
 
 void cvm_oct_mem_empty_fpa(int pool, int size, int elements)
 {
-	if (USE_SKBUFFS_IN_HW)
+	if (USE_SKBUFFS_IN_HW && pool == CVMX_FPA_PACKET_POOL)
 		cvm_oct_free_hw_skbuff(pool, size, elements);
 	else
 		cvm_oct_free_hw_memory(pool, size, elements);
diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c
index 1b237b7..20906c6 100644
--- a/drivers/staging/octeon/ethernet-rx.c
+++ b/drivers/staging/octeon/ethernet-rx.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2007 Cavium Networks
+ * Copyright (c) 2003-2010 Cavium Networks
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -27,16 +27,14 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/cache.h>
+#include <linux/cpumask.h>
 #include <linux/netdevice.h>
 #include <linux/init.h>
 #include <linux/etherdevice.h>
 #include <linux/ip.h>
 #include <linux/string.h>
 #include <linux/prefetch.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
+#include <linux/smp.h>
 #include <net/dst.h>
 #ifdef CONFIG_XFRM
 #include <linux/xfrm.h>
@@ -48,8 +46,9 @@
 #include <asm/octeon/octeon.h>
 
 #include "ethernet-defines.h"
-#include "octeon-ethernet.h"
 #include "ethernet-mem.h"
+#include "ethernet-rx.h"
+#include "octeon-ethernet.h"
 #include "ethernet-util.h"
 
 #include "cvmx-helper.h"
@@ -61,56 +60,82 @@
 
 #include "cvmx-gmxx-defs.h"
 
-struct cvm_tasklet_wrapper {
-	struct tasklet_struct t;
-};
+struct cvm_napi_wrapper {
+	struct napi_struct napi;
+} ____cacheline_aligned_in_smp;
 
-/*
- * Aligning the tasklet_struct on cachline boundries seems to decrease
- * throughput even though in theory it would reduce contantion on the
- * cache lines containing the locks.
- */
+static struct cvm_napi_wrapper cvm_oct_napi[NR_CPUS] __cacheline_aligned_in_smp;
 
-static struct cvm_tasklet_wrapper cvm_oct_tasklet[NR_CPUS];
+struct cvm_oct_core_state {
+	int baseline_cores;
+	/*
+	 * The number of additional cores that could be processing
+	 * input packtes.
+	 */
+	atomic_t available_cores;
+	cpumask_t cpu_state;
+} ____cacheline_aligned_in_smp;
 
-/**
- * Interrupt handler. The interrupt occurs whenever the POW
- * transitions from 0->1 packets in our group.
- *
- * @cpl:
- * @dev_id:
- * @regs:
- * Returns
- */
-irqreturn_t cvm_oct_do_interrupt(int cpl, void *dev_id)
+static struct cvm_oct_core_state core_state __cacheline_aligned_in_smp;
+
+static void cvm_oct_enable_napi(void *_)
 {
-	/* Acknowledge the interrupt */
-	if (INTERRUPT_LIMIT)
-		cvmx_write_csr(CVMX_POW_WQ_INT, 1 << pow_receive_group);
-	else
-		cvmx_write_csr(CVMX_POW_WQ_INT, 0x10001 << pow_receive_group);
-	preempt_disable();
-	tasklet_schedule(&cvm_oct_tasklet[smp_processor_id()].t);
-	preempt_enable();
-	return IRQ_HANDLED;
+	int cpu = smp_processor_id();
+	napi_schedule(&cvm_oct_napi[cpu].napi);
+}
+
+static void cvm_oct_enable_one_cpu(void)
+{
+	int v;
+	int cpu;
+
+	/* Check to see if more CPUs are available for receive processing... */
+	v = atomic_sub_if_positive(1, &core_state.available_cores);
+	if (v < 0)
+		return;
+
+	/* ... if a CPU is available, Turn on NAPI polling for that CPU.  */
+	for_each_online_cpu(cpu) {
+		if (!cpu_test_and_set(cpu, core_state.cpu_state)) {
+			v = smp_call_function_single(cpu, cvm_oct_enable_napi,
+						     NULL, 0);
+			if (v)
+				panic("Can't enable NAPI.");
+			break;
+		}
+	}
+}
+
+static void cvm_oct_no_more_work(void)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * CPU zero is special.  It always has the irq enabled when
+	 * waiting for incoming packets.
+	 */
+	if (cpu == 0) {
+		enable_irq(OCTEON_IRQ_WORKQ0 + pow_receive_group);
+		return;
+	}
+
+	cpu_clear(cpu, core_state.cpu_state);
+	atomic_add(1, &core_state.available_cores);
 }
 
-#ifdef CONFIG_NET_POLL_CONTROLLER
 /**
- * This is called when the kernel needs to manually poll the
- * device. For Octeon, this is simply calling the interrupt
- * handler. We actually poll all the devices, not just the
- * one supplied.
+ * Interrupt handler. The interrupt occurs whenever the POW
+ * has packets in our group.
  *
- * @dev:    Device to poll. Unused
  */
-void cvm_oct_poll_controller(struct net_device *dev)
+static irqreturn_t cvm_oct_do_interrupt(int cpl, void *dev_id)
 {
-	preempt_disable();
-	tasklet_schedule(&cvm_oct_tasklet[smp_processor_id()].t);
-	preempt_enable();
+	/* Disable the IRQ and start napi_poll. */
+	disable_irq_nosync(OCTEON_IRQ_WORKQ0 + pow_receive_group);
+	cvm_oct_enable_napi(NULL);
+
+	return IRQ_HANDLED;
 }
-#endif
 
 /**
  * This is called on receive errors, and determines if the packet
@@ -199,19 +224,19 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work)
 }
 
 /**
- * Tasklet function that is scheduled on a core when an interrupt occurs.
+ * The NAPI poll function.
  *
- * @unused:
+ * @napi: The NAPI instance, or null if called from cvm_oct_poll_controller
+ * @budget: Maximum number of packets to receive.
  */
-void cvm_oct_tasklet_rx(unsigned long unused)
+static int cvm_oct_napi_poll(struct napi_struct *napi, int budget)
 {
-	const int coreid = cvmx_get_core_num();
-	uint64_t old_group_mask;
-	uint64_t old_scratch;
-	int rx_count = 0;
-	int number_to_free;
-	int num_freed;
-	int packet_not_copied;
+	const int	coreid = cvmx_get_core_num();
+	uint64_t	old_group_mask;
+	uint64_t	old_scratch;
+	int		rx_count = 0;
+	int		did_work_request = 0;
+	int		packet_not_copied;
 
 	/* Prefetch cvm_oct_device since we know we need it soon */
 	prefetch(cvm_oct_device);
@@ -227,59 +252,63 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 	cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid),
 		       (old_group_mask & ~0xFFFFull) | 1 << pow_receive_group);
 
-	if (USE_ASYNC_IOBDMA)
+	if (USE_ASYNC_IOBDMA) {
 		cvmx_pow_work_request_async(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
+		did_work_request = 1;
+	}
 
-	while (1) {
+	while (rx_count < budget) {
 		struct sk_buff *skb = NULL;
+		struct sk_buff **pskb = NULL;
 		int skb_in_hw;
 		cvmx_wqe_t *work;
 
-		if (USE_ASYNC_IOBDMA) {
+		if (USE_ASYNC_IOBDMA && did_work_request)
 			work = cvmx_pow_work_response_async(CVMX_SCR_SCRATCH);
-		} else {
-			if ((INTERRUPT_LIMIT == 0)
-			    || likely(rx_count < MAX_RX_PACKETS))
-				work =
-				    cvmx_pow_work_request_sync
-				    (CVMX_POW_NO_WAIT);
-			else
-				work = NULL;
-		}
+		else
+			work = cvmx_pow_work_request_sync(CVMX_POW_NO_WAIT);
+
 		prefetch(work);
-		if (work == NULL)
+		did_work_request = 0;
+		if (work == NULL) {
+			union cvmx_pow_wq_int wq_int;
+			wq_int.u64 = 0;
+			wq_int.s.iq_dis = 1 << pow_receive_group;
+			wq_int.s.wq_int = 1 << pow_receive_group;
+			cvmx_write_csr(CVMX_POW_WQ_INT, wq_int.u64);
 			break;
+		}
+		pskb = (struct sk_buff **)(cvm_oct_get_buffer_ptr(work->packet_ptr) - sizeof(void *));
+		prefetch(pskb);
 
-		/*
-		 * Limit each core to processing MAX_RX_PACKETS
-		 * packets without a break.  This way the RX can't
-		 * starve the TX task.
-		 */
-		if (USE_ASYNC_IOBDMA) {
-
-			if ((INTERRUPT_LIMIT == 0)
-			    || likely(rx_count < MAX_RX_PACKETS))
-				cvmx_pow_work_request_async_nocheck
-				    (CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
-			else {
-				cvmx_scratch_write64(CVMX_SCR_SCRATCH,
-						     0x8000000000000000ull);
-				cvmx_pow_tag_sw_null_nocheck();
-			}
+		if (USE_ASYNC_IOBDMA && rx_count < (budget - 1)) {
+			cvmx_pow_work_request_async_nocheck(CVMX_SCR_SCRATCH, CVMX_POW_NO_WAIT);
+			did_work_request = 1;
+		}
+
+		if (rx_count == 0) {
+			/*
+			 * First time through, see if there is enough
+			 * work waiting to merit waking another
+			 * CPU.
+			 */
+			union cvmx_pow_wq_int_cntx counts;
+			int backlog;
+			int cores_in_use = core_state.baseline_cores - atomic_read(&core_state.available_cores);
+			counts.u64 = cvmx_read_csr(CVMX_POW_WQ_INT_CNTX(pow_receive_group));
+			backlog = counts.s.iq_cnt + counts.s.ds_cnt;
+			if (backlog > budget * cores_in_use && napi != NULL)
+				cvm_oct_enable_one_cpu();
 		}
 
 		skb_in_hw = USE_SKBUFFS_IN_HW && work->word2.s.bufs == 1;
 		if (likely(skb_in_hw)) {
-			skb =
-			    *(struct sk_buff
-			      **)(cvm_oct_get_buffer_ptr(work->packet_ptr) -
-				  sizeof(void *));
+			skb = *pskb;
 			prefetch(&skb->head);
 			prefetch(&skb->len);
 		}
 		prefetch(cvm_oct_device[work->ipprt]);
 
-		rx_count++;
 		/* Immediately throw away all packets with receive errors */
 		if (unlikely(work->word2.snoip.rcv_error)) {
 			if (cvm_oct_check_rcv_error(work))
@@ -292,39 +321,27 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 		 * buffer.
 		 */
 		if (likely(skb_in_hw)) {
-			/*
-			 * This calculation was changed in case the
-			 * skb header is using a different address
-			 * aliasing type than the buffer. It doesn't
-			 * make any differnece now, but the new one is
-			 * more correct.
-			 */
-			skb->data =
-			    skb->head + work->packet_ptr.s.addr -
-			    cvmx_ptr_to_phys(skb->head);
+			skb->data = skb->head + work->packet_ptr.s.addr - cvmx_ptr_to_phys(skb->head);
 			prefetch(skb->data);
 			skb->len = work->len;
 			skb_set_tail_pointer(skb, skb->len);
 			packet_not_copied = 1;
 		} else {
-
 			/*
 			 * We have to copy the packet. First allocate
 			 * an skbuff for it.
 			 */
 			skb = dev_alloc_skb(work->len);
 			if (!skb) {
-				DEBUGPRINT("Port %d failed to allocate "
-					   "skbuff, packet dropped\n",
-				     work->ipprt);
+				DEBUGPRINT("Port %d failed to allocate skbuff, packet dropped\n",
+					   work->ipprt);
 				cvm_oct_free_work(work);
 				continue;
 			}
 
 			/*
 			 * Check if we've received a packet that was
-			 * entirely stored in the work entry. This is
-			 * untested.
+			 * entirely stored in the work entry.
 			 */
 			if (unlikely(work->word2.s.bufs == 0)) {
 				uint8_t *ptr = work->packet_data;
@@ -343,15 +360,13 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 				/* No packet buffers to free */
 			} else {
 				int segments = work->word2.s.bufs;
-				union cvmx_buf_ptr segment_ptr =
-					work->packet_ptr;
+				union cvmx_buf_ptr segment_ptr = work->packet_ptr;
 				int len = work->len;
 
 				while (segments--) {
 					union cvmx_buf_ptr next_ptr =
-					    *(union cvmx_buf_ptr *)
-					    cvmx_phys_to_ptr(segment_ptr.s.
-							     addr - 8);
+					    *(union cvmx_buf_ptr *)cvmx_phys_to_ptr(segment_ptr.s.addr - 8);
+
 			/*
 			 * Octeon Errata PKI-100: The segment size is
 			 * wrong. Until it is fixed, calculate the
@@ -361,22 +376,17 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 			 * one: int segment_size =
 			 * segment_ptr.s.size;
 			 */
-					int segment_size =
-					    CVMX_FPA_PACKET_POOL_SIZE -
-					    (segment_ptr.s.addr -
-					     (((segment_ptr.s.addr >> 7) -
-					       segment_ptr.s.back) << 7));
-					/* Don't copy more than what is left
-					   in the packet */
+					int segment_size = CVMX_FPA_PACKET_POOL_SIZE - (segment_ptr.s.addr - (((segment_ptr.s.addr >> 7) - segment_ptr.s.back) << 7));
+					/*
+					 * Don't copy more than what
+					 * is left in the packet.
+					 */
 					if (segment_size > len)
 						segment_size = len;
 					/* Copy the data into the packet */
 					memcpy(skb_put(skb, segment_size),
-					       cvmx_phys_to_ptr(segment_ptr.s.
-								addr),
+					       cvmx_phys_to_ptr(segment_ptr.s.addr),
 					       segment_size);
-					/* Reduce the amount of bytes left
-					   to copy */
 					len -= segment_size;
 					segment_ptr = next_ptr;
 				}
@@ -389,16 +399,15 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 			struct net_device *dev = cvm_oct_device[work->ipprt];
 			struct octeon_ethernet *priv = netdev_priv(dev);
 
-			/* Only accept packets for devices
-			   that are currently up */
+			/*
+			 * Only accept packets for devices that are
+			 * currently up.
+			 */
 			if (likely(dev->flags & IFF_UP)) {
 				skb->protocol = eth_type_trans(skb, dev);
 				skb->dev = dev;
 
-				if (unlikely
-				    (work->word2.s.not_IP
-				     || work->word2.s.IP_exc
-				     || work->word2.s.L4_error))
+				if (unlikely(work->word2.s.not_IP || work->word2.s.IP_exc || work->word2.s.L4_error))
 					skb->ip_summed = CHECKSUM_NONE;
 				else
 					skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -413,16 +422,15 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 					atomic_add(skb->len, (atomic_t *)&priv->stats.rx_bytes);
 #endif
 				}
+
 				netif_receive_skb(skb);
+				rx_count++;
 			} else {
+				/* Drop any packet received for a device that isn't up */
 				/*
-				 * Drop any packet received for a
-				 * device that isn't up.
-				 */
-				/*
-				   DEBUGPRINT("%s: Device not up, packet dropped\n",
-				   dev->name);
-				 */
+				DEBUGPRINT("%s: Device not up, packet dropped\n",
+					   dev->name);
+				*/
 #ifdef CONFIG_64BIT
 				atomic64_add(1, (atomic64_t *)&priv->stats.rx_dropped);
 #else
@@ -435,9 +443,8 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 			 * Drop any packet received for a device that
 			 * doesn't exist.
 			 */
-			DEBUGPRINT("Port %d not controlled by Linux, packet "
-				   "dropped\n",
-			     work->ipprt);
+			DEBUGPRINT("Port %d not controlled by Linux, packet dropped\n",
+				   work->ipprt);
 			dev_kfree_skb_irq(skb);
 		}
 		/*
@@ -459,47 +466,93 @@ void cvm_oct_tasklet_rx(unsigned long unused)
 			cvm_oct_free_work(work);
 		}
 	}
-
 	/* Restore the original POW group mask */
 	cvmx_write_csr(CVMX_POW_PP_GRP_MSKX(coreid), old_group_mask);
 	if (USE_ASYNC_IOBDMA) {
 		/* Restore the scratch area */
 		cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
 	}
+	cvm_oct_rx_refill_pool(0);
 
-	if (USE_SKBUFFS_IN_HW) {
-		/* Refill the packet buffer pool */
-		number_to_free =
-		    cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
-
-		if (number_to_free > 0) {
-			cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
-					      -number_to_free);
-			num_freed =
-			    cvm_oct_mem_fill_fpa(CVMX_FPA_PACKET_POOL,
-						 CVMX_FPA_PACKET_POOL_SIZE,
-						 number_to_free);
-			if (num_freed != number_to_free) {
-				cvmx_fau_atomic_add32
-				    (FAU_NUM_PACKET_BUFFERS_TO_FREE,
-				     number_to_free - num_freed);
-			}
-		}
+	if (rx_count < budget && napi != NULL) {
+		/* No more work */
+		napi_complete(napi);
+		cvm_oct_no_more_work();
 	}
+	return rx_count;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/**
+ * This is called when the kernel needs to manually poll the
+ * device.
+ *
+ * @dev:    Device to poll. Unused
+ */
+void cvm_oct_poll_controller(struct net_device *dev)
+{
+	cvm_oct_napi_poll(NULL, 16);
 }
+#endif
 
 void cvm_oct_rx_initialize(void)
 {
 	int i;
-	/* Initialize all of the tasklets */
-	for (i = 0; i < NR_CPUS; i++)
-		tasklet_init(&cvm_oct_tasklet[i].t, cvm_oct_tasklet_rx, 0);
+	struct net_device *dev_for_napi = NULL;
+	union cvmx_pow_wq_int_thrx int_thr;
+	union cvmx_pow_wq_int_pc int_pc;
+
+	for (i = 0; i < TOTAL_NUMBER_OF_PORTS; i++) {
+		if (cvm_oct_device[i]) {
+			dev_for_napi = cvm_oct_device[i];
+			break;
+		}
+	}
+
+	if (NULL == dev_for_napi)
+		panic("No net_devices were allocated.");
+
+	if (max_rx_cpus > 1  && max_rx_cpus < num_online_cpus())
+		atomic_set(&core_state.available_cores, max_rx_cpus);
+	else
+		atomic_set(&core_state.available_cores, num_online_cpus());
+	core_state.baseline_cores = atomic_read(&core_state.available_cores);
+
+	core_state.cpu_state = CPU_MASK_NONE;
+	for_each_possible_cpu(i) {
+		netif_napi_add(dev_for_napi, &cvm_oct_napi[i].napi,
+			       cvm_oct_napi_poll, rx_napi_weight);
+		napi_enable(&cvm_oct_napi[i].napi);
+	}
+	/* Register an IRQ hander for to receive POW interrupts */
+	i = request_irq(OCTEON_IRQ_WORKQ0 + pow_receive_group,
+			cvm_oct_do_interrupt, 0, "Ethernet", cvm_oct_device);
+
+	if (i)
+		panic("Could not acquire Ethernet IRQ %d\n",
+		      OCTEON_IRQ_WORKQ0 + pow_receive_group);
+
+	disable_irq_nosync(OCTEON_IRQ_WORKQ0 + pow_receive_group);
+
+	int_thr.u64 = 0;
+	int_thr.s.tc_en = 1;
+	int_thr.s.tc_thr = 1;
+	/* Enable POW interrupt when our port has at least one packet */
+	cvmx_write_csr(CVMX_POW_WQ_INT_THRX(pow_receive_group), int_thr.u64);
+
+	int_pc.u64 = 0;
+	int_pc.s.pc_thr = 5;
+	cvmx_write_csr(CVMX_POW_WQ_INT_PC, int_pc.u64);
+
+
+	/* Scheduld NAPI now.  This will indirectly enable interrupts. */
+	cvm_oct_enable_one_cpu();
 }
 
 void cvm_oct_rx_shutdown(void)
 {
 	int i;
-	/* Shutdown all of the tasklets */
-	for (i = 0; i < NR_CPUS; i++)
-		tasklet_kill(&cvm_oct_tasklet[i].t);
+	/* Shutdown all of the NAPIs */
+	for_each_possible_cpu(i)
+		netif_napi_del(&cvm_oct_napi[i].napi);
 }
diff --git a/drivers/staging/octeon/ethernet-rx.h b/drivers/staging/octeon/ethernet-rx.h
index a9b72b8..a0743b8 100644
--- a/drivers/staging/octeon/ethernet-rx.h
+++ b/drivers/staging/octeon/ethernet-rx.h
@@ -24,10 +24,29 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium Networks for more information
 *********************************************************************/
+#include "cvmx-fau.h"
 
-irqreturn_t cvm_oct_do_interrupt(int cpl, void *dev_id);
 void cvm_oct_poll_controller(struct net_device *dev);
-void cvm_oct_tasklet_rx(unsigned long unused);
-
 void cvm_oct_rx_initialize(void);
 void cvm_oct_rx_shutdown(void);
+
+static inline void cvm_oct_rx_refill_pool(int fill_threshold)
+{
+	int number_to_free;
+	int num_freed;
+	/* Refill the packet buffer pool */
+	number_to_free =
+		cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
+
+	if (number_to_free > fill_threshold) {
+		cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
+				      -number_to_free);
+		num_freed = cvm_oct_mem_fill_fpa(CVMX_FPA_PACKET_POOL,
+						 CVMX_FPA_PACKET_POOL_SIZE,
+						 number_to_free);
+		if (num_freed != number_to_free) {
+			cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE,
+					number_to_free - num_freed);
+		}
+	}
+}
diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index 5352941..28e2de6 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -4,7 +4,7 @@
  * Contact: support@caviumnetworks.com
  * This file is part of the OCTEON SDK
  *
- * Copyright (c) 2003-2007 Cavium Networks
+ * Copyright (c) 2003-2010 Cavium Networks
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -31,10 +31,6 @@
 #include <linux/etherdevice.h>
 #include <linux/ip.h>
 #include <linux/string.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
 #include <net/dst.h>
 #ifdef CONFIG_XFRM
 #include <linux/xfrm.h>
@@ -68,6 +64,49 @@
 #define GET_SKBUFF_QOS(skb) 0
 #endif
 
+
+static inline int32_t cvm_oct_adjust_skb_to_free(int32_t skb_to_free, int fau)
+{
+	int32_t undo;
+	undo = skb_to_free > 0 ? MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
+	if (undo > 0)
+		cvmx_fau_atomic_add32(fau, -undo);
+	skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ? MAX_SKB_TO_FREE : -skb_to_free;
+	return skb_to_free;
+}
+
+void cvm_oct_free_tx_skbs(struct octeon_ethernet *priv)
+{
+	int32_t skb_to_free;
+	int qos, queues_per_port;
+	queues_per_port = cvmx_pko_get_num_queues(priv->port);
+	/* Drain any pending packets in the free list */
+	for (qos = 0; qos < queues_per_port; qos++) {
+		if (skb_queue_len(&priv->tx_free_list[qos]) == 0)
+			continue;
+		skb_to_free = cvmx_fau_fetch_and_add32(priv->fau+qos*4, MAX_SKB_TO_FREE);
+		skb_to_free = cvm_oct_adjust_skb_to_free(skb_to_free, priv->fau+qos*4);
+
+		while (skb_to_free > 0) {
+			dev_kfree_skb_any(skb_dequeue(&priv->tx_free_list[qos]));
+			skb_to_free--;
+		}
+	}
+}
+
+enum hrtimer_restart cvm_oct_restart_tx(struct hrtimer *timer)
+{
+	struct octeon_ethernet *priv = container_of(timer, struct octeon_ethernet, tx_restart_timer);
+	struct net_device *dev = cvm_oct_device[priv->port];
+
+	cvm_oct_free_tx_skbs(priv);
+
+	if (netif_queue_stopped(dev))
+		netif_wake_queue(dev);
+
+	return HRTIMER_NORESTART;
+}
+
 /**
  * Packet transmit
  *
@@ -81,13 +120,13 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	union cvmx_buf_ptr hw_buffer;
 	uint64_t old_scratch;
 	uint64_t old_scratch2;
-	int dropped;
 	int qos;
-	int queue_it_up;
+	enum {QUEUE_CORE, QUEUE_HW, QUEUE_DROP} queue_type;
 	struct octeon_ethernet *priv = netdev_priv(dev);
+	struct sk_buff *to_free_list;
 	int32_t skb_to_free;
-	int32_t undo;
 	int32_t buffers_to_free;
+	unsigned long flags;
 #if REUSE_SKBUFFS_WITHOUT_FREE
 	unsigned char *fpa_head;
 #endif
@@ -98,9 +137,6 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	prefetch(priv);
 
-	/* Start off assuming no drop */
-	dropped = 0;
-
 	/*
 	 * The check on CVMX_PKO_QUEUES_PER_PORT_* is designed to
 	 * completely remove "qos" in the event neither interface
@@ -190,7 +226,7 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * shown a 25% increase in performance under some loads.
 	 */
 #if REUSE_SKBUFFS_WITHOUT_FREE
-	fpa_head = skb->head + 128 - ((unsigned long)skb->head & 0x7f);
+	fpa_head = skb->head + 256 - ((unsigned long)skb->head & 0x7f);
 	if (unlikely(skb->data < fpa_head)) {
 		/*
 		 * printk("TX buffer beginning can't meet FPA
@@ -251,7 +287,7 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	pko_command.s.reg0 = 0;
 	pko_command.s.dontfree = 0;
 
-	hw_buffer.s.back = (skb->data - fpa_head) >> 7;
+	hw_buffer.s.back = ((unsigned long)skb->data >> 7) - ((unsigned long)fpa_head >> 7);
 	*(struct sk_buff **)(fpa_head - sizeof(void *)) = skb;
 
 	/*
@@ -272,9 +308,9 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb->tc_verd = 0;
 #endif /* CONFIG_NET_CLS_ACT */
 #endif /* CONFIG_NET_SCHED */
+#endif /* REUSE_SKBUFFS_WITHOUT_FREE */
 
 dont_put_skbuff_in_hw:
-#endif /* REUSE_SKBUFFS_WITHOUT_FREE */
 
 	/* Check if we can use the hardware checksumming */
 	if (USE_HW_TCPUDP_CHECKSUM && (skb->protocol == htons(ETH_P_IP)) &&
@@ -299,18 +335,7 @@ dont_put_skbuff_in_hw:
 		    cvmx_fau_fetch_and_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, 0);
 	}
 
-	/*
-	 * We try to claim MAX_SKB_TO_FREE buffers.  If there were not
-	 * that many available, we have to un-claim (undo) any that
-	 * were in excess.  If skb_to_free is positive we will free
-	 * that many buffers.
-	 */
-	undo = skb_to_free > 0 ?
-		MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
-	if (undo > 0)
-		cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
-	skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
-		MAX_SKB_TO_FREE : -skb_to_free;
+	skb_to_free = cvm_oct_adjust_skb_to_free(skb_to_free, priv->fau+qos*4);
 
 	/*
 	 * If we're sending faster than the receive can free them then
@@ -321,60 +346,79 @@ dont_put_skbuff_in_hw:
 		pko_command.s.reg0 = priv->fau + qos * 4;
 	}
 
-	cvmx_pko_send_packet_prepare(priv->port, priv->queue + qos,
-				     CVMX_PKO_LOCK_CMD_QUEUE);
+	if (pko_command.s.dontfree)
+		queue_type = QUEUE_CORE;
+	else
+		queue_type = QUEUE_HW;
+
+	spin_lock_irqsave(&priv->tx_free_list[qos].lock, flags);
 
 	/* Drop this packet if we have too many already queued to the HW */
-	if (unlikely
-	    (skb_queue_len(&priv->tx_free_list[qos]) >= MAX_OUT_QUEUE_DEPTH)) {
-		/*
-		   DEBUGPRINT("%s: Tx dropped. Too many queued\n", dev->name);
-		 */
-		dropped = 1;
+	if (unlikely(skb_queue_len(&priv->tx_free_list[qos]) >= MAX_OUT_QUEUE_DEPTH)) {
+		if (dev->tx_queue_len != 0) {
+			netif_stop_queue(dev);
+			hrtimer_start(&priv->tx_restart_timer,
+				      priv->tx_restart_interval, HRTIMER_MODE_REL);
+		} else {
+			/* If not using normal queueing.  */
+			queue_type = QUEUE_DROP;
+			goto skip_xmit;
+		}
 	}
+
+	cvmx_pko_send_packet_prepare(priv->port, priv->queue + qos,
+				     CVMX_PKO_LOCK_NONE);
+
 	/* Send the packet to the output queue */
-	else if (unlikely
-		 (cvmx_pko_send_packet_finish
-		  (priv->port, priv->queue + qos, pko_command, hw_buffer,
-		   CVMX_PKO_LOCK_CMD_QUEUE))) {
+	if (unlikely(cvmx_pko_send_packet_finish(priv->port,
+						 priv->queue + qos,
+						 pko_command, hw_buffer,
+						 CVMX_PKO_LOCK_NONE))) {
 		DEBUGPRINT("%s: Failed to send the packet\n", dev->name);
-		dropped = 1;
+		queue_type = QUEUE_DROP;
 	}
+skip_xmit:
+	to_free_list = NULL;
 
-	if (USE_ASYNC_IOBDMA) {
-		/* Restore the scratch area */
-		cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
-		cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2);
+	switch (queue_type) {
+	case QUEUE_DROP:
+		skb->next = to_free_list;
+		to_free_list = skb;
+		priv->stats.tx_dropped++;
+		break;
+	case QUEUE_HW:
+		cvmx_fau_atomic_add32(FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
+		break;
+	case QUEUE_CORE:
+		__skb_queue_tail(&priv->tx_free_list[qos], skb);
+		break;
+	default:
+		BUG();
 	}
 
-	queue_it_up = 0;
-	if (unlikely(dropped)) {
-		dev_kfree_skb_any(skb);
-		priv->stats.tx_dropped++;
-	} else {
-		if (USE_SKBUFFS_IN_HW) {
-			/* Put this packet on the queue to be freed later */
-			if (pko_command.s.dontfree)
-				queue_it_up = 1;
-			else
-				cvmx_fau_atomic_add32
-				    (FAU_NUM_PACKET_BUFFERS_TO_FREE, -1);
-		} else {
-			/* Put this packet on the queue to be freed later */
-			queue_it_up = 1;
-		}
+	while (skb_to_free > 0) {
+		struct sk_buff *t = __skb_dequeue(&priv->tx_free_list[qos]);
+		t->next = to_free_list;
+		to_free_list = t;
+		skb_to_free--;
 	}
 
-	if (queue_it_up) {
-		spin_lock(&priv->tx_free_list[qos].lock);
-		__skb_queue_tail(&priv->tx_free_list[qos], skb);
-		cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 0);
-		spin_unlock(&priv->tx_free_list[qos].lock);
-	} else {
-		cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
+	spin_unlock_irqrestore(&priv->tx_free_list[qos].lock, flags);
+
+	/* Do the actual freeing outside of the lock. */
+	while (to_free_list) {
+		struct sk_buff *t = to_free_list;
+		to_free_list = to_free_list->next;
+		dev_kfree_skb_any(t);
 	}
 
-	return 0;
+	if (USE_ASYNC_IOBDMA) {
+		/* Restore the scratch area */
+		cvmx_scratch_write64(CVMX_SCR_SCRATCH, old_scratch);
+		cvmx_scratch_write64(CVMX_SCR_SCRATCH + 8, old_scratch2);
+	}
+
+	return NETDEV_TX_OK;
 }
 
 /**
@@ -529,101 +573,6 @@ int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev)
 }
 
 /**
- * Transmit a work queue entry out of the ethernet port. Both
- * the work queue entry and the packet data can optionally be
- * freed. The work will be freed on error as well.
- *
- * @dev:     Device to transmit out.
- * @work_queue_entry:
- *                Work queue entry to send
- * @do_free: True if the work queue entry and packet data should be
- *                freed. If false, neither will be freed.
- * @qos:     Index into the queues for this port to transmit on. This
- *                is used to implement QoS if their are multiple queues per
- *                port. This parameter must be between 0 and the number of
- *                queues per port minus 1. Values outside of this range will
- *                be change to zero.
- *
- * Returns Zero on success, negative on failure.
- */
-int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry,
-			 int do_free, int qos)
-{
-	unsigned long flags;
-	union cvmx_buf_ptr hw_buffer;
-	cvmx_pko_command_word0_t pko_command;
-	int dropped;
-	struct octeon_ethernet *priv = netdev_priv(dev);
-	cvmx_wqe_t *work = work_queue_entry;
-
-	if (!(dev->flags & IFF_UP)) {
-		DEBUGPRINT("%s: Device not up\n", dev->name);
-		if (do_free)
-			cvm_oct_free_work(work);
-		return -1;
-	}
-
-	/* The check on CVMX_PKO_QUEUES_PER_PORT_* is designed to completely
-	   remove "qos" in the event neither interface supports
-	   multiple queues per port */
-	if ((CVMX_PKO_QUEUES_PER_PORT_INTERFACE0 > 1) ||
-	    (CVMX_PKO_QUEUES_PER_PORT_INTERFACE1 > 1)) {
-		if (qos <= 0)
-			qos = 0;
-		else if (qos >= cvmx_pko_get_num_queues(priv->port))
-			qos = 0;
-	} else
-		qos = 0;
-
-	/* Start off assuming no drop */
-	dropped = 0;
-
-	local_irq_save(flags);
-	cvmx_pko_send_packet_prepare(priv->port, priv->queue + qos,
-				     CVMX_PKO_LOCK_CMD_QUEUE);
-
-	/* Build the PKO buffer pointer */
-	hw_buffer.u64 = 0;
-	hw_buffer.s.addr = work->packet_ptr.s.addr;
-	hw_buffer.s.pool = CVMX_FPA_PACKET_POOL;
-	hw_buffer.s.size = CVMX_FPA_PACKET_POOL_SIZE;
-	hw_buffer.s.back = work->packet_ptr.s.back;
-
-	/* Build the PKO command */
-	pko_command.u64 = 0;
-	pko_command.s.n2 = 1;	/* Don't pollute L2 with the outgoing packet */
-	pko_command.s.dontfree = !do_free;
-	pko_command.s.segs = work->word2.s.bufs;
-	pko_command.s.total_bytes = work->len;
-
-	/* Check if we can use the hardware checksumming */
-	if (unlikely(work->word2.s.not_IP || work->word2.s.IP_exc))
-		pko_command.s.ipoffp1 = 0;
-	else
-		pko_command.s.ipoffp1 = sizeof(struct ethhdr) + 1;
-
-	/* Send the packet to the output queue */
-	if (unlikely
-	    (cvmx_pko_send_packet_finish
-	     (priv->port, priv->queue + qos, pko_command, hw_buffer,
-	      CVMX_PKO_LOCK_CMD_QUEUE))) {
-		DEBUGPRINT("%s: Failed to send the packet\n", dev->name);
-		dropped = -1;
-	}
-	local_irq_restore(flags);
-
-	if (unlikely(dropped)) {
-		if (do_free)
-			cvm_oct_free_work(work);
-		priv->stats.tx_dropped++;
-	} else if (do_free)
-		cvmx_fpa_free(work, CVMX_FPA_WQE_POOL, DONT_WRITEBACK(1));
-
-	return dropped;
-}
-EXPORT_SYMBOL(cvm_oct_transmit_qos);
-
-/**
  * This function frees all skb that are currently queued for TX.
  *
  * @dev:    Device being shutdown
diff --git a/drivers/staging/octeon/ethernet-tx.h b/drivers/staging/octeon/ethernet-tx.h
index c0bebf7..b628d8c 100644
--- a/drivers/staging/octeon/ethernet-tx.h
+++ b/drivers/staging/octeon/ethernet-tx.h
@@ -30,28 +30,5 @@ int cvm_oct_xmit_pow(struct sk_buff *skb, struct net_device *dev);
 int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry,
 			 int do_free, int qos);
 void cvm_oct_tx_shutdown(struct net_device *dev);
-
-/**
- * Free dead transmit skbs.
- *
- * @priv:		The driver data
- * @skb_to_free:	The number of SKBs to free (free none if negative).
- * @qos:		The queue to free from.
- * @take_lock:		If true, acquire the skb list lock.
- */
-static inline void cvm_oct_free_tx_skbs(struct octeon_ethernet *priv,
-					int skb_to_free,
-					int qos, int take_lock)
-{
-	/* Free skbuffs not in use by the hardware.  */
-	if (skb_to_free > 0) {
-		if (take_lock)
-			spin_lock(&priv->tx_free_list[qos].lock);
-		while (skb_to_free > 0) {
-			dev_kfree_skb(__skb_dequeue(&priv->tx_free_list[qos]));
-			skb_to_free--;
-		}
-		if (take_lock)
-			spin_unlock(&priv->tx_free_list[qos].lock);
-	}
-}
+void cvm_oct_free_tx_skbs(struct octeon_ethernet *priv);
+enum hrtimer_restart cvm_oct_restart_tx(struct hrtimer *timer);
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index 4cfd4b1..2fc1e8e 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -104,12 +104,15 @@ MODULE_PARM_DESC(pow_send_list, "\n"
 	"\t\"eth2,spi3,spi7\" would cause these three devices to transmit\n"
 	"\tusing the pow_send_group.");
 
-static int disable_core_queueing = 1;
-module_param(disable_core_queueing, int, 0444);
-MODULE_PARM_DESC(disable_core_queueing, "\n"
-	"\tWhen set the networking core's tx_queue_len is set to zero.  This\n"
-	"\tallows packets to be sent without lock contention in the packet\n"
-	"\tscheduler resulting in some cases in improved throughput.\n");
+int max_rx_cpus = -1;
+module_param(max_rx_cpus, int, 0444);
+MODULE_PARM_DESC(max_rx_cpus, "\n"
+	"\t\tThe maximum number of CPUs to use for packet reception.\n"
+	"\t\tUse -1 to use all available CPUs.");
+
+int rx_napi_weight = 32;
+module_param(rx_napi_weight, int, 0444);
+MODULE_PARM_DESC(rx_napi_weight, "The NAPI WEIGHT parameter.");
 
 
 /*
@@ -139,50 +142,38 @@ struct net_device *cvm_oct_device[TOTAL_NUMBER_OF_PORTS];
  */
 static void cvm_do_timer(unsigned long arg)
 {
-	int32_t skb_to_free, undo;
-	int queues_per_port;
-	int qos;
-	struct octeon_ethernet *priv;
 	static int port;
-
-	if (port >= CVMX_PIP_NUM_INPUT_PORTS) {
+	if (port < CVMX_PIP_NUM_INPUT_PORTS) {
+		if (cvm_oct_device[port]) {
+			struct octeon_ethernet *priv = netdev_priv(cvm_oct_device[port]);
+			if (priv->poll)
+				priv->poll(cvm_oct_device[port]);
+			cvm_oct_free_tx_skbs(priv);
+			cvm_oct_device[port]->netdev_ops->ndo_get_stats(cvm_oct_device[port]);
+		}
+		port++;
 		/*
-		 * All ports have been polled. Start the next
-		 * iteration through the ports in one second.
+		 * Poll the next port in a 50th of a second.  This
+		 * spreads the polling of ports out a little bit.
 		 */
+		mod_timer(&cvm_oct_poll_timer, jiffies + HZ/50);
+	} else {
 		port = 0;
+		/*
+		 * FPA 0 may have been drained, try to refill it if we
+		 * need more than num_packet_buffers / 2, otherwise
+		 * normal receive processing will refill it.  If it
+		 * were drained, no packets could be received so
+		 * cvm_oct_napi_poll would never be invoked to do the
+		 * refill.
+		 */
+		cvm_oct_rx_refill_pool(num_packet_buffers / 2);
+		/*
+		 * All ports have been polled. Start the next iteration through
+		 * the ports in one second.
+		 */
 		mod_timer(&cvm_oct_poll_timer, jiffies + HZ);
-		return;
 	}
-	if (!cvm_oct_device[port])
-		goto out;
-
-	priv = netdev_priv(cvm_oct_device[port]);
-	if (priv->poll)
-		priv->poll(cvm_oct_device[port]);
-
-	queues_per_port = cvmx_pko_get_num_queues(port);
-	/* Drain any pending packets in the free list */
-	for (qos = 0; qos < queues_per_port; qos++) {
-		if (skb_queue_len(&priv->tx_free_list[qos]) == 0)
-			continue;
-		skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
-						       MAX_SKB_TO_FREE);
-		undo = skb_to_free > 0 ?
-			MAX_SKB_TO_FREE : skb_to_free + MAX_SKB_TO_FREE;
-		if (undo > 0)
-			cvmx_fau_atomic_add32(priv->fau+qos*4, -undo);
-		skb_to_free = -skb_to_free > MAX_SKB_TO_FREE ?
-			MAX_SKB_TO_FREE : -skb_to_free;
-		cvm_oct_free_tx_skbs(priv, skb_to_free, qos, 1);
-	}
-	cvm_oct_device[port]->netdev_ops->ndo_get_stats(cvm_oct_device[port]);
-
-out:
-	port++;
-	/* Poll the next port in a 50th of a second.
-	   This spreads the polling of ports out a little bit */
-	mod_timer(&cvm_oct_poll_timer, jiffies + HZ / 50);
 }
 
 /**
@@ -190,7 +181,6 @@ out:
  */
 static __init void cvm_oct_configure_common_hw(void)
 {
-	int r;
 	/* Setup the FPA */
 	cvmx_fpa_enable();
 	cvm_oct_mem_fill_fpa(CVMX_FPA_PACKET_POOL, CVMX_FPA_PACKET_POOL_SIZE,
@@ -205,21 +195,6 @@ static __init void cvm_oct_configure_common_hw(void)
 		cvmx_helper_setup_red(num_packet_buffers / 4,
 				      num_packet_buffers / 8);
 
-	/* Enable the MII interface */
-	if (!octeon_is_simulation())
-		cvmx_write_csr(CVMX_SMIX_EN(0), 1);
-
-	/* Register an IRQ hander for to receive POW interrupts */
-	r = request_irq(OCTEON_IRQ_WORKQ0 + pow_receive_group,
-			cvm_oct_do_interrupt, IRQF_SHARED, "Ethernet",
-			cvm_oct_device);
-
-#if defined(CONFIG_SMP) && 0
-	if (USE_MULTICORE_RECEIVE) {
-		irq_set_affinity(OCTEON_IRQ_WORKQ0 + pow_receive_group,
-				 cpu_online_mask);
-	}
-#endif
 }
 
 /**
@@ -649,7 +624,6 @@ static int __init cvm_oct_init_module(void)
 		cvm_oct_mac_addr_offset = 0;
 
 	cvm_oct_proc_initialize();
-	cvm_oct_rx_initialize();
 	cvm_oct_configure_common_hw();
 
 	cvmx_helper_initialize_packet_io_global();
@@ -689,7 +663,18 @@ static int __init cvm_oct_init_module(void)
 		if (dev) {
 			/* Initialize the device private structure. */
 			struct octeon_ethernet *priv = netdev_priv(dev);
-			memset(priv, 0, sizeof(struct octeon_ethernet));
+
+			hrtimer_init(&priv->tx_restart_timer,
+				     CLOCK_MONOTONIC,
+				     HRTIMER_MODE_REL);
+			priv->tx_restart_timer.function = cvm_oct_restart_tx;
+
+			/*
+			 * Default for 10GE 5000nS enough time to
+			 * transmit about 100 64byte packtes.  1GE
+			 * interfaces will get 50000nS below.
+			 */
+			priv->tx_restart_interval = ktime_set(0, 5000);
 
 			dev->netdev_ops = &cvm_oct_pow_netdev_ops;
 			priv->imode = CVMX_HELPER_INTERFACE_MODE_DISABLED;
@@ -700,19 +685,16 @@ static int __init cvm_oct_init_module(void)
 				skb_queue_head_init(&priv->tx_free_list[qos]);
 
 			if (register_netdev(dev) < 0) {
-				pr_err("Failed to register ethernet "
-					 "device for POW\n");
+				pr_err("Failed to register ethernet device for POW\n");
 				kfree(dev);
 			} else {
 				cvm_oct_device[CVMX_PIP_NUM_INPUT_PORTS] = dev;
-				pr_info("%s: POW send group %d, receive "
-					"group %d\n",
-				     dev->name, pow_send_group,
-				     pow_receive_group);
+				pr_info("%s: POW send group %d, receive group %d\n",
+					dev->name, pow_send_group,
+					pow_receive_group);
 			}
 		} else {
-			pr_err("Failed to allocate ethernet device "
-				 "for POW\n");
+			pr_err("Failed to allocate ethernet device for POW\n");
 		}
 	}
 
@@ -730,12 +712,10 @@ static int __init cvm_oct_init_module(void)
 			struct net_device *dev =
 			    alloc_etherdev(sizeof(struct octeon_ethernet));
 			if (!dev) {
-				pr_err("Failed to allocate ethernet device "
-					 "for port %d\n", port);
+				pr_err("Failed to allocate ethernet device for port %d\n",
+				       port);
 				continue;
 			}
-			if (disable_core_queueing)
-				dev->tx_queue_len = 0;
 
 			/* Initialize the device private structure. */
 			priv = netdev_priv(dev);
@@ -776,6 +756,7 @@ static int __init cvm_oct_init_module(void)
 
 			case CVMX_HELPER_INTERFACE_MODE_SGMII:
 				dev->netdev_ops = &cvm_oct_sgmii_netdev_ops;
+				priv->tx_restart_interval = ktime_set(0, 50000);
 				strcpy(dev->name, "eth%d");
 				break;
 
@@ -787,6 +768,7 @@ static int __init cvm_oct_init_module(void)
 			case CVMX_HELPER_INTERFACE_MODE_RGMII:
 			case CVMX_HELPER_INTERFACE_MODE_GMII:
 				dev->netdev_ops = &cvm_oct_rgmii_netdev_ops;
+				priv->tx_restart_interval = ktime_set(0, 50000);
 				strcpy(dev->name, "eth%d");
 				break;
 			}
@@ -807,25 +789,7 @@ static int __init cvm_oct_init_module(void)
 		}
 	}
 
-	if (INTERRUPT_LIMIT) {
-		/*
-		 * Set the POW timer rate to give an interrupt at most
-		 * INTERRUPT_LIMIT times per second.
-		 */
-		cvmx_write_csr(CVMX_POW_WQ_INT_PC,
-			       octeon_bootinfo->eclock_hz / (INTERRUPT_LIMIT *
-							     16 * 256) << 8);
-
-		/*
-		 * Enable POW timer interrupt. It will count when
-		 * there are packets available.
-		 */
-		cvmx_write_csr(CVMX_POW_WQ_INT_THRX(pow_receive_group),
-			       0x1ful << 24);
-	} else {
-		/* Enable POW interrupt when our port has at least one packet */
-		cvmx_write_csr(CVMX_POW_WQ_INT_THRX(pow_receive_group), 0x1001);
-	}
+	cvm_oct_rx_initialize();
 
 	/* Enable the poll timer for checking RGMII status */
 	init_timer(&cvm_oct_poll_timer);
diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h
index 402a15b..40b6956 100644
--- a/drivers/staging/octeon/octeon-ethernet.h
+++ b/drivers/staging/octeon/octeon-ethernet.h
@@ -31,6 +31,8 @@
 #ifndef OCTEON_ETHERNET_H
 #define OCTEON_ETHERNET_H
 
+#include <linux/hrtimer.h>
+
 /**
  * This is the definition of the Ethernet driver's private
  * driver state stored in netdev_priv(dev).
@@ -57,6 +59,8 @@ struct octeon_ethernet {
 	uint64_t link_info;
 	/* Called periodically to check link status */
 	void (*poll) (struct net_device *dev);
+	struct hrtimer		tx_restart_timer;
+	ktime_t			tx_restart_interval;
 };
 
 /**
@@ -68,47 +72,6 @@ struct octeon_ethernet {
  */
 int cvm_oct_free_work(void *work_queue_entry);
 
-/**
- * Transmit a work queue entry out of the ethernet port. Both
- * the work queue entry and the packet data can optionally be
- * freed. The work will be freed on error as well.
- *
- * @dev:     Device to transmit out.
- * @work_queue_entry:
- *                Work queue entry to send
- * @do_free: True if the work queue entry and packet data should be
- *                freed. If false, neither will be freed.
- * @qos:     Index into the queues for this port to transmit on. This
- *                is used to implement QoS if their are multiple queues per
- *                port. This parameter must be between 0 and the number of
- *                queues per port minus 1. Values outside of this range will
- *                be change to zero.
- *
- * Returns Zero on success, negative on failure.
- */
-int cvm_oct_transmit_qos(struct net_device *dev, void *work_queue_entry,
-			 int do_free, int qos);
-
-/**
- * Transmit a work queue entry out of the ethernet port. Both
- * the work queue entry and the packet data can optionally be
- * freed. The work will be freed on error as well. This simply
- * wraps cvmx_oct_transmit_qos() for backwards compatability.
- *
- * @dev:     Device to transmit out.
- * @work_queue_entry:
- *                Work queue entry to send
- * @do_free: True if the work queue entry and packet data should be
- *                freed. If false, neither will be freed.
- *
- * Returns Zero on success, negative on failure.
- */
-static inline int cvm_oct_transmit(struct net_device *dev,
-				   void *work_queue_entry, int do_free)
-{
-	return cvm_oct_transmit_qos(dev, work_queue_entry, do_free, 0);
-}
-
 extern int cvm_oct_rgmii_init(struct net_device *dev);
 extern void cvm_oct_rgmii_uninit(struct net_device *dev);
 extern int cvm_oct_rgmii_open(struct net_device *dev);
@@ -135,4 +98,7 @@ extern int pow_receive_group;
 extern char pow_send_list[];
 extern struct net_device *cvm_oct_device[];
 
+extern int max_rx_cpus;
+extern int rx_napi_weight;
+
 #endif
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] Staging: Octeon Ethernet: Enable scatter-gather.
  2010-01-07  0:55 [PATCH 0/3] Staging: Improvments to Octeon Ethernet driver David Daney
  2010-01-07  0:57 ` [PATCH 1/3] MIPS: Octeon: Fix EIO handling David Daney
  2010-01-07  0:57 ` [PATCH 2/3] Staging: Octeon Ethernet: Clean up and convert to NAPI David Daney
@ 2010-01-07  0:57 ` David Daney
  2 siblings, 0 replies; 5+ messages in thread
From: David Daney @ 2010-01-07  0:57 UTC (permalink / raw)
  To: ralf, linux-mips, netdev, gregkh; +Cc: David Daney

Octeon ethernet hardware can handle NETIF_F_SG, so we enable it.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 drivers/staging/octeon/ethernet-tx.c |   57 +++++++++++++++++++++++++++++----
 drivers/staging/octeon/ethernet.c    |    7 +++-
 2 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index 28e2de6..c4007a6 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -53,6 +53,8 @@
 
 #include "cvmx-gmxx-defs.h"
 
+#define CVM_OCT_SKB_CB(skb)	((u64 *)((skb)->cb))
+
 /*
  * You can define GET_SKBUFF_QOS() to override how the skbuff output
  * function determines which output queue is used. The default
@@ -121,6 +123,7 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	uint64_t old_scratch;
 	uint64_t old_scratch2;
 	int qos;
+	int i;
 	enum {QUEUE_CORE, QUEUE_HW, QUEUE_DROP} queue_type;
 	struct octeon_ethernet *priv = netdev_priv(dev);
 	struct sk_buff *to_free_list;
@@ -171,6 +174,28 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	/*
+	 * We have space for 6 segment pointers, If there will be more
+	 * than that, we must linearize.
+	 */
+	if (unlikely(skb_shinfo(skb)->nr_frags > 5)) {
+		if (unlikely(__skb_linearize(skb))) {
+			queue_type = QUEUE_DROP;
+			if (USE_ASYNC_IOBDMA) {
+				/* Get the number of skbuffs in use by the hardware */
+				CVMX_SYNCIOBDMA;
+				skb_to_free = cvmx_scratch_read64(CVMX_SCR_SCRATCH);
+			} else {
+				/* Get the number of skbuffs in use by the hardware */
+				skb_to_free = cvmx_fau_fetch_and_add32(priv->fau + qos * 4,
+								       MAX_SKB_TO_FREE);
+			}
+			skb_to_free = cvm_oct_adjust_skb_to_free(skb_to_free, priv->fau + qos * 4);
+			spin_lock_irqsave(&priv->tx_free_list[qos].lock, flags);
+			goto skip_xmit;
+		}
+	}
+
+	/*
 	 * The CN3XXX series of parts has an errata (GMX-401) which
 	 * causes the GMX block to hang if a collision occurs towards
 	 * the end of a <68 byte packet. As a workaround for this, we
@@ -198,13 +223,6 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	/* Build the PKO buffer pointer */
-	hw_buffer.u64 = 0;
-	hw_buffer.s.addr = cvmx_ptr_to_phys(skb->data);
-	hw_buffer.s.pool = 0;
-	hw_buffer.s.size =
-	    (unsigned long)skb_end_pointer(skb) - (unsigned long)skb->head;
-
 	/* Build the PKO command */
 	pko_command.u64 = 0;
 	pko_command.s.n2 = 1;	/* Don't pollute L2 with the outgoing packet */
@@ -215,6 +233,31 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	pko_command.s.dontfree = 1;
 	pko_command.s.reg0 = priv->fau + qos * 4;
+
+	/* Build the PKO buffer pointer */
+	hw_buffer.u64 = 0;
+	if (skb_shinfo(skb)->nr_frags == 0) {
+		hw_buffer.s.addr = XKPHYS_TO_PHYS(skb->data);
+		hw_buffer.s.pool = 0;
+		hw_buffer.s.size = skb->len;
+	} else {
+		hw_buffer.s.addr = XKPHYS_TO_PHYS(skb->data);
+		hw_buffer.s.pool = 0;
+		hw_buffer.s.size = skb_headlen(skb);
+		CVM_OCT_SKB_CB(skb)[0] = hw_buffer.u64;
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			struct skb_frag_struct *fs = skb_shinfo(skb)->frags + i;
+			hw_buffer.s.addr = XKPHYS_TO_PHYS(page_address(fs->page) + fs->page_offset);
+			hw_buffer.s.size = fs->size;
+			CVM_OCT_SKB_CB(skb)[i + 1] = hw_buffer.u64;
+		}
+		hw_buffer.s.addr = XKPHYS_TO_PHYS(CVM_OCT_SKB_CB(skb));
+		hw_buffer.s.size = skb_shinfo(skb)->nr_frags + 1;
+		pko_command.s.segs = skb_shinfo(skb)->nr_frags + 1;
+		pko_command.s.gather = 1;
+		goto dont_put_skbuff_in_hw;
+	}
+
 	/*
 	 * See if we can put this skb in the FPA pool. Any strange
 	 * behavior from the Linux networking stack will most likely
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index 2fc1e8e..3bc9a7c 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -485,8 +485,11 @@ int cvm_oct_common_init(struct net_device *dev)
 	    && (always_use_pow || strstr(pow_send_list, dev->name)))
 		priv->queue = -1;
 
-	if (priv->queue != -1 && USE_HW_TCPUDP_CHECKSUM)
-		dev->features |= NETIF_F_IP_CSUM;
+	if (priv->queue != -1) {
+		dev->features |= NETIF_F_SG;
+		if (USE_HW_TCPUDP_CHECKSUM)
+			dev->features |= NETIF_F_IP_CSUM;
+	}
 
 	/* We do our own locking, Linux doesn't need to */
 	dev->features |= NETIF_F_LLTX;
-- 
1.6.0.6


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/3] Staging: Octeon Ethernet: Clean up and convert to NAPI.
  2010-01-07  0:57 ` [PATCH 2/3] Staging: Octeon Ethernet: Clean up and convert to NAPI David Daney
@ 2010-01-07  1:01   ` David Miller
  0 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2010-01-07  1:01 UTC (permalink / raw)
  To: ddaney; +Cc: ralf, linux-mips, netdev, gregkh

From: David Daney <ddaney@caviumnetworks.com>
Date: Wed,  6 Jan 2010 16:57:15 -0800

> Convert the driver to be a reasonably well behaved NAPI citizen.  Also
> clean up memory allocation and tx buffer accounding code.  Remove some
> configuration #defines and other unused code that are not applicable
> to the in-tree driver.
> 
> Signed-off-by: David Daney <ddaney@caviumnetworks.com>

Please seperate the cleanups and conversion to NAPI into seperate
patches.

Doing both at the same time makes your patch impossible to review.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-01-07  1:01 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-01-07  0:55 [PATCH 0/3] Staging: Improvments to Octeon Ethernet driver David Daney
2010-01-07  0:57 ` [PATCH 1/3] MIPS: Octeon: Fix EIO handling David Daney
2010-01-07  0:57 ` [PATCH 2/3] Staging: Octeon Ethernet: Clean up and convert to NAPI David Daney
2010-01-07  1:01   ` David Miller
2010-01-07  0:57 ` [PATCH 3/3] Staging: Octeon Ethernet: Enable scatter-gather David Daney

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).