* [patch] Performance enhancement patches for SB1250 MAC
@ 2006-10-12 21:54 Yang, Steve
2006-10-12 22:13 ` Stephen Hemminger
0 siblings, 1 reply; 10+ messages in thread
From: Yang, Steve @ 2006-10-12 21:54 UTC (permalink / raw)
To: netdev
[-- Attachment #1: Type: text/plain, Size: 582 bytes --]
FYI ...
Regards,
Steve Yang
-----Original Message-----
From: Yang, Steve
Sent: Monday, September 25, 2006 3:50 PM
To: 'linux-mips@linux-mips.org'
Cc: 'ralf@linux-mips.org'; 'Mark E Mason'
Subject: Performance enhancement patches for SB1250 MAC
Hi,
The attached are two network performance enhancement patches for SB1250
MAC. The NAPI patch applies first, followed by the "skb cache" patch.
They apply and build cleanly on the 2.6.18 kernel for the following
kernel option combinations:
SBMAC_NAPI no yes yes
SKB_CACHE no no yes
Regards,
Steve Yang
[-- Attachment #2: mips-sb1250-mac-NAPI.patch --]
[-- Type: application/octet-stream, Size: 18115 bytes --]
This patch completes the NAPI functionality for the SB1250 MAC, making NAPI
a kernel option that can be turned on or off and adding the "sbmac_poll"
routine.
Signed-off-by: Dan Krejsa <dan.krejsa@windriver.com>
Signed-off-by: Steve Yang <steve.yang@windriver.com>
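For readers not familiar with the 2.6.18-era NAPI interface, the following is a
minimal sketch of the interrupt-to-poll handoff that the diff below implements
in sbmac_intr() and sbmac_poll(). It is not code from the patch: example_intr(),
example_poll() and process_rx_ring() are hypothetical stand-ins, and the
device-specific interrupt masking is reduced to comments.
#include <linux/netdevice.h>
#include <linux/interrupt.h>
/* Hypothetical helper: drain up to 'limit' completed RX descriptors. */
static int process_rx_ring(struct net_device *dev, int limit);
static irqreturn_t example_intr(int irq, void *dev_id, struct pt_regs *regs)
{
	struct net_device *dev = dev_id;
	/* a real handler would check the device's interrupt status first */
	if (netif_rx_schedule_prep(dev)) {	/* not already on the poll list? */
		/* mask the device's RX/TX interrupts here (driver-specific) */
		__netif_rx_schedule(dev);	/* ask the stack to call ->poll() */
	}
	return IRQ_HANDLED;
}
static int example_poll(struct net_device *dev, int *budget)
{
	int work_to_do = min(*budget, dev->quota);
	int work_done = process_rx_ring(dev, work_to_do);
	*budget -= work_done;
	dev->quota -= work_done;
	if (work_done < work_to_do) {
		netif_rx_complete(dev);	/* caught up: leave the poll list */
		/* unmask the device's RX/TX interrupts here (driver-specific) */
		return 0;
	}
	return 1;	/* more work pending: stay on the poll list */
}
The key property is that the device's receive interrupts stay masked from
netif_rx_schedule_prep() until netif_rx_complete(), so under load all receive
processing happens in the poll routine rather than in interrupt context.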
Index: linux-2.6.14-cgl/drivers/net/Kconfig
===================================================================
--- linux-2.6.14-cgl.orig/drivers/net/Kconfig 2006-09-20 14:58:54.000000000 -0700
+++ linux-2.6.14-cgl/drivers/net/Kconfig 2006-09-20 17:04:31.000000000 -0700
@@ -2031,6 +2031,23 @@
tristate "SB1250 Ethernet support"
depends on SIBYTE_SB1xxx_SOC
+config SBMAC_NAPI
+ bool "SBMAC: Use Rx Polling (NAPI) (EXPERIMENTAL)"
+ depends on NET_SB1250_MAC && EXPERIMENTAL
+ help
+ NAPI is a new driver API designed to reduce CPU and interrupt load
+ when the driver is receiving lots of packets from the card. It is
+ still somewhat experimental and thus not yet enabled by default.
+
+ If your estimated Rx load is 10kpps or more, or if the card will be
+ deployed on potentially unfriendly networks (e.g. in a firewall),
+ then say Y here.
+
+ See <file:Documentation/networking/NAPI_HOWTO.txt> for more
+ information.
+
+ If in doubt, say y.
+
config R8169_VLAN
bool "VLAN support"
depends on R8169 && VLAN_8021Q
@@ -2826,3 +2843,5 @@
def_bool NETPOLL
endmenu
+
+
Index: linux-2.6.14-cgl/drivers/net/sb1250-mac.c
===================================================================
--- linux-2.6.14-cgl.orig/drivers/net/sb1250-mac.c 2006-09-20 14:59:00.000000000 -0700
+++ linux-2.6.14-cgl/drivers/net/sb1250-mac.c 2006-09-20 20:16:27.000000000 -0700
@@ -95,19 +95,28 @@
#endif
#ifdef CONFIG_SBMAC_COALESCE
-static int int_pktcnt = 0;
-module_param(int_pktcnt, int, S_IRUGO);
-MODULE_PARM_DESC(int_pktcnt, "Packet count");
-
-static int int_timeout = 0;
-module_param(int_timeout, int, S_IRUGO);
-MODULE_PARM_DESC(int_timeout, "Timeout value");
+static int int_pktcnt_tx = 255;
+module_param(int_pktcnt_tx, int, S_IRUGO);
+MODULE_PARM_DESC(int_pktcnt_tx, "TX packet count");
+
+static int int_timeout_tx = 255;
+module_param(int_timeout_tx, int, S_IRUGO);
+MODULE_PARM_DESC(int_timeout_tx, "TX timeout value");
+
+static int int_pktcnt_rx = 64;
+module_param(int_pktcnt_rx, int, S_IRUGO);
+MODULE_PARM_DESC(int_pktcnt_rx, "RX packet count");
+
+static int int_timeout_rx = 64;
+module_param(int_timeout_rx, int, S_IRUGO);
+MODULE_PARM_DESC(int_timeout_rx, "RX timeout value");
#endif
#include <asm/sibyte/sb1250.h>
#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
#include <asm/sibyte/bcm1480_regs.h>
#include <asm/sibyte/bcm1480_int.h>
+#define R_MAC_DMA_OODPKTLOST_RX R_MAC_DMA_OODPKTLOST
#elif defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X)
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_int.h>
@@ -155,8 +164,8 @@
#define NUMCACHEBLKS(x) (((x)+SMP_CACHE_BYTES-1)/SMP_CACHE_BYTES)
-#define SBMAC_MAX_TXDESCR 32
-#define SBMAC_MAX_RXDESCR 32
+#define SBMAC_MAX_TXDESCR 256
+#define SBMAC_MAX_RXDESCR 256
#define ETHER_ALIGN 2
#define ETHER_ADDR_LEN 6
@@ -185,10 +194,10 @@
* associated with it.
*/
- struct sbmac_softc *sbdma_eth; /* back pointer to associated MAC */
- int sbdma_channel; /* channel number */
+ struct sbmac_softc *sbdma_eth; /* back pointer to associated MAC */
+ int sbdma_channel; /* channel number */
int sbdma_txdir; /* direction (1=transmit) */
- int sbdma_maxdescr; /* total # of descriptors in ring */
+ int sbdma_maxdescr; /* total # of descriptors in ring */
#ifdef CONFIG_SBMAC_COALESCE
int sbdma_int_pktcnt; /* # descriptors rx/tx before interrupt*/
int sbdma_int_timeout; /* # usec rx/tx interrupt */
@@ -197,13 +206,16 @@
volatile void __iomem *sbdma_config0; /* DMA config register 0 */
volatile void __iomem *sbdma_config1; /* DMA config register 1 */
volatile void __iomem *sbdma_dscrbase; /* Descriptor base address */
- volatile void __iomem *sbdma_dscrcnt; /* Descriptor count register */
+ volatile void __iomem *sbdma_dscrcnt; /* Descriptor count register */
volatile void __iomem *sbdma_curdscr; /* current descriptor address */
+ volatile void __iomem *sbdma_oodpktlost;/* pkt drop (rx only) */
+
/*
* This stuff is for maintenance of the ring
*/
+ sbdmadscr_t *sbdma_dscrtable_unaligned;
sbdmadscr_t *sbdma_dscrtable; /* base of descriptor table */
sbdmadscr_t *sbdma_dscrtable_end; /* end of descriptor table */
@@ -286,8 +298,8 @@
static int sbdma_add_txbuffer(sbmacdma_t *d,struct sk_buff *m);
static void sbdma_emptyring(sbmacdma_t *d);
static void sbdma_fillring(sbmacdma_t *d);
-static void sbdma_rx_process(struct sbmac_softc *sc,sbmacdma_t *d);
-static void sbdma_tx_process(struct sbmac_softc *sc,sbmacdma_t *d);
+static int sbdma_rx_process(struct sbmac_softc *sc,sbmacdma_t *d, int work_to_do, int poll);
+static void sbdma_tx_process(struct sbmac_softc *sc,sbmacdma_t *d, int poll);
static int sbmac_initctx(struct sbmac_softc *s);
static void sbmac_channel_start(struct sbmac_softc *s);
static void sbmac_channel_stop(struct sbmac_softc *s);
@@ -308,6 +320,10 @@
static void sbmac_set_rx_mode(struct net_device *dev);
static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static int sbmac_close(struct net_device *dev);
+#ifdef CONFIG_SBMAC_NAPI
+static int sbmac_poll(struct net_device *poll_dev, int *budget);
+#endif
+
static int sbmac_mii_poll(struct sbmac_softc *s,int noisy);
static int sbmac_mii_probe(struct net_device *dev);
@@ -679,6 +695,10 @@
int txrx,
int maxdescr)
{
+#ifdef CONFIG_SBMAC_COALESCE
+ int int_pktcnt, int_timeout;
+#endif
+
/*
* Save away interesting stuff in the structure
*/
@@ -728,6 +748,11 @@
s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_DSCR_CNT);
d->sbdma_curdscr =
s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_CUR_DSCRADDR);
+ if (d->sbdma_txdir)
+ d->sbdma_oodpktlost = NULL;
+ else
+ d->sbdma_oodpktlost =
+ s->sbm_base + R_MAC_DMA_REGISTER(txrx,chan,R_MAC_DMA_OODPKTLOST_RX);
/*
* Allocate memory for the ring
@@ -735,6 +760,7 @@
d->sbdma_maxdescr = maxdescr;
+ d->sbdma_dscrtable_unaligned =
d->sbdma_dscrtable = (sbdmadscr_t *)
kmalloc((d->sbdma_maxdescr+1)*sizeof(sbdmadscr_t), GFP_KERNEL);
@@ -765,12 +791,14 @@
* Setup Rx/Tx DMA coalescing defaults
*/
+ int_pktcnt = (txrx == DMA_TX) ? int_pktcnt_tx : int_pktcnt_rx;
if ( int_pktcnt ) {
d->sbdma_int_pktcnt = int_pktcnt;
} else {
d->sbdma_int_pktcnt = 1;
}
+ int_timeout = (txrx == DMA_TX) ? int_timeout_tx : int_timeout_rx;
if ( int_timeout ) {
d->sbdma_int_timeout = int_timeout;
} else {
@@ -1130,30 +1158,41 @@
/**********************************************************************
- * SBDMA_RX_PROCESS(sc,d)
+ * SBDMA_RX_PROCESS(sc,d,work_to_do,poll)
*
* Process "completed" receive buffers on the specified DMA channel.
- * Note that this isn't really ideal for priority channels, since
- * it processes all of the packets on a given channel before
- * returning.
*
* Input parameters:
- * sc - softc structure
- * d - DMA channel context
+ * sc - softc structure
+ * d - DMA channel context
+ * work_to_do - no. of packets to process before enabling interrupt
+ * again (for NAPI)
+ * poll - 1: using polling (for NAPI)
*
* Return value:
* nothing
********************************************************************* */
-static void sbdma_rx_process(struct sbmac_softc *sc,sbmacdma_t *d)
+static int sbdma_rx_process(struct sbmac_softc *sc,sbmacdma_t *d,
+ int work_to_do, int poll)
{
int curidx;
int hwidx;
sbdmadscr_t *dsc;
struct sk_buff *sb;
int len;
+ int work_done = 0;
+ int dropped = 0;
- for (;;) {
+ prefetch(d);
+
+again:
+ /* Check if the HW dropped any frames */
+ sc->sbm_stats.rx_fifo_errors
+ += __raw_readq(sc->sbm_rxdma.sbdma_oodpktlost) & 0xffff;
+ __raw_writeq(0, sc->sbm_rxdma.sbdma_oodpktlost);
+
+ while (work_to_do-- > 0) {
/*
* figure out where we are (as an index) and where
* the hardware is (also as an index)
@@ -1165,7 +1204,12 @@
* (sbdma_remptr) and the physical address (sbdma_curdscr CSR)
*/
- curidx = d->sbdma_remptr - d->sbdma_dscrtable;
+ dsc = d->sbdma_remptr;
+ curidx = dsc - d->sbdma_dscrtable;
+
+ prefetch(dsc);
+ prefetch(&d->sbdma_ctxtable[curidx]);
+
hwidx = (int) (((__raw_readq(d->sbdma_curdscr) & M_DMA_CURDSCR_ADDR) -
d->sbdma_dscrtable_phys) / sizeof(sbdmadscr_t));
@@ -1176,13 +1220,12 @@
*/
if (curidx == hwidx)
- break;
+ goto done;
/*
* Otherwise, get the packet's sk_buff ptr back
*/
- dsc = &(d->sbdma_dscrtable[curidx]);
sb = d->sbdma_ctxtable[curidx];
d->sbdma_ctxtable[curidx] = NULL;
@@ -1194,17 +1237,22 @@
* receive ring.
*/
- if (!(dsc->dscr_a & M_DMA_ETHRX_BAD)) {
-
+ if (likely (!(dsc->dscr_a & M_DMA_ETHRX_BAD))) {
+
/*
* Add a new buffer to replace the old one. If we fail
* to allocate a buffer, we're going to drop this
* packet and put it right back on the receive ring.
*/
- if (sbdma_add_rcvbuffer(d,NULL) == -ENOBUFS) {
- sc->sbm_stats.rx_dropped++;
+ if (unlikely (sbdma_add_rcvbuffer(d,NULL) ==
+ -ENOBUFS)) {
+ sc->sbm_stats.rx_dropped++;
sbdma_add_rcvbuffer(d,sb); /* re-add old buffer */
+ /* No point in continuing at the moment */
+ printk(KERN_ERR "dropped packet (1)\n");
+ d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr);
+ goto done;
} else {
/*
* Set length into the packet
@@ -1216,8 +1264,6 @@
* receive ring. Pass the buffer to
* the kernel
*/
- sc->sbm_stats.rx_bytes += len;
- sc->sbm_stats.rx_packets++;
sb->protocol = eth_type_trans(sb,d->sbdma_eth->sbm_dev);
/* Check hw IPv4/TCP checksum if supported */
if (sc->rx_hw_checksum == ENABLE) {
@@ -1229,8 +1275,24 @@
sb->ip_summed = CHECKSUM_NONE;
}
}
-
- netif_rx(sb);
+ prefetch(sb->data);
+ prefetch((const void *)(((char *)sb->data)+32));
+#ifdef CONFIG_SBMAC_NAPI
+ if (poll)
+ dropped = netif_receive_skb(sb);
+ else
+#endif
+ dropped = netif_rx(sb);
+
+ if (dropped == NET_RX_DROP) {
+ sc->sbm_stats.rx_dropped++;
+ d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr);
+ goto done;
+ }
+ else {
+ sc->sbm_stats.rx_bytes += len;
+ sc->sbm_stats.rx_packets++;
+ }
}
} else {
/*
@@ -1247,12 +1309,16 @@
*/
d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr);
-
+ work_done++;
+ }
+ if (!poll) {
+ work_to_do = 32;
+ goto again; /* collect fifo drop statistics again */
}
+done:
+ return work_done;
}
-
-
/**********************************************************************
* SBDMA_TX_PROCESS(sc,d)
*
@@ -1264,22 +1330,30 @@
*
* Input parameters:
* sc - softc structure
- * d - DMA channel context
+ * d - DMA channel context
+ * poll - 1: using polling (for NAPI)
*
* Return value:
* nothing
********************************************************************* */
-static void sbdma_tx_process(struct sbmac_softc *sc,sbmacdma_t *d)
+static void sbdma_tx_process(struct sbmac_softc *sc,sbmacdma_t *d, int poll)
{
int curidx;
int hwidx;
sbdmadscr_t *dsc;
struct sk_buff *sb;
unsigned long flags;
+ int packets_handled = 0;
spin_lock_irqsave(&(sc->sbm_lock), flags);
+ if (d->sbdma_remptr == d->sbdma_addptr)
+ goto end_unlock;
+
+ hwidx = (int) (((__raw_readq(d->sbdma_curdscr) & M_DMA_CURDSCR_ADDR) -
+ d->sbdma_dscrtable_phys) / sizeof(sbdmadscr_t));
+
for (;;) {
/*
* figure out where we are (as an index) and where
@@ -1293,8 +1367,6 @@
*/
curidx = d->sbdma_remptr - d->sbdma_dscrtable;
- hwidx = (int) (((__raw_readq(d->sbdma_curdscr) & M_DMA_CURDSCR_ADDR) -
- d->sbdma_dscrtable_phys) / sizeof(sbdmadscr_t));
/*
* If they're the same, that means we've processed all
@@ -1332,6 +1404,8 @@
d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr);
+ packets_handled++;
+
}
/*
@@ -1340,8 +1414,10 @@
* watermark on the transmit queue.
*/
- netif_wake_queue(d->sbdma_eth->sbm_dev);
+ if (packets_handled)
+ netif_wake_queue(d->sbdma_eth->sbm_dev);
+end_unlock:
spin_unlock_irqrestore(&(sc->sbm_lock), flags);
}
@@ -1415,9 +1491,9 @@
static void sbdma_uninitctx(struct sbmacdma_s *d)
{
- if (d->sbdma_dscrtable) {
- kfree(d->sbdma_dscrtable);
- d->sbdma_dscrtable = NULL;
+ if (d->sbdma_dscrtable_unaligned) {
+ kfree(d->sbdma_dscrtable_unaligned);
+ d->sbdma_dscrtable_unaligned = d->sbdma_dscrtable = NULL;
}
if (d->sbdma_ctxtable) {
@@ -1615,15 +1691,9 @@
#endif
#ifdef CONFIG_SBMAC_COALESCE
- /*
- * Accept any TX interrupt and EOP count/timer RX interrupts on ch 0
- */
__raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_TX_CH0) |
((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_RX_CH0), s->sbm_imr);
#else
- /*
- * Accept any kind of interrupt on TX and RX DMA channel 0
- */
__raw_writeq((M_MAC_INT_CHANNEL << S_MAC_TX_CH0) |
(M_MAC_INT_CHANNEL << S_MAC_RX_CH0), s->sbm_imr);
#endif
@@ -2056,8 +2126,7 @@
uint64_t isr;
int handled = 0;
- for (;;) {
-
+#ifdef CONFIG_SBMAC_NAPI
/*
* Read the ISR (this clears the bits in the real
* register, except for counter addr)
@@ -2066,8 +2135,7 @@
isr = __raw_readq(sc->sbm_isr) & ~M_MAC_COUNTER_ADDR;
if (isr == 0)
- break;
-
+ return IRQ_RETVAL(0);
handled = 1;
/*
@@ -2075,12 +2143,52 @@
*/
if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
- sbdma_tx_process(sc,&(sc->sbm_txdma));
+ sbdma_tx_process(sc,&(sc->sbm_txdma), 0);
+#ifdef CONFIG_NETPOLL_TRAP
+ if (netpoll_trap()) {
+ if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
+ __netif_schedule(dev);
+ }
+#endif
}
+ if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
+ if (netif_rx_schedule_prep(dev)) {
+ __raw_writeq(0, sc->sbm_imr);
+ __netif_rx_schedule(dev);
+ /* Depend on the exit from poll to reenable intr */
+ }
+ else {
+ /* may leave some packets behind */
+ sbdma_rx_process(sc,&(sc->sbm_rxdma),
+ SBMAC_MAX_RXDESCR * 2, 0);
+ }
+ }
+#else
+ /* Non NAPI */
+ for (;;) {
+
/*
- * Receives on channel 0
+ * Read the ISR (this clears the bits in the real
+ * register, except for counter addr)
*/
+ isr = __raw_readq(sc->sbm_isr) & ~M_MAC_COUNTER_ADDR;
+
+ if (isr == 0)
+ break;
+
+ handled = 1;
+
+ if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
+ sbdma_tx_process(sc,&(sc->sbm_txdma),
+ SBMAC_MAX_RXDESCR * 2);
+#ifdef CONFIG_NETPOLL_TRAP
+ if (netpoll_trap()) {
+ if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
+ __netif_schedule(dev);
+ }
+#endif
+ }
/*
* It's important to test all the bits (or at least the
@@ -2097,16 +2205,15 @@
* EOP_SEEN here takes care of this case.
* (EOP_SEEN is part of M_MAC_INT_CHANNEL << S_MAC_RX_CH0)
*/
-
-
if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
- sbdma_rx_process(sc,&(sc->sbm_rxdma));
+ sbdma_rx_process(sc,&(sc->sbm_rxdma),
+ SBMAC_MAX_RXDESCR * 2, 0);
}
}
+#endif
return IRQ_RETVAL(handled);
}
-
/**********************************************************************
* SBMAC_START_TX(skb,dev)
*
@@ -2236,8 +2343,6 @@
}
}
-
-
#if defined(SBMAC_ETH0_HWADDR) || defined(SBMAC_ETH1_HWADDR) || defined(SBMAC_ETH2_HWADDR) || defined(SBMAC_ETH3_HWADDR)
/**********************************************************************
* SBMAC_PARSE_XDIGIT(str)
@@ -2400,9 +2505,12 @@
dev->do_ioctl = sbmac_mii_ioctl;
dev->tx_timeout = sbmac_tx_timeout;
dev->watchdog_timeo = TX_TIMEOUT;
-
dev->change_mtu = sb1250_change_mtu;
-
+#ifdef CONFIG_SBMAC_NAPI
+ dev->poll = sbmac_poll;
+ dev->weight = 16;
+#endif
+
/* This is needed for PASS2 for Rx H/W checksum feature */
sbmac_set_iphdr_offset(sc);
@@ -2808,7 +2916,41 @@
return 0;
}
+#ifdef CONFIG_SBMAC_NAPI
+static int sbmac_poll(struct net_device *dev, int *budget)
+{
+ int work_to_do;
+ int work_done;
+ struct sbmac_softc *sc = netdev_priv(dev);
+
+ work_to_do = min(*budget, dev->quota);
+ work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma), work_to_do, 1);
+
+ if (work_done > work_to_do)
+ printk(KERN_ERR "%s exceeded work_to_do budget=%d quota=%d work-done=%d\n",
+ sc->sbm_dev->name, *budget, dev->quota, work_done);
+
+ sbdma_tx_process(sc, &(sc->sbm_txdma), 1);
+ *budget -= work_done;
+ dev->quota -= work_done;
+
+ if (work_done < work_to_do) {
+ netif_rx_complete(dev);
+
+#ifdef CONFIG_SBMAC_COALESCE
+ __raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_TX_CH0) |
+ ((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_RX_CH0),
+ sc->sbm_imr);
+#else
+ __raw_writeq((M_MAC_INT_CHANNEL << S_MAC_TX_CH0) |
+ (M_MAC_INT_CHANNEL << S_MAC_RX_CH0), sc->sbm_imr);
+#endif
+ }
+
+ return (work_done >= work_to_do);
+}
+#endif
#if defined(SBMAC_ETH0_HWADDR) || defined(SBMAC_ETH1_HWADDR) || defined(SBMAC_ETH2_HWADDR) || defined(SBMAC_ETH3_HWADDR)
static void
Index: linux-2.6.14-cgl/arch/mips/sibyte/bcm1480/irq.c
===================================================================
--- linux-2.6.14-cgl.orig/arch/mips/sibyte/bcm1480/irq.c 2006-09-20 14:58:41.000000000 -0700
+++ linux-2.6.14-cgl/arch/mips/sibyte/bcm1480/irq.c 2006-09-20 15:58:33.000000000 -0700
@@ -144,11 +144,11 @@
unsigned long flags;
unsigned int irq_dirty;
- i = first_cpu(mask);
- if (next_cpu(i, mask) <= NR_CPUS) {
+ if (cpus_weight(mask) != 1) {
printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
return;
}
+ i = first_cpu(mask);
/* Convert logical CPU to physical CPU */
cpu = cpu_logical_map(i);
[-- Attachment #3: sb1250mac_skb_cache.patch --]
[-- Type: application/octet-stream, Size: 16456 bytes --]
This patch implements a private cache of buffers for the SB1250 MAC driver
to enhance performance. It should be applied on top of the NAPI patch.
Signed-off-by: Dan Krejsa <dan.krejsa@windriver.com>
Signed-off-by: Steve Yang <steve.yang@windriver.com>
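As background for the diff below, here is a minimal sketch of the recycling
scheme it implements: a small spinlock-protected LIFO of sk_buffs, shared by the
devices the driver manages, pushed on TX completion and popped on RX refill
before falling back to dev_alloc_skb(). The example_* names are hypothetical
stand-ins, and the eligibility checks and sk_buff re-initialization the real
patch performs before reuse are reduced to comments.
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#define EXAMPLE_CACHE_SIZE 64	/* mirrors the default SBMAC_SKB_CACHE_SIZE */
static struct {
	spinlock_t lock;
	int index;
	struct sk_buff *buf[EXAMPLE_CACHE_SIZE];
} example_cache = {
	.lock  = SPIN_LOCK_UNLOCKED,	/* 2.6.18-era static initializer */
	.index = 0,
};
/* TX completion: try to keep the buffer instead of freeing it.  The real
 * patch only reaches this point after checking that the sk_buff is linear,
 * large enough and has reference counts of 1, and after releasing its
 * dst/destructor/netfilter state. */
static void example_recycle(struct sk_buff *skb)
{
	if (spin_trylock(&example_cache.lock)) {
		if (example_cache.index < EXAMPLE_CACHE_SIZE) {
			example_cache.buf[example_cache.index++] = skb;
			spin_unlock(&example_cache.lock);
			return;
		}
		spin_unlock(&example_cache.lock);
	}
	dev_kfree_skb(skb);	/* cache full or contended: free normally */
}
/* RX refill: prefer a cached buffer over a fresh allocation.  A reused
 * buffer still needs its header fields re-initialized, as the patch does. */
static struct sk_buff *example_get_buffer(unsigned int len)
{
	struct sk_buff *skb = NULL;
	if (spin_trylock(&example_cache.lock)) {
		if (example_cache.index > 0)
			skb = example_cache.buf[--example_cache.index];
		spin_unlock(&example_cache.lock);
	}
	return skb ? skb : dev_alloc_skb(len);
}
The spin_trylock() on both paths is the pair of extra lock round trips per
recycled buffer that comes up later in this thread.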
Index: linux-2.6.14-cgl/drivers/net/Kconfig
===================================================================
--- linux-2.6.14-cgl.orig/drivers/net/Kconfig 2006-09-25 10:06:29.000000000 -0700
+++ linux-2.6.14-cgl/drivers/net/Kconfig 2006-09-25 12:26:08.401453626 -0700
@@ -2048,6 +2048,35 @@
If in doubt, say y.
+config SBMAC_SKB_CACHE
+ bool "SBMAC: Enable driver local buffer caching (EXPERIMENTAL)"
+ depends on NET_SB1250_MAC && EXPERIMENTAL
+ help
+ This configuration option makes the sb1250-mac.c driver
+ maintain a private cache of 'sk_buff' buffers, shared
+ between the network devices which it manages. When a packet
+ transmit completes, the corresponding sk_buff may be placed
+ in the private cache (provided it meets certain criteria)
+ rather than being returned to the general kernel pools.
+ During packet reception, replenishment sk_buffs may be taken
+ from the private cache (if available) in preference to using
+ dev_alloc_skb(). This has been found to significantly
+ improve performance when forwarding under heavy load.
+
+ If in doubt, say y here.
+
+config SBMAC_SKB_CACHE_SIZE
+ int "SBMAC driver sk_buff cache size"
+ depends on SBMAC_SKB_CACHE
+ default 64
+ help
+ Number of 'sk_buff' buffers in the cache. Defaults to 64.
+ Up to this many buffers may be held in the cache by the driver,
+ unavailable for other use, until the driver is unloaded.
+ Only linear sk_buffs with a single reference count and of
+ sufficiently large size are candidates for recycling in the
+ private cache.
+
config R8169_VLAN
bool "VLAN support"
depends on R8169 && VLAN_8021Q
Index: linux-2.6.14-cgl/drivers/net/sb1250-mac.c
===================================================================
--- linux-2.6.14-cgl.orig/drivers/net/sb1250-mac.c 2006-09-25 10:06:29.000000000 -0700
+++ linux-2.6.14-cgl/drivers/net/sb1250-mac.c 2006-09-25 12:43:01.299764714 -0700
@@ -35,6 +35,10 @@
#include <asm/processor.h> /* Processor type for cache alignment. */
#include <asm/io.h>
#include <asm/cache.h>
+#ifdef CONFIG_SBMAC_SKB_CACHE
+#include <net/dst.h>
+#include <net/xfrm.h>
+#endif /* CONFIG_SBMAC_SKB_CACHE */
/* This is only here until the firmware is ready. In that case,
the firmware leaves the ethernet address in the register for us. */
@@ -275,10 +279,40 @@
sbmacdma_t sbm_txdma; /* for now, only use channel 0 */
sbmacdma_t sbm_rxdma;
+ uint32_t sbm_events; /* record interrupt status */
int rx_hw_checksum;
int sbe_idx;
};
+#ifdef CONFIG_SBMAC_SKB_CACHE
+
+/* Try to locally cache a TX sk_buff if its truesize is at least this big: */
+#define SBMAC_SIZE_REQ (SKB_DATA_ALIGN(16 + ENET_PACKET_SIZE + \
+ SMP_CACHE_BYTES * 2 + \
+ ETHER_ALIGN))
+#ifndef SBMAC_SKB_CACHE_SIZE
+#define SBMAC_SKB_CACHE_SIZE 64
+#endif
+
+struct sbmac_skb_cache {
+ spinlock_t lock;
+ int index;
+ struct sk_buff * buf [SBMAC_SKB_CACHE_SIZE];
+};
+
+/*
+ * Local sk_buff cache, shared by devices.
+ * Remember to write clean-up code to free any buffers in
+ * the cache when the module unloads.
+ */
+
+static struct sbmac_skb_cache sbmac_skb = {
+ .lock = SPIN_LOCK_UNLOCKED,
+ .index = 0
+};
+
+#endif /* CONFIG_SBMAC_SKB_CACHE */
+
/**********************************************************************
* Externs
@@ -882,7 +916,7 @@
d->sbdma_remptr = NULL;
}
-static void sbdma_align_skb(struct sk_buff *skb,int power2,int offset)
+static inline void sbdma_align_skb(struct sk_buff *skb,int power2,int offset)
{
unsigned long addr;
unsigned long newaddr;
@@ -917,6 +951,9 @@
sbdmadscr_t *nextdsc;
struct sk_buff *sb_new = NULL;
int pktsize = ENET_PACKET_SIZE;
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ int i;
+#endif
/* get pointer to our current place in the ring */
@@ -952,26 +989,52 @@
* DMA will trash the beginning (and ending) portions.
*/
- if (sb == NULL) {
- sb_new = dev_alloc_skb(ENET_PACKET_SIZE + SMP_CACHE_BYTES * 2 + ETHER_ALIGN);
- if (sb_new == NULL) {
- printk(KERN_INFO "%s: sk_buff allocation failed\n",
- d->sbdma_eth->sbm_dev->name);
- return -ENOBUFS;
+
+ sb_new = sb;
+
+ if (sb_new == NULL) {
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ if (likely (spin_trylock (&sbmac_skb.lock))) {
+ if (likely ((i = sbmac_skb.index - 1) >= 0)) {
+ sb_new = sbmac_skb.buf [i];
+ sbmac_skb.index = i;
+ }
+ spin_unlock (&sbmac_skb.lock);
}
+ if (sb_new == NULL) {
+#endif
+ sb_new = dev_alloc_skb (ENET_PACKET_SIZE +
+ SMP_CACHE_BYTES * 2 +
+ ETHER_ALIGN);
+ if (unlikely (sb_new == NULL)) {
+ printk(KERN_INFO
+ "%s: sk_buff allocation failed\n",
+ d->sbdma_eth->sbm_dev->name);
+ return -ENOBUFS;
+ }
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ } else {
+ /* sb_new came from the local cache, init it */
+ memset(sb_new, 0, offsetof(struct sk_buff, truesize));
+ /* sb->users is already 1, as is
+ skb_shinfo(sb)->dataref */
+
+ /* reserve 16 bytes, see __dev_alloc_skb() */
+ sb_new->data = sb_new->tail = sb_new->head + 16;
+
+ /* are these necessary given that we don't support
+ * TCP segmentation offload?
+ */
+ skb_shinfo(sb_new)->gso_size = 0;
+ skb_shinfo(sb_new)->gso_segs = 0;
+ }
+#endif
sbdma_align_skb(sb_new, SMP_CACHE_BYTES, ETHER_ALIGN);
/* mark skbuff owned by our device */
sb_new->dev = d->sbdma_eth->sbm_dev;
}
- else {
- sb_new = sb;
- /*
- * nothing special to reinit buffer, it's already aligned
- * and sb->data already points to a good place.
- */
- }
/*
* fill in the descriptor
@@ -1344,8 +1407,12 @@
sbdmadscr_t *dsc;
struct sk_buff *sb;
unsigned long flags;
- int packets_handled = 0;
-
+ int out = 0;
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ int i;
+ int size;
+#endif
+
spin_lock_irqsave(&(sc->sbm_lock), flags);
if (d->sbdma_remptr == d->sbdma_addptr)
@@ -1354,8 +1421,8 @@
hwidx = (int) (((__raw_readq(d->sbdma_curdscr) & M_DMA_CURDSCR_ADDR) -
d->sbdma_dscrtable_phys) / sizeof(sbdmadscr_t));
- for (;;) {
- /*
+ for (;; d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr)) {
+ /*
* figure out where we are (as an index) and where
* the hardware is (also as an index)
*
@@ -1371,8 +1438,8 @@
/*
* If they're the same, that means we've processed all
* of the descriptors up to (but not including) the one that
- * the hardware is working on right now.
- */
+ * the hardware was working on when we started.
+ */
if (curidx == hwidx)
break;
@@ -1394,27 +1461,95 @@
/*
* for transmits, we just free buffers.
- */
-
- dev_kfree_skb_irq(sb);
+ *
+ * CONFIG_SBMAC_SKB_CACHE:
+ * We try to keep an optimized local sk_buff cache
+ * shared between devices managed by this driver.
+ * Candidate sk_buff's are linear with enough space
+ * and reference counts of 1. Note that dataref==1
+ * also means that 'header cloning' hasn't taken place.
+ *
+ * We only access the local sk_buff cache inside the
+ * device poll routine. However, as this is shared
+ * between devices and the net_rx_action may run on
+ * multiple CPUs concurrently, we need a spin lock.
+ */
+
+ if (poll) {
+ /* Interrupts are known enabled in polling routine */
+
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ if (atomic_read (&sb->users) == 1 &&
+ atomic_read (&(skb_shinfo(sb)->dataref)) == 1 &&
+ (size = (sb->end - sb->head)) >= SBMAC_SIZE_REQ &&
+ sb->data_len == 0 &&
+ sb->fclone == SKB_FCLONE_UNAVAILABLE &&
+ /*
+ * Are these last two checks necessary
+ * given that data_len == 0 ? In any case,
+ * we don't seem to handle non-linear sk_buffs
+ * in sbdma_add_txbuffer(), so it may be
+ * overly paranoid here...
+ */
+ skb_shinfo(sb)->nr_frags == 0 &&
+ skb_shinfo(sb)->frag_list == 0) {
+ smp_rmb();
+
+ /*
+ * XXX - the next several lines are from
+ * __kfree_skb().
+ */
- /*
- * .. and advance to the next buffer.
- */
+ dst_release(sb->dst);
+#ifdef CONFIG_XFRM
+ secpath_put(sb->sp);
+#endif
+ if (sb->destructor) {
+ sb->destructor(sb);
+ }
+#ifdef CONFIG_NETFILTER
+ nf_conntrack_put(sb->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put_reasm(sb->nfct_reasm);
+#endif
- d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ nf_bridge_put(sb->nf_bridge);
+#endif
+#endif
+ /* XXX: IS this still necessary? - JHS */
+#ifdef CONFIG_NET_SCHED
+ sb->tc_index = 0;
+#ifdef CONFIG_NET_CLS_ACT
+ sb->tc_verd = 0;
+#endif
+#endif
- packets_handled++;
+ if (spin_trylock(&sbmac_skb.lock)) {
+ if ( (i = sbmac_skb.index) <
+ SBMAC_SKB_CACHE_SIZE) {
+ sb->truesize = size +
+ sizeof (struct sk_buff);
+ sbmac_skb.buf[i] = sb;
+ sbmac_skb.index = i + 1;
+ spin_unlock (&sbmac_skb.lock);
+ continue;
+ }
+ spin_unlock (&sbmac_skb.lock);
+ }
+ kfree_skbmem (sb);
+ } else /* SKB_CACHE */
+#endif /* CONFIG_SBMAC_SKB_CACHE */
+ dev_kfree_skb(sb);
+ } else /* Poll */
+ dev_kfree_skb_irq(sb);
}
- /*
- * Decide if we should wake up the protocol or not.
- * Other drivers seem to do this when we reach a low
- * watermark on the transmit queue.
- */
+ if ( (out = (d->sbdma_addptr - d->sbdma_remptr)) < 0)
+ out += SBMAC_MAX_TXDESCR; /* d->sbdma_maxdescr; */
- if (packets_handled)
+ if (out < SBMAC_MAX_TXDESCR / 2)
netif_wake_queue(d->sbdma_eth->sbm_dev);
end_unlock:
@@ -2127,42 +2262,53 @@
int handled = 0;
#ifdef CONFIG_SBMAC_NAPI
- /*
- * Read the ISR (this clears the bits in the real
- * register, except for counter addr)
- */
-
- isr = __raw_readq(sc->sbm_isr) & ~M_MAC_COUNTER_ADDR;
-
- if (isr == 0)
- return IRQ_RETVAL(0);
- handled = 1;
-
- /*
- * Transmits on channel 0
- */
-
- if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
- sbdma_tx_process(sc,&(sc->sbm_txdma), 0);
-#ifdef CONFIG_NETPOLL_TRAP
- if (netpoll_trap()) {
- if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
- __netif_schedule(dev);
- }
-#endif
- }
-
- if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
+ /*
+ * Read the ISR (this clears the bits in the real
+ * register, except for counter addr)
+ */
+
+ isr = __raw_readq(sc->sbm_isr) & ~M_MAC_COUNTER_ADDR;
+
+ if (isr == 0)
+ return IRQ_RETVAL(0);
+
+ handled = 1;
+
+ if (isr & ((M_MAC_INT_CHANNEL << S_MAC_RX_CH0) |
+ (M_MAC_INT_CHANNEL << S_MAC_TX_CH0))) {
if (netif_rx_schedule_prep(dev)) {
+ sc->sbm_events = (uint32_t)isr;
__raw_writeq(0, sc->sbm_imr);
__netif_rx_schedule(dev);
/* Depend on the exit from poll to reenable intr */
- }
- else {
- /* may leave some packets behind */
- sbdma_rx_process(sc,&(sc->sbm_rxdma),
- SBMAC_MAX_RXDESCR * 2, 0);
- }
+ } else {
+ /* Interrupt already acknowledged, sbmac_poll()
+ * already scheduled. Do nothing. Shouldn't occur
+ * anyhow.
+ */
+#if 0
+ if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
+ sbdma_tx_process(sc,&(sc->sbm_txdma), 0);
+#ifdef CONFIG_NETPOLL_TRAP
+ if (netpoll_trap()) {
+ if (test_and_clear_bit(__LINK_STATE_XOFF,
+ &dev->state))
+ __netif_schedule(dev);
+ }
+#endif
+ }
+ /* Note, if the ISR is allowed to call
+ * sbdma_rx_process(), provisions have to be made
+ * when the local SKB_CACHE is used; it presently
+ * doesn't have protection against accesses from
+ * interrupt level.
+ */
+ if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
+ /* may leave some packets behind */
+ sbdma_rx_process(sc,&(sc->sbm_rxdma), 32, 0);
+ }
+#endif /* 0 */
+ }
}
#else
/* Non NAPI */
@@ -2180,8 +2326,7 @@
handled = 1;
if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
- sbdma_tx_process(sc,&(sc->sbm_txdma),
- SBMAC_MAX_RXDESCR * 2);
+ sbdma_tx_process(sc, &(sc->sbm_txdma), 0);
#ifdef CONFIG_NETPOLL_TRAP
if (netpoll_trap()) {
if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
@@ -2232,8 +2377,15 @@
struct sbmac_softc *sc = netdev_priv(dev);
/* lock eth irq */
- spin_lock_irq (&sc->sbm_lock);
+ /* hard_start_xmit() is always called with interrupts enabled */
+ local_irq_disable();
+ if (unlikely (!spin_trylock(&sc->sbm_lock))) {
+ /* collision */
+ local_irq_enable();
+ return NETDEV_TX_LOCKED;
+ }
+
/*
* Put the buffer on the transmit ring. If we
* don't have room, stop the queue.
@@ -2244,14 +2396,14 @@
netif_stop_queue(dev);
spin_unlock_irq(&sc->sbm_lock);
- return 1;
+ return NETDEV_TX_BUSY;
}
dev->trans_start = jiffies;
spin_unlock_irq (&sc->sbm_lock);
- return 0;
+ return NETDEV_TX_OK;
}
/**********************************************************************
@@ -2421,7 +2573,7 @@
return 0;
}
#endif
-
+
static int sb1250_change_mtu(struct net_device *_dev, int new_mtu)
{
if (new_mtu > ENET_PACKET_SIZE)
@@ -2511,6 +2663,8 @@
dev->weight = 16;
#endif
+ dev->features |= NETIF_F_LLTX; /* hard_start_xmit handles locking */
+
/* This is needed for PASS2 for Rx H/W checksum feature */
sbmac_set_iphdr_offset(sc);
@@ -2922,33 +3076,57 @@
int work_to_do;
int work_done;
struct sbmac_softc *sc = netdev_priv(dev);
+ uint32_t events = sc->sbm_events;
+ uint32_t new_events = 0;
- work_to_do = min(*budget, dev->quota);
- work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma), work_to_do, 1);
+ if (events & (uint32_t)(M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
+ work_to_do = dev->quota;
+ work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma),
+ work_to_do, 1);
- if (work_done > work_to_do)
- printk(KERN_ERR "%s exceeded work_to_do budget=%d quota=%d work-done=%d\n",
- sc->sbm_dev->name, *budget, dev->quota, work_done);
+#if 0
+ if (work_done > work_to_do)
+ printk(KERN_ERR "%s exceeded work_to_do budget=%d "
+ "quota=%d work-done=%d\n",
+ sc->sbm_dev->name, *budget, dev->quota,
+ work_done);
+#endif
+ *budget -= work_done;
+ dev->quota -= work_done;
+ if (work_done >= work_to_do)
+ new_events = (uint32_t)(M_MAC_INT_CHANNEL <<
+ S_MAC_RX_CH0);
+ }
- sbdma_tx_process(sc, &(sc->sbm_txdma), 1);
+ if (events & (uint32_t)(M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
+ sbdma_tx_process(sc, &(sc->sbm_txdma), 1);
+ }
- *budget -= work_done;
- dev->quota -= work_done;
+ /*
+ * If we found no more TX or RX work to do, don't reschedule,
+ * reenable interrupts.
+ */
+ sc->sbm_events = new_events |
+ ((uint32_t)__raw_readq(sc->sbm_isr) &
+ (uint32_t) ((M_MAC_INT_CHANNEL << S_MAC_RX_CH0) |
+ (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)));
- if (work_done < work_to_do) {
+ if (sc->sbm_events == 0) {
netif_rx_complete(dev);
#ifdef CONFIG_SBMAC_COALESCE
- __raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_TX_CH0) |
- ((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_RX_CH0),
- sc->sbm_imr);
+ __raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER)
+ << S_MAC_TX_CH0) |
+ ((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER)
+ << S_MAC_RX_CH0), sc->sbm_imr);
#else
__raw_writeq((M_MAC_INT_CHANNEL << S_MAC_TX_CH0) |
- (M_MAC_INT_CHANNEL << S_MAC_RX_CH0), sc->sbm_imr);
+ (M_MAC_INT_CHANNEL << S_MAC_RX_CH0), sc->sbm_imr);
#endif
+ return 0;
}
- return (work_done >= work_to_do);
+ return 1;
}
#endif
@@ -3072,6 +3250,20 @@
return 0;
}
+#ifdef CONFIG_SBMAC_SKB_CACHE
+static void sbmac_skb_cache_flush (void)
+{
+ struct sk_buff * sb;
+ spin_lock (&sbmac_skb.lock);
+ while (sbmac_skb.index > 0) {
+ sb = sbmac_skb.buf [--sbmac_skb.index];
+ spin_unlock (&sbmac_skb.lock); /* probably not necessary */
+ kfree_skbmem (sb);
+ spin_lock (&sbmac_skb.lock);
+ }
+ spin_unlock (&sbmac_skb.lock);
+}
+#endif /* CONFIG_SBMAC_SKB_CACHE */
static void __exit
sbmac_cleanup_module(void)
@@ -3090,6 +3282,9 @@
sbmac_uninitctx(sc);
free_netdev(dev);
}
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ sbmac_skb_cache_flush();
+#endif
}
module_init(sbmac_init_module);
* Re: [patch] Performance enhancement patches for SB1250 MAC
2006-10-12 21:54 Yang, Steve
@ 2006-10-12 22:13 ` Stephen Hemminger
0 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2006-10-12 22:13 UTC (permalink / raw)
To: Yang, Steve; +Cc: netdev
On Thu, 12 Oct 2006 14:54:33 -0700
"Yang, Steve" <steve.yang@windriver.com> wrote:
> FYI ...
>
> Regards,
> Steve Yang
>
> -----Original Message-----
> From: Yang, Steve
> Sent: Monday, September 25, 2006 3:50 PM
> To: 'linux-mips@linux-mips.org'
> Cc: 'ralf@linux-mips.org'; 'Mark E Mason'
> Subject: Performance enhancement patches for SB1250 MAC
>
> Hi,
>
> The attached are two network performance enhancement patches for SB1250
> MAC. The NAPI patch applies first, followed by the "skb cache" patch.
> They apply and build cleanly on the 2.6.18 kernel for the following
> kernel option combinations:
>
> SBMAC_NAPI no yes yes
> SKB_CACHE no no yes
>
> Regards,
> Steve Yang
>
NAK on the SKB_CACHE: it is an idea that just ends up favoring your
driver at the expense of the rest of the system. Also, there are
resource/memory starvation issues and probably other races as well.
I bet it makes your benchmark run faster, but it doesn't belong in the
normal kernel.
--
Stephen Hemminger <shemminger@osdl.org>
* Re: [patch] Performance enhancement patches for SB1250 MAC
[not found] <D9055C0A0A86BD4E89AD96A89EA767DBA6AFBC@ALA-MAIL03.corp.ad.wrs.com>
@ 2006-10-13 20:24 ` Stephen Hemminger
0 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2006-10-13 20:24 UTC (permalink / raw)
To: Yang, Steve; +Cc: netdev
On Thu, 12 Oct 2006 17:45:54 -0700
"Yang, Steve" <steve.yang@windriver.com> wrote:
> Stephen,
>
> I assume the "expense" you referred to is the reserved SK cache buffers.
>
>
> 1. The SKB_CACHE does hold on to buffers which would
> otherwise be returned to the system (although the
> number it holds on to is limited and configurable).
> These buffers are only returned with certainty
> at module unload time, although with normal traffic
> most of them would be recycled pretty quick. I think
> the cache was implemented as a stack, rather than a
> FIFO, which could cause a few buffers to be held for
> quite a while under light loads.
>
> 2. SKB_CACHE, just like NAPI, is also a configurable
> option. Systems that need the performance have the
> option of turning this on, at the expense of small
> number of buffers; other systems which don't care
> much about networking performance can leave this
> option off.
>
> 3. Can you elaborate other possible issues that you
> touch upon (memory starvation/race, etc.)?
>
> Regards,
> Steve Yang
Several drivers have tried to do this in the past, but the consensus
has been that this is a bad idea.
Your code captures transmit skb's and uses them for receive. Why is
this a good idea? It is a maintenance nightmare, it breaks a lot of
the cleanup/initialization of skb's and means whoever changes something
like TSO has to go looking at some driver that is doing squirrelly games.
Plus, your cache requires two extra lock round trips, it seems.
The cost of alloc_skb should be trivial.
* RE: [patch] Performance enhancement patches for SB1250 MAC
@ 2006-11-20 21:40 Yang, Steve
2006-11-20 23:35 ` Stephen Hemminger
0 siblings, 1 reply; 10+ messages in thread
From: Yang, Steve @ 2006-11-20 21:40 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev
Stephen,
Sorry for responding late.
1. I've submitted two patches, one for NAPI and the other for SKB_Cache.
They can go in individually. You've expressed objections to SKB_Cache.
What about the one for NAPI?
2. As for the SKB_Cache patch, your concern about "maintenance" is valid.
However, the cost of allocating an skb is not trivial for performance,
hence the patch. This is backed up by measured performance data; a 20%
difference is a conservative number here.
Regards,
Steve Yang
-----Original Message-----
From: Stephen Hemminger [mailto:shemminger@osdl.org]
Sent: Friday, October 13, 2006 1:25 PM
To: Yang, Steve
Cc: netdev@vger.kernel.org
Subject: Re: [patch] Performance enhancement patches for SB1250 MAC
On Thu, 12 Oct 2006 17:45:54 -0700
"Yang, Steve" <steve.yang@windriver.com> wrote:
> Stephen,
>
> I assume the "expense" you referred to is the reserved SK cache
buffers.
>
>
> 1. The SKB_CACHE does hold on to buffers which would
> otherwise be returned to the system (although the
> number it holds on to is limited and configurable).
> These buffers are only returned with certainty
> at module unload time, although with normal traffic
> most of them would be recycled pretty quick. I think
> the cache was implemented as a stack, rather than a
> FIFO, which could cause a few buffers to be held for
> quite a while under light loads.
>
> 2. SKB_CACHE, just like NAPI, is also a configurable
> option. Systems that need the performance have the
> option of turning this on, at the expense of small
> number of buffers; other systems which don't care
> much about networking performance can leave this
> option off.
>
> 3. Can you elaborate other possible issues that you
> touch upon (memory starvation/race, etc.)?
>
> Regards,
> Steve Yang
Several drivers have tried to do this in the past, but the consensus has
been that this is a bad idea.
Your code captures transmit skb's and uses them for receive. Why is this
a good idea? It is a maintenance nightmare, it breaks a lot of the
cleanup/initialization of skb's and means whoever changes something
like TSO has to go looking at some driver that is doing squirrelly games.
Plus, your cache requires two extra lock round trips, it seems.
The cost of alloc_skb should be trivial.
* Re: [patch] Performance enhancement patches for SB1250 MAC
2006-11-20 21:40 [patch] Performance enhancement patches for SB1250 MAC Yang, Steve
@ 2006-11-20 23:35 ` Stephen Hemminger
2006-11-20 23:58 ` David Miller
2006-11-21 14:26 ` Martin Michlmayr
0 siblings, 2 replies; 10+ messages in thread
From: Stephen Hemminger @ 2006-11-20 23:35 UTC (permalink / raw)
To: Yang, Steve; +Cc: netdev
On Mon, 20 Nov 2006 13:40:01 -0800
"Yang, Steve" <steve.yang@windriver.com> wrote:
> Stephen,
>
> Sorry for responding late.
>
> 1. I've submitted two patches, one for NAPI and the other for SKB_Cache.
> They can go in individually. You've expressed objections to SKB_Cache.
> What about the one for NAPI?
Yes, the NAPI one is great.
>
> 2. As for SKB_Cache patch, your concern about "maintenance" is valid.
> However, the cost for allocating skb is not trivial for performance,
> hence the patch. This is backed up by performance data measured. 20%
> difference is a conservative number here.
>
I am not the final decider on this. Ask Jeff/David? Maybe there is some
more general work that can be done to speed up skb allocation.
--
Stephen Hemminger <shemminger@osdl.org>
* Re: [patch] Performance enhancement patches for SB1250 MAC
2006-11-20 23:35 ` Stephen Hemminger
@ 2006-11-20 23:58 ` David Miller
2006-11-21 14:26 ` Martin Michlmayr
1 sibling, 0 replies; 10+ messages in thread
From: David Miller @ 2006-11-20 23:58 UTC (permalink / raw)
To: shemminger; +Cc: steve.yang, netdev
From: Stephen Hemminger <shemminger@osdl.org>
Date: Mon, 20 Nov 2006 15:35:48 -0800
> I am not the final decider on this. Ask Jeff/David? Maybe there is some
> more general work that can be done to speed up skb allocation.
We used to have a skb_head per-cpu cache in skbuff.c but we killed
it because SLAB was nearly as good and using generic code always
trumps localized hacks like this.
I suggest we don't regress in this area.
* Re: [patch] Performance enhancement patches for SB1250 MAC
2006-11-20 23:35 ` Stephen Hemminger
2006-11-20 23:58 ` David Miller
@ 2006-11-21 14:26 ` Martin Michlmayr
1 sibling, 0 replies; 10+ messages in thread
From: Martin Michlmayr @ 2006-11-21 14:26 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: Yang, Steve, netdev
* Stephen Hemminger <shemminger@osdl.org> [2006-11-20 15:35]:
> > 1. I've submitted two patches, one for NAPI and the other for SKB_Cache.
> > They can go in individually. You've expressed objections to SKB_Cache.
> > What about the one for NAPI?
> Yes, the NAPI one is great.
Should Steve resend the patch so it can be added or is it queued up
already? It would be nice to have this in 2.6.20.
--
Martin Michlmayr
tbm@cyrius.com
* RE: [patch] Performance enhancement patches for SB1250 MAC
@ 2006-11-21 16:45 Yang, Steve
2006-11-21 19:12 ` Stephen Hemminger
0 siblings, 1 reply; 10+ messages in thread
From: Yang, Steve @ 2006-11-21 16:45 UTC (permalink / raw)
To: Martin Michlmayr, Stephen Hemminger; +Cc: netdev
[-- Attachment #1: Type: text/plain, Size: 1111 bytes --]
Martin,
I've attached the two patches with this email.
Original submission email text:
"The attached are two network performance enhancement patches for SB1250
MAC. The NAPI patch applies first, followed by the "skb cache" patch.
They apply and build cleanly on the 2.6.18 kernel for the following
kernel option combinations:
SBMAC_NAPI no yes yes
SKB_CACHE no no yes"
Regards,
Steve Yang
-----Original Message-----
From: Martin Michlmayr [mailto:tbm@cyrius.com]
Sent: Tuesday, November 21, 2006 6:27 AM
To: Stephen Hemminger
Cc: Yang, Steve; netdev@vger.kernel.org
Subject: Re: [patch] Performance enhancement patches for SB1250 MAC
* Stephen Hemminger <shemminger@osdl.org> [2006-11-20 15:35]:
> > 1. I've submitted two patches, one for NAPI and the other for
SKB_Cache.
> > They can go in individually. You've expressed objections to
SKB_Cache.
> > What about the one for NAPI?
> Yes, the NAPI one is great.
Should Steve resend the patch so it can be added or is it queued up
already? It would be nice to have this in 2.6.20.
--
Martin Michlmayr
tbm@cyrius.com
[-- Attachment #2: mips-sb1250-mac-NAPI.patch --]
[-- Type: application/octet-stream, Size: 18115 bytes --]
[-- Attachment #3: sb1250mac_skb_cache.patch --]
[-- Type: application/octet-stream, Size: 16456 bytes --]
This patch implements a private cache of sk_buff buffers for the SB1250
MAC driver to improve performance. It should be applied on top of the
NAPI patch.
Signed-off-by: Dan Krejsa (dan.krejsa@windriver.com)
Signed-off-by: Steve Yang (steve.yang@windriver.com)
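In outline, the cache is a small LIFO pool shared by the SB1250 ports: the transmit-completion path pushes a recyclable buffer into the pool instead of freeing it, and the receive-refill path pops from the pool before falling back to dev_alloc_skb(). A minimal sketch of that recycle idea follows; the names (skb_pool, skb_pool_put, skb_pool_get) and the fixed size of 64 (mirroring the Kconfig default) are illustrative assumptions, not the identifiers used in the patch below.

#include <linux/skbuff.h>
#include <linux/spinlock.h>

#define SKB_POOL_SIZE 64

struct skb_pool {
	spinlock_t lock;	/* initialize with spin_lock_init() before use */
	int count;
	struct sk_buff *buf[SKB_POOL_SIZE];
};

/* TX completion: try to keep an eligible buffer for later reuse. */
static int skb_pool_put(struct skb_pool *p, struct sk_buff *skb)
{
	int kept = 0;

	if (spin_trylock(&p->lock)) {
		if (p->count < SKB_POOL_SIZE) {
			p->buf[p->count++] = skb;
			kept = 1;
		}
		spin_unlock(&p->lock);
	}
	return kept;		/* caller frees the skb when it is not kept */
}

/* RX refill: prefer a cached buffer over dev_alloc_skb(). */
static struct sk_buff *skb_pool_get(struct skb_pool *p)
{
	struct sk_buff *skb = NULL;

	if (spin_trylock(&p->lock)) {
		if (p->count > 0)
			skb = p->buf[--p->count];
		spin_unlock(&p->lock);
	}
	return skb;
}

The patch itself restricts recycling to linear sk_buffs with a single reference (users and dataref both 1) and a sufficiently large data area, resets a recycled buffer's state before handing it back to the receive ring, and flushes the pool when the module is unloaded.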
Index: linux-2.6.14-cgl/drivers/net/Kconfig
===================================================================
--- linux-2.6.14-cgl.orig/drivers/net/Kconfig 2006-09-25 10:06:29.000000000 -0700
+++ linux-2.6.14-cgl/drivers/net/Kconfig 2006-09-25 12:26:08.401453626 -0700
@@ -2048,6 +2048,35 @@
If in doubt, say y.
+config SBMAC_SKB_CACHE
+ bool "SBMAC: Enable driver local buffer caching (EXPERIMENTAL)"
+ depends on NET_SB1250_MAC && EXPERIMENTAL
+ help
+ This configuration option makes the sb1250-mac.c driver
+ maintain a private cache of 'sk_buff' buffers, shared
+ between the network devices which it manages. When a packet
+ transmit completes, the corresponding sk_buff may be placed
+ in the private cache (provided it meets certain criteria)
+ rather than being returned to the general kernel pools.
+ During packet reception, replenishment sk_buffs may be taken
+ from the private cache (if available) in preference to using
+ dev_alloc_skb(). This has been found to significantly
+ improve performance when forwarding under heavy load.
+
+ If in doubt, say y here.
+
+config SBMAC_SKB_CACHE_SIZE
+ int "SBMAC driver sk_buff cache size"
+ depends on SBMAC_SKB_CACHE
+ default 64
+ help
+ Number of 'sk_buff' buffers in the cache. Defaults to 64.
+ Up to this many buffers may be held in the cache by the driver,
+ unavailable for other use, until the driver is unloaded.
+ Only linear sk_buffs with a single reference count and of
+ sufficiently large size are candidates for recycling in the
+ private cache.
+
config R8169_VLAN
bool "VLAN support"
depends on R8169 && VLAN_8021Q
Index: linux-2.6.14-cgl/drivers/net/sb1250-mac.c
===================================================================
--- linux-2.6.14-cgl.orig/drivers/net/sb1250-mac.c 2006-09-25 10:06:29.000000000 -0700
+++ linux-2.6.14-cgl/drivers/net/sb1250-mac.c 2006-09-25 12:43:01.299764714 -0700
@@ -35,6 +35,10 @@
#include <asm/processor.h> /* Processor type for cache alignment. */
#include <asm/io.h>
#include <asm/cache.h>
+#ifdef CONFIG_SBMAC_SKB_CACHE
+#include <net/dst.h>
+#include <net/xfrm.h>
+#endif /* CONFIG_SBMAC_SKB_CACHE */
/* This is only here until the firmware is ready. In that case,
the firmware leaves the ethernet address in the register for us. */
@@ -275,10 +279,40 @@
sbmacdma_t sbm_txdma; /* for now, only use channel 0 */
sbmacdma_t sbm_rxdma;
+ uint32_t sbm_events; /* record interrupt status */
int rx_hw_checksum;
int sbe_idx;
};
+#ifdef CONFIG_SBMAC_SKB_CACHE
+
+/* Try to locally cache a TX sk_buff if its truesize is at least this big: */
+#define SBMAC_SIZE_REQ (SKB_DATA_ALIGN(16 + ENET_PACKET_SIZE + \
+ SMP_CACHE_BYTES * 2 + \
+ ETHER_ALIGN))
+#ifndef SBMAC_SKB_CACHE_SIZE
+#define SBMAC_SKB_CACHE_SIZE 64
+#endif
+
+struct sbmac_skb_cache {
+ spinlock_t lock;
+ int index;
+ struct sk_buff * buf [SBMAC_SKB_CACHE_SIZE];
+};
+
+/*
+ * Local sk_buff cache, shared by devices.
+ * Remember to write clean-up code to free any buffers in
+ * the cache when the module unloads.
+ */
+
+static struct sbmac_skb_cache sbmac_skb = {
+ .lock = SPIN_LOCK_UNLOCKED,
+ .index = 0
+};
+
+#endif /* CONFIG_SBMAC_SKB_CACHE */
+
/**********************************************************************
* Externs
@@ -882,7 +916,7 @@
d->sbdma_remptr = NULL;
}
-static void sbdma_align_skb(struct sk_buff *skb,int power2,int offset)
+static inline void sbdma_align_skb(struct sk_buff *skb,int power2,int offset)
{
unsigned long addr;
unsigned long newaddr;
@@ -917,6 +951,9 @@
sbdmadscr_t *nextdsc;
struct sk_buff *sb_new = NULL;
int pktsize = ENET_PACKET_SIZE;
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ int i;
+#endif
/* get pointer to our current place in the ring */
@@ -952,26 +989,52 @@
* DMA will trash the beginning (and ending) portions.
*/
- if (sb == NULL) {
- sb_new = dev_alloc_skb(ENET_PACKET_SIZE + SMP_CACHE_BYTES * 2 + ETHER_ALIGN);
- if (sb_new == NULL) {
- printk(KERN_INFO "%s: sk_buff allocation failed\n",
- d->sbdma_eth->sbm_dev->name);
- return -ENOBUFS;
+
+ sb_new = sb;
+
+ if (sb_new == NULL) {
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ if (likely (spin_trylock (&sbmac_skb.lock))) {
+ if (likely ((i = sbmac_skb.index - 1) >= 0)) {
+ sb_new = sbmac_skb.buf [i];
+ sbmac_skb.index = i;
+ }
+ spin_unlock (&sbmac_skb.lock);
}
+ if (sb_new == NULL) {
+#endif
+ sb_new = dev_alloc_skb (ENET_PACKET_SIZE +
+ SMP_CACHE_BYTES * 2 +
+ ETHER_ALIGN);
+ if (unlikely (sb_new == NULL)) {
+ printk(KERN_INFO
+ "%s: sk_buff allocation failed\n",
+ d->sbdma_eth->sbm_dev->name);
+ return -ENOBUFS;
+ }
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ } else {
+ /* sb_new came from the local cache, init it */
+ memset(sb_new, 0, offsetof(struct sk_buff, truesize));
+ /* sb->users is already 1, as is
+ skb_shinfo(sb)->dataref */
+
+ /* reserve 16 bytes, see __dev_alloc_skb() */
+ sb_new->data = sb_new->tail = sb_new->head + 16;
+
+ /* are these necessary given that we don't support
+ * TCP segmentation offload?
+ */
+ skb_shinfo(sb_new)->gso_size = 0;
+ skb_shinfo(sb_new)->gso_segs = 0;
+ }
+#endif
sbdma_align_skb(sb_new, SMP_CACHE_BYTES, ETHER_ALIGN);
/* mark skbuff owned by our device */
sb_new->dev = d->sbdma_eth->sbm_dev;
}
- else {
- sb_new = sb;
- /*
- * nothing special to reinit buffer, it's already aligned
- * and sb->data already points to a good place.
- */
- }
/*
* fill in the descriptor
@@ -1344,8 +1407,12 @@
sbdmadscr_t *dsc;
struct sk_buff *sb;
unsigned long flags;
- int packets_handled = 0;
-
+ int out = 0;
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ int i;
+ int size;
+#endif
+
spin_lock_irqsave(&(sc->sbm_lock), flags);
if (d->sbdma_remptr == d->sbdma_addptr)
@@ -1354,8 +1421,8 @@
hwidx = (int) (((__raw_readq(d->sbdma_curdscr) & M_DMA_CURDSCR_ADDR) -
d->sbdma_dscrtable_phys) / sizeof(sbdmadscr_t));
- for (;;) {
- /*
+ for (;; d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr)) {
+ /*
* figure out where we are (as an index) and where
* the hardware is (also as an index)
*
@@ -1371,8 +1438,8 @@
/*
* If they're the same, that means we've processed all
* of the descriptors up to (but not including) the one that
- * the hardware is working on right now.
- */
+ * the hardware was working on when we started.
+ */
if (curidx == hwidx)
break;
@@ -1394,27 +1461,95 @@
/*
* for transmits, we just free buffers.
- */
-
- dev_kfree_skb_irq(sb);
+ *
+ * CONFIG_SBMAC_SKB_CACHE:
+ * We try to keep an optimized local sk_buff cache
+ * shared between devices managed by this driver.
+ * Candidate sk_buff's are linear with enough space
+ * and reference counts of 1. Note that dataref==1
+ * also means that 'header cloning' hasn't taken place.
+ *
+ * We only access the local sk_buff cache inside the
+ * device poll routine. However, as this is shared
+ * between devices and the net_rx_action may run on
+ * multiple CPUs concurrently, we need a spin lock.
+ */
+
+ if (poll) {
+ /* Interrupts are known enabled in polling routine */
+
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ if (atomic_read (&sb->users) == 1 &&
+ atomic_read (&(skb_shinfo(sb)->dataref)) == 1 &&
+ (size = (sb->end - sb->head)) >= SBMAC_SIZE_REQ &&
+ sb->data_len == 0 &&
+ sb->fclone == SKB_FCLONE_UNAVAILABLE &&
+ /*
+ * Are these last two checks necessary
+ * given that data_len == 0 ? In any case,
+ * we don't seem to handle non-linear sk_buffs
+ * in sbdma_add_txbuffer(), so it may be
+ * overly paranoid here...
+ */
+ skb_shinfo(sb)->nr_frags == 0 &&
+ skb_shinfo(sb)->frag_list == 0) {
+ smp_rmb();
+
+ /*
+ * XXX - the next several lines are from
+ * __kfree_skb().
+ */
- /*
- * .. and advance to the next buffer.
- */
+ dst_release(sb->dst);
+#ifdef CONFIG_XFRM
+ secpath_put(sb->sp);
+#endif
+ if (sb->destructor) {
+ sb->destructor(sb);
+ }
+#ifdef CONFIG_NETFILTER
+ nf_conntrack_put(sb->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put_reasm(sb->nfct_reasm);
+#endif
- d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ nf_bridge_put(sb->nf_bridge);
+#endif
+#endif
+ /* XXX: IS this still necessary? - JHS */
+#ifdef CONFIG_NET_SCHED
+ sb->tc_index = 0;
+#ifdef CONFIG_NET_CLS_ACT
+ sb->tc_verd = 0;
+#endif
+#endif
- packets_handled++;
+ if (spin_trylock(&sbmac_skb.lock)) {
+ if ( (i = sbmac_skb.index) <
+ SBMAC_SKB_CACHE_SIZE) {
+ sb->truesize = size +
+ sizeof (struct sk_buff);
+ sbmac_skb.buf[i] = sb;
+ sbmac_skb.index = i + 1;
+ spin_unlock (&sbmac_skb.lock);
+ continue;
+ }
+ spin_unlock (&sbmac_skb.lock);
+ }
+ kfree_skbmem (sb);
+ } else /* SKB_CACHE */
+#endif /* CONFIG_SBMAC_SKB_CACHE */
+ dev_kfree_skb(sb);
+ } else /* Poll */
+ dev_kfree_skb_irq(sb);
}
- /*
- * Decide if we should wake up the protocol or not.
- * Other drivers seem to do this when we reach a low
- * watermark on the transmit queue.
- */
+ if ( (out = (d->sbdma_addptr - d->sbdma_remptr)) < 0)
+ out += SBMAC_MAX_TXDESCR; /* d->sbdma_maxdescr; */
- if (packets_handled)
+ if (out < SBMAC_MAX_TXDESCR / 2)
netif_wake_queue(d->sbdma_eth->sbm_dev);
end_unlock:
@@ -2127,42 +2262,53 @@
int handled = 0;
#ifdef CONFIG_SBMAC_NAPI
- /*
- * Read the ISR (this clears the bits in the real
- * register, except for counter addr)
- */
-
- isr = __raw_readq(sc->sbm_isr) & ~M_MAC_COUNTER_ADDR;
-
- if (isr == 0)
- return IRQ_RETVAL(0);
- handled = 1;
-
- /*
- * Transmits on channel 0
- */
-
- if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
- sbdma_tx_process(sc,&(sc->sbm_txdma), 0);
-#ifdef CONFIG_NETPOLL_TRAP
- if (netpoll_trap()) {
- if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
- __netif_schedule(dev);
- }
-#endif
- }
-
- if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
+ /*
+ * Read the ISR (this clears the bits in the real
+ * register, except for counter addr)
+ */
+
+ isr = __raw_readq(sc->sbm_isr) & ~M_MAC_COUNTER_ADDR;
+
+ if (isr == 0)
+ return IRQ_RETVAL(0);
+
+ handled = 1;
+
+ if (isr & ((M_MAC_INT_CHANNEL << S_MAC_RX_CH0) |
+ (M_MAC_INT_CHANNEL << S_MAC_TX_CH0))) {
if (netif_rx_schedule_prep(dev)) {
+ sc->sbm_events = (uint32_t)isr;
__raw_writeq(0, sc->sbm_imr);
__netif_rx_schedule(dev);
/* Depend on the exit from poll to reenable intr */
- }
- else {
- /* may leave some packets behind */
- sbdma_rx_process(sc,&(sc->sbm_rxdma),
- SBMAC_MAX_RXDESCR * 2, 0);
- }
+ } else {
+ /* Interrupt already acknowledged, sbmac_poll()
+ * already scheduled. Do nothing. Shouldn't occur
+ * anyhow.
+ */
+#if 0
+ if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
+ sbdma_tx_process(sc,&(sc->sbm_txdma), 0);
+#ifdef CONFIG_NETPOLL_TRAP
+ if (netpoll_trap()) {
+ if (test_and_clear_bit(__LINK_STATE_XOFF,
+ &dev->state))
+ __netif_schedule(dev);
+ }
+#endif
+ }
+ /* Note, if the ISR is allowed to call
+ * sbdma_rx_process(), provisions have to be made
+ * when the local SKB_CACHE is used; it presently
+ * doesn't have protection against accesses from
+ * interrupt level.
+ */
+ if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
+ /* may leave some packets behind */
+ sbdma_rx_process(sc,&(sc->sbm_rxdma), 32, 0);
+ }
+#endif /* 0 */
+ }
}
#else
/* Non NAPI */
@@ -2180,8 +2326,7 @@
handled = 1;
if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
- sbdma_tx_process(sc,&(sc->sbm_txdma),
- SBMAC_MAX_RXDESCR * 2);
+ sbdma_tx_process(sc, &(sc->sbm_txdma), 0);
#ifdef CONFIG_NETPOLL_TRAP
if (netpoll_trap()) {
if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
@@ -2232,8 +2377,15 @@
struct sbmac_softc *sc = netdev_priv(dev);
/* lock eth irq */
- spin_lock_irq (&sc->sbm_lock);
+ /* hard_start_xmit() is always called with interrupts enabled */
+ local_irq_disable();
+ if (unlikely (!spin_trylock(&sc->sbm_lock))) {
+ /* collision */
+ local_irq_enable();
+ return NETDEV_TX_LOCKED;
+ }
+
/*
* Put the buffer on the transmit ring. If we
* don't have room, stop the queue.
@@ -2244,14 +2396,14 @@
netif_stop_queue(dev);
spin_unlock_irq(&sc->sbm_lock);
- return 1;
+ return NETDEV_TX_BUSY;
}
dev->trans_start = jiffies;
spin_unlock_irq (&sc->sbm_lock);
- return 0;
+ return NETDEV_TX_OK;
}
/**********************************************************************
@@ -2421,7 +2573,7 @@
return 0;
}
#endif
-
+
static int sb1250_change_mtu(struct net_device *_dev, int new_mtu)
{
if (new_mtu > ENET_PACKET_SIZE)
@@ -2511,6 +2663,8 @@
dev->weight = 16;
#endif
+ dev->features |= NETIF_F_LLTX; /* hard_start_xmit handles locking */
+
/* This is needed for PASS2 for Rx H/W checksum feature */
sbmac_set_iphdr_offset(sc);
@@ -2922,33 +3076,57 @@
int work_to_do;
int work_done;
struct sbmac_softc *sc = netdev_priv(dev);
+ uint32_t events = sc->sbm_events;
+ uint32_t new_events = 0;
- work_to_do = min(*budget, dev->quota);
- work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma), work_to_do, 1);
+ if (events & (uint32_t)(M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
+ work_to_do = dev->quota;
+ work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma),
+ work_to_do, 1);
- if (work_done > work_to_do)
- printk(KERN_ERR "%s exceeded work_to_do budget=%d quota=%d work-done=%d\n",
- sc->sbm_dev->name, *budget, dev->quota, work_done);
+#if 0
+ if (work_done > work_to_do)
+ printk(KERN_ERR "%s exceeded work_to_do budget=%d "
+ "quota=%d work-done=%d\n",
+ sc->sbm_dev->name, *budget, dev->quota,
+ work_done);
+#endif
+ *budget -= work_done;
+ dev->quota -= work_done;
+ if (work_done >= work_to_do)
+ new_events = (uint32_t)(M_MAC_INT_CHANNEL <<
+ S_MAC_RX_CH0);
+ }
- sbdma_tx_process(sc, &(sc->sbm_txdma), 1);
+ if (events & (uint32_t)(M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
+ sbdma_tx_process(sc, &(sc->sbm_txdma), 1);
+ }
- *budget -= work_done;
- dev->quota -= work_done;
+ /*
+ * If we found no more TX or RX work to do, don't reschedule,
+ * reenable interrupts.
+ */
+ sc->sbm_events = new_events |
+ ((uint32_t)__raw_readq(sc->sbm_isr) &
+ (uint32_t) ((M_MAC_INT_CHANNEL << S_MAC_RX_CH0) |
+ (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)));
- if (work_done < work_to_do) {
+ if (sc->sbm_events == 0) {
netif_rx_complete(dev);
#ifdef CONFIG_SBMAC_COALESCE
- __raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_TX_CH0) |
- ((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_RX_CH0),
- sc->sbm_imr);
+ __raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER)
+ << S_MAC_TX_CH0) |
+ ((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER)
+ << S_MAC_RX_CH0), sc->sbm_imr);
#else
__raw_writeq((M_MAC_INT_CHANNEL << S_MAC_TX_CH0) |
- (M_MAC_INT_CHANNEL << S_MAC_RX_CH0), sc->sbm_imr);
+ (M_MAC_INT_CHANNEL << S_MAC_RX_CH0), sc->sbm_imr);
#endif
+ return 0;
}
- return (work_done >= work_to_do);
+ return 1;
}
#endif
@@ -3072,6 +3250,20 @@
return 0;
}
+#ifdef CONFIG_SBMAC_SKB_CACHE
+static void sbmac_skb_cache_flush (void)
+{
+ struct sk_buff * sb;
+ spin_lock (&sbmac_skb.lock);
+ while (sbmac_skb.index > 0) {
+ sb = sbmac_skb.buf [--sbmac_skb.index];
+ spin_unlock (&sbmac_skb.lock); /* probably not necessary */
+ kfree_skbmem (sb);
+ spin_lock (&sbmac_skb.lock);
+ }
+ spin_unlock (&sbmac_skb.lock);
+}
+#endif /* CONFIG_SBMAC_SKB_CACHE */
static void __exit
sbmac_cleanup_module(void)
@@ -3090,6 +3282,9 @@
sbmac_uninitctx(sc);
free_netdev(dev);
}
+#ifdef CONFIG_SBMAC_SKB_CACHE
+ sbmac_skb_cache_flush();
+#endif
}
module_init(sbmac_init_module);
* Re: [patch] Performance enhancement patches for SB1250 MAC
2006-11-21 16:45 Yang, Steve
@ 2006-11-21 19:12 ` Stephen Hemminger
0 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2006-11-21 19:12 UTC (permalink / raw)
To: Yang, Steve; +Cc: Martin Michlmayr, netdev
On Tue, 21 Nov 2006 08:45:58 -0800
"Yang, Steve" <steve.yang@windriver.com> wrote:
> Martin,
>
> I've attached the two patches with this email.
>
> Original submission email text:
>
> "The attached are two network performance enhancement patches for SB1250
> MAC. The NAPI patch applies first. Followed by the "skb cache" patch.
> They applied and builds cleanly on 2.6.18 kernel for the following
> kernel option combinations:
>
> SBMAC_NAPI no yes yes
> SKB_CACHE no no yes"
>
> Regards,
> Steve Yang
>
> -----Original Message-----
> From: Martin Michlmayr [mailto:tbm@cyrius.com]
> Sent: Tuesday, November 21, 2006 6:27 AM
> To: Stephen Hemminger
> Cc: Yang, Steve; netdev@vger.kernel.org
> Subject: Re: [patch] Performance enhancement patches for SB1250 MAC
>
> * Stephen Hemminger <shemminger@osdl.org> [2006-11-20 15:35]:
> > > 1. I've submitted two patches, one for NAPI and the other for
> SKB_Cache.
> > > They can go in individually. You've expressed objections to
> SKB_Cache.
> > > What about the one for NAPI?
> > Yes, the NAPI one is great.
>
> Should Steve resend the patch so it can be added or is it queued up
> already? It would be nice to have this in 2.6.20.
> --
> Martin Michlmayr
> tbm@cyrius.com
One comment about the driver in general (not NAPI related) is that
almost all uses of "volatile" in a driver are incorrect, especially these,
because the device memory (__iomem) should already be mapped non-cached.
Volatile only guards against compiler optimizations, not device consistency.
@@ -197,13 +206,16 @@
volatile void __iomem *sbdma_config0; /* DMA config register 0 */
volatile void __iomem *sbdma_config1; /* DMA config register 1 */
volatile void __iomem *sbdma_dscrbase; /* Descriptor base address */
- volatile void __iomem *sbdma_dscrcnt; /* Descriptor count register */
+ volatile void __iomem *sbdma_dscrcnt; /* Descriptor count register */
volatile void __iomem *sbdma_curdscr; /* current descriptor address */
+ volatile void __iomem *sbdma_oodpktlost;/* pkt drop (rx only) */
+
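In other words, the ordering and non-cached behaviour come from the MMIO accessors rather than from a volatile qualifier on the pointer; a register pointer can be a plain void __iomem * and still be read safely through __raw_readq()/readq(). A minimal sketch of that style, with a made-up structure and register name purely for illustration:

#include <linux/io.h>
#include <linux/types.h>

struct example_dma_regs {
	void __iomem *curdscr;	/* current-descriptor register, no volatile needed */
};

static u64 example_read_curdscr(struct example_dma_regs *d)
{
	/* the accessor itself performs the uncached device read */
	return __raw_readq(d->curdscr);
}

Since the driver already goes through __raw_readq()/__raw_writeq() for its register accesses, dropping volatile from the member declarations should not change the generated accesses.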
Also, how is this NAPI related? It probably should be a different patch,
but I know nothing about MIPS SMP.
Index: linux-2.6.14-cgl/arch/mips/sibyte/bcm1480/irq.c
===================================================================
--- linux-2.6.14-cgl.orig/arch/mips/sibyte/bcm1480/irq.c 2006-09-20 14:58:41.000000000 -0700
+++ linux-2.6.14-cgl/arch/mips/sibyte/bcm1480/irq.c 2006-09-20 15:58:33.000000000 -0700
@@ -144,11 +144,11 @@
unsigned long flags;
unsigned int irq_dirty;
- i = first_cpu(mask);
- if (next_cpu(i, mask) <= NR_CPUS) {
+ if (cpus_weight(mask) != 1) {
printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
return;
}
+ i = first_cpu(mask);
/* Convert logical CPU to physical CPU */
cpu = cpu_logical_map(i);
--
Stephen Hemminger <shemminger@osdl.org>
* RE: [patch] Performance enhancement patches for SB1250 MAC
@ 2006-11-22 0:33 Yang, Steve
0 siblings, 0 replies; 10+ messages in thread
From: Yang, Steve @ 2006-11-22 0:33 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: Martin Michlmayr, netdev
The patches have been tested on non-SMP and SMP configurations.
Regards,
Steve Yang
-----Original Message-----
From: Stephen Hemminger [mailto:shemminger@osdl.org]
Sent: Tuesday, November 21, 2006 11:13 AM
To: Yang, Steve
Cc: Martin Michlmayr; netdev@vger.kernel.org
Subject: Re: [patch] Performance enhancement patches for SB1250 MAC
On Tue, 21 Nov 2006 08:45:58 -0800
"Yang, Steve" <steve.yang@windriver.com> wrote:
> Martin,
>
> I've attached the two patches with this email.
>
> Original submission email text:
>
> "The attached are two network performance enhancement patches for
> SB1250 MAC. The NAPI patch applies first. Followed by the "skb cache"
patch.
> They applied and builds cleanly on 2.6.18 kernel for the following
> kernel option combinations:
>
> SBMAC_NAPI no yes yes
> SKB_CACHE no no yes"
>
> Regards,
> Steve Yang
>
> -----Original Message-----
> From: Martin Michlmayr [mailto:tbm@cyrius.com]
> Sent: Tuesday, November 21, 2006 6:27 AM
> To: Stephen Hemminger
> Cc: Yang, Steve; netdev@vger.kernel.org
> Subject: Re: [patch] Performance enhancement patches for SB1250 MAC
>
> * Stephen Hemminger <shemminger@osdl.org> [2006-11-20 15:35]:
> > > 1. I've submitted two patches, one for NAPI and the other for
> SKB_Cache.
> > > They can go in individually. You've expressed objections to
> SKB_Cache.
> > > What about the one for NAPI?
> > Yes, the NAPI one is great.
>
> Should Steve resend the patch so it can be added or is it queued up
> already? It would be nice to have this in 2.6.20.
> --
> Martin Michlmayr
> tbm@cyrius.com
One comment about the driver in general (not NAPI related) is that
almost all uses of "volatile" in a driver are incorrect, especially
these, because the device memory (__iomem) should already be mapped
non-cached.
Volatile only guards against compiler optimizations, not device consistency.
@@ -197,13 +206,16 @@
volatile void __iomem *sbdma_config0; /* DMA config register 0 */
volatile void __iomem *sbdma_config1; /* DMA config register 1 */
volatile void __iomem *sbdma_dscrbase; /* Descriptor base address */
- volatile void __iomem *sbdma_dscrcnt; /* Descriptor count register */
+ volatile void __iomem *sbdma_dscrcnt; /* Descriptor count register */
volatile void __iomem *sbdma_curdscr; /* current descriptor address */
+ volatile void __iomem *sbdma_oodpktlost;/* pkt drop (rx only) */
+
Also, how is this NAPI related? It probably should be a different patch,
but I know nothing about MIPS SMP.
Index: linux-2.6.14-cgl/arch/mips/sibyte/bcm1480/irq.c
===================================================================
--- linux-2.6.14-cgl.orig/arch/mips/sibyte/bcm1480/irq.c 2006-09-20 14:58:41.000000000 -0700
+++ linux-2.6.14-cgl/arch/mips/sibyte/bcm1480/irq.c 2006-09-20 15:58:33.000000000 -0700
@@ -144,11 +144,11 @@
unsigned long flags;
unsigned int irq_dirty;
- i = first_cpu(mask);
- if (next_cpu(i, mask) <= NR_CPUS) {
+ if (cpus_weight(mask) != 1) {
printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
return;
}
+ i = first_cpu(mask);
/* Convert logical CPU to physical CPU */
cpu = cpu_logical_map(i);
--
Stephen Hemminger <shemminger@osdl.org>
Thread overview: 10+ messages
2006-11-20 21:40 [patch] Performance enhancement patches for SB1250 MAC Yang, Steve
2006-11-20 23:35 ` Stephen Hemminger
2006-11-20 23:58 ` David Miller
2006-11-21 14:26 ` Martin Michlmayr
-- strict thread matches above, loose matches on Subject: below --
2006-11-22 0:33 Yang, Steve
2006-11-21 16:45 Yang, Steve
2006-11-21 19:12 ` Stephen Hemminger
[not found] <D9055C0A0A86BD4E89AD96A89EA767DBA6AFBC@ALA-MAIL03.corp.ad.wrs.com>
2006-10-13 20:24 ` Stephen Hemminger
2006-10-12 21:54 Yang, Steve
2006-10-12 22:13 ` Stephen Hemminger