* [PATCH v2 1/7] via-velocity: Correct 64-byte alignment for rx buffers
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
@ 2009-11-23 14:30 ` Simon Kagstrom
2009-11-23 14:31 ` [PATCH v2 2/7] via-velocity: Add ethtool interrupt coalescing support Simon Kagstrom
` (7 subsequent siblings)
8 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-23 14:30 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
(From the VIA driver). The current code does not guarantee 64-byte
alignment since it simply does
int add = skb->data & 63;
skb->data += add;
(via skb_reserve). So for example, if the skb->data address would be
0x10, this would result in 32-byte alignment (0x10 + 0x10).
Correct by adding
64 - (skb->data & 63)
instead.
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index e04e5be..b6cf3b5 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1483,7 +1483,8 @@ static int velocity_alloc_rx_buf(struct velocity_info *vptr, int idx)
* Do the gymnastics to get the buffer head for data at
* 64byte alignment.
*/
- skb_reserve(rd_info->skb, (unsigned long) rd_info->skb->data & 63);
+ skb_reserve(rd_info->skb,
+ 64 - ((unsigned long) rd_info->skb->data & 63));
rd_info->skb_dma = pci_map_single(vptr->pdev, rd_info->skb->data,
vptr->rx.buf_sz, PCI_DMA_FROMDEVICE);
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v2 2/7] via-velocity: Add ethtool interrupt coalescing support
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
2009-11-23 14:30 ` [PATCH v2 1/7] via-velocity: Correct 64-byte alignment for rx buffers Simon Kagstrom
@ 2009-11-23 14:31 ` Simon Kagstrom
2009-11-23 14:31 ` [PATCH v2 3/7] via-velocity: Implement NAPI support Simon Kagstrom
` (6 subsequent siblings)
8 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-23 14:31 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
(Partially from the upstream VIA driver). Tweaking the number of
frames-per-interrupt and timer-until-interrupt can reduce the amount of
CPU work quite a lot.
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 160 +++++++++++++++++++++++++++++++++++++++++++-
drivers/net/via-velocity.h | 7 ++-
2 files changed, 165 insertions(+), 2 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index b6cf3b5..85ee898 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1259,6 +1259,66 @@ static void mii_init(struct velocity_info *vptr, u32 mii_status)
}
}
+/**
+ * setup_queue_timers - Setup interrupt timers
+ *
+ * Setup interrupt frequency during suppression (timeout if the frame
+ * count isn't filled).
+ */
+static void setup_queue_timers(struct velocity_info *vptr)
+{
+ /* Only for newer revisions */
+ if (vptr->rev_id >= REV_ID_VT3216_A0) {
+ u8 txqueue_timer = 0;
+ u8 rxqueue_timer = 0;
+
+ if (vptr->mii_status & (VELOCITY_SPEED_1000 |
+ VELOCITY_SPEED_100)) {
+ txqueue_timer = vptr->options.txqueue_timer;
+ rxqueue_timer = vptr->options.rxqueue_timer;
+ }
+
+ writeb(txqueue_timer, &vptr->mac_regs->TQETMR);
+ writeb(rxqueue_timer, &vptr->mac_regs->RQETMR);
+ }
+}
+/**
+ * setup_adaptive_interrupts - Setup interrupt suppression
+ *
+ * @vptr velocity adapter
+ *
+ * The velocity is able to suppress interrupt during high interrupt load.
+ * This function turns on that feature.
+ */
+static void setup_adaptive_interrupts(struct velocity_info *vptr)
+{
+ struct mac_regs __iomem *regs = vptr->mac_regs;
+ u16 tx_intsup = vptr->options.tx_intsup;
+ u16 rx_intsup = vptr->options.rx_intsup;
+
+ /* Setup default interrupt mask (will be changed below) */
+ vptr->int_mask = INT_MASK_DEF;
+
+ /* Set Tx Interrupt Suppression Threshold */
+ writeb(CAMCR_PS0, ®s->CAMCR);
+ if (tx_intsup != 0) {
+ vptr->int_mask &= ~(ISR_PTXI | ISR_PTX0I | ISR_PTX1I |
+ ISR_PTX2I | ISR_PTX3I);
+ writew(tx_intsup, ®s->ISRCTL);
+ } else
+ writew(ISRCTL_TSUPDIS, ®s->ISRCTL);
+
+ /* Set Rx Interrupt Suppression Threshold */
+ writeb(CAMCR_PS1, ®s->CAMCR);
+ if (rx_intsup != 0) {
+ vptr->int_mask &= ~ISR_PRXI;
+ writew(rx_intsup, ®s->ISRCTL);
+ } else
+ writew(ISRCTL_RSUPDIS, ®s->ISRCTL);
+
+ /* Select page to interrupt hold timer */
+ writeb(0, ®s->CAMCR);
+}
/**
* velocity_init_registers - initialise MAC registers
@@ -1345,7 +1405,7 @@ static void velocity_init_registers(struct velocity_info *vptr,
*/
enable_mii_autopoll(regs);
- vptr->int_mask = INT_MASK_DEF;
+ setup_adaptive_interrupts(vptr);
writel(vptr->rx.pool_dma, ®s->RDBaseLo);
writew(vptr->options.numrx - 1, ®s->RDCSize);
@@ -1802,6 +1862,8 @@ static void velocity_error(struct velocity_info *vptr, int status)
BYTE_REG_BITS_OFF(TESTCFG_HBDIS, ®s->TESTCFG);
else
BYTE_REG_BITS_ON(TESTCFG_HBDIS, ®s->TESTCFG);
+
+ setup_queue_timers(vptr);
}
/*
* Get link status from PHYSR0
@@ -3223,6 +3285,100 @@ static void velocity_set_msglevel(struct net_device *dev, u32 value)
msglevel = value;
}
+static int get_pending_timer_val(int val)
+{
+ int mult_bits = val >> 6;
+ int mult = 1;
+
+ switch (mult_bits)
+ {
+ case 1:
+ mult = 4; break;
+ case 2:
+ mult = 16; break;
+ case 3:
+ mult = 64; break;
+ case 0:
+ default:
+ break;
+ }
+
+ return (val & 0x3f) * mult;
+}
+
+static void set_pending_timer_val(int *val, u32 us)
+{
+ u8 mult = 0;
+ u8 shift = 0;
+
+ if (us >= 0x3f) {
+ mult = 1; /* mult with 4 */
+ shift = 2;
+ }
+ if (us >= 0x3f * 4) {
+ mult = 2; /* mult with 16 */
+ shift = 4;
+ }
+ if (us >= 0x3f * 16) {
+ mult = 3; /* mult with 64 */
+ shift = 6;
+ }
+
+ *val = (mult << 6) | ((us >> shift) & 0x3f);
+}
+
+
+static int velocity_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ecmd)
+{
+ struct velocity_info *vptr = netdev_priv(dev);
+
+ ecmd->tx_max_coalesced_frames = vptr->options.tx_intsup;
+ ecmd->rx_max_coalesced_frames = vptr->options.rx_intsup;
+
+ ecmd->rx_coalesce_usecs = get_pending_timer_val(vptr->options.rxqueue_timer);
+ ecmd->tx_coalesce_usecs = get_pending_timer_val(vptr->options.txqueue_timer);
+
+ return 0;
+}
+
+static int velocity_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ecmd)
+{
+ struct velocity_info *vptr = netdev_priv(dev);
+ int max_us = 0x3f * 64;
+
+ /* 6 bits of */
+ if (ecmd->tx_coalesce_usecs > max_us)
+ return -EINVAL;
+ if (ecmd->rx_coalesce_usecs > max_us)
+ return -EINVAL;
+
+ if (ecmd->tx_max_coalesced_frames > 0xff)
+ return -EINVAL;
+ if (ecmd->rx_max_coalesced_frames > 0xff)
+ return -EINVAL;
+
+ vptr->options.rx_intsup = ecmd->rx_max_coalesced_frames;
+ vptr->options.tx_intsup = ecmd->tx_max_coalesced_frames;
+
+ set_pending_timer_val(&vptr->options.rxqueue_timer,
+ ecmd->rx_coalesce_usecs);
+ set_pending_timer_val(&vptr->options.txqueue_timer,
+ ecmd->tx_coalesce_usecs);
+
+ /* Setup the interrupt suppression and queue timers */
+ mac_disable_int(vptr->mac_regs);
+ setup_adaptive_interrupts(vptr);
+ setup_queue_timers(vptr);
+
+ mac_write_int_mask(vptr->int_mask, vptr->mac_regs);
+ mac_clear_isr(vptr->mac_regs);
+ mac_enable_int(vptr->mac_regs);
+
+ return 0;
+}
+
static const struct ethtool_ops velocity_ethtool_ops = {
.get_settings = velocity_get_settings,
.set_settings = velocity_set_settings,
@@ -3232,6 +3388,8 @@ static const struct ethtool_ops velocity_ethtool_ops = {
.get_msglevel = velocity_get_msglevel,
.set_msglevel = velocity_set_msglevel,
.get_link = velocity_get_link,
+ .get_coalesce = velocity_get_coalesce,
+ .set_coalesce = velocity_set_coalesce,
.begin = velocity_ethtool_up,
.complete = velocity_ethtool_down
};
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index 2f00c13..6091946 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -1005,7 +1005,8 @@ struct mac_regs {
volatile __le32 RDBaseLo; /* 0x38 */
volatile __le16 RDIdx; /* 0x3C */
- volatile __le16 reserved_3E;
+ volatile u8 TQETMR; /* 0x3E, VT3216 and above only */
+ volatile u8 RQETMR; /* 0x3F, VT3216 and above only */
volatile __le32 TDBaseLo[4]; /* 0x40 */
@@ -1491,6 +1492,10 @@ struct velocity_opt {
int rx_bandwidth_hi;
int rx_bandwidth_lo;
int rx_bandwidth_en;
+ int rxqueue_timer;
+ int txqueue_timer;
+ int tx_intsup;
+ int rx_intsup;
u32 flags;
};
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v2 3/7] via-velocity: Implement NAPI support
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
2009-11-23 14:30 ` [PATCH v2 1/7] via-velocity: Correct 64-byte alignment for rx buffers Simon Kagstrom
2009-11-23 14:31 ` [PATCH v2 2/7] via-velocity: Add ethtool interrupt coalescing support Simon Kagstrom
@ 2009-11-23 14:31 ` Simon Kagstrom
2009-11-23 14:31 ` [PATCH v2 4/7] via-velocity: Change DMA_LENGTH_DEF (from the VIA driver) Simon Kagstrom
` (5 subsequent siblings)
8 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-23 14:31 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
This patch adds NAPI support for VIA velocity. The new velocity_poll
function also pairs tx/rx handling twice which improves perforamance on
some workloads (e.g., netperf UDP_STREAM) significantly (that part is
from the VIA driver).
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 81 +++++++++++++++++++++++++-------------------
drivers/net/via-velocity.h | 3 ++
2 files changed, 49 insertions(+), 35 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 85ee898..668cdf6 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -354,12 +354,6 @@ VELOCITY_PARAM(ValPktLen, "Receiving or Drop invalid 802.3 frame");
*/
VELOCITY_PARAM(wol_opts, "Wake On Lan options");
-#define INT_WORKS_DEF 20
-#define INT_WORKS_MIN 10
-#define INT_WORKS_MAX 64
-
-VELOCITY_PARAM(int_works, "Number of packets per interrupt services");
-
static int rx_copybreak = 200;
module_param(rx_copybreak, int, 0644);
MODULE_PARM_DESC(rx_copybreak, "Copy breakpoint for copy-only-tiny-frames");
@@ -516,7 +510,6 @@ static void __devinit velocity_get_options(struct velocity_opt *opts, int index,
velocity_set_bool_opt(&opts->flags, ValPktLen[index], VAL_PKT_LEN_DEF, VELOCITY_FLAGS_VAL_PKT_LEN, "ValPktLen", devname);
velocity_set_int_opt((int *) &opts->spd_dpx, speed_duplex[index], MED_LNK_MIN, MED_LNK_MAX, MED_LNK_DEF, "Media link mode", devname);
velocity_set_int_opt((int *) &opts->wol_opts, wol_opts[index], WOL_OPT_MIN, WOL_OPT_MAX, WOL_OPT_DEF, "Wake On Lan options", devname);
- velocity_set_int_opt((int *) &opts->int_works, int_works[index], INT_WORKS_MIN, INT_WORKS_MAX, INT_WORKS_DEF, "Interrupt service works", devname);
opts->numrx = (opts->numrx & ~3);
}
@@ -2123,13 +2116,14 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
* any received packets from the receive queue. Hand the ring
* slots back to the adapter for reuse.
*/
-static int velocity_rx_srv(struct velocity_info *vptr, int status)
+static int velocity_rx_srv(struct velocity_info *vptr, int status,
+ int budget_left)
{
struct net_device_stats *stats = &vptr->dev->stats;
int rd_curr = vptr->rx.curr;
int works = 0;
- do {
+ while (works < budget_left) {
struct rx_desc *rd = vptr->rx.ring + rd_curr;
if (!vptr->rx.info[rd_curr].skb)
@@ -2160,7 +2154,8 @@ static int velocity_rx_srv(struct velocity_info *vptr, int status)
rd_curr++;
if (rd_curr >= vptr->options.numrx)
rd_curr = 0;
- } while (++works <= 15);
+ works++;
+ }
vptr->rx.curr = rd_curr;
@@ -2171,6 +2166,40 @@ static int velocity_rx_srv(struct velocity_info *vptr, int status)
return works;
}
+static int velocity_poll(struct napi_struct *napi, int budget)
+{
+ struct velocity_info *vptr = container_of(napi,
+ struct velocity_info, napi);
+ unsigned int rx_done;
+ u32 isr_status;
+
+ spin_lock(&vptr->lock);
+ isr_status = mac_read_isr(vptr->mac_regs);
+
+ /* Ack the interrupt */
+ mac_write_isr(vptr->mac_regs, isr_status);
+ if (isr_status & (~(ISR_PRXI | ISR_PPRXI | ISR_PTXI | ISR_PPTXI)))
+ velocity_error(vptr, isr_status);
+
+ /*
+ * Do rx and tx twice for performance (taken from the VIA
+ * out-of-tree driver).
+ */
+ rx_done = velocity_rx_srv(vptr, isr_status, budget / 2);
+ velocity_tx_srv(vptr, isr_status);
+ rx_done += velocity_rx_srv(vptr, isr_status, budget - rx_done);
+ velocity_tx_srv(vptr, isr_status);
+
+ spin_unlock(&vptr->lock);
+
+ /* If budget not fully consumed, exit the polling mode */
+ if (rx_done < budget) {
+ napi_complete(napi);
+ mac_enable_int(vptr->mac_regs);
+ }
+
+ return rx_done;
+}
/**
* velocity_intr - interrupt callback
@@ -2187,8 +2216,6 @@ static irqreturn_t velocity_intr(int irq, void *dev_instance)
struct net_device *dev = dev_instance;
struct velocity_info *vptr = netdev_priv(dev);
u32 isr_status;
- int max_count = 0;
-
spin_lock(&vptr->lock);
isr_status = mac_read_isr(vptr->mac_regs);
@@ -2199,32 +2226,13 @@ static irqreturn_t velocity_intr(int irq, void *dev_instance)
return IRQ_NONE;
}
- mac_disable_int(vptr->mac_regs);
-
- /*
- * Keep processing the ISR until we have completed
- * processing and the isr_status becomes zero
- */
-
- while (isr_status != 0) {
- mac_write_isr(vptr->mac_regs, isr_status);
- if (isr_status & (~(ISR_PRXI | ISR_PPRXI | ISR_PTXI | ISR_PPTXI)))
- velocity_error(vptr, isr_status);
- if (isr_status & (ISR_PRXI | ISR_PPRXI))
- max_count += velocity_rx_srv(vptr, isr_status);
- if (isr_status & (ISR_PTXI | ISR_PPTXI))
- max_count += velocity_tx_srv(vptr, isr_status);
- isr_status = mac_read_isr(vptr->mac_regs);
- if (max_count > vptr->options.int_works) {
- printk(KERN_WARNING "%s: excessive work at interrupt.\n",
- dev->name);
- max_count = 0;
- }
+ if (likely(napi_schedule_prep(&vptr->napi))) {
+ mac_disable_int(vptr->mac_regs);
+ __napi_schedule(&vptr->napi);
}
spin_unlock(&vptr->lock);
- mac_enable_int(vptr->mac_regs);
- return IRQ_HANDLED;
+ return IRQ_HANDLED;
}
/**
@@ -2264,6 +2272,7 @@ static int velocity_open(struct net_device *dev)
mac_enable_int(vptr->mac_regs);
netif_start_queue(dev);
+ napi_enable(&vptr->napi);
vptr->flags |= VELOCITY_FLAGS_OPENED;
out:
return ret;
@@ -2499,6 +2508,7 @@ static int velocity_close(struct net_device *dev)
{
struct velocity_info *vptr = netdev_priv(dev);
+ napi_disable(&vptr->napi);
netif_stop_queue(dev);
velocity_shutdown(vptr);
@@ -2818,6 +2828,7 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi
dev->irq = pdev->irq;
dev->netdev_ops = &velocity_netdev_ops;
dev->ethtool_ops = &velocity_ethtool_ops;
+ netif_napi_add(dev, &vptr->napi, velocity_poll, VELOCITY_NAPI_WEIGHT);
dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER |
NETIF_F_HW_VLAN_RX;
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index 6091946..22bfea4 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -32,6 +32,7 @@
#define VELOCITY_VERSION "1.14"
#define VELOCITY_IO_SIZE 256
+#define VELOCITY_NAPI_WEIGHT 64
#define PKT_BUF_SZ 1540
@@ -1564,6 +1565,8 @@ struct velocity_info {
u32 ticks;
u8 rev_id;
+
+ struct napi_struct napi;
};
/**
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v2 4/7] via-velocity: Change DMA_LENGTH_DEF (from the VIA driver)
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
` (2 preceding siblings ...)
2009-11-23 14:31 ` [PATCH v2 3/7] via-velocity: Implement NAPI support Simon Kagstrom
@ 2009-11-23 14:31 ` Simon Kagstrom
2009-11-23 14:31 ` [PATCH v2 5/7] via-velocity: Re-enable transmit scatter-gather support Simon Kagstrom
` (4 subsequent siblings)
8 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-23 14:31 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
The VIA driver has changed the default for the DMA_LENGTH_DEF parameter.
Together with adaptive interrupt supression and NAPI support, this
improves performance quite a bit
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 668cdf6..06a6d80 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -275,7 +275,7 @@ VELOCITY_PARAM(rx_thresh, "Receive fifo threshold");
#define DMA_LENGTH_MIN 0
#define DMA_LENGTH_MAX 7
-#define DMA_LENGTH_DEF 0
+#define DMA_LENGTH_DEF 6
/* DMA_length[] is used for controlling the DMA length
0: 8 DWORDs
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v2 5/7] via-velocity: Re-enable transmit scatter-gather support
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
` (3 preceding siblings ...)
2009-11-23 14:31 ` [PATCH v2 4/7] via-velocity: Change DMA_LENGTH_DEF (from the VIA driver) Simon Kagstrom
@ 2009-11-23 14:31 ` Simon Kagstrom
2009-11-23 18:52 ` David Miller
2009-11-25 8:22 ` [PATCH v3 " Simon Kagstrom
2009-11-23 14:31 ` [PATCH v2 6/7] via-velocity: Set tx checksum from ethtool instead of module parameter Simon Kagstrom
` (3 subsequent siblings)
8 siblings, 2 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-23 14:31 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
The velocity hardware can handle up to 7 memory segments. This can be
turned on and off via ethtool.
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 88 +++++++++++++++++++++++++++++---------------
1 files changed, 58 insertions(+), 30 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 06a6d80..92b41fc 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -9,7 +9,6 @@
*
* TODO
* rx_copybreak/alignment
- * Scatter gather
* More testing
*
* The changes are (c) Copyright 2004, Red Hat Inc. <alan@lxorguk.ukuu.org.uk>
@@ -1656,12 +1655,10 @@ out:
*/
static int velocity_init_td_ring(struct velocity_info *vptr)
{
- dma_addr_t curr;
int j;
/* Init the TD ring entries */
for (j = 0; j < vptr->tx.numq; j++) {
- curr = vptr->tx.pool_dma[j];
vptr->tx.infos[j] = kcalloc(vptr->options.numtx,
sizeof(struct velocity_td_info),
@@ -1727,21 +1724,27 @@ err_free_dma_rings_0:
* Release an transmit buffer. If the buffer was preallocated then
* recycle it, if not then unmap the buffer.
*/
-static void velocity_free_tx_buf(struct velocity_info *vptr, struct velocity_td_info *tdinfo)
+static void velocity_free_tx_buf(struct velocity_info *vptr,
+ struct velocity_td_info *tdinfo, struct tx_desc *td)
{
struct sk_buff *skb = tdinfo->skb;
- int i;
- int pktlen;
/*
* Don't unmap the pre-allocated tx_bufs
*/
if (tdinfo->skb_dma) {
+ int i;
- pktlen = max_t(unsigned int, skb->len, ETH_ZLEN);
for (i = 0; i < tdinfo->nskb_dma; i++) {
- pci_unmap_single(vptr->pdev, tdinfo->skb_dma[i], pktlen, PCI_DMA_TODEVICE);
- tdinfo->skb_dma[i] = 0;
+ size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
+
+ /* For scatter-gather */
+ if (skb_shinfo(skb)->nr_frags > 0)
+ pktlen = max_t(size_t, pktlen,
+ td->td_buf[i].size & ~TD_QUEUE);
+
+ pci_unmap_single(vptr->pdev, tdinfo->skb_dma[i],
+ le16_to_cpu(pktlen), PCI_DMA_TODEVICE);
}
}
dev_kfree_skb_irq(skb);
@@ -1943,7 +1946,7 @@ static int velocity_tx_srv(struct velocity_info *vptr, u32 status)
stats->tx_packets++;
stats->tx_bytes += tdinfo->skb->len;
}
- velocity_free_tx_buf(vptr, tdinfo);
+ velocity_free_tx_buf(vptr, tdinfo, td);
vptr->tx.used[qnum]--;
}
vptr->tx.tail[qnum] = idx;
@@ -2543,14 +2546,27 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
struct velocity_td_info *tdinfo;
unsigned long flags;
int pktlen;
- __le16 len;
- int index;
+ int index, prev;
+ int i = 0;
if (skb_padto(skb, ETH_ZLEN))
goto out;
- pktlen = max_t(unsigned int, skb->len, ETH_ZLEN);
- len = cpu_to_le16(pktlen);
+ /* The hardware can handle at most 7 memory segments, so merge
+ * the skb if there are more */
+ if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
+ kfree_skb(skb);
+ return 0;
+ }
+ /* If it's still above 6 we can't do anything */
+ if (skb_shinfo(skb)->nr_frags > 6) {
+ dev_err(&vptr->pdev->dev,
+ "via-velocity: more than 6 frags, can't send.\n");
+ return 0;
+ }
+ pktlen = skb_shinfo(skb)->nr_frags == 0 ?
+ max_t(unsigned int, skb->len, ETH_ZLEN) :
+ skb_headlen(skb);
spin_lock_irqsave(&vptr->lock, flags);
@@ -2567,11 +2583,24 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
*/
tdinfo->skb = skb;
tdinfo->skb_dma[0] = pci_map_single(vptr->pdev, skb->data, pktlen, PCI_DMA_TODEVICE);
- td_ptr->tdesc0.len = len;
+ td_ptr->tdesc0.len = cpu_to_le16(pktlen);
td_ptr->td_buf[0].pa_low = cpu_to_le32(tdinfo->skb_dma[0]);
td_ptr->td_buf[0].pa_high = 0;
- td_ptr->td_buf[0].size = len;
- tdinfo->nskb_dma = 1;
+ td_ptr->td_buf[0].size = cpu_to_le16(pktlen);
+
+ /* Handle fragments */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ tdinfo->skb_dma[i + 1] = pci_map_page(vptr->pdev, frag->page,
+ frag->page_offset, frag->size,
+ PCI_DMA_TODEVICE);
+
+ td_ptr->td_buf[i + 1].pa_low = cpu_to_le32(tdinfo->skb_dma[i + 1]);
+ td_ptr->td_buf[i + 1].pa_high = 0;
+ td_ptr->td_buf[i + 1].size = cpu_to_le16(frag->size);
+ }
+ tdinfo->nskb_dma = i + 1;
td_ptr->tdesc1.cmd = TCPLS_NORMAL + (tdinfo->nskb_dma + 1) * 16;
@@ -2592,23 +2621,21 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
td_ptr->tdesc1.TCR |= (TCR0_UDPCK);
td_ptr->tdesc1.TCR |= TCR0_IPCK;
}
- {
- int prev = index - 1;
+ prev = index - 1;
+ if (prev < 0)
+ prev = vptr->options.numtx - 1;
+ td_ptr->tdesc0.len |= OWNED_BY_NIC;
+ vptr->tx.used[qnum]++;
+ vptr->tx.curr[qnum] = (index + 1) % vptr->options.numtx;
- if (prev < 0)
- prev = vptr->options.numtx - 1;
- td_ptr->tdesc0.len |= OWNED_BY_NIC;
- vptr->tx.used[qnum]++;
- vptr->tx.curr[qnum] = (index + 1) % vptr->options.numtx;
+ if (AVAIL_TD(vptr, qnum) < 1)
+ netif_stop_queue(dev);
- if (AVAIL_TD(vptr, qnum) < 1)
- netif_stop_queue(dev);
+ td_ptr = &(vptr->tx.rings[qnum][prev]);
+ td_ptr->td_buf[0].size |= TD_QUEUE;
+ mac_tx_queue_wake(vptr->mac_regs, qnum);
- td_ptr = &(vptr->tx.rings[qnum][prev]);
- td_ptr->td_buf[0].size |= TD_QUEUE;
- mac_tx_queue_wake(vptr->mac_regs, qnum);
- }
dev->trans_start = jiffies;
spin_unlock_irqrestore(&vptr->lock, flags);
out:
@@ -3398,6 +3425,7 @@ static const struct ethtool_ops velocity_ethtool_ops = {
.set_wol = velocity_ethtool_set_wol,
.get_msglevel = velocity_get_msglevel,
.set_msglevel = velocity_set_msglevel,
+ .set_sg = ethtool_op_set_sg,
.get_link = velocity_get_link,
.get_coalesce = velocity_get_coalesce,
.set_coalesce = velocity_set_coalesce,
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* Re: [PATCH v2 5/7] via-velocity: Re-enable transmit scatter-gather support
2009-11-23 14:31 ` [PATCH v2 5/7] via-velocity: Re-enable transmit scatter-gather support Simon Kagstrom
@ 2009-11-23 18:52 ` David Miller
2009-11-25 8:22 ` [PATCH v3 " Simon Kagstrom
1 sibling, 0 replies; 40+ messages in thread
From: David Miller @ 2009-11-23 18:52 UTC (permalink / raw)
To: simon.kagstrom; +Cc: netdev, davej, shemminger, romieu
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Mon, 23 Nov 2009 15:31:33 +0100
> @@ -2543,14 +2546,27 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
> struct velocity_td_info *tdinfo;
> unsigned long flags;
> int pktlen;
> - __le16 len;
> - int index;
> + int index, prev;
> + int i = 0;
>
> if (skb_padto(skb, ETH_ZLEN))
> goto out;
> - pktlen = max_t(unsigned int, skb->len, ETH_ZLEN);
>
> - len = cpu_to_le16(pktlen);
> + /* The hardware can handle at most 7 memory segments, so merge
> + * the skb if there are more */
> + if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
> + kfree_skb(skb);
> + return 0;
> + }
Zero is not a valid return value for this function. It returns
a "netdev_tx_t" defined as follows in linux/netdevice.h:
enum netdev_tx {
__NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */
NETDEV_TX_OK = 0x00, /* driver took care of packet */
NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/
NETDEV_TX_LOCKED = 0x20, /* driver tx lock was already taken */
};
typedef enum netdev_tx netdev_tx_t;
^ permalink raw reply [flat|nested] 40+ messages in thread* [PATCH v3 5/7] via-velocity: Re-enable transmit scatter-gather support
2009-11-23 14:31 ` [PATCH v2 5/7] via-velocity: Re-enable transmit scatter-gather support Simon Kagstrom
2009-11-23 18:52 ` David Miller
@ 2009-11-25 8:22 ` Simon Kagstrom
2009-11-25 23:37 ` David Miller
1 sibling, 1 reply; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-25 8:22 UTC (permalink / raw)
To: davem; +Cc: netdev, davej, shemminger, romieu
The velocity hardware can handle up to 7 memory segments. This can be
turned on and off via ethtool. The support was removed in commit
83c98a8cd04dd0f848574370594886ba3bf56750
but is re-enabled and cleaned up here. It's off by default.
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
ChangeLog:
* (David Miller) return NETDEV_TX_OK from the velocity_xmit
function. I'm still a bit unsure on what to actually return in
these cases (they are error cases), but other drivers seem to
return NETDEV_TX_OK, so that's what I went with.
drivers/net/via-velocity.c | 88 +++++++++++++++++++++++++++++---------------
1 files changed, 58 insertions(+), 30 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 06a6d80..2ad25a9 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -9,7 +9,6 @@
*
* TODO
* rx_copybreak/alignment
- * Scatter gather
* More testing
*
* The changes are (c) Copyright 2004, Red Hat Inc. <alan@lxorguk.ukuu.org.uk>
@@ -1656,12 +1655,10 @@ out:
*/
static int velocity_init_td_ring(struct velocity_info *vptr)
{
- dma_addr_t curr;
int j;
/* Init the TD ring entries */
for (j = 0; j < vptr->tx.numq; j++) {
- curr = vptr->tx.pool_dma[j];
vptr->tx.infos[j] = kcalloc(vptr->options.numtx,
sizeof(struct velocity_td_info),
@@ -1727,21 +1724,27 @@ err_free_dma_rings_0:
* Release an transmit buffer. If the buffer was preallocated then
* recycle it, if not then unmap the buffer.
*/
-static void velocity_free_tx_buf(struct velocity_info *vptr, struct velocity_td_info *tdinfo)
+static void velocity_free_tx_buf(struct velocity_info *vptr,
+ struct velocity_td_info *tdinfo, struct tx_desc *td)
{
struct sk_buff *skb = tdinfo->skb;
- int i;
- int pktlen;
/*
* Don't unmap the pre-allocated tx_bufs
*/
if (tdinfo->skb_dma) {
+ int i;
- pktlen = max_t(unsigned int, skb->len, ETH_ZLEN);
for (i = 0; i < tdinfo->nskb_dma; i++) {
- pci_unmap_single(vptr->pdev, tdinfo->skb_dma[i], pktlen, PCI_DMA_TODEVICE);
- tdinfo->skb_dma[i] = 0;
+ size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
+
+ /* For scatter-gather */
+ if (skb_shinfo(skb)->nr_frags > 0)
+ pktlen = max_t(size_t, pktlen,
+ td->td_buf[i].size & ~TD_QUEUE);
+
+ pci_unmap_single(vptr->pdev, tdinfo->skb_dma[i],
+ le16_to_cpu(pktlen), PCI_DMA_TODEVICE);
}
}
dev_kfree_skb_irq(skb);
@@ -1943,7 +1946,7 @@ static int velocity_tx_srv(struct velocity_info *vptr, u32 status)
stats->tx_packets++;
stats->tx_bytes += tdinfo->skb->len;
}
- velocity_free_tx_buf(vptr, tdinfo);
+ velocity_free_tx_buf(vptr, tdinfo, td);
vptr->tx.used[qnum]--;
}
vptr->tx.tail[qnum] = idx;
@@ -2543,14 +2546,27 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
struct velocity_td_info *tdinfo;
unsigned long flags;
int pktlen;
- __le16 len;
- int index;
+ int index, prev;
+ int i = 0;
if (skb_padto(skb, ETH_ZLEN))
goto out;
- pktlen = max_t(unsigned int, skb->len, ETH_ZLEN);
- len = cpu_to_le16(pktlen);
+ /* The hardware can handle at most 7 memory segments, so merge
+ * the skb if there are more */
+ if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+ /* If it's still above 6 we can't do anything */
+ if (skb_shinfo(skb)->nr_frags > 6) {
+ dev_err(&vptr->pdev->dev,
+ "via-velocity: more than 6 frags, can't send.\n");
+ return NETDEV_TX_OK;
+ }
+ pktlen = skb_shinfo(skb)->nr_frags == 0 ?
+ max_t(unsigned int, skb->len, ETH_ZLEN) :
+ skb_headlen(skb);
spin_lock_irqsave(&vptr->lock, flags);
@@ -2567,11 +2583,24 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
*/
tdinfo->skb = skb;
tdinfo->skb_dma[0] = pci_map_single(vptr->pdev, skb->data, pktlen, PCI_DMA_TODEVICE);
- td_ptr->tdesc0.len = len;
+ td_ptr->tdesc0.len = cpu_to_le16(pktlen);
td_ptr->td_buf[0].pa_low = cpu_to_le32(tdinfo->skb_dma[0]);
td_ptr->td_buf[0].pa_high = 0;
- td_ptr->td_buf[0].size = len;
- tdinfo->nskb_dma = 1;
+ td_ptr->td_buf[0].size = cpu_to_le16(pktlen);
+
+ /* Handle fragments */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ tdinfo->skb_dma[i + 1] = pci_map_page(vptr->pdev, frag->page,
+ frag->page_offset, frag->size,
+ PCI_DMA_TODEVICE);
+
+ td_ptr->td_buf[i + 1].pa_low = cpu_to_le32(tdinfo->skb_dma[i + 1]);
+ td_ptr->td_buf[i + 1].pa_high = 0;
+ td_ptr->td_buf[i + 1].size = cpu_to_le16(frag->size);
+ }
+ tdinfo->nskb_dma = i + 1;
td_ptr->tdesc1.cmd = TCPLS_NORMAL + (tdinfo->nskb_dma + 1) * 16;
@@ -2592,23 +2621,21 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
td_ptr->tdesc1.TCR |= (TCR0_UDPCK);
td_ptr->tdesc1.TCR |= TCR0_IPCK;
}
- {
- int prev = index - 1;
+ prev = index - 1;
+ if (prev < 0)
+ prev = vptr->options.numtx - 1;
+ td_ptr->tdesc0.len |= OWNED_BY_NIC;
+ vptr->tx.used[qnum]++;
+ vptr->tx.curr[qnum] = (index + 1) % vptr->options.numtx;
- if (prev < 0)
- prev = vptr->options.numtx - 1;
- td_ptr->tdesc0.len |= OWNED_BY_NIC;
- vptr->tx.used[qnum]++;
- vptr->tx.curr[qnum] = (index + 1) % vptr->options.numtx;
+ if (AVAIL_TD(vptr, qnum) < 1)
+ netif_stop_queue(dev);
- if (AVAIL_TD(vptr, qnum) < 1)
- netif_stop_queue(dev);
+ td_ptr = &(vptr->tx.rings[qnum][prev]);
+ td_ptr->td_buf[0].size |= TD_QUEUE;
+ mac_tx_queue_wake(vptr->mac_regs, qnum);
- td_ptr = &(vptr->tx.rings[qnum][prev]);
- td_ptr->td_buf[0].size |= TD_QUEUE;
- mac_tx_queue_wake(vptr->mac_regs, qnum);
- }
dev->trans_start = jiffies;
spin_unlock_irqrestore(&vptr->lock, flags);
out:
@@ -3398,6 +3425,7 @@ static const struct ethtool_ops velocity_ethtool_ops = {
.set_wol = velocity_ethtool_set_wol,
.get_msglevel = velocity_get_msglevel,
.set_msglevel = velocity_set_msglevel,
+ .set_sg = ethtool_op_set_sg,
.get_link = velocity_get_link,
.get_coalesce = velocity_get_coalesce,
.set_coalesce = velocity_set_coalesce,
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* Re: [PATCH v3 5/7] via-velocity: Re-enable transmit scatter-gather support
2009-11-25 8:22 ` [PATCH v3 " Simon Kagstrom
@ 2009-11-25 23:37 ` David Miller
2009-11-26 7:36 ` Simon Kagstrom
0 siblings, 1 reply; 40+ messages in thread
From: David Miller @ 2009-11-25 23:37 UTC (permalink / raw)
To: simon.kagstrom; +Cc: netdev, davej, shemminger, romieu
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Wed, 25 Nov 2009 09:22:13 +0100
> The velocity hardware can handle up to 7 memory segments. This can be
> turned on and off via ethtool. The support was removed in commit
>
> 83c98a8cd04dd0f848574370594886ba3bf56750
>
> but is re-enabled and cleaned up here. It's off by default.
>
> Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
> ---
> ChangeLog:
>
> * (David Miller) return NETDEV_TX_OK from the velocity_xmit
> function. I'm still a bit unsure on what to actually return in
> these cases (they are error cases), but other drivers seem to
> return NETDEV_TX_OK, so that's what I went with.
This still isn't right, sorry.
If you return NETDEV_TX_OK you must free the packet up, either
immediately or in response to the TX interrupt which finishes the
sending of the TX packet.
The case where you aren't getting this right:
+ /* If it's still above 6 we can't do anything */
+ if (skb_shinfo(skb)->nr_frags > 6) {
+ dev_err(&vptr->pdev->dev,
+ "via-velocity: more than 6 frags, can't send.\n");
+ return NETDEV_TX_OK;
+ }
is bogus because you just did __skb_linearize() and it returned zero, therefore
it would be illegal to see ->nr_frags with a > 6 value here.
Just remove this check and block of code entirely.
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v3 5/7] via-velocity: Re-enable transmit scatter-gather support
2009-11-25 23:37 ` David Miller
@ 2009-11-26 7:36 ` Simon Kagstrom
2009-11-26 21:09 ` David Miller
0 siblings, 1 reply; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 7:36 UTC (permalink / raw)
To: David Miller; +Cc: netdev, davej, shemminger, romieu
On Wed, 25 Nov 2009 15:37:35 -0800 (PST)
David Miller <davem@davemloft.net> wrote:
> The case where you aren't getting this right:
>
> + /* If it's still above 6 we can't do anything */
> + if (skb_shinfo(skb)->nr_frags > 6) {
> + dev_err(&vptr->pdev->dev,
> + "via-velocity: more than 6 frags, can't send.\n");
> + return NETDEV_TX_OK;
> + }
>
> is bogus because you just did __skb_linearize() and it returned zero, therefore
> it would be illegal to see ->nr_frags with a > 6 value here.
Thanks for the explanation, I'll submit an updated patch. I put the
check there because I was a bit unsure about what __skb_linearize()
could do to the number of fragments. In particular, __pskb_pull_tail()
ends with
pull_pages:
eat = delta;
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
if (skb_shinfo(skb)->frags[i].size <= eat) {
put_page(skb_shinfo(skb)->frags[i].page);
eat -= skb_shinfo(skb)->frags[i].size;
} else {
skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
if (eat) {
skb_shinfo(skb)->frags[k].page_offset += eat;
skb_shinfo(skb)->frags[k].size -= eat;
eat = 0;
}
k++;
}
}
skb_shinfo(skb)->nr_frags = k;
skb->tail += delta;
skb->data_len -= delta;
return skb_tail_pointer(skb);
}
and after having crunched this in my head for a while, I couldn't
convince myself that nr_frags would always come out as less than 7 from
here. I don't claim to have done a thorough examination though.
Anyway, patch coming up.
// Simon
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v3 5/7] via-velocity: Re-enable transmit scatter-gather support
2009-11-26 7:36 ` Simon Kagstrom
@ 2009-11-26 21:09 ` David Miller
0 siblings, 0 replies; 40+ messages in thread
From: David Miller @ 2009-11-26 21:09 UTC (permalink / raw)
To: simon.kagstrom; +Cc: netdev, davej, shemminger, romieu
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Thu, 26 Nov 2009 08:36:41 +0100
> and after having crunched this in my head for a while, I couldn't
> convince myself that nr_frags would always come out as less than 7 from
> here. I don't claim to have done a thorough examination though.
__skb_linearize() does pskb_pull_tail() with length skb->data_len
skb->data_len represents the length of the data represented by non-linear
buffers in the SKB.
Therfore ending up with ->nr_frags != 0 or ->frag_list != NULL after this
operation would be a bug, and not something we should assert about in
driver code.
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v2 6/7] via-velocity: Set tx checksum from ethtool instead of module parameter
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
` (4 preceding siblings ...)
2009-11-23 14:31 ` [PATCH v2 5/7] via-velocity: Re-enable transmit scatter-gather support Simon Kagstrom
@ 2009-11-23 14:31 ` Simon Kagstrom
2009-11-23 14:31 ` [PATCH v2 7/7] via-velocity: Bump version Simon Kagstrom
` (2 subsequent siblings)
8 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-23 14:31 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
Defaults to on (as before).
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 18 ++++--------------
drivers/net/via-velocity.h | 1 -
2 files changed, 4 insertions(+), 15 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 92b41fc..4af16ba 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -297,14 +297,6 @@ VELOCITY_PARAM(DMA_length, "DMA length");
*/
VELOCITY_PARAM(IP_byte_align, "Enable IP header dword aligned");
-#define TX_CSUM_DEF 1
-/* txcsum_offload[] is used for setting the checksum offload ability of NIC.
- (We only support RX checksum offload now)
- 0: disable csum_offload[checksum offload
- 1: enable checksum offload. (Default)
-*/
-VELOCITY_PARAM(txcsum_offload, "Enable transmit packet checksum offload");
-
#define FLOW_CNTL_DEF 1
#define FLOW_CNTL_MIN 1
#define FLOW_CNTL_MAX 5
@@ -503,7 +495,6 @@ static void __devinit velocity_get_options(struct velocity_opt *opts, int index,
velocity_set_int_opt(&opts->numrx, RxDescriptors[index], RX_DESC_MIN, RX_DESC_MAX, RX_DESC_DEF, "RxDescriptors", devname);
velocity_set_int_opt(&opts->numtx, TxDescriptors[index], TX_DESC_MIN, TX_DESC_MAX, TX_DESC_DEF, "TxDescriptors", devname);
- velocity_set_bool_opt(&opts->flags, txcsum_offload[index], TX_CSUM_DEF, VELOCITY_FLAGS_TX_CSUM, "txcsum_offload", devname);
velocity_set_int_opt(&opts->flow_cntl, flow_control[index], FLOW_CNTL_MIN, FLOW_CNTL_MAX, FLOW_CNTL_DEF, "flow_control", devname);
velocity_set_bool_opt(&opts->flags, IP_byte_align[index], IP_ALIG_DEF, VELOCITY_FLAGS_IP_ALIGN, "IP_byte_align", devname);
velocity_set_bool_opt(&opts->flags, ValPktLen[index], VAL_PKT_LEN_DEF, VELOCITY_FLAGS_VAL_PKT_LEN, "ValPktLen", devname);
@@ -2612,7 +2603,7 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
/*
* Handle hardware checksum
*/
- if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM)
+ if ( (dev->features & NETIF_F_IP_CSUM)
&& (skb->ip_summed == CHECKSUM_PARTIAL)) {
const struct iphdr *ip = ip_hdr(skb);
if (ip->protocol == IPPROTO_TCP)
@@ -2858,10 +2849,7 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi
netif_napi_add(dev, &vptr->napi, velocity_poll, VELOCITY_NAPI_WEIGHT);
dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER |
- NETIF_F_HW_VLAN_RX;
-
- if (vptr->flags & VELOCITY_FLAGS_TX_CSUM)
- dev->features |= NETIF_F_IP_CSUM;
+ NETIF_F_HW_VLAN_RX | NETIF_F_IP_CSUM;
ret = register_netdev(dev);
if (ret < 0)
@@ -3421,6 +3409,8 @@ static const struct ethtool_ops velocity_ethtool_ops = {
.get_settings = velocity_get_settings,
.set_settings = velocity_set_settings,
.get_drvinfo = velocity_get_drvinfo,
+ .set_tx_csum = ethtool_op_set_tx_csum,
+ .get_tx_csum = ethtool_op_get_tx_csum,
.get_wol = velocity_ethtool_get_wol,
.set_wol = velocity_ethtool_set_wol,
.get_msglevel = velocity_get_msglevel,
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index 22bfea4..cf10801 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -1423,7 +1423,6 @@ enum velocity_msg_level {
*/
#define VELOCITY_FLAGS_TAGGING 0x00000001UL
-#define VELOCITY_FLAGS_TX_CSUM 0x00000002UL
#define VELOCITY_FLAGS_RX_CSUM 0x00000004UL
#define VELOCITY_FLAGS_IP_ALIGN 0x00000008UL
#define VELOCITY_FLAGS_VAL_PKT_LEN 0x00000010UL
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v2 7/7] via-velocity: Bump version
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
` (5 preceding siblings ...)
2009-11-23 14:31 ` [PATCH v2 6/7] via-velocity: Set tx checksum from ethtool instead of module parameter Simon Kagstrom
@ 2009-11-23 14:31 ` Simon Kagstrom
2009-11-24 20:13 ` [PATCH v2 0/7] via-velocity performance fixes David Miller
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
8 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-23 14:31 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index cf10801..6b5fc51 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -29,7 +29,7 @@
#define VELOCITY_NAME "via-velocity"
#define VELOCITY_FULL_DRV_NAM "VIA Networking Velocity Family Gigabit Ethernet Adapter Driver"
-#define VELOCITY_VERSION "1.14"
+#define VELOCITY_VERSION "1.15"
#define VELOCITY_IO_SIZE 256
#define VELOCITY_NAPI_WEIGHT 64
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* Re: [PATCH v2 0/7] via-velocity performance fixes
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
` (6 preceding siblings ...)
2009-11-23 14:31 ` [PATCH v2 7/7] via-velocity: Bump version Simon Kagstrom
@ 2009-11-24 20:13 ` David Miller
2009-11-25 7:39 ` Simon Kagstrom
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
8 siblings, 1 reply; 40+ messages in thread
From: David Miller @ 2009-11-24 20:13 UTC (permalink / raw)
To: simon.kagstrom; +Cc: netdev, davej, shemminger, romieu
Simon I'm still waiting for you to fix the transmit function return
value problems I pointed out in patch #5.
Please get to this so I can apply your patches.
Thanks.
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v2 0/7] via-velocity performance fixes
2009-11-24 20:13 ` [PATCH v2 0/7] via-velocity performance fixes David Miller
@ 2009-11-25 7:39 ` Simon Kagstrom
2009-11-25 20:18 ` David Miller
0 siblings, 1 reply; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-25 7:39 UTC (permalink / raw)
To: David Miller; +Cc: netdev, davej, shemminger, romieu
On Tue, 24 Nov 2009 12:13:39 -0800 (PST)
David Miller <davem@davemloft.net> wrote:
> Simon I'm still waiting for you to fix the transmit function return
> value problems I pointed out in patch #5.
>
> Please get to this so I can apply your patches.
Sorry, I missed that comment. I only subscribed to netdev after having
sent out the patches, so I didn't get that reply. I've looked it up now
(and I'm subscribed now!), so I'll send out a new version.
Thanks,
// Simon
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v2 0/7] via-velocity performance fixes
2009-11-25 7:39 ` Simon Kagstrom
@ 2009-11-25 20:18 ` David Miller
0 siblings, 0 replies; 40+ messages in thread
From: David Miller @ 2009-11-25 20:18 UTC (permalink / raw)
To: simon.kagstrom; +Cc: netdev, davej, shemminger, romieu
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Wed, 25 Nov 2009 08:39:43 +0100
> On Tue, 24 Nov 2009 12:13:39 -0800 (PST)
> David Miller <davem@davemloft.net> wrote:
>
>> Simon I'm still waiting for you to fix the transmit function return
>> value problems I pointed out in patch #5.
>>
>> Please get to this so I can apply your patches.
>
> Sorry, I missed that comment. I only subscribed to netdev after having
> sent out the patches, so I didn't get that reply. I've looked it up now
> (and I'm subscribed now!), so I'll send out a new version.
Your email address was the To: field in the reply, there is no reason
you should not have received the email, here are the email headers:
--------------------
Subject: Re: [PATCH v2 5/7] via-velocity: Re-enable transmit scatter-gather support
From: David Miller <davem@davemloft.net>
To: simon.kagstrom@netinsight.net
Cc: netdev@vger.kernel.org, davej@redhat.com, shemminger@vyatta.com,
romieu@fr.zoreil.com
Date: Mon, 23 Nov 2009 10:52:21 -0800 (PST)
X-Mailer: Mew version 6.2.51 on Emacs 22.1 / Mule 5.0 (SAKAKI)
--------------------
So being subscribed to netdev has nothing to do with it, you simply
didn't get a direct email I sent to you for one reason or another.
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v4 0/7] via-velocity performance fixes
2009-11-23 14:27 ` [PATCH v2 " Simon Kagstrom
` (7 preceding siblings ...)
2009-11-24 20:13 ` [PATCH v2 0/7] via-velocity performance fixes David Miller
@ 2009-11-26 8:04 ` Simon Kagstrom
2009-11-26 8:09 ` [PATCH v4 1/7] via-velocity: Correct 64-byte alignment for rx buffers Simon Kagstrom
` (7 more replies)
8 siblings, 8 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 8:04 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
Hi again!
This is version four of the series posted here:
http://marc.info/?l=linux-netdev&m=125873023117769&w=2
The new version integrates Davids comments. There are 7 patches:
1. See to it that data is 64-byte aligned (same as before)
2. Add ethtool interrupt coalescing support. (same as before)
3. NAPI support for via-velocity (same as before)
4. Change DMA length default (same as before)
5. Re-enable scatter-gather support. (removed the double-check of
nr_frags according to Davids comment)
6. Set tx checksum from ethtool instead of a module parameter (rebased)
7. Bump the version.
To get the same performance results (or better!) as with the VIA
driver, you now need to setup interrupt supression with
ethtool -C ethX rx-usecs 20 tx-usecs 100 tx-frames 31 rx-frames 31
(which are the defaults from the VIA driver). Scatter-gather support is
enabled with
ethtool -K ethX sg on
// Simon
^ permalink raw reply [flat|nested] 40+ messages in thread* [PATCH v4 1/7] via-velocity: Correct 64-byte alignment for rx buffers
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
@ 2009-11-26 8:09 ` Simon Kagstrom
2009-11-26 8:10 ` [PATCH v4 2/7] via-velocity: Add ethtool interrupt coalescing support Simon Kagstrom
` (6 subsequent siblings)
7 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 8:09 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
(From the VIA driver). The current code does not guarantee 64-byte
alignment since it simply does
int add = skb->data & 63;
skb->data += add;
(via skb_reserve). So for example, if the skb->data address would be
0x10, this would result in 32-byte alignment (0x10 + 0x10).
Correct by adding
64 - (skb->data & 63)
instead.
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index e04e5be..b6cf3b5 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1483,7 +1483,8 @@ static int velocity_alloc_rx_buf(struct velocity_info *vptr, int idx)
* Do the gymnastics to get the buffer head for data at
* 64byte alignment.
*/
- skb_reserve(rd_info->skb, (unsigned long) rd_info->skb->data & 63);
+ skb_reserve(rd_info->skb,
+ 64 - ((unsigned long) rd_info->skb->data & 63));
rd_info->skb_dma = pci_map_single(vptr->pdev, rd_info->skb->data,
vptr->rx.buf_sz, PCI_DMA_FROMDEVICE);
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v4 2/7] via-velocity: Add ethtool interrupt coalescing support
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
2009-11-26 8:09 ` [PATCH v4 1/7] via-velocity: Correct 64-byte alignment for rx buffers Simon Kagstrom
@ 2009-11-26 8:10 ` Simon Kagstrom
2009-11-26 8:10 ` [PATCH v4 3/7] via-velocity: Implement NAPI support Simon Kagstrom
` (5 subsequent siblings)
7 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 8:10 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
(Partially from the upstream VIA driver). Tweaking the number of
frames-per-interrupt and timer-until-interrupt can reduce the amount of
CPU work quite a lot.
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 160 +++++++++++++++++++++++++++++++++++++++++++-
drivers/net/via-velocity.h | 7 ++-
2 files changed, 165 insertions(+), 2 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index b6cf3b5..85ee898 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1259,6 +1259,66 @@ static void mii_init(struct velocity_info *vptr, u32 mii_status)
}
}
+/**
+ * setup_queue_timers - Setup interrupt timers
+ *
+ * Setup interrupt frequency during suppression (timeout if the frame
+ * count isn't filled).
+ */
+static void setup_queue_timers(struct velocity_info *vptr)
+{
+ /* Only for newer revisions */
+ if (vptr->rev_id >= REV_ID_VT3216_A0) {
+ u8 txqueue_timer = 0;
+ u8 rxqueue_timer = 0;
+
+ if (vptr->mii_status & (VELOCITY_SPEED_1000 |
+ VELOCITY_SPEED_100)) {
+ txqueue_timer = vptr->options.txqueue_timer;
+ rxqueue_timer = vptr->options.rxqueue_timer;
+ }
+
+ writeb(txqueue_timer, &vptr->mac_regs->TQETMR);
+ writeb(rxqueue_timer, &vptr->mac_regs->RQETMR);
+ }
+}
+/**
+ * setup_adaptive_interrupts - Setup interrupt suppression
+ *
+ * @vptr velocity adapter
+ *
+ * The velocity is able to suppress interrupt during high interrupt load.
+ * This function turns on that feature.
+ */
+static void setup_adaptive_interrupts(struct velocity_info *vptr)
+{
+ struct mac_regs __iomem *regs = vptr->mac_regs;
+ u16 tx_intsup = vptr->options.tx_intsup;
+ u16 rx_intsup = vptr->options.rx_intsup;
+
+ /* Setup default interrupt mask (will be changed below) */
+ vptr->int_mask = INT_MASK_DEF;
+
+ /* Set Tx Interrupt Suppression Threshold */
+ writeb(CAMCR_PS0, ®s->CAMCR);
+ if (tx_intsup != 0) {
+ vptr->int_mask &= ~(ISR_PTXI | ISR_PTX0I | ISR_PTX1I |
+ ISR_PTX2I | ISR_PTX3I);
+ writew(tx_intsup, ®s->ISRCTL);
+ } else
+ writew(ISRCTL_TSUPDIS, ®s->ISRCTL);
+
+ /* Set Rx Interrupt Suppression Threshold */
+ writeb(CAMCR_PS1, ®s->CAMCR);
+ if (rx_intsup != 0) {
+ vptr->int_mask &= ~ISR_PRXI;
+ writew(rx_intsup, ®s->ISRCTL);
+ } else
+ writew(ISRCTL_RSUPDIS, ®s->ISRCTL);
+
+ /* Select page to interrupt hold timer */
+ writeb(0, ®s->CAMCR);
+}
/**
* velocity_init_registers - initialise MAC registers
@@ -1345,7 +1405,7 @@ static void velocity_init_registers(struct velocity_info *vptr,
*/
enable_mii_autopoll(regs);
- vptr->int_mask = INT_MASK_DEF;
+ setup_adaptive_interrupts(vptr);
writel(vptr->rx.pool_dma, ®s->RDBaseLo);
writew(vptr->options.numrx - 1, ®s->RDCSize);
@@ -1802,6 +1862,8 @@ static void velocity_error(struct velocity_info *vptr, int status)
BYTE_REG_BITS_OFF(TESTCFG_HBDIS, ®s->TESTCFG);
else
BYTE_REG_BITS_ON(TESTCFG_HBDIS, ®s->TESTCFG);
+
+ setup_queue_timers(vptr);
}
/*
* Get link status from PHYSR0
@@ -3223,6 +3285,100 @@ static void velocity_set_msglevel(struct net_device *dev, u32 value)
msglevel = value;
}
+static int get_pending_timer_val(int val)
+{
+ int mult_bits = val >> 6;
+ int mult = 1;
+
+ switch (mult_bits)
+ {
+ case 1:
+ mult = 4; break;
+ case 2:
+ mult = 16; break;
+ case 3:
+ mult = 64; break;
+ case 0:
+ default:
+ break;
+ }
+
+ return (val & 0x3f) * mult;
+}
+
+static void set_pending_timer_val(int *val, u32 us)
+{
+ u8 mult = 0;
+ u8 shift = 0;
+
+ if (us >= 0x3f) {
+ mult = 1; /* mult with 4 */
+ shift = 2;
+ }
+ if (us >= 0x3f * 4) {
+ mult = 2; /* mult with 16 */
+ shift = 4;
+ }
+ if (us >= 0x3f * 16) {
+ mult = 3; /* mult with 64 */
+ shift = 6;
+ }
+
+ *val = (mult << 6) | ((us >> shift) & 0x3f);
+}
+
+
+static int velocity_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ecmd)
+{
+ struct velocity_info *vptr = netdev_priv(dev);
+
+ ecmd->tx_max_coalesced_frames = vptr->options.tx_intsup;
+ ecmd->rx_max_coalesced_frames = vptr->options.rx_intsup;
+
+ ecmd->rx_coalesce_usecs = get_pending_timer_val(vptr->options.rxqueue_timer);
+ ecmd->tx_coalesce_usecs = get_pending_timer_val(vptr->options.txqueue_timer);
+
+ return 0;
+}
+
+static int velocity_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ecmd)
+{
+ struct velocity_info *vptr = netdev_priv(dev);
+ int max_us = 0x3f * 64;
+
+ /* 6 bits of */
+ if (ecmd->tx_coalesce_usecs > max_us)
+ return -EINVAL;
+ if (ecmd->rx_coalesce_usecs > max_us)
+ return -EINVAL;
+
+ if (ecmd->tx_max_coalesced_frames > 0xff)
+ return -EINVAL;
+ if (ecmd->rx_max_coalesced_frames > 0xff)
+ return -EINVAL;
+
+ vptr->options.rx_intsup = ecmd->rx_max_coalesced_frames;
+ vptr->options.tx_intsup = ecmd->tx_max_coalesced_frames;
+
+ set_pending_timer_val(&vptr->options.rxqueue_timer,
+ ecmd->rx_coalesce_usecs);
+ set_pending_timer_val(&vptr->options.txqueue_timer,
+ ecmd->tx_coalesce_usecs);
+
+ /* Setup the interrupt suppression and queue timers */
+ mac_disable_int(vptr->mac_regs);
+ setup_adaptive_interrupts(vptr);
+ setup_queue_timers(vptr);
+
+ mac_write_int_mask(vptr->int_mask, vptr->mac_regs);
+ mac_clear_isr(vptr->mac_regs);
+ mac_enable_int(vptr->mac_regs);
+
+ return 0;
+}
+
static const struct ethtool_ops velocity_ethtool_ops = {
.get_settings = velocity_get_settings,
.set_settings = velocity_set_settings,
@@ -3232,6 +3388,8 @@ static const struct ethtool_ops velocity_ethtool_ops = {
.get_msglevel = velocity_get_msglevel,
.set_msglevel = velocity_set_msglevel,
.get_link = velocity_get_link,
+ .get_coalesce = velocity_get_coalesce,
+ .set_coalesce = velocity_set_coalesce,
.begin = velocity_ethtool_up,
.complete = velocity_ethtool_down
};
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index 2f00c13..6091946 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -1005,7 +1005,8 @@ struct mac_regs {
volatile __le32 RDBaseLo; /* 0x38 */
volatile __le16 RDIdx; /* 0x3C */
- volatile __le16 reserved_3E;
+ volatile u8 TQETMR; /* 0x3E, VT3216 and above only */
+ volatile u8 RQETMR; /* 0x3F, VT3216 and above only */
volatile __le32 TDBaseLo[4]; /* 0x40 */
@@ -1491,6 +1492,10 @@ struct velocity_opt {
int rx_bandwidth_hi;
int rx_bandwidth_lo;
int rx_bandwidth_en;
+ int rxqueue_timer;
+ int txqueue_timer;
+ int tx_intsup;
+ int rx_intsup;
u32 flags;
};
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v4 3/7] via-velocity: Implement NAPI support
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
2009-11-26 8:09 ` [PATCH v4 1/7] via-velocity: Correct 64-byte alignment for rx buffers Simon Kagstrom
2009-11-26 8:10 ` [PATCH v4 2/7] via-velocity: Add ethtool interrupt coalescing support Simon Kagstrom
@ 2009-11-26 8:10 ` Simon Kagstrom
2009-11-26 8:10 ` [PATCH v4 4/7] via-velocity: Change DMA_LENGTH_DEF (from the VIA driver) Simon Kagstrom
` (4 subsequent siblings)
7 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 8:10 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
This patch adds NAPI support for VIA velocity. The new velocity_poll
function also pairs tx/rx handling twice which improves perforamance on
some workloads (e.g., netperf UDP_STREAM) significantly (that part is
from the VIA driver).
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 81 +++++++++++++++++++++++++-------------------
drivers/net/via-velocity.h | 3 ++
2 files changed, 49 insertions(+), 35 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 85ee898..668cdf6 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -354,12 +354,6 @@ VELOCITY_PARAM(ValPktLen, "Receiving or Drop invalid 802.3 frame");
*/
VELOCITY_PARAM(wol_opts, "Wake On Lan options");
-#define INT_WORKS_DEF 20
-#define INT_WORKS_MIN 10
-#define INT_WORKS_MAX 64
-
-VELOCITY_PARAM(int_works, "Number of packets per interrupt services");
-
static int rx_copybreak = 200;
module_param(rx_copybreak, int, 0644);
MODULE_PARM_DESC(rx_copybreak, "Copy breakpoint for copy-only-tiny-frames");
@@ -516,7 +510,6 @@ static void __devinit velocity_get_options(struct velocity_opt *opts, int index,
velocity_set_bool_opt(&opts->flags, ValPktLen[index], VAL_PKT_LEN_DEF, VELOCITY_FLAGS_VAL_PKT_LEN, "ValPktLen", devname);
velocity_set_int_opt((int *) &opts->spd_dpx, speed_duplex[index], MED_LNK_MIN, MED_LNK_MAX, MED_LNK_DEF, "Media link mode", devname);
velocity_set_int_opt((int *) &opts->wol_opts, wol_opts[index], WOL_OPT_MIN, WOL_OPT_MAX, WOL_OPT_DEF, "Wake On Lan options", devname);
- velocity_set_int_opt((int *) &opts->int_works, int_works[index], INT_WORKS_MIN, INT_WORKS_MAX, INT_WORKS_DEF, "Interrupt service works", devname);
opts->numrx = (opts->numrx & ~3);
}
@@ -2123,13 +2116,14 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
* any received packets from the receive queue. Hand the ring
* slots back to the adapter for reuse.
*/
-static int velocity_rx_srv(struct velocity_info *vptr, int status)
+static int velocity_rx_srv(struct velocity_info *vptr, int status,
+ int budget_left)
{
struct net_device_stats *stats = &vptr->dev->stats;
int rd_curr = vptr->rx.curr;
int works = 0;
- do {
+ while (works < budget_left) {
struct rx_desc *rd = vptr->rx.ring + rd_curr;
if (!vptr->rx.info[rd_curr].skb)
@@ -2160,7 +2154,8 @@ static int velocity_rx_srv(struct velocity_info *vptr, int status)
rd_curr++;
if (rd_curr >= vptr->options.numrx)
rd_curr = 0;
- } while (++works <= 15);
+ works++;
+ }
vptr->rx.curr = rd_curr;
@@ -2171,6 +2166,40 @@ static int velocity_rx_srv(struct velocity_info *vptr, int status)
return works;
}
+static int velocity_poll(struct napi_struct *napi, int budget)
+{
+ struct velocity_info *vptr = container_of(napi,
+ struct velocity_info, napi);
+ unsigned int rx_done;
+ u32 isr_status;
+
+ spin_lock(&vptr->lock);
+ isr_status = mac_read_isr(vptr->mac_regs);
+
+ /* Ack the interrupt */
+ mac_write_isr(vptr->mac_regs, isr_status);
+ if (isr_status & (~(ISR_PRXI | ISR_PPRXI | ISR_PTXI | ISR_PPTXI)))
+ velocity_error(vptr, isr_status);
+
+ /*
+ * Do rx and tx twice for performance (taken from the VIA
+ * out-of-tree driver).
+ */
+ rx_done = velocity_rx_srv(vptr, isr_status, budget / 2);
+ velocity_tx_srv(vptr, isr_status);
+ rx_done += velocity_rx_srv(vptr, isr_status, budget - rx_done);
+ velocity_tx_srv(vptr, isr_status);
+
+ spin_unlock(&vptr->lock);
+
+ /* If budget not fully consumed, exit the polling mode */
+ if (rx_done < budget) {
+ napi_complete(napi);
+ mac_enable_int(vptr->mac_regs);
+ }
+
+ return rx_done;
+}
/**
* velocity_intr - interrupt callback
@@ -2187,8 +2216,6 @@ static irqreturn_t velocity_intr(int irq, void *dev_instance)
struct net_device *dev = dev_instance;
struct velocity_info *vptr = netdev_priv(dev);
u32 isr_status;
- int max_count = 0;
-
spin_lock(&vptr->lock);
isr_status = mac_read_isr(vptr->mac_regs);
@@ -2199,32 +2226,13 @@ static irqreturn_t velocity_intr(int irq, void *dev_instance)
return IRQ_NONE;
}
- mac_disable_int(vptr->mac_regs);
-
- /*
- * Keep processing the ISR until we have completed
- * processing and the isr_status becomes zero
- */
-
- while (isr_status != 0) {
- mac_write_isr(vptr->mac_regs, isr_status);
- if (isr_status & (~(ISR_PRXI | ISR_PPRXI | ISR_PTXI | ISR_PPTXI)))
- velocity_error(vptr, isr_status);
- if (isr_status & (ISR_PRXI | ISR_PPRXI))
- max_count += velocity_rx_srv(vptr, isr_status);
- if (isr_status & (ISR_PTXI | ISR_PPTXI))
- max_count += velocity_tx_srv(vptr, isr_status);
- isr_status = mac_read_isr(vptr->mac_regs);
- if (max_count > vptr->options.int_works) {
- printk(KERN_WARNING "%s: excessive work at interrupt.\n",
- dev->name);
- max_count = 0;
- }
+ if (likely(napi_schedule_prep(&vptr->napi))) {
+ mac_disable_int(vptr->mac_regs);
+ __napi_schedule(&vptr->napi);
}
spin_unlock(&vptr->lock);
- mac_enable_int(vptr->mac_regs);
- return IRQ_HANDLED;
+ return IRQ_HANDLED;
}
/**
@@ -2264,6 +2272,7 @@ static int velocity_open(struct net_device *dev)
mac_enable_int(vptr->mac_regs);
netif_start_queue(dev);
+ napi_enable(&vptr->napi);
vptr->flags |= VELOCITY_FLAGS_OPENED;
out:
return ret;
@@ -2499,6 +2508,7 @@ static int velocity_close(struct net_device *dev)
{
struct velocity_info *vptr = netdev_priv(dev);
+ napi_disable(&vptr->napi);
netif_stop_queue(dev);
velocity_shutdown(vptr);
@@ -2818,6 +2828,7 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi
dev->irq = pdev->irq;
dev->netdev_ops = &velocity_netdev_ops;
dev->ethtool_ops = &velocity_ethtool_ops;
+ netif_napi_add(dev, &vptr->napi, velocity_poll, VELOCITY_NAPI_WEIGHT);
dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER |
NETIF_F_HW_VLAN_RX;
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index 6091946..22bfea4 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -32,6 +32,7 @@
#define VELOCITY_VERSION "1.14"
#define VELOCITY_IO_SIZE 256
+#define VELOCITY_NAPI_WEIGHT 64
#define PKT_BUF_SZ 1540
@@ -1564,6 +1565,8 @@ struct velocity_info {
u32 ticks;
u8 rev_id;
+
+ struct napi_struct napi;
};
/**
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v4 4/7] via-velocity: Change DMA_LENGTH_DEF (from the VIA driver)
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
` (2 preceding siblings ...)
2009-11-26 8:10 ` [PATCH v4 3/7] via-velocity: Implement NAPI support Simon Kagstrom
@ 2009-11-26 8:10 ` Simon Kagstrom
2009-11-26 8:10 ` [PATCH v4 5/7] via-velocity: Re-enable transmit scatter-gather support Simon Kagstrom
` (3 subsequent siblings)
7 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 8:10 UTC (permalink / raw)
Cc: netdev, davem, davej, shemminger, romieu
The VIA driver has changed the default for the DMA_LENGTH_DEF parameter.
Together with adaptive interrupt supression and NAPI support, this
improves performance quite a bit
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 668cdf6..06a6d80 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -275,7 +275,7 @@ VELOCITY_PARAM(rx_thresh, "Receive fifo threshold");
#define DMA_LENGTH_MIN 0
#define DMA_LENGTH_MAX 7
-#define DMA_LENGTH_DEF 0
+#define DMA_LENGTH_DEF 6
/* DMA_length[] is used for controlling the DMA length
0: 8 DWORDs
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v4 5/7] via-velocity: Re-enable transmit scatter-gather support
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
` (3 preceding siblings ...)
2009-11-26 8:10 ` [PATCH v4 4/7] via-velocity: Change DMA_LENGTH_DEF (from the VIA driver) Simon Kagstrom
@ 2009-11-26 8:10 ` Simon Kagstrom
2009-11-26 8:10 ` [PATCH v4 6/7] via-velocity: Set tx checksum from ethtool instead of module parameter Simon Kagstrom
` (2 subsequent siblings)
7 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 8:10 UTC (permalink / raw)
To: netdev, davem; +Cc: davej, shemminger, romieu
The velocity hardware can handle up to 7 memory segments. This can be
turned on and off via ethtool. The support was removed in commit
83c98a8cd04dd0f848574370594886ba3bf56750
but is re-enabled and cleaned up here. It's off by default.
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
ChangeLog:
* (David Miller): Remove the second check for too-many-fragments
drivers/net/via-velocity.c | 83 ++++++++++++++++++++++++++++----------------
1 files changed, 53 insertions(+), 30 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 06a6d80..b7f676b 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -9,7 +9,6 @@
*
* TODO
* rx_copybreak/alignment
- * Scatter gather
* More testing
*
* The changes are (c) Copyright 2004, Red Hat Inc. <alan@lxorguk.ukuu.org.uk>
@@ -1656,12 +1655,10 @@ out:
*/
static int velocity_init_td_ring(struct velocity_info *vptr)
{
- dma_addr_t curr;
int j;
/* Init the TD ring entries */
for (j = 0; j < vptr->tx.numq; j++) {
- curr = vptr->tx.pool_dma[j];
vptr->tx.infos[j] = kcalloc(vptr->options.numtx,
sizeof(struct velocity_td_info),
@@ -1727,21 +1724,27 @@ err_free_dma_rings_0:
* Release an transmit buffer. If the buffer was preallocated then
* recycle it, if not then unmap the buffer.
*/
-static void velocity_free_tx_buf(struct velocity_info *vptr, struct velocity_td_info *tdinfo)
+static void velocity_free_tx_buf(struct velocity_info *vptr,
+ struct velocity_td_info *tdinfo, struct tx_desc *td)
{
struct sk_buff *skb = tdinfo->skb;
- int i;
- int pktlen;
/*
* Don't unmap the pre-allocated tx_bufs
*/
if (tdinfo->skb_dma) {
+ int i;
- pktlen = max_t(unsigned int, skb->len, ETH_ZLEN);
for (i = 0; i < tdinfo->nskb_dma; i++) {
- pci_unmap_single(vptr->pdev, tdinfo->skb_dma[i], pktlen, PCI_DMA_TODEVICE);
- tdinfo->skb_dma[i] = 0;
+ size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
+
+ /* For scatter-gather */
+ if (skb_shinfo(skb)->nr_frags > 0)
+ pktlen = max_t(size_t, pktlen,
+ td->td_buf[i].size & ~TD_QUEUE);
+
+ pci_unmap_single(vptr->pdev, tdinfo->skb_dma[i],
+ le16_to_cpu(pktlen), PCI_DMA_TODEVICE);
}
}
dev_kfree_skb_irq(skb);
@@ -1943,7 +1946,7 @@ static int velocity_tx_srv(struct velocity_info *vptr, u32 status)
stats->tx_packets++;
stats->tx_bytes += tdinfo->skb->len;
}
- velocity_free_tx_buf(vptr, tdinfo);
+ velocity_free_tx_buf(vptr, tdinfo, td);
vptr->tx.used[qnum]--;
}
vptr->tx.tail[qnum] = idx;
@@ -2543,14 +2546,22 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
struct velocity_td_info *tdinfo;
unsigned long flags;
int pktlen;
- __le16 len;
- int index;
+ int index, prev;
+ int i = 0;
if (skb_padto(skb, ETH_ZLEN))
goto out;
- pktlen = max_t(unsigned int, skb->len, ETH_ZLEN);
- len = cpu_to_le16(pktlen);
+ /* The hardware can handle at most 7 memory segments, so merge
+ * the skb if there are more */
+ if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ pktlen = skb_shinfo(skb)->nr_frags == 0 ?
+ max_t(unsigned int, skb->len, ETH_ZLEN) :
+ skb_headlen(skb);
spin_lock_irqsave(&vptr->lock, flags);
@@ -2567,11 +2578,24 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
*/
tdinfo->skb = skb;
tdinfo->skb_dma[0] = pci_map_single(vptr->pdev, skb->data, pktlen, PCI_DMA_TODEVICE);
- td_ptr->tdesc0.len = len;
+ td_ptr->tdesc0.len = cpu_to_le16(pktlen);
td_ptr->td_buf[0].pa_low = cpu_to_le32(tdinfo->skb_dma[0]);
td_ptr->td_buf[0].pa_high = 0;
- td_ptr->td_buf[0].size = len;
- tdinfo->nskb_dma = 1;
+ td_ptr->td_buf[0].size = cpu_to_le16(pktlen);
+
+ /* Handle fragments */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ tdinfo->skb_dma[i + 1] = pci_map_page(vptr->pdev, frag->page,
+ frag->page_offset, frag->size,
+ PCI_DMA_TODEVICE);
+
+ td_ptr->td_buf[i + 1].pa_low = cpu_to_le32(tdinfo->skb_dma[i + 1]);
+ td_ptr->td_buf[i + 1].pa_high = 0;
+ td_ptr->td_buf[i + 1].size = cpu_to_le16(frag->size);
+ }
+ tdinfo->nskb_dma = i + 1;
td_ptr->tdesc1.cmd = TCPLS_NORMAL + (tdinfo->nskb_dma + 1) * 16;
@@ -2592,23 +2616,21 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
td_ptr->tdesc1.TCR |= (TCR0_UDPCK);
td_ptr->tdesc1.TCR |= TCR0_IPCK;
}
- {
- int prev = index - 1;
+ prev = index - 1;
+ if (prev < 0)
+ prev = vptr->options.numtx - 1;
+ td_ptr->tdesc0.len |= OWNED_BY_NIC;
+ vptr->tx.used[qnum]++;
+ vptr->tx.curr[qnum] = (index + 1) % vptr->options.numtx;
- if (prev < 0)
- prev = vptr->options.numtx - 1;
- td_ptr->tdesc0.len |= OWNED_BY_NIC;
- vptr->tx.used[qnum]++;
- vptr->tx.curr[qnum] = (index + 1) % vptr->options.numtx;
+ if (AVAIL_TD(vptr, qnum) < 1)
+ netif_stop_queue(dev);
- if (AVAIL_TD(vptr, qnum) < 1)
- netif_stop_queue(dev);
+ td_ptr = &(vptr->tx.rings[qnum][prev]);
+ td_ptr->td_buf[0].size |= TD_QUEUE;
+ mac_tx_queue_wake(vptr->mac_regs, qnum);
- td_ptr = &(vptr->tx.rings[qnum][prev]);
- td_ptr->td_buf[0].size |= TD_QUEUE;
- mac_tx_queue_wake(vptr->mac_regs, qnum);
- }
dev->trans_start = jiffies;
spin_unlock_irqrestore(&vptr->lock, flags);
out:
@@ -3398,6 +3420,7 @@ static const struct ethtool_ops velocity_ethtool_ops = {
.set_wol = velocity_ethtool_set_wol,
.get_msglevel = velocity_get_msglevel,
.set_msglevel = velocity_set_msglevel,
+ .set_sg = ethtool_op_set_sg,
.get_link = velocity_get_link,
.get_coalesce = velocity_get_coalesce,
.set_coalesce = velocity_set_coalesce,
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v4 6/7] via-velocity: Set tx checksum from ethtool instead of module parameter
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
` (4 preceding siblings ...)
2009-11-26 8:10 ` [PATCH v4 5/7] via-velocity: Re-enable transmit scatter-gather support Simon Kagstrom
@ 2009-11-26 8:10 ` Simon Kagstrom
2009-11-26 8:10 ` [PATCH v4 7/7] via-velocity: Bump version Simon Kagstrom
2009-11-26 23:52 ` [PATCH v4 0/7] via-velocity performance fixes David Miller
7 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 8:10 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
Defaults to on (as before).
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.c | 18 ++++--------------
drivers/net/via-velocity.h | 1 -
2 files changed, 4 insertions(+), 15 deletions(-)
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index b7f676b..9572890 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -297,14 +297,6 @@ VELOCITY_PARAM(DMA_length, "DMA length");
*/
VELOCITY_PARAM(IP_byte_align, "Enable IP header dword aligned");
-#define TX_CSUM_DEF 1
-/* txcsum_offload[] is used for setting the checksum offload ability of NIC.
- (We only support RX checksum offload now)
- 0: disable csum_offload[checksum offload
- 1: enable checksum offload. (Default)
-*/
-VELOCITY_PARAM(txcsum_offload, "Enable transmit packet checksum offload");
-
#define FLOW_CNTL_DEF 1
#define FLOW_CNTL_MIN 1
#define FLOW_CNTL_MAX 5
@@ -503,7 +495,6 @@ static void __devinit velocity_get_options(struct velocity_opt *opts, int index,
velocity_set_int_opt(&opts->numrx, RxDescriptors[index], RX_DESC_MIN, RX_DESC_MAX, RX_DESC_DEF, "RxDescriptors", devname);
velocity_set_int_opt(&opts->numtx, TxDescriptors[index], TX_DESC_MIN, TX_DESC_MAX, TX_DESC_DEF, "TxDescriptors", devname);
- velocity_set_bool_opt(&opts->flags, txcsum_offload[index], TX_CSUM_DEF, VELOCITY_FLAGS_TX_CSUM, "txcsum_offload", devname);
velocity_set_int_opt(&opts->flow_cntl, flow_control[index], FLOW_CNTL_MIN, FLOW_CNTL_MAX, FLOW_CNTL_DEF, "flow_control", devname);
velocity_set_bool_opt(&opts->flags, IP_byte_align[index], IP_ALIG_DEF, VELOCITY_FLAGS_IP_ALIGN, "IP_byte_align", devname);
velocity_set_bool_opt(&opts->flags, ValPktLen[index], VAL_PKT_LEN_DEF, VELOCITY_FLAGS_VAL_PKT_LEN, "ValPktLen", devname);
@@ -2607,7 +2598,7 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb,
/*
* Handle hardware checksum
*/
- if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM)
+ if ( (dev->features & NETIF_F_IP_CSUM)
&& (skb->ip_summed == CHECKSUM_PARTIAL)) {
const struct iphdr *ip = ip_hdr(skb);
if (ip->protocol == IPPROTO_TCP)
@@ -2853,10 +2844,7 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi
netif_napi_add(dev, &vptr->napi, velocity_poll, VELOCITY_NAPI_WEIGHT);
dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER |
- NETIF_F_HW_VLAN_RX;
-
- if (vptr->flags & VELOCITY_FLAGS_TX_CSUM)
- dev->features |= NETIF_F_IP_CSUM;
+ NETIF_F_HW_VLAN_RX | NETIF_F_IP_CSUM;
ret = register_netdev(dev);
if (ret < 0)
@@ -3416,6 +3404,8 @@ static const struct ethtool_ops velocity_ethtool_ops = {
.get_settings = velocity_get_settings,
.set_settings = velocity_set_settings,
.get_drvinfo = velocity_get_drvinfo,
+ .set_tx_csum = ethtool_op_set_tx_csum,
+ .get_tx_csum = ethtool_op_get_tx_csum,
.get_wol = velocity_ethtool_get_wol,
.set_wol = velocity_ethtool_set_wol,
.get_msglevel = velocity_get_msglevel,
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index 22bfea4..cf10801 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -1423,7 +1423,6 @@ enum velocity_msg_level {
*/
#define VELOCITY_FLAGS_TAGGING 0x00000001UL
-#define VELOCITY_FLAGS_TX_CSUM 0x00000002UL
#define VELOCITY_FLAGS_RX_CSUM 0x00000004UL
#define VELOCITY_FLAGS_IP_ALIGN 0x00000008UL
#define VELOCITY_FLAGS_VAL_PKT_LEN 0x00000010UL
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* [PATCH v4 7/7] via-velocity: Bump version
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
` (5 preceding siblings ...)
2009-11-26 8:10 ` [PATCH v4 6/7] via-velocity: Set tx checksum from ethtool instead of module parameter Simon Kagstrom
@ 2009-11-26 8:10 ` Simon Kagstrom
2009-11-26 23:52 ` [PATCH v4 0/7] via-velocity performance fixes David Miller
7 siblings, 0 replies; 40+ messages in thread
From: Simon Kagstrom @ 2009-11-26 8:10 UTC (permalink / raw)
To: netdev; +Cc: davem, davej, shemminger, romieu
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
---
drivers/net/via-velocity.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index cf10801..6b5fc51 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -29,7 +29,7 @@
#define VELOCITY_NAME "via-velocity"
#define VELOCITY_FULL_DRV_NAM "VIA Networking Velocity Family Gigabit Ethernet Adapter Driver"
-#define VELOCITY_VERSION "1.14"
+#define VELOCITY_VERSION "1.15"
#define VELOCITY_IO_SIZE 256
#define VELOCITY_NAPI_WEIGHT 64
--
1.6.0.4
^ permalink raw reply related [flat|nested] 40+ messages in thread* Re: [PATCH v4 0/7] via-velocity performance fixes
2009-11-26 8:04 ` [PATCH v4 " Simon Kagstrom
` (6 preceding siblings ...)
2009-11-26 8:10 ` [PATCH v4 7/7] via-velocity: Bump version Simon Kagstrom
@ 2009-11-26 23:52 ` David Miller
7 siblings, 0 replies; 40+ messages in thread
From: David Miller @ 2009-11-26 23:52 UTC (permalink / raw)
To: simon.kagstrom; +Cc: netdev, davej, shemminger, romieu
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Thu, 26 Nov 2009 09:04:29 +0100
> Hi again!
>
> This is version four of the series posted here:
>
> http://marc.info/?l=linux-netdev&m=125873023117769&w=2
>
> The new version integrates Davids comments. There are 7 patches:
All applied to net-next-2.6, thanks!
^ permalink raw reply [flat|nested] 40+ messages in thread