* [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support
@ 2008-09-20 17:45 Brice Goglin
2008-09-25 0:51 ` Jeff Garzik
0 siblings, 1 reply; 6+ messages in thread
From: Brice Goglin @ 2008-09-20 17:45 UTC (permalink / raw)
To: Jeff Garzik, netdev
Add multiqueue TX support to myri10ge.
Signed-off-by: Brice Goglin <brice@myri.com>
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -102,6 +102,9 @@ MODULE_LICENSE("Dual BSD/GPL");
#define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
#define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)
+#define MYRI10GE_MAX_SLICES 32
+
struct myri10ge_rx_buffer_state {
struct page *page;
int page_offset;
@@ -138,6 +141,8 @@ struct myri10ge_rx_buf {
struct myri10ge_tx_buf {
struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */
+ __be32 __iomem *send_go; /* "go" doorbell ptr */
+ __be32 __iomem *send_stop; /* "stop" doorbell ptr */
struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */
char *req_bytes;
struct myri10ge_tx_buffer_state *info;
@@ -149,6 +154,7 @@ struct myri10ge_tx_buf {
int done ____cacheline_aligned; /* transmit slots completed */
int pkt_done; /* packets completed */
int wake_queue;
+ int queue_active;
};
struct myri10ge_rx_done {
@@ -420,6 +426,12 @@ myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
return -ENOSYS;
} else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
return -E2BIG;
+ } else if (result == MXGEFW_CMD_ERROR_RANGE &&
+ cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
+ (data->
+ data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) !=
+ 0) {
+ return -ERANGE;
} else {
dev_err(&mgp->pdev->dev,
"command %d failed, result = %d\n",
@@ -949,9 +961,24 @@ static int myri10ge_reset(struct myri10ge_priv *mgp)
*/
cmd.data0 = mgp->num_slices;
- cmd.data1 = 1; /* use MSI-X */
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ if (mgp->dev->real_num_tx_queues > 1)
+ cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
+
+ /* Firmware older than 1.4.32 only supports multiple
+ * RX queues, so if we get an error, first retry using a
+ * single TX queue before giving up */
+ if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
+ mgp->dev->real_num_tx_queues = 1;
+ cmd.data0 = mgp->num_slices;
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ status = myri10ge_send_cmd(mgp,
+ MXGEFW_CMD_ENABLE_RSS_QUEUES,
+ &cmd, 0);
+ }
+
if (status != 0) {
dev_err(&mgp->pdev->dev,
"failed to set number of slices\n");
@@ -1319,6 +1346,7 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
{
struct pci_dev *pdev = ss->mgp->pdev;
struct myri10ge_tx_buf *tx = &ss->tx;
+ struct netdev_queue *dev_queue;
struct sk_buff *skb;
int idx, len;
@@ -1352,11 +1380,31 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
PCI_DMA_TODEVICE);
}
}
+
+ dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
+ /*
+ * Make a minimal effort to prevent the NIC from polling an
+ * idle tx queue. If we can't get the lock we leave the queue
+ * active. In this case, either a thread was about to start
+ * using the queue anyway, or we lost a race and the NIC will
+ * waste some of its resources polling an inactive queue for a
+ * while.
+ */
+
+ if ((ss->mgp->dev->real_num_tx_queues > 1) &&
+ __netif_tx_trylock(dev_queue)) {
+ if (tx->req == tx->done) {
+ tx->queue_active = 0;
+ put_be32(htonl(1), tx->send_stop);
+ }
+ __netif_tx_unlock(dev_queue);
+ }
+
/* start the queue if we've stopped it */
- if (netif_queue_stopped(ss->dev)
+ if (netif_tx_queue_stopped(dev_queue)
&& tx->req - tx->done < (tx->mask >> 1)) {
tx->wake_queue++;
- netif_wake_queue(ss->dev);
+ netif_tx_wake_queue(dev_queue);
}
}
@@ -1484,9 +1532,9 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
u32 send_done_count;
int i;
- /* an interrupt on a non-zero slice is implicitly valid
- * since MSI-X irqs are not shared */
- if (ss != mgp->ss) {
+ /* an interrupt on a non-zero receive-only slice is implicitly
+ * valid since MSI-X irqs are not shared */
+ if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
netif_rx_schedule(ss->dev, &ss->napi);
return (IRQ_HANDLED);
}
@@ -1528,7 +1576,9 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
barrier();
}
- myri10ge_check_statblock(mgp);
+ /* Only slice 0 updates stats */
+ if (ss == mgp->ss)
+ myri10ge_check_statblock(mgp);
put_be32(htonl(3), ss->irq_claim + 1);
return (IRQ_HANDLED);
@@ -1886,6 +1936,7 @@ static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
/* ensure req_list entries are aligned to 8 bytes */
ss->tx.req_list = (struct mcp_kreq_ether_send *)
ALIGN((unsigned long)ss->tx.req_bytes, 8);
+ ss->tx.queue_active = 0;
bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
@@ -2366,11 +2417,14 @@ static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
int status;
ss = &mgp->ss[slice];
- cmd.data0 = 0; /* single slice for now */
- status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0);
- ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
- (mgp->sram + cmd.data0);
-
+ status = 0;
+ if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) {
+ cmd.data0 = slice;
+ status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET,
+ &cmd, 0);
+ ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
+ (mgp->sram + cmd.data0);
+ }
cmd.data0 = slice;
status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET,
&cmd, 0);
@@ -2382,6 +2436,10 @@ static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *)
(mgp->sram + cmd.data0);
+ ss->tx.send_go = (__iomem __be32 *)
+ (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
+ ss->tx.send_stop = (__iomem __be32 *)
+ (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
return status;
}
@@ -2395,7 +2453,7 @@ static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice)
ss = &mgp->ss[slice];
cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
- cmd.data2 = sizeof(struct mcp_irq_data);
+ cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
if (status == -ENOSYS) {
dma_addr_t bus = ss->fw_stats_bus;
@@ -2436,7 +2494,9 @@ static int myri10ge_open(struct net_device *dev)
if (mgp->num_slices > 1) {
cmd.data0 = mgp->num_slices;
- cmd.data1 = 1; /* use MSI-X */
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ if (mgp->dev->real_num_tx_queues > 1)
+ cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
if (status != 0) {
@@ -2457,6 +2517,7 @@ static int myri10ge_open(struct net_device *dev)
printk(KERN_ERR
"myri10ge: %s: failed to setup rss tables\n",
dev->name);
+ goto abort_with_nothing;
}
/* just enable an identity mapping */
@@ -2527,7 +2602,11 @@ static int myri10ge_open(struct net_device *dev)
status = myri10ge_allocate_rings(ss);
if (status != 0)
goto abort_with_rings;
- if (slice == 0)
+
+ /* only firmware which supports multiple TX queues
+ * supports setting up the tx stats on non-zero
+ * slices */
+ if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
status = myri10ge_set_stats(mgp, slice);
if (status) {
printk(KERN_ERR
@@ -2593,7 +2672,8 @@ static int myri10ge_open(struct net_device *dev)
mgp->running = MYRI10GE_ETH_RUNNING;
mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
add_timer(&mgp->watchdog_timer);
- netif_wake_queue(dev);
+ netif_tx_wake_all_queues(dev);
+
return 0;
abort_with_rings:
@@ -2620,13 +2705,15 @@ static int myri10ge_close(struct net_device *dev)
if (mgp->ss[0].tx.req_bytes == NULL)
return 0;
+ dev->select_queue = NULL;
del_timer_sync(&mgp->watchdog_timer);
mgp->running = MYRI10GE_ETH_STOPPING;
for (i = 0; i < mgp->num_slices; i++) {
napi_disable(&mgp->ss[i].napi);
}
netif_carrier_off(dev);
- netif_stop_queue(dev);
+
+ netif_tx_stop_all_queues(dev);
old_down_cnt = mgp->down_cnt;
mb();
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
@@ -2731,18 +2823,23 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev)
struct mcp_kreq_ether_send *req;
struct myri10ge_tx_buf *tx;
struct skb_frag_struct *frag;
+ struct netdev_queue *netdev_queue;
dma_addr_t bus;
u32 low;
__be32 high_swapped;
unsigned int len;
int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
- u16 pseudo_hdr_offset, cksum_offset;
+ u16 pseudo_hdr_offset, cksum_offset, queue;
int cum_len, seglen, boundary, rdma_count;
u8 flags, odd_flag;
- /* always transmit through slot 0 */
- ss = mgp->ss;
+ queue = skb_get_queue_mapping(skb);
+ queue &= (mgp->num_slices - 1);
+
+ ss = &mgp->ss[queue];
+ netdev_queue = netdev_get_tx_queue(mgp->dev, queue);
tx = &ss->tx;
+
again:
req = tx->req_list;
avail = tx->mask - 1 - (tx->req - tx->done);
@@ -2758,7 +2855,7 @@ again:
if ((unlikely(avail < max_segments))) {
/* we are out of transmit resources */
tx->stop_queue++;
- netif_stop_queue(dev);
+ netif_tx_stop_queue(netdev_queue);
return 1;
}
@@ -2951,10 +3048,16 @@ again:
idx = ((count - 1) + tx->req) & tx->mask;
tx->info[idx].last = 1;
myri10ge_submit_req(tx, tx->req_list, count);
+ /* if using multiple tx queues, make sure NIC polls the
+ * current slice */
+ if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) {
+ tx->queue_active = 1;
+ put_be32(htonl(1), tx->send_go);
+ }
tx->pkt_start++;
if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
tx->stop_queue++;
- netif_stop_queue(dev);
+ netif_tx_stop_queue(netdev_queue);
}
dev->trans_start = jiffies;
return 0;
@@ -3532,20 +3635,21 @@ static void myri10ge_watchdog(struct work_struct *work)
for (i = 0; i < mgp->num_slices; i++) {
tx = &mgp->ss[i].tx;
printk(KERN_INFO
- "myri10ge: %s: (%d): %d %d %d %d %d\n",
- mgp->dev->name, i, tx->req, tx->done,
- tx->pkt_start, tx->pkt_done,
+ "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
+ mgp->dev->name, i, tx->queue_active, tx->req,
+ tx->done, tx->pkt_start, tx->pkt_done,
(int)ntohl(mgp->ss[i].fw_stats->
send_done_count));
msleep(2000);
printk(KERN_INFO
- "myri10ge: %s: (%d): %d %d %d %d %d\n",
- mgp->dev->name, i, tx->req, tx->done,
- tx->pkt_start, tx->pkt_done,
+ "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
+ mgp->dev->name, i, tx->queue_active, tx->req,
+ tx->done, tx->pkt_start, tx->pkt_done,
(int)ntohl(mgp->ss[i].fw_stats->
send_done_count));
}
}
+
rtnl_lock();
myri10ge_close(mgp->dev);
status = myri10ge_load_firmware(mgp, 1);
@@ -3600,10 +3704,14 @@ static void myri10ge_watchdog_timer(unsigned long arg)
/* nic seems like it might be stuck.. */
if (rx_pause_cnt != mgp->watchdog_pause) {
if (net_ratelimit())
- printk(KERN_WARNING "myri10ge %s:"
+ printk(KERN_WARNING
+ "myri10ge %s slice %d:"
"TX paused, check link partner\n",
- mgp->dev->name);
+ mgp->dev->name, i);
} else {
+ printk(KERN_WARNING
+ "myri10ge %s slice %d stuck:",
+ mgp->dev->name, i);
reset_needed = 1;
}
}
@@ -3789,6 +3897,9 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
mgp->num_slices);
if (status == 0) {
pci_disable_msix(pdev);
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ mgp->features |= NETIF_F_MULTI_QUEUE;
+#endif
return;
}
if (status > 0)
@@ -3818,7 +3929,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
int status = -ENXIO;
int dac_enabled;
- netdev = alloc_etherdev(sizeof(*mgp));
+ netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
if (netdev == NULL) {
dev_err(dev, "Could not allocate ethernet device\n");
return -ENOMEM;
@@ -3923,7 +4034,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_err(&pdev->dev, "failed to alloc slice state\n");
goto abort_with_firmware;
}
-
+ netdev->real_num_tx_queues = mgp->num_slices;
status = myri10ge_reset(mgp);
if (status != 0) {
dev_err(&pdev->dev, "failed reset\n");
@@ -3947,6 +4058,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->set_multicast_list = myri10ge_set_multicast_list;
netdev->set_mac_address = myri10ge_set_mac_address;
netdev->features = mgp->features;
+
if (dac_enabled)
netdev->features |= NETIF_F_HIGHDMA;
@@ -4102,8 +4214,7 @@ static __init int myri10ge_init_module(void)
printk(KERN_INFO "%s: Version %s\n", myri10ge_driver.name,
MYRI10GE_VERSION_STR);
- if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_SRC_PORT ||
- myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
+ if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
printk(KERN_ERR
"%s: Illegal rssh hash type %d, defaulting to source port\n",
myri10ge_driver.name, myri10ge_rss_hash);
@@ -4112,6 +4223,8 @@ static __init int myri10ge_init_module(void)
#ifdef CONFIG_DCA
dca_register_notify(&myri10ge_dca_notifier);
#endif
+ if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
+ myri10ge_max_slices = MYRI10GE_MAX_SLICES;
return pci_register_driver(&myri10ge_driver);
}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support
2008-09-20 17:45 [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support Brice Goglin
@ 2008-09-25 0:51 ` Jeff Garzik
0 siblings, 0 replies; 6+ messages in thread
From: Jeff Garzik @ 2008-09-25 0:51 UTC (permalink / raw)
To: Brice Goglin; +Cc: netdev
Brice Goglin wrote:
> Add multiqueue TX support to myri10ge.
>
> Signed-off-by: Brice Goglin <brice@myri.com>
>
> --- a/drivers/net/myri10ge/myri10ge.c
> +++ b/drivers/net/myri10ge/myri10ge.c
> @@ -102,6 +102,9 @@ MODULE_LICENSE("Dual BSD/GPL");
> #define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
> #define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)
>
> +#define MYRI10GE_MAX_SLICES 32
> +
> struct myri10ge_rx_buffer_state {
> struct page *page;
> int page_offset;
> @@ -138,6 +141,8 @@ struct myri10ge_rx_buf {
>
> struct myri10ge_tx_buf {
> struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */
> + __be32 __iomem *send_go; /* "go" doorbell ptr */
> + __be32 __iomem *send_stop; /* "stop" doorbell ptr */
> struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */
> char *req_bytes;
> struct myri10ge_tx_buffer_state *info;
> @@ -149,6 +154,7 @@ struct myri10ge_tx_buf {
> int done ____cacheline_aligned; /* transmit slots completed */
> int pkt_done; /* packets completed */
> int wake_queue;
> + int queue_active;
> };
>
> struct myri10ge_rx_done {
> @@ -420,6 +426,12 @@ myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
> return -ENOSYS;
> } else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
> return -E2BIG;
> + } else if (result == MXGEFW_CMD_ERROR_RANGE &&
> + cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
> + (data->
> + data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) !=
> + 0) {
> + return -ERANGE;
> } else {
> dev_err(&mgp->pdev->dev,
> "command %d failed, result = %d\n",
> @@ -949,9 +961,24 @@ static int myri10ge_reset(struct myri10ge_priv *mgp)
> */
>
> cmd.data0 = mgp->num_slices;
> - cmd.data1 = 1; /* use MSI-X */
> + cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
> + if (mgp->dev->real_num_tx_queues > 1)
> + cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
> status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
> &cmd, 0);
> +
> + /* Firmware older than 1.4.32 only supports multiple
> + * RX queues, so if we get an error, first retry using a
> + * single TX queue before giving up */
> + if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
> + mgp->dev->real_num_tx_queues = 1;
> + cmd.data0 = mgp->num_slices;
> + cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
> + status = myri10ge_send_cmd(mgp,
> + MXGEFW_CMD_ENABLE_RSS_QUEUES,
> + &cmd, 0);
> + }
> +
> if (status != 0) {
> dev_err(&mgp->pdev->dev,
> "failed to set number of slices\n");
> @@ -1319,6 +1346,7 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
> {
> struct pci_dev *pdev = ss->mgp->pdev;
> struct myri10ge_tx_buf *tx = &ss->tx;
> + struct netdev_queue *dev_queue;
> struct sk_buff *skb;
> int idx, len;
>
> @@ -1352,11 +1380,31 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
> PCI_DMA_TODEVICE);
> }
> }
> +
> + dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
> + /*
> + * Make a minimal effort to prevent the NIC from polling an
> + * idle tx queue. If we can't get the lock we leave the queue
> + * active. In this case, either a thread was about to start
> + * using the queue anyway, or we lost a race and the NIC will
> + * waste some of its resources polling an inactive queue for a
> + * while.
> + */
> +
> + if ((ss->mgp->dev->real_num_tx_queues > 1) &&
> + __netif_tx_trylock(dev_queue)) {
> + if (tx->req == tx->done) {
> + tx->queue_active = 0;
> + put_be32(htonl(1), tx->send_stop);
> + }
> + __netif_tx_unlock(dev_queue);
> + }
> +
> /* start the queue if we've stopped it */
> - if (netif_queue_stopped(ss->dev)
> + if (netif_tx_queue_stopped(dev_queue)
> && tx->req - tx->done < (tx->mask >> 1)) {
> tx->wake_queue++;
> - netif_wake_queue(ss->dev);
> + netif_tx_wake_queue(dev_queue);
> }
> }
>
> @@ -1484,9 +1532,9 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
> u32 send_done_count;
> int i;
>
> - /* an interrupt on a non-zero slice is implicitly valid
> - * since MSI-X irqs are not shared */
> - if (ss != mgp->ss) {
> + /* an interrupt on a non-zero receive-only slice is implicitly
> + * valid since MSI-X irqs are not shared */
> + if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
> netif_rx_schedule(ss->dev, &ss->napi);
> return (IRQ_HANDLED);
> }
> @@ -1528,7 +1576,9 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
> barrier();
> }
>
> - myri10ge_check_statblock(mgp);
> + /* Only slice 0 updates stats */
> + if (ss == mgp->ss)
> + myri10ge_check_statblock(mgp);
>
> put_be32(htonl(3), ss->irq_claim + 1);
> return (IRQ_HANDLED);
> @@ -1886,6 +1936,7 @@ static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
> /* ensure req_list entries are aligned to 8 bytes */
> ss->tx.req_list = (struct mcp_kreq_ether_send *)
> ALIGN((unsigned long)ss->tx.req_bytes, 8);
> + ss->tx.queue_active = 0;
>
> bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
> ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
> @@ -2366,11 +2417,14 @@ static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
> int status;
>
> ss = &mgp->ss[slice];
> - cmd.data0 = 0; /* single slice for now */
> - status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0);
> - ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
> - (mgp->sram + cmd.data0);
> -
> + status = 0;
> + if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) {
> + cmd.data0 = slice;
> + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET,
> + &cmd, 0);
> + ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
> + (mgp->sram + cmd.data0);
> + }
> cmd.data0 = slice;
> status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET,
> &cmd, 0);
> @@ -2382,6 +2436,10 @@ static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
> ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *)
> (mgp->sram + cmd.data0);
>
> + ss->tx.send_go = (__iomem __be32 *)
> + (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
> + ss->tx.send_stop = (__iomem __be32 *)
> + (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
> return status;
>
> }
> @@ -2395,7 +2453,7 @@ static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice)
> ss = &mgp->ss[slice];
> cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
> cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
> - cmd.data2 = sizeof(struct mcp_irq_data);
> + cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
> status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
> if (status == -ENOSYS) {
> dma_addr_t bus = ss->fw_stats_bus;
> @@ -2436,7 +2494,9 @@ static int myri10ge_open(struct net_device *dev)
>
> if (mgp->num_slices > 1) {
> cmd.data0 = mgp->num_slices;
> - cmd.data1 = 1; /* use MSI-X */
> + cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
> + if (mgp->dev->real_num_tx_queues > 1)
> + cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
> status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
> &cmd, 0);
> if (status != 0) {
> @@ -2457,6 +2517,7 @@ static int myri10ge_open(struct net_device *dev)
> printk(KERN_ERR
> "myri10ge: %s: failed to setup rss tables\n",
> dev->name);
> + goto abort_with_nothing;
> }
>
> /* just enable an identity mapping */
> @@ -2527,7 +2602,11 @@ static int myri10ge_open(struct net_device *dev)
> status = myri10ge_allocate_rings(ss);
> if (status != 0)
> goto abort_with_rings;
> - if (slice == 0)
> +
> + /* only firmware which supports multiple TX queues
> + * supports setting up the tx stats on non-zero
> + * slices */
> + if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
> status = myri10ge_set_stats(mgp, slice);
> if (status) {
> printk(KERN_ERR
> @@ -2593,7 +2672,8 @@ static int myri10ge_open(struct net_device *dev)
> mgp->running = MYRI10GE_ETH_RUNNING;
> mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
> add_timer(&mgp->watchdog_timer);
> - netif_wake_queue(dev);
> + netif_tx_wake_all_queues(dev);
> +
> return 0;
>
> abort_with_rings:
> @@ -2620,13 +2705,15 @@ static int myri10ge_close(struct net_device *dev)
> if (mgp->ss[0].tx.req_bytes == NULL)
> return 0;
>
> + dev->select_queue = NULL;
> del_timer_sync(&mgp->watchdog_timer);
> mgp->running = MYRI10GE_ETH_STOPPING;
> for (i = 0; i < mgp->num_slices; i++) {
> napi_disable(&mgp->ss[i].napi);
> }
> netif_carrier_off(dev);
> - netif_stop_queue(dev);
> +
> + netif_tx_stop_all_queues(dev);
> old_down_cnt = mgp->down_cnt;
> mb();
> status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
> @@ -2731,18 +2823,23 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev)
> struct mcp_kreq_ether_send *req;
> struct myri10ge_tx_buf *tx;
> struct skb_frag_struct *frag;
> + struct netdev_queue *netdev_queue;
> dma_addr_t bus;
> u32 low;
> __be32 high_swapped;
> unsigned int len;
> int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
> - u16 pseudo_hdr_offset, cksum_offset;
> + u16 pseudo_hdr_offset, cksum_offset, queue;
> int cum_len, seglen, boundary, rdma_count;
> u8 flags, odd_flag;
>
> - /* always transmit through slot 0 */
> - ss = mgp->ss;
> + queue = skb_get_queue_mapping(skb);
> + queue &= (mgp->num_slices - 1);
> +
> + ss = &mgp->ss[queue];
> + netdev_queue = netdev_get_tx_queue(mgp->dev, queue);
> tx = &ss->tx;
> +
> again:
> req = tx->req_list;
> avail = tx->mask - 1 - (tx->req - tx->done);
> @@ -2758,7 +2855,7 @@ again:
> if ((unlikely(avail < max_segments))) {
> /* we are out of transmit resources */
> tx->stop_queue++;
> - netif_stop_queue(dev);
> + netif_tx_stop_queue(netdev_queue);
> return 1;
> }
>
> @@ -2951,10 +3048,16 @@ again:
> idx = ((count - 1) + tx->req) & tx->mask;
> tx->info[idx].last = 1;
> myri10ge_submit_req(tx, tx->req_list, count);
> + /* if using multiple tx queues, make sure NIC polls the
> + * current slice */
> + if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) {
> + tx->queue_active = 1;
> + put_be32(htonl(1), tx->send_go);
> + }
> tx->pkt_start++;
> if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
> tx->stop_queue++;
> - netif_stop_queue(dev);
> + netif_tx_stop_queue(netdev_queue);
> }
> dev->trans_start = jiffies;
> return 0;
> @@ -3532,20 +3635,21 @@ static void myri10ge_watchdog(struct work_struct *work)
> for (i = 0; i < mgp->num_slices; i++) {
> tx = &mgp->ss[i].tx;
> printk(KERN_INFO
> - "myri10ge: %s: (%d): %d %d %d %d %d\n",
> - mgp->dev->name, i, tx->req, tx->done,
> - tx->pkt_start, tx->pkt_done,
> + "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
> + mgp->dev->name, i, tx->queue_active, tx->req,
> + tx->done, tx->pkt_start, tx->pkt_done,
> (int)ntohl(mgp->ss[i].fw_stats->
> send_done_count));
> msleep(2000);
> printk(KERN_INFO
> - "myri10ge: %s: (%d): %d %d %d %d %d\n",
> - mgp->dev->name, i, tx->req, tx->done,
> - tx->pkt_start, tx->pkt_done,
> + "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
> + mgp->dev->name, i, tx->queue_active, tx->req,
> + tx->done, tx->pkt_start, tx->pkt_done,
> (int)ntohl(mgp->ss[i].fw_stats->
> send_done_count));
> }
> }
> +
> rtnl_lock();
> myri10ge_close(mgp->dev);
> status = myri10ge_load_firmware(mgp, 1);
> @@ -3600,10 +3704,14 @@ static void myri10ge_watchdog_timer(unsigned long arg)
> /* nic seems like it might be stuck.. */
> if (rx_pause_cnt != mgp->watchdog_pause) {
> if (net_ratelimit())
> - printk(KERN_WARNING "myri10ge %s:"
> + printk(KERN_WARNING
> + "myri10ge %s slice %d:"
> "TX paused, check link partner\n",
> - mgp->dev->name);
> + mgp->dev->name, i);
> } else {
> + printk(KERN_WARNING
> + "myri10ge %s slice %d stuck:",
> + mgp->dev->name, i);
> reset_needed = 1;
> }
> }
> @@ -3789,6 +3897,9 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
> mgp->num_slices);
> if (status == 0) {
> pci_disable_msix(pdev);
> +#ifdef CONFIG_NETDEVICES_MULTIQUEUE
> + mgp->features |= NETIF_F_MULTI_QUEUE;
> +#endif
> return;
> }
> if (status > 0)
> @@ -3818,7 +3929,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> int status = -ENXIO;
> int dac_enabled;
>
> - netdev = alloc_etherdev(sizeof(*mgp));
> + netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
> if (netdev == NULL) {
> dev_err(dev, "Could not allocate ethernet device\n");
> return -ENOMEM;
> @@ -3923,7 +4034,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> dev_err(&pdev->dev, "failed to alloc slice state\n");
> goto abort_with_firmware;
> }
> -
> + netdev->real_num_tx_queues = mgp->num_slices;
> status = myri10ge_reset(mgp);
> if (status != 0) {
> dev_err(&pdev->dev, "failed reset\n");
> @@ -3947,6 +4058,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> netdev->set_multicast_list = myri10ge_set_multicast_list;
> netdev->set_mac_address = myri10ge_set_mac_address;
> netdev->features = mgp->features;
> +
> if (dac_enabled)
> netdev->features |= NETIF_F_HIGHDMA;
>
> @@ -4102,8 +4214,7 @@ static __init int myri10ge_init_module(void)
> printk(KERN_INFO "%s: Version %s\n", myri10ge_driver.name,
> MYRI10GE_VERSION_STR);
>
> - if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_SRC_PORT ||
> - myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
> + if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
> printk(KERN_ERR
> "%s: Illegal rssh hash type %d, defaulting to source port\n",
> myri10ge_driver.name, myri10ge_rss_hash);
> @@ -4112,6 +4223,8 @@ static __init int myri10ge_init_module(void)
> #ifdef CONFIG_DCA
> dca_register_notify(&myri10ge_dca_notifier);
> #endif
> + if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
> + myri10ge_max_slices = MYRI10GE_MAX_SLICES;
>
> return pci_register_driver(&myri10ge_driver);
> }
>
This patch doesn't seem to apply to net-next...
^ permalink raw reply [flat|nested] 6+ messages in thread
* [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support
@ 2008-09-25 3:42 Brice Goglin
0 siblings, 0 replies; 6+ messages in thread
From: Brice Goglin @ 2008-09-25 3:42 UTC (permalink / raw)
To: Jeff Garzik; +Cc: netdev
Add multiqueue TX support to myri10ge.
Signed-off-by: Brice Goglin <brice@myri.com>
--- linux-2.6.git.orig/drivers/net/myri10ge/myri10ge.c 2008-09-20 09:52:39.000000000 +0200
+++ linux-2.6.git/drivers/net/myri10ge/myri10ge.c 2008-09-20 09:53:09.000000000 +0200
@@ -102,6 +102,8 @@
#define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
#define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)
+#define MYRI10GE_MAX_SLICES 32
+
struct myri10ge_rx_buffer_state {
struct page *page;
int page_offset;
@@ -138,6 +140,8 @@
struct myri10ge_tx_buf {
struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */
+ __be32 __iomem *send_go; /* "go" doorbell ptr */
+ __be32 __iomem *send_stop; /* "stop" doorbell ptr */
struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */
char *req_bytes;
struct myri10ge_tx_buffer_state *info;
@@ -149,6 +153,7 @@
int done ____cacheline_aligned; /* transmit slots completed */
int pkt_done; /* packets completed */
int wake_queue;
+ int queue_active;
};
struct myri10ge_rx_done {
@@ -418,6 +423,12 @@
return -ENOSYS;
} else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
return -E2BIG;
+ } else if (result == MXGEFW_CMD_ERROR_RANGE &&
+ cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
+ (data->
+ data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) !=
+ 0) {
+ return -ERANGE;
} else {
dev_err(&mgp->pdev->dev,
"command %d failed, result = %d\n",
@@ -947,9 +958,24 @@
*/
cmd.data0 = mgp->num_slices;
- cmd.data1 = 1; /* use MSI-X */
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ if (mgp->dev->real_num_tx_queues > 1)
+ cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
+
+ /* Firmware older than 1.4.32 only supports multiple
+ * RX queues, so if we get an error, first retry using a
+ * single TX queue before giving up */
+ if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
+ mgp->dev->real_num_tx_queues = 1;
+ cmd.data0 = mgp->num_slices;
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ status = myri10ge_send_cmd(mgp,
+ MXGEFW_CMD_ENABLE_RSS_QUEUES,
+ &cmd, 0);
+ }
+
if (status != 0) {
dev_err(&mgp->pdev->dev,
"failed to set number of slices\n");
@@ -1317,6 +1343,7 @@
{
struct pci_dev *pdev = ss->mgp->pdev;
struct myri10ge_tx_buf *tx = &ss->tx;
+ struct netdev_queue *dev_queue;
struct sk_buff *skb;
int idx, len;
@@ -1350,11 +1377,31 @@
PCI_DMA_TODEVICE);
}
}
+
+ dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
+ /*
+ * Make a minimal effort to prevent the NIC from polling an
+ * idle tx queue. If we can't get the lock we leave the queue
+ * active. In this case, either a thread was about to start
+ * using the queue anyway, or we lost a race and the NIC will
+ * waste some of its resources polling an inactive queue for a
+ * while.
+ */
+
+ if ((ss->mgp->dev->real_num_tx_queues > 1) &&
+ __netif_tx_trylock(dev_queue)) {
+ if (tx->req == tx->done) {
+ tx->queue_active = 0;
+ put_be32(htonl(1), tx->send_stop);
+ }
+ __netif_tx_unlock(dev_queue);
+ }
+
/* start the queue if we've stopped it */
- if (netif_queue_stopped(ss->dev)
+ if (netif_tx_queue_stopped(dev_queue)
&& tx->req - tx->done < (tx->mask >> 1)) {
tx->wake_queue++;
- netif_wake_queue(ss->dev);
+ netif_tx_wake_queue(dev_queue);
}
}
@@ -1482,9 +1529,9 @@
u32 send_done_count;
int i;
- /* an interrupt on a non-zero slice is implicitly valid
- * since MSI-X irqs are not shared */
- if (ss != mgp->ss) {
+ /* an interrupt on a non-zero receive-only slice is implicitly
+ * valid since MSI-X irqs are not shared */
+ if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
netif_rx_schedule(ss->dev, &ss->napi);
return (IRQ_HANDLED);
}
@@ -1526,7 +1573,9 @@
barrier();
}
- myri10ge_check_statblock(mgp);
+ /* Only slice 0 updates stats */
+ if (ss == mgp->ss)
+ myri10ge_check_statblock(mgp);
put_be32(htonl(3), ss->irq_claim + 1);
return (IRQ_HANDLED);
@@ -1884,6 +1933,7 @@
/* ensure req_list entries are aligned to 8 bytes */
ss->tx.req_list = (struct mcp_kreq_ether_send *)
ALIGN((unsigned long)ss->tx.req_bytes, 8);
+ ss->tx.queue_active = 0;
bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
@@ -2201,11 +2251,14 @@
int status;
ss = &mgp->ss[slice];
- cmd.data0 = 0; /* single slice for now */
- status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0);
- ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
- (mgp->sram + cmd.data0);
-
+ status = 0;
+ if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) {
+ cmd.data0 = slice;
+ status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET,
+ &cmd, 0);
+ ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
+ (mgp->sram + cmd.data0);
+ }
cmd.data0 = slice;
status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET,
&cmd, 0);
@@ -2217,6 +2270,10 @@
ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *)
(mgp->sram + cmd.data0);
+ ss->tx.send_go = (__iomem __be32 *)
+ (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
+ ss->tx.send_stop = (__iomem __be32 *)
+ (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
return status;
}
@@ -2230,7 +2287,7 @@
ss = &mgp->ss[slice];
cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
- cmd.data2 = sizeof(struct mcp_irq_data);
+ cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
if (status == -ENOSYS) {
dma_addr_t bus = ss->fw_stats_bus;
@@ -2271,7 +2328,9 @@
if (mgp->num_slices > 1) {
cmd.data0 = mgp->num_slices;
- cmd.data1 = 1; /* use MSI-X */
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ if (mgp->dev->real_num_tx_queues > 1)
+ cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
if (status != 0) {
@@ -2292,6 +2351,7 @@
printk(KERN_ERR
"myri10ge: %s: failed to setup rss tables\n",
dev->name);
+ goto abort_with_nothing;
}
/* just enable an identity mapping */
@@ -2362,7 +2422,11 @@
status = myri10ge_allocate_rings(ss);
if (status != 0)
goto abort_with_rings;
- if (slice == 0)
+
+ /* only firmware which supports multiple TX queues
+ * supports setting up the tx stats on non-zero
+ * slices */
+ if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
status = myri10ge_set_stats(mgp, slice);
if (status) {
printk(KERN_ERR
@@ -2428,7 +2492,8 @@
mgp->running = MYRI10GE_ETH_RUNNING;
mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
add_timer(&mgp->watchdog_timer);
- netif_wake_queue(dev);
+ netif_tx_wake_all_queues(dev);
+
return 0;
abort_with_rings:
@@ -2461,7 +2526,8 @@
napi_disable(&mgp->ss[i].napi);
}
netif_carrier_off(dev);
- netif_stop_queue(dev);
+
+ netif_tx_stop_all_queues(dev);
old_down_cnt = mgp->down_cnt;
mb();
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
@@ -2566,18 +2632,23 @@
struct mcp_kreq_ether_send *req;
struct myri10ge_tx_buf *tx;
struct skb_frag_struct *frag;
+ struct netdev_queue *netdev_queue;
dma_addr_t bus;
u32 low;
__be32 high_swapped;
unsigned int len;
int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
- u16 pseudo_hdr_offset, cksum_offset;
+ u16 pseudo_hdr_offset, cksum_offset, queue;
int cum_len, seglen, boundary, rdma_count;
u8 flags, odd_flag;
- /* always transmit through slot 0 */
- ss = mgp->ss;
+ queue = skb_get_queue_mapping(skb);
+ queue &= (mgp->num_slices - 1);
+
+ ss = &mgp->ss[queue];
+ netdev_queue = netdev_get_tx_queue(mgp->dev, queue);
tx = &ss->tx;
+
again:
req = tx->req_list;
avail = tx->mask - 1 - (tx->req - tx->done);
@@ -2593,7 +2664,7 @@
if ((unlikely(avail < max_segments))) {
/* we are out of transmit resources */
tx->stop_queue++;
- netif_stop_queue(dev);
+ netif_tx_stop_queue(netdev_queue);
return 1;
}
@@ -2786,10 +2857,16 @@
idx = ((count - 1) + tx->req) & tx->mask;
tx->info[idx].last = 1;
myri10ge_submit_req(tx, tx->req_list, count);
+ /* if using multiple tx queues, make sure NIC polls the
+ * current slice */
+ if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) {
+ tx->queue_active = 1;
+ put_be32(htonl(1), tx->send_go);
+ }
tx->pkt_start++;
if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
tx->stop_queue++;
- netif_stop_queue(dev);
+ netif_tx_stop_queue(netdev_queue);
}
dev->trans_start = jiffies;
return 0;
@@ -3367,20 +3444,21 @@
for (i = 0; i < mgp->num_slices; i++) {
tx = &mgp->ss[i].tx;
printk(KERN_INFO
- "myri10ge: %s: (%d): %d %d %d %d %d\n",
- mgp->dev->name, i, tx->req, tx->done,
- tx->pkt_start, tx->pkt_done,
+ "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
+ mgp->dev->name, i, tx->queue_active, tx->req,
+ tx->done, tx->pkt_start, tx->pkt_done,
(int)ntohl(mgp->ss[i].fw_stats->
send_done_count));
msleep(2000);
printk(KERN_INFO
- "myri10ge: %s: (%d): %d %d %d %d %d\n",
- mgp->dev->name, i, tx->req, tx->done,
- tx->pkt_start, tx->pkt_done,
+ "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
+ mgp->dev->name, i, tx->queue_active, tx->req,
+ tx->done, tx->pkt_start, tx->pkt_done,
(int)ntohl(mgp->ss[i].fw_stats->
send_done_count));
}
}
+
rtnl_lock();
myri10ge_close(mgp->dev);
status = myri10ge_load_firmware(mgp, 1);
@@ -3435,10 +3513,14 @@
/* nic seems like it might be stuck.. */
if (rx_pause_cnt != mgp->watchdog_pause) {
if (net_ratelimit())
- printk(KERN_WARNING "myri10ge %s:"
+ printk(KERN_WARNING
+ "myri10ge %s slice %d:"
"TX paused, check link partner\n",
- mgp->dev->name);
+ mgp->dev->name, i);
} else {
+ printk(KERN_WARNING
+ "myri10ge %s slice %d stuck:",
+ mgp->dev->name, i);
reset_needed = 1;
}
}
@@ -3624,6 +3706,9 @@
mgp->num_slices);
if (status == 0) {
pci_disable_msix(pdev);
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ mgp->features |= NETIF_F_MULTI_QUEUE;
+#endif
return;
}
if (status > 0)
@@ -3653,7 +3738,7 @@
int status = -ENXIO;
int dac_enabled;
- netdev = alloc_etherdev(sizeof(*mgp));
+ netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
if (netdev == NULL) {
dev_err(dev, "Could not allocate ethernet device\n");
return -ENOMEM;
@@ -3758,7 +3843,7 @@
dev_err(&pdev->dev, "failed to alloc slice state\n");
goto abort_with_firmware;
}
-
+ netdev->real_num_tx_queues = mgp->num_slices;
status = myri10ge_reset(mgp);
if (status != 0) {
dev_err(&pdev->dev, "failed reset\n");
@@ -3782,6 +3867,7 @@
netdev->set_multicast_list = myri10ge_set_multicast_list;
netdev->set_mac_address = myri10ge_set_mac_address;
netdev->features = mgp->features;
+
if (dac_enabled)
netdev->features |= NETIF_F_HIGHDMA;
@@ -3937,8 +4023,7 @@
printk(KERN_INFO "%s: Version %s\n", myri10ge_driver.name,
MYRI10GE_VERSION_STR);
- if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_SRC_PORT ||
- myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
+ if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
printk(KERN_ERR
"%s: Illegal rssh hash type %d, defaulting to source port\n",
myri10ge_driver.name, myri10ge_rss_hash);
@@ -3947,6 +4032,8 @@
#if (defined CONFIG_DCA) || (defined CONFIG_DCA_MODULE)
dca_register_notify(&myri10ge_dca_notifier);
#endif
+ if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
+ myri10ge_max_slices = MYRI10GE_MAX_SLICES;
return pci_register_driver(&myri10ge_driver);
}
^ permalink raw reply [flat|nested] 6+ messages in thread
* [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support
@ 2008-09-25 4:15 Brice Goglin
2008-09-25 19:07 ` Jeff Garzik
0 siblings, 1 reply; 6+ messages in thread
From: Brice Goglin @ 2008-09-25 4:15 UTC (permalink / raw)
To: Jeff Garzik; +Cc: netdev
Add multiqueue TX support to myri10ge.
Signed-off-by: Brice Goglin <brice@myri.com>
--- linux-2.6.git.orig/drivers/net/myri10ge/myri10ge.c 2008-09-20 09:52:39.000000000 +0200
+++ linux-2.6.git/drivers/net/myri10ge/myri10ge.c 2008-09-20 09:53:09.000000000 +0200
@@ -102,6 +102,8 @@
#define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
#define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)
+#define MYRI10GE_MAX_SLICES 32
+
struct myri10ge_rx_buffer_state {
struct page *page;
int page_offset;
@@ -138,6 +140,8 @@
struct myri10ge_tx_buf {
struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */
+ __be32 __iomem *send_go; /* "go" doorbell ptr */
+ __be32 __iomem *send_stop; /* "stop" doorbell ptr */
struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */
char *req_bytes;
struct myri10ge_tx_buffer_state *info;
@@ -149,6 +153,7 @@
int done ____cacheline_aligned; /* transmit slots completed */
int pkt_done; /* packets completed */
int wake_queue;
+ int queue_active;
};
struct myri10ge_rx_done {
@@ -418,6 +423,12 @@
return -ENOSYS;
} else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
return -E2BIG;
+ } else if (result == MXGEFW_CMD_ERROR_RANGE &&
+ cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
+ (data->
+ data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) !=
+ 0) {
+ return -ERANGE;
} else {
dev_err(&mgp->pdev->dev,
"command %d failed, result = %d\n",
@@ -947,9 +958,24 @@
*/
cmd.data0 = mgp->num_slices;
- cmd.data1 = 1; /* use MSI-X */
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ if (mgp->dev->real_num_tx_queues > 1)
+ cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
+
+ /* Firmware older than 1.4.32 only supports multiple
+ * RX queues, so if we get an error, first retry using a
+ * single TX queue before giving up */
+ if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
+ mgp->dev->real_num_tx_queues = 1;
+ cmd.data0 = mgp->num_slices;
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ status = myri10ge_send_cmd(mgp,
+ MXGEFW_CMD_ENABLE_RSS_QUEUES,
+ &cmd, 0);
+ }
+
if (status != 0) {
dev_err(&mgp->pdev->dev,
"failed to set number of slices\n");
@@ -1317,6 +1343,7 @@
{
struct pci_dev *pdev = ss->mgp->pdev;
struct myri10ge_tx_buf *tx = &ss->tx;
+ struct netdev_queue *dev_queue;
struct sk_buff *skb;
int idx, len;
@@ -1350,11 +1377,31 @@
PCI_DMA_TODEVICE);
}
}
+
+ dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
+ /*
+ * Make a minimal effort to prevent the NIC from polling an
+ * idle tx queue. If we can't get the lock we leave the queue
+ * active. In this case, either a thread was about to start
+ * using the queue anyway, or we lost a race and the NIC will
+ * waste some of its resources polling an inactive queue for a
+ * while.
+ */
+
+ if ((ss->mgp->dev->real_num_tx_queues > 1) &&
+ __netif_tx_trylock(dev_queue)) {
+ if (tx->req == tx->done) {
+ tx->queue_active = 0;
+ put_be32(htonl(1), tx->send_stop);
+ }
+ __netif_tx_unlock(dev_queue);
+ }
+
/* start the queue if we've stopped it */
- if (netif_queue_stopped(ss->dev)
+ if (netif_tx_queue_stopped(dev_queue)
&& tx->req - tx->done < (tx->mask >> 1)) {
tx->wake_queue++;
- netif_wake_queue(ss->dev);
+ netif_tx_wake_queue(dev_queue);
}
}
@@ -1482,9 +1529,9 @@
u32 send_done_count;
int i;
- /* an interrupt on a non-zero slice is implicitly valid
- * since MSI-X irqs are not shared */
- if (ss != mgp->ss) {
+ /* an interrupt on a non-zero receive-only slice is implicitly
+ * valid since MSI-X irqs are not shared */
+ if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
netif_rx_schedule(ss->dev, &ss->napi);
return (IRQ_HANDLED);
}
@@ -1526,7 +1573,9 @@
barrier();
}
- myri10ge_check_statblock(mgp);
+ /* Only slice 0 updates stats */
+ if (ss == mgp->ss)
+ myri10ge_check_statblock(mgp);
put_be32(htonl(3), ss->irq_claim + 1);
return (IRQ_HANDLED);
@@ -1884,6 +1933,7 @@
/* ensure req_list entries are aligned to 8 bytes */
ss->tx.req_list = (struct mcp_kreq_ether_send *)
ALIGN((unsigned long)ss->tx.req_bytes, 8);
+ ss->tx.queue_active = 0;
bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
@@ -2201,11 +2251,14 @@
int status;
ss = &mgp->ss[slice];
- cmd.data0 = 0; /* single slice for now */
- status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0);
- ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
- (mgp->sram + cmd.data0);
-
+ status = 0;
+ if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) {
+ cmd.data0 = slice;
+ status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET,
+ &cmd, 0);
+ ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
+ (mgp->sram + cmd.data0);
+ }
cmd.data0 = slice;
status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET,
&cmd, 0);
@@ -2217,6 +2270,10 @@
ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *)
(mgp->sram + cmd.data0);
+ ss->tx.send_go = (__iomem __be32 *)
+ (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
+ ss->tx.send_stop = (__iomem __be32 *)
+ (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
return status;
}
@@ -2230,7 +2287,7 @@
ss = &mgp->ss[slice];
cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
- cmd.data2 = sizeof(struct mcp_irq_data);
+ cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
if (status == -ENOSYS) {
dma_addr_t bus = ss->fw_stats_bus;
@@ -2271,7 +2328,9 @@
if (mgp->num_slices > 1) {
cmd.data0 = mgp->num_slices;
- cmd.data1 = 1; /* use MSI-X */
+ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+ if (mgp->dev->real_num_tx_queues > 1)
+ cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
if (status != 0) {
@@ -2292,6 +2351,7 @@
printk(KERN_ERR
"myri10ge: %s: failed to setup rss tables\n",
dev->name);
+ goto abort_with_nothing;
}
/* just enable an identity mapping */
@@ -2362,7 +2422,11 @@
status = myri10ge_allocate_rings(ss);
if (status != 0)
goto abort_with_rings;
- if (slice == 0)
+
+ /* only firmware which supports multiple TX queues
+ * supports setting up the tx stats on non-zero
+ * slices */
+ if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
status = myri10ge_set_stats(mgp, slice);
if (status) {
printk(KERN_ERR
@@ -2428,7 +2492,8 @@
mgp->running = MYRI10GE_ETH_RUNNING;
mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
add_timer(&mgp->watchdog_timer);
- netif_wake_queue(dev);
+ netif_tx_wake_all_queues(dev);
+
return 0;
abort_with_rings:
@@ -2461,7 +2526,8 @@
napi_disable(&mgp->ss[i].napi);
}
netif_carrier_off(dev);
- netif_stop_queue(dev);
+
+ netif_tx_stop_all_queues(dev);
old_down_cnt = mgp->down_cnt;
mb();
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
@@ -2566,18 +2632,23 @@
struct mcp_kreq_ether_send *req;
struct myri10ge_tx_buf *tx;
struct skb_frag_struct *frag;
+ struct netdev_queue *netdev_queue;
dma_addr_t bus;
u32 low;
__be32 high_swapped;
unsigned int len;
int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
- u16 pseudo_hdr_offset, cksum_offset;
+ u16 pseudo_hdr_offset, cksum_offset, queue;
int cum_len, seglen, boundary, rdma_count;
u8 flags, odd_flag;
- /* always transmit through slot 0 */
- ss = mgp->ss;
+ queue = skb_get_queue_mapping(skb);
+ queue &= (mgp->num_slices - 1);
+
+ ss = &mgp->ss[queue];
+ netdev_queue = netdev_get_tx_queue(mgp->dev, queue);
tx = &ss->tx;
+
again:
req = tx->req_list;
avail = tx->mask - 1 - (tx->req - tx->done);
@@ -2593,7 +2664,7 @@
if ((unlikely(avail < max_segments))) {
/* we are out of transmit resources */
tx->stop_queue++;
- netif_stop_queue(dev);
+ netif_tx_stop_queue(netdev_queue);
return 1;
}
@@ -2786,10 +2857,16 @@
idx = ((count - 1) + tx->req) & tx->mask;
tx->info[idx].last = 1;
myri10ge_submit_req(tx, tx->req_list, count);
+ /* if using multiple tx queues, make sure NIC polls the
+ * current slice */
+ if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) {
+ tx->queue_active = 1;
+ put_be32(htonl(1), tx->send_go);
+ }
tx->pkt_start++;
if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
tx->stop_queue++;
- netif_stop_queue(dev);
+ netif_tx_stop_queue(netdev_queue);
}
dev->trans_start = jiffies;
return 0;
@@ -3367,20 +3444,21 @@
for (i = 0; i < mgp->num_slices; i++) {
tx = &mgp->ss[i].tx;
printk(KERN_INFO
- "myri10ge: %s: (%d): %d %d %d %d %d\n",
- mgp->dev->name, i, tx->req, tx->done,
- tx->pkt_start, tx->pkt_done,
+ "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
+ mgp->dev->name, i, tx->queue_active, tx->req,
+ tx->done, tx->pkt_start, tx->pkt_done,
(int)ntohl(mgp->ss[i].fw_stats->
send_done_count));
msleep(2000);
printk(KERN_INFO
- "myri10ge: %s: (%d): %d %d %d %d %d\n",
- mgp->dev->name, i, tx->req, tx->done,
- tx->pkt_start, tx->pkt_done,
+ "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
+ mgp->dev->name, i, tx->queue_active, tx->req,
+ tx->done, tx->pkt_start, tx->pkt_done,
(int)ntohl(mgp->ss[i].fw_stats->
send_done_count));
}
}
+
rtnl_lock();
myri10ge_close(mgp->dev);
status = myri10ge_load_firmware(mgp, 1);
@@ -3435,10 +3513,14 @@
/* nic seems like it might be stuck.. */
if (rx_pause_cnt != mgp->watchdog_pause) {
if (net_ratelimit())
- printk(KERN_WARNING "myri10ge %s:"
+ printk(KERN_WARNING
+ "myri10ge %s slice %d:"
"TX paused, check link partner\n",
- mgp->dev->name);
+ mgp->dev->name, i);
} else {
+ printk(KERN_WARNING
+ "myri10ge %s slice %d stuck:",
+ mgp->dev->name, i);
reset_needed = 1;
}
}
@@ -3624,6 +3706,9 @@
mgp->num_slices);
if (status == 0) {
pci_disable_msix(pdev);
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ mgp->features |= NETIF_F_MULTI_QUEUE;
+#endif
return;
}
if (status > 0)
@@ -3653,7 +3738,7 @@
int status = -ENXIO;
int dac_enabled;
- netdev = alloc_etherdev(sizeof(*mgp));
+ netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
if (netdev == NULL) {
dev_err(dev, "Could not allocate ethernet device\n");
return -ENOMEM;
@@ -3758,7 +3843,7 @@
dev_err(&pdev->dev, "failed to alloc slice state\n");
goto abort_with_firmware;
}
-
+ netdev->real_num_tx_queues = mgp->num_slices;
status = myri10ge_reset(mgp);
if (status != 0) {
dev_err(&pdev->dev, "failed reset\n");
@@ -3782,6 +3867,7 @@
netdev->set_multicast_list = myri10ge_set_multicast_list;
netdev->set_mac_address = myri10ge_set_mac_address;
netdev->features = mgp->features;
+
if (dac_enabled)
netdev->features |= NETIF_F_HIGHDMA;
@@ -3937,8 +4023,7 @@
printk(KERN_INFO "%s: Version %s\n", myri10ge_driver.name,
MYRI10GE_VERSION_STR);
- if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_SRC_PORT ||
- myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
+ if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
printk(KERN_ERR
"%s: Illegal rssh hash type %d, defaulting to source port\n",
myri10ge_driver.name, myri10ge_rss_hash);
@@ -3947,6 +4032,8 @@
#if (defined CONFIG_DCA) || (defined CONFIG_DCA_MODULE)
dca_register_notify(&myri10ge_dca_notifier);
#endif
+ if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
+ myri10ge_max_slices = MYRI10GE_MAX_SLICES;
return pci_register_driver(&myri10ge_driver);
}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support
2008-09-25 4:15 Brice Goglin
@ 2008-09-25 19:07 ` Jeff Garzik
2008-09-25 20:20 ` Brice Goglin
0 siblings, 1 reply; 6+ messages in thread
From: Jeff Garzik @ 2008-09-25 19:07 UTC (permalink / raw)
To: Brice Goglin; +Cc: netdev
Brice Goglin wrote:
> Add multiqueue TX support to myri10ge.
>
> Signed-off-by: Brice Goglin <brice@myri.com>
Did not apply to net-next successfully... I dropped the two patches you
submitted, as discussed in the thread where DaveM replied. Maybe that
drop is tripping us up?
Let's resync based on DaveM's current net-next, ok?
Jeff
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support
2008-09-25 19:07 ` Jeff Garzik
@ 2008-09-25 20:20 ` Brice Goglin
0 siblings, 0 replies; 6+ messages in thread
From: Brice Goglin @ 2008-09-25 20:20 UTC (permalink / raw)
To: Jeff Garzik; +Cc: netdev
Jeff Garzik wrote:
> Brice Goglin wrote:
>> Add multiqueue TX support to myri10ge.
>> Signed-off-by: Brice Goglin <brice@myri.com>
>
> Did not apply to net-next successfully... I dropped the two patches
> you submitted, as discussed in the thread where DaveM replied. Maybe
> that drop is tripping us up?
>
> Let's resync based on DaveM's current net-next, ok?
Well, I am puzzled. The last patch I sent applies fine on top of DaveM's
net-next here. We're talking about the master branch of
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6.git,
right?
Actually, the same patch also applies to Linus' tree (with a bit of
offset/fuzz) since the only differences are 3 small DCA patches. So
I don't understand how you're getting conflicts unless you have some
remnants of the nacked toeplitz stuff.
Brice
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2008-09-25 20:19 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-20 17:45 [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support Brice Goglin
2008-09-25 0:51 ` Jeff Garzik
-- strict thread matches above, loose matches on Subject: below --
2008-09-25 3:42 Brice Goglin
2008-09-25 4:15 Brice Goglin
2008-09-25 19:07 ` Jeff Garzik
2008-09-25 20:20 ` Brice Goglin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).