All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brice Goglin <brice@myri.com>
To: Jeff Garzik <jeff@garzik.org>
Cc: netdev@vger.kernel.org
Subject: [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support
Date: Thu, 25 Sep 2008 06:15:18 +0200	[thread overview]
Message-ID: <48DB1056.5040206@myri.com> (raw)

Add multiqueue TX support to myri10ge.
    
Signed-off-by: Brice Goglin <brice@myri.com>

--- linux-2.6.git.orig/drivers/net/myri10ge/myri10ge.c	2008-09-20 09:52:39.000000000 +0200
+++ linux-2.6.git/drivers/net/myri10ge/myri10ge.c	2008-09-20 09:53:09.000000000 +0200
@@ -102,6 +102,8 @@
 #define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
 #define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)
 
+#define MYRI10GE_MAX_SLICES 32
+
 struct myri10ge_rx_buffer_state {
 	struct page *page;
 	int page_offset;
@@ -138,6 +140,8 @@
 
 struct myri10ge_tx_buf {
 	struct mcp_kreq_ether_send __iomem *lanai;	/* lanai ptr for sendq */
+	__be32 __iomem *send_go;	/* "go" doorbell ptr */
+	__be32 __iomem *send_stop;	/* "stop" doorbell ptr */
 	struct mcp_kreq_ether_send *req_list;	/* host shadow of sendq */
 	char *req_bytes;
 	struct myri10ge_tx_buffer_state *info;
@@ -149,6 +153,7 @@
 	int done ____cacheline_aligned;	/* transmit slots completed     */
 	int pkt_done;		/* packets completed */
 	int wake_queue;
+	int queue_active;
 };
 
 struct myri10ge_rx_done {
@@ -418,6 +423,12 @@
 			return -ENOSYS;
 		} else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
 			return -E2BIG;
+		} else if (result == MXGEFW_CMD_ERROR_RANGE &&
+			   cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
+			   (data->
+			    data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) !=
+			   0) {
+			return -ERANGE;
 		} else {
 			dev_err(&mgp->pdev->dev,
 				"command %d failed, result = %d\n",
@@ -947,9 +958,24 @@
 		 */
 
 		cmd.data0 = mgp->num_slices;
-		cmd.data1 = 1;	/* use MSI-X */
+		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+		if (mgp->dev->real_num_tx_queues > 1)
+			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
 		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
 					   &cmd, 0);
+
+		/* Firmware older than 1.4.32 only supports multiple
+		 * RX queues, so if we get an error, first retry using a
+		 * single TX queue before giving up */
+		if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
+			mgp->dev->real_num_tx_queues = 1;
+			cmd.data0 = mgp->num_slices;
+			cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+			status = myri10ge_send_cmd(mgp,
+						   MXGEFW_CMD_ENABLE_RSS_QUEUES,
+						   &cmd, 0);
+		}
+
 		if (status != 0) {
 			dev_err(&mgp->pdev->dev,
 				"failed to set number of slices\n");
@@ -1317,6 +1343,7 @@
 {
 	struct pci_dev *pdev = ss->mgp->pdev;
 	struct myri10ge_tx_buf *tx = &ss->tx;
+	struct netdev_queue *dev_queue;
 	struct sk_buff *skb;
 	int idx, len;
 
@@ -1350,11 +1377,31 @@
 					       PCI_DMA_TODEVICE);
 		}
 	}
+
+	dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
+	/*
+	 * Make a minimal effort to prevent the NIC from polling an
+	 * idle tx queue.  If we can't get the lock we leave the queue
+	 * active. In this case, either a thread was about to start
+	 * using the queue anyway, or we lost a race and the NIC will
+	 * waste some of its resources polling an inactive queue for a
+	 * while.
+	 */
+
+	if ((ss->mgp->dev->real_num_tx_queues > 1) &&
+	    __netif_tx_trylock(dev_queue)) {
+		if (tx->req == tx->done) {
+			tx->queue_active = 0;
+			put_be32(htonl(1), tx->send_stop);
+		}
+		__netif_tx_unlock(dev_queue);
+	}
+
 	/* start the queue if we've stopped it */
-	if (netif_queue_stopped(ss->dev)
+	if (netif_tx_queue_stopped(dev_queue)
 	    && tx->req - tx->done < (tx->mask >> 1)) {
 		tx->wake_queue++;
-		netif_wake_queue(ss->dev);
+		netif_tx_wake_queue(dev_queue);
 	}
 }
 
@@ -1482,9 +1529,9 @@
 	u32 send_done_count;
 	int i;
 
-	/* an interrupt on a non-zero slice is implicitly valid
-	 * since MSI-X irqs are not shared */
-	if (ss != mgp->ss) {
+	/* an interrupt on a non-zero receive-only slice is implicitly
+	 * valid  since MSI-X irqs are not shared */
+	if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
 		netif_rx_schedule(ss->dev, &ss->napi);
 		return (IRQ_HANDLED);
 	}
@@ -1526,7 +1573,9 @@
 		barrier();
 	}
 
-	myri10ge_check_statblock(mgp);
+	/* Only slice 0 updates stats */
+	if (ss == mgp->ss)
+		myri10ge_check_statblock(mgp);
 
 	put_be32(htonl(3), ss->irq_claim + 1);
 	return (IRQ_HANDLED);
@@ -1884,6 +1933,7 @@
 	/* ensure req_list entries are aligned to 8 bytes */
 	ss->tx.req_list = (struct mcp_kreq_ether_send *)
 	    ALIGN((unsigned long)ss->tx.req_bytes, 8);
+	ss->tx.queue_active = 0;
 
 	bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
 	ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
@@ -2201,11 +2251,14 @@
 	int status;
 
 	ss = &mgp->ss[slice];
-	cmd.data0 = 0;		/* single slice for now */
-	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0);
-	ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
-	    (mgp->sram + cmd.data0);
-
+	status = 0;
+	if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) {
+		cmd.data0 = slice;
+		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET,
+					   &cmd, 0);
+		ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
+		    (mgp->sram + cmd.data0);
+	}
 	cmd.data0 = slice;
 	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET,
 				    &cmd, 0);
@@ -2217,6 +2270,10 @@
 	ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *)
 	    (mgp->sram + cmd.data0);
 
+	ss->tx.send_go = (__iomem __be32 *)
+	    (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
+	ss->tx.send_stop = (__iomem __be32 *)
+	    (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
 	return status;
 
 }
@@ -2230,7 +2287,7 @@
 	ss = &mgp->ss[slice];
 	cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
 	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
-	cmd.data2 = sizeof(struct mcp_irq_data);
+	cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
 	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
 	if (status == -ENOSYS) {
 		dma_addr_t bus = ss->fw_stats_bus;
@@ -2271,7 +2328,9 @@
 
 	if (mgp->num_slices > 1) {
 		cmd.data0 = mgp->num_slices;
-		cmd.data1 = 1;	/* use MSI-X */
+		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
+		if (mgp->dev->real_num_tx_queues > 1)
+			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
 		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
 					   &cmd, 0);
 		if (status != 0) {
@@ -2292,6 +2351,7 @@
 			printk(KERN_ERR
 			       "myri10ge: %s: failed to setup rss tables\n",
 			       dev->name);
+			goto abort_with_nothing;
 		}
 
 		/* just enable an identity mapping */
@@ -2362,7 +2422,11 @@
 		status = myri10ge_allocate_rings(ss);
 		if (status != 0)
 			goto abort_with_rings;
-		if (slice == 0)
+
+		/* only firmware which supports multiple TX queues
+		 * supports setting up the tx stats on non-zero
+		 * slices */
+		if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
 			status = myri10ge_set_stats(mgp, slice);
 		if (status) {
 			printk(KERN_ERR
@@ -2428,7 +2492,8 @@
 	mgp->running = MYRI10GE_ETH_RUNNING;
 	mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
 	add_timer(&mgp->watchdog_timer);
-	netif_wake_queue(dev);
+	netif_tx_wake_all_queues(dev);
+
 	return 0;
 
 abort_with_rings:
@@ -2461,7 +2526,8 @@
 		napi_disable(&mgp->ss[i].napi);
 	}
 	netif_carrier_off(dev);
-	netif_stop_queue(dev);
+
+	netif_tx_stop_all_queues(dev);
 	old_down_cnt = mgp->down_cnt;
 	mb();
 	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
@@ -2566,18 +2632,23 @@
 	struct mcp_kreq_ether_send *req;
 	struct myri10ge_tx_buf *tx;
 	struct skb_frag_struct *frag;
+	struct netdev_queue *netdev_queue;
 	dma_addr_t bus;
 	u32 low;
 	__be32 high_swapped;
 	unsigned int len;
 	int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
-	u16 pseudo_hdr_offset, cksum_offset;
+	u16 pseudo_hdr_offset, cksum_offset, queue;
 	int cum_len, seglen, boundary, rdma_count;
 	u8 flags, odd_flag;
 
-	/* always transmit through slot 0 */
-	ss = mgp->ss;
+	queue = skb_get_queue_mapping(skb);
+	queue &= (mgp->num_slices - 1);
+
+	ss = &mgp->ss[queue];
+	netdev_queue = netdev_get_tx_queue(mgp->dev, queue);
 	tx = &ss->tx;
+
 again:
 	req = tx->req_list;
 	avail = tx->mask - 1 - (tx->req - tx->done);
@@ -2593,7 +2664,7 @@
 	if ((unlikely(avail < max_segments))) {
 		/* we are out of transmit resources */
 		tx->stop_queue++;
-		netif_stop_queue(dev);
+		netif_tx_stop_queue(netdev_queue);
 		return 1;
 	}
 
@@ -2786,10 +2857,16 @@
 	idx = ((count - 1) + tx->req) & tx->mask;
 	tx->info[idx].last = 1;
 	myri10ge_submit_req(tx, tx->req_list, count);
+	/* if using multiple tx queues, make sure NIC polls the
+	 * current slice */
+	if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) {
+		tx->queue_active = 1;
+		put_be32(htonl(1), tx->send_go);
+	}
 	tx->pkt_start++;
 	if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
 		tx->stop_queue++;
-		netif_stop_queue(dev);
+		netif_tx_stop_queue(netdev_queue);
 	}
 	dev->trans_start = jiffies;
 	return 0;
@@ -3367,20 +3444,21 @@
 		for (i = 0; i < mgp->num_slices; i++) {
 			tx = &mgp->ss[i].tx;
 			printk(KERN_INFO
-			       "myri10ge: %s: (%d): %d %d %d %d %d\n",
-			       mgp->dev->name, i, tx->req, tx->done,
-			       tx->pkt_start, tx->pkt_done,
+			       "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
+			       mgp->dev->name, i, tx->queue_active, tx->req,
+			       tx->done, tx->pkt_start, tx->pkt_done,
 			       (int)ntohl(mgp->ss[i].fw_stats->
 					  send_done_count));
 			msleep(2000);
 			printk(KERN_INFO
-			       "myri10ge: %s: (%d): %d %d %d %d %d\n",
-			       mgp->dev->name, i, tx->req, tx->done,
-			       tx->pkt_start, tx->pkt_done,
+			       "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
+			       mgp->dev->name, i, tx->queue_active, tx->req,
+			       tx->done, tx->pkt_start, tx->pkt_done,
 			       (int)ntohl(mgp->ss[i].fw_stats->
 					  send_done_count));
 		}
 	}
+
 	rtnl_lock();
 	myri10ge_close(mgp->dev);
 	status = myri10ge_load_firmware(mgp, 1);
@@ -3435,10 +3513,14 @@
 			/* nic seems like it might be stuck.. */
 			if (rx_pause_cnt != mgp->watchdog_pause) {
 				if (net_ratelimit())
-					printk(KERN_WARNING "myri10ge %s:"
+					printk(KERN_WARNING
+					       "myri10ge %s slice %d:"
 					       "TX paused, check link partner\n",
-					       mgp->dev->name);
+					       mgp->dev->name, i);
 			} else {
+				printk(KERN_WARNING
+				       "myri10ge %s slice %d stuck:",
+				       mgp->dev->name, i);
 				reset_needed = 1;
 			}
 		}
@@ -3624,6 +3706,9 @@
 					 mgp->num_slices);
 		if (status == 0) {
 			pci_disable_msix(pdev);
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+			mgp->features |= NETIF_F_MULTI_QUEUE;
+#endif
 			return;
 		}
 		if (status > 0)
@@ -3653,7 +3738,7 @@
 	int status = -ENXIO;
 	int dac_enabled;
 
-	netdev = alloc_etherdev(sizeof(*mgp));
+	netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
 	if (netdev == NULL) {
 		dev_err(dev, "Could not allocate ethernet device\n");
 		return -ENOMEM;
@@ -3758,7 +3843,7 @@
 		dev_err(&pdev->dev, "failed to alloc slice state\n");
 		goto abort_with_firmware;
 	}
-
+	netdev->real_num_tx_queues = mgp->num_slices;
 	status = myri10ge_reset(mgp);
 	if (status != 0) {
 		dev_err(&pdev->dev, "failed reset\n");
@@ -3782,6 +3867,7 @@
 	netdev->set_multicast_list = myri10ge_set_multicast_list;
 	netdev->set_mac_address = myri10ge_set_mac_address;
 	netdev->features = mgp->features;
+
 	if (dac_enabled)
 		netdev->features |= NETIF_F_HIGHDMA;
 
@@ -3937,8 +4023,7 @@
 	printk(KERN_INFO "%s: Version %s\n", myri10ge_driver.name,
 	       MYRI10GE_VERSION_STR);
 
-	if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_SRC_PORT ||
-	    myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
+	if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
 		printk(KERN_ERR
 		       "%s: Illegal rssh hash type %d, defaulting to source port\n",
 		       myri10ge_driver.name, myri10ge_rss_hash);
@@ -3947,6 +4032,8 @@
 #if (defined CONFIG_DCA) || (defined CONFIG_DCA_MODULE)
 	dca_register_notify(&myri10ge_dca_notifier);
 #endif
+	if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
+		myri10ge_max_slices = MYRI10GE_MAX_SLICES;
 
 	return pci_register_driver(&myri10ge_driver);
 }



             reply	other threads:[~2008-09-25  4:16 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-25  4:15 Brice Goglin [this message]
2008-09-25 19:07 ` [RESEND][PATCH 2.6.28] myri10ge: Add multiqueue TX support Jeff Garzik
2008-09-25 20:20   ` Brice Goglin
  -- strict thread matches above, loose matches on Subject: below --
2008-09-25  3:42 Brice Goglin
2008-09-20 17:45 Brice Goglin
2008-09-25  0:51 ` Jeff Garzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48DB1056.5040206@myri.com \
    --to=brice@myri.com \
    --cc=jeff@garzik.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.