Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v1 06/12] amd-xgbe: Add support for per DMA channel interrupts
From: Tom Lendacky @ 2014-11-04 22:06 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

This patch provides support for interrupts that are generated by the
Tx/Rx DMA channel pairs of the device.  This allows for Tx and Rx
processing to run across multiple processors.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 Documentation/devicetree/bindings/net/amd-xgbe.txt |   12 +
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c           |   12 +
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c           |  226 ++++++++++++++++----
 drivers/net/ethernet/amd/xgbe/xgbe-main.c          |   10 +
 drivers/net/ethernet/amd/xgbe/xgbe.h               |   10 +
 5 files changed, 219 insertions(+), 51 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt
index 41354f7..26efd52 100644
--- a/Documentation/devicetree/bindings/net/amd-xgbe.txt
+++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt
@@ -7,7 +7,10 @@ Required properties:
    - PCS registers
 - interrupt-parent: Should be the phandle for the interrupt controller
   that services interrupts for this device
-- interrupts: Should contain the amd-xgbe interrupt
+- interrupts: Should contain the amd-xgbe interrupt(s). The first interrupt
+  listed is required and is the general device interrupt. If the optional
+  amd,per-channel-interrupt property is specified, then one additional
+  interrupt for each DMA channel supported by the device should be specified
 - clocks:
    - DMA clock for the amd-xgbe device (used for calculating the
      correct Rx interrupt watchdog timer value on a DMA channel
@@ -23,6 +26,9 @@ Optional properties:
 - mac-address: mac address to be assigned to the device. Can be overridden
   by UEFI.
 - dma-coherent: Present if dma operations are coherent
+- amd,per-channel-interrupt: Indicates that Rx and Tx complete will generate
+  a unique interrupt for each DMA channel - this requires an additional
+  interrupt be configured for each DMA channel
 
 Example:
 	xgbe@e0700000 {
@@ -30,7 +36,9 @@ Example:
 		reg = <0 0xe0700000 0 0x80000>,
 		      <0 0xe0780000 0 0x80000>;
 		interrupt-parent = <&gic>;
-		interrupts = <0 325 4>;
+		interrupts = <0 325 4>,
+			     <0 326 1>, <0 327 1>, <0 328 1>, <0 329 1>;
+		amd,per-channel-interrupt;
 		clocks = <&xgbe_dma_clk>, <&xgbe_ptp_clk>;
 		clock-names = "dma_clk", "ptp_clk";
 		phy-handle = <&phy>;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index b3719f1..ac3d319 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -481,17 +481,21 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
 
 		if (channel->tx_ring) {
 			/* Enable the following Tx interrupts
-			 *   TIE  - Transmit Interrupt Enable (unless polling)
+			 *   TIE  - Transmit Interrupt Enable (unless using
+			 *          per channel interrupts)
 			 */
-			XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
+			if (!pdata->per_channel_irq)
+				XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
 		}
 		if (channel->rx_ring) {
 			/* Enable following Rx interrupts
 			 *   RBUE - Receive Buffer Unavailable Enable
-			 *   RIE  - Receive Interrupt Enable
+			 *   RIE  - Receive Interrupt Enable (unless using
+			 *          per channel interrupts)
 			 */
 			XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 1);
-			XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
+			if (!pdata->per_channel_irq)
+				XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
 		}
 
 		XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, dma_ch_ier);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 07e2d21..c3533e1 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -114,6 +114,7 @@
  *     THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
@@ -126,7 +127,8 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
-static int xgbe_poll(struct napi_struct *, int);
+static int xgbe_one_poll(struct napi_struct *, int);
+static int xgbe_all_poll(struct napi_struct *, int);
 static void xgbe_set_rx_mode(struct net_device *);
 
 static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
@@ -134,6 +136,7 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 	struct xgbe_channel *channel_mem, *channel;
 	struct xgbe_ring *tx_ring, *rx_ring;
 	unsigned int count, i;
+	int ret = -ENOMEM;
 
 	count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
 
@@ -158,6 +161,19 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 		channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
 				    (DMA_CH_INC * i);
 
+		if (pdata->per_channel_irq) {
+			/* Get the DMA interrupt (offset 1) */
+			ret = platform_get_irq(pdata->pdev, i + 1);
+			if (ret < 0) {
+				netdev_err(pdata->netdev,
+					   "platform_get_irq %u failed\n",
+					   i + 1);
+				goto err_irq;
+			}
+
+			channel->dma_irq = ret;
+		}
+
 		if (i < pdata->tx_ring_count) {
 			spin_lock_init(&tx_ring->lock);
 			channel->tx_ring = tx_ring++;
@@ -168,9 +184,9 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 			channel->rx_ring = rx_ring++;
 		}
 
-		DBGPR("  %s - queue_index=%u, dma_regs=%p, tx=%p, rx=%p\n",
+		DBGPR("  %s: queue=%u, dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n",
 		      channel->name, channel->queue_index, channel->dma_regs,
-		      channel->tx_ring, channel->rx_ring);
+		      channel->dma_irq, channel->tx_ring, channel->rx_ring);
 	}
 
 	pdata->channel = channel_mem;
@@ -178,6 +194,9 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 
 	return 0;
 
+err_irq:
+	kfree(rx_ring);
+
 err_rx_ring:
 	kfree(tx_ring);
 
@@ -185,9 +204,7 @@ err_tx_ring:
 	kfree(channel_mem);
 
 err_channel:
-	netdev_err(pdata->netdev, "channel allocation failed\n");
-
-	return -ENOMEM;
+	return ret;
 }
 
 static void xgbe_free_channels(struct xgbe_prv_data *pdata)
@@ -287,11 +304,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 	if (!dma_isr)
 		goto isr_done;
 
-	DBGPR("-->xgbe_isr\n");
-
 	DBGPR("  DMA_ISR = %08x\n", dma_isr);
-	DBGPR("  DMA_DS0 = %08x\n", XGMAC_IOREAD(pdata, DMA_DSR0));
-	DBGPR("  DMA_DS1 = %08x\n", XGMAC_IOREAD(pdata, DMA_DSR1));
 
 	for (i = 0; i < pdata->channel_count; i++) {
 		if (!(dma_isr & (1 << i)))
@@ -302,6 +315,10 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 		dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
 		DBGPR("  DMA_CH%u_ISR = %08x\n", i, dma_ch_isr);
 
+		/* If we get a TI or RI interrupt that means per channel DMA
+		 * interrupts are not enabled, so we use the private data napi
+		 * structure, not the per channel napi structure
+		 */
 		if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, TI) ||
 		    XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RI)) {
 			if (napi_schedule_prep(&pdata->napi)) {
@@ -344,12 +361,28 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 
 	DBGPR("  DMA_ISR = %08x\n", XGMAC_IOREAD(pdata, DMA_ISR));
 
-	DBGPR("<--xgbe_isr\n");
-
 isr_done:
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t xgbe_dma_isr(int irq, void *data)
+{
+	struct xgbe_channel *channel = data;
+
+	/* Per channel DMA interrupts are enabled, so we use the per
+	 * channel napi structure and not the private data napi structure
+	 */
+	if (napi_schedule_prep(&channel->napi)) {
+		/* Disable Tx and Rx interrupts */
+		disable_irq(channel->dma_irq);
+
+		/* Turn on polling */
+		__napi_schedule(&channel->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
 static enum hrtimer_restart xgbe_tx_timer(struct hrtimer *timer)
 {
 	struct xgbe_channel *channel = container_of(timer,
@@ -357,18 +390,24 @@ static enum hrtimer_restart xgbe_tx_timer(struct hrtimer *timer)
 						    tx_timer);
 	struct xgbe_ring *ring = channel->tx_ring;
 	struct xgbe_prv_data *pdata = channel->pdata;
+	struct napi_struct *napi;
 	unsigned long flags;
 
 	DBGPR("-->xgbe_tx_timer\n");
 
+	napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+
 	spin_lock_irqsave(&ring->lock, flags);
 
-	if (napi_schedule_prep(&pdata->napi)) {
+	if (napi_schedule_prep(napi)) {
 		/* Disable Tx and Rx interrupts */
-		xgbe_disable_rx_tx_ints(pdata);
+		if (pdata->per_channel_irq)
+			disable_irq(channel->dma_irq);
+		else
+			xgbe_disable_rx_tx_ints(pdata);
 
 		/* Turn on polling */
-		__napi_schedule(&pdata->napi);
+		__napi_schedule(napi);
 	}
 
 	channel->tx_timer_active = 0;
@@ -504,18 +543,46 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata)
 
 static void xgbe_napi_enable(struct xgbe_prv_data *pdata, unsigned int add)
 {
-	if (add)
-		netif_napi_add(pdata->netdev, &pdata->napi, xgbe_poll,
-			       NAPI_POLL_WEIGHT);
-	napi_enable(&pdata->napi);
+	struct xgbe_channel *channel;
+	unsigned int i;
+
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++) {
+			if (add)
+				netif_napi_add(pdata->netdev, &channel->napi,
+					       xgbe_one_poll, NAPI_POLL_WEIGHT);
+
+			napi_enable(&channel->napi);
+		}
+	} else {
+		if (add)
+			netif_napi_add(pdata->netdev, &pdata->napi,
+				       xgbe_all_poll, NAPI_POLL_WEIGHT);
+
+		napi_enable(&pdata->napi);
+	}
 }
 
 static void xgbe_napi_disable(struct xgbe_prv_data *pdata, unsigned int del)
 {
-	napi_disable(&pdata->napi);
+	struct xgbe_channel *channel;
+	unsigned int i;
+
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++) {
+			napi_disable(&channel->napi);
 
-	if (del)
-		netif_napi_del(&pdata->napi);
+			if (del)
+				netif_napi_del(&channel->napi);
+		}
+	} else {
+		napi_disable(&pdata->napi);
+
+		if (del)
+			netif_napi_del(&pdata->napi);
+	}
 }
 
 void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata)
@@ -828,7 +895,9 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 
 static void xgbe_restart_dev(struct xgbe_prv_data *pdata, unsigned int reset)
 {
+	struct xgbe_channel *channel;
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned int i;
 
 	DBGPR("-->xgbe_restart_dev\n");
 
@@ -837,7 +906,12 @@ static void xgbe_restart_dev(struct xgbe_prv_data *pdata, unsigned int reset)
 		return;
 
 	xgbe_stop(pdata);
-	synchronize_irq(pdata->irq_number);
+	synchronize_irq(pdata->dev_irq);
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++)
+			synchronize_irq(channel->dma_irq);
+	}
 
 	xgbe_free_tx_data(pdata);
 	xgbe_free_rx_data(pdata);
@@ -1165,6 +1239,9 @@ static int xgbe_open(struct net_device *netdev)
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_channel *channel = NULL;
+	char dma_irq_name[IFNAMSIZ + 32];
+	unsigned int i = 0;
 	int ret;
 
 	DBGPR("-->xgbe_open\n");
@@ -1208,14 +1285,32 @@ static int xgbe_open(struct net_device *netdev)
 	INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp);
 
 	/* Request interrupts */
-	ret = devm_request_irq(pdata->dev, netdev->irq, xgbe_isr, 0,
+	ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0,
 			       netdev->name, pdata);
 	if (ret) {
 		netdev_alert(netdev, "error requesting irq %d\n",
-			     pdata->irq_number);
+			     pdata->dev_irq);
 		goto err_rings;
 	}
-	pdata->irq_number = netdev->irq;
+
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++) {
+			snprintf(dma_irq_name, sizeof(dma_irq_name) - 1,
+				 "%s-TxRx-%u", netdev_name(netdev),
+				 channel->queue_index);
+
+			ret = devm_request_irq(pdata->dev, channel->dma_irq,
+					       xgbe_dma_isr, 0, dma_irq_name,
+					       channel);
+			if (ret) {
+				netdev_alert(netdev,
+					     "error requesting irq %d\n",
+					     channel->dma_irq);
+				goto err_irq;
+			}
+		}
+	}
 
 	ret = xgbe_start(pdata);
 	if (ret)
@@ -1228,8 +1323,14 @@ static int xgbe_open(struct net_device *netdev)
 err_start:
 	hw_if->exit(pdata);
 
-	devm_free_irq(pdata->dev, pdata->irq_number, pdata);
-	pdata->irq_number = 0;
+err_irq:
+	if (pdata->per_channel_irq) {
+		/* Using an unsigned int, 'i' will go to UINT_MAX and exit */
+		for (i--, channel--; i < pdata->channel_count; i--, channel--)
+			devm_free_irq(pdata->dev, channel->dma_irq, channel);
+	}
+
+	devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
 
 err_rings:
 	desc_if->free_ring_resources(pdata);
@@ -1254,6 +1355,8 @@ static int xgbe_close(struct net_device *netdev)
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_channel *channel;
+	unsigned int i;
 
 	DBGPR("-->xgbe_close\n");
 
@@ -1269,10 +1372,12 @@ static int xgbe_close(struct net_device *netdev)
 	/* Free the channel and ring structures */
 	xgbe_free_channels(pdata);
 
-	/* Release the interrupt */
-	if (pdata->irq_number != 0) {
-		devm_free_irq(pdata->dev, pdata->irq_number, pdata);
-		pdata->irq_number = 0;
+	/* Release the interrupts */
+	devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++)
+			devm_free_irq(pdata->dev, channel->dma_irq, channel);
 	}
 
 	/* Disable the clocks */
@@ -1505,14 +1610,20 @@ static int xgbe_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto,
 static void xgbe_poll_controller(struct net_device *netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_channel *channel;
+	unsigned int i;
 
 	DBGPR("-->xgbe_poll_controller\n");
 
-	disable_irq(pdata->irq_number);
-
-	xgbe_isr(pdata->irq_number, pdata);
-
-	enable_irq(pdata->irq_number);
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++)
+			xgbe_dma_isr(channel->dma_irq, channel);
+	} else {
+		disable_irq(pdata->dev_irq);
+		xgbe_isr(pdata->dev_irq, pdata);
+		enable_irq(pdata->dev_irq);
+	}
 
 	DBGPR("<--xgbe_poll_controller\n");
 }
@@ -1704,6 +1815,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
 	struct xgbe_ring_data *rdata;
 	struct xgbe_packet_data *packet;
 	struct net_device *netdev = pdata->netdev;
+	struct napi_struct *napi;
 	struct sk_buff *skb;
 	struct skb_shared_hwtstamps *hwtstamps;
 	unsigned int incomplete, error, context_next, context;
@@ -1717,6 +1829,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
 	if (!ring)
 		return 0;
 
+	napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+
 	rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
 	packet = &ring->packet_data;
 	while (packet_count < budget) {
@@ -1849,10 +1963,10 @@ read_again:
 		skb->dev = netdev;
 		skb->protocol = eth_type_trans(skb, netdev);
 		skb_record_rx_queue(skb, channel->queue_index);
-		skb_mark_napi_id(skb, &pdata->napi);
+		skb_mark_napi_id(skb, napi);
 
 		netdev->last_rx = jiffies;
-		napi_gro_receive(&pdata->napi, skb);
+		napi_gro_receive(napi, skb);
 
 next_packet:
 		packet_count++;
@@ -1874,7 +1988,35 @@ next_packet:
 	return packet_count;
 }
 
-static int xgbe_poll(struct napi_struct *napi, int budget)
+static int xgbe_one_poll(struct napi_struct *napi, int budget)
+{
+	struct xgbe_channel *channel = container_of(napi, struct xgbe_channel,
+						    napi);
+	int processed = 0;
+
+	DBGPR("-->xgbe_one_poll: budget=%d\n", budget);
+
+	/* Cleanup Tx ring first */
+	xgbe_tx_poll(channel);
+
+	/* Process Rx ring next */
+	processed = xgbe_rx_poll(channel, budget);
+
+	/* If we processed everything, we are done */
+	if (processed < budget) {
+		/* Turn off polling */
+		napi_complete(napi);
+
+		/* Enable Tx and Rx interrupts */
+		enable_irq(channel->dma_irq);
+	}
+
+	DBGPR("<--xgbe_one_poll: received = %d\n", processed);
+
+	return processed;
+}
+
+static int xgbe_all_poll(struct napi_struct *napi, int budget)
 {
 	struct xgbe_prv_data *pdata = container_of(napi, struct xgbe_prv_data,
 						   napi);
@@ -1883,7 +2025,7 @@ static int xgbe_poll(struct napi_struct *napi, int budget)
 	int processed, last_processed;
 	unsigned int i;
 
-	DBGPR("-->xgbe_poll: budget=%d\n", budget);
+	DBGPR("-->xgbe_all_poll: budget=%d\n", budget);
 
 	processed = 0;
 	ring_budget = budget / pdata->rx_ring_count;
@@ -1911,7 +2053,7 @@ static int xgbe_poll(struct napi_struct *napi, int budget)
 		xgbe_enable_rx_tx_ints(pdata);
 	}
 
-	DBGPR("<--xgbe_poll: received = %d\n", processed);
+	DBGPR("<--xgbe_all_poll: received = %d\n", processed);
 
 	return processed;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index e5077fd..cff9902 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -264,12 +264,18 @@ static int xgbe_probe(struct platform_device *pdev)
 		pdata->awcache = XGBE_DMA_SYS_AWCACHE;
 	}
 
+	/* Check for per channel interrupt support */
+	if (of_property_read_bool(dev->of_node, XGBE_DMA_IRQS))
+		pdata->per_channel_irq = 1;
+
 	ret = platform_get_irq(pdev, 0);
 	if (ret < 0) {
-		dev_err(dev, "platform_get_irq failed\n");
+		dev_err(dev, "platform_get_irq 0 failed\n");
 		goto err_io;
 	}
-	netdev->irq = ret;
+	pdata->dev_irq = ret;
+
+	netdev->irq = pdata->dev_irq;
 	netdev->base_addr = (unsigned long)pdata->xgmac_regs;
 
 	/* Set all the function pointers */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 1480c9d..55c935f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -173,6 +173,7 @@
 /* Device-tree clock names */
 #define XGBE_DMA_CLOCK		"dma_clk"
 #define XGBE_PTP_CLOCK		"ptp_clk"
+#define XGBE_DMA_IRQS		"amd,per-channel-interrupt"
 
 /* Timestamp support - values based on 50MHz PTP clock
  *   50MHz => 20 nsec
@@ -359,6 +360,12 @@ struct xgbe_channel {
 	unsigned int queue_index;
 	void __iomem *dma_regs;
 
+	/* Per channel interrupt irq number */
+	int dma_irq;
+
+	/* Netdev related settings */
+	struct napi_struct napi;
+
 	unsigned int saved_ier;
 
 	unsigned int tx_timer_active;
@@ -609,7 +616,8 @@ struct xgbe_prv_data {
 	/* XPCS indirect addressing mutex */
 	struct mutex xpcs_mutex;
 
-	int irq_number;
+	int dev_irq;
+	unsigned int per_channel_irq;
 
 	struct xgbe_hw_if hw_if;
 	struct xgbe_desc_if desc_if;

^ permalink raw reply related

* [PATCH net-next v1 05/12] amd-xgbe: Implement split header receive support
From: Tom Lendacky @ 2014-11-04 22:06 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

Provide support for splitting IP packets so that the header and
payload can be sent to different DMA addresses.  This will allow
the IP header to be put into the linear part of the skb while the
payload can be added as frags.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-common.h |    8 +
 drivers/net/ethernet/amd/xgbe/xgbe-desc.c   |  176 +++++++++++++++++----------
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c    |   44 +++++--
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c    |   63 +++++-----
 drivers/net/ethernet/amd/xgbe/xgbe.h        |   21 ++-
 5 files changed, 201 insertions(+), 111 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index caade30..39bcb11 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -207,6 +207,8 @@
 /* DMA channel register entry bit positions and sizes */
 #define DMA_CH_CR_PBLX8_INDEX		16
 #define DMA_CH_CR_PBLX8_WIDTH		1
+#define DMA_CH_CR_SPH_INDEX		24
+#define DMA_CH_CR_SPH_WIDTH		1
 #define DMA_CH_IER_AIE_INDEX		15
 #define DMA_CH_IER_AIE_WIDTH		1
 #define DMA_CH_IER_FBEE_INDEX		12
@@ -429,6 +431,8 @@
 #define MAC_RCR_CST_WIDTH		1
 #define MAC_RCR_DCRCC_INDEX		3
 #define MAC_RCR_DCRCC_WIDTH		1
+#define MAC_RCR_HDSMS_INDEX		12
+#define MAC_RCR_HDSMS_WIDTH		3
 #define MAC_RCR_IPC_INDEX		9
 #define MAC_RCR_IPC_WIDTH		1
 #define MAC_RCR_JE_INDEX		8
@@ -847,6 +851,8 @@
 
 #define RX_NORMAL_DESC0_OVT_INDEX		0
 #define RX_NORMAL_DESC0_OVT_WIDTH		16
+#define RX_NORMAL_DESC2_HL_INDEX		0
+#define RX_NORMAL_DESC2_HL_WIDTH		10
 #define RX_NORMAL_DESC3_CDA_INDEX		27
 #define RX_NORMAL_DESC3_CDA_WIDTH		1
 #define RX_NORMAL_DESC3_CTXT_INDEX		30
@@ -855,6 +861,8 @@
 #define RX_NORMAL_DESC3_ES_WIDTH		1
 #define RX_NORMAL_DESC3_ETLT_INDEX		16
 #define RX_NORMAL_DESC3_ETLT_WIDTH		4
+#define RX_NORMAL_DESC3_FD_INDEX		29
+#define RX_NORMAL_DESC3_FD_WIDTH		1
 #define RX_NORMAL_DESC3_INTE_INDEX		30
 #define RX_NORMAL_DESC3_INTE_WIDTH		1
 #define RX_NORMAL_DESC3_LD_INDEX		28
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index 99911f4..e6b9f54 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -138,15 +138,26 @@ static void xgbe_free_ring(struct xgbe_prv_data *pdata,
 		ring->rdata = NULL;
 	}
 
-	if (ring->rx_pa.pages) {
-		dma_unmap_page(pdata->dev, ring->rx_pa.pages_dma,
-			       ring->rx_pa.pages_len, DMA_FROM_DEVICE);
-		put_page(ring->rx_pa.pages);
-
-		ring->rx_pa.pages = NULL;
-		ring->rx_pa.pages_len = 0;
-		ring->rx_pa.pages_offset = 0;
-		ring->rx_pa.pages_dma = 0;
+	if (ring->rx_hdr_pa.pages) {
+		dma_unmap_page(pdata->dev, ring->rx_hdr_pa.pages_dma,
+			       ring->rx_hdr_pa.pages_len, DMA_FROM_DEVICE);
+		put_page(ring->rx_hdr_pa.pages);
+
+		ring->rx_hdr_pa.pages = NULL;
+		ring->rx_hdr_pa.pages_len = 0;
+		ring->rx_hdr_pa.pages_offset = 0;
+		ring->rx_hdr_pa.pages_dma = 0;
+	}
+
+	if (ring->rx_buf_pa.pages) {
+		dma_unmap_page(pdata->dev, ring->rx_buf_pa.pages_dma,
+			       ring->rx_buf_pa.pages_len, DMA_FROM_DEVICE);
+		put_page(ring->rx_buf_pa.pages);
+
+		ring->rx_buf_pa.pages = NULL;
+		ring->rx_buf_pa.pages_len = 0;
+		ring->rx_buf_pa.pages_offset = 0;
+		ring->rx_buf_pa.pages_dma = 0;
 	}
 
 	if (ring->rdesc) {
@@ -244,62 +255,93 @@ err_ring:
 	return ret;
 }
 
-static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
-			      struct xgbe_ring *ring,
-			      struct xgbe_ring_data *rdata)
+static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
+			    struct xgbe_page_alloc *pa, gfp_t gfp, int order)
 {
-	if (!ring->rx_pa.pages) {
-		struct page *pages = NULL;
-		dma_addr_t pages_dma;
-		gfp_t gfp;
-		int order, ret;
-
-		/* Try to obtain pages, decreasing order if necessary */
-		gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP;
-		order = max_t(int, PAGE_ALLOC_COSTLY_ORDER, 1);
-		while (--order >= 0) {
-			pages = alloc_pages(gfp, order);
-			if (pages)
-				break;
-		}
-		if (!pages)
-			return -ENOMEM;
+	struct page *pages = NULL;
+	dma_addr_t pages_dma;
+	int ret;
 
-		/* Map the pages */
-		pages_dma = dma_map_page(pdata->dev, pages, 0,
-					 PAGE_SIZE << order, DMA_FROM_DEVICE);
-		ret = dma_mapping_error(pdata->dev, pages_dma);
-		if (ret) {
-			put_page(pages);
-			return ret;
-		}
+	/* Try to obtain pages, decreasing order if necessary */
+	gfp |= __GFP_COLD | __GFP_COMP;
+	while (order >= 0) {
+		pages = alloc_pages(gfp, order);
+		if (pages)
+			break;
 
-		/* Set the values for this ring */
-		ring->rx_pa.pages = pages;
-		ring->rx_pa.pages_len = PAGE_SIZE << order;
-		ring->rx_pa.pages_offset = 0;
-		ring->rx_pa.pages_dma = pages_dma;
+		order--;
 	}
+	if (!pages)
+		return -ENOMEM;
 
-	get_page(ring->rx_pa.pages);
-	rdata->rx_pa = ring->rx_pa;
+	/* Map the pages */
+	pages_dma = dma_map_page(pdata->dev, pages, 0,
+				 PAGE_SIZE << order, DMA_FROM_DEVICE);
+	ret = dma_mapping_error(pdata->dev, pages_dma);
+	if (ret) {
+		put_page(pages);
+		return ret;
+	}
 
-	rdata->rx_dma = ring->rx_pa.pages_dma + ring->rx_pa.pages_offset;
-	rdata->rx_dma_len = pdata->rx_buf_size;
+	pa->pages = pages;
+	pa->pages_len = PAGE_SIZE << order;
+	pa->pages_offset = 0;
+	pa->pages_dma = pages_dma;
 
-	ring->rx_pa.pages_offset += pdata->rx_buf_size;
-	if ((ring->rx_pa.pages_offset + pdata->rx_buf_size) >
-	    ring->rx_pa.pages_len) {
+	return 0;
+}
+
+static void xgbe_set_buffer_data(struct xgbe_buffer_data *bd,
+				 struct xgbe_page_alloc *pa,
+				 unsigned int len)
+{
+	get_page(pa->pages);
+	bd->pa = *pa;
+
+	bd->dma = pa->pages_dma + pa->pages_offset;
+	bd->dma_len = len;
+
+	pa->pages_offset += len;
+	if ((pa->pages_offset + len) > pa->pages_len) {
 		/* This data descriptor is responsible for unmapping page(s) */
-		rdata->rx_unmap = ring->rx_pa;
+		bd->pa_unmap = *pa;
 
 		/* Get a new allocation next time */
-		ring->rx_pa.pages = NULL;
-		ring->rx_pa.pages_len = 0;
-		ring->rx_pa.pages_offset = 0;
-		ring->rx_pa.pages_dma = 0;
+		pa->pages = NULL;
+		pa->pages_len = 0;
+		pa->pages_offset = 0;
+		pa->pages_dma = 0;
+	}
+}
+
+static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
+			      struct xgbe_ring *ring,
+			      struct xgbe_ring_data *rdata)
+{
+	int order, ret;
+
+	if (!ring->rx_hdr_pa.pages) {
+		ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, GFP_ATOMIC, 0);
+		if (ret)
+			return ret;
+	}
+
+	if (!ring->rx_buf_pa.pages) {
+		order = max_t(int, PAGE_ALLOC_COSTLY_ORDER - 1, 0);
+		ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa, GFP_ATOMIC,
+				       order);
+		if (ret)
+			return ret;
 	}
 
+	/* Set up the header page info */
+	xgbe_set_buffer_data(&rdata->rx_hdr, &ring->rx_hdr_pa,
+			     XGBE_SKB_ALLOC_SIZE);
+
+	/* Set up the buffer page info */
+	xgbe_set_buffer_data(&rdata->rx_buf, &ring->rx_buf_pa,
+			     pdata->rx_buf_size);
+
 	return 0;
 }
 
@@ -409,20 +451,28 @@ static void xgbe_unmap_rdata(struct xgbe_prv_data *pdata,
 		rdata->skb = NULL;
 	}
 
-	if (rdata->rx_pa.pages)
-		put_page(rdata->rx_pa.pages);
+	if (rdata->rx_hdr.pa.pages)
+		put_page(rdata->rx_hdr.pa.pages);
 
-	if (rdata->rx_unmap.pages) {
-		dma_unmap_page(pdata->dev, rdata->rx_unmap.pages_dma,
-			       rdata->rx_unmap.pages_len, DMA_FROM_DEVICE);
-		put_page(rdata->rx_unmap.pages);
+	if (rdata->rx_hdr.pa_unmap.pages) {
+		dma_unmap_page(pdata->dev, rdata->rx_hdr.pa_unmap.pages_dma,
+			       rdata->rx_hdr.pa_unmap.pages_len,
+			       DMA_FROM_DEVICE);
+		put_page(rdata->rx_hdr.pa_unmap.pages);
 	}
 
-	memset(&rdata->rx_pa, 0, sizeof(rdata->rx_pa));
-	memset(&rdata->rx_unmap, 0, sizeof(rdata->rx_unmap));
+	if (rdata->rx_buf.pa.pages)
+		put_page(rdata->rx_buf.pa.pages);
+
+	if (rdata->rx_buf.pa_unmap.pages) {
+		dma_unmap_page(pdata->dev, rdata->rx_buf.pa_unmap.pages_dma,
+			       rdata->rx_buf.pa_unmap.pages_len,
+			       DMA_FROM_DEVICE);
+		put_page(rdata->rx_buf.pa_unmap.pages);
+	}
 
-	rdata->rx_dma = 0;
-	rdata->rx_dma_len = 0;
+	memset(&rdata->rx_hdr, 0, sizeof(rdata->rx_hdr));
+	memset(&rdata->rx_buf, 0, sizeof(rdata->rx_buf));
 
 	rdata->tso_header = 0;
 	rdata->len = 0;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 7748b75..b3719f1 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -335,6 +335,22 @@ static void xgbe_config_tso_mode(struct xgbe_prv_data *pdata)
 	}
 }
 
+static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
+	unsigned int i;
+
+	channel = pdata->channel;
+	for (i = 0; i < pdata->channel_count; i++, channel++) {
+		if (!channel->rx_ring)
+			break;
+
+		XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, SPH, 1);
+	}
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE);
+}
+
 static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata)
 {
 	unsigned int max_q_count, q_count;
@@ -920,19 +936,19 @@ static void xgbe_rx_desc_reset(struct xgbe_ring_data *rdata)
 	struct xgbe_ring_desc *rdesc = rdata->rdesc;
 
 	/* Reset the Rx descriptor
-	 *   Set buffer 1 (lo) address to dma address (lo)
-	 *   Set buffer 1 (hi) address to dma address (hi)
-	 *   Set buffer 2 (lo) address to zero
-	 *   Set buffer 2 (hi) address to zero and set control bits
-	 *     OWN and INTE
+	 *   Set buffer 1 (lo) address to header dma address (lo)
+	 *   Set buffer 1 (hi) address to header dma address (hi)
+	 *   Set buffer 2 (lo) address to buffer dma address (lo)
+	 *   Set buffer 2 (hi) address to buffer dma address (hi) and
+	 *     set control bits OWN and INTE
 	 */
-	rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->rx_dma));
-	rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->rx_dma));
-	rdesc->desc2 = 0;
+	rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->rx_hdr.dma));
+	rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->rx_hdr.dma));
+	rdesc->desc2 = cpu_to_le32(lower_32_bits(rdata->rx_buf.dma));
+	rdesc->desc3 = cpu_to_le32(upper_32_bits(rdata->rx_buf.dma));
 
-	rdesc->desc3 = 0;
-	if (rdata->interrupt)
-		XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE, 1);
+	XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE,
+			  rdata->interrupt ? 1 : 0);
 
 	/* Since the Rx DMA engine is likely running, make sure everything
 	 * is written to the descriptor(s) before setting the OWN bit
@@ -1422,6 +1438,11 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
 			       CONTEXT_NEXT, 1);
 
+	/* Get the header length */
+	if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD))
+		rdata->hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
+						   RX_NORMAL_DESC2, HL);
+
 	/* Get the packet length */
 	rdata->len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
 
@@ -2453,6 +2474,7 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
 	xgbe_config_tx_coalesce(pdata);
 	xgbe_config_rx_buffer_size(pdata);
 	xgbe_config_tso_mode(pdata);
+	xgbe_config_sph_mode(pdata);
 	desc_if->wrapper_tx_desc_init(pdata);
 	desc_if->wrapper_rx_desc_init(pdata);
 	xgbe_enable_dma_interrupts(pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index d65f5aa..07e2d21 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1620,31 +1620,25 @@ static void xgbe_rx_refresh(struct xgbe_channel *channel)
 
 static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata,
 				       struct xgbe_ring_data *rdata,
-				       unsigned int len)
+				       unsigned int *len)
 {
 	struct net_device *netdev = pdata->netdev;
 	struct sk_buff *skb;
 	u8 *packet;
 	unsigned int copy_len;
 
-	skb = netdev_alloc_skb_ip_align(netdev, XGBE_SKB_ALLOC_SIZE);
+	skb = netdev_alloc_skb_ip_align(netdev, rdata->rx_hdr.dma_len);
 	if (!skb)
 		return NULL;
 
-	packet = page_address(rdata->rx_pa.pages) + rdata->rx_pa.pages_offset;
-	copy_len = min_t(unsigned int, XGBE_SKB_ALLOC_SIZE, len);
+	packet = page_address(rdata->rx_hdr.pa.pages) +
+		 rdata->rx_hdr.pa.pages_offset;
+	copy_len = (rdata->hdr_len) ? rdata->hdr_len : *len;
+	copy_len = min(rdata->rx_hdr.dma_len, copy_len);
 	skb_copy_to_linear_data(skb, packet, copy_len);
 	skb_put(skb, copy_len);
 
-	rdata->rx_pa.pages_offset += copy_len;
-	len -= copy_len;
-	if (len)
-		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-				rdata->rx_pa.pages,
-				rdata->rx_pa.pages_offset,
-				len, rdata->rx_dma_len);
-	else
-		put_page(rdata->rx_pa.pages);
+	*len -= copy_len;
 
 	return skb;
 }
@@ -1757,10 +1751,6 @@ read_again:
 		ring->cur++;
 		ring->dirty++;
 
-		dma_sync_single_for_cpu(pdata->dev, rdata->rx_dma,
-					rdata->rx_dma_len,
-					DMA_FROM_DEVICE);
-
 		incomplete = XGMAC_GET_BITS(packet->attributes,
 					    RX_PACKET_ATTRIBUTES,
 					    INCOMPLETE);
@@ -1787,19 +1777,30 @@ read_again:
 			len += put_len;
 
 			if (!skb) {
-				skb = xgbe_create_skb(pdata, rdata, put_len);
+				dma_sync_single_for_cpu(pdata->dev,
+							rdata->rx_hdr.dma,
+							rdata->rx_hdr.dma_len,
+							DMA_FROM_DEVICE);
+
+				skb = xgbe_create_skb(pdata, rdata, &put_len);
 				if (!skb) {
 					error = 1;
 					goto read_again;
 				}
-			} else {
-				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-						rdata->rx_pa.pages,
-						rdata->rx_pa.pages_offset,
-						put_len, rdata->rx_dma_len);
 			}
 
-			rdata->rx_pa.pages = NULL;
+			if (put_len) {
+				dma_sync_single_for_cpu(pdata->dev,
+							rdata->rx_buf.dma,
+							rdata->rx_buf.dma_len,
+							DMA_FROM_DEVICE);
+
+				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+						rdata->rx_buf.pa.pages,
+						rdata->rx_buf.pa.pages_offset,
+						put_len, rdata->rx_buf.dma_len);
+				rdata->rx_buf.pa.pages = NULL;
+			}
 		}
 
 		if (incomplete || context_next)
@@ -1924,10 +1925,10 @@ void xgbe_dump_tx_desc(struct xgbe_ring *ring, unsigned int idx,
 	while (count--) {
 		rdata = XGBE_GET_DESC_DATA(ring, idx);
 		rdesc = rdata->rdesc;
-		DBGPR("TX_NORMAL_DESC[%d %s] = %08x:%08x:%08x:%08x\n", idx,
-		      (flag == 1) ? "QUEUED FOR TX" : "TX BY DEVICE",
-		      le32_to_cpu(rdesc->desc0), le32_to_cpu(rdesc->desc1),
-		      le32_to_cpu(rdesc->desc2), le32_to_cpu(rdesc->desc3));
+		pr_alert("TX_NORMAL_DESC[%d %s] = %08x:%08x:%08x:%08x\n", idx,
+			 (flag == 1) ? "QUEUED FOR TX" : "TX BY DEVICE",
+			 le32_to_cpu(rdesc->desc0), le32_to_cpu(rdesc->desc1),
+			 le32_to_cpu(rdesc->desc2), le32_to_cpu(rdesc->desc3));
 		idx++;
 	}
 }
@@ -1935,9 +1936,9 @@ void xgbe_dump_tx_desc(struct xgbe_ring *ring, unsigned int idx,
 void xgbe_dump_rx_desc(struct xgbe_ring *ring, struct xgbe_ring_desc *desc,
 		       unsigned int idx)
 {
-	DBGPR("RX_NORMAL_DESC[%d RX BY DEVICE] = %08x:%08x:%08x:%08x\n", idx,
-	      le32_to_cpu(desc->desc0), le32_to_cpu(desc->desc1),
-	      le32_to_cpu(desc->desc2), le32_to_cpu(desc->desc3));
+	pr_alert("RX_NORMAL_DESC[%d RX BY DEVICE] = %08x:%08x:%08x:%08x\n", idx,
+		 le32_to_cpu(desc->desc0), le32_to_cpu(desc->desc1),
+		 le32_to_cpu(desc->desc2), le32_to_cpu(desc->desc3));
 }
 
 void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index d3aa055..1480c9d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -143,6 +143,7 @@
 #define XGBE_RX_MIN_BUF_SIZE	(ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
 #define XGBE_RX_BUF_ALIGN	64
 #define XGBE_SKB_ALLOC_SIZE	256
+#define XGBE_SPH_HDSMS_SIZE	2	/* Keep in sync with SKB_ALLOC_SIZE */
 
 #define XGBE_MAX_DMA_CHANNELS	16
 #define XGBE_MAX_QUEUES		16
@@ -250,6 +251,15 @@ struct xgbe_page_alloc {
 	dma_addr_t pages_dma;
 };
 
+/* Ring entry buffer data */
+struct xgbe_buffer_data {
+	struct xgbe_page_alloc pa;
+	struct xgbe_page_alloc pa_unmap;
+
+	dma_addr_t dma;
+	unsigned int dma_len;
+};
+
 /* Structure used to hold information related to the descriptor
  * and the packet associated with the descriptor (always use
  * use the XGBE_GET_DESC_DATA macro to access this data from the ring)
@@ -263,12 +273,10 @@ struct xgbe_ring_data {
 	unsigned int skb_dma_len;	/* Length of SKB DMA area */
 	unsigned int tso_header;        /* TSO header indicator */
 
-	struct xgbe_page_alloc rx_pa;	/* Rx buffer page allocation */
-	struct xgbe_page_alloc rx_unmap;
-
-	dma_addr_t rx_dma;		/* DMA address of Rx buffer */
-	unsigned int rx_dma_len;	/* Length of the Rx DMA buffer */
+	struct xgbe_buffer_data rx_hdr;	/* Header locations */
+	struct xgbe_buffer_data rx_buf; /* Payload locations */
 
+	unsigned short hdr_len;		/* Length of received header */
 	unsigned short len;		/* Length of received Rx packet */
 
 	unsigned int interrupt;		/* Interrupt indicator */
@@ -308,7 +316,8 @@ struct xgbe_ring {
 	struct xgbe_ring_data *rdata;
 
 	/* Page allocation for RX buffers */
-	struct xgbe_page_alloc rx_pa;
+	struct xgbe_page_alloc rx_hdr_pa;
+	struct xgbe_page_alloc rx_buf_pa;
 
 	/* Ring index values
 	 *  cur   - Tx: index of descriptor to be used for current transfer

^ permalink raw reply related

* [PATCH net-next v1 04/12] amd-xgbe: Use page allocations for Rx buffers
From: Tom Lendacky @ 2014-11-04 22:06 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

Use page allocations for Rx buffers instead of pre-allocating skbs
of a set size.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-desc.c |  143 ++++++++++++++++++++---------
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c  |   60 +++---------
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c  |   95 ++++++++++++-------
 drivers/net/ethernet/amd/xgbe/xgbe.h      |   25 ++++-
 4 files changed, 196 insertions(+), 127 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index 6fc5da0..99911f4 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -117,7 +117,7 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
-static void xgbe_unmap_skb(struct xgbe_prv_data *, struct xgbe_ring_data *);
+static void xgbe_unmap_rdata(struct xgbe_prv_data *, struct xgbe_ring_data *);
 
 static void xgbe_free_ring(struct xgbe_prv_data *pdata,
 			   struct xgbe_ring *ring)
@@ -131,13 +131,24 @@ static void xgbe_free_ring(struct xgbe_prv_data *pdata,
 	if (ring->rdata) {
 		for (i = 0; i < ring->rdesc_count; i++) {
 			rdata = XGBE_GET_DESC_DATA(ring, i);
-			xgbe_unmap_skb(pdata, rdata);
+			xgbe_unmap_rdata(pdata, rdata);
 		}
 
 		kfree(ring->rdata);
 		ring->rdata = NULL;
 	}
 
+	if (ring->rx_pa.pages) {
+		dma_unmap_page(pdata->dev, ring->rx_pa.pages_dma,
+			       ring->rx_pa.pages_len, DMA_FROM_DEVICE);
+		put_page(ring->rx_pa.pages);
+
+		ring->rx_pa.pages = NULL;
+		ring->rx_pa.pages_len = 0;
+		ring->rx_pa.pages_offset = 0;
+		ring->rx_pa.pages_dma = 0;
+	}
+
 	if (ring->rdesc) {
 		dma_free_coherent(pdata->dev,
 				  (sizeof(struct xgbe_ring_desc) *
@@ -233,6 +244,65 @@ err_ring:
 	return ret;
 }
 
+static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
+			      struct xgbe_ring *ring,
+			      struct xgbe_ring_data *rdata)
+{
+	if (!ring->rx_pa.pages) {
+		struct page *pages = NULL;
+		dma_addr_t pages_dma;
+		gfp_t gfp;
+		int order, ret;
+
+		/* Try to obtain pages, decreasing order if necessary */
+		gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP;
+		order = max_t(int, PAGE_ALLOC_COSTLY_ORDER, 1);
+		while (--order >= 0) {
+			pages = alloc_pages(gfp, order);
+			if (pages)
+				break;
+		}
+		if (!pages)
+			return -ENOMEM;
+
+		/* Map the pages */
+		pages_dma = dma_map_page(pdata->dev, pages, 0,
+					 PAGE_SIZE << order, DMA_FROM_DEVICE);
+		ret = dma_mapping_error(pdata->dev, pages_dma);
+		if (ret) {
+			put_page(pages);
+			return ret;
+		}
+
+		/* Set the values for this ring */
+		ring->rx_pa.pages = pages;
+		ring->rx_pa.pages_len = PAGE_SIZE << order;
+		ring->rx_pa.pages_offset = 0;
+		ring->rx_pa.pages_dma = pages_dma;
+	}
+
+	get_page(ring->rx_pa.pages);
+	rdata->rx_pa = ring->rx_pa;
+
+	rdata->rx_dma = ring->rx_pa.pages_dma + ring->rx_pa.pages_offset;
+	rdata->rx_dma_len = pdata->rx_buf_size;
+
+	ring->rx_pa.pages_offset += pdata->rx_buf_size;
+	if ((ring->rx_pa.pages_offset + pdata->rx_buf_size) >
+	    ring->rx_pa.pages_len) {
+		/* This data descriptor is responsible for unmapping page(s) */
+		rdata->rx_unmap = ring->rx_pa;
+
+		/* Get a new allocation next time */
+		ring->rx_pa.pages = NULL;
+		ring->rx_pa.pages_len = 0;
+		ring->rx_pa.pages_offset = 0;
+		ring->rx_pa.pages_dma = 0;
+	}
+
+	return 0;
+}
+
 static void xgbe_wrapper_tx_descriptor_init(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
@@ -281,8 +351,7 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata)
 	struct xgbe_ring *ring;
 	struct xgbe_ring_desc *rdesc;
 	struct xgbe_ring_data *rdata;
-	dma_addr_t rdesc_dma, skb_dma;
-	struct sk_buff *skb = NULL;
+	dma_addr_t rdesc_dma;
 	unsigned int i, j;
 
 	DBGPR("-->xgbe_wrapper_rx_descriptor_init\n");
@@ -302,22 +371,8 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata)
 			rdata->rdesc = rdesc;
 			rdata->rdesc_dma = rdesc_dma;
 
-			/* Allocate skb & assign to each rdesc */
-			skb = dev_alloc_skb(pdata->rx_buf_size);
-			if (skb == NULL)
-				break;
-			skb_dma = dma_map_single(pdata->dev, skb->data,
-						 pdata->rx_buf_size,
-						 DMA_FROM_DEVICE);
-			if (dma_mapping_error(pdata->dev, skb_dma)) {
-				netdev_alert(pdata->netdev,
-					     "failed to do the dma map\n");
-				dev_kfree_skb_any(skb);
+			if (xgbe_map_rx_buffer(pdata, ring, rdata))
 				break;
-			}
-			rdata->skb = skb;
-			rdata->skb_dma = skb_dma;
-			rdata->skb_dma_len = pdata->rx_buf_size;
 
 			rdesc++;
 			rdesc_dma += sizeof(struct xgbe_ring_desc);
@@ -334,8 +389,8 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata)
 	DBGPR("<--xgbe_wrapper_rx_descriptor_init\n");
 }
 
-static void xgbe_unmap_skb(struct xgbe_prv_data *pdata,
-			   struct xgbe_ring_data *rdata)
+static void xgbe_unmap_rdata(struct xgbe_prv_data *pdata,
+			     struct xgbe_ring_data *rdata)
 {
 	if (rdata->skb_dma) {
 		if (rdata->mapped_as_page) {
@@ -354,6 +409,21 @@ static void xgbe_unmap_skb(struct xgbe_prv_data *pdata,
 		rdata->skb = NULL;
 	}
 
+	if (rdata->rx_pa.pages)
+		put_page(rdata->rx_pa.pages);
+
+	if (rdata->rx_unmap.pages) {
+		dma_unmap_page(pdata->dev, rdata->rx_unmap.pages_dma,
+			       rdata->rx_unmap.pages_len, DMA_FROM_DEVICE);
+		put_page(rdata->rx_unmap.pages);
+	}
+
+	memset(&rdata->rx_pa, 0, sizeof(rdata->rx_pa));
+	memset(&rdata->rx_unmap, 0, sizeof(rdata->rx_unmap));
+
+	rdata->rx_dma = 0;
+	rdata->rx_dma_len = 0;
+
 	rdata->tso_header = 0;
 	rdata->len = 0;
 	rdata->interrupt = 0;
@@ -494,7 +564,7 @@ static int xgbe_map_tx_skb(struct xgbe_channel *channel, struct sk_buff *skb)
 err_out:
 	while (start_index < cur_index) {
 		rdata = XGBE_GET_DESC_DATA(ring, start_index++);
-		xgbe_unmap_skb(pdata, rdata);
+		xgbe_unmap_rdata(pdata, rdata);
 	}
 
 	DBGPR("<--xgbe_map_tx_skb: count=0\n");
@@ -502,40 +572,25 @@ err_out:
 	return 0;
 }
 
-static void xgbe_realloc_skb(struct xgbe_channel *channel)
+static void xgbe_realloc_rx_buffer(struct xgbe_channel *channel)
 {
 	struct xgbe_prv_data *pdata = channel->pdata;
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_ring *ring = channel->rx_ring;
 	struct xgbe_ring_data *rdata;
-	struct sk_buff *skb = NULL;
-	dma_addr_t skb_dma;
 	int i;
 
-	DBGPR("-->xgbe_realloc_skb: rx_ring->rx.realloc_index = %u\n",
+	DBGPR("-->xgbe_realloc_rx_buffer: rx_ring->rx.realloc_index = %u\n",
 	      ring->rx.realloc_index);
 
 	for (i = 0; i < ring->dirty; i++) {
 		rdata = XGBE_GET_DESC_DATA(ring, ring->rx.realloc_index);
 
 		/* Reset rdata values */
-		xgbe_unmap_skb(pdata, rdata);
+		xgbe_unmap_rdata(pdata, rdata);
 
-		/* Allocate skb & assign to each rdesc */
-		skb = dev_alloc_skb(pdata->rx_buf_size);
-		if (skb == NULL)
+		if (xgbe_map_rx_buffer(pdata, ring, rdata))
 			break;
-		skb_dma = dma_map_single(pdata->dev, skb->data,
-					 pdata->rx_buf_size, DMA_FROM_DEVICE);
-		if (dma_mapping_error(pdata->dev, skb_dma)) {
-			netdev_alert(pdata->netdev,
-				     "failed to do the dma map\n");
-			dev_kfree_skb_any(skb);
-			break;
-		}
-		rdata->skb = skb;
-		rdata->skb_dma = skb_dma;
-		rdata->skb_dma_len = pdata->rx_buf_size;
 
 		hw_if->rx_desc_reset(rdata);
 
@@ -543,7 +598,7 @@ static void xgbe_realloc_skb(struct xgbe_channel *channel)
 	}
 	ring->dirty = 0;
 
-	DBGPR("<--xgbe_realloc_skb\n");
+	DBGPR("<--xgbe_realloc_rx_buffer\n");
 }
 
 void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *desc_if)
@@ -553,8 +608,8 @@ void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *desc_if)
 	desc_if->alloc_ring_resources = xgbe_alloc_ring_resources;
 	desc_if->free_ring_resources = xgbe_free_ring_resources;
 	desc_if->map_tx_skb = xgbe_map_tx_skb;
-	desc_if->realloc_skb = xgbe_realloc_skb;
-	desc_if->unmap_skb = xgbe_unmap_skb;
+	desc_if->realloc_rx_buffer = xgbe_realloc_rx_buffer;
+	desc_if->unmap_rdata = xgbe_unmap_rdata;
 	desc_if->wrapper_tx_desc_init = xgbe_wrapper_tx_descriptor_init;
 	desc_if->wrapper_rx_desc_init = xgbe_wrapper_rx_descriptor_init;
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 7b97d38..7748b75 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -880,13 +880,15 @@ static void xgbe_tx_desc_reset(struct xgbe_ring_data *rdata)
 	rdesc->desc1 = 0;
 	rdesc->desc2 = 0;
 	rdesc->desc3 = 0;
+
+	/* Make sure ownership is written to the descriptor */
+	wmb();
 }
 
 static void xgbe_tx_desc_init(struct xgbe_channel *channel)
 {
 	struct xgbe_ring *ring = channel->tx_ring;
 	struct xgbe_ring_data *rdata;
-	struct xgbe_ring_desc *rdesc;
 	int i;
 	int start_index = ring->cur;
 
@@ -895,26 +897,11 @@ static void xgbe_tx_desc_init(struct xgbe_channel *channel)
 	/* Initialze all descriptors */
 	for (i = 0; i < ring->rdesc_count; i++) {
 		rdata = XGBE_GET_DESC_DATA(ring, i);
-		rdesc = rdata->rdesc;
 
-		/* Initialize Tx descriptor
-		 *   Set buffer 1 (lo) address to zero
-		 *   Set buffer 1 (hi) address to zero
-		 *   Reset all other control bits (IC, TTSE, B2L & B1L)
-		 *   Reset all other control bits (OWN, CTXT, FD, LD, CPC, CIC,
-		 *     etc)
-		 */
-		rdesc->desc0 = 0;
-		rdesc->desc1 = 0;
-		rdesc->desc2 = 0;
-		rdesc->desc3 = 0;
+		/* Initialize Tx descriptor */
+		xgbe_tx_desc_reset(rdata);
 	}
 
-	/* Make sure everything is written to the descriptor(s) before
-	 * telling the device about them
-	 */
-	wmb();
-
 	/* Update the total number of Tx descriptors */
 	XGMAC_DMA_IOWRITE(channel, DMA_CH_TDRLR, ring->rdesc_count - 1);
 
@@ -939,8 +926,8 @@ static void xgbe_rx_desc_reset(struct xgbe_ring_data *rdata)
 	 *   Set buffer 2 (hi) address to zero and set control bits
 	 *     OWN and INTE
 	 */
-	rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->skb_dma));
-	rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->skb_dma));
+	rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->rx_dma));
+	rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->rx_dma));
 	rdesc->desc2 = 0;
 
 	rdesc->desc3 = 0;
@@ -964,7 +951,6 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel)
 	struct xgbe_prv_data *pdata = channel->pdata;
 	struct xgbe_ring *ring = channel->rx_ring;
 	struct xgbe_ring_data *rdata;
-	struct xgbe_ring_desc *rdesc;
 	unsigned int start_index = ring->cur;
 	unsigned int rx_coalesce, rx_frames;
 	unsigned int i;
@@ -977,34 +963,16 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel)
 	/* Initialize all descriptors */
 	for (i = 0; i < ring->rdesc_count; i++) {
 		rdata = XGBE_GET_DESC_DATA(ring, i);
-		rdesc = rdata->rdesc;
 
-		/* Initialize Rx descriptor
-		 *   Set buffer 1 (lo) address to dma address (lo)
-		 *   Set buffer 1 (hi) address to dma address (hi)
-		 *   Set buffer 2 (lo) address to zero
-		 *   Set buffer 2 (hi) address to zero and set control
-		 *     bits OWN and INTE appropriateley
-		 */
-		rdesc->desc0 = cpu_to_le32(lower_32_bits(rdata->skb_dma));
-		rdesc->desc1 = cpu_to_le32(upper_32_bits(rdata->skb_dma));
-		rdesc->desc2 = 0;
-		rdesc->desc3 = 0;
-		XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, OWN, 1);
-		XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE, 1);
-		rdata->interrupt = 1;
-		if (rx_coalesce && (!rx_frames || ((i + 1) % rx_frames))) {
-			/* Clear interrupt on completion bit */
-			XGMAC_SET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, INTE,
-					  0);
+		/* Set interrupt on completion bit as appropriate */
+		if (rx_coalesce && (!rx_frames || ((i + 1) % rx_frames)))
 			rdata->interrupt = 0;
-		}
-	}
+		else
+			rdata->interrupt = 1;
 
-	/* Make sure everything is written to the descriptors before
-	 * telling the device about them
-	 */
-	wmb();
+		/* Initialize Rx descriptor */
+		xgbe_rx_desc_reset(rdata);
+	}
 
 	/* Update the total number of Rx descriptors */
 	XGMAC_DMA_IOWRITE(channel, DMA_CH_RDRLR, ring->rdesc_count - 1);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 8cb2372..d65f5aa 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -218,8 +218,8 @@ static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu)
 	}
 
 	rx_buf_size = mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-	if (rx_buf_size < XGBE_RX_MIN_BUF_SIZE)
-		rx_buf_size = XGBE_RX_MIN_BUF_SIZE;
+	rx_buf_size = clamp_val(rx_buf_size, XGBE_RX_MIN_BUF_SIZE, PAGE_SIZE);
+
 	rx_buf_size = (rx_buf_size + XGBE_RX_BUF_ALIGN - 1) &
 		      ~(XGBE_RX_BUF_ALIGN - 1);
 
@@ -546,7 +546,7 @@ void xgbe_init_rx_coalesce(struct xgbe_prv_data *pdata)
 	DBGPR("<--xgbe_init_rx_coalesce\n");
 }
 
-static void xgbe_free_tx_skbuff(struct xgbe_prv_data *pdata)
+static void xgbe_free_tx_data(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_desc_if *desc_if = &pdata->desc_if;
 	struct xgbe_channel *channel;
@@ -554,7 +554,7 @@ static void xgbe_free_tx_skbuff(struct xgbe_prv_data *pdata)
 	struct xgbe_ring_data *rdata;
 	unsigned int i, j;
 
-	DBGPR("-->xgbe_free_tx_skbuff\n");
+	DBGPR("-->xgbe_free_tx_data\n");
 
 	channel = pdata->channel;
 	for (i = 0; i < pdata->channel_count; i++, channel++) {
@@ -564,14 +564,14 @@ static void xgbe_free_tx_skbuff(struct xgbe_prv_data *pdata)
 
 		for (j = 0; j < ring->rdesc_count; j++) {
 			rdata = XGBE_GET_DESC_DATA(ring, j);
-			desc_if->unmap_skb(pdata, rdata);
+			desc_if->unmap_rdata(pdata, rdata);
 		}
 	}
 
-	DBGPR("<--xgbe_free_tx_skbuff\n");
+	DBGPR("<--xgbe_free_tx_data\n");
 }
 
-static void xgbe_free_rx_skbuff(struct xgbe_prv_data *pdata)
+static void xgbe_free_rx_data(struct xgbe_prv_data *pdata)
 {
 	struct xgbe_desc_if *desc_if = &pdata->desc_if;
 	struct xgbe_channel *channel;
@@ -579,7 +579,7 @@ static void xgbe_free_rx_skbuff(struct xgbe_prv_data *pdata)
 	struct xgbe_ring_data *rdata;
 	unsigned int i, j;
 
-	DBGPR("-->xgbe_free_rx_skbuff\n");
+	DBGPR("-->xgbe_free_rx_data\n");
 
 	channel = pdata->channel;
 	for (i = 0; i < pdata->channel_count; i++, channel++) {
@@ -589,11 +589,11 @@ static void xgbe_free_rx_skbuff(struct xgbe_prv_data *pdata)
 
 		for (j = 0; j < ring->rdesc_count; j++) {
 			rdata = XGBE_GET_DESC_DATA(ring, j);
-			desc_if->unmap_skb(pdata, rdata);
+			desc_if->unmap_rdata(pdata, rdata);
 		}
 	}
 
-	DBGPR("<--xgbe_free_rx_skbuff\n");
+	DBGPR("<--xgbe_free_rx_data\n");
 }
 
 static void xgbe_adjust_link(struct net_device *netdev)
@@ -839,8 +839,8 @@ static void xgbe_restart_dev(struct xgbe_prv_data *pdata, unsigned int reset)
 	xgbe_stop(pdata);
 	synchronize_irq(pdata->irq_number);
 
-	xgbe_free_tx_skbuff(pdata);
-	xgbe_free_rx_skbuff(pdata);
+	xgbe_free_tx_data(pdata);
+	xgbe_free_rx_data(pdata);
 
 	/* Issue software reset to device if requested */
 	if (reset)
@@ -1609,7 +1609,7 @@ static void xgbe_rx_refresh(struct xgbe_channel *channel)
 	struct xgbe_ring *ring = channel->rx_ring;
 	struct xgbe_ring_data *rdata;
 
-	desc_if->realloc_skb(channel);
+	desc_if->realloc_rx_buffer(channel);
 
 	/* Update the Rx Tail Pointer Register with address of
 	 * the last cleaned entry */
@@ -1618,6 +1618,37 @@ static void xgbe_rx_refresh(struct xgbe_channel *channel)
 			  lower_32_bits(rdata->rdesc_dma));
 }
 
+static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata,
+				       struct xgbe_ring_data *rdata,
+				       unsigned int len)
+{
+	struct net_device *netdev = pdata->netdev;
+	struct sk_buff *skb;
+	u8 *packet;
+	unsigned int copy_len;
+
+	skb = netdev_alloc_skb_ip_align(netdev, XGBE_SKB_ALLOC_SIZE);
+	if (!skb)
+		return NULL;
+
+	packet = page_address(rdata->rx_pa.pages) + rdata->rx_pa.pages_offset;
+	copy_len = min_t(unsigned int, XGBE_SKB_ALLOC_SIZE, len);
+	skb_copy_to_linear_data(skb, packet, copy_len);
+	skb_put(skb, copy_len);
+
+	rdata->rx_pa.pages_offset += copy_len;
+	len -= copy_len;
+	if (len)
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				rdata->rx_pa.pages,
+				rdata->rx_pa.pages_offset,
+				len, rdata->rx_dma_len);
+	else
+		put_page(rdata->rx_pa.pages);
+
+	return skb;
+}
+
 static int xgbe_tx_poll(struct xgbe_channel *channel)
 {
 	struct xgbe_prv_data *pdata = channel->pdata;
@@ -1651,7 +1682,7 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
 #endif
 
 		/* Free the SKB and reset the descriptor for re-use */
-		desc_if->unmap_skb(pdata, rdata);
+		desc_if->unmap_rdata(pdata, rdata);
 		hw_if->tx_desc_reset(rdata);
 
 		processed++;
@@ -1726,9 +1757,9 @@ read_again:
 		ring->cur++;
 		ring->dirty++;
 
-		dma_unmap_single(pdata->dev, rdata->skb_dma,
-				 rdata->skb_dma_len, DMA_FROM_DEVICE);
-		rdata->skb_dma = 0;
+		dma_sync_single_for_cpu(pdata->dev, rdata->rx_dma,
+					rdata->rx_dma_len,
+					DMA_FROM_DEVICE);
 
 		incomplete = XGMAC_GET_BITS(packet->attributes,
 					    RX_PACKET_ATTRIBUTES,
@@ -1753,26 +1784,22 @@ read_again:
 
 		if (!context) {
 			put_len = rdata->len - len;
-			if (skb) {
-				if (pskb_expand_head(skb, 0, put_len,
-						     GFP_ATOMIC)) {
-					DBGPR("pskb_expand_head error\n");
-					if (incomplete) {
-						error = 1;
-						goto read_again;
-					}
-
-					dev_kfree_skb(skb);
-					goto next_packet;
+			len += put_len;
+
+			if (!skb) {
+				skb = xgbe_create_skb(pdata, rdata, put_len);
+				if (!skb) {
+					error = 1;
+					goto read_again;
 				}
-				memcpy(skb_tail_pointer(skb), rdata->skb->data,
-				       put_len);
 			} else {
-				skb = rdata->skb;
-				rdata->skb = NULL;
+				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+						rdata->rx_pa.pages,
+						rdata->rx_pa.pages_offset,
+						put_len, rdata->rx_dma_len);
 			}
-			skb_put(skb, put_len);
-			len += put_len;
+
+			rdata->rx_pa.pages = NULL;
 		}
 
 		if (incomplete || context_next)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 19f1d90..d3aa055 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -142,6 +142,7 @@
 
 #define XGBE_RX_MIN_BUF_SIZE	(ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
 #define XGBE_RX_BUF_ALIGN	64
+#define XGBE_SKB_ALLOC_SIZE	256
 
 #define XGBE_MAX_DMA_CHANNELS	16
 #define XGBE_MAX_QUEUES		16
@@ -240,6 +241,15 @@ struct xgbe_ring_desc {
 	u32 desc3;
 };
 
+/* Page allocation related values */
+struct xgbe_page_alloc {
+	struct page *pages;
+	unsigned int pages_len;
+	unsigned int pages_offset;
+
+	dma_addr_t pages_dma;
+};
+
 /* Structure used to hold information related to the descriptor
  * and the packet associated with the descriptor (always use
  * use the XGBE_GET_DESC_DATA macro to access this data from the ring)
@@ -253,6 +263,12 @@ struct xgbe_ring_data {
 	unsigned int skb_dma_len;	/* Length of SKB DMA area */
 	unsigned int tso_header;        /* TSO header indicator */
 
+	struct xgbe_page_alloc rx_pa;	/* Rx buffer page allocation */
+	struct xgbe_page_alloc rx_unmap;
+
+	dma_addr_t rx_dma;		/* DMA address of Rx buffer */
+	unsigned int rx_dma_len;	/* Length of the Rx DMA buffer */
+
 	unsigned short len;		/* Length of received Rx packet */
 
 	unsigned int interrupt;		/* Interrupt indicator */
@@ -291,6 +307,9 @@ struct xgbe_ring {
 	 */
 	struct xgbe_ring_data *rdata;
 
+	/* Page allocation for RX buffers */
+	struct xgbe_page_alloc rx_pa;
+
 	/* Ring index values
 	 *  cur   - Tx: index of descriptor to be used for current transfer
 	 *          Rx: index of descriptor to check for packet availability
@@ -515,8 +534,8 @@ struct xgbe_desc_if {
 	int (*alloc_ring_resources)(struct xgbe_prv_data *);
 	void (*free_ring_resources)(struct xgbe_prv_data *);
 	int (*map_tx_skb)(struct xgbe_channel *, struct sk_buff *);
-	void (*realloc_skb)(struct xgbe_channel *);
-	void (*unmap_skb)(struct xgbe_prv_data *, struct xgbe_ring_data *);
+	void (*realloc_rx_buffer)(struct xgbe_channel *);
+	void (*unmap_rdata)(struct xgbe_prv_data *, struct xgbe_ring_data *);
 	void (*wrapper_tx_desc_init)(struct xgbe_prv_data *);
 	void (*wrapper_rx_desc_init)(struct xgbe_prv_data *);
 };
@@ -624,7 +643,7 @@ struct xgbe_prv_data {
 	unsigned int rx_riwt;
 	unsigned int rx_frames;
 
-	/* Current MTU */
+	/* Current Rx buffer size */
 	unsigned int rx_buf_size;
 
 	/* Flow control settings */

^ permalink raw reply related

* [PATCH net-next v1 03/12] amd-xgbe: Use the u32 data type for descriptors
From: Tom Lendacky @ 2014-11-04 22:06 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

The Tx and Rx descriptors are unsigned 32 bit values.  Use the u32
type, rather than unsigned int, to map these descriptors.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe.h |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 0dc15d7..19f1d90 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -234,10 +234,10 @@ struct xgbe_packet_data {
 
 /* Common Rx and Tx descriptor mapping */
 struct xgbe_ring_desc {
-	unsigned int desc0;
-	unsigned int desc1;
-	unsigned int desc2;
-	unsigned int desc3;
+	u32 desc0;
+	u32 desc1;
+	u32 desc2;
+	u32 desc3;
 };
 
 /* Structure used to hold information related to the descriptor

^ permalink raw reply related

* [PATCH net-next v1 02/12] amd-xgbe: Rename pre_xmit function to dev_xmit
From: Tom Lendacky @ 2014-11-04 22:06 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

The pre_xmit function name implies that it performs operations prior
to transmitting the packet when in fact it is responsible for setting
up the descriptors and initiating the transmit.  Rename this
function from pre_xmit to dev_xmit, which is consistent with the name
used during receive processing - dev_read.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c |    8 ++++----
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c |    2 +-
 drivers/net/ethernet/amd/xgbe/xgbe.h     |    2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 9da3a03..7b97d38 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -1198,7 +1198,7 @@ static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata)
 	xgbe_config_flow_control(pdata);
 }
 
-static void xgbe_pre_xmit(struct xgbe_channel *channel)
+static void xgbe_dev_xmit(struct xgbe_channel *channel)
 {
 	struct xgbe_prv_data *pdata = channel->pdata;
 	struct xgbe_ring *ring = channel->tx_ring;
@@ -1211,7 +1211,7 @@ static void xgbe_pre_xmit(struct xgbe_channel *channel)
 	int start_index = ring->cur;
 	int i;
 
-	DBGPR("-->xgbe_pre_xmit\n");
+	DBGPR("-->xgbe_dev_xmit\n");
 
 	csum = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
 			      CSUM_ENABLE);
@@ -1410,7 +1410,7 @@ static void xgbe_pre_xmit(struct xgbe_channel *channel)
 	      channel->name, start_index & (ring->rdesc_count - 1),
 	      (ring->cur - 1) & (ring->rdesc_count - 1));
 
-	DBGPR("<--xgbe_pre_xmit\n");
+	DBGPR("<--xgbe_dev_xmit\n");
 }
 
 static int xgbe_dev_read(struct xgbe_channel *channel)
@@ -2561,7 +2561,7 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
 	hw_if->powerup_rx = xgbe_powerup_rx;
 	hw_if->powerdown_rx = xgbe_powerdown_rx;
 
-	hw_if->pre_xmit = xgbe_pre_xmit;
+	hw_if->dev_xmit = xgbe_dev_xmit;
 	hw_if->dev_read = xgbe_dev_read;
 	hw_if->enable_int = xgbe_enable_int;
 	hw_if->disable_int = xgbe_disable_int;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 07b00bd..8cb2372 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1343,7 +1343,7 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
 	xgbe_prep_tx_tstamp(pdata, skb, packet);
 
 	/* Configure required descriptor fields for transmission */
-	hw_if->pre_xmit(channel);
+	hw_if->dev_xmit(channel);
 
 #ifdef XGMAC_ENABLE_TX_PKT_DUMP
 	xgbe_print_pkt(netdev, skb, true);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 789957d..0dc15d7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -456,7 +456,7 @@ struct xgbe_hw_if {
 
 	int (*enable_int)(struct xgbe_channel *, enum xgbe_int);
 	int (*disable_int)(struct xgbe_channel *, enum xgbe_int);
-	void (*pre_xmit)(struct xgbe_channel *);
+	void (*dev_xmit)(struct xgbe_channel *);
 	int (*dev_read)(struct xgbe_channel *);
 	void (*tx_desc_init)(struct xgbe_channel *);
 	void (*rx_desc_init)(struct xgbe_channel *);

^ permalink raw reply related

* [PATCH net-next v1 01/12] amd-xgbe: Move ring allocation to device open
From: Tom Lendacky @ 2014-11-04 22:06 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

Move the channel and ring tracking structures allocation to device
open.  This will allow for future support to vary the number of Tx/Rx
queues without unloading the module.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c     |   93 +++++++++++++++++++++++++-
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |    8 +-
 drivers/net/ethernet/amd/xgbe/xgbe-main.c    |   62 -----------------
 3 files changed, 93 insertions(+), 70 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 2349ea9..07b00bd 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -129,6 +129,80 @@
 static int xgbe_poll(struct napi_struct *, int);
 static void xgbe_set_rx_mode(struct net_device *);
 
+static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel_mem, *channel;
+	struct xgbe_ring *tx_ring, *rx_ring;
+	unsigned int count, i;
+
+	count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
+
+	channel_mem = kcalloc(count, sizeof(struct xgbe_channel), GFP_KERNEL);
+	if (!channel_mem)
+		goto err_channel;
+
+	tx_ring = kcalloc(pdata->tx_ring_count, sizeof(struct xgbe_ring),
+			  GFP_KERNEL);
+	if (!tx_ring)
+		goto err_tx_ring;
+
+	rx_ring = kcalloc(pdata->rx_ring_count, sizeof(struct xgbe_ring),
+			  GFP_KERNEL);
+	if (!rx_ring)
+		goto err_rx_ring;
+
+	for (i = 0, channel = channel_mem; i < count; i++, channel++) {
+		snprintf(channel->name, sizeof(channel->name), "channel-%d", i);
+		channel->pdata = pdata;
+		channel->queue_index = i;
+		channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
+				    (DMA_CH_INC * i);
+
+		if (i < pdata->tx_ring_count) {
+			spin_lock_init(&tx_ring->lock);
+			channel->tx_ring = tx_ring++;
+		}
+
+		if (i < pdata->rx_ring_count) {
+			spin_lock_init(&rx_ring->lock);
+			channel->rx_ring = rx_ring++;
+		}
+
+		DBGPR("  %s - queue_index=%u, dma_regs=%p, tx=%p, rx=%p\n",
+		      channel->name, channel->queue_index, channel->dma_regs,
+		      channel->tx_ring, channel->rx_ring);
+	}
+
+	pdata->channel = channel_mem;
+	pdata->channel_count = count;
+
+	return 0;
+
+err_rx_ring:
+	kfree(tx_ring);
+
+err_tx_ring:
+	kfree(channel_mem);
+
+err_channel:
+	netdev_err(pdata->netdev, "channel allocation failed\n");
+
+	return -ENOMEM;
+}
+
+static void xgbe_free_channels(struct xgbe_prv_data *pdata)
+{
+	if (!pdata->channel)
+		return;
+
+	kfree(pdata->channel->rx_ring);
+	kfree(pdata->channel->tx_ring);
+	kfree(pdata->channel);
+
+	pdata->channel = NULL;
+	pdata->channel_count = 0;
+}
+
 static inline unsigned int xgbe_tx_avail_desc(struct xgbe_ring *ring)
 {
 	return (ring->rdesc_count - (ring->cur - ring->dirty));
@@ -1119,10 +1193,15 @@ static int xgbe_open(struct net_device *netdev)
 		goto err_ptpclk;
 	pdata->rx_buf_size = ret;
 
+	/* Allocate the channel and ring structures */
+	ret = xgbe_alloc_channels(pdata);
+	if (ret)
+		goto err_ptpclk;
+
 	/* Allocate the ring descriptors and buffers */
 	ret = desc_if->alloc_ring_resources(pdata);
 	if (ret)
-		goto err_ptpclk;
+		goto err_channels;
 
 	/* Initialize the device restart and Tx timestamp work struct */
 	INIT_WORK(&pdata->restart_work, xgbe_restart);
@@ -1134,7 +1213,7 @@ static int xgbe_open(struct net_device *netdev)
 	if (ret) {
 		netdev_alert(netdev, "error requesting irq %d\n",
 			     pdata->irq_number);
-		goto err_irq;
+		goto err_rings;
 	}
 	pdata->irq_number = netdev->irq;
 
@@ -1152,9 +1231,12 @@ err_start:
 	devm_free_irq(pdata->dev, pdata->irq_number, pdata);
 	pdata->irq_number = 0;
 
-err_irq:
+err_rings:
 	desc_if->free_ring_resources(pdata);
 
+err_channels:
+	xgbe_free_channels(pdata);
+
 err_ptpclk:
 	clk_disable_unprepare(pdata->ptpclk);
 
@@ -1181,9 +1263,12 @@ static int xgbe_close(struct net_device *netdev)
 	/* Issue software reset to device */
 	hw_if->exit(pdata);
 
-	/* Free all the ring data */
+	/* Free the ring descriptors and buffers */
 	desc_if->free_ring_resources(pdata);
 
+	/* Free the channel and ring structures */
+	xgbe_free_channels(pdata);
+
 	/* Release the interrupt */
 	if (pdata->irq_number != 0) {
 		devm_free_irq(pdata->dev, pdata->irq_number, pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 49508ec..47022fb 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -452,9 +452,9 @@ static int xgbe_set_coalesce(struct net_device *netdev,
 			     rx_usecs);
 		return -EINVAL;
 	}
-	if (rx_frames > pdata->channel->rx_ring->rdesc_count) {
+	if (rx_frames > pdata->rx_desc_count) {
 		netdev_alert(netdev, "rx-frames is limited to %d frames\n",
-			     pdata->channel->rx_ring->rdesc_count);
+			     pdata->rx_desc_count);
 		return -EINVAL;
 	}
 
@@ -462,9 +462,9 @@ static int xgbe_set_coalesce(struct net_device *netdev,
 	tx_frames = ec->tx_max_coalesced_frames;
 
 	/* Check the bounds of values for Tx */
-	if (tx_frames > pdata->channel->tx_ring->rdesc_count) {
+	if (tx_frames > pdata->tx_desc_count) {
 		netdev_alert(netdev, "tx-frames is limited to %d frames\n",
-			     pdata->channel->tx_ring->rdesc_count);
+			     pdata->tx_desc_count);
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index f5a8fa0..e5077fd 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -133,60 +133,6 @@ MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(XGBE_DRV_VERSION);
 MODULE_DESCRIPTION(XGBE_DRV_DESC);
 
-static struct xgbe_channel *xgbe_alloc_rings(struct xgbe_prv_data *pdata)
-{
-	struct xgbe_channel *channel_mem, *channel;
-	struct xgbe_ring *tx_ring, *rx_ring;
-	unsigned int count, i;
-
-	DBGPR("-->xgbe_alloc_rings\n");
-
-	count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
-
-	channel_mem = devm_kcalloc(pdata->dev, count,
-				   sizeof(struct xgbe_channel), GFP_KERNEL);
-	if (!channel_mem)
-		return NULL;
-
-	tx_ring = devm_kcalloc(pdata->dev, pdata->tx_ring_count,
-			       sizeof(struct xgbe_ring), GFP_KERNEL);
-	if (!tx_ring)
-		return NULL;
-
-	rx_ring = devm_kcalloc(pdata->dev, pdata->rx_ring_count,
-			       sizeof(struct xgbe_ring), GFP_KERNEL);
-	if (!rx_ring)
-		return NULL;
-
-	for (i = 0, channel = channel_mem; i < count; i++, channel++) {
-		snprintf(channel->name, sizeof(channel->name), "channel-%d", i);
-		channel->pdata = pdata;
-		channel->queue_index = i;
-		channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
-				    (DMA_CH_INC * i);
-
-		if (i < pdata->tx_ring_count) {
-			spin_lock_init(&tx_ring->lock);
-			channel->tx_ring = tx_ring++;
-		}
-
-		if (i < pdata->rx_ring_count) {
-			spin_lock_init(&rx_ring->lock);
-			channel->rx_ring = rx_ring++;
-		}
-
-		DBGPR("  %s - queue_index=%u, dma_regs=%p, tx=%p, rx=%p\n",
-		      channel->name, channel->queue_index, channel->dma_regs,
-		      channel->tx_ring, channel->rx_ring);
-	}
-
-	pdata->channel_count = count;
-
-	DBGPR("<--xgbe_alloc_rings\n");
-
-	return channel_mem;
-}
-
 static void xgbe_default_config(struct xgbe_prv_data *pdata)
 {
 	DBGPR("-->xgbe_default_config\n");
@@ -383,14 +329,6 @@ static int xgbe_probe(struct platform_device *pdev)
 		goto err_io;
 	}
 
-	/* Allocate the rings for the DMA channels */
-	pdata->channel = xgbe_alloc_rings(pdata);
-	if (!pdata->channel) {
-		dev_err(dev, "ring allocation failed\n");
-		ret = -ENOMEM;
-		goto err_io;
-	}
-
 	/* Prepare to regsiter with MDIO */
 	pdata->mii_bus_id = kasprintf(GFP_KERNEL, "%s", pdev->name);
 	if (!pdata->mii_bus_id) {

^ permalink raw reply related

* [PATCH net-next v1 00/12] amd-xgbe: AMD XGBE driver updates 2014-11-04
From: Tom Lendacky @ 2014-11-04 22:06 UTC (permalink / raw)
  To: netdev; +Cc: davem

The following series of patches includes functional updates to the
driver as well as some trivial changes for function renaming and
spelling fixes.

- Move channel and ring structure allocation into the device open path
- Rename the pre_xmit function to dev_xmit
- Explicitly use the u32 data type for the device descriptors
- Use page allocation for the receive buffers
- Add support for split header/payload receive
- Add support for per DMA channel interrupts
- Add support for receive side scaling (RSS)
- Add support for ethtool receive side scaling commands
- Fix the spelling of descriptors
- After a PCS reset, sync the PCS and PHY modes
- Add dependency on HAS_IOMEM to both the amd-xgbe and amd-xgbe-phy
  drivers

This patch series is based on net-next.

---

Tom Lendacky (12):
      amd-xgbe: Move ring allocation to device open
      amd-xgbe: Rename pre_xmit function to dev_xmit
      amd-xgbe: Use the u32 data type for descriptors
      amd-xgbe: Use page allocations for Rx buffers
      amd-xgbe: Implement split header receive support
      amd-xgbe: Add support for per DMA channel interrupts
      amd-xgbe: Provide support for receive side scaling
      amd-xgbe: Add receive side scaling ethtool support
      amd-xgbe: Fix a spelling error
      amd-xgbe-phy: Sync PCS and PHY modes after reset
      amd-xgbe: Let AMD_XGBE depend on HAS_IOMEM
      amd-xgbe-phy: Let AMD_XGBE_PHY depend on HAS_IOMEM


 Documentation/devicetree/bindings/net/amd-xgbe.txt |   12 -
 drivers/net/ethernet/amd/Kconfig                   |    2 
 drivers/net/ethernet/amd/xgbe/xgbe-common.h        |   42 ++
 drivers/net/ethernet/amd/xgbe/xgbe-desc.c          |  193 +++++++--
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c           |  288 ++++++++++---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c           |  445 ++++++++++++++++----
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c       |   82 ++++
 drivers/net/ethernet/amd/xgbe/xgbe-main.c          |   86 +---
 drivers/net/ethernet/amd/xgbe/xgbe.h               |   77 +++
 drivers/net/phy/Kconfig                            |    2 
 drivers/net/phy/amd-xgbe-phy.c                     |    3 
 11 files changed, 955 insertions(+), 277 deletions(-)

-- 
Tom Lendacky

^ permalink raw reply

* Re: [Patch net-next v2] neigh: remove dynamic neigh table registration support
From: David Miller @ 2014-11-04 22:02 UTC (permalink / raw)
  To: xiyou.wangcong; +Cc: netdev
In-Reply-To: <1415038454-8150-1-git-send-email-xiyou.wangcong@gmail.com>

From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon,  3 Nov 2014 10:14:14 -0800

> Currently there are only three neigh tables in the whole kernel:
> arp table, ndisc table and decnet neigh table. What's more,
> we don't support registering multiple tables per family.
> Therefore we can just make these tables statically built-in.
> 
> Cc: David S. Miller <davem@davemloft.net>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> ---
> v2: remove useless #ifdef's
>     move the assignment to the end of neigh_table_init()

neigh_table_clear() should definitely NULL out the slot; otherwise
we keep a pointer in there to module memory which is about to
be released.

^ permalink raw reply

* [PATCH net 1/5] be2net: Implement ndo_gso_check()
From: Joe Stringer @ 2014-11-04 21:56 UTC (permalink / raw)
  To: netdev
  Cc: sathya.perla, jeffrey.t.kirsher, linux.nics, amirv, shahed.shaikh,
	Dept-GELinuxNICDev, therbert, linux-kernel
In-Reply-To: <1415138202-1197-1-git-send-email-joestringer@nicira.com>

ndo_gso_check() was recently introduced to allow NICs to report the
offloading support that they have on a per-skb basis. Add an
implementation for this driver which checks for something that looks
like VXLAN.

Implementation shamelessly stolen from Tom Herbert:
http://thread.gmane.org/gmane.linux.network/332428/focus=333111

Signed-off-by: Joe Stringer <joestringer@nicira.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 9a18e79..bd52b8d 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4423,6 +4423,17 @@ static void be_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family,
 }
 #endif
 
+static bool be_gso_check(struct sk_buff *skb, struct net_device *dev)
+{
+	if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
+	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	     skb->inner_protocol != htons(ETH_P_TEB) ||
+	     skb_inner_mac_header(skb) - skb_transport_header(skb) != 16))
+		return false;
+
+	return true;
+}
+
 static const struct net_device_ops be_netdev_ops = {
 	.ndo_open		= be_open,
 	.ndo_stop		= be_close,
@@ -4451,6 +4462,7 @@ static const struct net_device_ops be_netdev_ops = {
 	.ndo_add_vxlan_port	= be_add_vxlan_port,
 	.ndo_del_vxlan_port	= be_del_vxlan_port,
 #endif
+	.ndo_gso_check		= be_gso_check,
 };
 
 static void be_netdev_init(struct net_device *netdev)
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH] stmmac: fix sparse warnings
From: David Miller @ 2014-11-04 21:59 UTC (permalink / raw)
  To: peppe.cavallaro; +Cc: andriy.shevchenko, netdev, hock.leong.kweh, vbridgers2013
In-Reply-To: <54590045.3050305@st.com>

From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
Date: Tue, 4 Nov 2014 17:35:17 +0100

> On 11/3/2014 6:28 PM, Andy Shevchenko wrote:
>> This patch fixes the following sparse warnings.
>>
>> drivers/net/ethernet/stmicro/stmmac/enh_desc.c:381:30: warning: symbol
>> 'enh_desc_ops' was not declared. Should it be static?
>> drivers/net/ethernet/stmicro/stmmac/norm_desc.c:253:30: warning:
>> symbol 'ndesc_ops' was not declared. Should it be static?
>> drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c:141:33: warning:
>> symbol 'stmmac_ptp' was not declared. Should it be static?
>>
>> There is no functional change.
> 
> Hello Andy
> 
> I have never seen this kind of warning.

Are you running the 'sparse' checker with all options enabled?
This is one of the most fundamental warnings it spits out.

> I prefer to not include the
> stmmac.h in enh_desc.c and norm_desc.c but eventually to move the
> following from stmmac.h to common.h:
>   extern const struct stmmac_desc_ops enh_desc_ops;
>   extern const struct stmmac_desc_ops ndesc_ops;
> what do you think?

You two sort this out and submit a new patch, thanks.

^ permalink raw reply

* Re: Fw: [Bug 82471] New: net/core/dev.c skb_war_bad_offload
From: Jesse Brandeburg @ 2014-11-04 21:59 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: NetDEV list, Jesse Brandeburg
In-Reply-To: <20140815142646.4be3d091@haswell.linuxnetplumber.net>

I believe this is a regression, as reporters say this worked with 3.13 kernels.

Does anyone have any idea what is up with this skb_warn_bad_offload
warning with the bonding driver?  See the bug text for a lot more
detail.  Is it fixed already?  This is occurring on top of both Intel
and Broadcom NICs with 802.3ad bonding enabled, and turning off
scatter-gather avoids the issue.

On Fri, Aug 15, 2014 at 2:26 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:

> Subject: [Bug 82471] New: net/core/dev.c skb_war_bad_offload
>
>
> https://bugzilla.kernel.org/show_bug.cgi?id=82471
>
>             Bug ID: 82471
>            Summary: net/core/dev.c skb_war_bad_offload
>            Product: Networking
>            Version: 2.5
>     Kernel Version: 3.16.1
>           Hardware: x86-64
>                 OS: Linux
>               Tree: Mainline
>             Status: NEW
>           Severity: normal
>           Priority: P1
>          Component: IPV4
>           Assignee: shemminger@linux-foundation.org
>           Reporter: vladi@aresgate.net
>         Regression: No
>
> Created attachment 146761
>   --> https://bugzilla.kernel.org/attachment.cgi?id=146761&action=edit
> kernel config
>
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973225] ------------[
> cut here ]------------
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973236] WARNING: CPU:
> 2 PID: 0 at net/core/dev.c:2246 skb_warn_bad_offload+0xc8/0xd5()
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973238] :
> caps=(0x000000000419fba9, 0x00000000001b583b) len=2962 data_len=2896
> gso_size=1448 gso_type=1 ip_summed=3
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973239] Modules
> linked in: ntfs msdos xfs libcrc32c ipmi_devintf intel_rapl
> x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul
> ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper
> cryptd sb_edac edac_core 8021q garp ioatdma stp ipmi_si mrp llc bonding
> hid_generic ixgbe usbhid hid ahci dca libahci mdio
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973257] CPU: 2 PID: 0
> Comm: swapper/2 Tainted: G        W     3.16.0 #2
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973259] Hardware
> name: Supermicro X9DRD-7LN4F(-JBOD)/X9DRD-EF/X9DRD-7LN4F, BIOS 3.0a 12/05/2013
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973260]
> 0000000000000009 ffff88046fd036b0 ffffffff815c4096 ffff88046fd036f8
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973262]
> ffff88046fd036e8 ffffffff8103f633 ffff880018b7c4e0 ffff8804687df000
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973264]
> 0000000000000001 0000000000000003 ffffffffa0193320 ffff88046fd03748
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973266] Call Trace:
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973268]  <IRQ>
> [<ffffffff815c4096>] dump_stack+0x45/0x56
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973280]
> [<ffffffff8103f633>] warn_slowpath_common+0x73/0x90
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973286]
> [<ffffffff8103f697>] warn_slowpath_fmt+0x47/0x50
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973288]
> [<ffffffff812edadc>] ? ___ratelimit+0x7c/0xf0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973291]
> [<ffffffff815c5e19>] skb_warn_bad_offload+0xc8/0xd5
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973294]
> [<ffffffff814e57fe>] skb_checksum_help+0x16e/0x180
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973297]
> [<ffffffff814e9ecc>] dev_hard_start_xmit+0x42c/0x4b0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973299]
> [<ffffffff814ea154>] ? __dev_queue_xmit+0x204/0x440
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973301]
> [<ffffffff814ea232>] __dev_queue_xmit+0x2e2/0x440
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973302]
> [<ffffffff814ea39b>] ? dev_queue_xmit+0xb/0x10
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973304]
> [<ffffffff814ea39b>] dev_queue_xmit+0xb/0x10
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973308]
> [<ffffffffa012b758>] vlan_dev_hard_start_xmit+0x88/0x100 [8021q]
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973317]
> [<ffffffff814e9d9a>] dev_hard_start_xmit+0x2fa/0x4b0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973321]
> [<ffffffff814ea232>] __dev_queue_xmit+0x2e2/0x440
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973323]
> [<ffffffff814ea39b>] dev_queue_xmit+0xb/0x10
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973325]
> [<ffffffff814f1192>] neigh_connected_output+0xb2/0xf0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973327]
> [<ffffffff815192dc>] ip_finish_output+0x4ec/0x890
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973329]
> [<ffffffff8151ac03>] ip_output+0x53/0x90
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973331]
> [<ffffffff8151a39b>] ip_local_out_sk+0x2b/0x30
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973333]
> [<ffffffff8151a6fa>] ip_queue_xmit+0x13a/0x3c0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973335]
> [<ffffffff815309fa>] tcp_transmit_skb+0x42a/0x8f0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973337]
> [<ffffffff81530ffa>] tcp_write_xmit+0x13a/0xc00
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973347]
> [<ffffffff8152f033>] ? tcp_established_options+0x33/0xd0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973350]
> [<ffffffff81531d09>] __tcp_push_pending_frames+0x29/0xc0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973353]
> [<ffffffff8152da77>] tcp_rcv_established+0x1f7/0x5e0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973356]
> [<ffffffff81535fc5>] tcp_v4_do_rcv+0x215/0x4a0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973369]
> [<ffffffff810651f8>] ? ttwu_do_activate.constprop.64+0x58/0x60
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973374]
> [<ffffffff81295d31>] ? security_sock_rcv_skb+0x11/0x20
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973377]
> [<ffffffff815392ad>] tcp_v4_rcv+0x73d/0x7c0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973380]
> [<ffffffff8106fafc>] ? update_group_capacity+0x16c/0x270
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973386]
> [<ffffffff812e8300>] ? cpumask_next_and+0x30/0x50
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973388]
> [<ffffffff81514b50>] ip_local_deliver_finish+0x80/0x1c0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973390]
> [<ffffffff81515154>] ip_local_deliver+0x34/0x90
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973392]
> [<ffffffff81514d99>] ip_rcv_finish+0x109/0x350
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973399]
> [<ffffffff815153d2>] ip_rcv+0x222/0x370
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973403]
> [<ffffffff814e5eb6>] __netif_receive_skb_core+0x416/0x570
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973407]
> [<ffffffff814e73f3>] __netif_receive_skb+0x13/0x60
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973410]
> [<ffffffff814e745e>] netif_receive_skb_internal+0x1e/0x90
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973415]
> [<ffffffff814e7b40>] napi_gro_receive+0x70/0xa0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973422]
> [<ffffffffa0134f4c>] ixgbe_clean_rx_irq+0x75c/0xb20 [ixgbe]
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973427]
> [<ffffffffa0136172>] ixgbe_poll+0x522/0x850 [ixgbe]
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973430]
> [<ffffffff8105e986>] ? hrtimer_get_next_event+0xb6/0xc0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973437]
> [<ffffffff814e8dc1>] net_rx_action+0x101/0x1a0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973443]
> [<ffffffff810430ab>] __do_softirq+0xdb/0x240
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973447]
> [<ffffffff8104349e>] irq_exit+0xee/0x110
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973450]
> [<ffffffff81004913>] do_IRQ+0x53/0xf0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973453]
> [<ffffffff815caaaa>] common_interrupt+0x6a/0x6a
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973454]  <EOI>
> [<ffffffff814af007>] ? cpuidle_enter_state+0x47/0xc0
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973463]
> [<ffffffff814af132>] cpuidle_enter+0x12/0x20
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973468]
> [<ffffffff81075fbf>] cpu_startup_entry+0x24f/0x280
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973477]
> [<ffffffff810905e3>] ? clockevents_config_and_register+0x23/0x30
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973482]
> [<ffffffff810282be>] start_secondary+0x1be/0x270
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973486] ---[ end
> trace de552357488766e8 ]---
> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.974181] ------------[
> cut here ]------------
>
> --
> You are receiving this mail because:
> You are the assignee for the bug.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH V2 net-next] esp4: remove assignment in if condition
From: David Miller @ 2014-11-04 21:58 UTC (permalink / raw)
  To: fabf
  Cc: linux-kernel, joe, steffen.klassert, herbert, kuznet, jmorris,
	yoshfuji, kaber, netdev
In-Reply-To: <1415138076-7036-1-git-send-email-fabf@skynet.be>

From: Fabian Frederick <fabf@skynet.be>
Date: Tue,  4 Nov 2014 22:54:36 +0100

> Signed-off-by: Fabian Frederick <fabf@skynet.be>
> ---
> V2: remove blank line between set and test (suggested by Joe Perches).

Applied, thanks.

^ permalink raw reply

* [PATCH net 5/5] qlcnic: Implement ndo_gso_check()
From: Joe Stringer @ 2014-11-04 21:56 UTC (permalink / raw)
  To: netdev
  Cc: sathya.perla, jeffrey.t.kirsher, linux.nics, amirv, shahed.shaikh,
	Dept-GELinuxNICDev, therbert, linux-kernel
In-Reply-To: <1415138202-1197-1-git-send-email-joestringer@nicira.com>

ndo_gso_check() was recently introduced to allow NICs to report the
offloading support that they have on a per-skb basis. Add an
implementation for this driver which checks for something that looks
like VXLAN.

Implementation shamelessly stolen from Tom Herbert:
http://thread.gmane.org/gmane.linux.network/332428/focus=333111

Signed-off-by: Joe Stringer <joestringer@nicira.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index f5e29f7..6184f47 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -505,6 +505,17 @@ static void qlcnic_del_vxlan_port(struct net_device *netdev,
 }
 #endif
 
+static bool qlcnic_gso_check(struct sk_buff *skb, struct net_device *dev)
+{
+	if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
+	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	     skb->inner_protocol != htons(ETH_P_TEB) ||
+	     skb_inner_mac_header(skb) - skb_transport_header(skb) != 16))
+		return false;
+
+	return true;
+}
+
 static const struct net_device_ops qlcnic_netdev_ops = {
 	.ndo_open	   = qlcnic_open,
 	.ndo_stop	   = qlcnic_close,
@@ -537,6 +548,7 @@ static const struct net_device_ops qlcnic_netdev_ops = {
 	.ndo_set_vf_vlan	= qlcnic_sriov_set_vf_vlan,
 	.ndo_set_vf_spoofchk	= qlcnic_sriov_set_vf_spoofchk,
 #endif
+	.ndo_gso_check		= qlcnic_gso_check,
 };
 
 static const struct net_device_ops qlcnic_netdev_failed_ops = {
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net 4/5] net/mlx4_en: Implement ndo_gso_check()
From: Joe Stringer @ 2014-11-04 21:56 UTC (permalink / raw)
  To: netdev
  Cc: sathya.perla, jeffrey.t.kirsher, linux.nics, amirv, shahed.shaikh,
	Dept-GELinuxNICDev, therbert, linux-kernel
In-Reply-To: <1415138202-1197-1-git-send-email-joestringer@nicira.com>

ndo_gso_check() was recently introduced to allow NICs to report the
offloading support that they have on a per-skb basis. Add an
implementation for this driver which checks for something that looks
like VXLAN.

Implementation shamelessly stolen from Tom Herbert:
http://thread.gmane.org/gmane.linux.network/332428/focus=333111

Signed-off-by: Joe Stringer <joestringer@nicira.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index f3032fe..aca9908 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2344,6 +2344,17 @@ static void mlx4_en_del_vxlan_port(struct  net_device *dev,
 }
 #endif
 
+static bool mlx4_en_gso_check(struct sk_buff *skb, struct net_device *dev)
+{
+	if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
+	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	     skb->inner_protocol != htons(ETH_P_TEB) ||
+	     skb_inner_mac_header(skb) - skb_transport_header(skb) != 16))
+		return false;
+
+	return true;
+}
+
 static const struct net_device_ops mlx4_netdev_ops = {
 	.ndo_open		= mlx4_en_open,
 	.ndo_stop		= mlx4_en_close,
@@ -2374,6 +2385,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
 	.ndo_add_vxlan_port	= mlx4_en_add_vxlan_port,
 	.ndo_del_vxlan_port	= mlx4_en_del_vxlan_port,
 #endif
+	.ndo_gso_check		= mlx4_en_gso_check,
 };
 
 static const struct net_device_ops mlx4_netdev_ops_master = {
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net 3/5] fm10k: Implement ndo_gso_check()
From: Joe Stringer @ 2014-11-04 21:56 UTC (permalink / raw)
  To: netdev
  Cc: sathya.perla, jeffrey.t.kirsher, linux.nics, amirv, shahed.shaikh,
	Dept-GELinuxNICDev, therbert, linux-kernel
In-Reply-To: <1415138202-1197-1-git-send-email-joestringer@nicira.com>

ndo_gso_check() was recently introduced to allow NICs to report the
offloading support that they have on a per-skb basis. Add an
implementation for this driver which checks for something that looks
like VXLAN.

Implementation shamelessly stolen from Tom Herbert:
http://thread.gmane.org/gmane.linux.network/332428/focus=333111

Signed-off-by: Joe Stringer <joestringer@nicira.com>
---
Should this driver report support for GSO on packets with tunnel headers
up to 64B like the i40e driver does?
---
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 8811364..b9ef622 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1350,6 +1350,17 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
 	}
 }
 
+static bool fm10k_gso_check(struct sk_buff *skb, struct net_device *dev)
+{
+	if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
+	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	     skb->inner_protocol != htons(ETH_P_TEB) ||
+	     skb_inner_mac_header(skb) - skb_transport_header(skb) != 16))
+		return false;
+
+	return true;
+}
+
 static const struct net_device_ops fm10k_netdev_ops = {
 	.ndo_open		= fm10k_open,
 	.ndo_stop		= fm10k_close,
@@ -1372,6 +1383,7 @@ static const struct net_device_ops fm10k_netdev_ops = {
 	.ndo_do_ioctl		= fm10k_ioctl,
 	.ndo_dfwd_add_station	= fm10k_dfwd_add_station,
 	.ndo_dfwd_del_station	= fm10k_dfwd_del_station,
+	.ndo_gso_check		= fm10k_gso_check,
 };
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net 2/5] i40e: Implement ndo_gso_check()
From: Joe Stringer @ 2014-11-04 21:56 UTC (permalink / raw)
  To: netdev
  Cc: sathya.perla, jeffrey.t.kirsher, linux.nics, amirv, shahed.shaikh,
	Dept-GELinuxNICDev, therbert, linux-kernel
In-Reply-To: <1415138202-1197-1-git-send-email-joestringer@nicira.com>

ndo_gso_check() was recently introduced to allow NICs to report the
offloading support that they have on a per-skb basis. Add an
implementation for this driver which checks for tunnel headers over UDP
of up to 64 octets in length.

Implementation shamelessly stolen from Tom Herbert:
http://thread.gmane.org/gmane.linux.network/332428/focus=333111

Signed-off-by: Joe Stringer <joestringer@nicira.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c |   14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c3a7f4a..21829b5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7444,9 +7444,20 @@ static int i40e_ndo_fdb_dump(struct sk_buff *skb,
 
 	return idx;
 }
-
 #endif /* USE_DEFAULT_FDB_DEL_DUMP */
 #endif /* HAVE_FDB_OPS */
+
+static bool i40e_gso_check(struct sk_buff *skb, struct net_device *dev)
+{
+	if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
+	    (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	     skb->inner_protocol != htons(ETH_P_TEB) ||
+	     skb_inner_mac_header(skb) - skb_transport_header(skb) > 64))
+		return false;
+
+	return true;
+}
+
 static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_open		= i40e_open,
 	.ndo_stop		= i40e_close,
@@ -7487,6 +7498,7 @@ static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_fdb_dump		= i40e_ndo_fdb_dump,
 #endif
 #endif
+	.ndo_gso_check		= i40e_gso_check,
 };
 
 /**
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net 0/5] Implement ndo_gso_check() for vxlan nics
From: Joe Stringer @ 2014-11-04 21:56 UTC (permalink / raw)
  To: netdev
  Cc: sathya.perla, jeffrey.t.kirsher, linux.nics, amirv, shahed.shaikh,
	Dept-GELinuxNICDev, therbert, linux-kernel

Most NICs that report NETIF_F_GSO_UDP_TUNNEL support VXLAN, and not other
UDP-based encapsulation protocols where the format and size of the header may
differ. This patch series implements ndo_gso_check() for these NICs,
restricting the GSO handling to something that looks and smells like VXLAN.

Implementation shamelessly stolen from Tom Herbert (with minor fixups):
http://thread.gmane.org/gmane.linux.network/332428/focus=333111

If there are particular differences for your driver on actual support, I'd like
to hear about them. I adjusted the i40e driver to report support with tunnel
headers of up to 64 octets; perhaps there are other specifics that I've missed.

Joe Stringer (5):
  be2net: Implement ndo_gso_check()
  i40e: Implement ndo_gso_check()
  fm10k: Implement ndo_gso_check()
  net/mlx4_en: Implement ndo_gso_check()
  qlcnic: Implement ndo_gso_check()

 drivers/net/ethernet/emulex/benet/be_main.c      |   12 ++++++++++++
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c  |   12 ++++++++++++
 drivers/net/ethernet/intel/i40e/i40e_main.c      |   14 +++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |   12 ++++++++++++
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   12 ++++++++++++
 5 files changed, 61 insertions(+), 1 deletion(-)

--
1.7.10.4

^ permalink raw reply

* [PATCH V2 net-next] esp4: remove assignment in if condition
From: Fabian Frederick @ 2014-11-04 21:54 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joe Perches, Fabian Frederick, Steffen Klassert, Herbert Xu,
	David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
V2: remove blank line between set and test (suggested by Joe Perches).

 net/ipv4/esp4.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 360b565..d2bf02e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -392,8 +392,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (elen <= 0)
 		goto out;
 
-	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+	err = skb_cow_data(skb, 0, &trailer);
+	if (err < 0)
 		goto out;
+
 	nfrags = err;
 
 	assoclen = sizeof(*esph);
-- 
1.9.3

^ permalink raw reply related

* Re: [Xen-devel] [PATCHv1 net-next] xen-netback: remove unconditional pull_skb_tail in guest Tx path
From: Eric Dumazet @ 2014-11-04 21:43 UTC (permalink / raw)
  To: David Miller
  Cc: zoltan.kiss, david.vrabel, Ian.Campbell, netdev, malcolm.crossley,
	wei.liu2, xen-devel
In-Reply-To: <20141104.161704.1690311989900127361.davem@davemloft.net>

On Tue, 2014-11-04 at 16:17 -0500, David Miller wrote:


> 
> Every protocol demux starts with pskb_may_pull() to pull frag data
> into the linear area, if necessary, before looking at headers.

eth_get_headlen() might be relevant as well, to perform a single copy of
exactly all headers.

This is known to help a bit.

^ permalink raw reply

* Re: [PATCHv1 net-next] xen-netback: remove unconditional pull_skb_tail in guest Tx path
From: David Miller @ 2014-11-04 21:41 UTC (permalink / raw)
  To: david.vrabel; +Cc: netdev, xen-devel, ian.campbell, wei.liu2, malcolm.crossley
In-Reply-To: <1415035431-27485-1-git-send-email-david.vrabel@citrix.com>

From: David Vrabel <david.vrabel@citrix.com>
Date: Mon, 3 Nov 2014 17:23:51 +0000

> From: Malcolm Crossley <malcolm.crossley@citrix.com>
> 
> Unconditionally pulling 128 bytes into the linear buffer is not
> required. Netback has already grant copied up-to 128 bytes from the
> first slot of a packet into the linear buffer. The first slot normally
> contain all the IPv4/IPv6 and TCP/UDP headers.
> 
> The unconditional pull would often copy frag data unnecessarily.  This
> is a performance problem when running on a version of Xen where grant
> unmap avoids TLB flushes for pages which are not accessed.  TLB
> flushes can now be avoided for > 99% of unmaps (it was 0% before).
> 
> Grant unmap TLB flush avoidance will be available in a future version
> of Xen (probably 4.6).
> 
> Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>

Now that this has been discussed a bit, is it possible to get an ack or two?

Thanks.

^ permalink raw reply

* [PATCH v3 1/1] ip-link: in human readable output use dynamic precision length
From: Christian Hesse @ 2014-11-04 21:17 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Christian Hesse
In-Reply-To: <20141104221038.440fd9d7@leda.localdomain>

---
 ip/ipaddress.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index e240bb5..db39437 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -324,6 +324,8 @@ static void print_num(FILE *fp, unsigned width, uint64_t count)
 	const char *prefix = "kMGTPE";
 	const unsigned int base = use_iec ? 1024 : 1000;
 	uint64_t powi = 1;
+	uint16_t powj = 1;
+	uint8_t precision = 2;
 	char buf[64];
 
 	if (!human_readable || count < base) {
@@ -343,8 +345,15 @@ static void print_num(FILE *fp, unsigned width, uint64_t count)
 		++prefix;
 	}
 
-	snprintf(buf, sizeof(buf), "%.1f%c%s", (double) count / powi, 
-		 *prefix, use_iec ? "i" : "");
+	/* try to guess a good number of digits for precision */
+	for (; precision > 0; precision--) {
+		powj *= 10;
+		if (count / powi < powj)
+			break;
+	}
+
+	snprintf(buf, sizeof(buf), "%.*f%c%s", precision,
+		(double) count / powi, *prefix, use_iec ? "i" : "");
 
 	fprintf(fp, "%-*s ", width, buf);
 }
-- 
2.1.3

^ permalink raw reply related

* Re: [Xen-devel] [PATCHv1 net-next] xen-netback: remove unconditional pull_skb_tail in guest Tx path
From: David Miller @ 2014-11-04 21:17 UTC (permalink / raw)
  To: zoltan.kiss
  Cc: david.vrabel, Ian.Campbell, netdev, malcolm.crossley, wei.liu2,
	xen-devel
In-Reply-To: <5457C807.5080509@linaro.org>

From: Zoltan Kiss <zoltan.kiss@linaro.org>
Date: Mon, 03 Nov 2014 18:23:03 +0000

> 
> 
> On 03/11/14 17:46, David Vrabel wrote:
>> On 03/11/14 17:39, Ian Campbell wrote:
>>> On Mon, 2014-11-03 at 17:23 +0000, David Vrabel wrote:
>>>> From: Malcolm Crossley <malcolm.crossley@citrix.com>
>>>>
>>>> Unconditionally pulling 128 bytes into the linear buffer is not
>>>> required. Netback has already grant copied up-to 128 bytes from the
>>>> first slot of a packet into the linear buffer. The first slot normally
>>>> contain all the IPv4/IPv6 and TCP/UDP headers.
>>>
>>> What about when it doesn't? It sounds as if we now won't pull up,
>>> which
>>> would be bad.
>>
>> The network stack will always pull any headers it needs to inspect
>> (the
>> frag may be a userspace page which has the same security issues as a
>> frag with a foreign page).
> I wouldn't bet my life on this, but indeed it should always happen.

I would bet my life on it.

Every protocol demux starts with pskb_may_pull() to pull frag data
into the linear area, if necessary, before looking at headers.

^ permalink raw reply

* Re: [PATCH -next v4 0/3] net: allow setting ecn via routing table
From: David Miller @ 2014-11-04 21:14 UTC (permalink / raw)
  To: fw; +Cc: netdev
In-Reply-To: <1415032503-4936-1-git-send-email-fw@strlen.de>

From: Florian Westphal <fw@strlen.de>
Date: Mon,  3 Nov 2014 17:35:00 +0100

> Here is v4 of the patchset, it's exactly the same as v3 except in patch3/3
> where I added the missing 'const' qualifier to a function argument that
> Eric spotted during review.
> 
> I preserved Eric's Acks so that he doesn't have to resend them.

Looks great, series applied, thanks Florian.

I particularly like how the syncookie code was handled.

^ permalink raw reply

* Re: [PATCH v2 1/1] ip-link: in human readable output use dynamic precision length
From: Christian Hesse @ 2014-11-04 21:10 UTC (permalink / raw)
  To: David Laight; +Cc: Stephen Hemminger, netdev@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6D1C9E72A7@AcuExch.aculab.com>

[-- Attachment #1: Type: text/plain, Size: 1484 bytes --]

David Laight <David.Laight@ACULAB.COM> on Tue, 2014/11/04 11:06:
> From: Christian Hesse
> ...
> > ...
> > @@ -343,8 +344,11 @@ static void print_num(FILE *fp, unsigned width,
> > uint64_t count) ++prefix;
> >  	}
> > 
> > -	snprintf(buf, sizeof(buf), "%.1f%c%s", (double) count / powi,
> > -		 *prefix, use_iec ? "i" : "");
> > +	if ((precision = 3 - snprintf(NULL, 0, "%"PRIu64, count / powi))
> > < 0)
> 
> Don't put assignments in conditionals.

Ok. :D

I do not like this at all... snprintf() would be nice for a catch-all, but we
have to take care of negative values. So let's try something different.
I will think about it and send a new patch.

> > +		precision = 0;
> > +
> > +	snprintf(buf, sizeof(buf), "%.*f%c%s", precision,
> > +		(double) count / powi, *prefix, use_iec ? "i" : "");
> > 
> >  	fprintf(fp, "%-*s ", width, buf);
> >  }
> 
> The above will go wrong in all sorts of horrid ways....
> For instance you are doing a truncating integer divide, but the FP
> value will get rounded for display.
> 
> It would be safer to use integers throughout.

My implementation used integers, but Stephen changed this to floating point
with his cleanups.

IMHO the rounding is ok. This is for *human* readability. ;)
Whoever wants correct values should not ask ip to print human readable values
but rely on pure numbers.

> Oh, and a 2Mbit E1 link is actually 2048000 :-)

Sorry? Did not get the point.
-- 
Best regards,
Chris

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Convert net_msg_warn, NETDEBUG, & LIMIT_NETDEBUG?
From: Joe Perches @ 2014-11-04 20:46 UTC (permalink / raw)
  To: netdev

net_msg_warn is a sysctl used to control the printk
of a bundle of mostly ipv4/ipv6 logging messages.

Does anyone use it?

NETDEBUG is used 4 times, 2 of which seem senseless
as they are allocation failure messages after an
alloc_skb.  These already get stack dumps.

The other NETDEBUG uses are ESP crypto descriptions.

LIMIT_NETDEBUG is used a lot more.

include/net/sock.h:#define LIMIT_NETDEBUG(fmt, args...) \
include/net/sock.h-     do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0)

Most of the LIMIT_NETDEBUG uses are emitted at KERN_DEBUG.

Here is the count of each type of use:
     31 KERN_DEBUG
      2 KERN_ERR
      3 KERN_INFO
     11 KERN_WARNING

Should those KERN_DEBUG uses be converted to
net_dbg_ratelimited so that these uses could be
controlled via dynamic_debug instead of the
net_msg_warn sysctl?

net/dccp/ uses LIMIT_NETDEBUG via DCCP_WARN to
control another 38 KERN_WARNING messages.

The other LIMIT_NETDEBUG uses could be converted
to net_<level>_ratelimited if appropriate.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox