Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v1 06/12] amd-xgbe: Add support for per DMA channel interrupts
From: Tom Lendacky @ 2014-11-04 22:06 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

This patch provides support for interrupts that are generated by the
Tx/Rx DMA channel pairs of the device.  This allows for Tx and Rx
processing to run across multiple processsors.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 Documentation/devicetree/bindings/net/amd-xgbe.txt |   12 +
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c           |   12 +
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c           |  226 ++++++++++++++++----
 drivers/net/ethernet/amd/xgbe/xgbe-main.c          |   10 +
 drivers/net/ethernet/amd/xgbe/xgbe.h               |   10 +
 5 files changed, 219 insertions(+), 51 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt
index 41354f7..26efd52 100644
--- a/Documentation/devicetree/bindings/net/amd-xgbe.txt
+++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt
@@ -7,7 +7,10 @@ Required properties:
    - PCS registers
 - interrupt-parent: Should be the phandle for the interrupt controller
   that services interrupts for this device
-- interrupts: Should contain the amd-xgbe interrupt
+- interrupts: Should contain the amd-xgbe interrupt(s). The first interrupt
+  listed is required and is the general device interrupt. If the optional
+  amd,per-channel-interrupt property is specified, then one additional
+  interrupt for each DMA channel supported by the device should be specified
 - clocks:
    - DMA clock for the amd-xgbe device (used for calculating the
      correct Rx interrupt watchdog timer value on a DMA channel
@@ -23,6 +26,9 @@ Optional properties:
 - mac-address: mac address to be assigned to the device. Can be overridden
   by UEFI.
 - dma-coherent: Present if dma operations are coherent
+- amd,per-channel-interrupt: Indicates that Rx and Tx complete will generate
+  a unique interrupt for each DMA channel - this requires an additional
+  interrupt be configured for each DMA channel
 
 Example:
 	xgbe@e0700000 {
@@ -30,7 +36,9 @@ Example:
 		reg = <0 0xe0700000 0 0x80000>,
 		      <0 0xe0780000 0 0x80000>;
 		interrupt-parent = <&gic>;
-		interrupts = <0 325 4>;
+		interrupts = <0 325 4>,
+			     <0 326 1>, <0 327 1>, <0 328 1>, <0 329 1>;
+		amd,per-channel-interrupt;
 		clocks = <&xgbe_dma_clk>, <&xgbe_ptp_clk>;
 		clock-names = "dma_clk", "ptp_clk";
 		phy-handle = <&phy>;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index b3719f1..ac3d319 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -481,17 +481,21 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
 
 		if (channel->tx_ring) {
 			/* Enable the following Tx interrupts
-			 *   TIE  - Transmit Interrupt Enable (unless polling)
+			 *   TIE  - Transmit Interrupt Enable (unless using
+			 *          per channel interrupts)
 			 */
-			XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
+			if (!pdata->per_channel_irq)
+				XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
 		}
 		if (channel->rx_ring) {
 			/* Enable following Rx interrupts
 			 *   RBUE - Receive Buffer Unavailable Enable
-			 *   RIE  - Receive Interrupt Enable
+			 *   RIE  - Receive Interrupt Enable (unless using
+			 *          per channel interrupts)
 			 */
 			XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 1);
-			XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
+			if (!pdata->per_channel_irq)
+				XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
 		}
 
 		XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, dma_ch_ier);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 07e2d21..c3533e1 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -114,6 +114,7 @@
  *     THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
@@ -126,7 +127,8 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
-static int xgbe_poll(struct napi_struct *, int);
+static int xgbe_one_poll(struct napi_struct *, int);
+static int xgbe_all_poll(struct napi_struct *, int);
 static void xgbe_set_rx_mode(struct net_device *);
 
 static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
@@ -134,6 +136,7 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 	struct xgbe_channel *channel_mem, *channel;
 	struct xgbe_ring *tx_ring, *rx_ring;
 	unsigned int count, i;
+	int ret = -ENOMEM;
 
 	count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
 
@@ -158,6 +161,19 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 		channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
 				    (DMA_CH_INC * i);
 
+		if (pdata->per_channel_irq) {
+			/* Get the DMA interrupt (offset 1) */
+			ret = platform_get_irq(pdata->pdev, i + 1);
+			if (ret < 0) {
+				netdev_err(pdata->netdev,
+					   "platform_get_irq %u failed\n",
+					   i + 1);
+				goto err_irq;
+			}
+
+			channel->dma_irq = ret;
+		}
+
 		if (i < pdata->tx_ring_count) {
 			spin_lock_init(&tx_ring->lock);
 			channel->tx_ring = tx_ring++;
@@ -168,9 +184,9 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 			channel->rx_ring = rx_ring++;
 		}
 
-		DBGPR("  %s - queue_index=%u, dma_regs=%p, tx=%p, rx=%p\n",
+		DBGPR("  %s: queue=%u, dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n",
 		      channel->name, channel->queue_index, channel->dma_regs,
-		      channel->tx_ring, channel->rx_ring);
+		      channel->dma_irq, channel->tx_ring, channel->rx_ring);
 	}
 
 	pdata->channel = channel_mem;
@@ -178,6 +194,9 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 
 	return 0;
 
+err_irq:
+	kfree(rx_ring);
+
 err_rx_ring:
 	kfree(tx_ring);
 
@@ -185,9 +204,7 @@ err_tx_ring:
 	kfree(channel_mem);
 
 err_channel:
-	netdev_err(pdata->netdev, "channel allocation failed\n");
-
-	return -ENOMEM;
+	return ret;
 }
 
 static void xgbe_free_channels(struct xgbe_prv_data *pdata)
@@ -287,11 +304,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 	if (!dma_isr)
 		goto isr_done;
 
-	DBGPR("-->xgbe_isr\n");
-
 	DBGPR("  DMA_ISR = %08x\n", dma_isr);
-	DBGPR("  DMA_DS0 = %08x\n", XGMAC_IOREAD(pdata, DMA_DSR0));
-	DBGPR("  DMA_DS1 = %08x\n", XGMAC_IOREAD(pdata, DMA_DSR1));
 
 	for (i = 0; i < pdata->channel_count; i++) {
 		if (!(dma_isr & (1 << i)))
@@ -302,6 +315,10 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 		dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
 		DBGPR("  DMA_CH%u_ISR = %08x\n", i, dma_ch_isr);
 
+		/* If we get a TI or RI interrupt that means per channel DMA
+		 * interrupts are not enabled, so we use the private data napi
+		 * structure, not the per channel napi structure
+		 */
 		if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, TI) ||
 		    XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RI)) {
 			if (napi_schedule_prep(&pdata->napi)) {
@@ -344,12 +361,28 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 
 	DBGPR("  DMA_ISR = %08x\n", XGMAC_IOREAD(pdata, DMA_ISR));
 
-	DBGPR("<--xgbe_isr\n");
-
 isr_done:
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t xgbe_dma_isr(int irq, void *data)
+{
+	struct xgbe_channel *channel = data;
+
+	/* Per channel DMA interrupts are enabled, so we use the per
+	 * channel napi structure and not the private data napi structure
+	 */
+	if (napi_schedule_prep(&channel->napi)) {
+		/* Disable Tx and Rx interrupts */
+		disable_irq(channel->dma_irq);
+
+		/* Turn on polling */
+		__napi_schedule(&channel->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
 static enum hrtimer_restart xgbe_tx_timer(struct hrtimer *timer)
 {
 	struct xgbe_channel *channel = container_of(timer,
@@ -357,18 +390,24 @@ static enum hrtimer_restart xgbe_tx_timer(struct hrtimer *timer)
 						    tx_timer);
 	struct xgbe_ring *ring = channel->tx_ring;
 	struct xgbe_prv_data *pdata = channel->pdata;
+	struct napi_struct *napi;
 	unsigned long flags;
 
 	DBGPR("-->xgbe_tx_timer\n");
 
+	napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+
 	spin_lock_irqsave(&ring->lock, flags);
 
-	if (napi_schedule_prep(&pdata->napi)) {
+	if (napi_schedule_prep(napi)) {
 		/* Disable Tx and Rx interrupts */
-		xgbe_disable_rx_tx_ints(pdata);
+		if (pdata->per_channel_irq)
+			disable_irq(channel->dma_irq);
+		else
+			xgbe_disable_rx_tx_ints(pdata);
 
 		/* Turn on polling */
-		__napi_schedule(&pdata->napi);
+		__napi_schedule(napi);
 	}
 
 	channel->tx_timer_active = 0;
@@ -504,18 +543,46 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata)
 
 static void xgbe_napi_enable(struct xgbe_prv_data *pdata, unsigned int add)
 {
-	if (add)
-		netif_napi_add(pdata->netdev, &pdata->napi, xgbe_poll,
-			       NAPI_POLL_WEIGHT);
-	napi_enable(&pdata->napi);
+	struct xgbe_channel *channel;
+	unsigned int i;
+
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++) {
+			if (add)
+				netif_napi_add(pdata->netdev, &channel->napi,
+					       xgbe_one_poll, NAPI_POLL_WEIGHT);
+
+			napi_enable(&channel->napi);
+		}
+	} else {
+		if (add)
+			netif_napi_add(pdata->netdev, &pdata->napi,
+				       xgbe_all_poll, NAPI_POLL_WEIGHT);
+
+		napi_enable(&pdata->napi);
+	}
 }
 
 static void xgbe_napi_disable(struct xgbe_prv_data *pdata, unsigned int del)
 {
-	napi_disable(&pdata->napi);
+	struct xgbe_channel *channel;
+	unsigned int i;
+
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++) {
+			napi_disable(&channel->napi);
 
-	if (del)
-		netif_napi_del(&pdata->napi);
+			if (del)
+				netif_napi_del(&channel->napi);
+		}
+	} else {
+		napi_disable(&pdata->napi);
+
+		if (del)
+			netif_napi_del(&pdata->napi);
+	}
 }
 
 void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata)
@@ -828,7 +895,9 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 
 static void xgbe_restart_dev(struct xgbe_prv_data *pdata, unsigned int reset)
 {
+	struct xgbe_channel *channel;
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned int i;
 
 	DBGPR("-->xgbe_restart_dev\n");
 
@@ -837,7 +906,12 @@ static void xgbe_restart_dev(struct xgbe_prv_data *pdata, unsigned int reset)
 		return;
 
 	xgbe_stop(pdata);
-	synchronize_irq(pdata->irq_number);
+	synchronize_irq(pdata->dev_irq);
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++)
+			synchronize_irq(channel->dma_irq);
+	}
 
 	xgbe_free_tx_data(pdata);
 	xgbe_free_rx_data(pdata);
@@ -1165,6 +1239,9 @@ static int xgbe_open(struct net_device *netdev)
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_channel *channel = NULL;
+	char dma_irq_name[IFNAMSIZ + 32];
+	unsigned int i = 0;
 	int ret;
 
 	DBGPR("-->xgbe_open\n");
@@ -1208,14 +1285,32 @@ static int xgbe_open(struct net_device *netdev)
 	INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp);
 
 	/* Request interrupts */
-	ret = devm_request_irq(pdata->dev, netdev->irq, xgbe_isr, 0,
+	ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0,
 			       netdev->name, pdata);
 	if (ret) {
 		netdev_alert(netdev, "error requesting irq %d\n",
-			     pdata->irq_number);
+			     pdata->dev_irq);
 		goto err_rings;
 	}
-	pdata->irq_number = netdev->irq;
+
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++) {
+			snprintf(dma_irq_name, sizeof(dma_irq_name) - 1,
+				 "%s-TxRx-%u", netdev_name(netdev),
+				 channel->queue_index);
+
+			ret = devm_request_irq(pdata->dev, channel->dma_irq,
+					       xgbe_dma_isr, 0, dma_irq_name,
+					       channel);
+			if (ret) {
+				netdev_alert(netdev,
+					     "error requesting irq %d\n",
+					     channel->dma_irq);
+				goto err_irq;
+			}
+		}
+	}
 
 	ret = xgbe_start(pdata);
 	if (ret)
@@ -1228,8 +1323,14 @@ static int xgbe_open(struct net_device *netdev)
 err_start:
 	hw_if->exit(pdata);
 
-	devm_free_irq(pdata->dev, pdata->irq_number, pdata);
-	pdata->irq_number = 0;
+err_irq:
+	if (pdata->per_channel_irq) {
+		/* Using an unsigned int, 'i' will go to UINT_MAX and exit */
+		for (i--, channel--; i < pdata->channel_count; i--, channel--)
+			devm_free_irq(pdata->dev, channel->dma_irq, channel);
+	}
+
+	devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
 
 err_rings:
 	desc_if->free_ring_resources(pdata);
@@ -1254,6 +1355,8 @@ static int xgbe_close(struct net_device *netdev)
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_desc_if *desc_if = &pdata->desc_if;
+	struct xgbe_channel *channel;
+	unsigned int i;
 
 	DBGPR("-->xgbe_close\n");
 
@@ -1269,10 +1372,12 @@ static int xgbe_close(struct net_device *netdev)
 	/* Free the channel and ring structures */
 	xgbe_free_channels(pdata);
 
-	/* Release the interrupt */
-	if (pdata->irq_number != 0) {
-		devm_free_irq(pdata->dev, pdata->irq_number, pdata);
-		pdata->irq_number = 0;
+	/* Release the interrupts */
+	devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++)
+			devm_free_irq(pdata->dev, channel->dma_irq, channel);
 	}
 
 	/* Disable the clocks */
@@ -1505,14 +1610,20 @@ static int xgbe_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto,
 static void xgbe_poll_controller(struct net_device *netdev)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_channel *channel;
+	unsigned int i;
 
 	DBGPR("-->xgbe_poll_controller\n");
 
-	disable_irq(pdata->irq_number);
-
-	xgbe_isr(pdata->irq_number, pdata);
-
-	enable_irq(pdata->irq_number);
+	if (pdata->per_channel_irq) {
+		channel = pdata->channel;
+		for (i = 0; i < pdata->channel_count; i++, channel++)
+			xgbe_dma_isr(channel->dma_irq, channel);
+	} else {
+		disable_irq(pdata->dev_irq);
+		xgbe_isr(pdata->dev_irq, pdata);
+		enable_irq(pdata->dev_irq);
+	}
 
 	DBGPR("<--xgbe_poll_controller\n");
 }
@@ -1704,6 +1815,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
 	struct xgbe_ring_data *rdata;
 	struct xgbe_packet_data *packet;
 	struct net_device *netdev = pdata->netdev;
+	struct napi_struct *napi;
 	struct sk_buff *skb;
 	struct skb_shared_hwtstamps *hwtstamps;
 	unsigned int incomplete, error, context_next, context;
@@ -1717,6 +1829,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget)
 	if (!ring)
 		return 0;
 
+	napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi;
+
 	rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
 	packet = &ring->packet_data;
 	while (packet_count < budget) {
@@ -1849,10 +1963,10 @@ read_again:
 		skb->dev = netdev;
 		skb->protocol = eth_type_trans(skb, netdev);
 		skb_record_rx_queue(skb, channel->queue_index);
-		skb_mark_napi_id(skb, &pdata->napi);
+		skb_mark_napi_id(skb, napi);
 
 		netdev->last_rx = jiffies;
-		napi_gro_receive(&pdata->napi, skb);
+		napi_gro_receive(napi, skb);
 
 next_packet:
 		packet_count++;
@@ -1874,7 +1988,35 @@ next_packet:
 	return packet_count;
 }
 
-static int xgbe_poll(struct napi_struct *napi, int budget)
+static int xgbe_one_poll(struct napi_struct *napi, int budget)
+{
+	struct xgbe_channel *channel = container_of(napi, struct xgbe_channel,
+						    napi);
+	int processed = 0;
+
+	DBGPR("-->xgbe_one_poll: budget=%d\n", budget);
+
+	/* Cleanup Tx ring first */
+	xgbe_tx_poll(channel);
+
+	/* Process Rx ring next */
+	processed = xgbe_rx_poll(channel, budget);
+
+	/* If we processed everything, we are done */
+	if (processed < budget) {
+		/* Turn off polling */
+		napi_complete(napi);
+
+		/* Enable Tx and Rx interrupts */
+		enable_irq(channel->dma_irq);
+	}
+
+	DBGPR("<--xgbe_one_poll: received = %d\n", processed);
+
+	return processed;
+}
+
+static int xgbe_all_poll(struct napi_struct *napi, int budget)
 {
 	struct xgbe_prv_data *pdata = container_of(napi, struct xgbe_prv_data,
 						   napi);
@@ -1883,7 +2025,7 @@ static int xgbe_poll(struct napi_struct *napi, int budget)
 	int processed, last_processed;
 	unsigned int i;
 
-	DBGPR("-->xgbe_poll: budget=%d\n", budget);
+	DBGPR("-->xgbe_all_poll: budget=%d\n", budget);
 
 	processed = 0;
 	ring_budget = budget / pdata->rx_ring_count;
@@ -1911,7 +2053,7 @@ static int xgbe_poll(struct napi_struct *napi, int budget)
 		xgbe_enable_rx_tx_ints(pdata);
 	}
 
-	DBGPR("<--xgbe_poll: received = %d\n", processed);
+	DBGPR("<--xgbe_all_poll: received = %d\n", processed);
 
 	return processed;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index e5077fd..cff9902 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -264,12 +264,18 @@ static int xgbe_probe(struct platform_device *pdev)
 		pdata->awcache = XGBE_DMA_SYS_AWCACHE;
 	}
 
+	/* Check for per channel interrupt support */
+	if (of_property_read_bool(dev->of_node, XGBE_DMA_IRQS))
+		pdata->per_channel_irq = 1;
+
 	ret = platform_get_irq(pdev, 0);
 	if (ret < 0) {
-		dev_err(dev, "platform_get_irq failed\n");
+		dev_err(dev, "platform_get_irq 0 failed\n");
 		goto err_io;
 	}
-	netdev->irq = ret;
+	pdata->dev_irq = ret;
+
+	netdev->irq = pdata->dev_irq;
 	netdev->base_addr = (unsigned long)pdata->xgmac_regs;
 
 	/* Set all the function pointers */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 1480c9d..55c935f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -173,6 +173,7 @@
 /* Device-tree clock names */
 #define XGBE_DMA_CLOCK		"dma_clk"
 #define XGBE_PTP_CLOCK		"ptp_clk"
+#define XGBE_DMA_IRQS		"amd,per-channel-interrupt"
 
 /* Timestamp support - values based on 50MHz PTP clock
  *   50MHz => 20 nsec
@@ -359,6 +360,12 @@ struct xgbe_channel {
 	unsigned int queue_index;
 	void __iomem *dma_regs;
 
+	/* Per channel interrupt irq number */
+	int dma_irq;
+
+	/* Netdev related settings */
+	struct napi_struct napi;
+
 	unsigned int saved_ier;
 
 	unsigned int tx_timer_active;
@@ -609,7 +616,8 @@ struct xgbe_prv_data {
 	/* XPCS indirect addressing mutex */
 	struct mutex xpcs_mutex;
 
-	int irq_number;
+	int dev_irq;
+	unsigned int per_channel_irq;
 
 	struct xgbe_hw_if hw_if;
 	struct xgbe_desc_if desc_if;

^ permalink raw reply related

* [PATCH net-next v1 08/12] amd-xgbe: Add receive side scaling ethtool support
From: Tom Lendacky @ 2014-11-04 22:07 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

This patch adds support for ethtool receive side scaling (RSS) commands.
Support is added to get/set the RSS hash key and the RSS lookup table.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c     |   20 +++++++
 drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c |   74 ++++++++++++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe.h         |    2 +
 3 files changed, 96 insertions(+)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 551794c..7daa2cd 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -419,6 +419,24 @@ static int xgbe_write_rss_lookup_table(struct xgbe_prv_data *pdata)
 	return 0;
 }
 
+static int xgbe_set_rss_hash_key(struct xgbe_prv_data *pdata, const u8 *key)
+{
+	memcpy(pdata->rss_key, key, sizeof(pdata->rss_key));
+
+	return xgbe_write_rss_hash_key(pdata);
+}
+
+static int xgbe_set_rss_lookup_table(struct xgbe_prv_data *pdata,
+				     const u32 *table)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pdata->rss_table); i++)
+		XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH, table[i]);
+
+	return xgbe_write_rss_lookup_table(pdata);
+}
+
 static int xgbe_enable_rss(struct xgbe_prv_data *pdata)
 {
 	int ret;
@@ -2759,6 +2777,8 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
 	/* For Receive Side Scaling */
 	hw_if->enable_rss = xgbe_enable_rss;
 	hw_if->disable_rss = xgbe_disable_rss;
+	hw_if->set_rss_hash_key = xgbe_set_rss_hash_key;
+	hw_if->set_rss_lookup_table = xgbe_set_rss_lookup_table;
 
 	DBGPR("<--xgbe_init_function_ptrs\n");
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 47022fb..95d4453 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -481,6 +481,75 @@ static int xgbe_set_coalesce(struct net_device *netdev,
 	return 0;
 }
 
+static int xgbe_get_rxnfc(struct net_device *netdev,
+			  struct ethtool_rxnfc *rxnfc, u32 *rule_locs)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	switch (rxnfc->cmd) {
+	case ETHTOOL_GRXRINGS:
+		rxnfc->data = pdata->rx_ring_count;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static u32 xgbe_get_rxfh_key_size(struct net_device *netdev)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	return sizeof(pdata->rss_key);
+}
+
+static u32 xgbe_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+
+	return ARRAY_SIZE(pdata->rss_table);
+}
+
+static int xgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	unsigned int i;
+
+	if (indir) {
+		for (i = 0; i < ARRAY_SIZE(pdata->rss_table); i++)
+			indir[i] = XGMAC_GET_BITS(pdata->rss_table[i],
+						  MAC_RSSDR, DMCH);
+	}
+
+	if (key)
+		memcpy(key, pdata->rss_key, sizeof(pdata->rss_key));
+
+	return 0;
+}
+
+static int xgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
+			 const u8 *key)
+{
+	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	struct xgbe_hw_if *hw_if = &pdata->hw_if;
+	unsigned int ret;
+
+	if (indir) {
+		ret = hw_if->set_rss_lookup_table(pdata, indir);
+		if (ret)
+			return ret;
+	}
+
+	if (key) {
+		ret = hw_if->set_rss_hash_key(pdata, key);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int xgbe_get_ts_info(struct net_device *netdev,
 			    struct ethtool_ts_info *ts_info)
 {
@@ -526,6 +595,11 @@ static const struct ethtool_ops xgbe_ethtool_ops = {
 	.get_strings = xgbe_get_strings,
 	.get_ethtool_stats = xgbe_get_ethtool_stats,
 	.get_sset_count = xgbe_get_sset_count,
+	.get_rxnfc = xgbe_get_rxnfc,
+	.get_rxfh_key_size = xgbe_get_rxfh_key_size,
+	.get_rxfh_indir_size = xgbe_get_rxfh_indir_size,
+	.get_rxfh = xgbe_get_rxfh,
+	.set_rxfh = xgbe_set_rxfh,
 	.get_ts_info = xgbe_get_ts_info,
 };
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 2ac4f17..aa8da9f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -557,6 +557,8 @@ struct xgbe_hw_if {
 	/* For Receive Side Scaling */
 	int (*enable_rss)(struct xgbe_prv_data *);
 	int (*disable_rss)(struct xgbe_prv_data *);
+	int (*set_rss_hash_key)(struct xgbe_prv_data *, const u8 *);
+	int (*set_rss_lookup_table)(struct xgbe_prv_data *, const u32 *);
 };
 
 struct xgbe_desc_if {

^ permalink raw reply related

* [PATCH  net-next v1  09/12] amd-xgbe: Fix a spelling error
From: Tom Lendacky @ 2014-11-04 22:07 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

This patch fixes the spelling of the word "descriptor" in a couple
of locations.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 6c5a707..ced9f52 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1185,13 +1185,13 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata,
 	packet->rdesc_count = 0;
 
 	if (xgbe_is_tso(skb)) {
-		/* TSO requires an extra desriptor if mss is different */
+		/* TSO requires an extra descriptor if mss is different */
 		if (skb_shinfo(skb)->gso_size != ring->tx.cur_mss) {
 			context_desc = 1;
 			packet->rdesc_count++;
 		}
 
-		/* TSO requires an extra desriptor for TSO header */
+		/* TSO requires an extra descriptor for TSO header */
 		packet->rdesc_count++;
 
 		XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,

^ permalink raw reply related

* [PATCH net-next] ipv6: trivial, add bracket for the if block
From: Florent Fourcot @ 2014-11-04 22:01 UTC (permalink / raw)
  To: netdev; +Cc: Florent Fourcot

The "else" block is on several lines and use bracket.

Signed-off-by: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
---
 net/ipv6/ip6_flowlabel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dd7d4e..c143437 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -769,10 +769,10 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v)
 static int ip6fl_seq_show(struct seq_file *seq, void *v)
 {
 	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
-	if (v == SEQ_START_TOKEN)
+	if (v == SEQ_START_TOKEN) {
 		seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
 			   "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
-	else {
+	} else {
 		struct ip6_flowlabel *fl = v;
 		seq_printf(seq,
 			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
-- 
2.1.1

^ permalink raw reply related

* Re: [PATCH v2 0/3] drivers: net: xgene: Fix crash for backward compatibility
From: David Miller @ 2014-11-04 22:09 UTC (permalink / raw)
  To: isubramanian; +Cc: netdev, devicetree, linux-arm-kernel, patches, kchudgar
In-Reply-To: <1415044796-5081-1-git-send-email-isubramanian@apm.com>

From: Iyappan Subramanian <isubramanian@apm.com>
Date: Mon,  3 Nov 2014 11:59:53 -0800

> This patch set fixes the following issues that were reported during regression.
> 
> Patch 1,2 : Adds backward compatibility with the older firmware (<= 1.13.28).
> Patch 3   : Use separate hardware resources (descriptor ring, prefetch buffer)
> 	   that are not shared with the firmware

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next] ipv6: trivial, add bracket for the if block
From: David Miller @ 2014-11-04 22:11 UTC (permalink / raw)
  To: florent.fourcot; +Cc: netdev
In-Reply-To: <1415138460-24971-1-git-send-email-florent.fourcot@enst-bretagne.fr>

From: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
Date: Tue,  4 Nov 2014 23:01:00 +0100

> The "else" block is on several lines and use bracket.
> 
> Signed-off-by: Florent Fourcot <florent.fourcot@enst-bretagne.fr>

Applied.

^ permalink raw reply

* [PATCH/TRIVIAL 1/1 net-next] udp: remove blank line between set and test
From: Fabian Frederick @ 2014-11-04 22:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joe Perches, Fabian Frederick, David S. Miller, Alexey Kuznetsov,
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, Jiri Kosina,
	netdev

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/udp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cf0cece..3f001db 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1784,7 +1784,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 				saddr, daddr, udptable);
 
 	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
-
 	if (sk != NULL) {
 		int ret;
 
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH/TRIVIAL 1/1 net-next] udp: remove blank line between set and test
From: David Miller @ 2014-11-04 22:12 UTC (permalink / raw)
  To: fabf; +Cc: linux-kernel, joe, kuznet, jmorris, yoshfuji, kaber, trivial,
	netdev
In-Reply-To: <1415139079-7931-1-git-send-email-fabf@skynet.be>

From: Fabian Frederick <fabf@skynet.be>
Date: Tue,  4 Nov 2014 23:11:19 +0100

> Suggested-by: Joe Perches <joe@perches.com>
> Signed-off-by: Fabian Frederick <fabf@skynet.be>

Applied.

^ permalink raw reply

* Re: [PATCH] net: phy: spi_ks8995: remove sysfs bin file by registered attribute
From: David Miller @ 2014-11-04 22:19 UTC (permalink / raw)
  To: vz; +Cc: netdev, f.fainelli
In-Reply-To: <1415057109-8506-1-git-send-email-vz@mleia.com>

From: Vladimir Zapolskiy <vz@mleia.com>
Date: Tue,  4 Nov 2014 01:25:09 +0200

> When a sysfs binary file is asked to be removed, it is found by
> attribute name, so strictly speaking this change is not a fix, but
> just in case when attribute name is changed in the driver or sysfs
> internals are changed, it might be better to remove the previously
> created file using right the same binary attribute.
> 
> Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>

Applied to net-next, thanks.

^ permalink raw reply

* [PATCH net-next v1 07/12] amd-xgbe: Provide support for receive side scaling
From: Tom Lendacky @ 2014-11-04 22:07 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

This patch provides support for receive side scaling (RSS). RSS allows
for spreading incoming network packets across the Rx queues.  When used
in conjunction with the per DMA channel interrupt support, this allows
the receive processing to be spread across multiple processors.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-common.h |   34 ++++++
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c    |  148 +++++++++++++++++++++++++++
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c    |   16 +++
 drivers/net/ethernet/amd/xgbe/xgbe-main.c   |   16 +++
 drivers/net/ethernet/amd/xgbe/xgbe.h        |   21 ++++
 5 files changed, 233 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 39bcb11..2fe8fc7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -308,6 +308,9 @@
 #define MAC_MACA0LR			0x0304
 #define MAC_MACA1HR			0x0308
 #define MAC_MACA1LR			0x030c
+#define MAC_RSSCR			0x0c80
+#define MAC_RSSAR			0x0c88
+#define MAC_RSSDR			0x0c8c
 #define MAC_TSCR			0x0d00
 #define MAC_SSIR			0x0d04
 #define MAC_STSR			0x0d08
@@ -449,6 +452,24 @@
 #define MAC_RFCR_UP_WIDTH		1
 #define MAC_RQC0R_RXQ0EN_INDEX		0
 #define MAC_RQC0R_RXQ0EN_WIDTH		2
+#define MAC_RSSAR_ADDRT_INDEX		2
+#define MAC_RSSAR_ADDRT_WIDTH		1
+#define MAC_RSSAR_CT_INDEX		1
+#define MAC_RSSAR_CT_WIDTH		1
+#define MAC_RSSAR_OB_INDEX		0
+#define MAC_RSSAR_OB_WIDTH		1
+#define MAC_RSSAR_RSSIA_INDEX		8
+#define MAC_RSSAR_RSSIA_WIDTH		8
+#define MAC_RSSCR_IP2TE_INDEX		1
+#define MAC_RSSCR_IP2TE_WIDTH		1
+#define MAC_RSSCR_RSSE_INDEX		0
+#define MAC_RSSCR_RSSE_WIDTH		1
+#define MAC_RSSCR_TCP4TE_INDEX		2
+#define MAC_RSSCR_TCP4TE_WIDTH		1
+#define MAC_RSSCR_UDP4TE_INDEX		3
+#define MAC_RSSCR_UDP4TE_WIDTH		1
+#define MAC_RSSDR_DMCH_INDEX		0
+#define MAC_RSSDR_DMCH_WIDTH		4
 #define MAC_SSIR_SNSINC_INDEX		8
 #define MAC_SSIR_SNSINC_WIDTH		8
 #define MAC_SSIR_SSINC_INDEX		16
@@ -848,6 +869,8 @@
 #define RX_PACKET_ATTRIBUTES_CONTEXT_WIDTH	1
 #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_INDEX	5
 #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH	1
+#define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX	6
+#define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH	1
 
 #define RX_NORMAL_DESC0_OVT_INDEX		0
 #define RX_NORMAL_DESC0_OVT_WIDTH		16
@@ -865,12 +888,23 @@
 #define RX_NORMAL_DESC3_FD_WIDTH		1
 #define RX_NORMAL_DESC3_INTE_INDEX		30
 #define RX_NORMAL_DESC3_INTE_WIDTH		1
+#define RX_NORMAL_DESC3_L34T_INDEX		20
+#define RX_NORMAL_DESC3_L34T_WIDTH		4
 #define RX_NORMAL_DESC3_LD_INDEX		28
 #define RX_NORMAL_DESC3_LD_WIDTH		1
 #define RX_NORMAL_DESC3_OWN_INDEX		31
 #define RX_NORMAL_DESC3_OWN_WIDTH		1
 #define RX_NORMAL_DESC3_PL_INDEX		0
 #define RX_NORMAL_DESC3_PL_WIDTH		14
+#define RX_NORMAL_DESC3_RSV_INDEX		26
+#define RX_NORMAL_DESC3_RSV_WIDTH		1
+
+#define RX_DESC3_L34T_IPV4_TCP			1
+#define RX_DESC3_L34T_IPV4_UDP			2
+#define RX_DESC3_L34T_IPV4_ICMP			3
+#define RX_DESC3_L34T_IPV6_TCP			9
+#define RX_DESC3_L34T_IPV6_UDP			10
+#define RX_DESC3_L34T_IPV6_ICMP			11
 
 #define RX_CONTEXT_DESC3_TSA_INDEX		4
 #define RX_CONTEXT_DESC3_TSA_WIDTH		1
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index ac3d319..551794c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -351,6 +351,127 @@ static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata)
 	XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE);
 }
 
+static int xgbe_write_rss_reg(struct xgbe_prv_data *pdata, unsigned int type,
+			      unsigned int index, unsigned int val)
+{
+	unsigned int wait;
+	int ret = 0;
+
+	mutex_lock(&pdata->rss_mutex);
+
+	if (XGMAC_IOREAD_BITS(pdata, MAC_RSSAR, OB)) {
+		ret = -EBUSY;
+		goto unlock;
+	}
+
+	XGMAC_IOWRITE(pdata, MAC_RSSDR, val);
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_RSSAR, RSSIA, index);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RSSAR, ADDRT, type);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RSSAR, CT, 0);
+	XGMAC_IOWRITE_BITS(pdata, MAC_RSSAR, OB, 1);
+
+	wait = 1000;
+	while (wait--) {
+		if (!XGMAC_IOREAD_BITS(pdata, MAC_RSSAR, OB))
+			goto unlock;
+
+		usleep_range(1000, 1500);
+	}
+
+	ret = -EBUSY;
+
+unlock:
+	mutex_unlock(&pdata->rss_mutex);
+
+	return ret;
+}
+
+static int xgbe_write_rss_hash_key(struct xgbe_prv_data *pdata)
+{
+	unsigned int key_regs = sizeof(pdata->rss_key) / sizeof(u32);
+	unsigned int *key = (unsigned int *)&pdata->rss_key;
+	int ret;
+
+	while (key_regs--) {
+		ret = xgbe_write_rss_reg(pdata, XGBE_RSS_HASH_KEY_TYPE,
+					 key_regs, *key++);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int xgbe_write_rss_lookup_table(struct xgbe_prv_data *pdata)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(pdata->rss_table); i++) {
+		ret = xgbe_write_rss_reg(pdata,
+					 XGBE_RSS_LOOKUP_TABLE_TYPE, i,
+					 pdata->rss_table[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int xgbe_enable_rss(struct xgbe_prv_data *pdata)
+{
+	int ret;
+
+	if (!pdata->hw_feat.rss)
+		return -EOPNOTSUPP;
+
+	/* Program the hash key */
+	ret = xgbe_write_rss_hash_key(pdata);
+	if (ret)
+		return ret;
+
+	/* Program the lookup table */
+	ret = xgbe_write_rss_lookup_table(pdata);
+	if (ret)
+		return ret;
+
+	/* Set the RSS options */
+	XGMAC_IOWRITE(pdata, MAC_RSSCR, pdata->rss_options);
+
+	/* Enable RSS */
+	XGMAC_IOWRITE_BITS(pdata, MAC_RSSCR, RSSE, 1);
+
+	return 0;
+}
+
+static int xgbe_disable_rss(struct xgbe_prv_data *pdata)
+{
+	if (!pdata->hw_feat.rss)
+		return -EOPNOTSUPP;
+
+	XGMAC_IOWRITE_BITS(pdata, MAC_RSSCR, RSSE, 0);
+
+	return 0;
+}
+
+static void xgbe_config_rss(struct xgbe_prv_data *pdata)
+{
+	int ret;
+
+	if (!pdata->hw_feat.rss)
+		return;
+
+	if (pdata->netdev->features & NETIF_F_RXHASH)
+		ret = xgbe_enable_rss(pdata);
+	else
+		ret = xgbe_disable_rss(pdata);
+
+	if (ret)
+		netdev_err(pdata->netdev,
+			   "error configuring RSS, RSS disabled\n");
+}
+
 static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata)
 {
 	unsigned int max_q_count, q_count;
@@ -1408,7 +1529,7 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 	struct xgbe_ring_desc *rdesc;
 	struct xgbe_packet_data *packet = &ring->packet_data;
 	struct net_device *netdev = channel->pdata->netdev;
-	unsigned int err, etlt;
+	unsigned int err, etlt, l34t;
 
 	DBGPR("-->xgbe_dev_read: cur = %d\n", ring->cur);
 
@@ -1447,6 +1568,26 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
 		rdata->hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2,
 						   RX_NORMAL_DESC2, HL);
 
+	/* Get the RSS hash */
+	if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, RSV)) {
+		XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+			       RSS_HASH, 1);
+
+		packet->rss_hash = le32_to_cpu(rdesc->desc1);
+
+		l34t = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, L34T);
+		switch (l34t) {
+		case RX_DESC3_L34T_IPV4_TCP:
+		case RX_DESC3_L34T_IPV4_UDP:
+		case RX_DESC3_L34T_IPV6_TCP:
+		case RX_DESC3_L34T_IPV6_UDP:
+			packet->rss_hash_type = PKT_HASH_TYPE_L4;
+
+		default:
+			packet->rss_hash_type = PKT_HASH_TYPE_L3;
+		}
+	}
+
 	/* Get the packet length */
 	rdata->len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
 
@@ -2479,6 +2620,7 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
 	xgbe_config_rx_buffer_size(pdata);
 	xgbe_config_tso_mode(pdata);
 	xgbe_config_sph_mode(pdata);
+	xgbe_config_rss(pdata);
 	desc_if->wrapper_tx_desc_init(pdata);
 	desc_if->wrapper_rx_desc_init(pdata);
 	xgbe_enable_dma_interrupts(pdata);
@@ -2614,5 +2756,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
 	hw_if->config_dcb_tc = xgbe_config_dcb_tc;
 	hw_if->config_dcb_pfc = xgbe_config_dcb_pfc;
 
+	/* For Receive Side Scaling */
+	hw_if->enable_rss = xgbe_enable_rss;
+	hw_if->disable_rss = xgbe_disable_rss;
+
 	DBGPR("<--xgbe_init_function_ptrs\n");
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index c3533e1..6c5a707 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1661,12 +1661,21 @@ static int xgbe_set_features(struct net_device *netdev,
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
-	netdev_features_t rxcsum, rxvlan, rxvlan_filter;
+	netdev_features_t rxhash, rxcsum, rxvlan, rxvlan_filter;
+	int ret = 0;
 
+	rxhash = pdata->netdev_features & NETIF_F_RXHASH;
 	rxcsum = pdata->netdev_features & NETIF_F_RXCSUM;
 	rxvlan = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX;
 	rxvlan_filter = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_FILTER;
 
+	if ((features & NETIF_F_RXHASH) && !rxhash)
+		ret = hw_if->enable_rss(pdata);
+	else if (!(features & NETIF_F_RXHASH) && rxhash)
+		ret = hw_if->disable_rss(pdata);
+	if (ret)
+		return ret;
+
 	if ((features & NETIF_F_RXCSUM) && !rxcsum)
 		hw_if->enable_rx_csum(pdata);
 	else if (!(features & NETIF_F_RXCSUM) && rxcsum)
@@ -1960,6 +1969,11 @@ read_again:
 			hwtstamps->hwtstamp = ns_to_ktime(nsec);
 		}
 
+		if (XGMAC_GET_BITS(packet->attributes,
+				   RX_PACKET_ATTRIBUTES, RSS_HASH))
+			skb_set_hash(skb, packet->rss_hash,
+				     packet->rss_hash_type);
+
 		skb->dev = netdev;
 		skb->protocol = eth_type_trans(skb, netdev);
 		skb_record_rx_queue(skb, channel->queue_index);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index cff9902..05fbdf9 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -170,6 +170,7 @@ static int xgbe_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct resource *res;
 	const u8 *mac_addr;
+	unsigned int i;
 	int ret;
 
 	DBGPR("--> xgbe_probe\n");
@@ -190,6 +191,7 @@ static int xgbe_probe(struct platform_device *pdev)
 
 	spin_lock_init(&pdata->lock);
 	mutex_init(&pdata->xpcs_mutex);
+	mutex_init(&pdata->rss_mutex);
 	spin_lock_init(&pdata->tstamp_lock);
 
 	/* Set and validate the number of descriptors for a ring */
@@ -335,6 +337,17 @@ static int xgbe_probe(struct platform_device *pdev)
 		goto err_io;
 	}
 
+	/* Initialize RSS hash key and lookup table */
+	get_random_bytes(pdata->rss_key, sizeof(pdata->rss_key));
+
+	for (i = 0; i < XGBE_RSS_MAX_TABLE_SIZE; i++)
+		XGMAC_SET_BITS(pdata->rss_table[i], MAC_RSSDR, DMCH,
+			       i % pdata->rx_ring_count);
+
+	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, IP2TE, 1);
+	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, TCP4TE, 1);
+	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1);
+
 	/* Prepare to regsiter with MDIO */
 	pdata->mii_bus_id = kasprintf(GFP_KERNEL, "%s", pdev->name);
 	if (!pdata->mii_bus_id) {
@@ -365,6 +378,9 @@ static int xgbe_probe(struct platform_device *pdev)
 			      NETIF_F_HW_VLAN_CTAG_TX |
 			      NETIF_F_HW_VLAN_CTAG_FILTER;
 
+	if (pdata->hw_feat.rss)
+		netdev->hw_features |= NETIF_F_RXHASH;
+
 	netdev->vlan_features |= NETIF_F_SG |
 				 NETIF_F_IP_CSUM |
 				 NETIF_F_IPV6_CSUM |
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 55c935f..2ac4f17 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -215,6 +215,12 @@
 /* Maximum MAC address hash table size (256 bits = 8 bytes) */
 #define XGBE_MAC_HASH_TABLE_SIZE	8
 
+/* Receive Side Scaling */
+#define XGBE_RSS_HASH_KEY_SIZE		40
+#define XGBE_RSS_MAX_TABLE_SIZE		256
+#define XGBE_RSS_LOOKUP_TABLE_TYPE	0
+#define XGBE_RSS_HASH_KEY_TYPE		1
+
 struct xgbe_prv_data;
 
 struct xgbe_packet_data {
@@ -233,6 +239,9 @@ struct xgbe_packet_data {
 	unsigned short vlan_ctag;
 
 	u64 rx_tstamp;
+
+	u32 rss_hash;
+	enum pkt_hash_types rss_hash_type;
 };
 
 /* Common Rx and Tx descriptor mapping */
@@ -544,6 +553,10 @@ struct xgbe_hw_if {
 	/* For Data Center Bridging config */
 	void (*config_dcb_tc)(struct xgbe_prv_data *);
 	void (*config_dcb_pfc)(struct xgbe_prv_data *);
+
+	/* For Receive Side Scaling */
+	int (*enable_rss)(struct xgbe_prv_data *);
+	int (*disable_rss)(struct xgbe_prv_data *);
 };
 
 struct xgbe_desc_if {
@@ -616,6 +629,9 @@ struct xgbe_prv_data {
 	/* XPCS indirect addressing mutex */
 	struct mutex xpcs_mutex;
 
+	/* RSS addressing mutex */
+	struct mutex rss_mutex;
+
 	int dev_irq;
 	unsigned int per_channel_irq;
 
@@ -668,6 +684,11 @@ struct xgbe_prv_data {
 	unsigned int tx_pause;
 	unsigned int rx_pause;
 
+	/* Receive Side Scaling settings */
+	u8 rss_key[XGBE_RSS_HASH_KEY_SIZE];
+	u32 rss_table[XGBE_RSS_MAX_TABLE_SIZE];
+	u32 rss_options;
+
 	/* MDIO settings */
 	struct module *phy_module;
 	char *mii_bus_id;

^ permalink raw reply related

* [PATCH net-next v1 10/12] amd-xgbe-phy: Sync PCS and PHY modes after reset
From: Tom Lendacky @ 2014-11-04 22:07 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

This patch adds support to sync the states of the PCS and the PHY
after a reset is performed.  If the PCS and the PHY are not in the
same state after reset an extra mode change would be performed. This
extra mode change might not be needed if the PCS and the PHY are
synced up after reset.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/phy/amd-xgbe-phy.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/amd-xgbe-phy.c b/drivers/net/phy/amd-xgbe-phy.c
index c456559..37b9f3f 100644
--- a/drivers/net/phy/amd-xgbe-phy.c
+++ b/drivers/net/phy/amd-xgbe-phy.c
@@ -992,7 +992,8 @@ static int amd_xgbe_phy_soft_reset(struct phy_device *phydev)
 	if (ret & MDIO_CTRL1_RESET)
 		return -ETIMEDOUT;
 
-	return 0;
+	/* Make sure the XPCS and SerDes are in compatible states */
+	return amd_xgbe_phy_xgmii_mode(phydev);
 }
 
 static int amd_xgbe_phy_config_init(struct phy_device *phydev)

^ permalink raw reply related

* [PATCH net-next v1 12/12] amd-xgbe-phy: Let AMD_XGBE_PHY depend on HAS_IOMEM
From: Tom Lendacky @ 2014-11-04 22:07 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

The amd-xgbe-phy driver needs to perform ioremap calls, so add HAS_IOMEM
to its build dependency.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/phy/Kconfig |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 75472cf7..b4b0f80 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -26,7 +26,7 @@ config AMD_PHY
 
 config AMD_XGBE_PHY
 	tristate "Driver for the AMD 10GbE (amd-xgbe) PHYs"
-	depends on OF
+	depends on OF && HAS_IOMEM
 	---help---
 	  Currently supports the AMD 10GbE PHY
 

^ permalink raw reply related

* [PATCH net-next v1 11/12] amd-xgbe: Let AMD_XGBE depend on HAS_IOMEM
From: Tom Lendacky @ 2014-11-04 22:07 UTC (permalink / raw)
  To: netdev; +Cc: davem
In-Reply-To: <20141104220620.24738.10070.stgit@tlendack-t1.amdoffice.net>

The amd-xgbe driver needs to perform ioremap calls, so add HAS_IOMEM
to its build dependency.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 drivers/net/ethernet/amd/Kconfig |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index 8319c99..7a5e4aa 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -179,7 +179,7 @@ config SUNLANCE
 
 config AMD_XGBE
 	tristate "AMD 10GbE Ethernet driver"
-	depends on OF_NET
+	depends on OF_NET && HAS_IOMEM
 	select PHYLIB
 	select AMD_XGBE_PHY
 	select BITREVERSE

^ permalink raw reply related

* Re: Fw: [Bug 82471] New: net/core/dev.c skb_war_bad_offload
From: Tom Herbert @ 2014-11-04 22:57 UTC (permalink / raw)
  To: Jesse Brandeburg; +Cc: Stephen Hemminger, NetDEV list, Jesse Brandeburg
In-Reply-To: <CAEuXFEwJeLeCRmNMXVk9naKd8zC2gos7-ORzu7XW=Ja0N-CkEA@mail.gmail.com>

Using vlan and bonding? vlan_dev_hard_start_xmit called. A possible
cause is that bonding interface is out of sync with slave interface
w.r.t. GSO features. Do we know if this worked in 3.14, 3.15?


On Tue, Nov 4, 2014 at 1:59 PM, Jesse Brandeburg
<jesse.brandeburg@gmail.com> wrote:
> I believe this is a regression, as reporters say this worked with 3.13 kernels.
>
> anyone have any idea what is up with this skb_warn_bad_offload with
> the bonding driver?   see the bug text
> for a lot more detail.  Is it fixed already?  This is occurring on top
> of both Intel and Broadcom nics and is
> with 802.3ad bonding enabled, and turning off scatter gather avoids the issue.
>
> On Fri, Aug 15, 2014 at 2:26 PM, Stephen Hemminger
> <stephen@networkplumber.org> wrote:
>
>> Subject: [Bug 82471] New: net/core/dev.c skb_war_bad_offload
>>
>>
>> https://bugzilla.kernel.org/show_bug.cgi?id=82471
>>
>>             Bug ID: 82471
>>            Summary: net/core/dev.c skb_war_bad_offload
>>            Product: Networking
>>            Version: 2.5
>>     Kernel Version: 3.16.1
>>           Hardware: x86-64
>>                 OS: Linux
>>               Tree: Mainline
>>             Status: NEW
>>           Severity: normal
>>           Priority: P1
>>          Component: IPV4
>>           Assignee: shemminger@linux-foundation.org
>>           Reporter: vladi@aresgate.net
>>         Regression: No
>>
>> Created attachment 146761
>>   --> https://bugzilla.kernel.org/attachment.cgi?id=146761&action=edit
>> kernel config
>>
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973225] ------------[
>> cut here ]------------
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973236] WARNING: CPU:
>> 2 PID: 0 at net/core/dev.c:2246 skb_warn_bad_offload+0xc8/0xd5()
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973238] :
>> caps=(0x000000000419fba9, 0x00000000001b583b) len=2962 data_len=2896
>> gso_size=1448 gso_type=1 ip_summed=3
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973239] Modules
>> linked in: ntfs msdos xfs libcrc32c ipmi_devintf intel_rapl
>> x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul
>> ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper
>> cryptd sb_edac edac_core 8021q garp ioatdma stp ipmi_si mrp llc bonding
>> hid_generic ixgbe usbhid hid ahci dca libahci mdio
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973257] CPU: 2 PID: 0
>> Comm: swapper/2 Tainted: G        W     3.16.0 #2
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973259] Hardware
>> name: Supermicro X9DRD-7LN4F(-JBOD)/X9DRD-EF/X9DRD-7LN4F, BIOS 3.0a 12/05/2013
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973260]
>> 0000000000000009 ffff88046fd036b0 ffffffff815c4096 ffff88046fd036f8
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973262]
>> ffff88046fd036e8 ffffffff8103f633 ffff880018b7c4e0 ffff8804687df000
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973264]
>> 0000000000000001 0000000000000003 ffffffffa0193320 ffff88046fd03748
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973266] Call Trace:
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973268]  <IRQ>
>> [<ffffffff815c4096>] dump_stack+0x45/0x56
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973280]
>> [<ffffffff8103f633>] warn_slowpath_common+0x73/0x90
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973286]
>> [<ffffffff8103f697>] warn_slowpath_fmt+0x47/0x50
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973288]
>> [<ffffffff812edadc>] ? ___ratelimit+0x7c/0xf0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973291]
>> [<ffffffff815c5e19>] skb_warn_bad_offload+0xc8/0xd5
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973294]
>> [<ffffffff814e57fe>] skb_checksum_help+0x16e/0x180
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973297]
>> [<ffffffff814e9ecc>] dev_hard_start_xmit+0x42c/0x4b0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973299]
>> [<ffffffff814ea154>] ? __dev_queue_xmit+0x204/0x440
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973301]
>> [<ffffffff814ea232>] __dev_queue_xmit+0x2e2/0x440
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973302]
>> [<ffffffff814ea39b>] ? dev_queue_xmit+0xb/0x10
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973304]
>> [<ffffffff814ea39b>] dev_queue_xmit+0xb/0x10
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973308]
>> [<ffffffffa012b758>] vlan_dev_hard_start_xmit+0x88/0x100 [8021q]
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973317]
>> [<ffffffff814e9d9a>] dev_hard_start_xmit+0x2fa/0x4b0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973321]
>> [<ffffffff814ea232>] __dev_queue_xmit+0x2e2/0x440
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973323]
>> [<ffffffff814ea39b>] dev_queue_xmit+0xb/0x10
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973325]
>> [<ffffffff814f1192>] neigh_connected_output+0xb2/0xf0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973327]
>> [<ffffffff815192dc>] ip_finish_output+0x4ec/0x890
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973329]
>> [<ffffffff8151ac03>] ip_output+0x53/0x90
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973331]
>> [<ffffffff8151a39b>] ip_local_out_sk+0x2b/0x30
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973333]
>> [<ffffffff8151a6fa>] ip_queue_xmit+0x13a/0x3c0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973335]
>> [<ffffffff815309fa>] tcp_transmit_skb+0x42a/0x8f0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973337]
>> [<ffffffff81530ffa>] tcp_write_xmit+0x13a/0xc00
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973347]
>> [<ffffffff8152f033>] ? tcp_established_options+0x33/0xd0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973350]
>> [<ffffffff81531d09>] __tcp_push_pending_frames+0x29/0xc0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973353]
>> [<ffffffff8152da77>] tcp_rcv_established+0x1f7/0x5e0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973356]
>> [<ffffffff81535fc5>] tcp_v4_do_rcv+0x215/0x4a0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973369]
>> [<ffffffff810651f8>] ? ttwu_do_activate.constprop.64+0x58/0x60
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973374]
>> [<ffffffff81295d31>] ? security_sock_rcv_skb+0x11/0x20
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973377]
>> [<ffffffff815392ad>] tcp_v4_rcv+0x73d/0x7c0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973380]
>> [<ffffffff8106fafc>] ? update_group_capacity+0x16c/0x270
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973386]
>> [<ffffffff812e8300>] ? cpumask_next_and+0x30/0x50
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973388]
>> [<ffffffff81514b50>] ip_local_deliver_finish+0x80/0x1c0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973390]
>> [<ffffffff81515154>] ip_local_deliver+0x34/0x90
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973392]
>> [<ffffffff81514d99>] ip_rcv_finish+0x109/0x350
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973399]
>> [<ffffffff815153d2>] ip_rcv+0x222/0x370
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973403]
>> [<ffffffff814e5eb6>] __netif_receive_skb_core+0x416/0x570
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973407]
>> [<ffffffff814e73f3>] __netif_receive_skb+0x13/0x60
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973410]
>> [<ffffffff814e745e>] netif_receive_skb_internal+0x1e/0x90
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973415]
>> [<ffffffff814e7b40>] napi_gro_receive+0x70/0xa0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973422]
>> [<ffffffffa0134f4c>] ixgbe_clean_rx_irq+0x75c/0xb20 [ixgbe]
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973427]
>> [<ffffffffa0136172>] ixgbe_poll+0x522/0x850 [ixgbe]
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973430]
>> [<ffffffff8105e986>] ? hrtimer_get_next_event+0xb6/0xc0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973437]
>> [<ffffffff814e8dc1>] net_rx_action+0x101/0x1a0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973443]
>> [<ffffffff810430ab>] __do_softirq+0xdb/0x240
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973447]
>> [<ffffffff8104349e>] irq_exit+0xee/0x110
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973450]
>> [<ffffffff81004913>] do_IRQ+0x53/0xf0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973453]
>> [<ffffffff815caaaa>] common_interrupt+0x6a/0x6a
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973454]  <EOI>
>> [<ffffffff814af007>] ? cpuidle_enter_state+0x47/0xc0
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973463]
>> [<ffffffff814af132>] cpuidle_enter+0x12/0x20
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973468]
>> [<ffffffff81075fbf>] cpu_startup_entry+0x24f/0x280
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973477]
>> [<ffffffff810905e3>] ? clockevents_config_and_register+0x23/0x30
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973482]
>> [<ffffffff810282be>] start_secondary+0x1be/0x270
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.973486] ---[ end
>> trace de552357488766e8 ]---
>> Aug  6 06:46:50 prod-ent-ceph03.dc2.ec.loc kernel: [29530.974181] ------------[
>> cut here ]------------
>>
>> --
>> You are receiving this mail because:
>> You are the assignee for the bug.
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next 1/7] bpf: add 'flags' attribute to BPF_MAP_UPDATE_ELEM command
From: Alexei Starovoitov @ 2014-11-04 23:04 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: David S. Miller, Ingo Molnar, Andy Lutomirski,
	Hannes Frederic Sowa, Eric Dumazet, Linux API,
	Network Development, LKML
In-Reply-To: <54589B89.5000309-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

On Tue, Nov 4, 2014 at 1:25 AM, Daniel Borkmann <dborkman-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:
>
> On 11/04/2014 03:54 AM, Alexei Starovoitov wrote:
>>
>> the current meaning of BPF_MAP_UPDATE_ELEM syscall command is:
>> either update existing map element or create a new one.
>> Initially the plan was to add a new command to handle the case of
>> 'create new element if it didn't exist', but 'flags' style looks
>> cleaner and overall diff is much smaller (more code reused), so add 'flags'
>> attribute to BPF_MAP_UPDATE_ELEM command with the following meaning:
>> enum {
>>    BPF_MAP_UPDATE_OR_CREATE = 0, /* add new element or update existing */
>>    BPF_MAP_CREATE_ONLY,          /* add new element if it didn't exist */
>>    BPF_MAP_UPDATE_ONLY           /* update existing element */
>> };
>
>
> From you commit message/code I currently don't see an explanation why
> it cannot be done in typical ``flags style'' as various syscalls do,
> i.e. BPF_MAP_UPDATE_OR_CREATE rather represented as ...
>
>   BPF_MAP_CREATE | BPF_MAP_UPDATE
>
> Do you expect more than 64 different flags to be passed from user space
> for BPF_MAP?

several reasons:
- preserve flags==0 as default behavior
- avoid holes and extra checks for invalid combinations, so
  if (flags > BPF_MAP_UPDATE_ONLY) goto err, is enough.
- it looks much neater when user space uses
  BPF_MAP_UPDATE_OR_CREATE instead of ORing bits.

Note this choice doesn't prevent adding bit-like flags
in the future. Today I cannot think of any new flags
for the update() command, but if somebody comes up with
a new selector that can apply to all three combinations,
we can add it as 3rd bit that can be ORed.
Default will stay zero and 'if >' check in older
kernels will seamlessly work with new userspace.
I don't like holes in flags and combinatorial
explosion of bits and checks for them unless
absolutely necessary.

^ permalink raw reply

* Re: [PATCH net-next 6/7] bpf: allow eBPF programs to use maps
From: Alexei Starovoitov @ 2014-11-04 23:08 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: David S. Miller, Ingo Molnar, Andy Lutomirski,
	Hannes Frederic Sowa, Eric Dumazet, Linux API,
	Network Development, LKML
In-Reply-To: <5458A17B.7030904@redhat.com>

On Tue, Nov 4, 2014 at 1:50 AM, Daniel Borkmann <dborkman@redhat.com> wrote:
> These WARN_ON_ONCE(!rcu_read_lock_held()) seem odd. While I see the point
> that
> you're holding RCU read lock on the lookup, can you elaborate on your RCU
> usage
> here and why it's necessary for delete/update?
>
> I suspect due to the synchronize_rcu() you're using and not using any RCU
> accessors but plain memcpy() e.g. in case of the array ...?

Correct in case of array.
Also hash delete/update() call into lookup() internally
that is using _rcu() helpers...
Future map types might have much more
complex implementations (like LPM), so it helps
to state the rules early.

Another reason is more complex to explain:
A program that intends to access maps has to be one
rcu critical section. So all lookup/update/delete calls
are under rcu_lock_held.
Since programs by themselves cannot have WARN_ON
inside them, I've added WARN_ON in these three
functions that will be called from the programs to make
sure that kernel subsystems don't do (*prog->bpf_func)(...)
without taking rcu_lock if they intend to let programs
access maps.

Having said that in the future we might have a case
for programs that don't call into these functions at all
and execute instructions only. Those won't need
rcu_lock() wrap. I experimented with that for the
patch where I replaced pred-tree walker with eBPF
program. There is no rcu there. And no calls
to map accessors.

Has to be noted, that socket filters use rcu to
protect sk_filter pointer and program itself. So for that
use case we'll keep using rcu for foreseeable future.
For tracing filters I had to add rcu_lock() around
BPF_PROG_RUN() invocation and these WARN_ON
checks saved me a lot of headache, so I prefer to
keep them since they cost nothing when lockdep is off.

^ permalink raw reply

* Re: [PATCH net-next 3/7] bpf: add array type of eBPF maps
From: Alexei Starovoitov @ 2014-11-04 23:14 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: David S. Miller, Ingo Molnar, Andy Lutomirski,
	Hannes Frederic Sowa, Eric Dumazet, Linux API,
	Network Development, LKML
In-Reply-To: <5458A349.5020401@redhat.com>

On Tue, Nov 4, 2014 at 1:58 AM, Daniel Borkmann <dborkman@redhat.com> wrote:
>> +
>> +       memcpy(array->value + array->elem_size * index, value,
>> array->elem_size);
>
> What would protect this from concurrent updates?

nothing.
that's what I meant in commit log:
 - map_update_elem() replaces elements in an non-atomic way
   (for atomic updates hashtable type should be used instead)

The array map is like C array of structures.
Nothing protects concurrent access.
It's used in the cases where accuracy is not needed
or when there is no concurrent access.
To compute a histogram of events in tracing the array
of integers is used. Every integer is a counter. Program
increments it (may be without using xadd) and
user space periodically reads it back.
map_update_elem() is called by userspace once
to initialize it if zero-init is not enough.
Programs do lookup() and modify the values.
For array type update() method is used rarely,
delete() is never used and get_next() is needed
for completeness to browse maps through
common map API.
I'm guessing you're asking, because it may feel
that adding a lock() will help to make it more useful? ;)
It's not, since programs cannot take a lock().

^ permalink raw reply

* [PATCH net-next] fast_hash: avoid indirect function calls
From: Hannes Frederic Sowa @ 2014-11-04 23:23 UTC (permalink / raw)
  To: netdev; +Cc: kernel, dborkman, Thomas Graf

By default the arch_fast_hash hashing function pointers are initialized
to jhash(2). If during boot-up a CPU with SSE4.2 is detected they get
updated to the CRC32 ones. This dispatching scheme incurs a function
pointer lookup and indirect call for every hashing operation.

rhashtable as a user of arch_fast_hash e.g. stores pointers to hashing
functions in its structure, too, causing two indirect branches per
hashing operation.

Using alternative_call we can get away with one of those indirect branches.

Acked-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
Hi,

I targetted net-next because original implementation went in over netdev@
and networking is the main user.

Would it make sense to start suppressing the generation of local
functions for static inline functions which address is taken?

E.g. we could use extern inline in a few cases (dst_output is often used
as a function pointer but marked static inline).  We could mark it as
extern inline and copy&paste the code to a .c file to prevent multiple
copies of machine code for this function. But because of the copy&paste I
did not in this case.

Bye,
Hannes

 arch/x86/include/asm/hash.h | 51 ++++++++++++++++++++++++++++++++++++++++-----
 arch/x86/lib/hash.c         | 29 +++++++++++++++-----------
 include/asm-generic/hash.h  | 36 ++++++++++++++++++++++++++++++--
 include/linux/hash.h        | 34 ------------------------------
 lib/Makefile                |  2 +-
 lib/hash.c                  | 39 ----------------------------------
 6 files changed, 98 insertions(+), 93 deletions(-)
 delete mode 100644 lib/hash.c

diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h
index e8c58f8..a881d78 100644
--- a/arch/x86/include/asm/hash.h
+++ b/arch/x86/include/asm/hash.h
@@ -1,7 +1,48 @@
-#ifndef _ASM_X86_HASH_H
-#define _ASM_X86_HASH_H
+#ifndef __ASM_X86_HASH_H
+#define __ASM_X86_HASH_H
 
-struct fast_hash_ops;
-extern void setup_arch_fast_hash(struct fast_hash_ops *ops);
+#include <linux/cpufeature.h>
+#include <asm/alternative.h>
 
-#endif /* _ASM_X86_HASH_H */
+u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed);
+u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed);
+
+/*
+ * non-inline versions of jhash so gcc does not need to generate
+ * duplicate code in every object file
+ */
+u32 __jhash(const void *data, u32 len, u32 seed);
+u32 __jhash2(const u32 *data, u32 len, u32 seed);
+
+/*
+ * for documentation of these functions please look into
+ * <include/asm-generic/hash.h>
+ */
+
+static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed)
+{
+	u32 hash;
+
+	alternative_call(__jhash, __intel_crc4_2_hash, X86_FEATURE_XMM4_2,
+#ifdef CONFIG_X86_64
+			 "=a" (hash), "D" (data), "S" (len), "d" (seed));
+#else
+			 "=a" (hash), "a" (data), "d" (len), "c" (seed));
+#endif
+	return hash;
+}
+
+static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
+{
+	u32 hash;
+
+	alternative_call(__jhash2, __intel_crc4_2_hash2, X86_FEATURE_XMM4_2,
+#ifdef CONFIG_X86_64
+			 "=a" (hash), "D" (data), "S" (len), "d" (seed));
+#else
+			 "=a" (hash), "a" (data), "d" (len), "c" (seed));
+#endif
+	return hash;
+}
+
+#endif /* __ASM_X86_HASH_H */
diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c
index ff4fa51..e143271 100644
--- a/arch/x86/lib/hash.c
+++ b/arch/x86/lib/hash.c
@@ -31,13 +31,13 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/hash.h>
-#include <linux/init.h>
-
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
 #include <asm/hash.h>
 
+#include <linux/hash.h>
+#include <linux/jhash.h>
+
 static inline u32 crc32_u32(u32 crc, u32 val)
 {
 #ifdef CONFIG_AS_CRC32
@@ -48,7 +48,7 @@ static inline u32 crc32_u32(u32 crc, u32 val)
 	return crc;
 }
 
-static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed)
+u32 __intel_crc4_2_hash(const void *data, u32 len, u32 seed)
 {
 	const u32 *p32 = (const u32 *) data;
 	u32 i, tmp = 0;
@@ -71,22 +71,27 @@ static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed)
 
 	return seed;
 }
+EXPORT_SYMBOL(__intel_crc4_2_hash);
 
-static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed)
+u32 __intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed)
 {
-	const u32 *p32 = (const u32 *) data;
 	u32 i;
 
 	for (i = 0; i < len; i++)
-		seed = crc32_u32(seed, *p32++);
+		seed = crc32_u32(seed, *data++);
 
 	return seed;
 }
+EXPORT_SYMBOL(__intel_crc4_2_hash2);
 
-void __init setup_arch_fast_hash(struct fast_hash_ops *ops)
+u32 __jhash(const void *data, u32 len, u32 seed)
 {
-	if (cpu_has_xmm4_2) {
-		ops->hash  = intel_crc4_2_hash;
-		ops->hash2 = intel_crc4_2_hash2;
-	}
+	return jhash(data, len, seed);
+}
+EXPORT_SYMBOL(__jhash);
+
+u32 __jhash2(const u32 *data, u32 len, u32 seed)
+{
+	return jhash2(data, len, seed);
 }
+EXPORT_SYMBOL(__jhash2);
diff --git a/include/asm-generic/hash.h b/include/asm-generic/hash.h
index b631284..3c82760 100644
--- a/include/asm-generic/hash.h
+++ b/include/asm-generic/hash.h
@@ -1,9 +1,41 @@
 #ifndef __ASM_GENERIC_HASH_H
 #define __ASM_GENERIC_HASH_H
 
-struct fast_hash_ops;
-static inline void setup_arch_fast_hash(struct fast_hash_ops *ops)
+#include <linux/jhash.h>
+
+/**
+ *	arch_fast_hash - Caclulates a hash over a given buffer that can have
+ *			 arbitrary size. This function will eventually use an
+ *			 architecture-optimized hashing implementation if
+ *			 available, and trades off distribution for speed.
+ *
+ *	@data: buffer to hash
+ *	@len: length of buffer in bytes
+ *	@seed: start seed
+ *
+ *	Returns 32bit hash.
+ */
+static inline u32 arch_fast_hash(const void *data, u32 len, u32 seed)
+{
+	return jhash(data, len, seed);
+}
+
+/**
+ *	arch_fast_hash2 - Caclulates a hash over a given buffer that has a
+ *			  size that is of a multiple of 32bit words. This
+ *			  function will eventually use an architecture-
+ *			  optimized hashing implementation if available,
+ *			  and trades off distribution for speed.
+ *
+ *	@data: buffer to hash (must be 32bit padded)
+ *	@len: number of 32bit words
+ *	@seed: start seed
+ *
+ *	Returns 32bit hash.
+ */
+static inline u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
 {
+	return jhash2(data, len, seed);
 }
 
 #endif /* __ASM_GENERIC_HASH_H */
diff --git a/include/linux/hash.h b/include/linux/hash.h
index d0494c3..6e8fb02 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -84,38 +84,4 @@ static inline u32 hash32_ptr(const void *ptr)
 	return (u32)val;
 }
 
-struct fast_hash_ops {
-	u32 (*hash)(const void *data, u32 len, u32 seed);
-	u32 (*hash2)(const u32 *data, u32 len, u32 seed);
-};
-
-/**
- *	arch_fast_hash - Caclulates a hash over a given buffer that can have
- *			 arbitrary size. This function will eventually use an
- *			 architecture-optimized hashing implementation if
- *			 available, and trades off distribution for speed.
- *
- *	@data: buffer to hash
- *	@len: length of buffer in bytes
- *	@seed: start seed
- *
- *	Returns 32bit hash.
- */
-extern u32 arch_fast_hash(const void *data, u32 len, u32 seed);
-
-/**
- *	arch_fast_hash2 - Caclulates a hash over a given buffer that has a
- *			  size that is of a multiple of 32bit words. This
- *			  function will eventually use an architecture-
- *			  optimized hashing implementation if available,
- *			  and trades off distribution for speed.
- *
- *	@data: buffer to hash (must be 32bit padded)
- *	@len: number of 32bit words
- *	@seed: start seed
- *
- *	Returns 32bit hash.
- */
-extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed);
-
 #endif /* _LINUX_HASH_H */
diff --git a/lib/Makefile b/lib/Makefile
index 7512dc9..04e53dd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o percpu_ida.o hash.o rhashtable.o
+	 percpu-refcount.o percpu_ida.o rhashtable.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
diff --git a/lib/hash.c b/lib/hash.c
deleted file mode 100644
index fea973f..0000000
--- a/lib/hash.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* General purpose hashing library
- *
- * That's a start of a kernel hashing library, which can be extended
- * with further algorithms in future. arch_fast_hash{2,}() will
- * eventually resolve to an architecture optimized implementation.
- *
- * Copyright 2013 Francesco Fusco <ffusco@redhat.com>
- * Copyright 2013 Daniel Borkmann <dborkman@redhat.com>
- * Copyright 2013 Thomas Graf <tgraf@redhat.com>
- * Licensed under the GNU General Public License, version 2.0 (GPLv2)
- */
-
-#include <linux/jhash.h>
-#include <linux/hash.h>
-#include <linux/cache.h>
-
-static struct fast_hash_ops arch_hash_ops __read_mostly = {
-	.hash  = jhash,
-	.hash2 = jhash2,
-};
-
-u32 arch_fast_hash(const void *data, u32 len, u32 seed)
-{
-	return arch_hash_ops.hash(data, len, seed);
-}
-EXPORT_SYMBOL_GPL(arch_fast_hash);
-
-u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed)
-{
-	return arch_hash_ops.hash2(data, len, seed);
-}
-EXPORT_SYMBOL_GPL(arch_fast_hash2);
-
-static int __init hashlib_init(void)
-{
-	setup_arch_fast_hash(&arch_hash_ops);
-	return 0;
-}
-early_initcall(hashlib_init);
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next] net: Convert SEQ_START_TOKEN/seq_printf to seq_puts
From: Joe Perches @ 2014-11-04 23:37 UTC (permalink / raw)
  To: netdev

Using a single fixed string is smaller code size than using
a format and many string arguments.

Reduces overall code size a little.

$ size net/ipv4/igmp.o* net/ipv6/mcast.o* net/ipv6/ip6_flowlabel.o*
   text	   data	    bss	    dec	    hex	filename
  34269	   7012	  14824	  56105	   db29	net/ipv4/igmp.o.new
  34315	   7012	  14824	  56151	   db57	net/ipv4/igmp.o.old
  30078	   7869	  13200	  51147	   c7cb	net/ipv6/mcast.o.new
  30105	   7869	  13200	  51174	   c7e6	net/ipv6/mcast.o.old
  11434	   3748	   8580	  23762	   5cd2	net/ipv6/ip6_flowlabel.o.new
  11491	   3748	   8580	  23819	   5d0b	net/ipv6/ip6_flowlabel.o.old

Signed-off-by: Joe Perches <joe@perches.com>
---

Done by using printf with these formats and args

 net/ipv4/igmp.c          | 6 +-----
 net/ipv6/ip6_flowlabel.c | 3 +--
 net/ipv6/mcast.c         | 6 +-----
 3 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3f80513..1e4adae 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2687,11 +2687,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
 	struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq,
-			   "%3s %6s "
-			   "%10s %10s %6s %6s\n", "Idx",
-			   "Device", "MCA",
-			   "SRC", "INC", "EXC");
+		seq_puts(seq, "Idx Device        MCA        SRC    INC    EXC\n");
 	} else {
 		seq_printf(seq,
 			   "%3d %6.6s 0x%08x "
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c143437..7221021 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -770,8 +770,7 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v)
 {
 	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
-			   "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
+		seq_puts(seq, "Label S Owner  Users  Linger Expires  Dst                              Opt\n");
 	} else {
 		struct ip6_flowlabel *fl = v;
 		seq_printf(seq,
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9648de2..e04f184 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2823,11 +2823,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq,
-			   "%3s %6s "
-			   "%32s %32s %6s %6s\n", "Idx",
-			   "Device", "Multicast Address",
-			   "Source Address", "INC", "EXC");
+		seq_puts(seq, "Idx Device                Multicast Address                   Source Address    INC    EXC\n");
 	} else {
 		seq_printf(seq,
 			   "%3d %6.6s %pi6 %pi6 %6lu %6lu\n",

^ permalink raw reply related

* Re: [PATCH 00/20] kselftest install target feature
From: Shuah Khan @ 2014-11-04 23:45 UTC (permalink / raw)
  To: Kees Cook
  Cc: Greg KH, Andrew Morton, Michal Marek, David S. Miller,
	tranmanphong, David Herrmann, Hugh Dickins, bobby.prani,
	Eric W. Biederman, Serge E. Hallyn, linux-kbuild, LKML, Linux API,
	Network Development
In-Reply-To: <CAGXu5jK9-TpOF4SFX2bkU2UJMXk-3W+-_we0BTxu4Ga+7bYXSQ@mail.gmail.com>

On 11/04/2014 12:22 PM, Kees Cook wrote:
> On Tue, Nov 4, 2014 at 9:10 AM, Shuah Khan <shuahkh@osg.samsung.com> wrote:
>> This patch series adds a new kselftest_install make target
>> to enable selftest install. When make kselftest_install is
>> run, selftests are installed on the system. A new install
>> target is added to selftests Makefile which will install
>> targets for the tests that are specified in INSTALL_TARGETS.
>> During install, a script is generated to run tests that are
>> installed. This script will be installed in the selftest install
>> directory. Individual test Makefiles are changed to add to the
>> script. This will allow new tests to add install and run test
>> commands to the generated kselftest script.
> 
> I'm all for making the self tests more available, but I don't think
> this is the right approach. My primary objection is that it creates a
> second way to run tests, and that means any changes and additions need
> to be updated in two places. I'd much rather just maintain the single
> "make" targets instead. Having "make" available on the target device
> doesn't seem too bad to me. Is there a reason that doesn't work for
> your situation?

Kees,

My primary objective is to provide a way to install selftests for a
specific kernel release. This will allow developers to run tests for
a specific release and look for regressions. Adding an install target
will also help support local execution of tests in a virtualized
environments. In some cases such as qemu, it is not practical to
expect the target to have support for "make". Once tests are installed
to be run outside the git environment, we need a master script that
can run the tests. Hence the need for a master script that can run
tests.

We have the ability to run all tests via make kselftest target or
run a specific test using the individual test's run_tests target.
Both of above are necessary to support running tests from the tree.
Embedding run_tests logic in the makefiles doesn't work very well
in the long run.

We also need a way to run them outside tree. I agree with you that
the way I added the script generation, duplicates the code in individual
run_tests targets and that changes/updates need to be made in both
places.

Would you be ok with the approach if I fixed the duplicating
problem? I can address the duplication concern easily.

> 
> I would, however, like to see some better standardization of the test
> "framework" that we've got in there already. (For example, some
> failures fail the "make", some don't, there are various reporting
> methods for success/failure depending on the test, etc.)

This is being addressed and I have the framework in linux-kselftest
git next branch at the moment. I do think the above work is part of
addressing the larger framework issues such as being able to run tests
on a target system that might not have "make" support and makes it
easier to use.

thanks,
-- Shuah


-- 
Shuah Khan
Sr. Linux Kernel Developer
Samsung Research America (Silicon Valley)
shuahkh@osg.samsung.com | (970) 217-8978

^ permalink raw reply

* Re: [PATCH net 2/5] i40e: Implement ndo_gso_check()
From: Jesse Gross @ 2014-11-04 23:45 UTC (permalink / raw)
  To: Joe Stringer
  Cc: netdev, Sathya Perla, Jeff Kirsher, linux.nics, amirv,
	shahed.shaikh, Dept-GELinuxNICDev, Tom Herbert,
	Linux Kernel Mailing List
In-Reply-To: <1415138202-1197-3-git-send-email-joestringer@nicira.com>

On Tue, Nov 4, 2014 at 1:56 PM, Joe Stringer <joestringer@nicira.com> wrote:
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index c3a7f4a..21829b5 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> +static bool i40e_gso_check(struct sk_buff *skb, struct net_device *dev)
> +{
> +       if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) &&
> +           (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
> +            skb->inner_protocol != htons(ETH_P_TEB) ||
> +            skb_inner_mac_header(skb) - skb_transport_header(skb) > 64))
> +               return false;

I think it may be possible to even support a few more things here.
According to the datasheet here:
http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xl710-10-40-controller-datasheet.pdf

This can actually support 64 bytes beyond the tunnel header, which
would make for a total of 80 bytes. It looks like it can also support
IPv4 or IPv6 beyond just Ethernet as the encapsulated protocol.

Intel guys, can you confirm that this is correct?

^ permalink raw reply

* [PATCH v2 net-next] udp: Increment UDP_MIB_IGNOREDMULTI for arriving unmatched multicasts
From: Rick Jones @ 2014-11-04 23:47 UTC (permalink / raw)
  To: netdev; +Cc: davem


From: Rick Jones <rick.jones2@hp.com>

As NIC multicast filtering isn't perfect, and some platforms are
quite content to spew broadcasts, we should not trigger an event
for skb:kfree_skb when we do not have a match for such an incoming
datagram.  We do though want to avoid sweeping the matter under the
rug entirely, so increment a suitable statistic.

This incorporates feedback from David L. Stevens, Karl Neiss and Eric
Dumazet.

Signed-off-by: Rick Jones <rick.jones2@hp.com>

---

Noticed __udp4_lib_mcast_deliver showing-up in a perf dropped packet
profile on a system sitting on a network with a bunch of Windows boxes
sending what they are fond of sending.

Verified that the new UDP_MIB_IGNOREDMULTI increments when ignored
datagrams are encountered, but was unable to cross the i's and dot
the t's of perf because the perf built from the tree at the time
wasn't happy in general.  Also hit a test system with some netperf
multicast UDP_STREAM and UDP_RR testing but that is the extent of 
the testing performed.

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index df40137..30f541b 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -156,6 +156,7 @@ enum
 	UDP_MIB_RCVBUFERRORS,			/* RcvbufErrors */
 	UDP_MIB_SNDBUFERRORS,			/* SndbufErrors */
 	UDP_MIB_CSUMERRORS,			/* InCsumErrors */
+	UDP_MIB_IGNOREDMULTI,			/* IgnoredMulti */
 	__UDP_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 8e3eb39..5c5450c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -181,6 +181,7 @@ static const struct snmp_mib snmp4_udp_list[] = {
 	SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
 	SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS),
+	SNMP_MIB_ITEM("IgnoredMulti", UDP_MIB_IGNOREDMULTI),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cd0db54..1215f89 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1647,7 +1647,8 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				    struct udphdr  *uh,
 				    __be32 saddr, __be32 daddr,
-				    struct udp_table *udptable)
+				    struct udp_table *udptable,
+				    int proto)
 {
 	struct sock *sk, *stack[256 / sizeof(struct sock *)];
 	struct hlist_nulls_node *node;
@@ -1656,6 +1657,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	int dif = skb->dev->ifindex;
 	unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
 	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
+	unsigned int inner_flushed = 0;
 
 	if (use_hash2) {
 		hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
@@ -1674,6 +1676,7 @@ start_lookup:
 					dif, hnum)) {
 			if (unlikely(count == ARRAY_SIZE(stack))) {
 				flush_stack(stack, count, skb, ~0);
+				inner_flushed = 1;
 				count = 0;
 			}
 			stack[count++] = sk;
@@ -1695,7 +1698,10 @@ start_lookup:
 	if (count) {
 		flush_stack(stack, count, skb, count - 1);
 	} else {
-		kfree_skb(skb);
+		if (!inner_flushed)
+			UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+					 proto == IPPROTO_UDPLITE);
+		consume_skb(skb);
 	}
 	return 0;
 }
@@ -1780,7 +1786,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	} else {
 		if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 			return __udp4_lib_mcast_deliver(net, skb, uh,
-					saddr, daddr, udptable);
+					saddr, daddr, udptable, proto);
 
 		sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
 	}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 1752cd0..679253d0 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -136,6 +136,7 @@ static const struct snmp_mib snmp6_udp6_list[] = {
 	SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
 	SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
 	SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
+	SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f6ba535..d80f21e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -771,7 +771,7 @@ static void udp6_csum_zero_error(struct sk_buff *skb)
  */
 static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		const struct in6_addr *saddr, const struct in6_addr *daddr,
-		struct udp_table *udptable)
+		struct udp_table *udptable, int proto)
 {
 	struct sock *sk, *stack[256 / sizeof(struct sock *)];
 	const struct udphdr *uh = udp_hdr(skb);
@@ -781,6 +781,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	int dif = inet6_iif(skb);
 	unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
 	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
+	int inner_flushed = 0;
 
 	if (use_hash2) {
 		hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
@@ -803,6 +804,7 @@ start_lookup:
 		    (uh->check || udp_sk(sk)->no_check6_rx)) {
 			if (unlikely(count == ARRAY_SIZE(stack))) {
 				flush_stack(stack, count, skb, ~0);
+				inner_flushed = 1;
 				count = 0;
 			}
 			stack[count++] = sk;
@@ -821,7 +823,10 @@ start_lookup:
 	if (count) {
 		flush_stack(stack, count, skb, count - 1);
 	} else {
-		kfree_skb(skb);
+		if (!inner_flushed)
+			UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+					 proto == IPPROTO_UDPLITE);
+		consume_skb(skb);
 	}
 	return 0;
 }
@@ -873,7 +878,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	 */
 	if (ipv6_addr_is_multicast(daddr))
 		return __udp6_lib_mcast_deliver(net, skb,
-				saddr, daddr, udptable);
+				saddr, daddr, udptable, proto);
 
 	/* Unicast */
 

^ permalink raw reply related

* Atlas 2.5% End Of Year Promotion Offer!!
From: Atlas Finance Loans @ 2014-11-04 19:54 UTC (permalink / raw)

In-Reply-To: <1785258737.69546.1415130873710.JavaMail.root@ninhbinh.gov.vn>

[-- Attachment #1: Type: text/plain, Size: 0 bytes --]



[-- Attachment #2: Atlas 2.5% Loan Offer.docx --]
[-- Type: application/vnd.openxmlformats-officedocument.wordprocessingml.document, Size: 12096 bytes --]

^ permalink raw reply

* Re: [Patch net-next v2] neigh: remove dynamic neigh table registration support
From: Cong Wang @ 2014-11-05  0:38 UTC (permalink / raw)
  To: David Miller; +Cc: Linux Kernel Network Developers
In-Reply-To: <20141104.170251.950859328398615616.davem@davemloft.net>

On Tue, Nov 4, 2014 at 2:02 PM, David Miller <davem@davemloft.net> wrote:
> From: Cong Wang <xiyou.wangcong@gmail.com>
> Date: Mon,  3 Nov 2014 10:14:14 -0800
>
>> Currently there are only three neigh tables in the whole kernel:
>> arp table, ndisc table and decnet neigh table. What's more,
>> we don't support registering multiple tables per family.
>> Therefore we can just make these tables statically built-in.
>>
>> Cc: David S. Miller <davem@davemloft.net>
>> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
>> ---
>> v2: remove useless #ifdef's
>>     move the assignment to the end of neigh_table_init()
>
> neigh_table_clear should definitely NULL out the slot, otherwise
> we hold in there a pointer to module memory which is about to
> be released.

Good catch! I totally missed neigh_table_clear().

Thanks!

^ permalink raw reply

* [PATCH] ipvs: Keep skb->sk when allocating headroom on tunnel xmit
From: Calvin Owens @ 2014-11-05  0:37 UTC (permalink / raw)
  To: Simon Horman, Julian Anastasov, Wensong Zhang
  Cc: lvs-devel, linux-kernel, netdev, agartrell, kernel-team,
	Calvin Owens

ip_vs_prepare_tunneled_skb() ignores ->sk when allocating a new
skb, either unconditionally setting ->sk to NULL or allowing
the uninitialized ->sk from a newly allocated skb to leak through
to the caller.

This patch properly copies ->sk and increments its reference count.

Signed-off-by: Calvin Owens <calvinowens@fb.com>
---
 net/netfilter/ipvs/ip_vs_xmit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 437a366..bd90bf8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -846,6 +846,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
 		new_skb = skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb)
 			goto error;
+		if (skb->sk)
+			skb_set_owner_w(new_skb, skb->sk);
 		consume_skb(skb);
 		skb = new_skb;
 	}
-- 
2.1.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox