Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v3 1/2] net: dsa: mv88e6xxx: Introduce _mv88e6xxx_phy_page_{read,write}
From: Patrick Uiterwijk @ 2016-03-30  1:39 UTC (permalink / raw)
  To: linux, davem, vivien.didelot, andrew
  Cc: netdev, dennis, pbrobinson, Patrick Uiterwijk

Add versions of the phy_page_read and _write functions to
be used in a context where the SMI mutex is held.

Tested-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: Patrick Uiterwijk <patrick@puiterwijk.org>
---
 drivers/net/dsa/mv88e6xxx.c | 49 +++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index fa086e0..86a2029 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2264,6 +2264,38 @@ static void mv88e6xxx_bridge_work(struct work_struct *work)
 	mutex_unlock(&ps->smi_mutex);
 }
 
+static int _mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page,
+				     int reg, int val)
+{
+	int ret;
+
+	ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page);
+	if (ret < 0)
+		goto restore_page_0;
+
+	ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val);
+restore_page_0:
+	_mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0);
+
+	return ret;
+}
+
+static int _mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page,
+				    int reg)
+{
+	int ret;
+
+	ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page);
+	if (ret < 0)
+		goto restore_page_0;
+
+	ret = _mv88e6xxx_phy_read_indirect(ds, port, reg);
+restore_page_0:
+	_mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0);
+
+	return ret;
+}
+
 static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
 {
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -2714,13 +2746,9 @@ int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg)
 	int ret;
 
 	mutex_lock(&ps->smi_mutex);
-	ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page);
-	if (ret < 0)
-		goto error;
-	ret = _mv88e6xxx_phy_read_indirect(ds, port, reg);
-error:
-	_mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0);
+	ret = _mv88e6xxx_phy_page_read(ds, port, page, reg);
 	mutex_unlock(&ps->smi_mutex);
+
 	return ret;
 }
 
@@ -2731,14 +2759,9 @@ int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page,
 	int ret;
 
 	mutex_lock(&ps->smi_mutex);
-	ret = _mv88e6xxx_phy_write_indirect(ds, port, 0x16, page);
-	if (ret < 0)
-		goto error;
-
-	ret = _mv88e6xxx_phy_write_indirect(ds, port, reg, val);
-error:
-	_mv88e6xxx_phy_write_indirect(ds, port, 0x16, 0x0);
+	ret = _mv88e6xxx_phy_page_write(ds, port, page, reg, val);
 	mutex_unlock(&ps->smi_mutex);
+
 	return ret;
 }
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next v3 2/2] net: dsa: mv88e6xxx: Clear the PDOWN bit on setup
From: Patrick Uiterwijk @ 2016-03-30  1:39 UTC (permalink / raw)
  To: linux, davem, vivien.didelot, andrew
  Cc: netdev, dennis, pbrobinson, Patrick Uiterwijk
In-Reply-To: <1459301981-26535-1-git-send-email-patrick@puiterwijk.org>

Some of the vendor-specific bootloaders set up this part
of the initialization for us, so this was never added.
However, since upstream bootloaders don't initialize the
chip specifically, they leave the fiber MII's PDOWN flag
set, which means that the CPU port doesn't connect.

This patch checks whether this flag has been clear prior
by something else, and if not make us clear it.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Patrick Uiterwijk <patrick@puiterwijk.org>
---
 drivers/net/dsa/mv88e6xxx.c | 36 ++++++++++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx.h |  8 ++++++++
 2 files changed, 44 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 86a2029..50454be 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2296,6 +2296,25 @@ restore_page_0:
 	return ret;
 }
 
+static int mv88e6xxx_power_on_serdes(struct dsa_switch *ds)
+{
+	int ret;
+
+	ret = _mv88e6xxx_phy_page_read(ds, REG_FIBER_SERDES, PAGE_FIBER_SERDES,
+				       MII_BMCR);
+	if (ret < 0)
+		return ret;
+
+	if (ret & BMCR_PDOWN) {
+		ret &= ~BMCR_PDOWN;
+		ret = _mv88e6xxx_phy_page_write(ds, REG_FIBER_SERDES,
+						PAGE_FIBER_SERDES, MII_BMCR,
+						ret);
+	}
+
+	return ret;
+}
+
 static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
 {
 	struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -2399,6 +2418,23 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
 			goto abort;
 	}
 
+	/* If this port is connected to a SerDes, make sure the SerDes is not
+	 * powered down.
+	 */
+	if (mv88e6xxx_6352_family(ds)) {
+		ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS);
+		if (ret < 0)
+			goto abort;
+		ret &= PORT_STATUS_CMODE_MASK;
+		if ((ret == PORT_STATUS_CMODE_100BASE_X) ||
+		    (ret == PORT_STATUS_CMODE_1000BASE_X) ||
+		    (ret == PORT_STATUS_CMODE_SGMII)) {
+			ret = mv88e6xxx_power_on_serdes(ds);
+			if (ret < 0)
+				goto abort;
+		}
+	}
+
 	/* Port Control 2: don't force a good FCS, set the maximum frame size to
 	 * 10240 bytes, disable 802.1q tags checking, don't discard tagged or
 	 * untagged frames on this port, do a destination address lookup on all
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 9a038ab..26a424a 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -28,6 +28,10 @@
 #define SMI_CMD_OP_45_READ_DATA_INC	((3 << 10) | SMI_CMD_BUSY)
 #define SMI_DATA		0x01
 
+/* Fiber/SERDES Registers are located at SMI address F, page 1 */
+#define REG_FIBER_SERDES	0x0f
+#define PAGE_FIBER_SERDES	0x01
+
 #define REG_PORT(p)		(0x10 + (p))
 #define PORT_STATUS		0x00
 #define PORT_STATUS_PAUSE_EN	BIT(15)
@@ -45,6 +49,10 @@
 #define PORT_STATUS_MGMII	BIT(6) /* 6185 */
 #define PORT_STATUS_TX_PAUSED	BIT(5)
 #define PORT_STATUS_FLOW_CTRL	BIT(4)
+#define PORT_STATUS_CMODE_MASK	0x0f
+#define PORT_STATUS_CMODE_100BASE_X	0x8
+#define PORT_STATUS_CMODE_1000BASE_X	0x9
+#define PORT_STATUS_CMODE_SGMII		0xa
 #define PORT_PCS_CTRL		0x01
 #define PORT_PCS_CTRL_RGMII_DELAY_RXCLK	BIT(15)
 #define PORT_PCS_CTRL_RGMII_DELAY_TXCLK	BIT(14)
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 0/5] macb: Codingstyle cleanups
From: Moritz Fischer @ 2016-03-30  2:11 UTC (permalink / raw)
  To: nicolas.ferre
  Cc: michal.simek, joe, davem, netdev, linux-kernel,
	moritz.fischer.private, Moritz Fischer

Hi all,

resending almost unchanged v2 here:

Changes from v2:
* Rebased onto net-next
* Changed 5th patches commit message
* Added Nicholas' and Michal's Acked-Bys

Changes from v1:
* Backed out variable scope changes
* Separated out ether_addr_copy into it's own commit
* Fixed typo in comments as suggested by Joe

Cheers,

    Moritz

Moritz Fischer (5):
  net: macb: Fix coding style error message
  net: macb: Fix coding style warnings
  net: macb: Fix coding style suggestions
  net: macb: Use ether_addr_copy over memcpy
  net: macb: Fix simple typo

 drivers/net/ethernet/cadence/macb.c | 152 ++++++++++++++++--------------------
 1 file changed, 69 insertions(+), 83 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH v3 1/5] net: macb: Fix coding style error message
From: Moritz Fischer @ 2016-03-30  2:11 UTC (permalink / raw)
  To: nicolas.ferre
  Cc: michal.simek, joe, davem, netdev, linux-kernel,
	moritz.fischer.private, Moritz Fischer
In-Reply-To: <1459303875-3362-1-git-send-email-moritz.fischer@ettus.com>

checkpatch.pl gave the following error:

ERROR: space required before the open parenthesis '('
+	for(; p < end; p++, offset += 4)

Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Moritz Fischer <moritz.fischer@ettus.com>
---
 drivers/net/ethernet/cadence/macb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 48a7d7d..b5aa96e 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -499,7 +499,7 @@ static void macb_update_stats(struct macb *bp)
 
 	WARN_ON((unsigned long)(end - p - 1) != (MACB_TPF - MACB_PFR) / 4);
 
-	for(; p < end; p++, offset += 4)
+	for (; p < end; p++, offset += 4)
 		*p += bp->macb_reg_readl(bp, offset);
 }
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 2/5] net: macb: Fix coding style warnings
From: Moritz Fischer @ 2016-03-30  2:11 UTC (permalink / raw)
  To: nicolas.ferre
  Cc: michal.simek, joe, davem, netdev, linux-kernel,
	moritz.fischer.private, Moritz Fischer
In-Reply-To: <1459303875-3362-1-git-send-email-moritz.fischer@ettus.com>

This commit takes care of the coding style warnings
that are mostly due to a different comment style and
lines over 80 chars, as well as a dangling else.

Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Moritz Fischer <moritz.fischer@ettus.com>
---
 drivers/net/ethernet/cadence/macb.c | 100 +++++++++++++++---------------------
 1 file changed, 42 insertions(+), 58 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index b5aa96e..f25681b 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -61,8 +61,7 @@
 #define MACB_WOL_HAS_MAGIC_PACKET	(0x1 << 0)
 #define MACB_WOL_ENABLED		(0x1 << 1)
 
-/*
- * Graceful stop timeouts in us. We should allow up to
+/* Graceful stop timeouts in us. We should allow up to
  * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions)
  */
 #define MACB_HALT_TIMEOUT	1230
@@ -130,8 +129,7 @@ static void hw_writel(struct macb *bp, int offset, u32 value)
 	writel_relaxed(value, bp->regs + offset);
 }
 
-/*
- * Find the CPU endianness by using the loopback bit of NCR register. When the
+/* Find the CPU endianness by using the loopback bit of NCR register. When the
  * CPU is in big endian we need to program swaped mode for management
  * descriptor access.
  */
@@ -386,7 +384,8 @@ static int macb_mii_probe(struct net_device *dev)
 
 	pdata = dev_get_platdata(&bp->pdev->dev);
 	if (pdata && gpio_is_valid(pdata->phy_irq_pin)) {
-		ret = devm_gpio_request(&bp->pdev->dev, pdata->phy_irq_pin, "phy int");
+		ret = devm_gpio_request(&bp->pdev->dev, pdata->phy_irq_pin,
+					"phy int");
 		if (!ret) {
 			phy_irq = gpio_to_irq(pdata->phy_irq_pin);
 			phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq;
@@ -452,7 +451,8 @@ static int macb_mii_init(struct macb *bp)
 		err = of_mdiobus_register(bp->mii_bus, np);
 
 		/* fallback to standard phy registration if no phy were
-		   found during dt phy registration */
+		 * found during dt phy registration
+		 */
 		if (!err && !phy_find_first(bp->mii_bus)) {
 			for (i = 0; i < PHY_MAX_ADDR; i++) {
 				struct phy_device *phydev;
@@ -567,8 +567,7 @@ static void macb_tx_error_task(struct work_struct *work)
 	/* Make sure nobody is trying to queue up new packets */
 	netif_tx_stop_all_queues(bp->dev);
 
-	/*
-	 * Stop transmission now
+	/* Stop transmission now
 	 * (in case we have just queued new packets)
 	 * macb/gem must be halted to write TBQP register
 	 */
@@ -576,8 +575,7 @@ static void macb_tx_error_task(struct work_struct *work)
 		/* Just complain for now, reinitializing TX path can be good */
 		netdev_err(bp->dev, "BUG: halt tx timed out\n");
 
-	/*
-	 * Treat frames in TX queue including the ones that caused the error.
+	/* Treat frames in TX queue including the ones that caused the error.
 	 * Free transmit buffers in upper layer.
 	 */
 	for (tail = queue->tx_tail; tail != queue->tx_head; tail++) {
@@ -607,10 +605,9 @@ static void macb_tx_error_task(struct work_struct *work)
 				bp->stats.tx_bytes += skb->len;
 			}
 		} else {
-			/*
-			 * "Buffers exhausted mid-frame" errors may only happen
-			 * if the driver is buggy, so complain loudly about those.
-			 * Statistics are updated by hardware.
+			/* "Buffers exhausted mid-frame" errors may only happen
+			 * if the driver is buggy, so complain loudly about
+			 * those. Statistics are updated by hardware.
 			 */
 			if (ctrl & MACB_BIT(TX_BUF_EXHAUSTED))
 				netdev_err(bp->dev,
@@ -722,7 +719,8 @@ static void gem_rx_refill(struct macb *bp)
 	struct sk_buff		*skb;
 	dma_addr_t		paddr;
 
-	while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail, RX_RING_SIZE) > 0) {
+	while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail,
+			  RX_RING_SIZE) > 0) {
 		entry = macb_rx_ring_wrap(bp->rx_prepared_head);
 
 		/* Make hw descriptor updates visible to CPU */
@@ -741,7 +739,8 @@ static void gem_rx_refill(struct macb *bp)
 
 			/* now fill corresponding descriptor entry */
 			paddr = dma_map_single(&bp->pdev->dev, skb->data,
-					       bp->rx_buffer_size, DMA_FROM_DEVICE);
+					       bp->rx_buffer_size,
+					       DMA_FROM_DEVICE);
 			if (dma_mapping_error(&bp->pdev->dev, paddr)) {
 				dev_kfree_skb(skb);
 				break;
@@ -777,14 +776,14 @@ static void discard_partial_frame(struct macb *bp, unsigned int begin,
 
 	for (frag = begin; frag != end; frag++) {
 		struct macb_dma_desc *desc = macb_rx_desc(bp, frag);
+
 		desc->addr &= ~MACB_BIT(RX_USED);
 	}
 
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
-	/*
-	 * When this happens, the hardware stats registers for
+	/* When this happens, the hardware stats registers for
 	 * whatever caused this is updated, so we don't have to record
 	 * anything.
 	 */
@@ -883,8 +882,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 		macb_rx_ring_wrap(first_frag),
 		macb_rx_ring_wrap(last_frag), len);
 
-	/*
-	 * The ethernet header starts NET_IP_ALIGN bytes into the
+	/* The ethernet header starts NET_IP_ALIGN bytes into the
 	 * first buffer. Since the header is 14 bytes, this makes the
 	 * payload word-aligned.
 	 *
@@ -1099,8 +1097,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 			    (unsigned long)status);
 
 		if (status & MACB_RX_INT_FLAGS) {
-			/*
-			 * There's no point taking any more interrupts
+			/* There's no point taking any more interrupts
 			 * until we have processed the buffers. The
 			 * scheduling call may fail if the poll routine
 			 * is already scheduled, so disable interrupts
@@ -1129,8 +1126,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 		if (status & MACB_BIT(TCOMP))
 			macb_tx_interrupt(queue);
 
-		/*
-		 * Link change detection isn't possible with RMII, so we'll
+		/* Link change detection isn't possible with RMII, so we'll
 		 * add that if/when we get our hands on a full-blown MII PHY.
 		 */
 
@@ -1161,8 +1157,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 		}
 
 		if (status & MACB_BIT(HRESP)) {
-			/*
-			 * TODO: Reset the hardware, and maybe move the
+			/* TODO: Reset the hardware, and maybe move the
 			 * netdev_err to a lower-priority context as well
 			 * (work queue?)
 			 */
@@ -1181,8 +1176,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-/*
- * Polling receive - used by netconsole and other diagnostic tools
+/* Polling receive - used by netconsole and other diagnostic tools
  * to allow network i/o with interrupts disabled.
  */
 static void macb_poll_controller(struct net_device *dev)
@@ -1478,10 +1472,10 @@ static int gem_alloc_rx_buffers(struct macb *bp)
 	bp->rx_skbuff = kzalloc(size, GFP_KERNEL);
 	if (!bp->rx_skbuff)
 		return -ENOMEM;
-	else
-		netdev_dbg(bp->dev,
-			   "Allocated %d RX struct sk_buff entries at %p\n",
-			   RX_RING_SIZE, bp->rx_skbuff);
+
+	netdev_dbg(bp->dev,
+		   "Allocated %d RX struct sk_buff entries at %p\n",
+		   RX_RING_SIZE, bp->rx_skbuff);
 	return 0;
 }
 
@@ -1494,10 +1488,10 @@ static int macb_alloc_rx_buffers(struct macb *bp)
 					    &bp->rx_buffers_dma, GFP_KERNEL);
 	if (!bp->rx_buffers)
 		return -ENOMEM;
-	else
-		netdev_dbg(bp->dev,
-			   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
-			   size, (unsigned long)bp->rx_buffers_dma, bp->rx_buffers);
+
+	netdev_dbg(bp->dev,
+		   "Allocated RX buffers of %d bytes at %08lx (mapped %p)\n",
+		   size, (unsigned long)bp->rx_buffers_dma, bp->rx_buffers);
 	return 0;
 }
 
@@ -1588,8 +1582,7 @@ static void macb_reset_hw(struct macb *bp)
 	struct macb_queue *queue;
 	unsigned int q;
 
-	/*
-	 * Disable RX and TX (XXX: Should we halt the transmission
+	/* Disable RX and TX (XXX: Should we halt the transmission
 	 * more gracefully?)
 	 */
 	macb_writel(bp, NCR, 0);
@@ -1652,8 +1645,7 @@ static u32 macb_mdc_clk_div(struct macb *bp)
 	return config;
 }
 
-/*
- * Get the DMA bus width field of the network configuration register that we
+/* Get the DMA bus width field of the network configuration register that we
  * should program.  We find the width from decoding the design configuration
  * register to find the maximum supported data bus width.
  */
@@ -1673,8 +1665,7 @@ static u32 macb_dbw(struct macb *bp)
 	}
 }
 
-/*
- * Configure the receive DMA engine
+/* Configure the receive DMA engine
  * - use the correct receive buffer size
  * - set best burst length for DMA operations
  *   (if not supported by FIFO, it will fallback to default)
@@ -1762,8 +1753,7 @@ static void macb_init_hw(struct macb *bp)
 	macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE));
 }
 
-/*
- * The hash address register is 64 bits long and takes up two
+/* The hash address register is 64 bits long and takes up two
  * locations in the memory map.  The least significant bits are stored
  * in EMAC_HSL and the most significant bits in EMAC_HSH.
  *
@@ -1803,9 +1793,7 @@ static inline int hash_bit_value(int bitnr, __u8 *addr)
 	return 0;
 }
 
-/*
- * Return the hash index value for the specified address.
- */
+/* Return the hash index value for the specified address. */
 static int hash_get_index(__u8 *addr)
 {
 	int i, j, bitval;
@@ -1821,9 +1809,7 @@ static int hash_get_index(__u8 *addr)
 	return hash_index;
 }
 
-/*
- * Add multicast addresses to the internal multicast-hash table.
- */
+/* Add multicast addresses to the internal multicast-hash table. */
 static void macb_sethashtable(struct net_device *dev)
 {
 	struct netdev_hw_addr *ha;
@@ -1842,9 +1828,7 @@ static void macb_sethashtable(struct net_device *dev)
 	macb_or_gem_writel(bp, HRT, mc_filter[1]);
 }
 
-/*
- * Enable/Disable promiscuous and multicast modes.
- */
+/* Enable/Disable promiscuous and multicast modes. */
 static void macb_set_rx_mode(struct net_device *dev)
 {
 	unsigned long cfg;
@@ -2161,9 +2145,8 @@ static void macb_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 
 	if (!(bp->caps & MACB_CAPS_USRIO_DISABLED))
 		regs_buff[12] = macb_or_gem_readl(bp, USRIO);
-	if (macb_is_gem(bp)) {
+	if (macb_is_gem(bp))
 		regs_buff[13] = gem_readl(bp, DMACFG);
-	}
 }
 
 static void macb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
@@ -2286,11 +2269,11 @@ static const struct net_device_ops macb_netdev_ops = {
 	.ndo_set_features	= macb_set_features,
 };
 
-/*
- * Configure peripheral capabilities according to device tree
+/* Configure peripheral capabilities according to device tree
  * and integration options used
  */
-static void macb_configure_caps(struct macb *bp, const struct macb_config *dt_conf)
+static void macb_configure_caps(struct macb *bp,
+				const struct macb_config *dt_conf)
 {
 	u32 dcfg;
 
@@ -2996,6 +2979,7 @@ static int macb_probe(struct platform_device *pdev)
 	phy_node =  of_get_next_available_child(np, NULL);
 	if (phy_node) {
 		int gpio = of_get_named_gpio(phy_node, "reset-gpios", 0);
+
 		if (gpio_is_valid(gpio)) {
 			bp->reset_gpio = gpio_to_desc(gpio);
 			gpiod_direction_output(bp->reset_gpio, 1);
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 3/5] net: macb: Fix coding style suggestions
From: Moritz Fischer @ 2016-03-30  2:11 UTC (permalink / raw)
  To: nicolas.ferre
  Cc: michal.simek, joe, davem, netdev, linux-kernel,
	moritz.fischer.private, Moritz Fischer
In-Reply-To: <1459303875-3362-1-git-send-email-moritz.fischer@ettus.com>

This commit deals with a bunch of checkpatch suggestions
that without changing behavior make checkpatch happier.

Acked-by: Michal Simek <michal.simek@xilinx.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Moritz Fischer <moritz.fischer@ettus.com>
---
 drivers/net/ethernet/cadence/macb.c | 46 +++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index f25681b..2ba0934 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -187,7 +187,7 @@ static void macb_get_hwaddr(struct macb *bp)
 
 	pdata = dev_get_platdata(&bp->pdev->dev);
 
-	/* Check all 4 address register for vaild address */
+	/* Check all 4 address register for valid address */
 	for (i = 0; i < 4; i++) {
 		bottom = macb_or_gem_readl(bp, SA1B + i * 8);
 		top = macb_or_gem_readl(bp, SA1T + i * 8);
@@ -295,7 +295,7 @@ static void macb_set_tx_clk(struct clk *clk, int speed, struct net_device *dev)
 	ferr = DIV_ROUND_UP(ferr, rate / 100000);
 	if (ferr > 5)
 		netdev_warn(dev, "unable to generate target frequency: %ld Hz\n",
-				rate);
+			    rate);
 
 	if (clk_set_rate(clk, rate_rounded))
 		netdev_err(dev, "adjusting tx_clk failed.\n");
@@ -429,7 +429,7 @@ static int macb_mii_init(struct macb *bp)
 	macb_writel(bp, NCR, MACB_BIT(MPE));
 
 	bp->mii_bus = mdiobus_alloc();
-	if (bp->mii_bus == NULL) {
+	if (!bp->mii_bus) {
 		err = -ENOMEM;
 		goto err_out;
 	}
@@ -438,7 +438,7 @@ static int macb_mii_init(struct macb *bp)
 	bp->mii_bus->read = &macb_mdio_read;
 	bp->mii_bus->write = &macb_mdio_write;
 	snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
-		bp->pdev->name, bp->pdev->id);
+		 bp->pdev->name, bp->pdev->id);
 	bp->mii_bus->priv = bp;
 	bp->mii_bus->parent = &bp->dev->dev;
 	pdata = dev_get_platdata(&bp->pdev->dev);
@@ -659,7 +659,7 @@ static void macb_tx_interrupt(struct macb_queue *queue)
 		queue_writel(queue, ISR, MACB_BIT(TCOMP));
 
 	netdev_vdbg(bp->dev, "macb_tx_interrupt status = 0x%03lx\n",
-		(unsigned long)status);
+		    (unsigned long)status);
 
 	head = queue->tx_head;
 	for (tail = queue->tx_tail; tail != head; tail++) {
@@ -728,10 +728,10 @@ static void gem_rx_refill(struct macb *bp)
 
 		bp->rx_prepared_head++;
 
-		if (bp->rx_skbuff[entry] == NULL) {
+		if (!bp->rx_skbuff[entry]) {
 			/* allocate sk_buff for this free entry in ring */
 			skb = netdev_alloc_skb(bp->dev, bp->rx_buffer_size);
-			if (unlikely(skb == NULL)) {
+			if (unlikely(!skb)) {
 				netdev_err(bp->dev,
 					   "Unable to allocate sk_buff\n");
 				break;
@@ -765,7 +765,7 @@ static void gem_rx_refill(struct macb *bp)
 	wmb();
 
 	netdev_vdbg(bp->dev, "rx ring: prepared head %d, tail %d\n",
-		   bp->rx_prepared_head, bp->rx_tail);
+		    bp->rx_prepared_head, bp->rx_tail);
 }
 
 /* Mark DMA descriptors from begin up to and not including end as unused */
@@ -879,8 +879,8 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 	len = desc->ctrl & bp->rx_frm_len_mask;
 
 	netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n",
-		macb_rx_ring_wrap(first_frag),
-		macb_rx_ring_wrap(last_frag), len);
+		    macb_rx_ring_wrap(first_frag),
+		    macb_rx_ring_wrap(last_frag), len);
 
 	/* The ethernet header starts NET_IP_ALIGN bytes into the
 	 * first buffer. Since the header is 14 bytes, this makes the
@@ -922,7 +922,8 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 			frag_len = len - offset;
 		}
 		skb_copy_to_linear_data_offset(skb, offset,
-				macb_rx_buffer(bp, frag), frag_len);
+					       macb_rx_buffer(bp, frag),
+					       frag_len);
 		offset += bp->rx_buffer_size;
 		desc = macb_rx_desc(bp, frag);
 		desc->addr &= ~MACB_BIT(RX_USED);
@@ -940,7 +941,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 	bp->stats.rx_packets++;
 	bp->stats.rx_bytes += skb->len;
 	netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
-		   skb->len, skb->csum);
+		    skb->len, skb->csum);
 	netif_receive_skb(skb);
 
 	return 0;
@@ -1047,7 +1048,7 @@ static int macb_poll(struct napi_struct *napi, int budget)
 	work_done = 0;
 
 	netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n",
-		   (unsigned long)status, budget);
+		    (unsigned long)status, budget);
 
 	work_done = bp->macbgem_ops.mog_rx(bp, budget);
 	if (work_done < budget) {
@@ -1262,7 +1263,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 	}
 
 	/* Should never happen */
-	if (unlikely(tx_skb == NULL)) {
+	if (unlikely(!tx_skb)) {
 		netdev_err(bp->dev, "BUG! empty skb!\n");
 		return 0;
 	}
@@ -1332,16 +1333,16 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
 	netdev_vdbg(bp->dev,
-		   "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
-		   queue_index, skb->len, skb->head, skb->data,
-		   skb_tail_pointer(skb), skb_end_pointer(skb));
+		    "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
+		    queue_index, skb->len, skb->head, skb->data,
+		    skb_tail_pointer(skb), skb_end_pointer(skb));
 	print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1,
 		       skb->data, 16, true);
 #endif
 
 	/* Count how many TX buffer descriptors are needed to send this
 	 * socket buffer: skb fragments of jumbo frames may need to be
-	 * splitted into many buffer descriptors.
+	 * split into many buffer descriptors.
 	 */
 	count = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length);
 	nr_frags = skb_shinfo(skb)->nr_frags;
@@ -1392,8 +1393,8 @@ static void macb_init_rx_buffer_size(struct macb *bp, size_t size)
 
 		if (bp->rx_buffer_size % RX_BUFFER_MULTIPLE) {
 			netdev_dbg(bp->dev,
-				    "RX buffer must be multiple of %d bytes, expanding\n",
-				    RX_BUFFER_MULTIPLE);
+				   "RX buffer must be multiple of %d bytes, expanding\n",
+				   RX_BUFFER_MULTIPLE);
 			bp->rx_buffer_size =
 				roundup(bp->rx_buffer_size, RX_BUFFER_MULTIPLE);
 		}
@@ -1416,7 +1417,7 @@ static void gem_free_rx_buffers(struct macb *bp)
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		skb = bp->rx_skbuff[i];
 
-		if (skb == NULL)
+		if (!skb)
 			continue;
 
 		desc = &bp->rx_ring[i];
@@ -1817,7 +1818,8 @@ static void macb_sethashtable(struct net_device *dev)
 	unsigned int bitnr;
 	struct macb *bp = netdev_priv(dev);
 
-	mc_filter[0] = mc_filter[1] = 0;
+	mc_filter[0] = 0;
+	mc_filter[1] = 0;
 
 	netdev_for_each_mc_addr(ha, dev) {
 		bitnr = hash_get_index(ha->addr);
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 5/5] net: macb: Fix simple typo
From: Moritz Fischer @ 2016-03-30  2:11 UTC (permalink / raw)
  To: nicolas.ferre
  Cc: michal.simek, joe, davem, netdev, linux-kernel,
	moritz.fischer.private, Moritz Fischer
In-Reply-To: <1459303875-3362-1-git-send-email-moritz.fischer@ettus.com>

Acked-by: Michal Simek <michal.simek@xilinx.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Moritz Fischer <moritz.fischer@ettus.com>
---
 drivers/net/ethernet/cadence/macb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 01a8ffb..eec3200 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -130,7 +130,7 @@ static void hw_writel(struct macb *bp, int offset, u32 value)
 }
 
 /* Find the CPU endianness by using the loopback bit of NCR register. When the
- * CPU is in big endian we need to program swaped mode for management
+ * CPU is in big endian we need to program swapped mode for management
  * descriptor access.
  */
 static bool hw_is_native_io(void __iomem *addr)
-- 
2.7.4

^ permalink raw reply related

* Re: [net-next 02/16] i40e/i40evf: Rewrite logic for 8 descriptor per packet check
From: Alexander Duyck @ 2016-03-30  2:20 UTC (permalink / raw)
  To: Jesse Brandeburg
  Cc: Jeff Kirsher, Dave Miller, Alexander Duyck, NetDEV list,
	Neil Horman, sassmann, John Greene, Jesse Brandeburg
In-Reply-To: <CAEuXFEwdc1VZ6bcnMy1sqFvm=tr3zDwauW6oW8WO77F9oeERcA@mail.gmail.com>

On Tue, Mar 29, 2016 at 5:34 PM, Jesse Brandeburg
<jesse.brandeburg@gmail.com> wrote:
> stupid gmail... sent again to the list, sorry for duplicate (but
> formatted better this time)
>
>
> Hey Alex, this patch appears to have caused a regression (probably both
> i40e/i40evf).  Easily reproducible running rds-stress (see the thread titled
> "i40e card Tx resets", the middle post by sowmini has the repro steps,
> ignore the pktgen discussion in the thread.)

I'll see about setting up rds tomorrow.  It should be pretty straight forward.

> I've spent some time trying to figure out the right fix, but keep getting
> stuck in the complicated logic of the function, so I'm sending this in the
> hope you'll take a look.
>
> This is (one example of) the skb that doesn't get linearized:
> skb_print_bits:  > headlen=114 datalen=33824 data=238 mac=238 nh=252
> h=272 gso=1448 frag=17

So the code as I had written it up should be verifying we use no more
than 8 descriptors as that was what the data sheet states.  You might
want to request a spec update if the hardware only supports 7 data
descriptors per segment for a TSO as that isn't what is in the
documentation.

> skb_print_bits: frag[0]: len: 256
> skb_print_bits: frag[1]: len: 48
> skb_print_bits: frag[2]: len: 256
> skb_print_bits: frag[3]: len: 48
> skb_print_bits: frag[4]: len: 256
> skb_print_bits: frag[5]: len: 48
> skb_print_bits: frag[6]: len: 4096
>
> This descriptor ^^^ is the 8th, I believe the hardware mechanism faults on
> this input.

Is this something that was previously being linearized?  It doesn't
seem like it would be.  We are only 8 descriptors in and that 7th
fragment should have flushed the count back to 1 with a remainder.

If you are wanting to trigger a linearize with the updated code you
would just need to update 2 lines.  Replace "I40E_MAX_BUFFER_TXD - 1"
with just I40E_MAX_BUFFER_TXD" in the line where we subtract that
value from nr_frags.  Then remove one of the "sum +=
skb_frag_size(++frag);" lines.  That should update the code so that
you only evaluate 5 buffers at a time instead of 6.  It should force
things to coalesce if we would span 8 or more descriptors instead of 9
or more which is what the code currently does based on what was
required in the EAS.

> I added a print of the sum at each point it is subtracted or added to after
> initially being set, sum7/8 are in the for loop.
>
> skb_print_bits: frag[7]: len: 4096
> skb_print_bits: frag[8]: len: 48
> skb_print_bits: frag[9]: len: 4096
> skb_print_bits: frag[10]: len: 4096
> skb_print_bits: frag[11]: len: 48
> skb_print_bits: frag[12]: len: 4096
> skb_print_bits: frag[13]: len: 4096
> skb_print_bits: frag[14]: len: 48
> skb_print_bits: frag[15]: len: 4096
> skb_print_bits: frag[16]: len: 4096
> __i40e_chk_linearize: sum1: -1399
> __i40e_chk_linearize: sum2: -1143
> __i40e_chk_linearize: sum3: -1095
> __i40e_chk_linearize: sum4: -839
> __i40e_chk_linearize: sum5: -791
> __i40e_chk_linearize: sum7: 3305
> __i40e_chk_linearize: sum8: 3257
> __i40e_chk_linearize: sum7: 7353
> __i40e_chk_linearize: sum8: 7097
> __i40e_chk_linearize: sum7: 7145
> __i40e_chk_linearize: sum8: 7097
> __i40e_chk_linearize: sum7: 11193
> __i40e_chk_linearize: sum8: 10937
> __i40e_chk_linearize: sum7: 15033
> __i40e_chk_linearize: sum8: 14985
> __i40e_chk_linearize: sum7: 15033
>
> This was the descriptors generated from the above:
> d[054] = 0x0000000000000000 0x16a0211400000011
> d[055] = 0x0000000bfbaa40ee 0x000001ca02871640

len 72

> d[056] = 0x0000000584bcd000 0x0000040202871640

len 256

> d[057] = 0x0000000c0bfea9d0 0x000000c202871640

len 48

> d[058] = 0x0000000584bcd100 0x0000040202871640

len 256

> d[059] = 0x0000000c0bfeaa00 0x000000c202871640

len 48

> d[05a] = 0x0000000584bcd200 0x0000040202871640

len 256

> d[05b] = 0x0000000c0bfeaa30 0x000000c202871640

len 48

> d[05c] = 0x000000056d5f0000 0x0000400202871640

len 4096

>From what I can see we are at 8 descriptors and have exceeded 1 MSS.
According to the EAS this is supposed to work.

> d[05d] = 0x000000056d5f1000 0x0000400202871640
> d[05e] = 0x0000000c0bfeaa60 0x000000c202871640
> d[05f] = 0x00000005f2762000 0x0000400202871640
> d[060] = 0x00000005f765e000 0x0000400202871640
> d[061] = 0x0000000c0bfeaa90 0x000000c202871640
> d[062] = 0x0000000574928000 0x0000400202871640
> d[063] = 0x0000000568ba5000 0x0000400202871640
> d[064] = 0x0000000c0bfeaac0 0x000000c202871640
> d[065] = 0x00000005f68cd000 0x0000400202871640
> d[066] = 0x0000000585a2a000 0x0000400202871670
>
>
> On Fri, Feb 19, 2016 at 3:54 AM Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> wrote:
>>
>> From: Alexander Duyck <aduyck@mirantis.com>
>>
>> This patch is meant to rewrite the logic for how we determine if we can
>> transmit the frame or if it needs to be linearized.
>>
>> +       /* Initialize size to the negative value of gso_size minus 1.  We
>> +        * use this as the worst case scenerio in which the frag ahead
>> +        * of us only provides one byte which is why we are limited to 6
>> +        * descriptors for a single transmit as the header and previous
>> +        * fragment are already consuming 2 descriptors.
>> +        */
>> +       sum = 1 - gso_size;
>> +
>> +       /* Add size of frags 1 through 5 to create our initial sum */
>> +       sum += skb_frag_size(++frag);
>
>
> I'm pretty sure this code skips frag[0] due to the pre-increment, the bug
> seems to occur in the algorithm because skb->data contains L2/L3/L4 header
> plus some data, and should counts as 1 descriptor for every subsequent sent
> packet, and if the incoming skb has 7 chunks that add up to < MSS then we
> get in trouble.

Yep we were skipping frag 0 on purpose.  There is a comment earlier
that basically states that frag 0 cannot borrow from an earlier
descriptor.  The assumption is that if we have a frag it contains at
least 1 byte.  So if the 6 frags following the first one do not
provide at least gso_size - 1 in data the packet needs to be
linearized.

The code as written was meant to cap us at 8 data descriptors per
packet.  It sounds like we are wanting to reduce that to 7 if I
understand what you are describing correctly.

>> +       sum += skb_frag_size(++frag);
>> +       sum += skb_frag_size(++frag);
>> +       sum += skb_frag_size(++frag);
>> +       sum += skb_frag_size(++frag);
>> +
>> +       /* Walk through fragments adding latest fragment, testing it, and
>> +        * then removing stale fragments from the sum.
>> +        */
>> +       stale = &skb_shinfo(skb)->frags[0];
>> +       for (;;) {
>> +               sum += skb_frag_size(++frag);
>> +
>> +               /* if sum is negative we failed to make sufficient progress */
>> +               if (sum < 0)
>> +                       return true;
>> +
>> +               /* use pre-decrement to avoid processing last fragment */
>> +               if (!--nr_frags)
>> +                       break;
>> +
>> +               sum -= skb_frag_size(++stale);
>
>
> I think this line also skips stale[0]

Right we skip frag 0 and the last fragment in the packet.  The general
idea was that we were preventing us from going over 8 descriptors per
packet so I was only checking 6 fragments starting at the second and
as long as we completed one MSS in those 6 descriptors we were
guaranteed to complete a packet in less than 8 total descriptors.

^ permalink raw reply

* [PATCHv2 net] team: team should sync the port's uc/mc addrs when add a port
From: Xin Long @ 2016-03-30  2:58 UTC (permalink / raw)
  To: network dev; +Cc: davem, Jiri Pirko, Marcelo Ricardo Leitner, Cong Wang

There is an issue when we use mavtap over team:
When we replug nic links from team0, the real nics's mc list will not
include the maddr for macvtap any more. then we can't receive pkts to
macvtap device, as they are filterred by mc list of nic.

In Bonding Driver, it syncs the uc/mc addrs in bond_enslave().

We will fix this issue on team by adding the port's uc/mc addrs sync in
team_port_add.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 drivers/net/team/team.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 26c64d2..a0f64cb 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1198,6 +1198,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_dev_open;
 	}
 
+	dev_uc_sync_multiple(port_dev, dev);
+	dev_mc_sync_multiple(port_dev, dev);
+
 	err = vlan_vids_add_by_dev(port_dev, dev);
 	if (err) {
 		netdev_err(dev, "Failed to add vlan ids to device %s\n",
@@ -1261,6 +1264,8 @@ err_enable_netpoll:
 	vlan_vids_del_by_dev(port_dev, dev);
 
 err_vids_add:
+	dev_uc_unsync(port_dev, dev);
+	dev_mc_unsync(port_dev, dev);
 	dev_close(port_dev);
 
 err_dev_open:
-- 
2.1.0

^ permalink raw reply related

* Re: [PATCH] ethernet: mvneta: Support netpoll
From: David Miller @ 2016-03-30  3:09 UTC (permalink / raw)
  To: ezequiel; +Cc: thomas.petazzoni, netdev
In-Reply-To: <1459197678-12022-1-git-send-email-ezequiel@vanguardiasur.com.ar>

From: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Date: Mon, 28 Mar 2016 17:41:18 -0300

> +/* Polled functionality used by netconsole and others in non interrupt mode */
> +static void mvneta_poll_controller(struct net_device *dev)
> +{
> +	struct mvneta_port *pp = netdev_priv(dev);
> +
> +	on_each_cpu(mvneta_percpu_poll_controller, pp, false);
> +}

This doesn't work.

netpoll may be invoked from any context whatsoever, even hardware
interrupt handlers or contexts where cpu interrupts are disabled.

smp_call_function() and thus on_each_cpu() may not be called with
disabled interrupts or from a hardware interrupt handler or from
a bottom half handler, all of which are valid situations where
netpoll may occur since printk's can occur anywhere.

^ permalink raw reply

* Re: [PATCH net] net: ipv4: Multipath needs to handle unreachable nexthops
From: David Ahern @ 2016-03-30  3:16 UTC (permalink / raw)
  To: Julian Anastasov; +Cc: netdev
In-Reply-To: <alpine.LFD.2.11.1603251024390.1896@ja.home.ssi.bg>

On 3/25/16 4:05 AM, Julian Anastasov wrote:
>
> 	Hello,
>
> On Thu, 24 Mar 2016, David Ahern wrote:
>
>> On 3/24/16 4:33 PM, Julian Anastasov wrote:
>>> 	But for multipath routes we can also consider the
>>> nexthops as "alternatives", so it depends on how one uses
>>> the multipath mechanism. The ability to fallback to
>>> another nexthop assumes one connection is allowed to
>>> move from one ISP to another. What if the second ISP
>>> decides to reject the connection? What we have is a
>>> broken connection just because the retransmits
>>> were diverted to wrong place in the hurry. So, the
>>> nexthops can be compatible or incompatible. For your
>>> setup they are, for others they are not.
>>
>> I am not sure I completely understand your point. Are you saying that within a
>> single multipath route some connections to nexthops are allowed and others are
>> not?
>>
>> So to put that paragraph into an example
>>
>> 15.0.0.0/16
>> 	nexthop via 12.0.0.2  dev swp1 weight 1
>> 	nexthop via 12.0.0.3  dev swp1 weight 1
>>
>> Hosts from 15.0/16 could have TCP connections use 12.0.0.2, but not 12.0.0.3
>> because 12.0.0.3 could be a different ISP and not allow TCP connections from
>> this address space?
>
> 	Yes. Two cases are possible:
>
> 1. ISP2 filters saddr, traffic with saddr from ISP1 is dropped.
>
> 2. ISP2 allows any saddr. But how the responses from
> world with daddr=IP_from_ISP1 will come from ISP2 link?
> If the nexthops are for different ISP the connection
> can survive only if sticks to its ISP. An ISP will
> not work as a backup link for another ISP.

Seems to me this is a problem that is addressed by VRFs, not multipath 
routes where some nexthops are actually deadends because they attempt to 
cross ISPs.


>> After that if it has information that says that a nexthop is dead, why would
>> it continue to try to probe? Any traffic that selects that nh is dead. That to
>
> 	If entry becomes FAILED this state is preserved
> if we do not direct traffic to this entry. If there was a
> single connection that was rejected after 3 failed probes
> the next connection (with your patch) will fallback to
> another neigh and the first entry will remain in FAILED
> state until expiration. If one wants to refresh the state
> often, a script/tool that pings all GWs is needed, so that
> you can notice the available or failed paths faster.
>
>> me defies the basis of having multiple paths.
>
> 	We do not know how long is the outage. Long living
> connections may prefer to survive with retransmits.
> Say you are using SSH via wifi link doing important work.
> Do you want your connection to break just because link was
> down for a while?

neighbor entries have a timeout and when it drops from the cache the arp 
will try again. This suggested patch is not saying 'never try a nexthop 
again' it is saying 'I have multiple paths and since path 1 is down try 
another one'.

I'll send an updated patch when I get time (traveling at the moment); I 
guess a sysctl is going to be needed if the behavior you mention with 
ISPs is reasonable.

^ permalink raw reply

* Re: [PATCH] net: fec: stop the "rcv is not +last, " error messages
From: Greg Ungerer @ 2016-03-30  3:24 UTC (permalink / raw)
  To: Troy Kisky; +Cc: netdev

Hi Troy,

Commit 55cd48c8 ('net: fec: stop the "rcv is not +last, " error
messages') adds a write to a register that is not present in all
implementations of the FEC hardware module. None of the ColdFire
SoC parts with the FEC module have the FTRL (0x1b0) register.

Does this need a quirk flag to key access to this register of?
Or can you piggyback on the FEC_QUIRK_HAS_RACC flag?

Regards
Greg

^ permalink raw reply

* Re: [PATCH net] bpf: make padding in bpf_tunnel_key explicit
From: David Miller @ 2016-03-30  4:10 UTC (permalink / raw)
  To: daniel; +Cc: alexei.starovoitov, netdev
In-Reply-To: <6589c70157238797e63986eeea67cfe2abfb3260.1459288316.git.daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 30 Mar 2016 00:02:00 +0200

> Make the 2 byte padding in struct bpf_tunnel_key between tunnel_ttl
> and tunnel_label members explicit. No issue has been observed, and
> gcc/llvm does padding for the old struct already, where tunnel_label
> was not yet present, so the current code works, but since it's part
> of uapi, make sure we don't introduce holes in structs.
> 
> Therefore, add tunnel_ext that we can use generically in future
> (f.e. to flag OAM messages for backends, etc). Also add the offset
> to the compat tests to be sure should some compilers not padd the
> tail of the old version of bpf_tunnel_key.
> 
> Fixes: 4018ab1875e0 ("bpf: support flow label for bpf_skb_{set, get}_tunnel_key")
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH] ethernet: mvneta: Support netpoll
From: Ezequiel Garcia @ 2016-03-30  4:59 UTC (permalink / raw)
  To: David Miller; +Cc: Thomas Petazzoni, netdev
In-Reply-To: <20160329.230930.1234182429605936870.davem@davemloft.net>

Hi David,

On 30 March 2016 at 00:09, David Miller <davem@davemloft.net> wrote:
> From: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
> Date: Mon, 28 Mar 2016 17:41:18 -0300
>
>> +/* Polled functionality used by netconsole and others in non interrupt mode */
>> +static void mvneta_poll_controller(struct net_device *dev)
>> +{
>> +     struct mvneta_port *pp = netdev_priv(dev);
>> +
>> +     on_each_cpu(mvneta_percpu_poll_controller, pp, false);
>> +}
>
> This doesn't work.
>
> netpoll may be invoked from any context whatsoever, even hardware
> interrupt handlers or contexts where cpu interrupts are disabled.
>
> smp_call_function() and thus on_each_cpu() may not be called with
> disabled interrupts or from a hardware interrupt handler or from
> a bottom half handler, all of which are valid situations where
> netpoll may occur since printk's can occur anywhere.

Well, I hated the idea of using on_each_cpu here, but wasn't sure
if there was a real reason that forbid it.

Thanks a lot for the feedback.
-- 
Ezequiel García, VanguardiaSur
www.vanguardiasur.com.ar

^ permalink raw reply

* [PATCH RFC] net: decrease the length of backlog queue immediately after it's detached from sk
From: Yang Yingliang @ 2016-03-30  5:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, eric.dumazet

When task A hold the sk owned in tcp_sendmsg, if lots of packets
arrive and the packets will be added to backlog queue. The packets
will be handled in release_sock called from tcp_sendmsg. When the
sk_backlog is removed from sk, the length will not decrease until
all the packets in backlog queue are handled. This may leads to the
new packets be dropped because the lenth is too big. So set the
lenth to 0 immediately after it's detached from sk.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/core/sock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/sock.c b/net/core/sock.c
index 47fc8bb..108be05 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1933,6 +1933,7 @@ static void __release_sock(struct sock *sk)

 	do {
 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
+		sk->sk_backlog.len = 0;
 		bh_unlock_sock(sk);

 		do {
-- 
2.5.0

^ permalink raw reply related

* Re: [PATCH 06/16] wcn36xx: Fetch private sta data from sta entry instead of from vif
From: Kalle Valo @ 2016-03-30  5:18 UTC (permalink / raw)
  To: Bjorn Andersson
  Cc: kbuild test robot, netdev, linux-wireless, linux-kernel,
	Pontus Fuchs, kbuild-all, wcn36xx, Eugene Krasnikov
In-Reply-To: <20160329213721.GV8929@tuxbot>

Bjorn Andersson <bjorn.andersson@linaro.org> writes:

>> All error/warnings (new ones prefixed by >>):
>> 
>>    drivers/net/wireless/ath/wcn36xx/main.c: In function 'wcn36xx_set_key':
>> >> drivers/net/wireless/ath/wcn36xx/main.c:389:9: error: implicit declaration of function 'wcn36xx_sta_to_priv' [-Werror=implicit-function-declaration]
>>      struct wcn36xx_sta *sta_priv = wcn36xx_sta_to_priv(sta);
>>             ^
>> >> drivers/net/wireless/ath/wcn36xx/main.c:389:33: warning: initialization makes pointer from integer without a cast
>>      struct wcn36xx_sta *sta_priv = wcn36xx_sta_to_priv(sta);
>>                                     ^
>>    cc1: some warnings being treated as errors
>
> This should have been reordered with patch 7, that introduces this
> helper function. Do you want me to resend, or can you apply the patches
> out of order?

It's better that you resend the whole patchset as v2.

-- 
Kalle Valo

^ permalink raw reply

* Re: [net-next 02/16] i40e/i40evf: Rewrite logic for 8 descriptor per packet check
From: Alexander Duyck @ 2016-03-30  5:19 UTC (permalink / raw)
  To: Jesse Brandeburg
  Cc: Jeff Kirsher, Dave Miller, Alexander Duyck, NetDEV list,
	Neil Horman, sassmann, John Greene, Jesse Brandeburg
In-Reply-To: <CAKgT0Uf8_Eb0G+k6N4vz69HefM3_oCkj7PB30z3keBCR4HAUnQ@mail.gmail.com>

On Tue, Mar 29, 2016 at 7:20 PM, Alexander Duyck
<alexander.duyck@gmail.com> wrote:
> On Tue, Mar 29, 2016 at 5:34 PM, Jesse Brandeburg
> <jesse.brandeburg@gmail.com> wrote:
>> stupid gmail... sent again to the list, sorry for duplicate (but
>> formatted better this time)
>>
>>
>> Hey Alex, this patch appears to have caused a regression (probably both
>> i40e/i40evf).  Easily reproducible running rds-stress (see the thread titled
>> "i40e card Tx resets", the middle post by sowmini has the repro steps,
>> ignore the pktgen discussion in the thread.)
>
> I'll see about setting up rds tomorrow.  It should be pretty straight forward.
>
>> I've spent some time trying to figure out the right fix, but keep getting
>> stuck in the complicated logic of the function, so I'm sending this in the
>> hope you'll take a look.
>>
>> This is (one example of) the skb that doesn't get linearized:
>> skb_print_bits:  > headlen=114 datalen=33824 data=238 mac=238 nh=252
>> h=272 gso=1448 frag=17
>
> So the code as I had written it up should be verifying we use no more
> than 8 descriptors as that was what the data sheet states.  You might
> want to request a spec update if the hardware only supports 7 data
> descriptors per segment for a TSO as that isn't what is in the
> documentation.
>
>> skb_print_bits: frag[0]: len: 256
>> skb_print_bits: frag[1]: len: 48
>> skb_print_bits: frag[2]: len: 256
>> skb_print_bits: frag[3]: len: 48
>> skb_print_bits: frag[4]: len: 256
>> skb_print_bits: frag[5]: len: 48
>> skb_print_bits: frag[6]: len: 4096
>>
>> This descriptor ^^^ is the 8th, I believe the hardware mechanism faults on
>> this input.
>
> Is this something that was previously being linearized?  It doesn't
> seem like it would be.  We are only 8 descriptors in and that 7th
> fragment should have flushed the count back to 1 with a remainder.
>
> If you are wanting to trigger a linearize with the updated code you
> would just need to update 2 lines.  Replace "I40E_MAX_BUFFER_TXD - 1"
> with just I40E_MAX_BUFFER_TXD" in the line where we subtract that
> value from nr_frags.  Then remove one of the "sum +=
> skb_frag_size(++frag);" lines.  That should update the code so that
> you only evaluate 5 buffers at a time instead of 6.  It should force
> things to coalesce if we would span 8 or more descriptors instead of 9
> or more which is what the code currently does based on what was
> required in the EAS.
>
>> I added a print of the sum at each point it is subtracted or added to after
>> initially being set, sum7/8 are in the for loop.
>>
>> skb_print_bits: frag[7]: len: 4096
>> skb_print_bits: frag[8]: len: 48
>> skb_print_bits: frag[9]: len: 4096
>> skb_print_bits: frag[10]: len: 4096
>> skb_print_bits: frag[11]: len: 48
>> skb_print_bits: frag[12]: len: 4096
>> skb_print_bits: frag[13]: len: 4096
>> skb_print_bits: frag[14]: len: 48
>> skb_print_bits: frag[15]: len: 4096
>> skb_print_bits: frag[16]: len: 4096
>> __i40e_chk_linearize: sum1: -1399
>> __i40e_chk_linearize: sum2: -1143
>> __i40e_chk_linearize: sum3: -1095
>> __i40e_chk_linearize: sum4: -839
>> __i40e_chk_linearize: sum5: -791
>> __i40e_chk_linearize: sum7: 3305
>> __i40e_chk_linearize: sum8: 3257
>> __i40e_chk_linearize: sum7: 7353
>> __i40e_chk_linearize: sum8: 7097
>> __i40e_chk_linearize: sum7: 7145
>> __i40e_chk_linearize: sum8: 7097
>> __i40e_chk_linearize: sum7: 11193
>> __i40e_chk_linearize: sum8: 10937
>> __i40e_chk_linearize: sum7: 15033
>> __i40e_chk_linearize: sum8: 14985
>> __i40e_chk_linearize: sum7: 15033
>>
>> This was the descriptors generated from the above:
>> d[054] = 0x0000000000000000 0x16a0211400000011
>> d[055] = 0x0000000bfbaa40ee 0x000001ca02871640
>
> len 72

Minor correction here.  This is 72 hex, which is 114 in decimal.  The
interesting bit I believe is the fact that we have both header and
payload data in the same descriptor.

You should probably check with your hardware team on this but I have a
working theory on the issue.  I'm thinking that because both header
and payload are coming out of the same descriptor this must count as 2
descriptors and not 1 when we compute the usage for the first
descriptor.  As such I do probably need to start testing at frag 0
because the first payload can actually come out of the header region
if the first descriptor includes both payload and data.

I'll try to submit a patch for it tonight.  If you can test it
tomorrow I would appreciate it.  Otherwise I will try setting up an
environment to try and reproduce the issue tomorrow.

- Alex

^ permalink raw reply

* Re: [PATCH RFC] net: decrease the length of backlog queue immediately after it's detached from sk
From: Eric Dumazet @ 2016-03-30  5:25 UTC (permalink / raw)
  To: Yang Yingliang; +Cc: netdev, davem
In-Reply-To: <1459315001-3448-1-git-send-email-yangyingliang@huawei.com>

On Wed, 2016-03-30 at 13:16 +0800, Yang Yingliang wrote:
> When task A hold the sk owned in tcp_sendmsg, if lots of packets
> arrive and the packets will be added to backlog queue. The packets
> will be handled in release_sock called from tcp_sendmsg. When the
> sk_backlog is removed from sk, the length will not decrease until
> all the packets in backlog queue are handled. This may leads to the
> new packets be dropped because the lenth is too big. So set the
> lenth to 0 immediately after it's detached from sk.
> 
> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
> ---
>  net/core/sock.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 47fc8bb..108be05 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1933,6 +1933,7 @@ static void __release_sock(struct sock *sk)
>  
>  	do {
>  		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
> +		sk->sk_backlog.len = 0;
>  		bh_unlock_sock(sk);
>  
>  		do {

Certainly not.

Have you really missed the comment ?

https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8eae939f1400326b06d0c9afe53d2a484a326871


I do not believe the case you describe can happen, unless a misbehaving
driver cooks fat skb (with skb->truesize being far more bigger than
skb->len)

^ permalink raw reply

* Re: [net-next 02/16] i40e/i40evf: Rewrite logic for 8 descriptor per packet check
From: Jeff Kirsher @ 2016-03-30  5:29 UTC (permalink / raw)
  To: Alexander Duyck, Jesse Brandeburg
  Cc: Dave Miller, Alexander Duyck, NetDEV list, Neil Horman, sassmann,
	John Greene, Jesse Brandeburg
In-Reply-To: <CAKgT0Udt1b8RYdh4Ex+34i+V4dRZWUuYWM=MpJK_DhuqGecjZQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1081 bytes --]

On Tue, 2016-03-29 at 22:19 -0700, Alexander Duyck wrote:
> Minor correction here.  This is 72 hex, which is 114 in decimal.  The
> interesting bit I believe is the fact that we have both header and
> payload data in the same descriptor.
> 
> You should probably check with your hardware team on this but I have
> a
> working theory on the issue.  I'm thinking that because both header
> and payload are coming out of the same descriptor this must count as
> 2
> descriptors and not 1 when we compute the usage for the first
> descriptor.  As such I do probably need to start testing at frag 0
> because the first payload can actually come out of the header region
> if the first descriptor includes both payload and data.
> 
> I'll try to submit a patch for it tonight.  If you can test it
> tomorrow I would appreciate it.  Otherwise I will try setting up an
> environment to try and reproduce the issue tomorrow.

If you get it submitted tonight, I can have it ready in my tree for
testing for Jesse and the team when they get into the office tomorrow.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: am335x: no multicast reception over VLAN
From: Mugunthan V N @ 2016-03-30  5:33 UTC (permalink / raw)
  To: Grygorii Strashko, Yegor Yefremov
  Cc: netdev, linux-omap@vger.kernel.org, drivshin, ml
In-Reply-To: <56FA78AA.4030402@ti.com>

On Tuesday 29 March 2016 06:14 PM, Grygorii Strashko wrote:
> On 03/29/2016 03:35 PM, Yegor Yefremov wrote:
>> On Tue, Mar 29, 2016 at 1:05 PM, Grygorii Strashko
>> <grygorii.strashko@ti.com> wrote:
>>> On 03/29/2016 08:21 AM, Yegor Yefremov wrote:
>>>> Hi Mugunthan,
>>>>
>>>> On Tue, Mar 29, 2016 at 6:00 AM, Mugunthan V N <mugunthanvnm@ti.com> wrote:
>>>>> Hi Yegor
>>>>>
>>>>> On Wednesday 16 March 2016 08:35 PM, Yegor Yefremov wrote:
>>>>>> I have an am335x based board using CPSW in Dual EMAC mode. Without
>>>>>> VLAN IDs I can receive and send multicast packets [1]. When I create
>>>>>> VLAN ID:
>>>>>>
>>>>>> ip link add link eth1 name eth1.100 type vlan id 100
>>>>>> ip addr add 192.168.100.2/24 brd 192.168.100.255 dev eth1.100
>>>>>> route add -net 224.0.0.0 netmask 224.0.0.0 eth1.100
>>>>>>
>>>>>> I can successfully send multicast packets, but not receive them. On
>>>>>> the other side of the Ethernet cable I've used Pandaboard. Pandaboard
>>>>>> could both receive and send multicast packets via VLAN.
>>>>>
>>>>> Are you trying multicast tx/rx on eth1 or eth1.100?
>>>>
>>>> I'm trying multicast tx/rx on eth1.100.
>>>>
>>>> eth1 has no problems.
>>>>
>>>
>>> it'd be nice if will be able to post here output fom commands:
>>> # switch-config -d [git://git.ti.com/switch-config/switch-config.git v4.1]
>>> # ifconfig -a
>>> # tcpdump -e -f -Q in -i eth0
>>> # tcpdump -e -f -Q in -i eth0.100
>>
>> Which kernel/branch do you want me to test?
>>
>> git://git.ti.com/ti-linux-kernel/ti-linux-kernel.git and ti-rt-linux-4.1.y?
>>
>> So far I was using vanilla kernel.
> 
> Your branch (but better 4.5 kernels (or 4.4)). 
> Just when you've done with configuration run cmds 1&2,
> and when you run your use-case - run cmds 2&3 on receiver side (grap ~5-10 packets).
> then stop test and run cmd 1 again.
> 
> After all could you provide your console output here, pls.
> 
> 

To use command 1, you need TI kernel [1] as it won't build with vanilla
kernel.

[1]: git://git.ti.com/ti-linux-kernel/ti-linux-kernel.git ti-linux-4.1.y

Regards
Mugunthan V N

^ permalink raw reply

* Re: [PATCH RFC] net: decrease the length of backlog queue immediately after it's detached from sk
From: Eric Dumazet @ 2016-03-30  5:34 UTC (permalink / raw)
  To: Yang Yingliang; +Cc: netdev, davem
In-Reply-To: <1459315520.6473.187.camel@edumazet-glaptop3.roam.corp.google.com>

On Tue, 2016-03-29 at 22:25 -0700, Eric Dumazet wrote:
> On Wed, 2016-03-30 at 13:16 +0800, Yang Yingliang wrote:
> > When task A hold the sk owned in tcp_sendmsg, if lots of packets
> > arrive and the packets will be added to backlog queue. The packets
> > will be handled in release_sock called from tcp_sendmsg. When the
> > sk_backlog is removed from sk, the length will not decrease until
> > all the packets in backlog queue are handled. This may leads to the
> > new packets be dropped because the lenth is too big. So set the
> > lenth to 0 immediately after it's detached from sk.
> > 
> > Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
> > ---
> >  net/core/sock.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/net/core/sock.c b/net/core/sock.c
> > index 47fc8bb..108be05 100644
> > --- a/net/core/sock.c
> > +++ b/net/core/sock.c
> > @@ -1933,6 +1933,7 @@ static void __release_sock(struct sock *sk)
> >  
> >  	do {
> >  		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
> > +		sk->sk_backlog.len = 0;
> >  		bh_unlock_sock(sk);
> >  
> >  		do {
> 
> Certainly not.
> 
> Have you really missed the comment ?
> 
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8eae939f1400326b06d0c9afe53d2a484a326871
> 
> 
> I do not believe the case you describe can happen, unless a misbehaving
> driver cooks fat skb (with skb->truesize being far more bigger than
> skb->len)
> 

And also make sure you backported
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=da882c1f2ecadb0ed582628ec1585e36b137c0f0

^ permalink raw reply

* Re: [PATCH RFC] net: decrease the length of backlog queue immediately after it's detached from sk
From: Yang Yingliang @ 2016-03-30  5:38 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, davem
In-Reply-To: <1459315520.6473.187.camel@edumazet-glaptop3.roam.corp.google.com>



On 2016/3/30 13:25, Eric Dumazet wrote:
> On Wed, 2016-03-30 at 13:16 +0800, Yang Yingliang wrote:
>> When task A hold the sk owned in tcp_sendmsg, if lots of packets
>> arrive and the packets will be added to backlog queue. The packets
>> will be handled in release_sock called from tcp_sendmsg. When the
>> sk_backlog is removed from sk, the length will not decrease until
>> all the packets in backlog queue are handled. This may leads to the
>> new packets be dropped because the lenth is too big. So set the
>> lenth to 0 immediately after it's detached from sk.
>>
>> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
>> ---
>>   net/core/sock.c | 1 +
>>   1 file changed, 1 insertion(+)
>>
>> diff --git a/net/core/sock.c b/net/core/sock.c
>> index 47fc8bb..108be05 100644
>> --- a/net/core/sock.c
>> +++ b/net/core/sock.c
>> @@ -1933,6 +1933,7 @@ static void __release_sock(struct sock *sk)
>>
>>   	do {
>>   		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
>> +		sk->sk_backlog.len = 0;
>>   		bh_unlock_sock(sk);
>>
>>   		do {
>
> Certainly not.
>
> Have you really missed the comment ?
>
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8eae939f1400326b06d0c9afe53d2a484a326871

My kernel is 4.1 LTS, it seems don't have this patch. I will try this 
patch later.

Thanks
Yang
>
>
> I do not believe the case you describe can happen, unless a misbehaving
> driver cooks fat skb (with skb->truesize being far more bigger than
> skb->len)
>
>
>
>
>
>
>

^ permalink raw reply

* Re: [PATCH RFC] net: decrease the length of backlog queue immediately after it's detached from sk
From: Yang Yingliang @ 2016-03-30  5:56 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, davem
In-Reply-To: <1459316043.6473.188.camel@edumazet-glaptop3.roam.corp.google.com>



On 2016/3/30 13:34, Eric Dumazet wrote:
> On Tue, 2016-03-29 at 22:25 -0700, Eric Dumazet wrote:
>> On Wed, 2016-03-30 at 13:16 +0800, Yang Yingliang wrote:
>>> When task A hold the sk owned in tcp_sendmsg, if lots of packets
>>> arrive and the packets will be added to backlog queue. The packets
>>> will be handled in release_sock called from tcp_sendmsg. When the
>>> sk_backlog is removed from sk, the length will not decrease until
>>> all the packets in backlog queue are handled. This may leads to the
>>> new packets be dropped because the lenth is too big. So set the
>>> lenth to 0 immediately after it's detached from sk.
>>>
>>> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
>>> ---
>>>   net/core/sock.c | 1 +
>>>   1 file changed, 1 insertion(+)
>>>
>>> diff --git a/net/core/sock.c b/net/core/sock.c
>>> index 47fc8bb..108be05 100644
>>> --- a/net/core/sock.c
>>> +++ b/net/core/sock.c
>>> @@ -1933,6 +1933,7 @@ static void __release_sock(struct sock *sk)
>>>
>>>   	do {
>>>   		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
>>> +		sk->sk_backlog.len = 0;
>>>   		bh_unlock_sock(sk);
>>>
>>>   		do {
>>
>> Certainly not.
>>
>> Have you really missed the comment ?
>>
>> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8eae939f1400326b06d0c9afe53d2a484a326871
>>
>>
>> I do not believe the case you describe can happen, unless a misbehaving
>> driver cooks fat skb (with skb->truesize being far more bigger than
>> skb->len)
>>
>
> And also make sure you backported
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=da882c1f2ecadb0ed582628ec1585e36b137c0f0

Sorry, I made a mistake. I am very sure my kernel has these two patches.
And I can get some dropping of the packets in 10Gb eth.

# netstat -s | grep -i backlog
     TCPBacklogDrop: 4135
# netstat -s | grep -i backlog
     TCPBacklogDrop: 4167


>
>
>
>
>
>

^ permalink raw reply

* [net PATCH] i40e/i40evf: Limit TSO to 7 descriptors for payload instead of 8 per packet
From: Alexander Duyck @ 2016-03-30  6:44 UTC (permalink / raw)
  To: netdev, intel-wired-lan, jesse.brandeburg, alexander.duyck,
	jeffrey.t.kirsher

This patch addresses a bug introduced based on my interpretation of the
XL710 datasheet.  Specifically section 8.4.1 states that "A single transmit
packet may span up to 8 buffers (up to 8 data descriptors per packet
including both the header and payload buffers)."  It then later goes on to
say that each segment for a TSO obeys the previous rule, however it then
refers to TSO header and the segment payload buffers.

I believe the actual limit for fragments with TSO and a skbuff that has
payload data in the header portion of the buffer is actually only 7
fragments as the skb->data portion counts as 2 buffers, one for the TSO
header, and one for a segment payload buffer.

Fixes: 2d37490b82af ("i40e/i40evf: Rewrite logic for 8 descriptor per packet check")
Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
---

This patch has been sanity checked only.  I cannot yet guarantee it
resolves the original issue that was reported.  I'll try to get a
reproduction environment setup tomorrow but I don't know how long that
should take.

 drivers/net/ethernet/intel/i40e/i40e_txrx.c   |   40 ++++++++++++++-----------
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c |   40 ++++++++++++++-----------
 2 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 5d5fa5359a1d..97437f04d99d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2597,12 +2597,17 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
 }
 
 /**
- * __i40e_chk_linearize - Check if there are more than 8 fragments per packet
+ * __i40e_chk_linearize - Check if there are more than 8 buffers per packet
  * @skb:      send buffer
  *
- * Note: Our HW can't scatter-gather more than 8 fragments to build
- * a packet on the wire and so we need to figure out the cases where we
- * need to linearize the skb.
+ * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
  **/
 bool __i40e_chk_linearize(struct sk_buff *skb)
 {
@@ -2614,18 +2619,17 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
 	if (unlikely(!gso_size))
 		return true;
 
-	/* no need to check if number of frags is less than 8 */
+	/* no need to check if number of frags is less than 7 */
 	nr_frags = skb_shinfo(skb)->nr_frags;
-	if (nr_frags < I40E_MAX_BUFFER_TXD)
+	if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
 		return false;
 
 	/* We need to walk through the list and validate that each group
 	 * of 6 fragments totals at least gso_size.  However we don't need
-	 * to perform such validation on the first or last 6 since the first
-	 * 6 cannot inherit any data from a descriptor before them, and the
-	 * last 6 cannot inherit any data from a descriptor after them.
+	 * to perform such validation on the last 6 since the last 6 cannot
+	 * inherit any data from a descriptor after them.
 	 */
-	nr_frags -= I40E_MAX_BUFFER_TXD - 1;
+	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
 	frag = &skb_shinfo(skb)->frags[0];
 
 	/* Initialize size to the negative value of gso_size minus 1.  We
@@ -2636,19 +2640,19 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
 	 */
 	sum = 1 - gso_size;
 
-	/* Add size of frags 1 through 5 to create our initial sum */
-	sum += skb_frag_size(++frag);
-	sum += skb_frag_size(++frag);
-	sum += skb_frag_size(++frag);
-	sum += skb_frag_size(++frag);
-	sum += skb_frag_size(++frag);
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
 
 	/* Walk through fragments adding latest fragment, testing it, and
 	 * then removing stale fragments from the sum.
 	 */
 	stale = &skb_shinfo(skb)->frags[0];
 	for (;;) {
-		sum += skb_frag_size(++frag);
+		sum += skb_frag_size(frag++);
 
 		/* if sum is negative we failed to make sufficient progress */
 		if (sum < 0)
@@ -2658,7 +2662,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
 		if (!--nr_frags)
 			break;
 
-		sum -= skb_frag_size(++stale);
+		sum -= skb_frag_size(stale++);
 	}
 
 	return false;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 04aabc52ba0d..240e4a1b2507 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1799,12 +1799,17 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
 }
 
 /**
- * __i40evf_chk_linearize - Check if there are more than 8 fragments per packet
+ * __i40evf_chk_linearize - Check if there are more than 8 buffers per packet
  * @skb:      send buffer
  *
- * Note: Our HW can't scatter-gather more than 8 fragments to build
- * a packet on the wire and so we need to figure out the cases where we
- * need to linearize the skb.
+ * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
  **/
 bool __i40evf_chk_linearize(struct sk_buff *skb)
 {
@@ -1816,18 +1821,17 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
 	if (unlikely(!gso_size))
 		return true;
 
-	/* no need to check if number of frags is less than 8 */
+	/* no need to check if number of frags is less than 7 */
 	nr_frags = skb_shinfo(skb)->nr_frags;
-	if (nr_frags < I40E_MAX_BUFFER_TXD)
+	if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
 		return false;
 
 	/* We need to walk through the list and validate that each group
 	 * of 6 fragments totals at least gso_size.  However we don't need
-	 * to perform such validation on the first or last 6 since the first
-	 * 6 cannot inherit any data from a descriptor before them, and the
-	 * last 6 cannot inherit any data from a descriptor after them.
+	 * to perform such validation on the last 6 since the last 6 cannot
+	 * inherit any data from a descriptor after them.
 	 */
-	nr_frags -= I40E_MAX_BUFFER_TXD - 1;
+	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
 	frag = &skb_shinfo(skb)->frags[0];
 
 	/* Initialize size to the negative value of gso_size minus 1.  We
@@ -1838,19 +1842,19 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
 	 */
 	sum = 1 - gso_size;
 
-	/* Add size of frags 1 through 5 to create our initial sum */
-	sum += skb_frag_size(++frag);
-	sum += skb_frag_size(++frag);
-	sum += skb_frag_size(++frag);
-	sum += skb_frag_size(++frag);
-	sum += skb_frag_size(++frag);
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
 
 	/* Walk through fragments adding latest fragment, testing it, and
 	 * then removing stale fragments from the sum.
 	 */
 	stale = &skb_shinfo(skb)->frags[0];
 	for (;;) {
-		sum += skb_frag_size(++frag);
+		sum += skb_frag_size(frag++);
 
 		/* if sum is negative we failed to make sufficient progress */
 		if (sum < 0)
@@ -1860,7 +1864,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
 		if (!--nr_frags)
 			break;
 
-		sum -= skb_frag_size(++stale);
+		sum -= skb_frag_size(stale++);
 	}
 
 	return false;

^ permalink raw reply related

* RE: [PATCH iproute2 v1 1/1] lib/utils: fix get_addr() and get_prefix() error messages
From: Varlese, Marco @ 2016-03-30  7:19 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: netdev@vger.kernel.org, davem@davemloft.net, Jiri Pirko,
	John Fastabend, jhs@mojatatu.com, Szczerbik, PrzemyslawX
In-Reply-To: <20160327103506.28f3d68e@xeon-e3>

[-- Attachment #1: Type: text/plain, Size: 2806 bytes --]


> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> On Behalf Of Stephen Hemminger
> Sent: Sunday, March 27, 2016 6:35 PM
> To: Varlese, Marco <marco.varlese@intel.com>
> Cc: netdev@vger.kernel.org; davem@davemloft.net; Jiri Pirko
<jiri@resnulli.us>;
> John Fastabend <john.fastabend@gmail.com>; jhs@mojatatu.com; Szczerbik,
> PrzemyslawX <przemyslawx.szczerbik@intel.com>
> Subject: Re: [PATCH iproute2 v1 1/1] lib/utils: fix get_addr() and
get_prefix()
> error messages
> 
> On Tue, 22 Mar 2016 13:02:02 +0000
> "Varlese, Marco" <marco.varlese@intel.com> wrote:
> 
> > An attempt to add invalid address to interface would print "???" string
> > instead of the address family name.
> >
> > For example:
> > $ ip address add 256.10.166.1/24 dev ens8
> > Error: ??? prefix is expected rather than "256.10.166.1/24".
> >
> > $ ip neighbor add proxy 2001:db8::g dev ens8
> > Error: ??? address is expected rather than "2001:db8::g".
> >
> > With this patch the output will look like:
> > $ ip address add 256.10.166.1/24 dev ens8
> > Error: inet prefix is expected rather than "256.10.166.1/24".
> >
> > $ ip neighbor add proxy 2001:db8::g dev ens8
> > Error: inet6 address is expected rather than "2001:db8::g".
> >
> > Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
> > Signed-off-by: Marco Varlese <marco.varlese@intel.com>
> > ---
> > lib/utils.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/utils.c b/lib/utils.c
> > index fa35f4d..4820de1 100644
> > --- a/lib/utils.c
> > +++ b/lib/utils.c
> > @@ -567,7 +567,7 @@ int get_addr(inet_prefix *dst, const char *arg, int
> family)
> > {
> >         if (get_addr_1(dst, arg, family)) {
> >                 fprintf(stderr, "Error: %s address is expected rather than
\"%s\".\n",
> > -                               family_name(family) ,arg);
> > +                               family_name(dst->family), arg);
> >                 exit(1);
> >         }
> >         return 0;
> > @@ -581,7 +581,7 @@ int get_prefix(inet_prefix *dst, char *arg, int family)
> >         }
> >         if (get_prefix_1(dst, arg, family)) {
> >                 fprintf(stderr, "Error: %s prefix is expected rather than
\"%s\".\n",
> > -                               family_name(family) ,arg);
> > +                               family_name(dst->family), arg);
> >                 exit(1);
> >         }
> >         return 0;
> 
> Your patch was corrupted by your email client?
Yes, you're right; the patch got corrupted by the email client.
Sorry about that.
Looking into this now.

> 
> $ patch -p1
<~/Downloads/iproute2-v1-1-1-lib-utils-fix-get_addr-and-get_prefix-
> error-messages.patch
> patching file lib/utils.c
> patch: **** malformed patch at line 6: {

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 4414 bytes --]

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox