Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v13 3/3] net: hisilicon: new hip04 ethernet driver
From: Ding Tianhong @ 2015-01-14  6:34 UTC (permalink / raw)
  To: arnd, robh+dt, davem, grant.likely, agraf
  Cc: sergei.shtylyov, linux-arm-kernel, eric.dumazet, xuwei5,
	zhangfei.gao, netdev, devicetree, linux
In-Reply-To: <1421217254-12008-1-git-send-email-dingtianhong@huawei.com>

Support Hisilicon hip04 ethernet driver, including 100M / 1000M controller.
The controller has no tx done interrupt, reclaim xmitted buffer in the poll.

v13: Fix the problem of alignment parameters for function and checkpatch warming.

v12: According Alex's suggestion, modify the changelog and add MODULE_DEVICE_TABLE
     for hip04 ethernet.

v11: Add ethtool support for tx coalecse getting and setting, the xmit_more
     is not supported for this patch, but I think it could work for hip04,
     will support it later after some tests for performance better.

     Here are some performance test results by ping and iperf(add tx_coalesce_frames/users),
     it looks that the performance and latency is more better by tx_coalesce_frames/usecs.

     - Before:
     $ ping 192.168.1.1 ...
     === 192.168.1.1 ping statistics ===
     24 packets transmitted, 24 received, 0% packet loss, time 22999ms
     rtt min/avg/max/mdev = 0.180/0.202/0.403/0.043 ms

     $ iperf -c 192.168.1.1 ...
     [ ID] Interval       Transfer     Bandwidth
     [  3]  0.0- 1.0 sec   115 MBytes   945 Mbits/sec

     - After:
     $ ping 192.168.1.1 ...
     === 192.168.1.1 ping statistics ===
     24 packets transmitted, 24 received, 0% packet loss, time 22999ms
     rtt min/avg/max/mdev = 0.178/0.190/0.380/0.041 ms

     $ iperf -c 192.168.1.1 ...
     [ ID] Interval       Transfer     Bandwidth
     [  3]  0.0- 1.0 sec   115 MBytes   965 Mbits/sec

v10: According David Miller and Arnd Bergmann's suggestion, add some modification
     for v9 version
     - drop the workqueue
     - batch cleanup based on tx_coalesce_frames/usecs for better throughput
     - use a reasonable default tx timeout (200us, could be shorted
       based on measurements) with a range timer
     - fix napi poll function return value
     - use a lockless queue for cleanup

Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/ethernet/hisilicon/Makefile    |   2 +-
 drivers/net/ethernet/hisilicon/hip04_eth.c | 969 +++++++++++++++++++++++++++++
 2 files changed, 970 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/hisilicon/hip04_eth.c

diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
index 40115a7..6c14540 100644
--- a/drivers/net/ethernet/hisilicon/Makefile
+++ b/drivers/net/ethernet/hisilicon/Makefile
@@ -3,4 +3,4 @@
 #
 
 obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o
-obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o
+obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o hip04_eth.o
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
new file mode 100644
index 0000000..525214e
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -0,0 +1,969 @@
+
+/* Copyright (c) 2014 Linaro Ltd.
+ * Copyright (c) 2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/ktime.h>
+#include <linux/of_address.h>
+#include <linux/phy.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+
+#define PPE_CFG_RX_ADDR			0x100
+#define PPE_CFG_POOL_GRP		0x300
+#define PPE_CFG_RX_BUF_SIZE		0x400
+#define PPE_CFG_RX_FIFO_SIZE		0x500
+#define PPE_CURR_BUF_CNT		0xa200
+
+#define GE_DUPLEX_TYPE			0x08
+#define GE_MAX_FRM_SIZE_REG		0x3c
+#define GE_PORT_MODE			0x40
+#define GE_PORT_EN			0x44
+#define GE_SHORT_RUNTS_THR_REG		0x50
+#define GE_TX_LOCAL_PAGE_REG		0x5c
+#define GE_TRANSMIT_CONTROL_REG		0x60
+#define GE_CF_CRC_STRIP_REG		0x1b0
+#define GE_MODE_CHANGE_REG		0x1b4
+#define GE_RECV_CONTROL_REG		0x1e0
+#define GE_STATION_MAC_ADDRESS		0x210
+#define PPE_CFG_CPU_ADD_ADDR		0x580
+#define PPE_CFG_MAX_FRAME_LEN_REG	0x408
+#define PPE_CFG_BUS_CTRL_REG		0x424
+#define PPE_CFG_RX_CTRL_REG		0x428
+#define PPE_CFG_RX_PKT_MODE_REG		0x438
+#define PPE_CFG_QOS_VMID_GEN		0x500
+#define PPE_CFG_RX_PKT_INT		0x538
+#define PPE_INTEN			0x600
+#define PPE_INTSTS			0x608
+#define PPE_RINT			0x604
+#define PPE_CFG_STS_MODE		0x700
+#define PPE_HIS_RX_PKT_CNT		0x804
+
+/* REG_INTERRUPT */
+#define RCV_INT				BIT(10)
+#define RCV_NOBUF			BIT(8)
+#define RCV_DROP			BIT(7)
+#define TX_DROP				BIT(6)
+#define DEF_INT_ERR			(RCV_NOBUF | RCV_DROP | TX_DROP)
+#define DEF_INT_MASK			(RCV_INT | DEF_INT_ERR)
+
+/* TX descriptor config */
+#define TX_FREE_MEM			BIT(0)
+#define TX_READ_ALLOC_L3		BIT(1)
+#define TX_FINISH_CACHE_INV		BIT(2)
+#define TX_CLEAR_WB			BIT(4)
+#define TX_L3_CHECKSUM			BIT(5)
+#define TX_LOOP_BACK			BIT(11)
+
+/* RX error */
+#define RX_PKT_DROP			BIT(0)
+#define RX_L2_ERR			BIT(1)
+#define RX_PKT_ERR			(RX_PKT_DROP | RX_L2_ERR)
+
+#define SGMII_SPEED_1000		0x08
+#define SGMII_SPEED_100			0x07
+#define SGMII_SPEED_10			0x06
+#define MII_SPEED_100			0x01
+#define MII_SPEED_10			0x00
+
+#define GE_DUPLEX_FULL			BIT(0)
+#define GE_DUPLEX_HALF			0x00
+#define GE_MODE_CHANGE_EN		BIT(0)
+
+#define GE_TX_AUTO_NEG			BIT(5)
+#define GE_TX_ADD_CRC			BIT(6)
+#define GE_TX_SHORT_PAD_THROUGH		BIT(7)
+
+#define GE_RX_STRIP_CRC			BIT(0)
+#define GE_RX_STRIP_PAD			BIT(3)
+#define GE_RX_PAD_EN			BIT(4)
+
+#define GE_AUTO_NEG_CTL			BIT(0)
+
+#define GE_RX_INT_THRESHOLD		BIT(6)
+#define GE_RX_TIMEOUT			0x04
+
+#define GE_RX_PORT_EN			BIT(1)
+#define GE_TX_PORT_EN			BIT(2)
+
+#define PPE_CFG_STS_RX_PKT_CNT_RC	BIT(12)
+
+#define PPE_CFG_RX_PKT_ALIGN		BIT(18)
+#define PPE_CFG_QOS_VMID_MODE		BIT(14)
+#define PPE_CFG_QOS_VMID_GRP_SHIFT	8
+
+#define PPE_CFG_RX_FIFO_FSFU		BIT(11)
+#define PPE_CFG_RX_DEPTH_SHIFT		16
+#define PPE_CFG_RX_START_SHIFT		0
+#define PPE_CFG_RX_CTRL_ALIGN_SHIFT	11
+
+#define PPE_CFG_BUS_LOCAL_REL		BIT(14)
+#define PPE_CFG_BUS_BIG_ENDIEN		BIT(0)
+
+#define RX_DESC_NUM			128
+#define TX_DESC_NUM			256
+#define TX_NEXT(N)			(((N) + 1) & (TX_DESC_NUM-1))
+#define RX_NEXT(N)			(((N) + 1) & (RX_DESC_NUM-1))
+
+#define GMAC_PPE_RX_PKT_MAX_LEN		379
+#define GMAC_MAX_PKT_LEN		1516
+#define GMAC_MIN_PKT_LEN		31
+#define RX_BUF_SIZE			1600
+#define RESET_TIMEOUT			1000
+#define TX_TIMEOUT			(6 * HZ)
+
+#define DRV_NAME			"hip04-ether"
+#define DRV_VERSION			"v1.0"
+
+#define HIP04_MAX_TX_COALESCE_USECS	200
+#define HIP04_MIN_TX_COALESCE_USECS	100
+#define HIP04_MAX_TX_COALESCE_FRAMES	200
+#define HIP04_MIN_TX_COALESCE_FRAMES	100
+
+struct tx_desc {
+	u32 send_addr;
+	u32 send_size;
+	u32 next_addr;
+	u32 cfg;
+	u32 wb_addr;
+} __aligned(64);
+
+struct rx_desc {
+	u16 reserved_16;
+	u16 pkt_len;
+	u32 reserve1[3];
+	u32 pkt_err;
+	u32 reserve2[4];
+};
+
+struct hip04_priv {
+	void __iomem *base;
+	int phy_mode;
+	int chan;
+	unsigned int port;
+	unsigned int speed;
+	unsigned int duplex;
+	unsigned int reg_inten;
+
+	struct napi_struct napi;
+	struct net_device *ndev;
+
+	struct tx_desc *tx_desc;
+	dma_addr_t tx_desc_dma;
+	struct sk_buff *tx_skb[TX_DESC_NUM];
+	dma_addr_t tx_phys[TX_DESC_NUM];
+	unsigned int tx_head;
+
+	int tx_coalesce_frames;
+	int tx_coalesce_usecs;
+	struct hrtimer tx_coalesce_timer;
+
+	unsigned char *rx_buf[RX_DESC_NUM];
+	dma_addr_t rx_phys[RX_DESC_NUM];
+	unsigned int rx_head;
+	unsigned int rx_buf_size;
+
+	struct device_node *phy_node;
+	struct phy_device *phy;
+	struct regmap *map;
+	struct work_struct tx_timeout_task;
+
+	/* written only by tx cleanup */
+	unsigned int tx_tail ____cacheline_aligned_in_smp;
+};
+
+static inline unsigned int tx_count(unsigned int head, unsigned int tail)
+{
+	return (head - tail) % (TX_DESC_NUM - 1);
+}
+
+static void hip04_config_port(struct net_device *ndev, u32 speed, u32 duplex)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	u32 val;
+
+	priv->speed = speed;
+	priv->duplex = duplex;
+
+	switch (priv->phy_mode) {
+	case PHY_INTERFACE_MODE_SGMII:
+		if (speed == SPEED_1000)
+			val = SGMII_SPEED_1000;
+		else if (speed == SPEED_100)
+			val = SGMII_SPEED_100;
+		else
+			val = SGMII_SPEED_10;
+		break;
+	case PHY_INTERFACE_MODE_MII:
+		if (speed == SPEED_100)
+			val = MII_SPEED_100;
+		else
+			val = MII_SPEED_10;
+		break;
+	default:
+		netdev_warn(ndev, "not supported mode\n");
+		val = MII_SPEED_10;
+		break;
+	}
+	writel_relaxed(val, priv->base + GE_PORT_MODE);
+
+	val = duplex ? GE_DUPLEX_FULL : GE_DUPLEX_HALF;
+	writel_relaxed(val, priv->base + GE_DUPLEX_TYPE);
+
+	val = GE_MODE_CHANGE_EN;
+	writel_relaxed(val, priv->base + GE_MODE_CHANGE_REG);
+}
+
+static void hip04_reset_ppe(struct hip04_priv *priv)
+{
+	u32 val, tmp, timeout = 0;
+
+	do {
+		regmap_read(priv->map, priv->port * 4 + PPE_CURR_BUF_CNT, &val);
+		regmap_read(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, &tmp);
+		if (timeout++ > RESET_TIMEOUT)
+			break;
+	} while (val & 0xfff);
+}
+
+static void hip04_config_fifo(struct hip04_priv *priv)
+{
+	u32 val;
+
+	val = readl_relaxed(priv->base + PPE_CFG_STS_MODE);
+	val |= PPE_CFG_STS_RX_PKT_CNT_RC;
+	writel_relaxed(val, priv->base + PPE_CFG_STS_MODE);
+
+	val = BIT(priv->port);
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_POOL_GRP, val);
+
+	val = priv->port << PPE_CFG_QOS_VMID_GRP_SHIFT;
+	val |= PPE_CFG_QOS_VMID_MODE;
+	writel_relaxed(val, priv->base + PPE_CFG_QOS_VMID_GEN);
+
+	val = RX_BUF_SIZE;
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_BUF_SIZE, val);
+
+	val = RX_DESC_NUM << PPE_CFG_RX_DEPTH_SHIFT;
+	val |= PPE_CFG_RX_FIFO_FSFU;
+	val |= priv->chan << PPE_CFG_RX_START_SHIFT;
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_FIFO_SIZE, val);
+
+	val = NET_IP_ALIGN << PPE_CFG_RX_CTRL_ALIGN_SHIFT;
+	writel_relaxed(val, priv->base + PPE_CFG_RX_CTRL_REG);
+
+	val = PPE_CFG_RX_PKT_ALIGN;
+	writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_MODE_REG);
+
+	val = PPE_CFG_BUS_LOCAL_REL | PPE_CFG_BUS_BIG_ENDIEN;
+	writel_relaxed(val, priv->base + PPE_CFG_BUS_CTRL_REG);
+
+	val = GMAC_PPE_RX_PKT_MAX_LEN;
+	writel_relaxed(val, priv->base + PPE_CFG_MAX_FRAME_LEN_REG);
+
+	val = GMAC_MAX_PKT_LEN;
+	writel_relaxed(val, priv->base + GE_MAX_FRM_SIZE_REG);
+
+	val = GMAC_MIN_PKT_LEN;
+	writel_relaxed(val, priv->base + GE_SHORT_RUNTS_THR_REG);
+
+	val = readl_relaxed(priv->base + GE_TRANSMIT_CONTROL_REG);
+	val |= GE_TX_AUTO_NEG | GE_TX_ADD_CRC | GE_TX_SHORT_PAD_THROUGH;
+	writel_relaxed(val, priv->base + GE_TRANSMIT_CONTROL_REG);
+
+	val = GE_RX_STRIP_CRC;
+	writel_relaxed(val, priv->base + GE_CF_CRC_STRIP_REG);
+
+	val = readl_relaxed(priv->base + GE_RECV_CONTROL_REG);
+	val |= GE_RX_STRIP_PAD | GE_RX_PAD_EN;
+	writel_relaxed(val, priv->base + GE_RECV_CONTROL_REG);
+
+	val = GE_AUTO_NEG_CTL;
+	writel_relaxed(val, priv->base + GE_TX_LOCAL_PAGE_REG);
+}
+
+static void hip04_mac_enable(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	u32 val;
+
+	/* enable tx & rx */
+	val = readl_relaxed(priv->base + GE_PORT_EN);
+	val |= GE_RX_PORT_EN | GE_TX_PORT_EN;
+	writel_relaxed(val, priv->base + GE_PORT_EN);
+
+	/* clear rx int */
+	val = RCV_INT;
+	writel_relaxed(val, priv->base + PPE_RINT);
+
+	/* config recv int */
+	val = GE_RX_INT_THRESHOLD | GE_RX_TIMEOUT;
+	writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_INT);
+
+	/* enable interrupt */
+	priv->reg_inten = DEF_INT_MASK;
+	writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+}
+
+static void hip04_mac_disable(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	u32 val;
+
+	/* disable int */
+	priv->reg_inten &= ~(DEF_INT_MASK);
+	writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+
+	/* disable tx & rx */
+	val = readl_relaxed(priv->base + GE_PORT_EN);
+	val &= ~(GE_RX_PORT_EN | GE_TX_PORT_EN);
+	writel_relaxed(val, priv->base + GE_PORT_EN);
+}
+
+static void hip04_set_xmit_desc(struct hip04_priv *priv, dma_addr_t phys)
+{
+	writel(phys, priv->base + PPE_CFG_CPU_ADD_ADDR);
+}
+
+static void hip04_set_recv_desc(struct hip04_priv *priv, dma_addr_t phys)
+{
+	regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, phys);
+}
+
+static u32 hip04_recv_cnt(struct hip04_priv *priv)
+{
+	return readl(priv->base + PPE_HIS_RX_PKT_CNT);
+}
+
+static void hip04_update_mac_address(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+
+	writel_relaxed(((ndev->dev_addr[0] << 8) | (ndev->dev_addr[1])),
+		       priv->base + GE_STATION_MAC_ADDRESS);
+	writel_relaxed(((ndev->dev_addr[2] << 24) | (ndev->dev_addr[3] << 16) |
+			(ndev->dev_addr[4] << 8) | (ndev->dev_addr[5])),
+		       priv->base + GE_STATION_MAC_ADDRESS + 4);
+}
+
+static int hip04_set_mac_address(struct net_device *ndev, void *addr)
+{
+	eth_mac_addr(ndev, addr);
+	hip04_update_mac_address(ndev);
+	return 0;
+}
+
+static int hip04_tx_reclaim(struct net_device *ndev, bool force)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	unsigned tx_tail = priv->tx_tail;
+	struct tx_desc *desc;
+	unsigned int bytes_compl = 0, pkts_compl = 0;
+	unsigned int count;
+
+	smp_rmb();
+	count = tx_count(ACCESS_ONCE(priv->tx_head), tx_tail);
+	if (count == 0)
+		goto out;
+
+	while (count) {
+		desc = &priv->tx_desc[tx_tail];
+		if (desc->send_addr != 0) {
+			if (force)
+				desc->send_addr = 0;
+			else
+				break;
+		}
+
+		if (priv->tx_phys[tx_tail]) {
+			dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail],
+					 priv->tx_skb[tx_tail]->len,
+					 DMA_TO_DEVICE);
+			priv->tx_phys[tx_tail] = 0;
+		}
+		pkts_compl++;
+		bytes_compl += priv->tx_skb[tx_tail]->len;
+		dev_kfree_skb(priv->tx_skb[tx_tail]);
+		priv->tx_skb[tx_tail] = NULL;
+		tx_tail = TX_NEXT(tx_tail);
+		count--;
+	}
+
+	priv->tx_tail = tx_tail;
+	smp_wmb(); /* Ensure tx_tail visible to xmit */
+
+out:
+	if (pkts_compl || bytes_compl)
+		netdev_completed_queue(ndev, pkts_compl, bytes_compl);
+
+	if (unlikely(netif_queue_stopped(ndev)) && (count < (TX_DESC_NUM - 1)))
+		netif_wake_queue(ndev);
+
+	return count;
+}
+
+static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	unsigned int tx_head = priv->tx_head, count;
+	struct tx_desc *desc = &priv->tx_desc[tx_head];
+	dma_addr_t phys;
+
+	smp_rmb();
+	count = tx_count(tx_head, ACCESS_ONCE(priv->tx_tail));
+	if (count == (TX_DESC_NUM - 1)) {
+		netif_stop_queue(ndev);
+		return NETDEV_TX_BUSY;
+	}
+
+	phys = dma_map_single(&ndev->dev, skb->data, skb->len, DMA_TO_DEVICE);
+	if (dma_mapping_error(&ndev->dev, phys)) {
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	priv->tx_skb[tx_head] = skb;
+	priv->tx_phys[tx_head] = phys;
+	desc->send_addr = cpu_to_be32(phys);
+	desc->send_size = cpu_to_be32(skb->len);
+	desc->cfg = cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV);
+	phys = priv->tx_desc_dma + tx_head * sizeof(struct tx_desc);
+	desc->wb_addr = cpu_to_be32(phys);
+	skb_tx_timestamp(skb);
+
+	hip04_set_xmit_desc(priv, phys);
+	priv->tx_head = TX_NEXT(tx_head);
+	count++;
+	netdev_sent_queue(ndev, skb->len);
+
+	stats->tx_bytes += skb->len;
+	stats->tx_packets++;
+
+	/* Ensure tx_head update visible to tx reclaim */
+	smp_wmb();
+
+	/* queue is getting full, better start cleaning up now */
+	if (count >= priv->tx_coalesce_frames) {
+		if (napi_schedule_prep(&priv->napi)) {
+			/* disable rx interrupt and timer */
+			priv->reg_inten &= ~(RCV_INT);
+			writel_relaxed(DEF_INT_MASK & ~RCV_INT,
+				       priv->base + PPE_INTEN);
+			hrtimer_cancel(&priv->tx_coalesce_timer);
+			__napi_schedule(&priv->napi);
+		}
+	} else if (!hrtimer_is_queued(&priv->tx_coalesce_timer)) {
+		/* cleanup not pending yet, start a new timer */
+		hrtimer_start_expires(&priv->tx_coalesce_timer,
+				      HRTIMER_MODE_REL);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static int hip04_rx_poll(struct napi_struct *napi, int budget)
+{
+	struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
+	struct net_device *ndev = priv->ndev;
+	struct net_device_stats *stats = &ndev->stats;
+	unsigned int cnt = hip04_recv_cnt(priv);
+	struct rx_desc *desc;
+	struct sk_buff *skb;
+	unsigned char *buf;
+	bool last = false;
+	dma_addr_t phys;
+	int rx = 0;
+	int tx_remaining;
+	u16 len;
+	u32 err;
+
+	while (cnt && !last) {
+		buf = priv->rx_buf[priv->rx_head];
+		skb = build_skb(buf, priv->rx_buf_size);
+		if (unlikely(!skb))
+			net_dbg_ratelimited("build_skb failed\n");
+
+		dma_unmap_single(&ndev->dev, priv->rx_phys[priv->rx_head],
+				 RX_BUF_SIZE, DMA_FROM_DEVICE);
+		priv->rx_phys[priv->rx_head] = 0;
+
+		desc = (struct rx_desc *)skb->data;
+		len = be16_to_cpu(desc->pkt_len);
+		err = be32_to_cpu(desc->pkt_err);
+
+		if (0 == len) {
+			dev_kfree_skb_any(skb);
+			last = true;
+		} else if ((err & RX_PKT_ERR) || (len >= GMAC_MAX_PKT_LEN)) {
+			dev_kfree_skb_any(skb);
+			stats->rx_dropped++;
+			stats->rx_errors++;
+		} else {
+			skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+			skb_put(skb, len);
+			skb->protocol = eth_type_trans(skb, ndev);
+			napi_gro_receive(&priv->napi, skb);
+			stats->rx_packets++;
+			stats->rx_bytes += len;
+			rx++;
+		}
+
+		buf = netdev_alloc_frag(priv->rx_buf_size);
+		if (!buf)
+			goto done;
+		phys = dma_map_single(&ndev->dev, buf,
+				      RX_BUF_SIZE, DMA_FROM_DEVICE);
+		if (dma_mapping_error(&ndev->dev, phys))
+			goto done;
+		priv->rx_buf[priv->rx_head] = buf;
+		priv->rx_phys[priv->rx_head] = phys;
+		hip04_set_recv_desc(priv, phys);
+
+		priv->rx_head = RX_NEXT(priv->rx_head);
+		if (rx >= budget)
+			goto done;
+
+		if (--cnt == 0)
+			cnt = hip04_recv_cnt(priv);
+	}
+
+	if (!(priv->reg_inten & RCV_INT)) {
+		/* enable rx interrupt */
+		priv->reg_inten |= RCV_INT;
+		writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN);
+	}
+	napi_complete(napi);
+done:
+	/* clean up tx descriptors and start a new timer if necessary */
+	tx_remaining = hip04_tx_reclaim(ndev, false);
+	if (rx < budget && tx_remaining)
+		hrtimer_start_expires(&priv->tx_coalesce_timer, HRTIMER_MODE_REL);
+
+	return rx;
+}
+
+static irqreturn_t hip04_mac_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = (struct net_device *)dev_id;
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	u32 ists = readl_relaxed(priv->base + PPE_INTSTS);
+
+	if (!ists)
+		return IRQ_NONE;
+
+	writel_relaxed(DEF_INT_MASK, priv->base + PPE_RINT);
+
+	if (unlikely(ists & DEF_INT_ERR)) {
+		if (ists & (RCV_NOBUF | RCV_DROP))
+			stats->rx_errors++;
+			stats->rx_dropped++;
+			netdev_err(ndev, "rx drop\n");
+		if (ists & TX_DROP) {
+			stats->tx_dropped++;
+			netdev_err(ndev, "tx drop\n");
+		}
+	}
+
+	if (ists & RCV_INT && napi_schedule_prep(&priv->napi)) {
+		/* disable rx interrupt */
+		priv->reg_inten &= ~(RCV_INT);
+		writel_relaxed(DEF_INT_MASK & ~RCV_INT, priv->base + PPE_INTEN);
+		hrtimer_cancel(&priv->tx_coalesce_timer);
+		__napi_schedule(&priv->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+enum hrtimer_restart tx_done(struct hrtimer *hrtimer)
+{
+	struct hip04_priv *priv;
+
+	priv = container_of(hrtimer, struct hip04_priv, tx_coalesce_timer);
+
+	if (napi_schedule_prep(&priv->napi)) {
+		/* disable rx interrupt */
+		priv->reg_inten &= ~(RCV_INT);
+		writel_relaxed(DEF_INT_MASK & ~RCV_INT, priv->base + PPE_INTEN);
+		__napi_schedule(&priv->napi);
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+static void hip04_adjust_link(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct phy_device *phy = priv->phy;
+
+	if ((priv->speed != phy->speed) || (priv->duplex != phy->duplex)) {
+		hip04_config_port(ndev, phy->speed, phy->duplex);
+		phy_print_status(phy);
+	}
+}
+
+static int hip04_mac_open(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	priv->rx_head = 0;
+	priv->tx_head = 0;
+	priv->tx_tail = 0;
+	hip04_reset_ppe(priv);
+
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		dma_addr_t phys;
+
+		phys = dma_map_single(&ndev->dev, priv->rx_buf[i],
+				      RX_BUF_SIZE, DMA_FROM_DEVICE);
+		if (dma_mapping_error(&ndev->dev, phys))
+			return -EIO;
+
+		priv->rx_phys[i] = phys;
+		hip04_set_recv_desc(priv, phys);
+	}
+
+	if (priv->phy)
+		phy_start(priv->phy);
+
+	netdev_reset_queue(ndev);
+	netif_start_queue(ndev);
+	hip04_mac_enable(ndev);
+	napi_enable(&priv->napi);
+
+	return 0;
+}
+
+static int hip04_mac_stop(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	napi_disable(&priv->napi);
+	netif_stop_queue(ndev);
+	hip04_mac_disable(ndev);
+	hip04_tx_reclaim(ndev, true);
+	hip04_reset_ppe(priv);
+
+	if (priv->phy)
+		phy_stop(priv->phy);
+
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		if (priv->rx_phys[i]) {
+			dma_unmap_single(&ndev->dev, priv->rx_phys[i],
+					 RX_BUF_SIZE, DMA_FROM_DEVICE);
+			priv->rx_phys[i] = 0;
+		}
+	}
+
+	return 0;
+}
+
+static void hip04_timeout(struct net_device *ndev)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+
+	schedule_work(&priv->tx_timeout_task);
+}
+
+static void hip04_tx_timeout_task(struct work_struct *work)
+{
+	struct hip04_priv *priv;
+
+	priv = container_of(work, struct hip04_priv, tx_timeout_task);
+	hip04_mac_stop(priv->ndev);
+	hip04_mac_open(priv->ndev);
+}
+
+static struct net_device_stats *hip04_get_stats(struct net_device *ndev)
+{
+	return &ndev->stats;
+}
+
+static int hip04_get_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec)
+{
+	struct hip04_priv *priv = netdev_priv(netdev);
+
+	ec->tx_coalesce_usecs = priv->tx_coalesce_usecs;
+	ec->tx_max_coalesced_frames = priv->tx_coalesce_frames;
+
+	return 0;
+}
+
+static int hip04_set_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec)
+{
+	struct hip04_priv *priv = netdev_priv(netdev);
+
+	/* Check not supported parameters  */
+	if ((ec->rx_max_coalesced_frames) || (ec->rx_coalesce_usecs_irq) ||
+	    (ec->rx_max_coalesced_frames_irq) || (ec->tx_coalesce_usecs_irq) ||
+	    (ec->use_adaptive_rx_coalesce) || (ec->use_adaptive_tx_coalesce) ||
+	    (ec->pkt_rate_low) || (ec->rx_coalesce_usecs_low) ||
+	    (ec->rx_max_coalesced_frames_low) || (ec->tx_coalesce_usecs_high) ||
+	    (ec->tx_max_coalesced_frames_low) || (ec->pkt_rate_high) ||
+	    (ec->tx_coalesce_usecs_low) || (ec->rx_coalesce_usecs_high) ||
+	    (ec->rx_max_coalesced_frames_high) || (ec->rx_coalesce_usecs) ||
+	    (ec->tx_max_coalesced_frames_irq) ||
+	    (ec->stats_block_coalesce_usecs) ||
+	    (ec->tx_max_coalesced_frames_high) || (ec->rate_sample_interval))
+		return -EOPNOTSUPP;
+
+	if ((ec->tx_coalesce_usecs > HIP04_MAX_TX_COALESCE_USECS ||
+	     ec->tx_coalesce_usecs < HIP04_MIN_TX_COALESCE_USECS) ||
+	    (ec->tx_max_coalesced_frames > HIP04_MAX_TX_COALESCE_FRAMES ||
+	     ec->tx_max_coalesced_frames < HIP04_MIN_TX_COALESCE_FRAMES))
+		return -EINVAL;
+
+	priv->tx_coalesce_usecs = ec->tx_coalesce_usecs;
+	priv->tx_coalesce_frames = ec->tx_max_coalesced_frames;
+
+	return 0;
+}
+
+static void hip04_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
+}
+
+static struct ethtool_ops hip04_ethtool_ops = {
+	.get_coalesce		= hip04_get_coalesce,
+	.set_coalesce		= hip04_set_coalesce,
+	.get_drvinfo		= hip04_get_drvinfo,
+};
+
+static struct net_device_ops hip04_netdev_ops = {
+	.ndo_open		= hip04_mac_open,
+	.ndo_stop		= hip04_mac_stop,
+	.ndo_get_stats		= hip04_get_stats,
+	.ndo_start_xmit		= hip04_mac_start_xmit,
+	.ndo_set_mac_address	= hip04_set_mac_address,
+	.ndo_tx_timeout         = hip04_timeout,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_change_mtu		= eth_change_mtu,
+};
+
+static int hip04_alloc_ring(struct net_device *ndev, struct device *d)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	priv->tx_desc = dma_alloc_coherent(d,
+					   TX_DESC_NUM * sizeof(struct tx_desc),
+					   &priv->tx_desc_dma, GFP_KERNEL);
+	if (!priv->tx_desc)
+		return -ENOMEM;
+
+	priv->rx_buf_size = RX_BUF_SIZE +
+			    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	for (i = 0; i < RX_DESC_NUM; i++) {
+		priv->rx_buf[i] = netdev_alloc_frag(priv->rx_buf_size);
+		if (!priv->rx_buf[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void hip04_free_ring(struct net_device *ndev, struct device *d)
+{
+	struct hip04_priv *priv = netdev_priv(ndev);
+	int i;
+
+	for (i = 0; i < RX_DESC_NUM; i++)
+		if (priv->rx_buf[i])
+			put_page(virt_to_head_page(priv->rx_buf[i]));
+
+	for (i = 0; i < TX_DESC_NUM; i++)
+		if (priv->tx_skb[i])
+			dev_kfree_skb_any(priv->tx_skb[i]);
+
+	dma_free_coherent(d, TX_DESC_NUM * sizeof(struct tx_desc),
+			  priv->tx_desc, priv->tx_desc_dma);
+}
+
+static int hip04_mac_probe(struct platform_device *pdev)
+{
+	struct device *d = &pdev->dev;
+	struct device_node *node = d->of_node;
+	struct of_phandle_args arg;
+	struct net_device *ndev;
+	struct hip04_priv *priv;
+	struct resource *res;
+	unsigned int irq;
+	ktime_t txtime;
+	int ret;
+
+	ndev = alloc_etherdev(sizeof(struct hip04_priv));
+	if (!ndev)
+		return -ENOMEM;
+
+	priv = netdev_priv(ndev);
+	priv->ndev = ndev;
+	platform_set_drvdata(pdev, ndev);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(d, res);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
+		goto init_fail;
+	}
+
+	ret = of_parse_phandle_with_fixed_args(node, "port-handle", 2, 0, &arg);
+	if (ret < 0) {
+		dev_warn(d, "no port-handle\n");
+		goto init_fail;
+	}
+
+	priv->port = arg.args[0];
+	priv->chan = arg.args[1] * RX_DESC_NUM;
+
+	hrtimer_init(&priv->tx_coalesce_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	/* BQL will try to keep the TX queue as short as possible, but it can't
+	 * be faster than tx_coalesce_usecs, so we need a fast timeout here,
+	 * but also long enough to gather up enough frames to ensure we don't
+	 * get more interrupts than necessary.
+	 * 200us is enough for 16 frames of 1500 bytes at gigabit ethernet rate
+	 */
+	priv->tx_coalesce_frames = TX_DESC_NUM * 3 / 4;
+	priv->tx_coalesce_usecs = 200;
+	/* allow timer to fire after half the time at the earliest */
+	txtime = ktime_set(0, priv->tx_coalesce_usecs * NSEC_PER_USEC / 2);
+	hrtimer_set_expires_range(&priv->tx_coalesce_timer, txtime, txtime);
+	priv->tx_coalesce_timer.function = tx_done;
+
+	priv->map = syscon_node_to_regmap(arg.np);
+	if (IS_ERR(priv->map)) {
+		dev_warn(d, "no syscon hisilicon,hip04-ppe\n");
+		ret = PTR_ERR(priv->map);
+		goto init_fail;
+	}
+
+	priv->phy_mode = of_get_phy_mode(node);
+	if (priv->phy_mode < 0) {
+		dev_warn(d, "not find phy-mode\n");
+		ret = -EINVAL;
+		goto init_fail;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		ret = -EINVAL;
+		goto init_fail;
+	}
+
+	ret = devm_request_irq(d, irq, hip04_mac_interrupt,
+			       0, pdev->name, ndev);
+	if (ret) {
+		netdev_err(ndev, "devm_request_irq failed\n");
+		goto init_fail;
+	}
+
+	priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
+	if (priv->phy_node) {
+		priv->phy = of_phy_connect(ndev, priv->phy_node,
+					   &hip04_adjust_link,
+					   0, priv->phy_mode);
+		if (!priv->phy) {
+			ret = -EPROBE_DEFER;
+			goto init_fail;
+		}
+	}
+
+	INIT_WORK(&priv->tx_timeout_task, hip04_tx_timeout_task);
+
+	ether_setup(ndev);
+	ndev->netdev_ops = &hip04_netdev_ops;
+	ndev->ethtool_ops = &hip04_ethtool_ops;
+	ndev->watchdog_timeo = TX_TIMEOUT;
+	ndev->priv_flags |= IFF_UNICAST_FLT;
+	ndev->irq = irq;
+	netif_napi_add(ndev, &priv->napi, hip04_rx_poll, NAPI_POLL_WEIGHT);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	hip04_reset_ppe(priv);
+	if (priv->phy_mode == PHY_INTERFACE_MODE_MII)
+		hip04_config_port(ndev, SPEED_100, DUPLEX_FULL);
+
+	hip04_config_fifo(priv);
+	random_ether_addr(ndev->dev_addr);
+	hip04_update_mac_address(ndev);
+
+	ret = hip04_alloc_ring(ndev, d);
+	if (ret) {
+		netdev_err(ndev, "alloc ring fail\n");
+		goto alloc_fail;
+	}
+
+	ret = register_netdev(ndev);
+	if (ret) {
+		free_netdev(ndev);
+		goto alloc_fail;
+	}
+
+	return 0;
+
+alloc_fail:
+	hip04_free_ring(ndev, d);
+init_fail:
+	of_node_put(priv->phy_node);
+	free_netdev(ndev);
+	return ret;
+}
+
+static int hip04_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct hip04_priv *priv = netdev_priv(ndev);
+	struct device *d = &pdev->dev;
+
+	if (priv->phy)
+		phy_disconnect(priv->phy);
+
+	hip04_free_ring(ndev, d);
+	unregister_netdev(ndev);
+	free_irq(ndev->irq, ndev);
+	of_node_put(priv->phy_node);
+	cancel_work_sync(&priv->tx_timeout_task);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+static const struct of_device_id hip04_mac_match[] = {
+	{ .compatible = "hisilicon,hip04-mac" },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(of, hip04_mac_match);
+
+static struct platform_driver hip04_mac_driver = {
+	.probe	= hip04_mac_probe,
+	.remove	= hip04_remove,
+	.driver	= {
+		.name		= DRV_NAME,
+		.owner		= THIS_MODULE,
+		.of_match_table	= hip04_mac_match,
+	},
+};
+module_platform_driver(hip04_mac_driver);
+
+MODULE_DESCRIPTION("HISILICON P04 Ethernet driver");
-- 
1.8.0

^ permalink raw reply related

* [PATCH net-next v13 1/3] Documentation: add Device tree bindings for Hisilicon hip04 ethernet
From: Ding Tianhong @ 2015-01-14  6:34 UTC (permalink / raw)
  To: arnd, robh+dt, davem, grant.likely, agraf
  Cc: sergei.shtylyov, linux-arm-kernel, eric.dumazet, xuwei5,
	zhangfei.gao, netdev, devicetree, linux
In-Reply-To: <1421217254-12008-1-git-send-email-dingtianhong@huawei.com>

From: Zhangfei Gao <zhangfei.gao@linaro.org>

This patch adds the Device Tree bindings for the Hisilicon hip04
Ethernet controller, including 100M / 1000M controller.

Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 .../bindings/net/hisilicon-hip04-net.txt           | 88 ++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
new file mode 100644
index 0000000..988fc69
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
@@ -0,0 +1,88 @@
+Hisilicon hip04 Ethernet Controller
+
+* Ethernet controller node
+
+Required properties:
+- compatible: should be "hisilicon,hip04-mac".
+- reg: address and length of the register set for the device.
+- interrupts: interrupt for the device.
+- port-handle: <phandle port channel>
+	phandle, specifies a reference to the syscon ppe node
+	port, port number connected to the controller
+	channel, recv channel start from channel * number (RX_DESC_NUM)
+- phy-mode: see ethernet.txt [1].
+
+Optional properties:
+- phy-handle: see ethernet.txt [1].
+
+[1] Documentation/devicetree/bindings/net/ethernet.txt
+
+
+* Ethernet ppe node:
+Control rx & tx fifos of all ethernet controllers.
+Have 2048 recv channels shared by all ethernet controllers, only if no overlap.
+Each controller's recv channel start from channel * number (RX_DESC_NUM).
+
+Required properties:
+- compatible: "hisilicon,hip04-ppe", "syscon".
+- reg: address and length of the register set for the device.
+
+
+* MDIO bus node:
+
+Required properties:
+
+- compatible: should be "hisilicon,hip04-mdio".
+- Inherits from MDIO bus node binding [2]
+[2] Documentation/devicetree/bindings/net/phy.txt
+
+Example:
+	mdio {
+		compatible = "hisilicon,hip04-mdio";
+		reg = <0x28f1000 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		phy0: ethernet-phy@0 {
+			compatible = "ethernet-phy-ieee802.3-c22";
+			reg = <0>;
+			marvell,reg-init = <18 0x14 0 0x8001>;
+		};
+
+		phy1: ethernet-phy@1 {
+			compatible = "ethernet-phy-ieee802.3-c22";
+			reg = <1>;
+			marvell,reg-init = <18 0x14 0 0x8001>;
+		};
+	};
+
+	ppe: ppe@28c0000 {
+		compatible = "hisilicon,hip04-ppe", "syscon";
+		reg = <0x28c0000 0x10000>;
+	};
+
+	fe: ethernet@28b0000 {
+		compatible = "hisilicon,hip04-mac";
+		reg = <0x28b0000 0x10000>;
+		interrupts = <0 413 4>;
+		phy-mode = "mii";
+		port-handle = <&ppe 31 0>;
+	};
+
+	ge0: ethernet@2800000 {
+		compatible = "hisilicon,hip04-mac";
+		reg = <0x2800000 0x10000>;
+		interrupts = <0 402 4>;
+		phy-mode = "sgmii";
+		port-handle = <&ppe 0 1>;
+		phy-handle = <&phy0>;
+	};
+
+	ge8: ethernet@2880000 {
+		compatible = "hisilicon,hip04-mac";
+		reg = <0x2880000 0x10000>;
+		interrupts = <0 410 4>;
+		phy-mode = "sgmii";
+		port-handle = <&ppe 8 2>;
+		phy-handle = <&phy1>;
+	};
-- 
1.8.0

^ permalink raw reply related

* Re: [net-next PATCH v2 00/12] Flow API
From: Or Gerlitz @ 2015-01-14  6:29 UTC (permalink / raw)
  To: John Fastabend
  Cc: Thomas Graf, simon.horman, Scott Feldman, Linux Netdev List,
	Jamal Hadi Salim, Andy Gospodarek, David Miller
In-Reply-To: <20150113212941.13874.48692.stgit@nitbit.x32>

On Tue, Jan 13, 2015 at 11:35 PM, John Fastabend
<john.fastabend@gmail.com> wrote:
> I tried to roll in all the feedback from v1 into this series annotated
> here,

Hi John,

Can you please drop V2 to your
https://github.com/jrfastab/rocker-net-next.git tree?

thanks,

Or.

^ permalink raw reply

* RE: be2net: SR-IOV, vlan isolation issue
From: Sathya Perla @ 2015-01-14  6:26 UTC (permalink / raw)
  To: Yoann Juet, netdev@vger.kernel.org; +Cc: Yoann Juet
In-Reply-To: <54AF9FF1.3040906@univ-nantes.fr>

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-
> 
> Hi all,
> 
> I recently discovered unattended behavior from Emulex cards with KVM
> hypervisor and SR-IOV. On such 10Gbps cards (be2net module, Emulex
> OneConnect OCm14102-U3-D devices), guest machines attached to VFs on
> the
> Emulex Physical Functions (PF) see all multicast and broadcast (not
> unicast) traffic from/to other VM located on the same PF **BUT** on
> other vlans. Just put into promiscuous mode the guest machine's
> interface and you will observe inbound, outbound (multicast + broadcast
> only) irrelevant traffic.
> 
> Please note that irrelevant traffic is not sent to the guest machine
> TCP/IP stack. No firewall hitting for instance. The issue is about
> traffic monitoring with a VF put into promiscuous mode using a sniffer
> like tshark, tcpdump... Vlan isolation seems not 100% effective from the
> guest perspective since mcast+bcast information leaks.
> 
> A similar issue has already been observed with Broadcom cards and then
> patched by the developer team. Refer to the post in archive "bnx2x +
> SR-IOV, no internal L2 switching", 12 Feb 2014. Emulex driver seems to
> suffer the same problem, isn't it ?
> 

Yoann, thanks for reporting this issue. This issue is caused because
the VF was allowed to go into vlan-promiscuous mode by the PF.
We'll try to provide a fix for this soon...

thanks,
-Sathya

^ permalink raw reply

* net_test_tools: add ipv6 support for kbench_mod
From: Shaohua Li @ 2015-01-14  5:45 UTC (permalink / raw)
  To: netdev; +Cc: davem, kafai

This patch adds ipv6 support for kbench_mod test module

diff --git a/kbench_mod.c b/kbench_mod.c
index fc3765c..05425df 100644
--- a/kbench_mod.c
+++ b/kbench_mod.c
@@ -3,9 +3,11 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/inet.h>
+#include <linux/in6.h>
 
 #include <net/route.h>
 #include <net/ip_fib.h>
+#include <net/ip6_route.h>
 
 #include <linux/timex.h>
 
@@ -66,9 +68,13 @@ extern int ip_route_output_cycles_7;
 static int flow_oif = DEFAULT_OIF;
 static int flow_iif = DEFAULT_IIF;
 static u32 flow_mark = DEFAULT_MARK;
-static u32 flow_dst_ip_addr = DEFAULT_DST_IP_ADDR;
-static u32 flow_src_ip_addr = DEFAULT_SRC_IP_ADDR;
+static u32 ip4_flow_dst_ip_addr = DEFAULT_DST_IP_ADDR;
+static u32 ip4_flow_src_ip_addr = DEFAULT_SRC_IP_ADDR;
+static struct in6_addr ip6_flow_dst_ip_addr;
+static struct in6_addr ip6_flow_src_ip_addr;
 static int flow_tos = DEFAULT_TOS;
+static int ip6_bench;
+module_param(ip6_bench, int, 0);
 
 static char dst_string[64];
 static char src_string[64];
@@ -76,12 +82,29 @@ static char src_string[64];
 module_param_string(dst, dst_string, sizeof(dst_string), 0);
 module_param_string(src, src_string, sizeof(src_string), 0);
 
-static void __init flow_setup(void)
+static int __init flow_setup(void)
 {
+	if (ip6_bench) {
+		if (dst_string[0] &&
+		    !in6_pton(dst_string, -1, ip6_flow_dst_ip_addr.s6_addr, -1, NULL)) {
+			pr_info("cannot parse \"%s\"\n", dst_string);
+			return -1;
+		}
+
+		if (src_string[0] &&
+		    !in6_pton(src_string, -1, ip6_flow_src_ip_addr.s6_addr, -1, NULL)) {
+			pr_info("cannot parse \"%s\"\n", src_string);
+			return -1;
+		}
+
+		return 0;
+	}
+
 	if (dst_string[0])
-		flow_dst_ip_addr = in_aton(dst_string);
+		ip4_flow_dst_ip_addr = in_aton(dst_string);
 	if (src_string[0])
-		flow_src_ip_addr = in_aton(src_string);
+		ip4_flow_src_ip_addr = in_aton(src_string);
+	return 0;
 }
 
 module_param_named(oif, flow_oif, int, 0);
@@ -92,15 +115,70 @@ module_param_named(tos, flow_tos, int, 0);
 static int warmup_count = DEFAULT_WARMUP_COUNT;
 module_param_named(count, warmup_count, int, 0);
 
-static void flow_init(struct flowi4 *fl4)
+#define flow_init(fl, gen) 				\
+do {							\
+	memset((fl), 0, sizeof(*(fl)));			\
+	(fl)->flowi##gen##_oif = flow_oif;		\
+	(fl)->flowi##gen##_iif = flow_iif;		\
+	(fl)->flowi##gen##_mark = flow_mark;		\
+	(fl)->flowi##gen##_tos = flow_tos;		\
+	(fl)->daddr = ip##gen##_flow_dst_ip_addr;	\
+	(fl)->saddr = ip##gen##_flow_src_ip_addr;	\
+} while (0)
+
+#define flow_init_ip6(fl) 				\
+do {							\
+	flow_init(fl, 6);				\
+	(fl)->flowi6_proto = IPPROTO_ICMPV6;		\
+} while(0)
+
+static int skb_init_ip6(struct sk_buff *skb)
+{
+	struct ipv6hdr *hdr;
+	struct net_device *dev;
+
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+	hdr = ipv6_hdr(skb);
+
+	hdr->priority = 0;
+	hdr->version = 6;
+	memset(hdr->flow_lbl, 0, sizeof(hdr->flow_lbl));
+	hdr->payload_len = htons(sizeof(struct icmp6hdr));
+	hdr->nexthdr = IPPROTO_ICMPV6;
+	hdr->saddr = ip6_flow_src_ip_addr;
+	hdr->daddr = ip6_flow_dst_ip_addr;
+
+	dev = __dev_get_by_index(&init_net, flow_iif);
+	if (dev == NULL) {
+		pr_info("Input device does not exist\n");
+		return -ENODEV;
+	}
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+	skb->mark = flow_mark;
+	return 0;
+}
+
+static int skb_init_ip4(struct sk_buff *skb)
 {
-	memset(fl4, 0, sizeof(*fl4));
-	fl4->flowi4_oif = flow_oif;
-	fl4->flowi4_iif = flow_iif;
-	fl4->flowi4_mark = flow_mark;
-	fl4->flowi4_tos = flow_tos;
-	fl4->daddr = flow_dst_ip_addr;
-	fl4->saddr = flow_src_ip_addr;
+	struct net_device *dev;
+
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	ip_hdr(skb)->protocol = IPPROTO_ICMP;
+	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+
+	dev = __dev_get_by_index(&init_net, flow_iif);
+	if (dev == NULL) {
+		pr_info("Input device does not exist\n");
+		return -ENODEV;
+	}
+	skb->protocol = htons(ETH_P_IP);
+	skb->dev = dev;
+	skb->mark = flow_mark;
+	return 0;
 }
 
 static struct rtable *route_output(struct net *net, struct flowi4 *fl4)
@@ -108,7 +186,7 @@ static struct rtable *route_output(struct net *net, struct flowi4 *fl4)
 	return ip_route_output_key(net, fl4);
 }
 
-static void do_full_output_lookup_bench(void)
+static void do_full_output_lookup_bench_ip4(void)
 {
 	unsigned long long t1, t2, tdiff;
 	struct rtable *rt;
@@ -118,7 +196,7 @@ static void do_full_output_lookup_bench(void)
 	rt = NULL;
 
 	for (i = 0; i < warmup_count; i++) {
-		flow_init(&fl4);
+		flow_init(&fl4, 4);
 
 		rt = route_output(&init_net, &fl4);
 		if (IS_ERR(rt))
@@ -140,7 +218,7 @@ static void do_full_output_lookup_bench(void)
 	ip_route_output_cycles_7 = 0;
 #endif
 
-	flow_init(&fl4);
+	flow_init(&fl4, 4);
 
 	t1 = get_tick();
 	rt = route_output(&init_net, &fl4);
@@ -161,11 +239,45 @@ static void do_full_output_lookup_bench(void)
 #endif
 }
 
+static void do_full_output_lookup_bench_ip6(void)
+{
+	unsigned long long t1, t2, tdiff;
+	struct rt6_info *rt;
+	struct flowi6 fl6;
+	int i;
+
+	rt = NULL;
+
+	for (i = 0; i < warmup_count; i++) {
+		flow_init_ip6(&fl6);
+
+		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl6);
+		if (IS_ERR(rt))
+			break;
+		ip6_rt_put(rt);
+	}
+	if (IS_ERR(rt)) {
+		pr_info("ip6_route_output: err=%ld\n", PTR_ERR(rt));
+		return;
+	}
+
+	flow_init_ip6(&fl6);
+
+	t1 = get_tick();
+	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl6);
+	t2 = get_tick();
+	if (!IS_ERR(rt))
+		ip6_rt_put(rt);
+
+	tdiff = t2 - t1;
+	pr_info("ip6_route_output tdiff: %llu\n", tdiff);
+}
+
 static void do_full_input_lookup_bench(void)
 {
 	unsigned long long t1, t2, tdiff;
-	struct net_device *dev;
 	struct sk_buff *skb;
+	struct rt6_info *rt;
 	int err, i;
 
 	skb = alloc_skb(4096, GFP_KERNEL);
@@ -173,23 +285,22 @@ static void do_full_input_lookup_bench(void)
 		pr_info("Cannot alloc SKB for test\n");
 		return;
 	}
-	skb_reset_mac_header(skb);
-	skb_reset_network_header(skb);
-	ip_hdr(skb)->protocol = IPPROTO_ICMP;
-	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
-
-	dev = __dev_get_by_index(&init_net, flow_iif);
-	if (dev == NULL) {
-		pr_info("Input device does not exist\n");
+	if (ip6_bench)
+		err = skb_init_ip6(skb);
+	else
+		err = skb_init_ip4(skb);
+	if (err)
 		goto out_free;
-	}
-	skb->protocol = htons(ETH_P_IP);
-	skb->dev = dev;
-	skb->mark = flow_mark;
+
 	local_bh_disable();
 	err = 0;
 	for (i = 0; i < warmup_count; i++) {
-		err = ip_route_input(skb, flow_dst_ip_addr, flow_src_ip_addr, flow_tos, dev);
+		if (ip6_bench) {
+			ip6_route_input(skb);
+			rt = (struct rt6_info *)skb_dst(skb);
+			err = (!rt || rt == init_net.ipv6.ip6_null_entry);
+		} else
+			err = ip_route_input(skb, ip4_flow_dst_ip_addr, ip4_flow_src_ip_addr, flow_tos, skb->dev);
 		if (err)
 			break;
 		skb_dst_drop(skb);
@@ -203,7 +314,12 @@ static void do_full_input_lookup_bench(void)
 
 	local_bh_disable();
 	t1 = get_tick();
-	err = ip_route_input(skb, flow_dst_ip_addr, flow_src_ip_addr, flow_tos, dev);
+	if (ip6_bench) {
+		ip6_route_input(skb);
+		rt = (struct rt6_info *)skb_dst(skb);
+		err = (!rt || rt == init_net.ipv6.ip6_null_entry);
+	} else
+		err = ip_route_input(skb, ip4_flow_dst_ip_addr, ip4_flow_src_ip_addr, flow_tos, skb->dev);
 	t2 = get_tick();
 	local_bh_enable();
 
@@ -215,7 +331,10 @@ static void do_full_input_lookup_bench(void)
 	skb_dst_drop(skb);
 
 	tdiff = t2 - t1;
-	pr_info("ip_route_input tdiff: %llu\n", tdiff);
+	if (ip6_bench)
+		pr_info("ip6_route_input tdiff: %llu\n", tdiff);
+	else
+		pr_info("ip_route_input tdiff: %llu\n", tdiff);
 
 out_free:
 	kfree_skb(skb);
@@ -223,9 +342,12 @@ static void do_full_input_lookup_bench(void)
 
 static void do_full_lookup_bench(void)
 {
-	if (!flow_iif)
-		do_full_output_lookup_bench();
-	else
+	if (!flow_iif) {
+		if (ip6_bench)
+			do_full_output_lookup_bench_ip6();
+		else
+			do_full_output_lookup_bench_ip4();
+	} else
 		do_full_input_lookup_bench();
 }
 
@@ -240,7 +362,7 @@ static void do_full_lookup_prealloc_bench(void)
 	err = 0;
 
 	for (i = 0; i < warmup_count; i++) {
-		flow_init(&fl4);
+		flow_init(&fl4, 4);
 
 		rt = ip_route_output_flow_prealloc(&init_net, &fl4, NULL, &rt_stack.dst);
 		if (IS_ERR(rt)) {
@@ -264,7 +386,7 @@ static void do_full_lookup_prealloc_bench(void)
 	ip_route_output_cycles_7 = 0;
 #endif
 
-	flow_init(&fl4);
+	flow_init(&fl4, 4);
 
 	t1 = get_tick();
 	rt = ip_route_output_flow_prealloc(&init_net, &fl4, NULL, &rt_stack.dst);
@@ -295,7 +417,7 @@ static void do_fib_lookup_bench(void)
 	struct flowi4 fl4;
 	int err, i;
 
-	flow_init(&fl4);
+	flow_init(&fl4, 4);
 
 	for (i = 0; i < warmup_count; i++) {
 		struct fib_table *table;
@@ -398,7 +520,7 @@ static void do_new_lookup_bench(void)
 	struct flowi fl;
 	int err, i;
 
-	flow_init(&fl);
+	flow_init(&fl, 4);
 
 	for (i = 0; i < warmup_count; i++) {
 		err = new_output_lookup(&fl, &rt);
@@ -428,6 +550,8 @@ static void do_bench(void)
 	do_full_lookup_bench();
 	do_full_lookup_bench();
 
+	if (ip6_bench)
+		return;
 #ifdef IP_ROUTE_HAVE_PREALLOC
 	do_full_lookup_prealloc_bench();
 	do_full_lookup_prealloc_bench();
@@ -452,10 +576,19 @@ static int __init kbench_init(void)
 {
 	flow_setup();
 
-	pr_info("flow [IIF(%d),OIF(%d),MARK(0x%08x),D(%pI4),S(%pI4),TOS(0x%02x)]\n",
-		flow_iif, flow_oif, flow_mark,
-		&flow_dst_ip_addr,
-		&flow_src_ip_addr, flow_tos);
+	if (!ip6_bench) {
+		pr_info("flow [IIF(%d),OIF(%d),MARK(0x%08x),D(%pI4),S(%pI4),TOS(0x%02x)]\n",
+			flow_iif, flow_oif, flow_mark,
+			&ip4_flow_dst_ip_addr,
+			&ip4_flow_src_ip_addr, flow_tos);
+	} else {
+		pr_info("flow [IIF(%d),OIF(%d),MARK(0x%08x),D(%pI6),"
+			"S(%pI6),TOS(0x%02x)]\n",
+			flow_iif, flow_oif, flow_mark,
+			&ip6_flow_dst_ip_addr,
+			&ip6_flow_src_ip_addr,
+			flow_tos);
+	}
 
 #if defined(CONFIG_X86)
 	if (!cpu_has_tsc) {

^ permalink raw reply related

* Re: [PATCH] neighbour: fix base_reachable_time(_ms) not effective immediatly when changed
From: David Miller @ 2015-01-14  5:28 UTC (permalink / raw)
  To: jeff; +Cc: netdev
In-Reply-To: <1421205759-19766-1-git-send-email-jeff@melix.org>

From: Jean-Francois Remy <jeff@melix.org>
Date: Wed, 14 Jan 2015 04:22:39 +0100

> When setting base_reachable_time or base_reachable_time_ms on a
> specific interface through sysctl or netlink, the reachable_time
> value is not updated.
> 
> This means that neighbour entries will continue to be updated using the
> old value until it is recomputed in neigh_period_work (which
>     recomputes the value every 300*HZ).
> On systems with HZ equal to 1000 for instance, it means 5mins before
> the change is effective.
> 
> This patch changes this behavior by recomputing reachable_time after
> each set on base_reachable_time or base_reachable_time_ms.
> The new value will become effective the next time the neighbour's timer
> is triggered.
> 
> Changes are made in two places: the netlink code for set and the sysctl
> handling code. For sysctl, I use a proc_handler. The ipv6 network
> code does provide its own handler but it already refreshes
> reachable_time correctly so it's not an issue.
> Any other user of neighbour which provide its own handlers must
> refresh reachable_time.
> 
> Signed-off-by: Jean-Francois Remy <jeff@melix.org>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH RESEND v2] net: fec: fix MDIO bus assignement for dual fec SoC's
From: David Miller @ 2015-01-14  5:27 UTC (permalink / raw)
  To: stefan-XLVq0VzYD2Y
  Cc: shawn.guo-QSEj5FYQhm4dnm+yROfE0A,
	u.kleine-koenig-bIcnvbaLZ9MEGnE8C9+IrQ,
	fugang.duan-KZfg59tc24xl57MIdRCFDg,
	fabio.estevam-KZfg59tc24xl57MIdRCFDg, mark.rutland-5wv7dgnIgG8,
	robh+dt-DgEjT+Ai2ygdnm+yROfE0A, pawel.moll-5wv7dgnIgG8,
	ijc+devicetree-KcIKpvwj1kUDXYZnReoRVg,
	galak-sgV2jX0FEOL9JmXXK+q4OQ, B38611-KZfg59tc24xl57MIdRCFDg,
	LW-bxm8fMRDkQLDiMYJYoSAnRvVK+yQ3ZXh,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	devicetree-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1421191221-23095-1-git-send-email-stefan-XLVq0VzYD2Y@public.gmane.org>

From: Stefan Agner <stefan-XLVq0VzYD2Y@public.gmane.org>
Date: Wed, 14 Jan 2015 00:20:21 +0100

> On i.MX28, the MDIO bus is shared between the two FEC instances.
> The driver makes sure that the second FEC uses the MDIO bus of the
> first FEC. This is done conditionally if FEC_QUIRK_ENET_MAC is set.
> However, in newer designs, such as Vybrid or i.MX6SX, each FEC MAC
> has its own MDIO bus. Simply removing the quirk FEC_QUIRK_ENET_MAC
> is not an option since other logic, triggered by this quirk, is
> still needed.
> 
> Furthermore, there are board designs which use the same MDIO bus
> for both PHY's even though the second bus would be available on the
> SoC side. Such layout are popular since it saves pins on SoC side.
> Due to the above quirk, those boards currently do work fine. The
> boards in the mainline tree with such a layout are:
> - Freescale Vybrid Tower with TWR-SER2 (vf610-twr.dts)
> - Freescale i.MX6 SoloX SDB Board (imx6sx-sdb.dts)
> 
> This patch adds a new quirk FEC_QUIRK_SINGLE_MDIO for i.MX28, which
> makes sure that the MDIO bus of the first FEC is used in any case.
> 
> However, the boards above do have a SoC with a MDIO bus for each FEC
> instance. But the PHY's are not connected in a 1:1 configuration. A
> proper device tree description is needed to allow the driver to
> figure out where to find its PHY. This patch fixes that shortcoming
> by adding a MDIO bus child node to the first FEC instance, along
> with the two PHY's on that bus, and making use of the phy-handle
> property to add a reference to the PHY's.
> 
> Acked-by: Sascha Hauer <s.hauer-bIcnvbaLZ9MEGnE8C9+IrQ@public.gmane.org>
> Signed-off-by: Stefan Agner <stefan-XLVq0VzYD2Y@public.gmane.org>

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 2/2] net/macb: improved ethtool statistics support
From: David Miller @ 2015-01-14  5:26 UTC (permalink / raw)
  To: xander.huff
  Cc: nicolas.ferre, jaeden.amero, rich.tollerton, ben.shelton,
	brad.mouring, netdev, linux-kernel
In-Reply-To: <1421187351-27279-2-git-send-email-xander.huff@ni.com>

From: Xander Huff <xander.huff@ni.com>
Date: Tue, 13 Jan 2015 16:15:51 -0600

> Currently `ethtool -S` simply returns "no stats available". It
> would be more useful to see what the various ethtool statistics
> registers' values are. This change implements get_ethtool_stats,
> get_strings, and get_sset_count functions to accomplish this.
> 
> Read all GEM statistics registers and sum them into
> macb.ethtool_stats. Add the necessary infrastructure to make this
> accessible via `ethtool -S`.
> 
> Update gem_update_stats to utilize ethtool_stats.
> 
> Signed-off-by: Xander Huff <xander.huff@ni.com>

Applied.

^ permalink raw reply

* Re: [PATCH 1/2] net/macb: Adding comments to various #defs to make interpretation easier
From: David Miller @ 2015-01-14  5:26 UTC (permalink / raw)
  To: xander.huff
  Cc: nicolas.ferre, jaeden.amero, rich.tollerton, ben.shelton,
	brad.mouring, netdev, linux-kernel
In-Reply-To: <1421187351-27279-1-git-send-email-xander.huff@ni.com>

From: Xander Huff <xander.huff@ni.com>
Date: Tue, 13 Jan 2015 16:15:50 -0600

> This change is to help improve at-a-glace knowledge of the purpose of the
> various Cadence MACB/GEM registers. Comments are more helpful for human
> readability than short acronyms.
> 
> Describe various #define varibles Cadence MACB/GEM registers as documented
> in Xilinix's "Zynq-7000 All Programmable SoC TechnicalReference Manual, v1.9.1
> (UG-585)"
> 
> Signed-off-by: Xander Huff <xander.huff@ni.com>

Applied.

^ permalink raw reply

* Re: [PATCHv1 0/3 net-next] xen-netfront: refactor making Tx requests
From: David Miller @ 2015-01-14  5:23 UTC (permalink / raw)
  To: david.vrabel; +Cc: netdev, xen-devel, konrad.wilk, boris.ostrovsky
In-Reply-To: <1421169404-27461-1-git-send-email-david.vrabel@citrix.com>

From: David Vrabel <david.vrabel@citrix.com>
Date: Tue, 13 Jan 2015 17:16:41 +0000

> As netfront as evolved to handle different sorts of skbs the code to
> fill a Tx requests has been copy and pasted several times.  The series
> refactors this and a few other areas.
> 
> The first patch is to a Xen header but this can be merged via
> net-next.

Series applied, thanks David.

^ permalink raw reply

* Re: [patch-net-next 1/3] net: ethernet: cpsw: unroll IRQ request loop
From: David Miller @ 2015-01-14  5:18 UTC (permalink / raw)
  To: balbi; +Cc: tony, linux-omap, mugunthanvnm, netdev
In-Reply-To: <1421178288-7393-1-git-send-email-balbi@ti.com>

From: Felipe Balbi <balbi@ti.com>
Date: Tue, 13 Jan 2015 13:44:46 -0600

> +	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
> +			0, dev_name(&pdev->dev), priv);

When a function call spans multiple lines, the argument on the second
and subsequent lines must start on the first column after the openning
parenthesis of the function call.

If you are using only TAB characters to indent, you are likely not
doing it correctly.

> +	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
> +			0, dev_name(&pdev->dev), priv);

Likewise.

> +	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
> +			0, dev_name(&pdev->dev), priv);

Likewise.

> +	ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
> +			0, dev_name(&pdev->dev), priv);

Likewise.

^ permalink raw reply

* [PATCHv3 net-next] openvswitch: Introduce ovs_tunnel_route_lookup
From: Fan Du @ 2015-01-14  5:10 UTC (permalink / raw)
  To: pshelar; +Cc: netdev, dev, fengyuleidian0615

Introduce ovs_tunnel_route_lookup to consolidate route lookup
shared by vxlan, gre, and geneve ports.

Signed-off-by: Fan Du <fan.du@intel.com>
---
Change log:
v3:
  - Use ovs_tunnel_route_lookup for ovs_tunnel_get_egress_info
  - Constantify tun_key structure for gre, vxlan, geneve ports
    to surpress warnings.
v2:
  - Use inline instead of function call
  - Rename vport_route_lookup to ovs_tunnel_route_lookup

---
 net/openvswitch/vport-geneve.c |   13 ++-----------
 net/openvswitch/vport-gre.c    |   12 ++----------
 net/openvswitch/vport-vxlan.c  |   12 ++----------
 net/openvswitch/vport.c        |    9 +--------
 net/openvswitch/vport.h        |   18 ++++++++++++++++++
 5 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 484864d..eabe564 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -172,7 +172,7 @@ error:
 
 static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
-	struct ovs_key_ipv4_tunnel *tun_key;
+	const struct ovs_key_ipv4_tunnel *tun_key;
 	struct ovs_tunnel_info *tun_info;
 	struct net *net = ovs_dp_get_net(vport->dp);
 	struct geneve_port *geneve_port = geneve_vport(vport);
@@ -191,16 +191,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 	}
 
 	tun_key = &tun_info->tunnel;
-
-	/* Route lookup */
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = tun_key->ipv4_dst;
-	fl.saddr = tun_key->ipv4_src;
-	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
-	fl.flowi4_mark = skb->mark;
-	fl.flowi4_proto = IPPROTO_UDP;
-
-	rt = ip_route_output_key(net, &fl);
+	rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		goto error;
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index d4168c4..1aa4921 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -134,7 +134,7 @@ static int gre_err(struct sk_buff *skb, u32 info,
 static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
 	struct net *net = ovs_dp_get_net(vport->dp);
-	struct ovs_key_ipv4_tunnel *tun_key;
+	const struct ovs_key_ipv4_tunnel *tun_key;
 	struct flowi4 fl;
 	struct rtable *rt;
 	int min_headroom;
@@ -148,15 +148,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 	}
 
 	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
-	/* Route lookup */
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = tun_key->ipv4_dst;
-	fl.saddr = tun_key->ipv4_src;
-	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
-	fl.flowi4_mark = skb->mark;
-	fl.flowi4_proto = IPPROTO_GRE;
-
-	rt = ip_route_output_key(net, &fl);
+	rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		goto err_free_skb;
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index d7c46b3..1435a05 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -145,7 +145,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 	struct net *net = ovs_dp_get_net(vport->dp);
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
 	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
-	struct ovs_key_ipv4_tunnel *tun_key;
+	const struct ovs_key_ipv4_tunnel *tun_key;
 	struct rtable *rt;
 	struct flowi4 fl;
 	__be16 src_port;
@@ -158,15 +158,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 	}
 
 	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
-	/* Route lookup */
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = tun_key->ipv4_dst;
-	fl.saddr = tun_key->ipv4_src;
-	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
-	fl.flowi4_mark = skb->mark;
-	fl.flowi4_proto = IPPROTO_UDP;
-
-	rt = ip_route_output_key(net, &fl);
+	rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
 		goto error;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 2034c6d..fb9d5fb 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -594,14 +594,7 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
 	 * The process may need to be changed if the corresponding process
 	 * in vports ops changed.
 	 */
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = tun_key->ipv4_dst;
-	fl.saddr = tun_key->ipv4_src;
-	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
-	fl.flowi4_mark = skb_mark;
-	fl.flowi4_proto = ipproto;
-
-	rt = ip_route_output_key(net, &fl);
+	rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 99c8e71..f8ae295 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -236,4 +236,22 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
 int ovs_vport_ops_register(struct vport_ops *ops);
 void ovs_vport_ops_unregister(struct vport_ops *ops);
 
+static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
+						     const struct ovs_key_ipv4_tunnel *key,
+						     u32 mark,
+						     struct flowi4 *fl,
+						     u8 protocol)
+{
+	struct rtable *rt;
+
+	memset(fl, 0, sizeof(*fl));
+	fl->daddr = key->ipv4_dst;
+	fl->saddr = key->ipv4_src;
+	fl->flowi4_tos = RT_TOS(key->ipv4_tos);
+	fl->flowi4_mark = mark;
+	fl->flowi4_proto = protocol;
+
+	rt = ip_route_output_key(net, fl);
+	return rt;
+}
 #endif /* vport.h */
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH 2/2] mdio-mux-gpio: use new gpiod_get_array and gpiod_put_array functions
From: Alexandre Courbot @ 2015-01-14  5:11 UTC (permalink / raw)
  To: Rojhalat Ibrahim
  Cc: linux-gpio@vger.kernel.org, Alexandre Courbot, Linus Walleij,
	David Miller, netdev
In-Reply-To: <13407316.QPemVgjPDO@pcimr>

On Sat, Jan 10, 2015 at 12:19 AM, Rojhalat Ibrahim <imr@rtschenk.de> wrote:
> Use the new gpiod_get_array and gpiod_put_array functions for obtaining and
> disposing of GPIO descriptors.
>
> Signed-off-by: Rojhalat Ibrahim <imr@rtschenk.de>
> ---
> This patch depends on my previous patch "gpiolib: add gpiod_get_array and
> gpiod_put_array functions".
>
>  drivers/net/phy/mdio-mux-gpio.c |   28 ++++++++--------------------
>  1 file changed, 8 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
> index 1eaf81e..35c37da 100644
> --- a/drivers/net/phy/mdio-mux-gpio.c
> +++ b/drivers/net/phy/mdio-mux-gpio.c
> @@ -47,7 +47,6 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
>  {
>         struct mdio_mux_gpio_state *s;
>         int num_gpios;
> -       unsigned int n;
>         int r;
>
>         if (!pdev->dev.of_node)
> @@ -63,16 +62,10 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
>
>         s->num_gpios = num_gpios;
>
> -       for (n = 0; n < num_gpios; ) {
> -               struct gpio_desc *gpio = gpiod_get_index(&pdev->dev, NULL, n,
> -                                                        GPIOD_OUT_LOW);
> -               if (IS_ERR(gpio)) {
> -                       r = PTR_ERR(gpio);
> -                       goto err;
> -               }
> -               s->gpio[n] = gpio;
> -               n++;
> -       }
> +       r = gpiod_get_array(&pdev->dev, NULL, s->gpio, num_gpios,
> +                           GPIOD_OUT_LOW);
> +       if (r != num_gpios)
> +               return r;
>
>         r = mdio_mux_init(&pdev->dev,
>                           mdio_mux_gpio_switch_fn, &s->mux_handle, s);
> @@ -80,22 +73,17 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
>         if (r == 0) {
>                 pdev->dev.platform_data = s;
>                 return 0;
> +       } else {
> +               gpiod_put_array(s->gpio, num_gpios);
> +               return r;
>         }

Suggestion: handle the errors in the if condition, and let normal
execution be visible at the first level of indentation of the
function. I.e:

    if (r != 0) {
        gpiod_put_array(s->gpio, num_gpios);
        return r;
    }

    pdev->dev.platform_data = s;
    return 0;

This is how previous errors are handled in this function and is
generally a good thing to do as it makes the function's logic easier
to follow.

^ permalink raw reply

* Re: [PATCH net-next v12 3/3] net: hisilicon: new hip04 ethernet driver
From: Ding Tianhong @ 2015-01-14  4:22 UTC (permalink / raw)
  To: David Miller
  Cc: arnd, robh+dt, grant.likely, agraf, sergei.shtylyov,
	linux-arm-kernel, eric.dumazet, xuwei5, zhangfei.gao, netdev,
	devicetree, linux
In-Reply-To: <20150113.230611.375062426344934847.davem@davemloft.net>

On 2015/1/14 12:06, David Miller wrote:
> From: Ding Tianhong <dingtianhong@huawei.com>
> Date: Tue, 13 Jan 2015 17:11:30 +0800
> 
>> +static int hip04_alloc_ring(struct net_device *ndev, struct device *d)
>> +{
>> +	struct hip04_priv *priv = netdev_priv(ndev);
>> +	int i;
>> +
>> +	priv->tx_desc = dma_alloc_coherent(d,
>> +			TX_DESC_NUM * sizeof(struct tx_desc),
>> +			&priv->tx_desc_dma, GFP_KERNEL);
> 
> When a function call spans multiple lines, the arguments on the
> second and subsequent lines should start at the first column
> after the openning parenthesis of the function call.
> 
> If you are only using TAB characters, your indentation is likely
> to be incorrect.
> 

Sorry for the mistake.

>> +	/*
>> +	 * BQL will try to keep the TX queue as short as possible, but it can't
>> +	 * be faster than tx_coalesce_usecs, so we need a fast timeout here,
>> +	 * but also long enough to gather up enough frames to ensure we don't
>> +	 * get more interrupts than necessary.
>> +	 * 200us is enough for 16 frames of 1500 bytes at gigabit ethernet rate
>> +	 */
> 
> Comments in the networking should be:
> 
> 	/* Formatted
> 	 * like this.
> 	 */
> 

yes.
>> +	priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
>> +	if (priv->phy_node) {
>> +		priv->phy = of_phy_connect(ndev, priv->phy_node,
>> +			&hip04_adjust_link, 0, priv->phy_mode);
> 
> Please align the arguments to this function call correctly, as
> per above.
> 
Ok, I will check them again.

Ding

> Thanks.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> .
> 

^ permalink raw reply

* Re: [PATCH net-next v12 3/3] net: hisilicon: new hip04 ethernet driver
From: David Miller @ 2015-01-14  4:06 UTC (permalink / raw)
  To: dingtianhong-hv44wF8Li93QT0dZR+AlfA
  Cc: arnd-r2nGTMty4D4, robh+dt-DgEjT+Ai2ygdnm+yROfE0A,
	grant.likely-QSEj5FYQhm4dnm+yROfE0A, agraf-l3A5Bk7waGM,
	sergei.shtylyov-M4DtvfQ/ZS1MRgGoP+s0PdBPR1lH4CV8,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
	xuwei5-C8/M+/jPZTeaMJb+Lgu22Q,
	zhangfei.gao-QSEj5FYQhm4dnm+yROfE0A,
	netdev-u79uwXL29TY76Z2rM5mHXA, devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-lFZ/pmaqli7XmaaqVzeoHQ
In-Reply-To: <1421140290-5492-4-git-send-email-dingtianhong-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

From: Ding Tianhong <dingtianhong-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Date: Tue, 13 Jan 2015 17:11:30 +0800

> +static int hip04_alloc_ring(struct net_device *ndev, struct device *d)
> +{
> +	struct hip04_priv *priv = netdev_priv(ndev);
> +	int i;
> +
> +	priv->tx_desc = dma_alloc_coherent(d,
> +			TX_DESC_NUM * sizeof(struct tx_desc),
> +			&priv->tx_desc_dma, GFP_KERNEL);

When a function call spans multiple lines, the arguments on the
second and subsequent lines should start at the first column
after the openning parenthesis of the function call.

If you are only using TAB characters, your indentation is likely
to be incorrect.

> +	/*
> +	 * BQL will try to keep the TX queue as short as possible, but it can't
> +	 * be faster than tx_coalesce_usecs, so we need a fast timeout here,
> +	 * but also long enough to gather up enough frames to ensure we don't
> +	 * get more interrupts than necessary.
> +	 * 200us is enough for 16 frames of 1500 bytes at gigabit ethernet rate
> +	 */

Comments in the networking should be:

	/* Formatted
	 * like this.
	 */

> +	priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
> +	if (priv->phy_node) {
> +		priv->phy = of_phy_connect(ndev, priv->phy_node,
> +			&hip04_adjust_link, 0, priv->phy_mode);

Please align the arguments to this function call correctly, as
per above.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH net-next v2] rhashtable: unnecessary to use delayed work
From: Ying Xue @ 2015-01-14  3:32 UTC (permalink / raw)
  To: tgraf; +Cc: davem, netdev

When we put our declared work task in the global workqueue with
schedule_delayed_work(), its delay parameter is always zero.
Therefore, we should define a normal work in rhashtable structure
instead of a delayed work.

By the way, we add a condition to check whether resizing functions
are NULL before cancel the work, avoiding to cancel an uninitialized
work.

Lastly, while we wait for all work items we submitted before to run
to completion with cancel_delayed_work(), ht->mutex has been taken in
rhashtable_destroy(). Moreover, cancel_delayed_work() doesn't return
until all work items are accomplished, and when work items are
scheduled, the work's function - rht_deferred_worker() will be called.
However, as rht_deferred_worker() also needs to acquire the lock,
deadlock might happen at the moment as the lock is already held before.
So if the cancel work function is moved out of the lock covered scope,
this can help to avoid the deadlock.

Fixes: 97defe1 ("rhashtable: Per bucket locks & deferred expansion/shrinking")
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Cc: Thomas Graf <tgraf@suug.ch>
---
v2 changes:
  Move cancel_work_sync() out of ht->mutex lock scope

 include/linux/rhashtable.h |    2 +-
 lib/rhashtable.c           |   11 +++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 9570832..a2562ed 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -119,7 +119,7 @@ struct rhashtable {
 	atomic_t			nelems;
 	atomic_t			shift;
 	struct rhashtable_params	p;
-	struct delayed_work             run_work;
+	struct work_struct		run_work;
 	struct mutex                    mutex;
 	bool                            being_destroyed;
 };
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index ed6ae1a..1f56189 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -476,7 +476,7 @@ static void rht_deferred_worker(struct work_struct *work)
 	struct rhashtable *ht;
 	struct bucket_table *tbl;
 
-	ht = container_of(work, struct rhashtable, run_work.work);
+	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
 	tbl = rht_dereference(ht->tbl, ht);
 
@@ -498,7 +498,7 @@ static void rhashtable_wakeup_worker(struct rhashtable *ht)
 	if (tbl == new_tbl &&
 	    ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) ||
 	     (ht->p.shrink_decision && ht->p.shrink_decision(ht, size))))
-		schedule_delayed_work(&ht->run_work, 0);
+		schedule_work(&ht->run_work);
 }
 
 static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
@@ -894,7 +894,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 		get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
 
 	if (ht->p.grow_decision || ht->p.shrink_decision)
-		INIT_DEFERRABLE_WORK(&ht->run_work, rht_deferred_worker);
+		INIT_WORK(&ht->run_work, rht_deferred_worker);
 
 	return 0;
 }
@@ -911,12 +911,11 @@ EXPORT_SYMBOL_GPL(rhashtable_init);
 void rhashtable_destroy(struct rhashtable *ht)
 {
 	ht->being_destroyed = true;
+	if (ht->p.grow_decision || ht->p.shrink_decision)
+		cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
-
-	cancel_delayed_work(&ht->run_work);
 	bucket_table_free(rht_dereference(ht->tbl, ht));
-
 	mutex_unlock(&ht->mutex);
 }
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH] neighbour: fix base_reachable_time(_ms) not effective immediatly when changed
From: Jean-Francois Remy @ 2015-01-14  3:22 UTC (permalink / raw)
  To: davem; +Cc: netdev, Jean-Francois Remy
In-Reply-To: <20150113.220438.1470978767602235254.davem@davemloft.net>

When setting base_reachable_time or base_reachable_time_ms on a
specific interface through sysctl or netlink, the reachable_time
value is not updated.

This means that neighbour entries will continue to be updated using the
old value until it is recomputed in neigh_period_work (which
    recomputes the value every 300*HZ).
On systems with HZ equal to 1000 for instance, it means 5mins before
the change is effective.

This patch changes this behavior by recomputing reachable_time after
each set on base_reachable_time or base_reachable_time_ms.
The new value will become effective the next time the neighbour's timer
is triggered.

Changes are made in two places: the netlink code for set and the sysctl
handling code. For sysctl, I use a proc_handler. The ipv6 network
code does provide its own handler but it already refreshes
reachable_time correctly so it's not an issue.
Any other user of neighbour which provide its own handlers must
refresh reachable_time.

Signed-off-by: Jean-Francois Remy <jeff@melix.org>
---
 net/core/neighbour.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e38f17..8d614c9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2043,6 +2043,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
 			case NDTPA_BASE_REACHABLE_TIME:
 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
 					      nla_get_msecs(tbp[i]));
+				/* update reachable_time as well, otherwise, the change will
+				 * only be effective after the next time neigh_periodic_work
+				 * decides to recompute it (can be multiple minutes)
+				 */
+				p->reachable_time =
+					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
 				break;
 			case NDTPA_GC_STALETIME:
 				NEIGH_VAR_SET(p, GC_STALETIME,
@@ -2921,6 +2927,31 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
 	return ret;
 }
 
+static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
+					  void __user *buffer,
+					  size_t *lenp, loff_t *ppos)
+{
+	struct neigh_parms *p = ctl->extra2;
+	int ret;
+
+	if (strcmp(ctl->procname, "base_reachable_time") == 0)
+		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
+	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
+		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
+	else
+		ret = -1;
+
+	if (write && ret == 0) {
+		/* update reachable_time as well, otherwise, the change will
+		 * only be effective after the next time neigh_periodic_work
+		 * decides to recompute it
+		 */
+		p->reachable_time =
+			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
+	}
+	return ret;
+}
+
 #define NEIGH_PARMS_DATA_OFFSET(index)	\
 	(&((struct neigh_parms *) 0)->data[index])
 
@@ -3047,6 +3078,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
 		/* ReachableTime (in milliseconds) */
 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
+	} else {
+		/* Those handlers will update p->reachable_time after
+		 * base_reachable_time(_ms) is set to ensure the new timer starts being
+		 * applied after the next neighbour update instead of waiting for
+		 * neigh_periodic_work to update its value (can be multiple minutes)
+		 * So any handler that replaces them should do this as well
+		 */
+		/* ReachableTime */
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
+			neigh_proc_base_reachable_time;
+		/* ReachableTime (in milliseconds) */
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
+			neigh_proc_base_reachable_time;
 	}
 
 	/* Don't export sysctls to unprivileged users */
-- 
2.1.0

^ permalink raw reply related

* Re: [PATCH] neighbour: fix base_reachable_time(_ms) not effective immediatly when changed
From: David Miller @ 2015-01-14  3:04 UTC (permalink / raw)
  To: jeff; +Cc: netdev
In-Reply-To: <1421135479-15952-1-git-send-email-jeff@melix.org>

From: Jean-Francois Remy <jeff@melix.org>
Date: Tue, 13 Jan 2015 08:51:19 +0100

> When setting base_reachable_time or base_reachable_time_ms through
> sysctl or netlink, the reachable_time value is not updated.
> This means that neighbour entries will continue to be updated using the
> old value until it is recomputed in neigh_period_work (which
>     recomputes the value every 300*HZ).
> On systems with HZ equal to 1000 for instance, it means 5mins before
> the change is effective.
> 
> This patch changes this behavior by recomputing reachable_time after
> each set on base_reachable_time or base_reachable_time_ms.
> The new value will become effective the next time the neighbour's timer
> is triggered.
> 
> Changes are made in two places: the netlink code for set and the sysctl
> handling code. For sysctl, I use a proc_handler. The ipv6 network
> code does provide its own handler but it already refreshes
> reachable_time correctly so it's not an issue.
> Any other user of neighbour which provide its own handlers must
> refresh reachable_time.
> 
> Signed-off-by: Jean-Francois Remy <jeff@melix.org>

Your change is correct but there are some coding style issues to
deal with:

> +				/*
> +				 * update reachable_time as well, otherwise, the change will
> +				 * only be effective after the next time neigh_periodic_work
> +				 * decides to recompute it (can be multiple minutes)
> +				 */

Comments in the networking should be:

	/* Of this
	 * form.
	 */


> +	if (write && ret == 0 ) {

No space between '0' and the closing parenthesis please.

> +		/*
> +		 * update reachable_time as well, otherwise, the change will
> +		 * only be effective after the next time neigh_periodic_work
> +		 * decides to recompute it
> +		 */

Please fix this comment's layout as per above.

> +		/*
> +		 * Those handlers will update p->reachable_time after
> +		 * base_reachable_time(_ms) is set to ensure the new timer starts being
> +		 * applied after the next neighbour update instead of waiting for
> +		 * neigh_periodic_work to update its value (can be multiple minutes)
> +		 * So any handler that replaces them should do this as well
> +		 */

Likewise.

^ permalink raw reply

* Re: [PATCH v2 1/1] atm: remove deprecated use of pci api
From: David Miller @ 2015-01-14  2:59 UTC (permalink / raw)
  To: lambert.quentin; +Cc: chas, linux-atm-general, netdev, linux-kernel
In-Reply-To: <20150112161042.GA11374@sloth>

From: Quentin Lambert <lambert.quentin@gmail.com>
Date: Mon, 12 Jan 2015 17:10:42 +0100

> @@ -2246,7 +2246,8 @@ static int eni_init_one(struct pci_dev *pci_dev,
>  		goto err_disable;
>  
>  	zero = &eni_dev->zero;
> -	zero->addr = pci_alloc_consistent(pci_dev, ENI_ZEROES_SIZE, &zero->dma);
> +	zero->addr = dma_alloc_coherent(&pci_dev->dev, ENI_ZEROES_SIZE,
> +					&zero->dma, GFP_ATOMIC);
>  	if (!zero->addr)
>  		goto err_kfree;
>  

I really would like you to look at these locations and see if
GFP_KERNEL can be used instead of GFP_ATOMIC.  I bet that nearly
all of these can, and it is preferred.

Thanks.

^ permalink raw reply

* Re: Why is bridge's sysfs values in 1/100 of second?
From: David Miller @ 2015-01-14  2:53 UTC (permalink / raw)
  To: greearb; +Cc: netdev
In-Reply-To: <54B5D5DA.5080008@candelatech.com>

From: Ben Greear <greearb@candelatech.com>
Date: Tue, 13 Jan 2015 18:35:06 -0800

> Are the units supposed to be 1/100 of a second, or is that just some
> luck depending on HZ?

More specifically, it's "USER_HZ" which unlike HZ is unchanging.

^ permalink raw reply

* Re: [PATCH] bridge: only provide proxy ARP when CONFIG_INET is enabled
From: David Ahern @ 2015-01-14  2:56 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: David Miller, cwang, netdev, kyeyoonp, bridge, stephen
In-Reply-To: <4681500.x6CYopasp1@wuerfel>

On 1/13/15 2:33 PM, Arnd Bergmann wrote:

> The effect is very similar to my patch (probably same object code), the
> only difference should be that it would add an ugly #ifdef instead of
> the preferred IS_ENABLED() check, so you don't get any compile-time
> coverage of the function.

Indeed. As long as br_do_proxy_arp does not get exported that works the 
same.

David

^ permalink raw reply

* Re: [PATCH] i40e: don't enable and init FCOE by default when do PF reset
From: ethan zhao @ 2015-01-14  2:40 UTC (permalink / raw)
  To: Dev, Vasu
  Cc: e1000-devel@lists.sourceforge.net, brian.maly@oracle.com,
	Allan, Bruce W, Brandeburg, Jesse, Parikh, Neerav, Linux NICS,
	Ronciak, John, netdev@vger.kernel.org, Ethan Zhao,
	linux-kernel@vger.kernel.org
In-Reply-To: <933BEC2E04D6A5458F4B0239FB547F9A34CC4055@fmsmsx118.amr.corp.intel.com>

Vasu,

On 2015/1/14 3:38, Dev, Vasu wrote:
>> -----Original Message-----
>>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
>>>>> b/drivers/net/ethernet/intel/i40e/i40e_main.c
>>>>> index a5f2660..a2572cc 100644
>>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
>>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
>>>>> @@ -6180,9 +6180,12 @@ static void i40e_reset_and_rebuild(struct
>>>>> i40e_pf *pf, bool reinit)
>>>>>      }
>>>>>   #endif /* CONFIG_I40E_DCB */
>>>>>   #ifdef I40E_FCOE
>>>>> -   ret = i40e_init_pf_fcoe(pf);
>>>>> -   if (ret)
>>>>> -           dev_info(&pf->pdev->dev, "init_pf_fcoe failed: %d\n", ret);
>>>>> +   if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
>>>>> +           ret = i40e_init_pf_fcoe(pf);
>>> Calling i40e_init_pf_fcoe() here conflicts with its
>> I40E_FLAG_FCOE_ENABLED pre-condition since I40E_FLAG_FCOE_ENABLED is
>> set by very same i40e_init_pf_fcoe(), in turn i40e_init_pf_fcoe() will never get
>> called.
>>
>> I don't think so,  here ,i40e_reset_and_rebuild()  is not the only and the first
>> place that  i40e_init_pf_fcoe() is called, see i40e_probe(), that is the first
>> chance.
>>
>> i40e_probe()
>> -->i40e_sw_init()
>>       -->i40e_init_pf_fcoe()
>>
>> And the I40E_FLAG_FCOE_ENABLED is possible be set by
>> i40e_fcoe_enable() or i40e_fcoe_disable() interface before the reset action is
>> to be done.
>>
> It is set by i40e_init_pf_fcoe() and you are right that the modified call flow by your patch won't impact setting of I40E_FLAG_FCOE_ENABLED anyway which could have prevented calling i40e_init_pf_fcoe() as I described above, so this is not an issue with the patch.
>
>> BTW, the reason I post this patch is that we hit a bug, after setup vlan, the PF
>> is enabled to FCOE.
>>
> Then that BUG would still remain un-fixed and calling i40e_init_pf_fcoe() under I40E_FLAG_FCOE_ENABLED  flag really won't affect call flow to fix anything. I mean I40E_FLAG_FCOE_ENABLED  condition will be true with "pf->hw.func_caps.fcoe == true" and otherwise calling i40e_init_pf_fcoe() simply returns back early on after checking "pf->hw.func_caps.fcoe == false", so how that bug is fixed here by added I40E_FLAG_FCOE_ENABLED  condition ? What is the bug ?
  The func_caps.fcoe is assigned by following call path, under our test 
environment,

  i40e_probe()
   ->i40e_get_capabilities()
      ->i40e_aq_discover_capabilities()
         ->i40e_parse_discover_capabilities()

  Or

  i40e_reset_and_rebuild()
   ->i40e_get_capabilities()
     ->i40e_aq_discover_capabilities()
       ->i40e_parse_discover_capabilities()

  Under our test environment, the "pf->hw.func_caps.fcoe" is true. so if 
i40e_reset_and_rebuild() is called for VLAN setup, ethtool diagnostic test.
  And then i40e_init_pf_fcoe() is to be called,

  While if (!pf->hw.func_caps.fcoe) wouldn't return,

  So  pf->flags is set to I40E_FLAG_FCOE_ENABLED.

  With my patch,  i40e_init_pf_fcoe() is only called after 
I40E_FLAG_FCOE_ENABLED is set before reset.

Enable FCOE in i40e_probe() or not is another issue.


Thanks,
Ethan


>
>>> Jeff Kirsher should be getting out a patch queued by me which adds
>> I40E_FCoE Kbuild option, in that FCoE is disabled by default and  user could
>> enable FCoE only if needed, that patch would do same of skipping
>> i40e_init_pf_fcoe() whether FCoE capability in device enabled or not in
>> default config.
>> The following patch will not fix the above issue -- configuration of PF will be
>> changed via reset.
>> How about the FCOE is configured and disabled by  i40e_fcoe_disable() ,
>> then reset happens ?
>>
> May be but if the BUG is due to FCoE being enabled then having it disabled in config will avoid the bug for non FCoE config option and once bug is understood then that has to be fixed for FCoE enabled config also as I asked above.
>
> Thanks Ethan for detailed response.
> Vasu
>
>>>  From patchwork Wed Oct  2 23:26:08 2013
>>> Content-Type: text/plain; charset="utf-8"
>>> MIME-Version: 1.0
>>> Content-Transfer-Encoding: 7bit
>>> Subject: [net] i40e: adds FCoE configure option
>>> Date: Thu, 03 Oct 2013 07:26:08 -0000
>>> From: Vasu Dev <vasu.dev@intel.com>
>>> X-Patchwork-Id: 11797
>>>
>>> Adds FCoE config option I40E_FCOE, so that FCoE can be enabled as
>>> needed but otherwise have it disabled by default.
>>>
>>> This also eliminate multiple FCoE config checks, instead now just one
>>> config check for CONFIG_I40E_FCOE.
>>>
>>> The I40E FCoE was added with 3.17 kernel and therefore this patch
>>> shall be applied to stable 3.17 kernel also.
>>>
>>> CC: <stable@vger.kernel.org>
>>> Signed-off-by: Vasu Dev <vasu.dev@intel.com>
>>> Tested-by: Jim Young <jamesx.m.young@intel.com>
>>>
>>> ---
>>> drivers/net/ethernet/intel/Kconfig           |   11 +++++++++++
>>>   drivers/net/ethernet/intel/i40e/Makefile     |    2 +-
>>>   drivers/net/ethernet/intel/i40e/i40e_osdep.h |    4 ++--
>>>   3 files changed, 14 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/Kconfig
>>> b/drivers/net/ethernet/intel/Kconfig
>>> index 5b8300a..4d61ef5 100644
>>> --- a/drivers/net/ethernet/intel/Kconfig
>>> +++ b/drivers/net/ethernet/intel/Kconfig
>>> @@ -281,6 +281,17 @@ config I40E_DCB
>>>
>>>            If unsure, say N.
>>>
>>> +config I40E_FCOE
>>> +       bool "Fibre Channel over Ethernet (FCoE)"
>>> +       default n
>>> +       depends on I40E && DCB && FCOE
>>> +       ---help---
>>> +         Say Y here if you want to use Fibre Channel over Ethernet (FCoE)
>>> +         in the driver. This will create new netdev for exclusive FCoE
>>> +         use with XL710 FCoE offloads enabled.
>>> +
>>> +         If unsure, say N.
>>> +
>>>   config I40EVF
>>>          tristate "Intel(R) XL710 X710 Virtual Function Ethernet support"
>>>          depends on PCI_MSI
>>> diff --git a/drivers/net/ethernet/intel/i40e/Makefile
>>> b/drivers/net/ethernet/intel/i40e/Makefile
>>> index 4b94ddb..c405819 100644
>>> --- a/drivers/net/ethernet/intel/i40e/Makefile
>>> +++ b/drivers/net/ethernet/intel/i40e/Makefile
>>> @@ -44,4 +44,4 @@ i40e-objs := i40e_main.o \
>>>          i40e_virtchnl_pf.o
>>>
>>>   i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
>>> -i40e-$(CONFIG_FCOE:m=y) += i40e_fcoe.o
>>> +i40e-$(CONFIG_I40E_FCOE) += i40e_fcoe.o
>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
>>> b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
>>> index 045b5c4..ad802dd 100644
>>> --- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
>>> @@ -78,7 +78,7 @@ do {                                                            \
>>>   } while (0)
>>>
>>>   typedef enum i40e_status_code i40e_status; -#if defined(CONFIG_FCOE)
>>> || defined(CONFIG_FCOE_MODULE)
>>> +#ifdef CONFIG_I40E_FCOE
>>>   #define I40E_FCOE
>>> -#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
>>> +#endif
>>>   #endif /* _I40E_OSDEP_H_ */
>>>
>>>>> +           if (ret)
>>>>> +                   dev_info(&pf->pdev->dev,
>>>>> +                            "init_pf_fcoe failed: %d\n", ret);
>>>>> +   }
>>>>>
>>>>>   #endif
>>>>>      /* do basic switch setup */
>>>>> --
>>>>> 1.8.3.1
>> Thanks,
>> Ethan


------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* Why is bridge's sysfs values in 1/100 of second?
From: Ben Greear @ 2015-01-14  2:35 UTC (permalink / raw)
  To: netdev

Are the units supposed to be 1/100 of a second, or is that just some
luck depending on HZ?


root@ath9k-138:/home/lanforge# brctl setageing br0 98
root@ath9k-138:/home/lanforge# cat /sys/class/net/br0/bridge/ageing_time
9800

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply

* [PATCH net-next 2/2] cxgb4i : Call into recently added cxgb4 ipv6 api
From: Anish Bhatt @ 2015-01-14  2:28 UTC (permalink / raw)
  To: netdev; +Cc: davem, hariprasad, kxie, deepak.s, Anish Bhatt
In-Reply-To: <1421202516-13862-1-git-send-email-anish@chelsio.com>

Get a reference on every ipv6 address we offload to hardware so that it cannot
be prematurely cleared out before cleanup or when still in use.

Signed-off-by: Anish Bhatt <anish@chelsio.com>
---
 drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 37d7191a3c38..dd00e5fe4a5e 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -28,6 +28,7 @@
 #include "t4fw_api.h"
 #include "l2t.h"
 #include "cxgb4i.h"
+#include "clip_tbl.h"
 
 static unsigned int dbg_level;
 
@@ -1322,6 +1323,9 @@ static inline void l2t_put(struct cxgbi_sock *csk)
 static void release_offload_resources(struct cxgbi_sock *csk)
 {
 	struct cxgb4_lld_info *lldi;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct net_device *ndev = csk->cdev->ports[csk->port_id];
+#endif
 
 	log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK,
 		"csk 0x%p,%u,0x%lx,%u.\n",
@@ -1334,6 +1338,12 @@ static void release_offload_resources(struct cxgbi_sock *csk)
 	}
 
 	l2t_put(csk);
+#if IS_ENABLED(CONFIG_IPV6)
+	if (csk->csk_family == AF_INET6)
+		cxgb4_clip_release(ndev,
+				   (const u32 *)&csk->saddr6.sin6_addr, 1);
+#endif
+
 	if (cxgbi_sock_flag(csk, CTPF_HAS_ATID))
 		free_atid(csk);
 	else if (cxgbi_sock_flag(csk, CTPF_HAS_TID)) {
@@ -1391,10 +1401,15 @@ static int init_act_open(struct cxgbi_sock *csk)
 	csk->l2t = cxgb4_l2t_get(lldi->l2t, n, ndev, 0);
 	if (!csk->l2t) {
 		pr_err("%s, cannot alloc l2t.\n", ndev->name);
-		goto rel_resource;
+		goto rel_resource_without_clip;
 	}
 	cxgbi_sock_get(csk);
 
+#if IS_ENABLED(CONFIG_IPV6)
+	if (csk->csk_family == AF_INET6)
+		cxgb4_clip_get(ndev, (const u32 *)&csk->saddr6.sin6_addr, 1);
+#endif
+
 	if (t4) {
 		size = sizeof(struct cpl_act_open_req);
 		size6 = sizeof(struct cpl_act_open_req6);
@@ -1451,6 +1466,12 @@ static int init_act_open(struct cxgbi_sock *csk)
 	return 0;
 
 rel_resource:
+#if IS_ENABLED(CONFIG_IPV6)
+	if (csk->csk_family == AF_INET6)
+		cxgb4_clip_release(ndev,
+				   (const u32 *)&csk->saddr6.sin6_addr, 1);
+#endif
+rel_resource_without_clip:
 	if (n)
 		neigh_release(n);
 	if (skb)
-- 
2.2.1

^ permalink raw reply related

* [PATCH net-next 1/2] cxgb4: Update ipv6 address handling api
From: Anish Bhatt @ 2015-01-14  2:28 UTC (permalink / raw)
  To: netdev; +Cc: davem, hariprasad, kxie, deepak.s, Anish Bhatt
In-Reply-To: <1421202516-13862-1-git-send-email-anish@chelsio.com>

This patch improves on previously added support for ipv6 addresses. The code
is consolidated to a single file and adds an api for use by dependent upper
level drivers such as cxgb4i/iw_cxgb4 etc.

Signed-off-by: Anish Bhatt <anish@chelsio.com>
Signed-off-by: Deepak Singh <deepak.s@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/Makefile        |   2 +-
 drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c      | 314 +++++++++++++++++++++
 drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h      |  41 +++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |   3 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |  19 ++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 228 +++++----------
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h     |   3 -
 7 files changed, 447 insertions(+), 163 deletions(-)
 create mode 100644 drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
 create mode 100644 drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h

diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index b85280775997..ae50cd72358c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -4,6 +4,6 @@
 
 obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 
-cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o
+cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
new file mode 100644
index 000000000000..dacecba70f76
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -0,0 +1,314 @@
+/*
+ *  This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *  Copyright (C) 2003-2014 Chelsio Communications.  All rights reserved.
+ *
+ *  Written by Deepak (deepak.s@chelsio.com)
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
+ *  release for licensing terms and conditions.
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/jhash.h>
+#include <linux/if_vlan.h>
+#include <net/addrconf.h>
+#include "cxgb4.h"
+#include "clip_tbl.h"
+
+static inline unsigned int ipv4_clip_hash(struct clip_tbl *c, const u32 *key)
+{
+	unsigned int clipt_size_half = c->clipt_size / 2;
+
+	return jhash_1word(*key, 0) % clipt_size_half;
+}
+
+static inline unsigned int ipv6_clip_hash(struct clip_tbl *d, const u32 *key)
+{
+	unsigned int clipt_size_half = d->clipt_size / 2;
+	u32 xor = key[0] ^ key[1] ^ key[2] ^ key[3];
+
+	return clipt_size_half +
+		(jhash_1word(xor, 0) % clipt_size_half);
+}
+
+static unsigned int clip_addr_hash(struct clip_tbl *ctbl, const u32 *addr,
+				   int addr_len)
+{
+	return addr_len == 4 ? ipv4_clip_hash(ctbl, addr) :
+				ipv6_clip_hash(ctbl, addr);
+}
+
+static int clip6_get_mbox(const struct net_device *dev,
+			  const struct in6_addr *lip)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct fw_clip_cmd c;
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_write = htonl(FW_CMD_OP_V(FW_CLIP_CMD) |
+			FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
+	c.alloc_to_len16 = htonl(FW_CLIP_CMD_ALLOC_F | FW_LEN16(c));
+	*(__be64 *)&c.ip_hi = *(__be64 *)(lip->s6_addr);
+	*(__be64 *)&c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
+	return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
+}
+
+static int clip6_release_mbox(const struct net_device *dev,
+			      const struct in6_addr *lip)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct fw_clip_cmd c;
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_write = htonl(FW_CMD_OP_V(FW_CLIP_CMD) |
+			FW_CMD_REQUEST_F | FW_CMD_READ_F);
+	c.alloc_to_len16 = htonl(FW_CLIP_CMD_FREE_F | FW_LEN16(c));
+	*(__be64 *)&c.ip_hi = *(__be64 *)(lip->s6_addr);
+	*(__be64 *)&c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
+	return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
+}
+
+int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct clip_tbl *ctbl = adap->clipt;
+	struct clip_entry *ce, *cte;
+	u32 *addr = (u32 *)lip;
+	int hash;
+	int addr_len;
+	int ret = 0;
+
+	if (v6)
+		addr_len = 16;
+	else
+		addr_len = 4;
+
+	hash = clip_addr_hash(ctbl, addr, addr_len);
+
+	read_lock_bh(&ctbl->lock);
+	list_for_each_entry(cte, &ctbl->hash_list[hash], list) {
+		if (addr_len == cte->addr_len &&
+		    memcmp(lip, cte->addr, cte->addr_len) == 0) {
+			ce = cte;
+			read_unlock_bh(&ctbl->lock);
+			goto found;
+		}
+	}
+	read_unlock_bh(&ctbl->lock);
+
+	write_lock_bh(&ctbl->lock);
+	if (!list_empty(&ctbl->ce_free_head)) {
+		ce = list_first_entry(&ctbl->ce_free_head,
+				      struct clip_entry, list);
+		list_del(&ce->list);
+		INIT_LIST_HEAD(&ce->list);
+		spin_lock_init(&ce->lock);
+		atomic_set(&ce->refcnt, 0);
+		atomic_dec(&ctbl->nfree);
+		ce->addr_len = addr_len;
+		memcpy(ce->addr, lip, addr_len);
+		list_add_tail(&ce->list, &ctbl->hash_list[hash]);
+		if (v6) {
+			ret = clip6_get_mbox(dev, (const struct in6_addr *)lip);
+			if (ret) {
+				write_unlock_bh(&ctbl->lock);
+				return ret;
+			}
+		}
+	} else {
+		write_unlock_bh(&ctbl->lock);
+		return -ENOMEM;
+	}
+	write_unlock_bh(&ctbl->lock);
+found:
+	atomic_inc(&ce->refcnt);
+
+	return 0;
+}
+EXPORT_SYMBOL(cxgb4_clip_get);
+
+void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct clip_tbl *ctbl = adap->clipt;
+	struct clip_entry *ce, *cte;
+	u32 *addr = (u32 *)lip;
+	int hash;
+	int addr_len;
+
+	if (v6)
+		addr_len = 16;
+	else
+		addr_len = 4;
+
+	hash = clip_addr_hash(ctbl, addr, addr_len);
+
+	read_lock_bh(&ctbl->lock);
+	list_for_each_entry(cte, &ctbl->hash_list[hash], list) {
+		if (addr_len == cte->addr_len &&
+		    memcmp(lip, cte->addr, cte->addr_len) == 0) {
+			ce = cte;
+			read_unlock_bh(&ctbl->lock);
+			goto found;
+		}
+	}
+	read_unlock_bh(&ctbl->lock);
+
+	return;
+found:
+	write_lock_bh(&ctbl->lock);
+	spin_lock_bh(&ce->lock);
+	if (atomic_dec_and_test(&ce->refcnt)) {
+		list_del(&ce->list);
+		INIT_LIST_HEAD(&ce->list);
+		list_add_tail(&ce->list, &ctbl->ce_free_head);
+		atomic_inc(&ctbl->nfree);
+		if (v6)
+			clip6_release_mbox(dev, (const struct in6_addr *)lip);
+	}
+	spin_unlock_bh(&ce->lock);
+	write_unlock_bh(&ctbl->lock);
+}
+EXPORT_SYMBOL(cxgb4_clip_release);
+
+/* Retrieves IPv6 addresses from a root device (bond, vlan) associated with
+ * a physical device.
+ * The physical device reference is needed to send the actul CLIP command.
+ */
+static int cxgb4_update_dev_clip(struct net_device *root_dev,
+				 struct net_device *dev)
+{
+	struct inet6_dev *idev = NULL;
+	struct inet6_ifaddr *ifa;
+	int ret = 0;
+
+	idev = __in6_dev_get(root_dev);
+	if (!idev)
+		return ret;
+
+	read_lock_bh(&idev->lock);
+	list_for_each_entry(ifa, &idev->addr_list, if_list) {
+		ret = cxgb4_clip_get(dev, (const u32 *)ifa->addr.s6_addr, 1);
+		if (ret < 0)
+			break;
+	}
+	read_unlock_bh(&idev->lock);
+
+	return ret;
+}
+
+int cxgb4_update_root_dev_clip(struct net_device *dev)
+{
+	struct net_device *root_dev = NULL;
+	int i, ret = 0;
+
+	/* First populate the real net device's IPv6 addresses */
+	ret = cxgb4_update_dev_clip(dev, dev);
+	if (ret)
+		return ret;
+
+	/* Parse all bond and vlan devices layered on top of the physical dev */
+	root_dev = netdev_master_upper_dev_get_rcu(dev);
+	if (root_dev) {
+		ret = cxgb4_update_dev_clip(root_dev, dev);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < VLAN_N_VID; i++) {
+		root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i);
+		if (!root_dev)
+			continue;
+
+		ret = cxgb4_update_dev_clip(root_dev, dev);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(cxgb4_update_root_dev_clip);
+
+int clip_tbl_show(struct seq_file *seq, void *v)
+{
+	struct adapter *adapter = seq->private;
+	struct clip_tbl *ctbl = adapter->clipt;
+	struct clip_entry *ce;
+	char ip[60];
+	int i;
+
+	read_lock_bh(&ctbl->lock);
+
+	seq_puts(seq, "IP Address                  Users\n");
+	for (i = 0 ; i < ctbl->clipt_size;  ++i) {
+		list_for_each_entry(ce, &ctbl->hash_list[i], list) {
+			ip[0] = '\0';
+			if (ce->addr_len == 16)
+				sprintf(ip, "%pI6c", ce->addr);
+			else
+				sprintf(ip, "%pI4c", ce->addr);
+			seq_printf(seq, "%-25s   %u\n", ip,
+				   atomic_read(&ce->refcnt));
+		}
+	}
+	seq_printf(seq, "Free clip entries : %d\n", atomic_read(&ctbl->nfree));
+
+	read_unlock_bh(&ctbl->lock);
+
+	return 0;
+}
+
+struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start,
+				  unsigned int clipt_end)
+{
+	struct clip_entry *cl_list;
+	struct clip_tbl *ctbl;
+	unsigned int clipt_size;
+	int i;
+
+	if (clipt_start >= clipt_end)
+		return NULL;
+	clipt_size = clipt_end - clipt_start + 1;
+	if (clipt_size < CLIPT_MIN_HASH_BUCKETS)
+		return NULL;
+
+	ctbl = t4_alloc_mem(sizeof(*ctbl) +
+				clipt_size*sizeof(struct list_head));
+	if (!ctbl)
+		return NULL;
+
+	ctbl->clipt_start = clipt_start;
+	ctbl->clipt_size = clipt_size;
+	INIT_LIST_HEAD(&ctbl->ce_free_head);
+
+	atomic_set(&ctbl->nfree, clipt_size);
+	rwlock_init(&ctbl->lock);
+
+	for (i = 0; i < ctbl->clipt_size; ++i)
+		INIT_LIST_HEAD(&ctbl->hash_list[i]);
+
+	cl_list = t4_alloc_mem(clipt_size*sizeof(struct clip_entry));
+	ctbl->cl_list = (void *)cl_list;
+
+	for (i = 0; i < clipt_size; i++) {
+		INIT_LIST_HEAD(&cl_list[i].list);
+		list_add_tail(&cl_list[i].list, &ctbl->ce_free_head);
+	}
+
+	return ctbl;
+}
+
+void t4_cleanup_clip_tbl(struct adapter *adap)
+{
+	struct clip_tbl *ctbl = adap->clipt;
+
+	if (ctbl) {
+		if (ctbl->cl_list)
+			t4_free_mem(ctbl->cl_list);
+		t4_free_mem(ctbl);
+	}
+}
+EXPORT_SYMBOL(t4_cleanup_clip_tbl);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h
new file mode 100644
index 000000000000..2eaba0161cf8
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h
@@ -0,0 +1,41 @@
+/*
+ *  This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *  Copyright (C) 2003-2014 Chelsio Communications.  All rights reserved.
+ *
+ *  Written by Deepak (deepak.s@chelsio.com)
+ *
+ *  This program is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
+ *  release for licensing terms and conditions.
+ */
+
+struct clip_entry {
+	spinlock_t lock;	/* Hold while modifying clip reference */
+	atomic_t refcnt;
+	struct list_head list;
+	u32 addr[4];
+	int addr_len;
+};
+
+struct clip_tbl {
+	unsigned int clipt_start;
+	unsigned int clipt_size;
+	rwlock_t lock;
+	atomic_t nfree;
+	struct list_head ce_free_head;
+	void *cl_list;
+	struct list_head hash_list[0];
+};
+
+enum {
+	CLIPT_MIN_HASH_BUCKETS = 2,
+};
+
+struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start,
+				  unsigned int clipt_end);
+int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6);
+void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6);
+int clip_tbl_show(struct seq_file *seq, void *v);
+int cxgb4_update_root_dev_clip(struct net_device *dev);
+void t4_cleanup_clip_tbl(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 7c785b5e7757..e468f920892f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -668,6 +668,9 @@ struct adapter {
 	unsigned int l2t_start;
 	unsigned int l2t_end;
 	struct l2t_data *l2t;
+	unsigned int clipt_start;
+	unsigned int clipt_end;
+	struct clip_tbl *clipt;
 	void *uld_handle[CXGB4_ULD_MAX];
 	struct list_head list_node;
 	struct list_head rcu_node;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index e9f348942eb0..6dabfe5ba44e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -41,6 +41,7 @@
 #include "t4_regs.h"
 #include "t4fw_api.h"
 #include "cxgb4_debugfs.h"
+#include "clip_tbl.h"
 #include "l2t.h"
 
 /* generic seq_file support for showing a table of size rows x width. */
@@ -563,6 +564,21 @@ static const struct file_operations mps_tcam_debugfs_fops = {
 	.release = seq_release,
 };
 
+#if IS_ENABLED(CONFIG_IPV6)
+static int clip_tbl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, clip_tbl_show, PDE_DATA(inode));
+}
+
+static const struct file_operations clip_tbl_debugfs_fops = {
+	.owner   = THIS_MODULE,
+	.open    = clip_tbl_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release
+};
+#endif
+
 static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
 			loff_t *ppos)
 {
@@ -646,6 +662,9 @@ int t4_setup_debugfs(struct adapter *adap)
 		{ "devlog", &devlog_fops, S_IRUSR, 0 },
 		{ "l2t", &t4_l2t_fops, S_IRUSR, 0},
 		{ "mps_tcam", &mps_tcam_debugfs_fops, S_IRUSR, 0 },
+#if IS_ENABLED(CONFIG_IPV6)
+		{ "clip_tbl", &clip_tbl_debugfs_fops, S_IRUSR, 0 },
+#endif
 	};
 
 	add_debugfs_files(adap,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 082a596a4264..1147e1e88314 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -62,6 +62,7 @@
 #include <net/netevent.h>
 #include <net/addrconf.h>
 #include <net/bonding.h>
+#include <net/addrconf.h>
 #include <asm/uaccess.h>
 
 #include "cxgb4.h"
@@ -71,6 +72,7 @@
 #include "t4fw_api.h"
 #include "cxgb4_dcb.h"
 #include "cxgb4_debugfs.h"
+#include "clip_tbl.h"
 #include "l2t.h"
 
 #ifdef DRV_VERSION
@@ -3236,40 +3238,6 @@ static int tid_init(struct tid_info *t)
 	return 0;
 }
 
-int cxgb4_clip_get(const struct net_device *dev,
-		   const struct in6_addr *lip)
-{
-	struct adapter *adap;
-	struct fw_clip_cmd c;
-
-	adap = netdev2adap(dev);
-	memset(&c, 0, sizeof(c));
-	c.op_to_write = htonl(FW_CMD_OP_V(FW_CLIP_CMD) |
-			FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
-	c.alloc_to_len16 = htonl(FW_CLIP_CMD_ALLOC_F | FW_LEN16(c));
-	c.ip_hi = *(__be64 *)(lip->s6_addr);
-	c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
-	return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
-}
-EXPORT_SYMBOL(cxgb4_clip_get);
-
-int cxgb4_clip_release(const struct net_device *dev,
-		       const struct in6_addr *lip)
-{
-	struct adapter *adap;
-	struct fw_clip_cmd c;
-
-	adap = netdev2adap(dev);
-	memset(&c, 0, sizeof(c));
-	c.op_to_write = htonl(FW_CMD_OP_V(FW_CLIP_CMD) |
-			FW_CMD_REQUEST_F | FW_CMD_READ_F);
-	c.alloc_to_len16 = htonl(FW_CLIP_CMD_FREE_F | FW_LEN16(c));
-	c.ip_hi = *(__be64 *)(lip->s6_addr);
-	c.ip_lo = *(__be64 *)(lip->s6_addr + 8);
-	return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, false);
-}
-EXPORT_SYMBOL(cxgb4_clip_release);
-
 /**
  *	cxgb4_create_server - create an IP server
  *	@dev: the device
@@ -4122,148 +4090,61 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
 }
 EXPORT_SYMBOL(cxgb4_unregister_uld);
 
-/* Check if netdev on which event is occured belongs to us or not. Return
- * success (true) if it belongs otherwise failure (false).
- * Called with rcu_read_lock() held.
- */
 #if IS_ENABLED(CONFIG_IPV6)
-static bool cxgb4_netdev(const struct net_device *netdev)
+static int cxgb4_inet6addr_handler(struct notifier_block *this,
+				   unsigned long event, void *data)
 {
+	struct inet6_ifaddr *ifa = data;
+	struct net_device *event_dev = ifa->idev->dev;
+	const struct device *parent = NULL;
+#if IS_ENABLED(CONFIG_BONDING)
 	struct adapter *adap;
-	int i;
-
-	list_for_each_entry_rcu(adap, &adap_rcu_list, rcu_node)
-		for (i = 0; i < MAX_NPORTS; i++)
-			if (adap->port[i] == netdev)
-				return true;
-	return false;
-}
+#endif
+	if (event_dev->priv_flags & IFF_802_1Q_VLAN)
+		event_dev = vlan_dev_real_dev(event_dev);
+#if IS_ENABLED(CONFIG_BONDING)
+	if (event_dev->flags & IFF_MASTER) {
+		list_for_each_entry(adap, &adapter_list, list_node) {
+			switch (event) {
+			case NETDEV_UP:
+				cxgb4_clip_get(adap->port[0],
+					       (const u32 *)ifa, 1);
+				break;
+			case NETDEV_DOWN:
+				cxgb4_clip_release(adap->port[0],
+						   (const u32 *)ifa, 1);
+				break;
+			default:
+				break;
+			}
+		}
+		return NOTIFY_OK;
+	}
+#endif
 
-static int clip_add(struct net_device *event_dev, struct inet6_ifaddr *ifa,
-		    unsigned long event)
-{
-	int ret = NOTIFY_DONE;
+	if (event_dev)
+		parent = event_dev->dev.parent;
 
-	rcu_read_lock();
-	if (cxgb4_netdev(event_dev)) {
+	if (parent && parent->driver == &cxgb4_driver.driver) {
 		switch (event) {
 		case NETDEV_UP:
-			ret = cxgb4_clip_get(event_dev, &ifa->addr);
-			if (ret < 0) {
-				rcu_read_unlock();
-				return ret;
-			}
-			ret = NOTIFY_OK;
+			cxgb4_clip_get(event_dev, (const u32 *)ifa, 1);
 			break;
 		case NETDEV_DOWN:
-			cxgb4_clip_release(event_dev, &ifa->addr);
-			ret = NOTIFY_OK;
+			cxgb4_clip_release(event_dev, (const u32 *)ifa, 1);
 			break;
 		default:
 			break;
 		}
 	}
-	rcu_read_unlock();
-	return ret;
-}
-
-static int cxgb4_inet6addr_handler(struct notifier_block *this,
-		unsigned long event, void *data)
-{
-	struct inet6_ifaddr *ifa = data;
-	struct net_device *event_dev;
-	int ret = NOTIFY_DONE;
-	struct bonding *bond = netdev_priv(ifa->idev->dev);
-	struct list_head *iter;
-	struct slave *slave;
-	struct pci_dev *first_pdev = NULL;
-
-	if (ifa->idev->dev->priv_flags & IFF_802_1Q_VLAN) {
-		event_dev = vlan_dev_real_dev(ifa->idev->dev);
-		ret = clip_add(event_dev, ifa, event);
-	} else if (ifa->idev->dev->flags & IFF_MASTER) {
-		/* It is possible that two different adapters are bonded in one
-		 * bond. We need to find such different adapters and add clip
-		 * in all of them only once.
-		 */
-		bond_for_each_slave(bond, slave, iter) {
-			if (!first_pdev) {
-				ret = clip_add(slave->dev, ifa, event);
-				/* If clip_add is success then only initialize
-				 * first_pdev since it means it is our device
-				 */
-				if (ret == NOTIFY_OK)
-					first_pdev = to_pci_dev(
-							slave->dev->dev.parent);
-			} else if (first_pdev !=
-				   to_pci_dev(slave->dev->dev.parent))
-					ret = clip_add(slave->dev, ifa, event);
-		}
-	} else
-		ret = clip_add(ifa->idev->dev, ifa, event);
-
-	return ret;
+	return NOTIFY_OK;
 }
 
+static bool inet6addr_registered;
 static struct notifier_block cxgb4_inet6addr_notifier = {
 	.notifier_call = cxgb4_inet6addr_handler
 };
 
-/* Retrieves IPv6 addresses from a root device (bond, vlan) associated with
- * a physical device.
- * The physical device reference is needed to send the actul CLIP command.
- */
-static int update_dev_clip(struct net_device *root_dev, struct net_device *dev)
-{
-	struct inet6_dev *idev = NULL;
-	struct inet6_ifaddr *ifa;
-	int ret = 0;
-
-	idev = __in6_dev_get(root_dev);
-	if (!idev)
-		return ret;
-
-	read_lock_bh(&idev->lock);
-	list_for_each_entry(ifa, &idev->addr_list, if_list) {
-		ret = cxgb4_clip_get(dev, &ifa->addr);
-		if (ret < 0)
-			break;
-	}
-	read_unlock_bh(&idev->lock);
-
-	return ret;
-}
-
-static int update_root_dev_clip(struct net_device *dev)
-{
-	struct net_device *root_dev = NULL;
-	int i, ret = 0;
-
-	/* First populate the real net device's IPv6 addresses */
-	ret = update_dev_clip(dev, dev);
-	if (ret)
-		return ret;
-
-	/* Parse all bond and vlan devices layered on top of the physical dev */
-	root_dev = netdev_master_upper_dev_get_rcu(dev);
-	if (root_dev) {
-		ret = update_dev_clip(root_dev, dev);
-		if (ret)
-			return ret;
-	}
-
-	for (i = 0; i < VLAN_N_VID; i++) {
-		root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i);
-		if (!root_dev)
-			continue;
-
-		ret = update_dev_clip(root_dev, dev);
-		if (ret)
-			break;
-	}
-	return ret;
-}
-
 static void update_clip(const struct adapter *adap)
 {
 	int i;
@@ -4277,7 +4158,7 @@ static void update_clip(const struct adapter *adap)
 		ret = 0;
 
 		if (dev)
-			ret = update_root_dev_clip(dev);
+			ret = cxgb4_update_root_dev_clip(dev);
 
 		if (ret < 0)
 			break;
@@ -5391,6 +5272,14 @@ static int adap_init0(struct adapter *adap)
 	adap->tids.nftids = val[4] - val[3] + 1;
 	adap->sge.ingr_start = val[5];
 
+	params[0] = FW_PARAM_PFVF(CLIP_START);
+	params[1] = FW_PARAM_PFVF(CLIP_END);
+	ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
+	if (ret < 0)
+		goto bye;
+	adap->clipt_start = val[0];
+	adap->clipt_end = val[1];
+
 	/* query params related to active filter region */
 	params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
 	params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
@@ -6211,6 +6100,18 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		adapter->params.offload = 0;
 	}
 
+#if IS_ENABLED(CONFIG_IPV6)
+	adapter->clipt = t4_init_clip_tbl(adapter->clipt_start,
+					  adapter->clipt_end);
+	if (!adapter->clipt) {
+		/* We tolerate a lack of clip_table, giving up
+		 * some functionality
+		 */
+		dev_warn(&pdev->dev,
+			 "could not allocate Clip table, continuing\n");
+		adapter->params.offload = 0;
+	}
+#endif
 	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
 		dev_warn(&pdev->dev, "could not allocate TID table, "
 			 "continuing\n");
@@ -6336,6 +6237,9 @@ static void remove_one(struct pci_dev *pdev)
 			cxgb_down(adapter);
 
 		free_some_resources(adapter);
+#if IS_ENABLED(CONFIG_IPV6)
+		t4_cleanup_clip_tbl(adapter);
+#endif
 		iounmap(adapter->regs);
 		if (!is_t4(adapter->params.chip))
 			iounmap(adapter->bar2);
@@ -6374,7 +6278,10 @@ static int __init cxgb4_init_module(void)
 		debugfs_remove(cxgb4_debugfs_root);
 
 #if IS_ENABLED(CONFIG_IPV6)
-	register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
+	if (!inet6addr_registered) {
+		register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
+		inet6addr_registered = true;
+	}
 #endif
 
 	return ret;
@@ -6383,7 +6290,10 @@ static int __init cxgb4_init_module(void)
 static void __exit cxgb4_cleanup_module(void)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-	unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier);
+	if (inet6addr_registered && list_empty(&adapter_list)) {
+		unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier);
+		inet6addr_registered = false;
+	}
 #endif
 	pci_unregister_driver(&cxgb4_driver);
 	debugfs_remove(cxgb4_debugfs_root);  /* NULL ok */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 152b4c4c7809..78ab4d406ce2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -173,9 +173,6 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
 			       unsigned char port, unsigned char mask);
 int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
 			       unsigned int queue, bool ipv6);
-int cxgb4_clip_get(const struct net_device *dev, const struct in6_addr *lip);
-int cxgb4_clip_release(const struct net_device *dev,
-		       const struct in6_addr *lip);
 
 static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
 {
-- 
2.2.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox