Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH V2 6/8] net: mediatek: fix TX locking
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

Inside the TX path there is a lock inside the tx_map function. This is
however too late. The patch moves the lock to the start of the xmit
function right before the free count check of the DMA ring happens.
If we do not do this, the code becomes racy leading to TX stalls and
dropped packets. This happens as there are 2 netdevs running on the
same physical DMA ring.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |   20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 60b66ab..8434355 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -536,7 +536,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_dma *itxd, *txd;
 	struct mtk_tx_buf *tx_buf;
-	unsigned long flags;
 	dma_addr_t mapped_addr;
 	unsigned int nr_frags;
 	int i, n_desc = 1;
@@ -568,11 +567,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
 		return -ENOMEM;
 
-	/* normally we can rely on the stack not calling this more than once,
-	 * however we have 2 queues running ont he same ring so we need to lock
-	 * the ring access
-	 */
-	spin_lock_irqsave(&eth->page_lock, flags);
 	WRITE_ONCE(itxd->txd1, mapped_addr);
 	tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
 	dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
@@ -632,8 +626,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
 				(!nr_frags * TX_DMA_LS0)));
 
-	spin_unlock_irqrestore(&eth->page_lock, flags);
-
 	netdev_sent_queue(dev, skb->len);
 	skb_tx_timestamp(skb);
 
@@ -661,8 +653,6 @@ err_dma:
 		itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
 	} while (itxd != txd);
 
-	spin_unlock_irqrestore(&eth->page_lock, flags);
-
 	return -ENOMEM;
 }
 
@@ -712,14 +702,22 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct net_device_stats *stats = &dev->stats;
+	unsigned long flags;
 	bool gso = false;
 	int tx_num;
 
+	/* normally we can rely on the stack not calling this more than once,
+	 * however we have 2 queues running ont he same ring so we need to lock
+	 * the ring access
+	 */
+	spin_lock_irqsave(&eth->page_lock, flags);
+
 	tx_num = mtk_cal_txd_req(skb);
 	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
 		mtk_stop_queue(eth);
 		netif_err(eth, tx_queued, dev,
 			  "Tx Ring full when queue awake!\n");
+		spin_unlock_irqrestore(&eth->page_lock, flags);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -747,10 +745,12 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			     ring->thresh))
 			mtk_wake_queue(eth);
 	}
+	spin_unlock_irqrestore(&eth->page_lock, flags);
 
 	return NETDEV_TX_OK;
 
 drop:
+	spin_unlock_irqrestore(&eth->page_lock, flags);
 	stats->tx_dropped++;
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 5/8] net: mediatek: fix mtk_pending_work
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

The driver supports 2 MACs. Both run on the same DMA ring. If we hit a TX
timeout we need to stop both netdevs before restarting them again. If we
don't do this, mtk_stop() won't shut down DMA and the subsequent call to
mtk_open() won't restart DMA and enable IRQs.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |   31 ++++++++++++++++++---------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 4ebc42e..60b66ab 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1430,19 +1430,30 @@ static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static void mtk_pending_work(struct work_struct *work)
 {
-	struct mtk_mac *mac = container_of(work, struct mtk_mac, pending_work);
-	struct mtk_eth *eth = mac->hw;
-	struct net_device *dev = eth->netdev[mac->id];
-	int err;
+	struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
+	int err, i;
+	unsigned long restart = 0;
 
 	rtnl_lock();
-	mtk_stop(dev);
 
-	err = mtk_open(dev);
-	if (err) {
-		netif_alert(eth, ifup, dev,
-			    "Driver up/down cycle failed, closing device.\n");
-		dev_close(dev);
+	/* stop all devices to make sure that dma is properly shut down */
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!netif_oper_up(eth->netdev[i]))
+			continue;
+		mtk_stop(eth->netdev[i]);
+		__set_bit(i, &restart);
+	}
+
+	/* restart DMA and enable IRQs */
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!test_bit(i, &restart))
+			continue;
+		err = mtk_open(eth->netdev[i]);
+		if (err) {
+			netif_alert(eth, ifup, eth->netdev[i],
+			      "Driver up/down cycle failed, closing device.\n");
+			dev_close(eth->netdev[i]);
+		}
 	}
 	rtnl_unlock();
 }
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 4/8] net: mediatek: fix stop and wakeup of queue
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

The driver supports 2 MACs. Both run on the same DMA ring. If we go
above/below the TX rings threshold value, we always need to wake/stop
the queue of both devices. Not doing so can cause TX stalls and packet
drops on one of the devices.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |   37 +++++++++++++++++++--------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index a4982e4..4ebc42e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -684,6 +684,28 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
 	return nfrags;
 }
 
+static void mtk_wake_queue(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		netif_wake_queue(eth->netdev[i]);
+	}
+}
+
+static void mtk_stop_queue(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		netif_stop_queue(eth->netdev[i]);
+	}
+}
+
 static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
@@ -695,7 +717,7 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_num = mtk_cal_txd_req(skb);
 	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
-		netif_stop_queue(dev);
+		mtk_stop_queue(eth);
 		netif_err(eth, tx_queued, dev,
 			  "Tx Ring full when queue awake!\n");
 		return NETDEV_TX_BUSY;
@@ -720,10 +742,10 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 
 	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
-		netif_stop_queue(dev);
+		mtk_stop_queue(eth);
 		if (unlikely(atomic_read(&ring->free_count) >
 			     ring->thresh))
-			netif_wake_queue(dev);
+			mtk_wake_queue(eth);
 	}
 
 	return NETDEV_TX_OK;
@@ -897,13 +919,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 	if (!total)
 		return 0;
 
-	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		if (!eth->netdev[i] ||
-		    unlikely(!netif_queue_stopped(eth->netdev[i])))
-			continue;
-		if (atomic_read(&ring->free_count) > ring->thresh)
-			netif_wake_queue(eth->netdev[i]);
-	}
+	if (atomic_read(&ring->free_count) > ring->thresh)
+		mtk_wake_queue(eth);
 
 	return total;
 }
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 3/8] net: mediatek: remove superfluous reset call
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

HW reset is triggered in the mtk_hw_init() function. There is no need to
also reset the core during probe.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |    4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 94cceb8..a4982e4 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1679,10 +1679,6 @@ static int mtk_probe(struct platform_device *pdev)
 	struct mtk_eth *eth;
 	int err;
 
-	err = device_reset(&pdev->dev);
-	if (err)
-		return err;
-
 	match = of_match_device(of_mtk_match, &pdev->dev);
 	soc = (struct mtk_soc_data *)match->data;
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 2/8] net: mediatek: mtk_cal_txd_req() returns bad value
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

The code used to also support the PDMA engine, which had 2 packet pointers
per descriptor. Because of this we had to divide the result by 2 and round
it up. This is no longer needed as the code only supports QDMA.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index bb10d57..94cceb8 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -681,7 +681,7 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
 		nfrags += skb_shinfo(skb)->nr_frags;
 	}
 
-	return DIV_ROUND_UP(nfrags, 2);
+	return nfrags;
 }
 
 static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 1/8] net: mediatek: watchdog_timeo was not set
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, Matthias Brugger,
	Sean Wang (王志亘), netdev, linux-mediatek,
	linux-kernel, John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic@openwrt.org>

The original commit failed to set watchdog_timeo. This patch sets
watchdog_timeo to HZ.

Signed-off-by: John Crispin <blogic@openwrt.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e0b68af..bb10d57 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1645,6 +1645,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
+	eth->netdev[id]->watchdog_timeo = HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
 	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 0/8] net: mediatek: make the driver pass stress tests
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin

While testing the driver we managed to get the TX path to stall and fail
to recover. When dual MAC support was added to the driver, the whole queue
stop/wake code was not properly adapted. There was also a regression in the
locking of the xmit function. The fact that watchdog_timeo was not set and
that the tx_timeout code failed to properly reset the dma, irq and queue
just made the mess complete.

This series makes the driver pass stress testing. With this series applied
the testbed has been running for several days and still has not locked up.
We have a second setup that has a small hack patch applied to randomly stop
irqs and/or one of the queues and successfully manages to recover from these
simulated tx stalls.

John Crispin (8):
  net: mediatek: watchdog_timeo was not set
  net: mediatek: mtk_cal_txd_req() returns bad value
  net: mediatek: remove superfluous reset call
  net: mediatek: fix stop and wakeup of queue
  net: mediatek: fix mtk_pending_work
  net: mediatek: fix TX locking
  net: mediatek: move the pending_work struct to the device generic
    struct
  net: mediatek: do not set the QID field in the TX DMA descriptors

 drivers/net/ethernet/mediatek/mtk_eth_soc.c |  106 ++++++++++++++++-----------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h |    4 +-
 2 files changed, 66 insertions(+), 44 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* [RFC PATCH] possible bug in handling of ipv4 route caching
From: Chris Friesen @ 2016-04-07 19:17 UTC (permalink / raw)
  To: netdev

Hi,

We think we may have found a bug in the handling of ipv4 route caching,
and are curious what you think.

For local routes that require a particular output interface we do not
want to cache the result.  Caching the result causes incorrect behaviour
when there are multiple source addresses on the interface.  The end
result being that if the intended recipient is waiting on that interface
for the packet he won't receive it because it will be delivered on the
loopback interface and the IP_PKTINFO ipi_ifindex will be set to the
loopback interface as well.

This can be tested by running a program such as "dhcp_release" which
attempts to inject a packet on a particular interface so that it is
received by another program on the same board.  The receiving process
should see an IP_PKTINFO ipi_ifindex value of the source interface
(e.g., eth1) instead of the loopback interface (e.g., lo).  The packet
will still appear on the loopback interface in tcpdump but the important
aspect is that the CMSG info is correct.

For what it's worth, here's a patch that we've applied locally to deal
with the issue.

Chris

Signed-off-by: Allain Legacy <allain.legacy@windriver.com>
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 02c6229..e965d4b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2045,6 +2045,17 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 		 */
 		if (fi && res->prefixlen < 4)
 			fi = NULL;
+	} else if ((type == RTN_LOCAL) && (orig_oif != 0)) {
+		/* For local routes that require a particular output interface
+                 * we do not want to cache the result.  Caching the result
+                 * causes incorrect behaviour when there are multiple source
+                 * addresses on the interface, the end result being that if the
+                 * intended recipient is waiting on that interface for the
+                 * packet he won't receive it because it will be delivered on
+                 * the loopback interface and the IP_PKTINFO ipi_ifindex will
+                 * be set to the loopback interface as well.
+		 */
+		fi = NULL;
 	}

 	fnhe = NULL;

^ permalink raw reply related

* Re: [RFC PATCH net 3/4] ipv6: datagram: Update dst cache of a connected datagram sk during pmtu update
From: Martin KaFai Lau @ 2016-04-07 19:09 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, Eric Dumazet, Wei Wang, Kernel Team
In-Reply-To: <CAM_iQpXbOj_zuQqHUie_yBOXqBEehHhF2FU8FA+tr7hgYY4QZg@mail.gmail.com>

On Thu, Apr 07, 2016 at 11:37:10AM -0700, Cong Wang wrote:
> You are lost in discussion
Indeed. :(

>
> I still think it is okay without the lock, because even if you take the lock,
> the pmtu update could still happen after you release it, so there is no
> essential difference here. The only reason I can think of for taking
> the sock lock is protecting parallel pmtu update, but it looks safe for
> this case too.
>
> So which case do you want to protect by taking the sock lock?
When the pmtu-update is doing route lookup and another connect is
happening, what sk->sk_v6_daddr will this route lookup use?
the old one, new one or neither of them?

Is it acceptable that getsockopt() is returning something that it
is not currently connected to? and potentially somewhere that it
is never connected to?

^ permalink raw reply

* Re: [PATCH v6 net-next] net: ipv4: Consider failed nexthops in multipath routes
From: Julian Anastasov @ 2016-04-07 18:58 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev
In-Reply-To: <1460038860-25670-1-git-send-email-dsa@cumulusnetworks.com>


	Hello,

On Thu, 7 Apr 2016, David Ahern wrote:

> Multipath route lookups should consider knowledge about next hops and not
> select a hop that is known to be failed.
> 
> Example:
> 
>                      [h2]                   [h3]   15.0.0.5
>                       |                      |
>                      3|                     3|
>                     [SP1]                  [SP2]--+
>                      1  2                   1     2
>                      |  |     /-------------+     |
>                      |   \   /                    |
>                      |     X                      |
>                      |    / \                     |
>                      |   /   \---------------\    |
>                      1  2                     1   2
>          12.0.0.2  [TOR1] 3-----------------3 [TOR2] 12.0.0.3
>                      4                         4
>                       \                       /
>                         \                    /
>                          \                  /
>                           -------|   |-----/
>                                  1   2
>                                 [TOR3]
>                                   3|
>                                    |
>                                   [h1]  12.0.0.1
> 
> host h1 with IP 12.0.0.1 has 2 paths to host h3 at 15.0.0.5:
> 
>     root@h1:~# ip ro ls
>     ...
>     12.0.0.0/24 dev swp1  proto kernel  scope link  src 12.0.0.1
>     15.0.0.0/16
>             nexthop via 12.0.0.2  dev swp1 weight 1
>             nexthop via 12.0.0.3  dev swp1 weight 1
>     ...
> 
> If the link between tor3 and tor1 is down and the link between tor1
> and tor2 is also down, then tor1 is effectively cut off from h1. Yet the route lookups
> in h1 are alternating between the 2 routes: ping 15.0.0.5 gets one and
> ssh 15.0.0.5 gets the other. Connections that attempt to use the
> 12.0.0.2 nexthop fail since that neighbor is not reachable:
> 
>     root@h1:~# ip neigh show
>     ...
>     12.0.0.3 dev swp1 lladdr 00:02:00:00:00:1b REACHABLE
>     12.0.0.2 dev swp1  FAILED
>     ...
> 
> The failed path can be avoided by considering known neighbor information
> when selecting next hops. If the neighbor lookup fails we have no
> knowledge about the nexthop, so give it a shot. If there is an entry
> then only select the nexthop if the state is sane. This is similar to
> what fib_detect_death does.
> 
> To maintain backward compatibility use of the neighbor information is
> based on a new sysctl, fib_multipath_use_neigh.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>

Reviewed-by: Julian Anastasov <ja@ssi.bg>

> ---
> v6
> - changed __neigh_lookup_noref to __ipv4_neigh_lookup_noref per Dave's
>   comment
> 
> v5
> - returned comma that got lost in the ether and removed resetting of
>   nhsel at end of loop - again comments from Julian
> 
> v4
> - remove NULL initializer and logic for fallback per Julian's comment
> 
> v3
> - Julian comments: changed use of dead in documentation to failed,
>   init state to NUD_REACHABLE which simplifies fib_good_nh, use of
>   nh_dev for neighbor lookup, fallback to first entry which is what
>   current logic does
> 
> v2
> - use rcu locking to avoid refcnts per Eric's suggestion
> - only consider neighbor info for nh_scope == RT_SCOPE_LINK per Julian's
>   comment
> - drop the 'state == NUD_REACHABLE' from the state check since it is
>   part of NUD_VALID (comment from Julian)
> - wrapped the use of the neigh in a sysctl
> 
>  Documentation/networking/ip-sysctl.txt | 10 ++++++++++
>  include/net/netns/ipv4.h               |  3 +++
>  net/ipv4/fib_semantics.c               | 34 +++++++++++++++++++++++++++++-----
>  net/ipv4/sysctl_net_ipv4.c             | 11 +++++++++++
>  4 files changed, 53 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
> index b183e2b606c8..6c7f365b1515 100644
> --- a/Documentation/networking/ip-sysctl.txt
> +++ b/Documentation/networking/ip-sysctl.txt
> @@ -63,6 +63,16 @@ fwmark_reflect - BOOLEAN
>  	fwmark of the packet they are replying to.
>  	Default: 0
>  
> +fib_multipath_use_neigh - BOOLEAN
> +	Use status of existing neighbor entry when determining nexthop for
> +	multipath routes. If disabled, neighbor information is not used and
> +	packets could be directed to a failed nexthop. Only valid for kernels
> +	built with CONFIG_IP_ROUTE_MULTIPATH enabled.
> +	Default: 0 (disabled)
> +	Possible values:
> +	0 - disabled
> +	1 - enabled
> +
>  route/max_size - INTEGER
>  	Maximum number of routes allowed in the kernel.  Increase
>  	this when using large numbers of interfaces and/or routes.
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index a69cde3ce460..d061ffeb1e71 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -133,6 +133,9 @@ struct netns_ipv4 {
>  	struct fib_rules_ops	*mr_rules_ops;
>  #endif
>  #endif
> +#ifdef CONFIG_IP_ROUTE_MULTIPATH
> +	int sysctl_fib_multipath_use_neigh;
> +#endif
>  	atomic_t	rt_genid;
>  };
>  #endif
> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index d97268e8ff10..ab64d9f2eef9 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -1559,21 +1559,45 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
>  }
>  
>  #ifdef CONFIG_IP_ROUTE_MULTIPATH
> +static bool fib_good_nh(const struct fib_nh *nh)
> +{
> +	int state = NUD_REACHABLE;
> +
> +	if (nh->nh_scope == RT_SCOPE_LINK) {
> +		struct neighbour *n;
> +
> +		rcu_read_lock_bh();
> +
> +		n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw);
> +		if (n)
> +			state = n->nud_state;
> +
> +		rcu_read_unlock_bh();
> +	}
> +
> +	return !!(state & NUD_VALID);
> +}
>  
>  void fib_select_multipath(struct fib_result *res, int hash)
>  {
>  	struct fib_info *fi = res->fi;
> +	struct net *net = fi->fib_net;
> +	bool first = false;
>  
>  	for_nexthops(fi) {
>  		if (hash > atomic_read(&nh->nh_upper_bound))
>  			continue;
>  
> -		res->nh_sel = nhsel;
> -		return;
> +		if (!net->ipv4.sysctl_fib_multipath_use_neigh ||
> +		    fib_good_nh(nh)) {
> +			res->nh_sel = nhsel;
> +			return;
> +		}
> +		if (!first) {
> +			res->nh_sel = nhsel;
> +			first = true;
> +		}
>  	} endfor_nexthops(fi);
> -
> -	/* Race condition: route has just become dead. */
> -	res->nh_sel = 0;
>  }
>  #endif
>  
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 1e1fe6086dd9..bb0419582b8d 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -960,6 +960,17 @@ static struct ctl_table ipv4_net_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec,
>  	},
> +#ifdef CONFIG_IP_ROUTE_MULTIPATH
> +	{
> +		.procname	= "fib_multipath_use_neigh",
> +		.data		= &init_net.ipv4.sysctl_fib_multipath_use_neigh,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &one,
> +	},
> +#endif
>  	{ }
>  };
>  
> -- 
> 2.1.4

Regards

^ permalink raw reply

* Re: [RFC PATCH 0/2] selinux: avoid nf hooks overhead when not needed
From: Paul Moore @ 2016-04-07 18:55 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Paolo Abeni, linux-security-module, David S. Miller, James Morris,
	Andreas Gruenbacher, Stephen Smalley, netdev, selinux
In-Reply-To: <20160406234532.GA731@breakpoint.cc>

On Thursday, April 07, 2016 01:45:32 AM Florian Westphal wrote:
> Paul Moore <paul@paul-moore.com> wrote:
> > On Wed, Apr 6, 2016 at 6:14 PM, Florian Westphal <fw@strlen.de> wrote:
> > > netfilter hooks are per namespace -- so there is hook unregister when
> > > netns is destroyed.
> > 
> > Looking around, I see the global and per-namespace registration
> > functions (nf_register_hook and nf_register_net_hook, respectively),
> > but I'm looking to see if/how newly created namespace inherit
> > netfilter hooks from the init network namespace ... if you can create
> > a network namespace and dodge the SELinux hooks, that isn't a good
> > thing from a SELinux point of view, although it might be a plus
> > depending on where you view Paolo's original patches ;)
> 
> Heh :-)
> 
> If you use nf_register_net_hook, the hook is only registered in the
> namespace.
> 
> If you use nf_register_hook, the hook is put on a global list and
> registered in all existing namespaces.
> 
> New namespaces will have the hook added as well (see
> netfilter_net_init -> nf_register_hook_list in netfilter/core.c )
>
> Since nf_register_hook is used it should be impossible to get a netns
> that doesn't call these hooks.

Great, thanks.
 
> > > Do you think it makes sense to rework the patch to delay registering
> > > of the netfilter hooks until the system is in a state where they're
> > > needed, without the 'unregister' aspect?
> > 
> > I would need to see the patch to say for certain, but in principle
> > that seems perfectly reasonable and I think would satisfy both the
> > netdev and SELinux camps - good suggestion.  My main goal is to drop
> > the selinux_nf_ip_init() entirely so it can't be used as a ROP gadget.
> > 
> > We might even be able to trim the secmark_active and peerlbl_active
> > checks in the SELinux netfilter hooks (an earlier attempt at
> > optimization; contrary to popular belief, I do care about SELinux
> > performance), although that would mean that enabling the network
> > access controls would be one way ... I guess you can disregard that
> > last bit, I'm thinking aloud again.
> 
> One way is fine I think.

Yes, just disregard my second paragraph above.
 
> > > Ideally this would even be per netns -- in perfect world we would
> > > be able to make it so that a new netns are created with an empty
> > > hook list.
> > 
> > In general SELinux doesn't care about namespaces, for reasons that are
> > sorta beyond the scope of this conversation, so I would like to stick
> > to a all or nothing approach to enabling the SELinux netfilter hooks
> > across namespaces.  Perhaps we can revisit this at a later time, but
> > let's keep it simple right now.
> 
> Okay, I'd prefer to stick to your recommendation anyway wrt. to selinux
> (Casey, I read your comment regarding smack. Noted, we don't want to
> break smack either...)
> 
> I think that in this case the entire question is:
> 
> In your experience, how likely is a config where selinux is enabled BUT the
> hooks are not needed (i.e., where we hit the
> 
> if (!selinux_policycap_netpeer)
>     return NF_ACCEPT;
> 
> if (!secmark_active && !peerlbl_active)
>    return NF_ACCEPT;
> 
> tests inside the hooks)?  If such setups are uncommon we should just
> drop this idea or at least put it on the back burner until the more
> expensive netfilter hooks (conntrack, cough) are out of the way.

A few years ago I would have said that it is relatively uncommon for admins to 
enable the SELinux network access controls; it was typically just 
government/intelligence agencies who had very strict access control 
requirements and represented a small portion of SELinux users.  However, over 
the past few years I've been fielding more and more questions from admins/devs 
in the virtualization space who are interested in some of these capabilities; 
it isn't clear to me how many of these people are switching it on, but there 
is definitely more interest than I have seen in the past and the interest is 
centered around some rather common use cases.

So, to summarize, I don't know ;)

If you've got bigger sources of overhead, my opinion would be to go tackle 
those first.  Perhaps I can even find the time to work on the 
SELinux/netfilter stuff while you are off slaying the bigger dragons, no 
promises at the moment.

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [PATCH 1/9] net: mediatek: update the IRQ part of the binding document
From: David Miller @ 2016-04-07 18:50 UTC (permalink / raw)
  To: blogic-p3rKhJxN3npAfugRpC6u6w
  Cc: nbd-p3rKhJxN3npAfugRpC6u6w, matthias.bgg-Re5JQEeQqe8AvxtiuMwx3w,
	sean.wang-NuS5LvNUpcJWk0Htik3J/w, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	devicetree-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1460051876-53135-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>


Every patch series must begin with a posting labelled "[PATCH 0/9] ..."
which explains what the series is doing, how it is implementing that,
and why it is implemented that way.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH v5 net-next 15/15] nfp: allow ring size reconfiguration at runtime
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Since many of the required changes have already been made for
changing MTU at runtime let's use it for ring size changes as
well.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |   1 +
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 126 +++++++++++++++++++++
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   |  30 ++---
 3 files changed, 136 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 9ab8e3967dc9..3d53fcf323eb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -724,6 +724,7 @@ void nfp_net_rss_write_key(struct nfp_net *nn);
 void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
 int nfp_net_irqs_alloc(struct nfp_net *nn);
 void nfp_net_irqs_disable(struct nfp_net *nn);
+int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt);
 
 #ifdef CONFIG_NFP_NET_DEBUG
 void nfp_net_debugfs_create(void);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index c4f0c70e77ce..0bdff390c958 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1444,6 +1444,59 @@ err_alloc:
 	return -ENOMEM;
 }
 
+static struct nfp_net_tx_ring *
+nfp_net_shadow_tx_rings_prepare(struct nfp_net *nn, u32 buf_cnt)
+{
+	struct nfp_net_tx_ring *rings;
+	unsigned int r;
+
+	rings = kcalloc(nn->num_tx_rings, sizeof(*rings), GFP_KERNEL);
+	if (!rings)
+		return NULL;
+
+	for (r = 0; r < nn->num_tx_rings; r++) {
+		nfp_net_tx_ring_init(&rings[r], nn->tx_rings[r].r_vec, r);
+
+		if (nfp_net_tx_ring_alloc(&rings[r], buf_cnt))
+			goto err_free_prev;
+	}
+
+	return rings;
+
+err_free_prev:
+	while (r--)
+		nfp_net_tx_ring_free(&rings[r]);
+	kfree(rings);
+	return NULL;
+}
+
+static struct nfp_net_tx_ring *
+nfp_net_shadow_tx_rings_swap(struct nfp_net *nn, struct nfp_net_tx_ring *rings)
+{
+	struct nfp_net_tx_ring *old = nn->tx_rings;
+	unsigned int r;
+
+	for (r = 0; r < nn->num_tx_rings; r++)
+		old[r].r_vec->tx_ring = &rings[r];
+
+	nn->tx_rings = rings;
+	return old;
+}
+
+static void
+nfp_net_shadow_tx_rings_free(struct nfp_net *nn, struct nfp_net_tx_ring *rings)
+{
+	unsigned int r;
+
+	if (!rings)
+		return;
+
+	for (r = 0; r < nn->num_tx_rings; r++)
+		nfp_net_tx_ring_free(&rings[r]);
+
+	kfree(rings);
+}
+
 /**
  * nfp_net_rx_ring_free() - Free resources allocated to a RX ring
  * @rx_ring:  RX ring to free
@@ -1560,6 +1613,9 @@ nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
 {
 	unsigned int r;
 
+	if (!rings)
+		return;
+
 	for (r = 0; r < nn->num_r_vecs; r++) {
 		nfp_net_rx_ring_bufs_free(nn, &rings[r]);
 		nfp_net_rx_ring_free(&rings[r]);
@@ -2104,6 +2160,76 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
 	return err;
 }
 
+int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
+{
+	struct nfp_net_tx_ring *tx_rings = NULL;
+	struct nfp_net_rx_ring *rx_rings = NULL;
+	u32 old_rxd_cnt, old_txd_cnt;
+	int err;
+
+	if (!netif_running(nn->netdev)) {
+		nn->rxd_cnt = rxd_cnt;
+		nn->txd_cnt = txd_cnt;
+		return 0;
+	}
+
+	old_rxd_cnt = nn->rxd_cnt;
+	old_txd_cnt = nn->txd_cnt;
+
+	/* Prepare new rings */
+	if (nn->rxd_cnt != rxd_cnt) {
+		rx_rings = nfp_net_shadow_rx_rings_prepare(nn, nn->fl_bufsz,
+							   rxd_cnt);
+		if (!rx_rings)
+			return -ENOMEM;
+	}
+	if (nn->txd_cnt != txd_cnt) {
+		tx_rings = nfp_net_shadow_tx_rings_prepare(nn, txd_cnt);
+		if (!tx_rings) {
+			nfp_net_shadow_rx_rings_free(nn, rx_rings);
+			return -ENOMEM;
+		}
+	}
+
+	/* Stop device, swap in new rings, try to start the firmware */
+	nfp_net_close_stack(nn);
+	nfp_net_clear_config_and_disable(nn);
+
+	if (rx_rings)
+		rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings);
+	if (tx_rings)
+		tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings);
+
+	nn->rxd_cnt = rxd_cnt;
+	nn->txd_cnt = txd_cnt;
+
+	err = nfp_net_set_config_and_enable(nn);
+	if (err) {
+		const int err_new = err;
+
+		/* Try with old configuration and old rings */
+		if (rx_rings)
+			rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings);
+		if (tx_rings)
+			tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings);
+
+		nn->rxd_cnt = old_rxd_cnt;
+		nn->txd_cnt = old_txd_cnt;
+
+		err = __nfp_net_set_config_and_enable(nn);
+		if (err)
+			nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
+			       err_new, err);
+	}
+
+	nfp_net_shadow_rx_rings_free(nn, rx_rings);
+	nfp_net_shadow_tx_rings_free(nn, tx_rings);
+
+	nfp_net_open_stack(nn);
+
+	return err;
+}
+
 static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
 						struct rtnl_link_stats64 *stats)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 9a4084a68db5..ccfef1f17627 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -153,37 +153,25 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
 	struct nfp_net *nn = netdev_priv(netdev);
 	u32 rxd_cnt, txd_cnt;
 
-	if (netif_running(netdev)) {
-		/* Some NIC drivers allow reconfiguration on the fly,
-		 * some down the interface, change and then up it
-		 * again.  For now we don't allow changes when the
-		 * device is up.
-		 */
-		nn_warn(nn, "Can't change rings while device is up\n");
-		return -EBUSY;
-	}
-
 	/* We don't have separate queues/rings for small/large frames. */
 	if (ring->rx_mini_pending || ring->rx_jumbo_pending)
 		return -EINVAL;
 
 	/* Round up to supported values */
 	rxd_cnt = roundup_pow_of_two(ring->rx_pending);
-	rxd_cnt = max_t(u32, rxd_cnt, NFP_NET_MIN_RX_DESCS);
-	rxd_cnt = min_t(u32, rxd_cnt, NFP_NET_MAX_RX_DESCS);
-
 	txd_cnt = roundup_pow_of_two(ring->tx_pending);
-	txd_cnt = max_t(u32, txd_cnt, NFP_NET_MIN_TX_DESCS);
-	txd_cnt = min_t(u32, txd_cnt, NFP_NET_MAX_TX_DESCS);
 
-	if (nn->rxd_cnt != rxd_cnt || nn->txd_cnt != txd_cnt)
-		nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n",
-		       nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt);
+	if (rxd_cnt < NFP_NET_MIN_RX_DESCS || rxd_cnt > NFP_NET_MAX_RX_DESCS ||
+	    txd_cnt < NFP_NET_MIN_TX_DESCS || txd_cnt > NFP_NET_MAX_TX_DESCS)
+		return -EINVAL;
 
-	nn->rxd_cnt = rxd_cnt;
-	nn->txd_cnt = txd_cnt;
+	if (nn->rxd_cnt == rxd_cnt && nn->txd_cnt == txd_cnt)
+		return 0;
 
-	return 0;
+	nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n",
+	       nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt);
+
+	return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt);
 }
 
 static void nfp_net_get_strings(struct net_device *netdev,
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 14/15] nfp: pass ring count as function parameter
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Soon ring resize will call these functions with values
different from the current configuration, so we need to
explicitly pass the ring count as a parameter.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 23 +++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index e7c420fdcb0d..c4f0c70e77ce 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1407,17 +1407,18 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
 /**
  * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
  * @tx_ring:   TX Ring structure to allocate
+ * @cnt:       Ring buffer count
  *
  * Return: 0 on success, negative errno otherwise.
  */
-static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring)
+static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt)
 {
 	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
 	struct pci_dev *pdev = nn->pdev;
 	int sz;
 
-	tx_ring->cnt = nn->txd_cnt;
+	tx_ring->cnt = cnt;
 
 	tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
 	tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size,
@@ -1470,18 +1471,20 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
  * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
  * @rx_ring:  RX ring to allocate
  * @fl_bufsz: Size of buffers to allocate
+ * @cnt:      Ring buffer count
  *
  * Return: 0 on success, negative errno otherwise.
  */
 static int
-nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz)
+nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz,
+		      u32 cnt)
 {
 	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
 	struct pci_dev *pdev = nn->pdev;
 	int sz;
 
-	rx_ring->cnt = nn->rxd_cnt;
+	rx_ring->cnt = cnt;
 	rx_ring->bufsz = fl_bufsz;
 
 	rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
@@ -1507,7 +1510,8 @@ err_alloc:
 }
 
 static struct nfp_net_rx_ring *
-nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz)
+nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz,
+				u32 buf_cnt)
 {
 	struct nfp_net_rx_ring *rings;
 	unsigned int r;
@@ -1519,7 +1523,7 @@ nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz)
 	for (r = 0; r < nn->num_rx_rings; r++) {
 		nfp_net_rx_ring_init(&rings[r], nn->rx_rings[r].r_vec, r);
 
-		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz))
+		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, buf_cnt))
 			goto err_free_prev;
 
 		if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r]))
@@ -1878,12 +1882,12 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		if (err)
 			goto err_free_prev_vecs;
 
-		err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring);
+		err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring, nn->txd_cnt);
 		if (err)
 			goto err_cleanup_vec_p;
 
 		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring,
-					    nn->fl_bufsz);
+					    nn->fl_bufsz, nn->rxd_cnt);
 		if (err)
 			goto err_free_tx_ring_p;
 
@@ -2063,7 +2067,8 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
 	}
 
 	/* Prepare new rings */
-	tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz);
+	tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz,
+						    nn->rxd_cnt);
 	if (!tmp_rings)
 		return -ENOMEM;
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 13/15] nfp: convert .ndo_change_mtu() to prepare/commit paradigm
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

When changing MTU on a running device, first allocate new rings
and buffers, and once that succeeds proceed with changing the MTU.

Allocation of new rings is not really necessary for this
operation - it's done to keep the code simple and because
size of the extra ring memory is quite small compared to
the size of buffers.

Operation can still fail midway through if FW communication
times out.  In that case we retry with old MTU (rings).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 108 +++++++++++++++++++--
 1 file changed, 102 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 03c60f755de0..e7c420fdcb0d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1506,6 +1506,64 @@ err_alloc:
 	return -ENOMEM;
 }
 
+static struct nfp_net_rx_ring *
+nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz)
+{
+	struct nfp_net_rx_ring *rings;
+	unsigned int r;
+
+	rings = kcalloc(nn->num_rx_rings, sizeof(*rings), GFP_KERNEL);
+	if (!rings)
+		return NULL;
+
+	for (r = 0; r < nn->num_rx_rings; r++) {
+		nfp_net_rx_ring_init(&rings[r], nn->rx_rings[r].r_vec, r);
+
+		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz))
+			goto err_free_prev;
+
+		if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r]))
+			goto err_free_ring;
+	}
+
+	return rings;
+
+err_free_prev:
+	while (r--) {
+		nfp_net_rx_ring_bufs_free(nn, &rings[r]);
+err_free_ring:
+		nfp_net_rx_ring_free(&rings[r]);
+	}
+	kfree(rings);
+	return NULL;
+}
+
+static struct nfp_net_rx_ring *
+nfp_net_shadow_rx_rings_swap(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
+{
+	struct nfp_net_rx_ring *old = nn->rx_rings;
+	unsigned int r;
+
+	for (r = 0; r < nn->num_rx_rings; r++)
+		old[r].r_vec->rx_ring = &rings[r];
+
+	nn->rx_rings = rings;
+	return old;
+}
+
+static void
+nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
+{
+	unsigned int r;
+
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		nfp_net_rx_ring_bufs_free(nn, &rings[r]);
+		nfp_net_rx_ring_free(&rings[r]);
+	}
+
+	kfree(rings);
+}
+
 static int
 nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 		       int idx)
@@ -1984,23 +2042,61 @@ static void nfp_net_set_rx_mode(struct net_device *netdev)
 
 static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
 {
+	unsigned int old_mtu, old_fl_bufsz, new_fl_bufsz;
 	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net_rx_ring *tmp_rings;
+	int err;
 
 	if (new_mtu < 68 || new_mtu > nn->max_mtu) {
 		nn_err(nn, "New MTU (%d) is not valid\n", new_mtu);
 		return -EINVAL;
 	}
 
+	old_mtu = netdev->mtu;
+	old_fl_bufsz = nn->fl_bufsz;
+	new_fl_bufsz = NFP_NET_MAX_PREPEND + ETH_HLEN + VLAN_HLEN * 2 + new_mtu;
+
+	if (!netif_running(netdev)) {
+		netdev->mtu = new_mtu;
+		nn->fl_bufsz = new_fl_bufsz;
+		return 0;
+	}
+
+	/* Prepare new rings */
+	tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz);
+	if (!tmp_rings)
+		return -ENOMEM;
+
+	/* Stop device, swap in new rings, try to start the firmware */
+	nfp_net_close_stack(nn);
+	nfp_net_clear_config_and_disable(nn);
+
+	tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings);
+
 	netdev->mtu = new_mtu;
-	nn->fl_bufsz = NFP_NET_MAX_PREPEND + ETH_HLEN + VLAN_HLEN * 2 + new_mtu;
+	nn->fl_bufsz = new_fl_bufsz;
+
+	err = nfp_net_set_config_and_enable(nn);
+	if (err) {
+		const int err_new = err;
+
+		/* Try with old configuration and old rings */
+		tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings);
+
+		netdev->mtu = old_mtu;
+		nn->fl_bufsz = old_fl_bufsz;
 
-	/* restart if running */
-	if (netif_running(netdev)) {
-		nfp_net_netdev_close(netdev);
-		nfp_net_netdev_open(netdev);
+		err = __nfp_net_set_config_and_enable(nn);
+		if (err)
+			nn_err(nn, "Can't restore MTU - FW communication failed (%d,%d)\n",
+			       err_new, err);
 	}
 
-	return 0;
+	nfp_net_shadow_rx_rings_free(nn, tmp_rings);
+
+	nfp_net_open_stack(nn);
+
+	return err;
 }
 
 static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 12/15] nfp: propagate list buffer size in struct rx_ring
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Free list buffer size needs to be propagated to a few functions
as a parameter and added to struct nfp_net_rx_ring, since soon
some of the functions will be reused to manage rings with
buffers of size different than nn->fl_bufsz.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  3 +++
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 24 ++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index fc005c982b7d..9ab8e3967dc9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -298,6 +298,8 @@ struct nfp_net_rx_buf {
  * @rxds:       Virtual address of FL/RX ring in host memory
  * @dma:        DMA address of the FL/RX ring
  * @size:       Size, in bytes, of the FL/RX ring (needed to free)
+ * @bufsz:	Buffer allocation size for convenience of management routines
+ *		(NOTE: this is in second cache line, do not use on fast path!)
  */
 struct nfp_net_rx_ring {
 	struct nfp_net_r_vector *r_vec;
@@ -319,6 +321,7 @@ struct nfp_net_rx_ring {
 
 	dma_addr_t dma;
 	unsigned int size;
+	unsigned int bufsz;
 } ____cacheline_aligned;
 
 /**
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index ed23b9d348c3..03c60f755de0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -957,25 +957,27 @@ static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring)
  * nfp_net_rx_alloc_one() - Allocate and map skb for RX
  * @rx_ring:	RX ring structure of the skb
  * @dma_addr:	Pointer to storage for DMA address (output param)
+ * @fl_bufsz:	size of freelist buffers
  *
  * This function will allcate a new skb, map it for DMA.
  *
  * Return: allocated skb or NULL on failure.
  */
 static struct sk_buff *
-nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr)
+nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr,
+		     unsigned int fl_bufsz)
 {
 	struct nfp_net *nn = rx_ring->r_vec->nfp_net;
 	struct sk_buff *skb;
 
-	skb = netdev_alloc_skb(nn->netdev, nn->fl_bufsz);
+	skb = netdev_alloc_skb(nn->netdev, fl_bufsz);
 	if (!skb) {
 		nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n");
 		return NULL;
 	}
 
 	*dma_addr = dma_map_single(&nn->pdev->dev, skb->data,
-				  nn->fl_bufsz, DMA_FROM_DEVICE);
+				   fl_bufsz, DMA_FROM_DEVICE);
 	if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
 		dev_kfree_skb_any(skb);
 		nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
@@ -1068,7 +1070,7 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
 			continue;
 
 		dma_unmap_single(&pdev->dev, rx_ring->rxbufs[i].dma_addr,
-				 nn->fl_bufsz, DMA_FROM_DEVICE);
+				 rx_ring->bufsz, DMA_FROM_DEVICE);
 		dev_kfree_skb_any(rx_ring->rxbufs[i].skb);
 		rx_ring->rxbufs[i].dma_addr = 0;
 		rx_ring->rxbufs[i].skb = NULL;
@@ -1090,7 +1092,8 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
 
 	for (i = 0; i < rx_ring->cnt - 1; i++) {
 		rxbufs[i].skb =
-			nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr);
+			nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr,
+					     rx_ring->bufsz);
 		if (!rxbufs[i].skb) {
 			nfp_net_rx_ring_bufs_free(nn, rx_ring);
 			return -ENOMEM;
@@ -1278,7 +1281,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 		skb = rx_ring->rxbufs[idx].skb;
 
-		new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr);
+		new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr,
+					       nn->fl_bufsz);
 		if (!new_skb) {
 			nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb,
 					    rx_ring->rxbufs[idx].dma_addr);
@@ -1465,10 +1469,12 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 /**
  * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
  * @rx_ring:  RX ring to allocate
+ * @fl_bufsz: Size of buffers to allocate
  *
  * Return: 0 on success, negative errno otherwise.
  */
-static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring)
+static int
+nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz)
 {
 	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
@@ -1476,6 +1482,7 @@ static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring)
 	int sz;
 
 	rx_ring->cnt = nn->rxd_cnt;
+	rx_ring->bufsz = fl_bufsz;
 
 	rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
 	rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size,
@@ -1817,7 +1824,8 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		if (err)
 			goto err_cleanup_vec_p;
 
-		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring);
+		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring,
+					    nn->fl_bufsz);
 		if (err)
 			goto err_free_tx_ring_p;
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 11/15] nfp: sync ring state during FW reconfiguration
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

FW reconfiguration in .ndo_open()/.ndo_stop() should reset/
restore queue state.  Since we need IRQs to be disabled when
filling rings on RX path we have to move disable_irq() from
.ndo_open() all the way up to IRQ allocation.

nfp_net_start_vec() becomes trivial now so it's inlined.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 45 ++++++++--------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 6c1ed8914416..ed23b9d348c3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1519,6 +1519,7 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 		nn_err(nn, "Error requesting IRQ %d\n", entry->vector);
 		return err;
 	}
+	disable_irq(entry->vector);
 
 	/* Setup NAPI */
 	netif_napi_add(nn->netdev, &r_vec->napi,
@@ -1647,13 +1648,14 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
 
 	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
 	err = nfp_net_reconfig(nn, update);
-	if (err) {
+	if (err)
 		nn_err(nn, "Could not disable device: %d\n", err);
-		return;
-	}
 
-	for (r = 0; r < nn->num_r_vecs; r++)
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
+		nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring);
 		nfp_net_vec_clear_ring_data(nn, r);
+	}
 
 	nn->ctrl = new_ctrl;
 }
@@ -1721,6 +1723,9 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
 
 	nn->ctrl = new_ctrl;
 
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_rx_ring_fill_freelist(nn->r_vecs[r].rx_ring);
+
 	/* Since reconfiguration requests while NFP is down are ignored we
 	 * have to wipe the entire VXLAN configuration and reinitialize it.
 	 */
@@ -1749,26 +1754,6 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
 }
 
 /**
- * nfp_net_start_vec() - Start ring vector
- * @nn:      NFP Net device structure
- * @r_vec:   Ring vector to be started
- */
-static void
-nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
-{
-	unsigned int irq_vec;
-
-	irq_vec = nn->irq_entries[r_vec->irq_idx].vector;
-
-	disable_irq(irq_vec);
-
-	nfp_net_rx_ring_fill_freelist(r_vec->rx_ring);
-	napi_enable(&r_vec->napi);
-
-	enable_irq(irq_vec);
-}
-
-/**
  * nfp_net_open_stack() - Start the device from stack's perspective
  * @nn:      NFP Net device to reconfigure
  */
@@ -1776,8 +1761,10 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 {
 	unsigned int r;
 
-	for (r = 0; r < nn->num_r_vecs; r++)
-		nfp_net_start_vec(nn, &nn->r_vecs[r]);
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		napi_enable(&nn->r_vecs[r].napi);
+		enable_irq(nn->irq_entries[nn->r_vecs[r].irq_idx].vector);
+	}
 
 	netif_tx_wake_all_queues(nn->netdev);
 
@@ -1902,8 +1889,10 @@ static void nfp_net_close_stack(struct nfp_net *nn)
 	netif_carrier_off(nn->netdev);
 	nn->link_up = false;
 
-	for (r = 0; r < nn->num_r_vecs; r++)
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		disable_irq(nn->irq_entries[nn->r_vecs[r].irq_idx].vector);
 		napi_disable(&nn->r_vecs[r].napi);
+	}
 
 	netif_tx_disable(nn->netdev);
 }
@@ -1917,9 +1906,7 @@ static void nfp_net_close_free_all(struct nfp_net *nn)
 	unsigned int r;
 
 	for (r = 0; r < nn->num_r_vecs; r++) {
-		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
 		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
-		nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring);
 		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
 		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
 		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 10/15] nfp: slice .ndo_open() and .ndo_stop() up
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Divide .ndo_open() and .ndo_stop() into logical, callable
chunks.  No functional changes.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 218 +++++++++++++--------
 1 file changed, 136 insertions(+), 82 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 342335d09fb2..6c1ed8914416 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1672,6 +1672,82 @@ nfp_net_vec_write_ring_data(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), r_vec->irq_idx);
 }
 
+static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
+{
+	u32 new_ctrl, update = 0;
+	unsigned int r;
+	int err;
+
+	new_ctrl = nn->ctrl;
+
+	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
+		nfp_net_rss_write_key(nn);
+		nfp_net_rss_write_itbl(nn);
+		nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
+		update |= NFP_NET_CFG_UPDATE_RSS;
+	}
+
+	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
+		nfp_net_coalesce_write_cfg(nn);
+
+		new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
+		update |= NFP_NET_CFG_UPDATE_IRQMOD;
+	}
+
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r);
+
+	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
+		  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
+
+	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
+		  0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
+
+	nfp_net_write_mac_addr(nn, nn->netdev->dev_addr);
+
+	nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu);
+	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz);
+
+	/* Enable device */
+	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
+	update |= NFP_NET_CFG_UPDATE_GEN;
+	update |= NFP_NET_CFG_UPDATE_MSIX;
+	update |= NFP_NET_CFG_UPDATE_RING;
+	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
+		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
+
+	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+	err = nfp_net_reconfig(nn, update);
+
+	nn->ctrl = new_ctrl;
+
+	/* Since reconfiguration requests while NFP is down are ignored we
+	 * have to wipe the entire VXLAN configuration and reinitialize it.
+	 */
+	if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) {
+		memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
+		memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
+		vxlan_get_rx_port(nn->netdev);
+	}
+
+	return err;
+}
+
+/**
+ * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
+ * @nn:      NFP Net device to reconfigure
+ */
+static int nfp_net_set_config_and_enable(struct nfp_net *nn)
+{
+	int err;
+
+	err = __nfp_net_set_config_and_enable(nn);
+	if (err)
+		nfp_net_clear_config_and_disable(nn);
+
+	return err;
+}
+
 /**
  * nfp_net_start_vec() - Start ring vector
  * @nn:      NFP Net device structure
@@ -1692,20 +1768,33 @@ nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
 	enable_irq(irq_vec);
 }
 
+/**
+ * nfp_net_open_stack() - Start the device from stack's perspective
+ * @nn:      NFP Net device to reconfigure
+ */
+static void nfp_net_open_stack(struct nfp_net *nn)
+{
+	unsigned int r;
+
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_start_vec(nn, &nn->r_vecs[r]);
+
+	netif_tx_wake_all_queues(nn->netdev);
+
+	enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	nfp_net_read_link_status(nn);
+}
+
 static int nfp_net_netdev_open(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 	int err, r;
-	u32 update = 0;
-	u32 new_ctrl;
 
 	if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) {
 		nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl);
 		return -EBUSY;
 	}
 
-	new_ctrl = nn->ctrl;
-
 	/* Step 1: Allocate resources for rings and the like
 	 * - Request interrupts
 	 * - Allocate RX and TX ring resources
@@ -1758,20 +1847,6 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	if (err)
 		goto err_free_rings;
 
-	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
-		nfp_net_rss_write_key(nn);
-		nfp_net_rss_write_itbl(nn);
-		nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
-		update |= NFP_NET_CFG_UPDATE_RSS;
-	}
-
-	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
-		nfp_net_coalesce_write_cfg(nn);
-
-		new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
-		update |= NFP_NET_CFG_UPDATE_IRQMOD;
-	}
-
 	/* Step 2: Configure the NFP
 	 * - Enable rings from 0 to tx_rings/rx_rings - 1.
 	 * - Write MAC address (in case it changed)
@@ -1779,43 +1854,9 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	 * - Set the Freelist buffer size
 	 * - Enable the FW
 	 */
-	for (r = 0; r < nn->num_r_vecs; r++)
-		nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r);
-
-	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
-		  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
-
-	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
-		  0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
-
-	nfp_net_write_mac_addr(nn, netdev->dev_addr);
-
-	nn_writel(nn, NFP_NET_CFG_MTU, netdev->mtu);
-	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz);
-
-	/* Enable device */
-	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
-	update |= NFP_NET_CFG_UPDATE_GEN;
-	update |= NFP_NET_CFG_UPDATE_MSIX;
-	update |= NFP_NET_CFG_UPDATE_RING;
-	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
-		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
-
-	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
-	err = nfp_net_reconfig(nn, update);
+	err = nfp_net_set_config_and_enable(nn);
 	if (err)
-		goto err_clear_config;
-
-	nn->ctrl = new_ctrl;
-
-	/* Since reconfiguration requests while NFP is down are ignored we
-	 * have to wipe the entire VXLAN configuration and reinitialize it.
-	 */
-	if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) {
-		memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
-		memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
-		vxlan_get_rx_port(netdev);
-	}
+		goto err_free_rings;
 
 	/* Step 3: Enable for kernel
 	 * - put some freelist descriptors on each RX ring
@@ -1823,18 +1864,10 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	 * - enable all TX queues
 	 * - set link state
 	 */
-	for (r = 0; r < nn->num_r_vecs; r++)
-		nfp_net_start_vec(nn, &nn->r_vecs[r]);
-
-	netif_tx_wake_all_queues(netdev);
-
-	enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
-	nfp_net_read_link_status(nn);
+	nfp_net_open_stack(nn);
 
 	return 0;
 
-err_clear_config:
-	nfp_net_clear_config_and_disable(nn);
 err_free_rings:
 	r = nn->num_r_vecs;
 err_free_prev_vecs:
@@ -1858,36 +1891,31 @@ err_free_exn:
 }
 
 /**
- * nfp_net_netdev_close() - Called when the device is downed
- * @netdev:      netdev structure
+ * nfp_net_close_stack() - Quiescent the stack (part of close)
+ * @nn:	     NFP Net device to reconfigure
  */
-static int nfp_net_netdev_close(struct net_device *netdev)
+static void nfp_net_close_stack(struct nfp_net *nn)
 {
-	struct nfp_net *nn = netdev_priv(netdev);
-	int r;
-
-	if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) {
-		nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl);
-		return 0;
-	}
+	unsigned int r;
 
-	/* Step 1: Disable RX and TX rings from the Linux kernel perspective
-	 */
 	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
-	netif_carrier_off(netdev);
+	netif_carrier_off(nn->netdev);
 	nn->link_up = false;
 
 	for (r = 0; r < nn->num_r_vecs; r++)
 		napi_disable(&nn->r_vecs[r].napi);
 
-	netif_tx_disable(netdev);
+	netif_tx_disable(nn->netdev);
+}
 
-	/* Step 2: Tell NFP
-	 */
-	nfp_net_clear_config_and_disable(nn);
+/**
+ * nfp_net_close_free_all() - Free all runtime resources
+ * @nn:      NFP Net device to reconfigure
+ */
+static void nfp_net_close_free_all(struct nfp_net *nn)
+{
+	unsigned int r;
 
-	/* Step 3: Free resources
-	 */
 	for (r = 0; r < nn->num_r_vecs; r++) {
 		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
 		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
@@ -1902,6 +1930,32 @@ static int nfp_net_netdev_close(struct net_device *netdev)
 
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
+}
+
+/**
+ * nfp_net_netdev_close() - Called when the device is downed
+ * @netdev:      netdev structure
+ */
+static int nfp_net_netdev_close(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) {
+		nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl);
+		return 0;
+	}
+
+	/* Step 1: Disable RX and TX rings from the Linux kernel perspective
+	 */
+	nfp_net_close_stack(nn);
+
+	/* Step 2: Tell NFP
+	 */
+	nfp_net_clear_config_and_disable(nn);
+
+	/* Step 3: Free resources
+	 */
+	nfp_net_close_free_all(nn);
 
 	nn_dbg(nn, "%s down", netdev->name);
 	return 0;
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 09/15] nfp: move filling ring information to FW config
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

nfp_net_[rt]x_ring_{alloc,free} should only allocate or free
ring resources without touching the device.  Move setting
parameters in the BAR to separate functions.  This will make
it possible to reuse alloc/free functions to allocate new
rings while the device is running.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 50 ++++++++++++++--------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index a6a917fe8e31..342335d09fb2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1387,10 +1387,6 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
 	struct nfp_net *nn = r_vec->nfp_net;
 	struct pci_dev *pdev = nn->pdev;
 
-	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), 0);
-	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), 0);
-	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), 0);
-
 	kfree(tx_ring->txbufs);
 
 	if (tx_ring->txds)
@@ -1430,11 +1426,6 @@ static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring)
 	if (!tx_ring->txbufs)
 		goto err_alloc;
 
-	/* Write the DMA address, size and MSI-X info to the device */
-	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), tx_ring->dma);
-	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), ilog2(tx_ring->cnt));
-	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), r_vec->irq_idx);
-
 	netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx);
 
 	nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n",
@@ -1458,10 +1449,6 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 	struct nfp_net *nn = r_vec->nfp_net;
 	struct pci_dev *pdev = nn->pdev;
 
-	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), 0);
-	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), 0);
-	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), 0);
-
 	kfree(rx_ring->rxbufs);
 
 	if (rx_ring->rxds)
@@ -1501,11 +1488,6 @@ static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring)
 	if (!rx_ring->rxbufs)
 		goto err_alloc;
 
-	/* Write the DMA address, size and MSI-X info to the device */
-	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), rx_ring->dma);
-	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), ilog2(rx_ring->cnt));
-	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), r_vec->irq_idx);
-
 	nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n",
 	       rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx,
 	       rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds);
@@ -1630,6 +1612,17 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac)
 		  get_unaligned_be16(nn->netdev->dev_addr + 4) << 16);
 }
 
+static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
+{
+	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0);
+	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0);
+	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);
+
+	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
+	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
+	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
+}
+
 /**
  * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
  * @nn:      NFP Net device to reconfigure
@@ -1637,6 +1630,7 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac)
 static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
 {
 	u32 new_ctrl, update;
+	unsigned int r;
 	int err;
 
 	new_ctrl = nn->ctrl;
@@ -1658,9 +1652,26 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
 		return;
 	}
 
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_vec_clear_ring_data(nn, r);
+
 	nn->ctrl = new_ctrl;
 }
 
+static void
+nfp_net_vec_write_ring_data(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+			    unsigned int idx)
+{
+	/* Write the DMA address, size and MSI-X info to the device */
+	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), r_vec->rx_ring->dma);
+	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(r_vec->rx_ring->cnt));
+	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), r_vec->irq_idx);
+
+	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), r_vec->tx_ring->dma);
+	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(r_vec->tx_ring->cnt));
+	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), r_vec->irq_idx);
+}
+
 /**
  * nfp_net_start_vec() - Start ring vector
  * @nn:      NFP Net device structure
@@ -1768,6 +1779,9 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	 * - Set the Freelist buffer size
 	 * - Enable the FW
 	 */
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r);
+
 	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
 		  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 08/15] nfp: preallocate RX buffers early in .ndo_open
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

We want .ndo_open() to have the following structure:
 - allocate resources;
 - configure HW/FW;
 - enable the device from stack perspective.
Therefore filling RX rings needs to be moved to the beginning
of .ndo_open().

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 34 +++++++---------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 0c3c37ad28a4..a6a917fe8e31 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1666,28 +1666,19 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
  * @nn:      NFP Net device structure
  * @r_vec:   Ring vector to be started
  */
-static int nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
+static void
+nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
 {
 	unsigned int irq_vec;
-	int err = 0;
 
 	irq_vec = nn->irq_entries[r_vec->irq_idx].vector;
 
 	disable_irq(irq_vec);
 
-	err = nfp_net_rx_ring_bufs_alloc(r_vec->nfp_net, r_vec->rx_ring);
-	if (err) {
-		nn_err(nn, "RV%02d: couldn't allocate enough buffers\n",
-		       r_vec->irq_idx);
-		goto out;
-	}
 	nfp_net_rx_ring_fill_freelist(r_vec->rx_ring);
-
 	napi_enable(&r_vec->napi);
-out:
-	enable_irq(irq_vec);
 
-	return err;
+	enable_irq(irq_vec);
 }
 
 static int nfp_net_netdev_open(struct net_device *netdev)
@@ -1742,6 +1733,10 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring);
 		if (err)
 			goto err_free_tx_ring_p;
+
+		err = nfp_net_rx_ring_bufs_alloc(nn, nn->r_vecs[r].rx_ring);
+		if (err)
+			goto err_flush_rx_ring_p;
 	}
 
 	err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings);
@@ -1814,11 +1809,8 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	 * - enable all TX queues
 	 * - set link state
 	 */
-	for (r = 0; r < nn->num_r_vecs; r++) {
-		err = nfp_net_start_vec(nn, &nn->r_vecs[r]);
-		if (err)
-			goto err_disable_napi;
-	}
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_start_vec(nn, &nn->r_vecs[r]);
 
 	netif_tx_wake_all_queues(netdev);
 
@@ -1827,18 +1819,14 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 
 	return 0;
 
-err_disable_napi:
-	while (r--) {
-		napi_disable(&nn->r_vecs[r].napi);
-		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
-		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
-	}
 err_clear_config:
 	nfp_net_clear_config_and_disable(nn);
 err_free_rings:
 	r = nn->num_r_vecs;
 err_free_prev_vecs:
 	while (r--) {
+		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
+err_flush_rx_ring_p:
 		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
 err_free_tx_ring_p:
 		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 07/15] nfp: reorganize initial filling of RX rings
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Separate allocation of buffers from giving them to FW.
Thanks to this it will be possible to move allocation
earlier on the .ndo_open() path and reuse buffers during
runtime reconfiguration.

Similarly to the TX side, clean up the spill of functionality
from flush to freeing the ring.  Unlike on the TX side,
RX ring reset does not free buffers from the ring.
Ring reset means only that FW pointers are zeroed and
buffers on the ring must be placed in positions
[0, cnt - 1).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 119 ++++++++++++++-------
 1 file changed, 78 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 61f243760ee0..0c3c37ad28a4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1020,62 +1020,100 @@ static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
 }
 
 /**
- * nfp_net_rx_flush() - Free any buffers currently on the RX ring
- * @rx_ring:  RX ring to remove buffers from
+ * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
+ * @rx_ring:	RX ring structure
  *
- * Assumes that the device is stopped
+ * Warning: Do *not* call if ring buffers were never put on the FW freelist
+ *	    (i.e. device was not enabled)!
  */
-static void nfp_net_rx_flush(struct nfp_net_rx_ring *rx_ring)
+static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
 {
-	struct nfp_net *nn = rx_ring->r_vec->nfp_net;
-	struct pci_dev *pdev = nn->pdev;
-	int idx;
+	unsigned int wr_idx, last_idx;
 
-	while (rx_ring->rd_p != rx_ring->wr_p) {
-		idx = rx_ring->rd_p % rx_ring->cnt;
+	/* Move the empty entry to the end of the list */
+	wr_idx = rx_ring->wr_p % rx_ring->cnt;
+	last_idx = rx_ring->cnt - 1;
+	rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
+	rx_ring->rxbufs[wr_idx].skb = rx_ring->rxbufs[last_idx].skb;
+	rx_ring->rxbufs[last_idx].dma_addr = 0;
+	rx_ring->rxbufs[last_idx].skb = NULL;
 
-		if (rx_ring->rxbufs[idx].skb) {
-			dma_unmap_single(&pdev->dev,
-					 rx_ring->rxbufs[idx].dma_addr,
-					 nn->fl_bufsz, DMA_FROM_DEVICE);
-			dev_kfree_skb_any(rx_ring->rxbufs[idx].skb);
-			rx_ring->rxbufs[idx].dma_addr = 0;
-			rx_ring->rxbufs[idx].skb = NULL;
-		}
+	memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt);
+	rx_ring->wr_p = 0;
+	rx_ring->rd_p = 0;
+	rx_ring->wr_ptr_add = 0;
+}
 
-		memset(&rx_ring->rxds[idx], 0, sizeof(rx_ring->rxds[idx]));
+/**
+ * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
+ * @nn:		NFP Net device
+ * @rx_ring:	RX ring to remove buffers from
+ *
+ * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
+ * entries.  After device is disabled nfp_net_rx_ring_reset() must be called
+ * to restore required ring geometry.
+ */
+static void
+nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
+{
+	struct pci_dev *pdev = nn->pdev;
+	unsigned int i;
 
-		rx_ring->rd_p++;
+	for (i = 0; i < rx_ring->cnt - 1; i++) {
+		/* NULL skb can only happen when initial filling of the ring
+		 * fails to allocate enough buffers and calls here to free
+		 * already allocated ones.
+		 */
+		if (!rx_ring->rxbufs[i].skb)
+			continue;
+
+		dma_unmap_single(&pdev->dev, rx_ring->rxbufs[i].dma_addr,
+				 nn->fl_bufsz, DMA_FROM_DEVICE);
+		dev_kfree_skb_any(rx_ring->rxbufs[i].skb);
+		rx_ring->rxbufs[i].dma_addr = 0;
+		rx_ring->rxbufs[i].skb = NULL;
 	}
 }
 
 /**
- * nfp_net_rx_fill_freelist() - Attempt filling freelist with RX buffers
- * @rx_ring: RX ring to fill
- *
- * Try to fill as many buffers as possible into freelist.  Return
- * number of buffers added.
- *
- * Return: Number of freelist buffers added.
+ * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
+ * @nn:		NFP Net device
+ * @rx_ring:	RX ring to remove buffers from
  */
-static int nfp_net_rx_fill_freelist(struct nfp_net_rx_ring *rx_ring)
+static int
+nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
 {
-	struct sk_buff *skb;
-	dma_addr_t dma_addr;
+	struct nfp_net_rx_buf *rxbufs;
+	unsigned int i;
+
+	rxbufs = rx_ring->rxbufs;
 
-	while (nfp_net_rx_space(rx_ring)) {
-		skb = nfp_net_rx_alloc_one(rx_ring, &dma_addr);
-		if (!skb) {
-			nfp_net_rx_flush(rx_ring);
+	for (i = 0; i < rx_ring->cnt - 1; i++) {
+		rxbufs[i].skb =
+			nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr);
+		if (!rxbufs[i].skb) {
+			nfp_net_rx_ring_bufs_free(nn, rx_ring);
 			return -ENOMEM;
 		}
-		nfp_net_rx_give_one(rx_ring, skb, dma_addr);
 	}
 
 	return 0;
 }
 
 /**
+ * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
+ * @rx_ring: RX ring to fill
+ */
+static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
+{
+	unsigned int i;
+
+	for (i = 0; i < rx_ring->cnt - 1; i++)
+		nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].skb,
+				    rx_ring->rxbufs[i].dma_addr);
+}
+
+/**
  * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors
  * @flags: RX descriptor flags field in CPU byte order
  */
@@ -1431,10 +1469,6 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 				  rx_ring->rxds, rx_ring->dma);
 
 	rx_ring->cnt = 0;
-	rx_ring->wr_p = 0;
-	rx_ring->rd_p = 0;
-	rx_ring->wr_ptr_add = 0;
-
 	rx_ring->rxbufs = NULL;
 	rx_ring->rxds = NULL;
 	rx_ring->dma = 0;
@@ -1641,12 +1675,13 @@ static int nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
 
 	disable_irq(irq_vec);
 
-	err = nfp_net_rx_fill_freelist(r_vec->rx_ring);
+	err = nfp_net_rx_ring_bufs_alloc(r_vec->nfp_net, r_vec->rx_ring);
 	if (err) {
 		nn_err(nn, "RV%02d: couldn't allocate enough buffers\n",
 		       r_vec->irq_idx);
 		goto out;
 	}
+	nfp_net_rx_ring_fill_freelist(r_vec->rx_ring);
 
 	napi_enable(&r_vec->napi);
 out:
@@ -1795,7 +1830,8 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 err_disable_napi:
 	while (r--) {
 		napi_disable(&nn->r_vecs[r].napi);
-		nfp_net_rx_flush(nn->r_vecs[r].rx_ring);
+		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
+		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
 	}
 err_clear_config:
 	nfp_net_clear_config_and_disable(nn);
@@ -1851,7 +1887,8 @@ static int nfp_net_netdev_close(struct net_device *netdev)
 	/* Step 3: Free resources
 	 */
 	for (r = 0; r < nn->num_r_vecs; r++) {
-		nfp_net_rx_flush(nn->r_vecs[r].rx_ring);
+		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
+		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
 		nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring);
 		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
 		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 06/15] nfp: cleanup tx ring flush and rename to reset
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Since we never used flush without freeing the ring later,
the functionality of the two operations is mixed.
Rename flush to ring reset and move there all the things
which have to be done after FW ring state is cleared.
While at it do some clean-ups.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 81 ++++++++++------------
 1 file changed, 37 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 66fab7162b7c..61f243760ee0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -867,61 +867,59 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 }
 
 /**
- * nfp_net_tx_flush() - Free any untransmitted buffers currently on the TX ring
- * @tx_ring:     TX ring structure
+ * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
+ * @nn:		NFP Net device
+ * @tx_ring:	TX ring structure
  *
  * Assumes that the device is stopped
  */
-static void nfp_net_tx_flush(struct nfp_net_tx_ring *tx_ring)
+static void
+nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
 {
-	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
-	struct nfp_net *nn = r_vec->nfp_net;
-	struct pci_dev *pdev = nn->pdev;
 	const struct skb_frag_struct *frag;
 	struct netdev_queue *nd_q;
-	struct sk_buff *skb;
-	int nr_frags;
-	int fidx;
-	int idx;
+	struct pci_dev *pdev = nn->pdev;
 
 	while (tx_ring->rd_p != tx_ring->wr_p) {
-		idx = tx_ring->rd_p % tx_ring->cnt;
+		int nr_frags, fidx, idx;
+		struct sk_buff *skb;
 
+		idx = tx_ring->rd_p % tx_ring->cnt;
 		skb = tx_ring->txbufs[idx].skb;
-		if (skb) {
-			nr_frags = skb_shinfo(skb)->nr_frags;
-			fidx = tx_ring->txbufs[idx].fidx;
-
-			if (fidx == -1) {
-				/* unmap head */
-				dma_unmap_single(&pdev->dev,
-						 tx_ring->txbufs[idx].dma_addr,
-						 skb_headlen(skb),
-						 DMA_TO_DEVICE);
-			} else {
-				/* unmap fragment */
-				frag = &skb_shinfo(skb)->frags[fidx];
-				dma_unmap_page(&pdev->dev,
-					       tx_ring->txbufs[idx].dma_addr,
-					       skb_frag_size(frag),
-					       DMA_TO_DEVICE);
-			}
-
-			/* check for last gather fragment */
-			if (fidx == nr_frags - 1)
-				dev_kfree_skb_any(skb);
-
-			tx_ring->txbufs[idx].dma_addr = 0;
-			tx_ring->txbufs[idx].skb = NULL;
-			tx_ring->txbufs[idx].fidx = -2;
+		nr_frags = skb_shinfo(skb)->nr_frags;
+		fidx = tx_ring->txbufs[idx].fidx;
+
+		if (fidx == -1) {
+			/* unmap head */
+			dma_unmap_single(&pdev->dev,
+					 tx_ring->txbufs[idx].dma_addr,
+					 skb_headlen(skb), DMA_TO_DEVICE);
+		} else {
+			/* unmap fragment */
+			frag = &skb_shinfo(skb)->frags[fidx];
+			dma_unmap_page(&pdev->dev,
+				       tx_ring->txbufs[idx].dma_addr,
+				       skb_frag_size(frag), DMA_TO_DEVICE);
 		}
 
-		memset(&tx_ring->txds[idx], 0, sizeof(tx_ring->txds[idx]));
+		/* check for last gather fragment */
+		if (fidx == nr_frags - 1)
+			dev_kfree_skb_any(skb);
+
+		tx_ring->txbufs[idx].dma_addr = 0;
+		tx_ring->txbufs[idx].skb = NULL;
+		tx_ring->txbufs[idx].fidx = -2;
 
 		tx_ring->qcp_rd_p++;
 		tx_ring->rd_p++;
 	}
 
+	memset(tx_ring->txds, 0, sizeof(*tx_ring->txds) * tx_ring->cnt);
+	tx_ring->wr_p = 0;
+	tx_ring->rd_p = 0;
+	tx_ring->qcp_rd_p = 0;
+	tx_ring->wr_ptr_add = 0;
+
 	nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
 	netdev_tx_reset_queue(nd_q);
 }
@@ -1362,11 +1360,6 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
 				  tx_ring->txds, tx_ring->dma);
 
 	tx_ring->cnt = 0;
-	tx_ring->wr_p = 0;
-	tx_ring->rd_p = 0;
-	tx_ring->qcp_rd_p = 0;
-	tx_ring->wr_ptr_add = 0;
-
 	tx_ring->txbufs = NULL;
 	tx_ring->txds = NULL;
 	tx_ring->dma = 0;
@@ -1859,7 +1852,7 @@ static int nfp_net_netdev_close(struct net_device *netdev)
 	 */
 	for (r = 0; r < nn->num_r_vecs; r++) {
 		nfp_net_rx_flush(nn->r_vecs[r].rx_ring);
-		nfp_net_tx_flush(nn->r_vecs[r].tx_ring);
+		nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring);
 		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
 		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
 		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 05/15] nfp: allocate ring SW structs dynamically
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

To be able to switch rings more easily on config changes
allocate them dynamically, separately from nfp_net structure.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  6 ++---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 28 +++++++++++++++++-----
 .../net/ethernet/netronome/nfp/nfp_net_debugfs.c   | 20 +++++++++-------
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 75683fb26734..fc005c982b7d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -472,6 +472,9 @@ struct nfp_net {
 
 	u32 rx_offset;
 
+	struct nfp_net_tx_ring *tx_rings;
+	struct nfp_net_rx_ring *rx_rings;
+
 #ifdef CONFIG_PCI_IOV
 	unsigned int num_vfs;
 	struct vf_data_storage *vfinfo;
@@ -504,9 +507,6 @@ struct nfp_net {
 	int txd_cnt;
 	int rxd_cnt;
 
-	struct nfp_net_tx_ring tx_rings[NFP_NET_MAX_TX_RINGS];
-	struct nfp_net_rx_ring rx_rings[NFP_NET_MAX_RX_RINGS];
-
 	u8 num_irqs;
 	u8 num_r_vecs;
 	struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS];
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 7cd20fcd631a..66fab7162b7c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -413,12 +413,6 @@ static void nfp_net_irqs_assign(struct net_device *netdev)
 		r_vec->irq_idx = NFP_NET_NON_Q_VECTORS + r;
 
 		cpumask_set_cpu(r, &r_vec->affinity_mask);
-
-		r_vec->tx_ring = &nn->tx_rings[r];
-		nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, r);
-
-		r_vec->rx_ring = &nn->rx_rings[r];
-		nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, r);
 	}
 }
 
@@ -1503,6 +1497,12 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 	struct msix_entry *entry = &nn->irq_entries[r_vec->irq_idx];
 	int err;
 
+	r_vec->tx_ring = &nn->tx_rings[idx];
+	nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, idx);
+
+	r_vec->rx_ring = &nn->rx_rings[idx];
+	nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, idx);
+
 	snprintf(r_vec->name, sizeof(r_vec->name),
 		 "%s-rxtx-%d", nn->netdev->name, idx);
 	err = request_irq(entry->vector, r_vec->handler, 0, r_vec->name, r_vec);
@@ -1693,6 +1693,15 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		goto err_free_exn;
 	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
 
+	nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
+			       GFP_KERNEL);
+	if (!nn->rx_rings)
+		goto err_free_lsc;
+	nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings),
+			       GFP_KERNEL);
+	if (!nn->tx_rings)
+		goto err_free_rx_rings;
+
 	for (r = 0; r < nn->num_r_vecs; r++) {
 		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
 		if (err)
@@ -1807,6 +1816,10 @@ err_free_tx_ring_p:
 err_cleanup_vec_p:
 		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 	}
+	kfree(nn->tx_rings);
+err_free_rx_rings:
+	kfree(nn->rx_rings);
+err_free_lsc:
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
 err_free_exn:
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
@@ -1852,6 +1865,9 @@ static int nfp_net_netdev_close(struct net_device *netdev)
 		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 	}
 
+	kfree(nn->rx_rings);
+	kfree(nn->tx_rings);
+
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index 4c97c713121c..f86a1f13d27b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -40,8 +40,9 @@ static struct dentry *nfp_dir;
 
 static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
 {
-	struct nfp_net_rx_ring *rx_ring = file->private;
 	int fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p, rxd_cnt;
+	struct nfp_net_r_vector *r_vec = file->private;
+	struct nfp_net_rx_ring *rx_ring;
 	struct nfp_net_rx_desc *rxd;
 	struct sk_buff *skb;
 	struct nfp_net *nn;
@@ -49,9 +50,10 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
 
 	rtnl_lock();
 
-	if (!rx_ring->r_vec || !rx_ring->r_vec->nfp_net)
+	if (!r_vec->nfp_net || !r_vec->rx_ring)
 		goto out;
-	nn = rx_ring->r_vec->nfp_net;
+	nn = r_vec->nfp_net;
+	rx_ring = r_vec->rx_ring;
 	if (!netif_running(nn->netdev))
 		goto out;
 
@@ -115,7 +117,8 @@ static const struct file_operations nfp_rx_q_fops = {
 
 static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
 {
-	struct nfp_net_tx_ring *tx_ring = file->private;
+	struct nfp_net_r_vector *r_vec = file->private;
+	struct nfp_net_tx_ring *tx_ring;
 	struct nfp_net_tx_desc *txd;
 	int d_rd_p, d_wr_p, txd_cnt;
 	struct sk_buff *skb;
@@ -124,9 +127,10 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
 
 	rtnl_lock();
 
-	if (!tx_ring->r_vec || !tx_ring->r_vec->nfp_net)
+	if (!r_vec->nfp_net || !r_vec->tx_ring)
 		goto out;
-	nn = tx_ring->r_vec->nfp_net;
+	nn = r_vec->nfp_net;
+	tx_ring = r_vec->tx_ring;
 	if (!netif_running(nn->netdev))
 		goto out;
 
@@ -207,13 +211,13 @@ void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
 	for (i = 0; i < nn->num_rx_rings; i++) {
 		sprintf(int_name, "%d", i);
 		debugfs_create_file(int_name, S_IRUSR, rx,
-				    &nn->rx_rings[i], &nfp_rx_q_fops);
+				    &nn->r_vecs[i], &nfp_rx_q_fops);
 	}
 
 	for (i = 0; i < nn->num_tx_rings; i++) {
 		sprintf(int_name, "%d", i);
 		debugfs_create_file(int_name, S_IRUSR, tx,
-				    &nn->tx_rings[i], &nfp_tx_q_fops);
+				    &nn->r_vecs[i], &nfp_tx_q_fops);
 	}
 }
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 04/15] nfp: make *x_ring_init do all the init
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

nfp_net_[rt]x_ring_init functions used to be called from probe
path only and some of their functionality was spilled to the
call site.  In order to reuse them for ring reconfiguration
we need them to do all the init.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 28 ++++++++++++++--------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 8692587904c5..7cd20fcd631a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -347,12 +347,18 @@ static irqreturn_t nfp_net_irq_exn(int irq, void *data)
 /**
  * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
  * @tx_ring:  TX ring structure
+ * @r_vec:    IRQ vector servicing this ring
+ * @idx:      Ring index
  */
-static void nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring)
+static void
+nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
+		     struct nfp_net_r_vector *r_vec, unsigned int idx)
 {
-	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
 
+	tx_ring->idx = idx;
+	tx_ring->r_vec = r_vec;
+
 	tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
 	tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
 }
@@ -360,12 +366,18 @@ static void nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring)
 /**
  * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring
  * @rx_ring:  RX ring structure
+ * @r_vec:    IRQ vector servicing this ring
+ * @idx:      Ring index
  */
-static void nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring)
+static void
+nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
+		     struct nfp_net_r_vector *r_vec, unsigned int idx)
 {
-	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
 
+	rx_ring->idx = idx;
+	rx_ring->r_vec = r_vec;
+
 	rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
 	rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1);
 
@@ -403,14 +415,10 @@ static void nfp_net_irqs_assign(struct net_device *netdev)
 		cpumask_set_cpu(r, &r_vec->affinity_mask);
 
 		r_vec->tx_ring = &nn->tx_rings[r];
-		nn->tx_rings[r].idx = r;
-		nn->tx_rings[r].r_vec = r_vec;
-		nfp_net_tx_ring_init(r_vec->tx_ring);
+		nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, r);
 
 		r_vec->rx_ring = &nn->rx_rings[r];
-		nn->rx_rings[r].idx = r;
-		nn->rx_rings[r].r_vec = r_vec;
-		nfp_net_rx_ring_init(r_vec->rx_ring);
+		nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, r);
 	}
 }
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 03/15] nfp: break up nfp_net_{alloc|free}_rings
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

nfp_net_{alloc|free}_rings contained a strange mix of allocations
and vector initialization.  Remove it, declare vector init as
a separate function and handle allocations explicitly.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 126 ++++++++-------------
 1 file changed, 47 insertions(+), 79 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 5da1199e7afb..8692587904c5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1488,91 +1488,40 @@ err_alloc:
 	return -ENOMEM;
 }
 
-static void __nfp_net_free_rings(struct nfp_net *nn, unsigned int n_free)
+static int
+nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+		       int idx)
 {
-	struct nfp_net_r_vector *r_vec;
-	struct msix_entry *entry;
+	struct msix_entry *entry = &nn->irq_entries[r_vec->irq_idx];
+	int err;
 
-	while (n_free--) {
-		r_vec = &nn->r_vecs[n_free];
-		entry = &nn->irq_entries[r_vec->irq_idx];
+	snprintf(r_vec->name, sizeof(r_vec->name),
+		 "%s-rxtx-%d", nn->netdev->name, idx);
+	err = request_irq(entry->vector, r_vec->handler, 0, r_vec->name, r_vec);
+	if (err) {
+		nn_err(nn, "Error requesting IRQ %d\n", entry->vector);
+		return err;
+	}
 
-		nfp_net_rx_ring_free(r_vec->rx_ring);
-		nfp_net_tx_ring_free(r_vec->tx_ring);
+	/* Setup NAPI */
+	netif_napi_add(nn->netdev, &r_vec->napi,
+		       nfp_net_poll, NAPI_POLL_WEIGHT);
 
-		irq_set_affinity_hint(entry->vector, NULL);
-		free_irq(entry->vector, r_vec);
+	irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask);
 
-		netif_napi_del(&r_vec->napi);
-	}
-}
+	nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, entry->vector, entry->entry);
 
-/**
- * nfp_net_free_rings() - Free all ring resources
- * @nn:      NFP Net device to reconfigure
- */
-static void nfp_net_free_rings(struct nfp_net *nn)
-{
-	__nfp_net_free_rings(nn, nn->num_r_vecs);
+	return 0;
 }
 
-/**
- * nfp_net_alloc_rings() - Allocate resources for RX and TX rings
- * @nn:      NFP Net device to reconfigure
- *
- * Return: 0 on success or negative errno on error.
- */
-static int nfp_net_alloc_rings(struct nfp_net *nn)
+static void
+nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
 {
-	struct nfp_net_r_vector *r_vec;
-	struct msix_entry *entry;
-	int err;
-	int r;
+	struct msix_entry *entry = &nn->irq_entries[r_vec->irq_idx];
 
-	for (r = 0; r < nn->num_r_vecs; r++) {
-		r_vec = &nn->r_vecs[r];
-		entry = &nn->irq_entries[r_vec->irq_idx];
-
-		/* Setup NAPI */
-		netif_napi_add(nn->netdev, &r_vec->napi,
-			       nfp_net_poll, NAPI_POLL_WEIGHT);
-
-		snprintf(r_vec->name, sizeof(r_vec->name),
-			 "%s-rxtx-%d", nn->netdev->name, r);
-		err = request_irq(entry->vector, r_vec->handler, 0,
-				  r_vec->name, r_vec);
-		if (err) {
-			nn_dbg(nn, "Error requesting IRQ %d\n", entry->vector);
-			goto err_napi_del;
-		}
-
-		irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask);
-
-		nn_dbg(nn, "RV%02d: irq=%03d/%03d\n",
-		       r, entry->vector, entry->entry);
-
-		/* Allocate TX ring resources */
-		err = nfp_net_tx_ring_alloc(r_vec->tx_ring);
-		if (err)
-			goto err_free_irq;
-
-		/* Allocate RX ring resources */
-		err = nfp_net_rx_ring_alloc(r_vec->rx_ring);
-		if (err)
-			goto err_free_tx;
-	}
-
-	return 0;
-
-err_free_tx:
-	nfp_net_tx_ring_free(r_vec->tx_ring);
-err_free_irq:
 	irq_set_affinity_hint(entry->vector, NULL);
-	free_irq(entry->vector, r_vec);
-err_napi_del:
 	netif_napi_del(&r_vec->napi);
-	__nfp_net_free_rings(nn, r);
-	return err;
+	free_irq(entry->vector, r_vec);
 }
 
 /**
@@ -1736,9 +1685,19 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		goto err_free_exn;
 	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
 
-	err = nfp_net_alloc_rings(nn);
-	if (err)
-		goto err_free_lsc;
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
+		if (err)
+			goto err_free_prev_vecs;
+
+		err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring);
+		if (err)
+			goto err_cleanup_vec_p;
+
+		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring);
+		if (err)
+			goto err_free_tx_ring_p;
+	}
 
 	err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings);
 	if (err)
@@ -1831,8 +1790,15 @@ err_disable_napi:
 err_clear_config:
 	nfp_net_clear_config_and_disable(nn);
 err_free_rings:
-	nfp_net_free_rings(nn);
-err_free_lsc:
+	r = nn->num_r_vecs;
+err_free_prev_vecs:
+	while (r--) {
+		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
+err_free_tx_ring_p:
+		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
+err_cleanup_vec_p:
+		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
+	}
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
 err_free_exn:
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
@@ -1873,9 +1839,11 @@ static int nfp_net_netdev_close(struct net_device *netdev)
 	for (r = 0; r < nn->num_r_vecs; r++) {
 		nfp_net_rx_flush(nn->r_vecs[r].rx_ring);
 		nfp_net_tx_flush(nn->r_vecs[r].tx_ring);
+		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
+		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
+		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 	}
 
-	nfp_net_free_rings(nn);
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
 
-- 
1.9.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox