Netdev List
 help / color / mirror / Atom feed
* [PATCH v5 net-next 10/15] nfp: slice .ndo_open() and .ndo_stop() up
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Divide .ndo_open() and .ndo_stop() into logical, callable
chunks.  No functional changes.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 218 +++++++++++++--------
 1 file changed, 136 insertions(+), 82 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 342335d09fb2..6c1ed8914416 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1672,6 +1672,82 @@ nfp_net_vec_write_ring_data(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), r_vec->irq_idx);
 }
 
+static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
+{
+	u32 new_ctrl, update = 0;
+	unsigned int r;
+	int err;
+
+	new_ctrl = nn->ctrl;
+
+	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
+		nfp_net_rss_write_key(nn);
+		nfp_net_rss_write_itbl(nn);
+		nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
+		update |= NFP_NET_CFG_UPDATE_RSS;
+	}
+
+	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
+		nfp_net_coalesce_write_cfg(nn);
+
+		new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
+		update |= NFP_NET_CFG_UPDATE_IRQMOD;
+	}
+
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r);
+
+	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
+		  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
+
+	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
+		  0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
+
+	nfp_net_write_mac_addr(nn, nn->netdev->dev_addr);
+
+	nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu);
+	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz);
+
+	/* Enable device */
+	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
+	update |= NFP_NET_CFG_UPDATE_GEN;
+	update |= NFP_NET_CFG_UPDATE_MSIX;
+	update |= NFP_NET_CFG_UPDATE_RING;
+	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
+		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
+
+	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
+	err = nfp_net_reconfig(nn, update);
+
+	nn->ctrl = new_ctrl;
+
+	/* Since reconfiguration requests while NFP is down are ignored we
+	 * have to wipe the entire VXLAN configuration and reinitialize it.
+	 */
+	if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) {
+		memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
+		memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
+		vxlan_get_rx_port(nn->netdev);
+	}
+
+	return err;
+}
+
+/**
+ * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
+ * @nn:      NFP Net device to reconfigure
+ */
+static int nfp_net_set_config_and_enable(struct nfp_net *nn)
+{
+	int err;
+
+	err = __nfp_net_set_config_and_enable(nn);
+	if (err)
+		nfp_net_clear_config_and_disable(nn);
+
+	return err;
+}
+
 /**
  * nfp_net_start_vec() - Start ring vector
  * @nn:      NFP Net device structure
@@ -1692,20 +1768,33 @@ nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
 	enable_irq(irq_vec);
 }
 
+/**
+ * nfp_net_open_stack() - Start the device from stack's perspective
+ * @nn:      NFP Net device to reconfigure
+ */
+static void nfp_net_open_stack(struct nfp_net *nn)
+{
+	unsigned int r;
+
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_start_vec(nn, &nn->r_vecs[r]);
+
+	netif_tx_wake_all_queues(nn->netdev);
+
+	enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	nfp_net_read_link_status(nn);
+}
+
 static int nfp_net_netdev_open(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 	int err, r;
-	u32 update = 0;
-	u32 new_ctrl;
 
 	if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) {
 		nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl);
 		return -EBUSY;
 	}
 
-	new_ctrl = nn->ctrl;
-
 	/* Step 1: Allocate resources for rings and the like
 	 * - Request interrupts
 	 * - Allocate RX and TX ring resources
@@ -1758,20 +1847,6 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	if (err)
 		goto err_free_rings;
 
-	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
-		nfp_net_rss_write_key(nn);
-		nfp_net_rss_write_itbl(nn);
-		nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
-		update |= NFP_NET_CFG_UPDATE_RSS;
-	}
-
-	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
-		nfp_net_coalesce_write_cfg(nn);
-
-		new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
-		update |= NFP_NET_CFG_UPDATE_IRQMOD;
-	}
-
 	/* Step 2: Configure the NFP
 	 * - Enable rings from 0 to tx_rings/rx_rings - 1.
 	 * - Write MAC address (in case it changed)
@@ -1779,43 +1854,9 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	 * - Set the Freelist buffer size
 	 * - Enable the FW
 	 */
-	for (r = 0; r < nn->num_r_vecs; r++)
-		nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r);
-
-	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
-		  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
-
-	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
-		  0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
-
-	nfp_net_write_mac_addr(nn, netdev->dev_addr);
-
-	nn_writel(nn, NFP_NET_CFG_MTU, netdev->mtu);
-	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz);
-
-	/* Enable device */
-	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
-	update |= NFP_NET_CFG_UPDATE_GEN;
-	update |= NFP_NET_CFG_UPDATE_MSIX;
-	update |= NFP_NET_CFG_UPDATE_RING;
-	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
-		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
-
-	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
-	err = nfp_net_reconfig(nn, update);
+	err = nfp_net_set_config_and_enable(nn);
 	if (err)
-		goto err_clear_config;
-
-	nn->ctrl = new_ctrl;
-
-	/* Since reconfiguration requests while NFP is down are ignored we
-	 * have to wipe the entire VXLAN configuration and reinitialize it.
-	 */
-	if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) {
-		memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
-		memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
-		vxlan_get_rx_port(netdev);
-	}
+		goto err_free_rings;
 
 	/* Step 3: Enable for kernel
 	 * - put some freelist descriptors on each RX ring
@@ -1823,18 +1864,10 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	 * - enable all TX queues
 	 * - set link state
 	 */
-	for (r = 0; r < nn->num_r_vecs; r++)
-		nfp_net_start_vec(nn, &nn->r_vecs[r]);
-
-	netif_tx_wake_all_queues(netdev);
-
-	enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
-	nfp_net_read_link_status(nn);
+	nfp_net_open_stack(nn);
 
 	return 0;
 
-err_clear_config:
-	nfp_net_clear_config_and_disable(nn);
 err_free_rings:
 	r = nn->num_r_vecs;
 err_free_prev_vecs:
@@ -1858,36 +1891,31 @@ err_free_exn:
 }
 
 /**
- * nfp_net_netdev_close() - Called when the device is downed
- * @netdev:      netdev structure
+ * nfp_net_close_stack() - Quiescent the stack (part of close)
+ * @nn:	     NFP Net device to reconfigure
  */
-static int nfp_net_netdev_close(struct net_device *netdev)
+static void nfp_net_close_stack(struct nfp_net *nn)
 {
-	struct nfp_net *nn = netdev_priv(netdev);
-	int r;
-
-	if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) {
-		nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl);
-		return 0;
-	}
+	unsigned int r;
 
-	/* Step 1: Disable RX and TX rings from the Linux kernel perspective
-	 */
 	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
-	netif_carrier_off(netdev);
+	netif_carrier_off(nn->netdev);
 	nn->link_up = false;
 
 	for (r = 0; r < nn->num_r_vecs; r++)
 		napi_disable(&nn->r_vecs[r].napi);
 
-	netif_tx_disable(netdev);
+	netif_tx_disable(nn->netdev);
+}
 
-	/* Step 2: Tell NFP
-	 */
-	nfp_net_clear_config_and_disable(nn);
+/**
+ * nfp_net_close_free_all() - Free all runtime resources
+ * @nn:      NFP Net device to reconfigure
+ */
+static void nfp_net_close_free_all(struct nfp_net *nn)
+{
+	unsigned int r;
 
-	/* Step 3: Free resources
-	 */
 	for (r = 0; r < nn->num_r_vecs; r++) {
 		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
 		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
@@ -1902,6 +1930,32 @@ static int nfp_net_netdev_close(struct net_device *netdev)
 
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
+}
+
+/**
+ * nfp_net_netdev_close() - Called when the device is downed
+ * @netdev:      netdev structure
+ */
+static int nfp_net_netdev_close(struct net_device *netdev)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) {
+		nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl);
+		return 0;
+	}
+
+	/* Step 1: Disable RX and TX rings from the Linux kernel perspective
+	 */
+	nfp_net_close_stack(nn);
+
+	/* Step 2: Tell NFP
+	 */
+	nfp_net_clear_config_and_disable(nn);
+
+	/* Step 3: Free resources
+	 */
+	nfp_net_close_free_all(nn);
 
 	nn_dbg(nn, "%s down", netdev->name);
 	return 0;
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 11/15] nfp: sync ring state during FW reconfiguration
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

FW reconfiguration in .ndo_open()/.ndo_stop() should reset/
restore queue state.  Since we need IRQs to be disabled when
filling rings on the RX path, we have to move disable_irq() from
.ndo_open() all the way up to IRQ allocation.

nfp_net_start_vec() becomes trivial now so it's inlined.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 45 ++++++++--------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 6c1ed8914416..ed23b9d348c3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1519,6 +1519,7 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 		nn_err(nn, "Error requesting IRQ %d\n", entry->vector);
 		return err;
 	}
+	disable_irq(entry->vector);
 
 	/* Setup NAPI */
 	netif_napi_add(nn->netdev, &r_vec->napi,
@@ -1647,13 +1648,14 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
 
 	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
 	err = nfp_net_reconfig(nn, update);
-	if (err) {
+	if (err)
 		nn_err(nn, "Could not disable device: %d\n", err);
-		return;
-	}
 
-	for (r = 0; r < nn->num_r_vecs; r++)
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
+		nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring);
 		nfp_net_vec_clear_ring_data(nn, r);
+	}
 
 	nn->ctrl = new_ctrl;
 }
@@ -1721,6 +1723,9 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
 
 	nn->ctrl = new_ctrl;
 
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_rx_ring_fill_freelist(nn->r_vecs[r].rx_ring);
+
 	/* Since reconfiguration requests while NFP is down are ignored we
 	 * have to wipe the entire VXLAN configuration and reinitialize it.
 	 */
@@ -1749,26 +1754,6 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
 }
 
 /**
- * nfp_net_start_vec() - Start ring vector
- * @nn:      NFP Net device structure
- * @r_vec:   Ring vector to be started
- */
-static void
-nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
-{
-	unsigned int irq_vec;
-
-	irq_vec = nn->irq_entries[r_vec->irq_idx].vector;
-
-	disable_irq(irq_vec);
-
-	nfp_net_rx_ring_fill_freelist(r_vec->rx_ring);
-	napi_enable(&r_vec->napi);
-
-	enable_irq(irq_vec);
-}
-
-/**
  * nfp_net_open_stack() - Start the device from stack's perspective
  * @nn:      NFP Net device to reconfigure
  */
@@ -1776,8 +1761,10 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 {
 	unsigned int r;
 
-	for (r = 0; r < nn->num_r_vecs; r++)
-		nfp_net_start_vec(nn, &nn->r_vecs[r]);
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		napi_enable(&nn->r_vecs[r].napi);
+		enable_irq(nn->irq_entries[nn->r_vecs[r].irq_idx].vector);
+	}
 
 	netif_tx_wake_all_queues(nn->netdev);
 
@@ -1902,8 +1889,10 @@ static void nfp_net_close_stack(struct nfp_net *nn)
 	netif_carrier_off(nn->netdev);
 	nn->link_up = false;
 
-	for (r = 0; r < nn->num_r_vecs; r++)
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		disable_irq(nn->irq_entries[nn->r_vecs[r].irq_idx].vector);
 		napi_disable(&nn->r_vecs[r].napi);
+	}
 
 	netif_tx_disable(nn->netdev);
 }
@@ -1917,9 +1906,7 @@ static void nfp_net_close_free_all(struct nfp_net *nn)
 	unsigned int r;
 
 	for (r = 0; r < nn->num_r_vecs; r++) {
-		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
 		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
-		nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring);
 		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
 		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
 		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 12/15] nfp: propagate list buffer size in struct rx_ring
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Free list buffer size needs to be propagated to a few functions
as a parameter and added to struct nfp_net_rx_ring, since soon
some of the functions will be reused to manage rings with
buffers of a size different than nn->fl_bufsz.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  3 +++
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 24 ++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index fc005c982b7d..9ab8e3967dc9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -298,6 +298,8 @@ struct nfp_net_rx_buf {
  * @rxds:       Virtual address of FL/RX ring in host memory
  * @dma:        DMA address of the FL/RX ring
  * @size:       Size, in bytes, of the FL/RX ring (needed to free)
+ * @bufsz:	Buffer allocation size for convenience of management routines
+ *		(NOTE: this is in second cache line, do not use on fast path!)
  */
 struct nfp_net_rx_ring {
 	struct nfp_net_r_vector *r_vec;
@@ -319,6 +321,7 @@ struct nfp_net_rx_ring {
 
 	dma_addr_t dma;
 	unsigned int size;
+	unsigned int bufsz;
 } ____cacheline_aligned;
 
 /**
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index ed23b9d348c3..03c60f755de0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -957,25 +957,27 @@ static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring)
  * nfp_net_rx_alloc_one() - Allocate and map skb for RX
  * @rx_ring:	RX ring structure of the skb
  * @dma_addr:	Pointer to storage for DMA address (output param)
+ * @fl_bufsz:	size of freelist buffers
  *
  * This function will allcate a new skb, map it for DMA.
  *
  * Return: allocated skb or NULL on failure.
  */
 static struct sk_buff *
-nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr)
+nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr,
+		     unsigned int fl_bufsz)
 {
 	struct nfp_net *nn = rx_ring->r_vec->nfp_net;
 	struct sk_buff *skb;
 
-	skb = netdev_alloc_skb(nn->netdev, nn->fl_bufsz);
+	skb = netdev_alloc_skb(nn->netdev, fl_bufsz);
 	if (!skb) {
 		nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n");
 		return NULL;
 	}
 
 	*dma_addr = dma_map_single(&nn->pdev->dev, skb->data,
-				  nn->fl_bufsz, DMA_FROM_DEVICE);
+				   fl_bufsz, DMA_FROM_DEVICE);
 	if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
 		dev_kfree_skb_any(skb);
 		nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
@@ -1068,7 +1070,7 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
 			continue;
 
 		dma_unmap_single(&pdev->dev, rx_ring->rxbufs[i].dma_addr,
-				 nn->fl_bufsz, DMA_FROM_DEVICE);
+				 rx_ring->bufsz, DMA_FROM_DEVICE);
 		dev_kfree_skb_any(rx_ring->rxbufs[i].skb);
 		rx_ring->rxbufs[i].dma_addr = 0;
 		rx_ring->rxbufs[i].skb = NULL;
@@ -1090,7 +1092,8 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
 
 	for (i = 0; i < rx_ring->cnt - 1; i++) {
 		rxbufs[i].skb =
-			nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr);
+			nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr,
+					     rx_ring->bufsz);
 		if (!rxbufs[i].skb) {
 			nfp_net_rx_ring_bufs_free(nn, rx_ring);
 			return -ENOMEM;
@@ -1278,7 +1281,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 		skb = rx_ring->rxbufs[idx].skb;
 
-		new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr);
+		new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr,
+					       nn->fl_bufsz);
 		if (!new_skb) {
 			nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb,
 					    rx_ring->rxbufs[idx].dma_addr);
@@ -1465,10 +1469,12 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 /**
  * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
  * @rx_ring:  RX ring to allocate
+ * @fl_bufsz: Size of buffers to allocate
  *
  * Return: 0 on success, negative errno otherwise.
  */
-static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring)
+static int
+nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz)
 {
 	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
@@ -1476,6 +1482,7 @@ static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring)
 	int sz;
 
 	rx_ring->cnt = nn->rxd_cnt;
+	rx_ring->bufsz = fl_bufsz;
 
 	rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
 	rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size,
@@ -1817,7 +1824,8 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		if (err)
 			goto err_cleanup_vec_p;
 
-		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring);
+		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring,
+					    nn->fl_bufsz);
 		if (err)
 			goto err_free_tx_ring_p;
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 13/15] nfp: convert .ndo_change_mtu() to prepare/commit paradigm
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

When changing MTU on a running device, first allocate new rings
and buffers and, once that succeeds, proceed with changing the MTU.

Allocation of new rings is not really necessary for this
operation - it's done to keep the code simple and because
size of the extra ring memory is quite small compared to
the size of buffers.

Operation can still fail midway through if FW communication
times out.  In that case we retry with old MTU (rings).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 108 +++++++++++++++++++--
 1 file changed, 102 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 03c60f755de0..e7c420fdcb0d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1506,6 +1506,64 @@ err_alloc:
 	return -ENOMEM;
 }
 
+static struct nfp_net_rx_ring *
+nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz)
+{
+	struct nfp_net_rx_ring *rings;
+	unsigned int r;
+
+	rings = kcalloc(nn->num_rx_rings, sizeof(*rings), GFP_KERNEL);
+	if (!rings)
+		return NULL;
+
+	for (r = 0; r < nn->num_rx_rings; r++) {
+		nfp_net_rx_ring_init(&rings[r], nn->rx_rings[r].r_vec, r);
+
+		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz))
+			goto err_free_prev;
+
+		if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r]))
+			goto err_free_ring;
+	}
+
+	return rings;
+
+err_free_prev:
+	while (r--) {
+		nfp_net_rx_ring_bufs_free(nn, &rings[r]);
+err_free_ring:
+		nfp_net_rx_ring_free(&rings[r]);
+	}
+	kfree(rings);
+	return NULL;
+}
+
+static struct nfp_net_rx_ring *
+nfp_net_shadow_rx_rings_swap(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
+{
+	struct nfp_net_rx_ring *old = nn->rx_rings;
+	unsigned int r;
+
+	for (r = 0; r < nn->num_rx_rings; r++)
+		old[r].r_vec->rx_ring = &rings[r];
+
+	nn->rx_rings = rings;
+	return old;
+}
+
+static void
+nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
+{
+	unsigned int r;
+
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		nfp_net_rx_ring_bufs_free(nn, &rings[r]);
+		nfp_net_rx_ring_free(&rings[r]);
+	}
+
+	kfree(rings);
+}
+
 static int
 nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 		       int idx)
@@ -1984,23 +2042,61 @@ static void nfp_net_set_rx_mode(struct net_device *netdev)
 
 static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
 {
+	unsigned int old_mtu, old_fl_bufsz, new_fl_bufsz;
 	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net_rx_ring *tmp_rings;
+	int err;
 
 	if (new_mtu < 68 || new_mtu > nn->max_mtu) {
 		nn_err(nn, "New MTU (%d) is not valid\n", new_mtu);
 		return -EINVAL;
 	}
 
+	old_mtu = netdev->mtu;
+	old_fl_bufsz = nn->fl_bufsz;
+	new_fl_bufsz = NFP_NET_MAX_PREPEND + ETH_HLEN + VLAN_HLEN * 2 + new_mtu;
+
+	if (!netif_running(netdev)) {
+		netdev->mtu = new_mtu;
+		nn->fl_bufsz = new_fl_bufsz;
+		return 0;
+	}
+
+	/* Prepare new rings */
+	tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz);
+	if (!tmp_rings)
+		return -ENOMEM;
+
+	/* Stop device, swap in new rings, try to start the firmware */
+	nfp_net_close_stack(nn);
+	nfp_net_clear_config_and_disable(nn);
+
+	tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings);
+
 	netdev->mtu = new_mtu;
-	nn->fl_bufsz = NFP_NET_MAX_PREPEND + ETH_HLEN + VLAN_HLEN * 2 + new_mtu;
+	nn->fl_bufsz = new_fl_bufsz;
+
+	err = nfp_net_set_config_and_enable(nn);
+	if (err) {
+		const int err_new = err;
+
+		/* Try with old configuration and old rings */
+		tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings);
+
+		netdev->mtu = old_mtu;
+		nn->fl_bufsz = old_fl_bufsz;
 
-	/* restart if running */
-	if (netif_running(netdev)) {
-		nfp_net_netdev_close(netdev);
-		nfp_net_netdev_open(netdev);
+		err = __nfp_net_set_config_and_enable(nn);
+		if (err)
+			nn_err(nn, "Can't restore MTU - FW communication failed (%d,%d)\n",
+			       err_new, err);
 	}
 
-	return 0;
+	nfp_net_shadow_rx_rings_free(nn, tmp_rings);
+
+	nfp_net_open_stack(nn);
+
+	return err;
 }
 
 static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 14/15] nfp: pass ring count as function parameter
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Soon ring resize will call these functions with values
different than the current configuration, so we need to
explicitly pass the ring count as a parameter.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 23 +++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index e7c420fdcb0d..c4f0c70e77ce 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1407,17 +1407,18 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
 /**
  * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
  * @tx_ring:   TX Ring structure to allocate
+ * @cnt:       Ring buffer count
  *
  * Return: 0 on success, negative errno otherwise.
  */
-static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring)
+static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt)
 {
 	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
 	struct pci_dev *pdev = nn->pdev;
 	int sz;
 
-	tx_ring->cnt = nn->txd_cnt;
+	tx_ring->cnt = cnt;
 
 	tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
 	tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size,
@@ -1470,18 +1471,20 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
  * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
  * @rx_ring:  RX ring to allocate
  * @fl_bufsz: Size of buffers to allocate
+ * @cnt:      Ring buffer count
  *
  * Return: 0 on success, negative errno otherwise.
  */
 static int
-nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz)
+nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz,
+		      u32 cnt)
 {
 	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
 	struct pci_dev *pdev = nn->pdev;
 	int sz;
 
-	rx_ring->cnt = nn->rxd_cnt;
+	rx_ring->cnt = cnt;
 	rx_ring->bufsz = fl_bufsz;
 
 	rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
@@ -1507,7 +1510,8 @@ err_alloc:
 }
 
 static struct nfp_net_rx_ring *
-nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz)
+nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz,
+				u32 buf_cnt)
 {
 	struct nfp_net_rx_ring *rings;
 	unsigned int r;
@@ -1519,7 +1523,7 @@ nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz)
 	for (r = 0; r < nn->num_rx_rings; r++) {
 		nfp_net_rx_ring_init(&rings[r], nn->rx_rings[r].r_vec, r);
 
-		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz))
+		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, buf_cnt))
 			goto err_free_prev;
 
 		if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r]))
@@ -1878,12 +1882,12 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		if (err)
 			goto err_free_prev_vecs;
 
-		err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring);
+		err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring, nn->txd_cnt);
 		if (err)
 			goto err_cleanup_vec_p;
 
 		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring,
-					    nn->fl_bufsz);
+					    nn->fl_bufsz, nn->rxd_cnt);
 		if (err)
 			goto err_free_tx_ring_p;
 
@@ -2063,7 +2067,8 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
 	}
 
 	/* Prepare new rings */
-	tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz);
+	tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz,
+						    nn->rxd_cnt);
 	if (!tmp_rings)
 		return -ENOMEM;
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH v5 net-next 15/15] nfp: allow ring size reconfiguration at runtime
From: Jakub Kicinski @ 2016-04-07 18:39 UTC (permalink / raw)
  To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460054388-471-1-git-send-email-jakub.kicinski@netronome.com>

Since many of the required changes have already been made for
changing MTU at runtime, let's reuse them for ring size changes
as well.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |   1 +
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 126 +++++++++++++++++++++
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   |  30 ++---
 3 files changed, 136 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 9ab8e3967dc9..3d53fcf323eb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -724,6 +724,7 @@ void nfp_net_rss_write_key(struct nfp_net *nn);
 void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
 int nfp_net_irqs_alloc(struct nfp_net *nn);
 void nfp_net_irqs_disable(struct nfp_net *nn);
+int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt);
 
 #ifdef CONFIG_NFP_NET_DEBUG
 void nfp_net_debugfs_create(void);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index c4f0c70e77ce..0bdff390c958 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1444,6 +1444,59 @@ err_alloc:
 	return -ENOMEM;
 }
 
+static struct nfp_net_tx_ring *
+nfp_net_shadow_tx_rings_prepare(struct nfp_net *nn, u32 buf_cnt)
+{
+	struct nfp_net_tx_ring *rings;
+	unsigned int r;
+
+	rings = kcalloc(nn->num_tx_rings, sizeof(*rings), GFP_KERNEL);
+	if (!rings)
+		return NULL;
+
+	for (r = 0; r < nn->num_tx_rings; r++) {
+		nfp_net_tx_ring_init(&rings[r], nn->tx_rings[r].r_vec, r);
+
+		if (nfp_net_tx_ring_alloc(&rings[r], buf_cnt))
+			goto err_free_prev;
+	}
+
+	return rings;
+
+err_free_prev:
+	while (r--)
+		nfp_net_tx_ring_free(&rings[r]);
+	kfree(rings);
+	return NULL;
+}
+
+static struct nfp_net_tx_ring *
+nfp_net_shadow_tx_rings_swap(struct nfp_net *nn, struct nfp_net_tx_ring *rings)
+{
+	struct nfp_net_tx_ring *old = nn->tx_rings;
+	unsigned int r;
+
+	for (r = 0; r < nn->num_tx_rings; r++)
+		old[r].r_vec->tx_ring = &rings[r];
+
+	nn->tx_rings = rings;
+	return old;
+}
+
+static void
+nfp_net_shadow_tx_rings_free(struct nfp_net *nn, struct nfp_net_tx_ring *rings)
+{
+	unsigned int r;
+
+	if (!rings)
+		return;
+
+	for (r = 0; r < nn->num_tx_rings; r++)
+		nfp_net_tx_ring_free(&rings[r]);
+
+	kfree(rings);
+}
+
 /**
  * nfp_net_rx_ring_free() - Free resources allocated to a RX ring
  * @rx_ring:  RX ring to free
@@ -1560,6 +1613,9 @@ nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
 {
 	unsigned int r;
 
+	if (!rings)
+		return;
+
 	for (r = 0; r < nn->num_r_vecs; r++) {
 		nfp_net_rx_ring_bufs_free(nn, &rings[r]);
 		nfp_net_rx_ring_free(&rings[r]);
@@ -2104,6 +2160,76 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
 	return err;
 }
 
+int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
+{
+	struct nfp_net_tx_ring *tx_rings = NULL;
+	struct nfp_net_rx_ring *rx_rings = NULL;
+	u32 old_rxd_cnt, old_txd_cnt;
+	int err;
+
+	if (!netif_running(nn->netdev)) {
+		nn->rxd_cnt = rxd_cnt;
+		nn->txd_cnt = txd_cnt;
+		return 0;
+	}
+
+	old_rxd_cnt = nn->rxd_cnt;
+	old_txd_cnt = nn->txd_cnt;
+
+	/* Prepare new rings */
+	if (nn->rxd_cnt != rxd_cnt) {
+		rx_rings = nfp_net_shadow_rx_rings_prepare(nn, nn->fl_bufsz,
+							   rxd_cnt);
+		if (!rx_rings)
+			return -ENOMEM;
+	}
+	if (nn->txd_cnt != txd_cnt) {
+		tx_rings = nfp_net_shadow_tx_rings_prepare(nn, txd_cnt);
+		if (!tx_rings) {
+			nfp_net_shadow_rx_rings_free(nn, rx_rings);
+			return -ENOMEM;
+		}
+	}
+
+	/* Stop device, swap in new rings, try to start the firmware */
+	nfp_net_close_stack(nn);
+	nfp_net_clear_config_and_disable(nn);
+
+	if (rx_rings)
+		rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings);
+	if (tx_rings)
+		tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings);
+
+	nn->rxd_cnt = rxd_cnt;
+	nn->txd_cnt = txd_cnt;
+
+	err = nfp_net_set_config_and_enable(nn);
+	if (err) {
+		const int err_new = err;
+
+		/* Try with old configuration and old rings */
+		if (rx_rings)
+			rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings);
+		if (tx_rings)
+			tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings);
+
+		nn->rxd_cnt = old_rxd_cnt;
+		nn->txd_cnt = old_txd_cnt;
+
+		err = __nfp_net_set_config_and_enable(nn);
+		if (err)
+			nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
+			       err_new, err);
+	}
+
+	nfp_net_shadow_rx_rings_free(nn, rx_rings);
+	nfp_net_shadow_tx_rings_free(nn, tx_rings);
+
+	nfp_net_open_stack(nn);
+
+	return err;
+}
+
 static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
 						struct rtnl_link_stats64 *stats)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 9a4084a68db5..ccfef1f17627 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -153,37 +153,25 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
 	struct nfp_net *nn = netdev_priv(netdev);
 	u32 rxd_cnt, txd_cnt;
 
-	if (netif_running(netdev)) {
-		/* Some NIC drivers allow reconfiguration on the fly,
-		 * some down the interface, change and then up it
-		 * again.  For now we don't allow changes when the
-		 * device is up.
-		 */
-		nn_warn(nn, "Can't change rings while device is up\n");
-		return -EBUSY;
-	}
-
 	/* We don't have separate queues/rings for small/large frames. */
 	if (ring->rx_mini_pending || ring->rx_jumbo_pending)
 		return -EINVAL;
 
 	/* Round up to supported values */
 	rxd_cnt = roundup_pow_of_two(ring->rx_pending);
-	rxd_cnt = max_t(u32, rxd_cnt, NFP_NET_MIN_RX_DESCS);
-	rxd_cnt = min_t(u32, rxd_cnt, NFP_NET_MAX_RX_DESCS);
-
 	txd_cnt = roundup_pow_of_two(ring->tx_pending);
-	txd_cnt = max_t(u32, txd_cnt, NFP_NET_MIN_TX_DESCS);
-	txd_cnt = min_t(u32, txd_cnt, NFP_NET_MAX_TX_DESCS);
 
-	if (nn->rxd_cnt != rxd_cnt || nn->txd_cnt != txd_cnt)
-		nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n",
-		       nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt);
+	if (rxd_cnt < NFP_NET_MIN_RX_DESCS || rxd_cnt > NFP_NET_MAX_RX_DESCS ||
+	    txd_cnt < NFP_NET_MIN_TX_DESCS || txd_cnt > NFP_NET_MAX_TX_DESCS)
+		return -EINVAL;
 
-	nn->rxd_cnt = rxd_cnt;
-	nn->txd_cnt = txd_cnt;
+	if (nn->rxd_cnt == rxd_cnt && nn->txd_cnt == txd_cnt)
+		return 0;
 
-	return 0;
+	nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n",
+	       nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt);
+
+	return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt);
 }
 
 static void nfp_net_get_strings(struct net_device *netdev,
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH 1/9] net: mediatek: update the IRQ part of the binding document
From: David Miller @ 2016-04-07 18:50 UTC (permalink / raw)
  To: blogic-p3rKhJxN3npAfugRpC6u6w
  Cc: nbd-p3rKhJxN3npAfugRpC6u6w, matthias.bgg-Re5JQEeQqe8AvxtiuMwx3w,
	sean.wang-NuS5LvNUpcJWk0Htik3J/w, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	devicetree-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1460051876-53135-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>


Every patch series must begin with a posting labelled "[PATCH 0/9] ..."
which explains what the series is doing, how it is implementing that,
and why it is implemented that way.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH 0/2] selinux: avoid nf hooks overhead when not needed
From: Paul Moore @ 2016-04-07 18:55 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Paolo Abeni, linux-security-module, David S. Miller, James Morris,
	Andreas Gruenbacher, Stephen Smalley, netdev, selinux
In-Reply-To: <20160406234532.GA731@breakpoint.cc>

On Thursday, April 07, 2016 01:45:32 AM Florian Westphal wrote:
> Paul Moore <paul@paul-moore.com> wrote:
> > On Wed, Apr 6, 2016 at 6:14 PM, Florian Westphal <fw@strlen.de> wrote:
> > > netfilter hooks are per namespace -- so there is hook unregister when
> > > netns is destroyed.
> > 
> > Looking around, I see the global and per-namespace registration
> > functions (nf_register_hook and nf_register_net_hook, respectively),
> > but I'm looking to see if/how newly created namespace inherit
> > netfilter hooks from the init network namespace ... if you can create
> > a network namespace and dodge the SELinux hooks, that isn't a good
> > thing from a SELinux point of view, although it might be a plus
> > depending on where you view Paolo's original patches ;)
> 
> Heh :-)
> 
> If you use nf_register_net_hook, the hook is only registered in the
> namespace.
> 
> If you use nf_register_hook, the hook is put on a global list and
> registed in all existing namespaces.
> 
> New namespaces will have the hook added as well (see
> netfilter_net_init -> nf_register_hook_list in netfilter/core.c )
>
> Since nf_register_hook is used it should be impossible to get a netns
> that doesn't call these hooks.

Great, thanks.
 
> > > Do you think it makes sense to rework the patch to delay registering
> > > of the netfiler hooks until the system is in a state where they're
> > > needed, without the 'unregister' aspect?
> > 
> > I would need to see the patch to say for certain, but in principle
> > that seems perfectly reasonable and I think would satisfy both the
> > netdev and SELinux camps - good suggestion.  My main goal is to drop
> > the selinux_nf_ip_init() entirely so it can't be used as a ROP gadget.
> > 
> > We might even be able to trim the secmark_active and peerlbl_active
> > checks in the SELinux netfilter hooks (an earlier attempt at
> > optimization; contrary to popular belief, I do care about SELinux
> > performance), although that would mean that enabling the network
> > access controls would be one way ... I guess you can disregard that
> > last bit, I'm thinking aloud again.
> 
> One way is fine I think.

Yes, just disregard my second paragraph above.
 
> > > Ideally this would even be per netns -- in perfect world we would
> > > be able to make it so that a new netns are created with an empty
> > > hook list.
> > 
> > In general SELinux doesn't care about namespaces, for reasons that are
> > sorta beyond the scope of this conversation, so I would like to stick
> > to a all or nothing approach to enabling the SELinux netfilter hooks
> > across namespaces.  Perhaps we can revisit this at a later time, but
> > let's keep it simple right now.
> 
> Okay, I'd prefer to stick to your recommendation anyway wrt. to selinux
> (Casey, I read your comment regarding smack. Noted, we don't want to
> break smack either...)
> 
> I think that in this case the entire question is:
> 
> In your experience, how likely is a config where selinux is enabled BUT the
> hooks are not needed (i.e., where we hit the
> 
> if (!selinux_policycap_netpeer)
>     return NF_ACCEPT;
> 
> if (!secmark_active && !peerlbl_active)
>    return NF_ACCEPT;
> 
> tests inside the hooks)?  If such setups are uncommon we should just
> drop this idea or at least put it on the back burner until the more
> expensive netfilter hooks (conntrack, cough) are out of the way.

A few years ago I would have said that it is relatively uncommon for admins to 
enable the SELinux network access controls; it was typically just 
government/intelligence agencies who had very strict access control 
requirements and represented a small portion of SELinux users.  However, over 
the past few years I've been fielding more and more questions from admins/devs 
in the virtualization space who are interested in some of these capabilities; 
it isn't clear to me how many of these people are switching it on, but there 
is definitely more interest than I have seen in the past and the interest is 
centered around some rather common use cases.

So, to summarize, I don't know ;)

If you've got bigger sources of overhead, my opinion would be to go tackle 
those first.  Perhaps I can even find the time to work on the 
SELinux/netfilter stuff while you are off slaying the bigger dragons, no 
promises at the moment.

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [PATCH v6 net-next] net: ipv4: Consider failed nexthops in multipath routes
From: Julian Anastasov @ 2016-04-07 18:58 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev
In-Reply-To: <1460038860-25670-1-git-send-email-dsa@cumulusnetworks.com>


	Hello,

On Thu, 7 Apr 2016, David Ahern wrote:

> Multipath route lookups should consider knowledge about next hops and not
> select a hop that is known to be failed.
> 
> Example:
> 
>                      [h2]                   [h3]   15.0.0.5
>                       |                      |
>                      3|                     3|
>                     [SP1]                  [SP2]--+
>                      1  2                   1     2
>                      |  |     /-------------+     |
>                      |   \   /                    |
>                      |     X                      |
>                      |    / \                     |
>                      |   /   \---------------\    |
>                      1  2                     1   2
>          12.0.0.2  [TOR1] 3-----------------3 [TOR2] 12.0.0.3
>                      4                         4
>                       \                       /
>                         \                    /
>                          \                  /
>                           -------|   |-----/
>                                  1   2
>                                 [TOR3]
>                                   3|
>                                    |
>                                   [h1]  12.0.0.1
> 
> host h1 with IP 12.0.0.1 has 2 paths to host h3 at 15.0.0.5:
> 
>     root@h1:~# ip ro ls
>     ...
>     12.0.0.0/24 dev swp1  proto kernel  scope link  src 12.0.0.1
>     15.0.0.0/16
>             nexthop via 12.0.0.2  dev swp1 weight 1
>             nexthop via 12.0.0.3  dev swp1 weight 1
>     ...
> 
> If the link between tor3 and tor1 is down and the link between tor1
> and tor2 then tor1 is effectively cut-off from h1. Yet the route lookups
> in h1 are alternating between the 2 routes: ping 15.0.0.5 gets one and
> ssh 15.0.0.5 gets the other. Connections that attempt to use the
> 12.0.0.2 nexthop fail since that neighbor is not reachable:
> 
>     root@h1:~# ip neigh show
>     ...
>     12.0.0.3 dev swp1 lladdr 00:02:00:00:00:1b REACHABLE
>     12.0.0.2 dev swp1  FAILED
>     ...
> 
> The failed path can be avoided by considering known neighbor information
> when selecting next hops. If the neighbor lookup fails we have no
> knowledge about the nexthop, so give it a shot. If there is an entry
> then only select the nexthop if the state is sane. This is similar to
> what fib_detect_death does.
> 
> To maintain backward compatibility use of the neighbor information is
> based on a new sysctl, fib_multipath_use_neigh.
> 
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>

Reviewed-by: Julian Anastasov <ja@ssi.bg>

> ---
> v6
> - changed __neigh_lookup_noref to __ipv4_neigh_lookup_noref per Dave's
>   comment
> 
> v5
> - returned comma that got lost in the ether and removed resetting of
>   nhsel at end of loop - again comments from Julian
> 
> v4
> - remove NULL initializer and logic for fallback per Julian's comment
> 
> v3
> - Julian comments: changed use of dead in documentation to failed,
>   init state to NUD_REACHABLE which simplifies fib_good_nh, use of
>   nh_dev for neighbor lookup, fallback to first entry which is what
>   current logic does
> 
> v2
> - use rcu locking to avoid refcnts per Eric's suggestion
> - only consider neighbor info for nh_scope == RT_SCOPE_LINK per Julian's
>   comment
> - drop the 'state == NUD_REACHABLE' from the state check since it is
>   part of NUD_VALID (comment from Julian)
> - wrapped the use of the neigh in a sysctl
> 
>  Documentation/networking/ip-sysctl.txt | 10 ++++++++++
>  include/net/netns/ipv4.h               |  3 +++
>  net/ipv4/fib_semantics.c               | 34 +++++++++++++++++++++++++++++-----
>  net/ipv4/sysctl_net_ipv4.c             | 11 +++++++++++
>  4 files changed, 53 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
> index b183e2b606c8..6c7f365b1515 100644
> --- a/Documentation/networking/ip-sysctl.txt
> +++ b/Documentation/networking/ip-sysctl.txt
> @@ -63,6 +63,16 @@ fwmark_reflect - BOOLEAN
>  	fwmark of the packet they are replying to.
>  	Default: 0
>  
> +fib_multipath_use_neigh - BOOLEAN
> +	Use status of existing neighbor entry when determining nexthop for
> +	multipath routes. If disabled, neighbor information is not used and
> +	packets could be directed to a failed nexthop. Only valid for kernels
> +	built with CONFIG_IP_ROUTE_MULTIPATH enabled.
> +	Default: 0 (disabled)
> +	Possible values:
> +	0 - disabled
> +	1 - enabled
> +
>  route/max_size - INTEGER
>  	Maximum number of routes allowed in the kernel.  Increase
>  	this when using large numbers of interfaces and/or routes.
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index a69cde3ce460..d061ffeb1e71 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -133,6 +133,9 @@ struct netns_ipv4 {
>  	struct fib_rules_ops	*mr_rules_ops;
>  #endif
>  #endif
> +#ifdef CONFIG_IP_ROUTE_MULTIPATH
> +	int sysctl_fib_multipath_use_neigh;
> +#endif
>  	atomic_t	rt_genid;
>  };
>  #endif
> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index d97268e8ff10..ab64d9f2eef9 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -1559,21 +1559,45 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
>  }
>  
>  #ifdef CONFIG_IP_ROUTE_MULTIPATH
> +static bool fib_good_nh(const struct fib_nh *nh)
> +{
> +	int state = NUD_REACHABLE;
> +
> +	if (nh->nh_scope == RT_SCOPE_LINK) {
> +		struct neighbour *n;
> +
> +		rcu_read_lock_bh();
> +
> +		n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw);
> +		if (n)
> +			state = n->nud_state;
> +
> +		rcu_read_unlock_bh();
> +	}
> +
> +	return !!(state & NUD_VALID);
> +}
>  
>  void fib_select_multipath(struct fib_result *res, int hash)
>  {
>  	struct fib_info *fi = res->fi;
> +	struct net *net = fi->fib_net;
> +	bool first = false;
>  
>  	for_nexthops(fi) {
>  		if (hash > atomic_read(&nh->nh_upper_bound))
>  			continue;
>  
> -		res->nh_sel = nhsel;
> -		return;
> +		if (!net->ipv4.sysctl_fib_multipath_use_neigh ||
> +		    fib_good_nh(nh)) {
> +			res->nh_sel = nhsel;
> +			return;
> +		}
> +		if (!first) {
> +			res->nh_sel = nhsel;
> +			first = true;
> +		}
>  	} endfor_nexthops(fi);
> -
> -	/* Race condition: route has just become dead. */
> -	res->nh_sel = 0;
>  }
>  #endif
>  
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 1e1fe6086dd9..bb0419582b8d 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -960,6 +960,17 @@ static struct ctl_table ipv4_net_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec,
>  	},
> +#ifdef CONFIG_IP_ROUTE_MULTIPATH
> +	{
> +		.procname	= "fib_multipath_use_neigh",
> +		.data		= &init_net.ipv4.sysctl_fib_multipath_use_neigh,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &one,
> +	},
> +#endif
>  	{ }
>  };
>  
> -- 
> 2.1.4

Regards

^ permalink raw reply

* Re: [RFC PATCH net 3/4] ipv6: datagram: Update dst cache of a connected datagram sk during pmtu update
From: Martin KaFai Lau @ 2016-04-07 19:09 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, Eric Dumazet, Wei Wang, Kernel Team
In-Reply-To: <CAM_iQpXbOj_zuQqHUie_yBOXqBEehHhF2FU8FA+tr7hgYY4QZg@mail.gmail.com>

On Thu, Apr 07, 2016 at 11:37:10AM -0700, Cong Wang wrote:
> You are lost in discussion
Indeed. :(

>
> I still think it is okay without the lock, because even if you take the lock,
> the pmtu update could still happen after you release it, so there is no
> essential difference here. The only reason I can think of for taking
> the sock lock is protecting parallel pmtu update, but it looks safe for
> this case too.
>
> So which case do you want to protect by taking the sock lock?
When the pmtu-update is doing route lookup and another connect is
happening, what sk->sk_v6_daddr will this route lookup use?
the old one, new one or neither of them?

Is it acceptable that getsockopt() is returning something that it
is not currently connected to? and potentially somewhere that it
is never connected to?

^ permalink raw reply

* [RFC PATCH] possible bug in handling of ipv4 route caching
From: Chris Friesen @ 2016-04-07 19:17 UTC (permalink / raw)
  To: netdev

Hi,

We think we may have found a bug in the handling of ipv4 route caching,
and are curious what you think.

For local routes that require a particular output interface we do not
want to cache the result.  Caching the result causes incorrect behaviour
when there are multiple source addresses on the interface.  The end
result is that if the intended recipient is waiting on that interface
for the packet he won't receive it because it will be delivered on the
loopback interface and the IP_PKTINFO ipi_ifindex will be set to the
loopback interface as well.

This can be tested by running a program such as "dhcp_release" which
attempts to inject a packet on a particular interface so that it is
received by another program on the same board.  The receiving process
should see an IP_PKTINFO ipi_ifindex value of the source interface
(e.g., eth1) instead of the loopback interface (e.g., lo).  The packet
will still appear on the loopback interface in tcpdump but the important
aspect is that the CMSG info is correct.

For what it's worth, here's a patch that we've applied locally to deal
with the issue.

Chris



Signed-off-by: Allain Legacy <allain.legacy@windriver.com>
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 02c6229..e965d4b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2045,6 +2045,17 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 		 */
 		if (fi && res->prefixlen < 4)
 			fi = NULL;
+	} else if ((type == RTN_LOCAL) && (orig_oif != 0)) {
+		/* For local routes that require a particular output interface
+                 * we do not want to cache the result.  Caching the result
+                 * causes incorrect behaviour when there are multiple source
+                 * addresses on the interface, the end result being that if the
+                 * intended recipient is waiting on that interface for the
+                 * packet he won't receive it because it will be delivered on
+                 * the loopback interface and the IP_PKTINFO ipi_ifindex will
+                 * be set to the loopback interface as well.
+		 */
+		fi = NULL;
 	}
 
 	fnhe = NULL;

^ permalink raw reply related

* [PATCH V2 0/8] net: mediatek: make the driver pass stress tests
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin

While testing the driver we managed to get the TX path to stall and fail
to recover. When dual MAC support was added to the driver, the whole queue
stop/wake code was not properly adapted. There was also a regression in the
locking of the xmit function. The fact that watchdog_timeo was not set and
that the tx_timeout code failed to properly reset the dma, irq and queue
just made the mess complete.

This series makes the driver pass stress testing. With this series applied
the testbed has been running for several days and still has not locked up.
We have a second setup that has a small hack patch applied to randomly stop
irqs and/or one of the queues and successfully manages to recover from these
simulated tx stalls.

John Crispin (8):
  net: mediatek: watchdog_timeo was not set
  net: mediatek: mtk_cal_txd_req() returns bad value
  net: mediatek: remove superfluous reset call
  net: mediatek: fix stop and wakeup of queue
  net: mediatek: fix mtk_pending_work
  net: mediatek: fix TX locking
  net: mediatek: move the pending_work struct to the device generic
    struct
  net: mediatek: do not set the QID field in the TX DMA descriptors

 drivers/net/ethernet/mediatek/mtk_eth_soc.c |  106 ++++++++++++++++-----------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h |    4 +-
 2 files changed, 66 insertions(+), 44 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* [PATCH V2 1/8] net: mediatek: watchdog_timeo was not set
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, Matthias Brugger,
	Sean Wang (王志亘), netdev, linux-mediatek,
	linux-kernel, John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic@openwrt.org>

The original commit failed to set watchdog_timeo. This patch sets
watchdog_timeo to HZ.

Signed-off-by: John Crispin <blogic@openwrt.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e0b68af..bb10d57 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1645,6 +1645,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
+	eth->netdev[id]->watchdog_timeo = HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
 	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 2/8] net: mediatek: mtk_cal_txd_req() returns bad value
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

The code used to also support the PDMA engine, which had 2 packet pointers
per descriptor. Because of this we had to divide the result by 2 and round
it up. This is no longer needed as the code only supports QDMA.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index bb10d57..94cceb8 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -681,7 +681,7 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
 		nfrags += skb_shinfo(skb)->nr_frags;
 	}
 
-	return DIV_ROUND_UP(nfrags, 2);
+	return nfrags;
 }
 
 static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 3/8] net: mediatek: remove superfluous reset call
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

HW reset is triggered in the mtk_hw_init() function. There is no need to
also reset the core during probe.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |    4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 94cceb8..a4982e4 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1679,10 +1679,6 @@ static int mtk_probe(struct platform_device *pdev)
 	struct mtk_eth *eth;
 	int err;
 
-	err = device_reset(&pdev->dev);
-	if (err)
-		return err;
-
 	match = of_match_device(of_mtk_match, &pdev->dev);
 	soc = (struct mtk_soc_data *)match->data;
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 4/8] net: mediatek: fix stop and wakeup of queue
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

The driver supports 2 MACs. Both run on the same DMA ring. If we go
above/below the TX rings threshold value, we always need to wake/stop
the queue of both devices. Not doing so can cause TX stalls and packet
drops on one of the devices.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |   37 +++++++++++++++++++--------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index a4982e4..4ebc42e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -684,6 +684,28 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
 	return nfrags;
 }
 
+static void mtk_wake_queue(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		netif_wake_queue(eth->netdev[i]);
+	}
+}
+
+static void mtk_stop_queue(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		netif_stop_queue(eth->netdev[i]);
+	}
+}
+
 static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
@@ -695,7 +717,7 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_num = mtk_cal_txd_req(skb);
 	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
-		netif_stop_queue(dev);
+		mtk_stop_queue(eth);
 		netif_err(eth, tx_queued, dev,
 			  "Tx Ring full when queue awake!\n");
 		return NETDEV_TX_BUSY;
@@ -720,10 +742,10 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 
 	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
-		netif_stop_queue(dev);
+		mtk_stop_queue(eth);
 		if (unlikely(atomic_read(&ring->free_count) >
 			     ring->thresh))
-			netif_wake_queue(dev);
+			mtk_wake_queue(eth);
 	}
 
 	return NETDEV_TX_OK;
@@ -897,13 +919,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 	if (!total)
 		return 0;
 
-	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		if (!eth->netdev[i] ||
-		    unlikely(!netif_queue_stopped(eth->netdev[i])))
-			continue;
-		if (atomic_read(&ring->free_count) > ring->thresh)
-			netif_wake_queue(eth->netdev[i]);
-	}
+	if (atomic_read(&ring->free_count) > ring->thresh)
+		mtk_wake_queue(eth);
 
 	return total;
 }
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 5/8] net: mediatek: fix mtk_pending_work
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

The driver supports 2 MACs. Both run on the same DMA ring. If we hit a TX
timeout we need to stop both netdevs before restarting them again. If we
don't do this, mtk_stop() won't shut down DMA and the subsequent call to
mtk_open() won't restart DMA and enable IRQs.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |   31 ++++++++++++++++++---------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 4ebc42e..60b66ab 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1430,19 +1430,30 @@ static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static void mtk_pending_work(struct work_struct *work)
 {
-	struct mtk_mac *mac = container_of(work, struct mtk_mac, pending_work);
-	struct mtk_eth *eth = mac->hw;
-	struct net_device *dev = eth->netdev[mac->id];
-	int err;
+	struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
+	int err, i;
+	unsigned long restart = 0;
 
 	rtnl_lock();
-	mtk_stop(dev);
 
-	err = mtk_open(dev);
-	if (err) {
-		netif_alert(eth, ifup, dev,
-			    "Driver up/down cycle failed, closing device.\n");
-		dev_close(dev);
+	/* stop all devices to make sure that dma is properly shut down */
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!netif_oper_up(eth->netdev[i]))
+			continue;
+		mtk_stop(eth->netdev[i]);
+		__set_bit(i, &restart);
+	}
+
+	/* restart DMA and enable IRQs */
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!test_bit(i, &restart))
+			continue;
+		err = mtk_open(eth->netdev[i]);
+		if (err) {
+			netif_alert(eth, ifup, eth->netdev[i],
+			      "Driver up/down cycle failed, closing device.\n");
+			dev_close(eth->netdev[i]);
+		}
 	}
 	rtnl_unlock();
 }
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 6/8] net: mediatek: fix TX locking
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>

Inside the TX path there is a lock inside the tx_map function. This is
however too late. The patch moves the lock to the start of the xmit
function right before the free count check of the DMA ring happens.
If we do not do this, the code becomes racy leading to TX stalls and
dropped packets. This happens as there are 2 netdevs running on the
same physical DMA ring.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |   20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 60b66ab..8434355 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -536,7 +536,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_dma *itxd, *txd;
 	struct mtk_tx_buf *tx_buf;
-	unsigned long flags;
 	dma_addr_t mapped_addr;
 	unsigned int nr_frags;
 	int i, n_desc = 1;
@@ -568,11 +567,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
 		return -ENOMEM;
 
-	/* normally we can rely on the stack not calling this more than once,
-	 * however we have 2 queues running ont he same ring so we need to lock
-	 * the ring access
-	 */
-	spin_lock_irqsave(&eth->page_lock, flags);
 	WRITE_ONCE(itxd->txd1, mapped_addr);
 	tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
 	dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
@@ -632,8 +626,6 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
 				(!nr_frags * TX_DMA_LS0)));
 
-	spin_unlock_irqrestore(&eth->page_lock, flags);
-
 	netdev_sent_queue(dev, skb->len);
 	skb_tx_timestamp(skb);
 
@@ -661,8 +653,6 @@ err_dma:
 		itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
 	} while (itxd != txd);
 
-	spin_unlock_irqrestore(&eth->page_lock, flags);
-
 	return -ENOMEM;
 }
 
@@ -712,14 +702,22 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct net_device_stats *stats = &dev->stats;
+	unsigned long flags;
 	bool gso = false;
 	int tx_num;
 
+	/* normally we can rely on the stack not calling this more than once,
+	 * however we have 2 queues running ont he same ring so we need to lock
+	 * the ring access
+	 */
+	spin_lock_irqsave(&eth->page_lock, flags);
+
 	tx_num = mtk_cal_txd_req(skb);
 	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
 		mtk_stop_queue(eth);
 		netif_err(eth, tx_queued, dev,
 			  "Tx Ring full when queue awake!\n");
+		spin_unlock_irqrestore(&eth->page_lock, flags);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -747,10 +745,12 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			     ring->thresh))
 			mtk_wake_queue(eth);
 	}
+	spin_unlock_irqrestore(&eth->page_lock, flags);
 
 	return NETDEV_TX_OK;
 
 drop:
+	spin_unlock_irqrestore(&eth->page_lock, flags);
 	stats->tx_dropped++;
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 7/8] net: mediatek: move the pending_work struct to the device generic struct
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, Matthias Brugger,
	Sean Wang (王志亘), netdev, linux-mediatek,
	linux-kernel, John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic@openwrt.org>

The worker always touches both netdevs. It belongs to the ethernet core
and is not MAC specific. We only need one worker, which belongs in the
ethernet core struct.

Signed-off-by: John Crispin <blogic@openwrt.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |   10 ++++------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h |    4 ++--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 8434355..f9f8851 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1193,7 +1193,7 @@ static void mtk_tx_timeout(struct net_device *dev)
 	eth->netdev[mac->id]->stats.tx_errors++;
 	netif_err(eth, tx_err, dev,
 		  "transmit timed out\n");
-	schedule_work(&mac->pending_work);
+	schedule_work(&eth->pending_work);
 }
 
 static irqreturn_t mtk_handle_irq(int irq, void *_eth)
@@ -1438,7 +1438,7 @@ static void mtk_pending_work(struct work_struct *work)
 
 	/* stop all devices to make sure that dma is properly shut down */
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		if (!netif_oper_up(eth->netdev[i]))
+		if (!eth->netdev[i])
 			continue;
 		mtk_stop(eth->netdev[i]);
 		__set_bit(i, &restart);
@@ -1463,15 +1463,13 @@ static int mtk_cleanup(struct mtk_eth *eth)
 	int i;
 
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		struct mtk_mac *mac = netdev_priv(eth->netdev[i]);
-
 		if (!eth->netdev[i])
 			continue;
 
 		unregister_netdev(eth->netdev[i]);
 		free_netdev(eth->netdev[i]);
-		cancel_work_sync(&mac->pending_work);
 	}
+	cancel_work_sync(&eth->pending_work);
 
 	return 0;
 }
@@ -1659,7 +1657,6 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->id = id;
 	mac->hw = eth;
 	mac->of_node = np;
-	INIT_WORK(&mac->pending_work, mtk_pending_work);
 
 	mac->hw_stats = devm_kzalloc(eth->dev,
 				     sizeof(*mac->hw_stats),
@@ -1761,6 +1758,7 @@ static int mtk_probe(struct platform_device *pdev)
 
 	eth->dev = &pdev->dev;
 	eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
+	INIT_WORK(&eth->pending_work, mtk_pending_work);
 
 	err = mtk_hw_init(eth);
 	if (err)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 48a5292..eed626d 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -363,6 +363,7 @@ struct mtk_rx_ring {
  * @clk_gp1:		The gmac1 clock
  * @clk_gp2:		The gmac2 clock
  * @mii_bus:		If there is a bus we need to create an instance for it
+ * @pending_work:	The workqueue used to reset the dma ring
  */
 
 struct mtk_eth {
@@ -389,6 +390,7 @@ struct mtk_eth {
 	struct clk			*clk_gp1;
 	struct clk			*clk_gp2;
 	struct mii_bus			*mii_bus;
+	struct work_struct		pending_work;
 };
 
 /* struct mtk_mac -	the structure that holds the info about the MACs of the
@@ -398,7 +400,6 @@ struct mtk_eth {
  * @hw:			Backpointer to our main datastruture
  * @hw_stats:		Packet statistics counter
  * @phy_dev:		The attached PHY if available
- * @pending_work:	The workqueue used to reset the dma ring
  */
 struct mtk_mac {
 	int				id;
@@ -406,7 +407,6 @@ struct mtk_mac {
 	struct mtk_eth			*hw;
 	struct mtk_hw_stats		*hw_stats;
 	struct phy_device		*phy_dev;
-	struct work_struct		pending_work;
 };
 
 /* the struct describing the SoC. these are declared in the soc_xyz.c files */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2 8/8] net: mediatek: do not set the QID field in the TX DMA descriptors
From: John Crispin @ 2016-04-07 19:26 UTC (permalink / raw)
  To: David S. Miller
  Cc: Felix Fietkau, Matthias Brugger,
	Sean Wang (王志亘), netdev, linux-mediatek,
	linux-kernel, John Crispin
In-Reply-To: <1460057210-55786-1-git-send-email-blogic@openwrt.org>

The QID field gets set to the mac id. This made the DMA linked list queue
the traffic of each MAC on a different internal queue. However during long
term testing we found that this will cause traffic stalls as the multi
queue setup requires a more complete initialisation which is not part of
the upstream driver yet.

This patch removes the code setting the QID field, resulting in all
traffic ending up in queue 0 which works without any special setup.

Signed-off-by: John Crispin <blogic@openwrt.org>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c |    3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index f9f8851..8163047 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -603,8 +603,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 			WRITE_ONCE(txd->txd1, mapped_addr);
 			WRITE_ONCE(txd->txd3, (TX_DMA_SWC |
 					       TX_DMA_PLEN0(frag_map_size) |
-					       last_frag * TX_DMA_LS0) |
-					       mac->id);
+					       last_frag * TX_DMA_LS0));
 			WRITE_ONCE(txd->txd4, 0);
 
 			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH V2] net: mediatek: update the IRQ part of the binding document
From: John Crispin @ 2016-04-07 19:28 UTC (permalink / raw)
  To: David S. Miller
  Cc: devicetree-u79uwXL29TY76Z2rM5mHXA, Felix Fietkau,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	Sean Wang (王志亘),
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mediatek-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r, Matthias Brugger,
	John Crispin

The current binding document only describes a single interrupt. Update the
document by adding the 2 other interrupts.

The driver currently only uses a single interrupt. The HW is however able
to use IRQ grouping to split TX and RX onto separate GIC irqs.

Signed-off-by: John Crispin <blogic-p3rKhJxN3npAfugRpC6u6w@public.gmane.org>
Cc: devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
---
Changes in V2:
* split this patch out of the series that fixes tx stalls in the driver

 Documentation/devicetree/bindings/net/mediatek-net.txt |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index 5ca7929..2f142be 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -9,7 +9,7 @@ have dual GMAC each represented by a child node..
 Required properties:
 - compatible: Should be "mediatek,mt7623-eth"
 - reg: Address and length of the register set for the device
-- interrupts: Should contain the frame engines interrupt
+- interrupts: Should contain the three frame engines interrupts
 - clocks: the clock used by the core
 - clock-names: the names of the clock listed in the clocks property. These are
 	"ethif", "esw", "gp2", "gp1"
@@ -42,7 +42,9 @@ eth: ethernet@1b100000 {
 		 <&ethsys CLK_ETHSYS_GP2>,
 		 <&ethsys CLK_ETHSYS_GP1>;
 	clock-names = "ethif", "esw", "gp2", "gp1";
-	interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_LOW>;
+	interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_LOW
+		      GIC_SPI 199 IRQ_TYPE_LEVEL_LOW
+		      GIC_SPI 198 IRQ_TYPE_LEVEL_LOW>;
 	power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
 	resets = <&ethsys MT2701_ETHSYS_ETH_RST>;
 	reset-names = "eth";
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH V3] net: emac: emac gigabit ethernet controller driver
From: Timur Tabi @ 2016-04-07 19:28 UTC (permalink / raw)
  To: Rob Herring
  Cc: Gilad Avidov, netdev, linux-kernel@vger.kernel.org,
	devicetree@vger.kernel.org, linux-arm-msm, Sagar Dharia, shankerd,
	Greg Kroah-Hartman, vikrams, Christopher Covington
In-Reply-To: <CAL_JsqLnJt+mdL==-qP-EJhMa2rA6i-AyDESj69wNVt7D5JJUQ@mail.gmail.com>

Rob Herring wrote:

>>>> >>>+- reg : Offset and length of the register regions for the device
>>>> >>>+- reg-names : Register region names referenced in 'reg' above.
>>>> >>>+       Required register resource entries are:
>>>> >>>+       "base"   : EMAC controller base register block.
>>>> >>>+       "csr"    : EMAC wrapper register block.
>>>> >>>+       Optional register resource entries are:
>>>> >>>+       "ptp"    : EMAC PTP (1588) register block.
>>>> >>>+                  Required if 'qcom,emac-tstamp-en' is present.
>>>> >>>+       "sgmii"  : EMAC SGMII PHY register block.
>>>> >>>+- interrupts : Interrupt numbers used by this controller
>>>> >>>+- interrupt-names : Interrupt resource names referenced in 'interrupts'
>>>> >>>above.
>>>> >>>+       Required interrupt resource entries are:
>>>> >>>+       "emac_core0"   : EMAC core0 interrupt.
>>>> >>>+       "sgmii_irq"   : EMAC SGMII interrupt.
>>>> >>>+- qcom,emac-gpio-mdc  : GPIO pin number of the MDC line of MDIO bus.
>>>> >>>+- qcom,emac-gpio-mdio : GPIO pin number of the MDIO line of MDIO bus.
>>> >>
>>> >>
>>> >>Use the standard binding for GPIO controlled MDIO bus.
>> >
>> >
>> >I'm not familiar with that one.  Are you talking about
>> >bindings/net/mdio-gpio.txt?

> Yes.
>
>
>>>> >>>+- phy-addr            : Specifies phy address on MDIO bus.
>>>> >>>+                       Required if the optional property
>>>> >>>"qcom,no-external-phy"
>>>> >>>+                       is not specified.
>>> >>
>>> >>
>>> >>Don't you think you will need to know the specific phy device or other
>>> >>properties of the phy?
>> >
>> >
>> >That, I can't answer.  Aren't all MDIO devices basically the same?  It's
>> >been a while since I've worked on them.

> No. There was some discussion just this week about needing to require
> phy devices to have compatible strings.

I'm back to working on this driver, and I need some more help with how 
to handle the phy.  mdio-gpio.txt doesn't really tell me much. I'm 
actually working on an ACPI system and not DT.  I don't want to strip 
out the DT code, but I can't really test it.  I want to keep changes to 
Gilad's patch to a minimum.

Part of the problem with the Emac (and Gilad tried to explain this) is 
that it has an internal phy.  Technically, you can connect this internal 
phy directly to another internal phy on another SOC, and use this as an 
SOC interconnect.  However, I don't know of anyone actually doing that.

Instead, most systems have the internal phy connect to an external phy. 
  This connection is how the Emac receives packets from the external phy.

So I don't understand how I'm supposed to use the binding in 
mdio-gpio.txt.  For one thing, there is no such binding on ACPI systems. 
  On my ACPI system, firmware has set up the GPIOs.  The driver never 
actually makes any gpio_xxx calls.  At this point, I'm tempted to just 
remove all the GPIO stuff from Gilad's patch, if I can't get enough 
help to figure out how to modify the driver the way you think I should.

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum, a Linux Foundation collaborative project.

^ permalink raw reply

* Re: optimizations to sk_buff handling in rds_tcp_data_ready
From: Sowmini Varadhan @ 2016-04-07 19:29 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: alexei.starovoitov, tom, netdev
In-Reply-To: <1460038560.6473.397.camel@edumazet-glaptop3.roam.corp.google.com>

On (04/07/16 07:16), Eric Dumazet wrote:
> Use skb split like TCP in output path ?

That almost looks like what I want, but skb_split modifies both
skb and skb1, and I want to leave skb untouched (otherwise 
I will mess up the book-keeping in tcp_read_sock). But skb_split 
is a good template- I think it could even be extended to avoid copying
the frags that we'd later trim off in rds_tcp_data_recv anyway, 
let me see what I can come up with, based on that code.

> Really, pskb_expand_head() is not supposed to copy payload ;)

That would make my world very easy! But it has callers from all
over the kernel, e.g., skb_realloc_headroom, and changing it is
obviously risky.

^ permalink raw reply

* Re: [Lsf] [Lsf-pc] [LSF/MM TOPIC] Generic page-pool recycle facility?
From: Jesper Dangaard Brouer @ 2016-04-07 19:43 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Chuck Lever, Christoph Hellwig, James Bottomley, Tom Herbert,
	Brenden Blanco, lsf, linux-mm, netdev@vger.kernel.org, lsf-pc,
	Alexei Starovoitov, brouer
In-Reply-To: <1460045640.30063.3.camel@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 1010 bytes --]


On Thu, 07 Apr 2016 12:14:00 -0400 Rik van Riel <riel@redhat.com> wrote:

> On Thu, 2016-04-07 at 08:48 -0700, Chuck Lever wrote:
> > > 
> > > On Apr 7, 2016, at 7:38 AM, Christoph Hellwig <hch@infradead.org>
> > > wrote:
> > > 
> > > This is also very interesting for storage targets, which face the
> > > same issue.  SCST has a mode where it caches some fully constructed
> > > SGLs, which is probably very similar to what NICs want to do.  
> >
> > +1 for NFS server.  
> 
> I have swapped around my slot (into the MM track)
> with Jesper's slot (now a plenary session), since
> there seems to be a fair amount of interest in
> Jesper's proposal from IO and FS people, and my
> topic is more MM specific.

Wow - I'm impressed. I didn't expect such a good slot!
Glad to see the interest!
Thanks!

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 213 bytes --]

^ permalink raw reply

* Re: [PATCH 2/9] rxrpc: Disable a debugging statement that has been left enabled.
From: David Howells @ 2016-04-07 19:43 UTC (permalink / raw)
  To: Joe Perches; +Cc: dhowells, linux-afs, netdev, linux-kernel
In-Reply-To: <1460049545.6715.98.camel@perches.com>

Joe Perches <joe@perches.com> wrote:

> > Joe Perches <joe@perches.com> wrote:
> > > It might be better to remove kenter and _enter
> > > altogether and use function tracing instead.
> > Possibly - but not at this time.
> 
> Swell.

I didn't say I wouldn't do it - it's just that I'm trying to fix other stuff
at the moment and don't particularly want to add that to the list just now.
kenter, _enter and co. are serving me very well.

> > Besides, isn't the function tracing log lost
> > if the machine crashes?
> 
> I believe yes, but would it matter?

Let's see...  If the machine panics whilst I'm developing stuff (quite likely
if something goes wrong in BH context), how do I get at the function tracing
log to find out why it panicked if the log is then lost?  With the serial
console, at least I automatically capture the output of kenter and co..

David

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox