Netdev List
 help / color / mirror / Atom feed
* [PATCH net v3] net: ip6_vti: Fix one possbile memleak when fail to register_netdevice
From: gfree.wind @ 2017-04-29  3:55 UTC (permalink / raw)
  To: davem, steffen.klassert, herbert, kuznet, jmorris, yoshfuji,
	kaber, netdev
  Cc: Gao Feng

From: Gao Feng <gfree.wind@foxmail.com>

The ip6_vti allocates some resources in its ndo_init func, and
free some of them in its destructor func. Then there is one memleak
that some errors happen after register_netdevice invokes the ndo_init
callback. Because only the ndo_uninit callback is invoked in the error
handler of register_netdevice, but destructor not.

Now create one new func vti6_destructor_free to free the mem in
the destructor, and ndo_uninit func also invokes it when fail to
register the vti6 device.

It's not only free all resources, but also follow the original desgin
that the resources are freed in the destructor normally after
register the device successfully.

Signed-off-by: Gao Feng <gfree.wind@foxmail.com>
---
 v3: Split one patch to multiple commits, per David Ahern
 v2: Move the free in ndo_uninit when fail to register, per Herbert Xu
 v1: initial version

 net/ipv6/ip6_vti.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 3d8a3b6..3b3f49a 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -177,9 +177,14 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
 	}
 }
 
-static void vti6_dev_free(struct net_device *dev)
+static void vti6_destructor_free(struct net_device *dev)
 {
 	free_percpu(dev->tstats);
+}
+
+static void vti6_dev_free(struct net_device *dev)
+{
+	vti6_destructor_free(dev);
 	free_netdev(dev);
 }
 
@@ -296,6 +301,10 @@ static void vti6_dev_uninit(struct net_device *dev)
 	else
 		vti6_tnl_unlink(ip6n, t);
 	dev_put(dev);
+
+	/* dev is not registered, perform the free instead of destructor */
+	if (dev->reg_state == NETREG_UNINITIALIZED)
+		vti6_destructor_free(dev);
 }
 
 static int vti6_rcv(struct sk_buff *skb)
-- 
2.7.4

^ permalink raw reply related

* [PATCH net v3] net: ip_tunnel: Fix one possbile memleak when fail to register_netdevice
From: gfree.wind @ 2017-04-29  3:56 UTC (permalink / raw)
  To: davem, kuznet, jmorris, yoshfuji, kaber, netdev; +Cc: Gao Feng

From: Gao Feng <gfree.wind@foxmail.com>

The ip_tunnel allocates some resources in its ndo_init func, and
free some of them in its destructor func. Then there is one memleak
that some errors happen after register_netdevice invokes the ndo_init
callback. Because only the ndo_uninit callback is invoked in the error
handler of register_netdevice, but destructor not.

Now create one new func ip_tunnel_destructor_free to free the mem in
the destructor, and ndo_uninit func also invokes it when fail to
register the ip_tunnel device.

It's not only free all resources, but also follow the original desgin
that the resources are freed in the destructor normally after
register the device successfully.

Signed-off-by: Gao Feng <gfree.wind@foxmail.com>
---
 v3: Split one patch to multiple commits, per David Ahern
 v2: Move the free in ndo_uninit when fail to register, per Herbert Xu
 v1: initial version

 net/ipv4/ip_tunnel.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 823abae..96a3005 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -954,13 +954,18 @@ int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
 
-static void ip_tunnel_dev_free(struct net_device *dev)
+static void ip_tunnel_destructor_free(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
 	gro_cells_destroy(&tunnel->gro_cells);
 	dst_cache_destroy(&tunnel->dst_cache);
 	free_percpu(dev->tstats);
+}
+
+static void ip_tunnel_dev_free(struct net_device *dev)
+{
+	ip_tunnel_destructor_free(dev);
 	free_netdev(dev);
 }
 
@@ -1192,6 +1197,10 @@ void ip_tunnel_uninit(struct net_device *dev)
 		ip_tunnel_del(itn, netdev_priv(dev));
 
 	dst_cache_reset(&tunnel->dst_cache);
+
+	/* dev is not registered, perform the free instead of destructor */
+	if (dev->reg_state == NETREG_UNINITIALIZED)
+		ip_tunnel_destructor_free(dev);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH net v3] net: sit: Fix one possbile memleak when fail to register_netdevice
From: gfree.wind @ 2017-04-29  3:57 UTC (permalink / raw)
  To: davem, kuznet, jmorris, yoshfuji, kaber, netdev; +Cc: Gao Feng

From: Gao Feng <gfree.wind@foxmail.com>

The ipip6 allocates some resources in its ndo_init func, and
free some of them in its destructor func. Then there is one memleak
that some errors happen after register_netdevice invokes the ndo_init
callback. Because only the ndo_uninit callback is invoked in the error
handler of register_netdevice, but destructor not.

Now create one new func ipip6_destructor_free to free the mem in
the destructor, and ndo_uninit func also invokes it when fail to
register the ipip6 device.

It's not only free all resources, but also follow the original desgin
that the resources are freed in the destructor normally after
register the device successfully.

Signed-off-by: Gao Feng <gfree.wind@foxmail.com>
---
 v3: Split one patch to multiple commits, per David Ahern
 v2: Move the free in ndo_uninit when fail to register, per Herbert Xu
 v1: initial version

 net/ipv6/sit.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 99853c6..28c1649 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -464,6 +464,14 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
 	return ok;
 }
 
+static void ipip6_destructor_free(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	dst_cache_destroy(&tunnel->dst_cache);
+	free_percpu(dev->tstats);
+}
+
 static void ipip6_tunnel_uninit(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -477,6 +485,10 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
 	}
 	dst_cache_reset(&tunnel->dst_cache);
 	dev_put(dev);
+
+	/* dev is not registered, perform the free instead of destructor */
+	if (dev->reg_state == NETREG_UNINITIALIZED)
+		ipip6_destructor_free(dev);
 }
 
 static int ipip6_err(struct sk_buff *skb, u32 info)
@@ -1329,10 +1341,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
 
 static void ipip6_dev_free(struct net_device *dev)
 {
-	struct ip_tunnel *tunnel = netdev_priv(dev);
-
-	dst_cache_destroy(&tunnel->dst_cache);
-	free_percpu(dev->tstats);
+	ipip6_destructor_free(dev);
 	free_netdev(dev);
 }
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH net v3] net: vlan: Fix one possbile memleak when fail to register_netdevice
From: gfree.wind @ 2017-04-29  3:59 UTC (permalink / raw)
  To: davem, idosch, jiri, pabeni, mmanning, jarod, stephen, netdev; +Cc: Gao Feng

From: Gao Feng <gfree.wind@foxmail.com>

The vlan driver allocates some resources in its ndo_init func, and
free some of them in its destructor func. Then there is one memleak
that some errors happen after register_netdevice invokes the ndo_init
callback. Because only the ndo_uninit callback is invoked in the error
handler of register_netdevice, but destructor not.

Now create one new func vlan_destructor_free to free the mem in the
destructor, and ndo_uninit func also invokes it when fail to register
the vlan device.

It's not only free all resources, but also follow the original desgin
that the resources are freed in the destructor normally after
register the device successfully.

Signed-off-by: Gao Feng <gfree.wind@foxmail.com>
---
 v3: Split one patch to multiple commits, per David Ahern
 v2: Move the free in ndo_uninit when fail to register, per Herbert Xu
 v1: initial version

 net/8021q/vlan_dev.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e97ab82..adcaa39 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -608,6 +608,14 @@ static int vlan_dev_init(struct net_device *dev)
 	return 0;
 }
 
+static void vlan_destructor_free(struct net_device *dev)
+{
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+
+	free_percpu(vlan->vlan_pcpu_stats);
+	vlan->vlan_pcpu_stats = NULL;
+}
+
 static void vlan_dev_uninit(struct net_device *dev)
 {
 	struct vlan_priority_tci_mapping *pm;
@@ -620,6 +628,10 @@ static void vlan_dev_uninit(struct net_device *dev)
 			kfree(pm);
 		}
 	}
+
+	/* dev is not registered, perform the free instead of destructor */
+	if (dev->reg_state == NETREG_UNINITIALIZED)
+		vlan_destructor_free(dev);
 }
 
 static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
@@ -803,10 +815,7 @@ static const struct net_device_ops vlan_netdev_ops = {
 
 static void vlan_dev_free(struct net_device *dev)
 {
-	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
-
-	free_percpu(vlan->vlan_pcpu_stats);
-	vlan->vlan_pcpu_stats = NULL;
+	vlan_destructor_free(dev);
 	free_netdev(dev);
 }
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 00/11] ibmvnic: Updated reset handler and code fixes
From: Nathan Fontenot @ 2017-04-29  8:15 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon

This set of patches multiple code fixes and a new rest handler
for the ibmvnic driver. In order to implement the new reset handler
for the ibmvnic driver resource initialization needed to be moved to
its own routine, a state variable is introduced to replace the
various is_* flags in the driver, and a new routine to handle the
assorted reasons the driver can be reset.

The additional patches cleanup some of the code, renmove some
memory leaks, and fix a few bugs.

---

Nathan Fontenot (10):
      ibmvnic: Move resource initialization to its own routine
      ibmvnic: Replace is_closed with state field
      ibmvnic: Updated reset handling
      ibmvnic: Delete napi's when releasing driver resources
      ibmvnic: Whitespace correction in release_rx_pools
      ibmvnic: Clean up tx pools when closing
      ibmvnic: Wait for any pending scrqs entries at driver close
      ibmvnic: Check for driver reset first in ibmvnic_xmit
      ibmvnic: Continue skb processing after skb completion error
      ibmvnic: Move queue restart

Thomas Falcon (1):
      From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>


 drivers/net/ethernet/ibm/ibmvnic.c |  564 +++++++++++++++++++++++-------------
 drivers/net/ethernet/ibm/ibmvnic.h |   31 ++
 2 files changed, 390 insertions(+), 205 deletions(-)

^ permalink raw reply

* [PATCH net-next 01/11] ibmvnic: Move resource initialization to its own routine
From: Nathan Fontenot @ 2017-04-29  8:15 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

Move all of the calls to initialize resources for the driver to
a separate routine.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   71 ++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4fcd2f0..c67f1d6 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -624,22 +624,10 @@ static int set_real_num_queues(struct net_device *netdev)
 	return rc;
 }
 
-static int ibmvnic_open(struct net_device *netdev)
+static int init_resources(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	struct device *dev = &adapter->vdev->dev;
-	int rc = 0;
-	int i;
-
-	if (adapter->is_closed) {
-		rc = ibmvnic_init(adapter);
-		if (rc)
-			return rc;
-	}
-
-	rc = ibmvnic_login(netdev);
-	if (rc)
-		return rc;
+	struct net_device *netdev = adapter->netdev;
+	int i, rc;
 
 	rc = set_real_num_queues(netdev);
 	if (rc)
@@ -647,7 +635,7 @@ static int ibmvnic_open(struct net_device *netdev)
 
 	rc = init_sub_crq_irqs(adapter);
 	if (rc) {
-		dev_err(dev, "failed to initialize sub crq irqs\n");
+		netdev_err(netdev, "failed to initialize sub crq irqs\n");
 		return -1;
 	}
 
@@ -659,25 +647,47 @@ static int ibmvnic_open(struct net_device *netdev)
 	adapter->napi = kcalloc(adapter->req_rx_queues,
 				sizeof(struct napi_struct), GFP_KERNEL);
 	if (!adapter->napi)
-		goto ibmvnic_open_fail;
+		return -ENOMEM;
+
 	for (i = 0; i < adapter->req_rx_queues; i++) {
 		netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
 			       NAPI_POLL_WEIGHT);
-		napi_enable(&adapter->napi[i]);
 	}
 
 	send_map_query(adapter);
 
 	rc = init_rx_pools(netdev);
 	if (rc)
-		goto ibmvnic_open_fail;
+		return rc;
 
 	rc = init_tx_pools(netdev);
+	return rc;
+}
+
+static int ibmvnic_open(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int i, rc;
+
+	if (adapter->is_closed) {
+		rc = ibmvnic_init(adapter);
+		if (rc)
+			return rc;
+	}
+
+	rc = ibmvnic_login(netdev);
 	if (rc)
-		goto ibmvnic_open_fail;
+		return rc;
+
+	rc = init_resources(adapter);
+	if (rc)
+		return rc;
 
 	replenish_pools(adapter);
 
+	for (i = 0; i < adapter->req_rx_queues; i++)
+		napi_enable(&adapter->napi[i]);
+
 	/* We're ready to receive frames, enable the sub-crq interrupts and
 	 * set the logical link state to up
 	 */
@@ -688,19 +698,16 @@ static int ibmvnic_open(struct net_device *netdev)
 		enable_scrq_irq(adapter, adapter->tx_scrq[i]);
 
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
-	if (rc)
-		goto ibmvnic_open_fail;
-
-	netif_tx_start_all_queues(netdev);
-	adapter->is_closed = false;
-
-	return 0;
+	if (rc) {
+		for (i = 0; i < adapter->req_rx_queues; i++)
+			napi_disable(&adapter->napi[i]);
+		release_resources(adapter);
+	} else {
+		netif_tx_start_all_queues(netdev);
+		adapter->is_closed = false;
+	}
 
-ibmvnic_open_fail:
-	for (i = 0; i < adapter->req_rx_queues; i++)
-		napi_disable(&adapter->napi[i]);
-	release_resources(adapter);
-	return -ENOMEM;
+	return rc;
 }
 
 static void disable_sub_crqs(struct ibmvnic_adapter *adapter)

^ permalink raw reply related

* [PATCH net-next 02/11] ibmvnic: Replace is_closed with state field
From: Nathan Fontenot @ 2017-04-29  8:15 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

Replace the is_closed flag in the ibmvnic adapter strcut with a
more comprehensive state field that tracks the current state of
the driver.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   20 +++++++++++++-------
 drivers/net/ethernet/ibm/ibmvnic.h |   12 ++++++++++--
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c67f1d6..40a8ba0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -669,7 +669,9 @@ static int ibmvnic_open(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int i, rc;
 
-	if (adapter->is_closed) {
+	adapter->state = VNIC_OPENING;
+
+	if (adapter->state == VNIC_CLOSED) {
 		rc = ibmvnic_init(adapter);
 		if (rc)
 			return rc;
@@ -704,7 +706,7 @@ static int ibmvnic_open(struct net_device *netdev)
 		release_resources(adapter);
 	} else {
 		netif_tx_start_all_queues(netdev);
-		adapter->is_closed = false;
+		adapter->state = VNIC_OPEN;
 	}
 
 	return rc;
@@ -733,7 +735,7 @@ static int ibmvnic_close(struct net_device *netdev)
 	int rc = 0;
 	int i;
 
-	adapter->closing = true;
+	adapter->state = VNIC_CLOSING;
 	disable_sub_crqs(adapter);
 
 	if (adapter->napi) {
@@ -748,8 +750,7 @@ static int ibmvnic_close(struct net_device *netdev)
 
 	release_resources(adapter);
 
-	adapter->is_closed = true;
-	adapter->closing = false;
+	adapter->state = VNIC_CLOSED;
 	return rc;
 }
 
@@ -1860,7 +1861,8 @@ static int pending_scrq(struct ibmvnic_adapter *adapter,
 {
 	union sub_crq *entry = &scrq->msgs[scrq->cur];
 
-	if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP || adapter->closing)
+	if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP ||
+	    adapter->state == VNIC_CLOSING)
 		return 1;
 	else
 		return 0;
@@ -3353,6 +3355,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		return -ENOMEM;
 
 	adapter = netdev_priv(netdev);
+	adapter->state = VNIC_PROBING;
 	dev_set_drvdata(&dev->dev, netdev);
 	adapter->vdev = dev;
 	adapter->netdev = netdev;
@@ -3380,7 +3383,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	}
 
 	netdev->mtu = adapter->req_mtu - ETH_HLEN;
-	adapter->is_closed = false;
 
 	rc = register_netdev(netdev);
 	if (rc) {
@@ -3390,6 +3392,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	}
 	dev_info(&dev->dev, "ibmvnic registered\n");
 
+	adapter->state = VNIC_PROBED;
 	return 0;
 }
 
@@ -3398,12 +3401,15 @@ static int ibmvnic_remove(struct vio_dev *dev)
 	struct net_device *netdev = dev_get_drvdata(&dev->dev);
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
+	adapter->state = VNIC_REMOVING;
 	unregister_netdev(netdev);
 
 	release_resources(adapter);
 	release_sub_crqs(adapter);
 	release_crq_queue(adapter);
 
+	adapter->state = VNIC_REMOVED;
+
 	free_netdev(netdev);
 	dev_set_drvdata(&dev->dev, NULL);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index a69979f..03a866f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -913,6 +913,15 @@ struct ibmvnic_error_buff {
 	__be32 error_id;
 };
 
+enum vnic_state {VNIC_PROBING = 1,
+		 VNIC_PROBED,
+		 VNIC_OPENING,
+		 VNIC_OPEN,
+		 VNIC_CLOSING,
+		 VNIC_CLOSED,
+		 VNIC_REMOVING,
+		 VNIC_REMOVED};
+
 struct ibmvnic_adapter {
 	struct vio_dev *vdev;
 	struct net_device *netdev;
@@ -962,7 +971,6 @@ struct ibmvnic_adapter {
 	u64 promisc;
 
 	struct ibmvnic_tx_pool *tx_pool;
-	bool closing;
 	struct completion init_done;
 	int init_done_rc;
 
@@ -1011,5 +1019,5 @@ struct ibmvnic_adapter {
 	struct work_struct ibmvnic_xport;
 	struct tasklet_struct tasklet;
 	bool failover;
-	bool is_closed;
+	enum vnic_state state;
 };

^ permalink raw reply related

* [PATCH net-next 03/11] ibmvnic: Updated reset handling
From: Nathan Fontenot @ 2017-04-29  8:15 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

The ibmvnic driver has multiple handlers for resetting the driver
depending on the reason the reset is needed (failover, lpm,
fatal erors,...). All of the reset handlers do essentially the same
thing, this patch moves this work to a common reset handler.

By doing this we also allow the driver to better handle situations
where we can get a reset while handling a reset.

The updated reset handling works by adding a reset work item to the
list of resets and then scheduling work to perform the reset. This
step is necessary because we can receive a reset in interrupt context
and we want to handle the reset out of interrupt context.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |  413 +++++++++++++++++++++++-------------
 drivers/net/ethernet/ibm/ibmvnic.h |   19 +-
 2 files changed, 275 insertions(+), 157 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 40a8ba0..b3d9b28 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -194,7 +194,8 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 	if (!ltb->buff)
 		return;
 
-	if (!adapter->failover)
+	if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
+	    adapter->reset_reason != VNIC_RESET_MOBILITY)
 		send_request_unmap(adapter, ltb->map_id);
 	dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
 }
@@ -292,9 +293,6 @@ static void replenish_pools(struct ibmvnic_adapter *adapter)
 {
 	int i;
 
-	if (adapter->migrated)
-		return;
-
 	adapter->replenish_task_cycles++;
 	for (i = 0; i < be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
 	     i++) {
@@ -569,11 +567,6 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
 	bool resend;
 	int rc;
 
-	if (adapter->logical_link_state == link_state) {
-		netdev_dbg(netdev, "Link state already %d\n", link_state);
-		return 0;
-	}
-
 	netdev_err(netdev, "setting link state %d\n", link_state);
 	memset(&crq, 0, sizeof(crq));
 	crq.logical_link_state.first = IBMVNIC_CRQ_CMD;
@@ -664,27 +657,13 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 	return rc;
 }
 
-static int ibmvnic_open(struct net_device *netdev)
+static int __ibmvnic_open(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	enum vnic_state prev_state = adapter->state;
 	int i, rc;
 
 	adapter->state = VNIC_OPENING;
-
-	if (adapter->state == VNIC_CLOSED) {
-		rc = ibmvnic_init(adapter);
-		if (rc)
-			return rc;
-	}
-
-	rc = ibmvnic_login(netdev);
-	if (rc)
-		return rc;
-
-	rc = init_resources(adapter);
-	if (rc)
-		return rc;
-
 	replenish_pools(adapter);
 
 	for (i = 0; i < adapter->req_rx_queues; i++)
@@ -693,22 +672,65 @@ static int ibmvnic_open(struct net_device *netdev)
 	/* We're ready to receive frames, enable the sub-crq interrupts and
 	 * set the logical link state to up
 	 */
-	for (i = 0; i < adapter->req_rx_queues; i++)
-		enable_scrq_irq(adapter, adapter->rx_scrq[i]);
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		if (prev_state == VNIC_CLOSED)
+			enable_irq(adapter->rx_scrq[i]->irq);
+		else
+			enable_scrq_irq(adapter, adapter->rx_scrq[i]);
+	}
 
-	for (i = 0; i < adapter->req_tx_queues; i++)
-		enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+	for (i = 0; i < adapter->req_tx_queues; i++) {
+		if (prev_state == VNIC_CLOSED)
+			enable_irq(adapter->tx_scrq[i]->irq);
+		else
+			enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+	}
 
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
 	if (rc) {
 		for (i = 0; i < adapter->req_rx_queues; i++)
 			napi_disable(&adapter->napi[i]);
 		release_resources(adapter);
-	} else {
-		netif_tx_start_all_queues(netdev);
-		adapter->state = VNIC_OPEN;
+		return rc;
+	}
+	
+	netif_tx_start_all_queues(netdev);
+
+	if (prev_state == VNIC_CLOSED) {
+		for (i = 0; i < adapter->req_rx_queues; i++)
+			napi_schedule(&adapter->napi[i]);
 	}
 
+	adapter->state = VNIC_OPEN;
+	return rc;
+}
+
+static int ibmvnic_open(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	mutex_lock(&adapter->reset_lock);
+
+	if (adapter->state != VNIC_CLOSED) {
+		rc = ibmvnic_login(netdev);
+		if (rc) {
+			mutex_unlock(&adapter->reset_lock);
+			return rc;
+		}
+
+		rc = init_resources(adapter);
+		if (rc) {
+			netdev_err(netdev, "failed to initialize resources\n");
+			release_resources(adapter);
+			mutex_unlock(&adapter->reset_lock);
+			return rc;
+		}
+	}
+
+	rc = __ibmvnic_open(netdev);
+	mutex_unlock(&adapter->reset_lock);
+
 	return rc;
 }
 
@@ -729,13 +751,14 @@ static void disable_sub_crqs(struct ibmvnic_adapter *adapter)
 	}
 }
 
-static int ibmvnic_close(struct net_device *netdev)
+static int __ibmvnic_close(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int rc = 0;
 	int i;
 
 	adapter->state = VNIC_CLOSING;
+	netif_tx_stop_all_queues(netdev);
 	disable_sub_crqs(adapter);
 
 	if (adapter->napi) {
@@ -743,17 +766,24 @@ static int ibmvnic_close(struct net_device *netdev)
 			napi_disable(&adapter->napi[i]);
 	}
 
-	if (!adapter->failover)
-		netif_tx_stop_all_queues(netdev);
-
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
 
-	release_resources(adapter);
-
 	adapter->state = VNIC_CLOSED;
 	return rc;
 }
 
+static int ibmvnic_close(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	mutex_lock(&adapter->reset_lock);
+	rc = __ibmvnic_close(netdev);
+	mutex_unlock(&adapter->reset_lock);
+
+	return rc;
+}
+
 /**
  * build_hdr_data - creates L2/L3/L4 header data buffer
  * @hdr_field - bitfield determining needed headers
@@ -915,7 +945,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
 				   be32_to_cpu(adapter->login_rsp_buf->
 					       off_txsubm_subcrqs));
-	if (adapter->migrated) {
+	if (adapter->resetting) {
 		if (!netif_subqueue_stopped(netdev, skb))
 			netif_stop_subqueue(netdev, queue_num);
 		dev_kfree_skb_any(skb);
@@ -1107,18 +1137,185 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	return 0;
 }
 
-static void ibmvnic_tx_timeout(struct net_device *dev)
+/**
+ * do_reset returns zero if we are able to keep processing reset events, or
+ * non-zero if we hit a fatal error and must halt.
+ */
+static int do_reset(struct ibmvnic_adapter *adapter,
+		    struct ibmvnic_rwi *rwi, u32 reset_state)
 {
-	struct ibmvnic_adapter *adapter = netdev_priv(dev);
-	int rc;
+	struct net_device *netdev = adapter->netdev;
+	int i, rc;
+
+	netif_carrier_off(netdev);
+	adapter->reset_reason = rwi->reset_reason;
+
+	if (rwi->reset_reason == VNIC_RESET_MOBILITY) {
+		rc = ibmvnic_reenable_crq_queue(adapter);
+		if (rc)
+			return 0;
+	}
+
+	rc = __ibmvnic_close(netdev);
+	if (rc)
+		return rc;
+
+	/* remove the closed state so when we call open it appears
+	 * we are coming from the probed state.
+	 */
+	adapter->state = VNIC_PROBED;
 
-	/* Adapter timed out, resetting it */
+	release_resources(adapter);
 	release_sub_crqs(adapter);
-	rc = ibmvnic_reset_crq(adapter);
+	release_crq_queue(adapter);
+
+	rc = ibmvnic_init(adapter);
 	if (rc)
-		dev_err(&adapter->vdev->dev, "Adapter timeout, reset failed\n");
-	else
-		ibmvnic_send_crq_init(adapter);
+		return 0;
+
+	/* If the adapter was in PROBE state prior to the reset, exit here. */
+	if (reset_state == VNIC_PROBED)
+		return 0;
+
+	rc = ibmvnic_login(netdev);
+	if (rc) {
+		adapter->state = VNIC_PROBED;
+		return 0;
+	}
+
+	rtnl_lock();
+	rc = init_resources(adapter);
+	rtnl_unlock();
+	if (rc)
+		return rc;
+
+	if (reset_state == VNIC_CLOSED)
+		return 0;
+
+	rc = __ibmvnic_open(netdev);
+	if (rc) {
+		if (list_empty(&adapter->rwi_list))
+			adapter->state = VNIC_CLOSED;
+		else
+			adapter->state = reset_state;
+
+		return 0;
+	}
+
+	netif_carrier_on(netdev);
+
+	/* kick napi */
+	for (i = 0; i < adapter->req_rx_queues; i++)
+		napi_schedule(&adapter->napi[i]);
+
+	return 0;
+}
+
+static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_rwi *rwi;
+
+	mutex_lock(&adapter->rwi_lock);
+
+	if (!list_empty(&adapter->rwi_list)) {
+		rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi,
+				       list);
+		list_del(&rwi->list);
+	} else {
+		rwi = NULL;
+	}
+
+	mutex_unlock(&adapter->rwi_lock);
+	return rwi;
+}
+
+static void free_all_rwi(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_rwi *rwi;
+
+	rwi = get_next_rwi(adapter);
+	while (rwi) {
+		kfree(rwi);
+		rwi = get_next_rwi(adapter);
+	}
+}
+
+static void __ibmvnic_reset(struct work_struct *work)
+{
+	struct ibmvnic_rwi *rwi;
+	struct ibmvnic_adapter *adapter;
+	struct net_device *netdev;
+	u32 reset_state;
+	int rc;
+
+	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
+	netdev = adapter->netdev;
+
+	mutex_lock(&adapter->reset_lock);
+	adapter->resetting = true;
+	reset_state = adapter->state;
+
+	rwi = get_next_rwi(adapter);
+	while (rwi) {
+		rc = do_reset(adapter, rwi, reset_state);
+		kfree(rwi);
+		if (rc)
+			break;
+
+		rwi = get_next_rwi(adapter);
+	}
+
+	if (rc) {
+		free_all_rwi(adapter);
+		return;
+	}
+
+	adapter->resetting = false;
+	mutex_unlock(&adapter->reset_lock);
+}
+
+static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
+			  enum ibmvnic_reset_reason reason)
+{
+	struct ibmvnic_rwi *rwi, *tmp;
+	struct net_device *netdev = adapter->netdev;
+	struct list_head *entry;
+
+	if (adapter->state == VNIC_REMOVING ||
+	    adapter->state == VNIC_REMOVED) {
+		netdev_dbg(netdev, "Adapter removing, skipping reset\n");
+		return;
+	}
+
+	mutex_lock(&adapter->rwi_lock);
+
+	list_for_each(entry, &adapter->rwi_list) {
+		tmp = list_entry(entry, struct ibmvnic_rwi, list);
+		if (tmp->reset_reason == reason) {
+			netdev_err(netdev, "Matching reset found, skipping\n");
+			mutex_unlock(&adapter->rwi_lock);
+			return;
+		}
+	}
+
+	rwi = kzalloc(sizeof(*rwi), GFP_KERNEL);
+	if (!rwi) {
+		mutex_unlock(&adapter->rwi_lock);
+		ibmvnic_close(netdev);
+		return;
+	}
+
+	rwi->reset_reason = reason;
+	list_add_tail(&rwi->list, &adapter->rwi_list);
+	mutex_unlock(&adapter->rwi_lock);
+	schedule_work(&adapter->ibmvnic_reset);
+}
+
+static void ibmvnic_tx_timeout(struct net_device *dev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(dev);
+
+	ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT);
 }
 
 static void remove_buff_from_pool(struct ibmvnic_adapter *adapter,
@@ -2000,18 +2197,6 @@ static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter)
 	return ibmvnic_send_crq(adapter, &crq);
 }
 
-static int ibmvnic_send_crq_init_complete(struct ibmvnic_adapter *adapter)
-{
-	union ibmvnic_crq crq;
-
-	memset(&crq, 0, sizeof(crq));
-	crq.generic.first = IBMVNIC_CRQ_INIT_CMD;
-	crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE;
-	netdev_dbg(adapter->netdev, "Sending CRQ init complete\n");
-
-	return ibmvnic_send_crq(adapter, &crq);
-}
-
 static int send_version_xchg(struct ibmvnic_adapter *adapter)
 {
 	union ibmvnic_crq crq;
@@ -2509,6 +2694,9 @@ static void handle_error_indication(union ibmvnic_crq *crq,
 
 	if (be32_to_cpu(crq->error_indication.error_id))
 		request_error_information(adapter, crq);
+
+	if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR)
+		ibmvnic_reset(adapter, VNIC_RESET_FATAL);
 }
 
 static void handle_change_mac_rsp(union ibmvnic_crq *crq,
@@ -2897,26 +3085,6 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
 	}
 }
 
-static void ibmvnic_xport_event(struct work_struct *work)
-{
-	struct ibmvnic_adapter *adapter = container_of(work,
-						       struct ibmvnic_adapter,
-						       ibmvnic_xport);
-	struct device *dev = &adapter->vdev->dev;
-	long rc;
-
-	release_sub_crqs(adapter);
-	if (adapter->migrated) {
-		rc = ibmvnic_reenable_crq_queue(adapter);
-		if (rc)
-			dev_err(dev, "Error after enable rc=%ld\n", rc);
-		adapter->migrated = false;
-		rc = ibmvnic_send_crq_init(adapter);
-		if (rc)
-			dev_err(dev, "Error sending init rc=%ld\n", rc);
-	}
-}
-
 static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 			       struct ibmvnic_adapter *adapter)
 {
@@ -2934,12 +3102,6 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 		switch (gen_crq->cmd) {
 		case IBMVNIC_CRQ_INIT:
 			dev_info(dev, "Partner initialized\n");
-			/* Send back a response */
-			rc = ibmvnic_send_crq_init_complete(adapter);
-			if (!rc)
-				schedule_work(&adapter->vnic_crq_init);
-			else
-				dev_err(dev, "Can't send initrsp rc=%ld\n", rc);
 			break;
 		case IBMVNIC_CRQ_INIT_COMPLETE:
 			dev_info(dev, "Partner initialization complete\n");
@@ -2950,19 +3112,18 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 		}
 		return;
 	case IBMVNIC_CRQ_XPORT_EVENT:
+		netif_carrier_off(netdev);
 		if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
-			dev_info(dev, "Re-enabling adapter\n");
-			adapter->migrated = true;
-			schedule_work(&adapter->ibmvnic_xport);
+			dev_info(dev, "Migrated, re-enabling adapter\n");
+			ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
 		} else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
 			dev_info(dev, "Backing device failover detected\n");
-			netif_carrier_off(netdev);
-			adapter->failover = true;
+			ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
 		} else {
 			/* The adapter lost the connection */
 			dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
 				gen_crq->cmd);
-			schedule_work(&adapter->ibmvnic_xport);
+			ibmvnic_reset(adapter, VNIC_RESET_FATAL);
 		}
 		return;
 	case IBMVNIC_CRQ_CMD_RSP:
@@ -3243,64 +3404,6 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter)
 	return retrc;
 }
 
-static void handle_crq_init_rsp(struct work_struct *work)
-{
-	struct ibmvnic_adapter *adapter = container_of(work,
-						       struct ibmvnic_adapter,
-						       vnic_crq_init);
-	struct device *dev = &adapter->vdev->dev;
-	struct net_device *netdev = adapter->netdev;
-	unsigned long timeout = msecs_to_jiffies(30000);
-	bool restart = false;
-	int rc;
-
-	if (adapter->failover) {
-		release_sub_crqs(adapter);
-		if (netif_running(netdev)) {
-			netif_tx_disable(netdev);
-			ibmvnic_close(netdev);
-			restart = true;
-		}
-	}
-
-	reinit_completion(&adapter->init_done);
-	send_version_xchg(adapter);
-	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
-		dev_err(dev, "Passive init timeout\n");
-		goto task_failed;
-	}
-
-	netdev->mtu = adapter->req_mtu - ETH_HLEN;
-
-	if (adapter->failover) {
-		adapter->failover = false;
-		if (restart) {
-			rc = ibmvnic_open(netdev);
-			if (rc)
-				goto restart_failed;
-		}
-		netif_carrier_on(netdev);
-		return;
-	}
-
-	rc = register_netdev(netdev);
-	if (rc) {
-		dev_err(dev,
-			"failed to register netdev rc=%d\n", rc);
-		goto register_failed;
-	}
-	dev_info(dev, "ibmvnic registered\n");
-
-	return;
-
-restart_failed:
-	dev_err(dev, "Failed to restart ibmvnic, rc=%d\n", rc);
-register_failed:
-	release_sub_crqs(adapter);
-task_failed:
-	dev_err(dev, "Passive initialization was not successful\n");
-}
-
 static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
@@ -3359,7 +3462,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	dev_set_drvdata(&dev->dev, netdev);
 	adapter->vdev = dev;
 	adapter->netdev = netdev;
-	adapter->failover = false;
 
 	ether_addr_copy(adapter->mac_addr, mac_addr_p);
 	ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
@@ -3368,14 +3470,17 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	netdev->ethtool_ops = &ibmvnic_ethtool_ops;
 	SET_NETDEV_DEV(netdev, &dev->dev);
 
-	INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp);
-	INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event);
-
 	spin_lock_init(&adapter->stats_lock);
 
 	INIT_LIST_HEAD(&adapter->errors);
 	spin_lock_init(&adapter->error_list_lock);
 
+	INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
+	INIT_LIST_HEAD(&adapter->rwi_list);
+	mutex_init(&adapter->reset_lock);
+	mutex_init(&adapter->rwi_lock);
+	adapter->resetting = false;
+
 	rc = ibmvnic_init(adapter);
 	if (rc) {
 		free_netdev(netdev);
@@ -3403,6 +3508,7 @@ static int ibmvnic_remove(struct vio_dev *dev)
 
 	adapter->state = VNIC_REMOVING;
 	unregister_netdev(netdev);
+	mutex_lock(&adapter->reset_lock);
 
 	release_resources(adapter);
 	release_sub_crqs(adapter);
@@ -3410,6 +3516,7 @@ static int ibmvnic_remove(struct vio_dev *dev)
 
 	adapter->state = VNIC_REMOVED;
 
+	mutex_unlock(&adapter->reset_lock);
 	free_netdev(netdev);
 	dev_set_drvdata(&dev->dev, NULL);
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 03a866f..4702b48 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -922,6 +922,16 @@ enum vnic_state {VNIC_PROBING = 1,
 		 VNIC_REMOVING,
 		 VNIC_REMOVED};
 
+enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
+			   VNIC_RESET_MOBILITY,
+			   VNIC_RESET_FATAL,
+			   VNIC_RESET_TIMEOUT};
+
+struct ibmvnic_rwi {
+	enum ibmvnic_reset_reason reset_reason;
+	struct list_head list;
+};
+
 struct ibmvnic_adapter {
 	struct vio_dev *vdev;
 	struct net_device *netdev;
@@ -931,7 +941,6 @@ struct ibmvnic_adapter {
 	dma_addr_t ip_offload_tok;
 	struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
 	dma_addr_t ip_offload_ctrl_tok;
-	bool migrated;
 	u32 msg_enable;
 
 	/* Statistics */
@@ -1015,9 +1024,11 @@ struct ibmvnic_adapter {
 	__be64 tx_rx_desc_req;
 	u8 map_id;
 
-	struct work_struct vnic_crq_init;
-	struct work_struct ibmvnic_xport;
 	struct tasklet_struct tasklet;
-	bool failover;
 	enum vnic_state state;
+	enum ibmvnic_reset_reason reset_reason;
+	struct mutex reset_lock, rwi_lock;
+	struct list_head rwi_list;
+	struct work_struct ibmvnic_reset;
+	bool resetting;
 };

^ permalink raw reply related

* [PATCH net-next 04/11] ibmvnic: Delete napi's when releasing driver resources
From: Nathan Fontenot @ 2017-04-29  8:15 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

The napi structs allocated at drivier initializatio need to be
free'ed when releasing the drivers resources.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |    9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index b3d9b28..9f2686d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -552,11 +552,20 @@ static int ibmvnic_login(struct net_device *netdev)
 
 static void release_resources(struct ibmvnic_adapter *adapter)
 {
+	int i;
+
 	release_tx_pools(adapter);
 	release_rx_pools(adapter);
 
 	release_stats_token(adapter);
 	release_error_buffers(adapter);
+
+	if (adapter->napi) {
+		for (i = 0; i < adapter->req_rx_queues; i++) {
+			if (&adapter->napi[i])
+				netif_napi_del(&adapter->napi[i]);
+		}
+	}
 }
 
 static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)

^ permalink raw reply related

* [PATCH net-next 05/11] ibmvnic: Whitespace correction in release_rx_pools
From: Nathan Fontenot @ 2017-04-29  8:15 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 9f2686d..d20d884 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -348,7 +348,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
 		free_long_term_buff(adapter, &rx_pool->long_term_buff);
 
 		if (!rx_pool->rx_buff)
-		continue;
+			continue;
 
 		for (j = 0; j < rx_pool->size; j++) {
 			if (rx_pool->rx_buff[j].skb) {

^ permalink raw reply related

* [PATCH net-next 06/11] ibmvnic: Clean up tx pools when closing
From: Nathan Fontenot @ 2017-04-29  8:16 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

When closing the bimvnic driver, most notably during the reset
path, the tx pools need to be cleaned to ensure there are no
hanging skbs that need to be free'ed.

The need to do this was found during debugging a loss of network
traffic after handling a driver reset. The underlying cause was
some skbs that were in the tx pool that were never free'ed. As a
result the upper network layers never tried a re-send since it
believed the driver still had the skb.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index d20d884..0400ae7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -760,6 +760,34 @@ static void disable_sub_crqs(struct ibmvnic_adapter *adapter)
 	}
 }
 
+static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+{
+	struct ibmvnic_tx_pool *tx_pool;
+	u64 tx_entries;
+	int tx_scrqs;
+	int i, j;
+
+	if (!adapter->tx_pool)
+		return;
+
+	tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
+	tx_entries = adapter->req_tx_entries_per_subcrq;
+
+	/* Free any remaining skbs in the tx buffer pools */
+	for (i = 0; i < tx_scrqs; i++) {
+		tx_pool = &adapter->tx_pool[i];
+		if (!tx_pool)
+			continue;
+
+		for (j = 0; j < tx_entries; j++) {
+			if (tx_pool->tx_buff[j].skb) {
+				dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
+				tx_pool->tx_buff[j].skb = NULL;
+			}
+		}
+	}
+}
+
 static int __ibmvnic_close(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -768,6 +796,8 @@ static int __ibmvnic_close(struct net_device *netdev)
 
 	adapter->state = VNIC_CLOSING;
 	netif_tx_stop_all_queues(netdev);
+
+	clean_tx_pools(adapter);
 	disable_sub_crqs(adapter);
 
 	if (adapter->napi) {

^ permalink raw reply related

* [PATCH net-next 08/11] ibmvnic: Check for driver reset first in ibmvnic_xmit
From: Nathan Fontenot @ 2017-04-29  8:16 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

Move the check for the driver resetting to the first thing
in ibmvnic_xmit().

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4da7080..2cf1b64 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -985,12 +985,6 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	int index = 0;
 	int ret = 0;
 
-	tx_pool = &adapter->tx_pool[queue_num];
-	tx_scrq = adapter->tx_scrq[queue_num];
-	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
-	handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
-				   be32_to_cpu(adapter->login_rsp_buf->
-					       off_txsubm_subcrqs));
 	if (adapter->resetting) {
 		if (!netif_subqueue_stopped(netdev, skb))
 			netif_stop_subqueue(netdev, queue_num);
@@ -1002,6 +996,12 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		goto out;
 	}
 
+	tx_pool = &adapter->tx_pool[queue_num];
+	tx_scrq = adapter->tx_scrq[queue_num];
+	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
+	handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+		be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
+
 	index = tx_pool->free_map[tx_pool->consumer_index];
 	offset = index * adapter->req_mtu;
 	dst = tx_pool->long_term_buff.buff + offset;

^ permalink raw reply related

* [PATCH net-next 07/11] ibmvnic: Wait for any pending scrqs entries at driver close
From: Nathan Fontenot @ 2017-04-29  8:16 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

When closing the ibmvnic driver we need to wait for any pending
sub crq entries to ensure they are handled.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |   47 +++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0400ae7..4da7080 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -743,23 +743,6 @@ static int ibmvnic_open(struct net_device *netdev)
 	return rc;
 }
 
-static void disable_sub_crqs(struct ibmvnic_adapter *adapter)
-{
-	int i;
-
-	if (adapter->tx_scrq) {
-		for (i = 0; i < adapter->req_tx_queues; i++)
-			if (adapter->tx_scrq[i])
-				disable_irq(adapter->tx_scrq[i]->irq);
-	}
-
-	if (adapter->rx_scrq) {
-		for (i = 0; i < adapter->req_rx_queues; i++)
-			if (adapter->rx_scrq[i])
-				disable_irq(adapter->rx_scrq[i]->irq);
-	}
-}
-
 static void clean_tx_pools(struct ibmvnic_adapter *adapter)
 {
 	struct ibmvnic_tx_pool *tx_pool;
@@ -797,15 +780,39 @@ static int __ibmvnic_close(struct net_device *netdev)
 	adapter->state = VNIC_CLOSING;
 	netif_tx_stop_all_queues(netdev);
 
-	clean_tx_pools(adapter);
-	disable_sub_crqs(adapter);
-
 	if (adapter->napi) {
 		for (i = 0; i < adapter->req_rx_queues; i++)
 			napi_disable(&adapter->napi[i]);
 	}
 
+	clean_tx_pools(adapter);
+
+	if (adapter->tx_scrq) {
+		for (i = 0; i < adapter->req_tx_queues; i++)
+			if (adapter->tx_scrq[i]->irq)
+				disable_irq(adapter->tx_scrq[i]->irq);
+	}
+
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+	if (rc)
+		return rc;
+
+	if (adapter->rx_scrq) {
+		for (i = 0; i < adapter->req_rx_queues; i++) {
+			int retries = 10;
+
+			while (pending_scrq(adapter, adapter->rx_scrq[i])) {
+				retries--;
+				mdelay(100);
+
+				if (retries == 0)
+					break;
+			}
+	
+			if (adapter->rx_scrq[i]->irq)
+				disable_irq(adapter->rx_scrq[i]->irq);
+		}
+	}
 
 	adapter->state = VNIC_CLOSED;
 	return rc;

^ permalink raw reply related

* [PATCH net-next 09/11] ibmvnic: Continue skb processing after skb completion error
From: Nathan Fontenot @ 2017-04-29  8:16 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

There is not a need to stop processing skbs if we encounter a
skb that has a receive completion error.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2cf1b64..4b2e128 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1404,7 +1404,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
 			/* free the entry */
 			next->rx_comp.first = 0;
 			remove_buff_from_pool(adapter, rx_buff);
-			break;
+			continue;
 		}
 
 		length = be32_to_cpu(next->rx_comp.len);

^ permalink raw reply related

* [PATCH net-next 10/11] From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
From: Nathan Fontenot @ 2017-04-29  8:16 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>

ibmvnic: Record SKB RX queue during poll

Map each RX SKB to the RX queue associated with the driver's RX SCRQ.
This should improve the RX CPU load balancing issues seen by the
performance team.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4b2e128..4a2b99f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1428,6 +1428,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
 
 		skb_put(skb, length);
 		skb->protocol = eth_type_trans(skb, netdev);
+		skb_record_rx_queue(skb, scrq_num);
 
 		if (flags & IBMVNIC_IP_CHKSUM_GOOD &&
 		    flags & IBMVNIC_TCP_UDP_CHKSUM_GOOD) {

^ permalink raw reply related

* [PATCH net-next 11/11] ibmvnic: Move queue restart
From: Nathan Fontenot @ 2017-04-29  8:16 UTC (permalink / raw)
  To: netdev; +Cc: brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429080710.13438.39798.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>


---
 drivers/net/ethernet/ibm/ibmvnic.c |   23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4a2b99f..12536ba 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1776,6 +1776,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 	struct device *dev = &adapter->vdev->dev;
 	struct ibmvnic_tx_buff *txbuff;
 	union sub_crq *next;
+	struct sk_buff *skb;
 	int index;
 	int i, j;
 	u8 first;
@@ -1808,20 +1809,10 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 						 DMA_TO_DEVICE);
 			}
 
+			skb = txbuff->skb;
 			if (txbuff->last_frag) {
-				if (atomic_sub_return(next->tx_comp.num_comps,
-						      &scrq->used) <=
-				    (adapter->req_tx_entries_per_subcrq / 2) &&
-				    netif_subqueue_stopped(adapter->netdev,
-							   txbuff->skb)) {
-					netif_wake_subqueue(adapter->netdev,
-							    scrq->pool_index);
-					netdev_dbg(adapter->netdev,
-						   "Started queue %d\n",
-						   scrq->pool_index);
-				}
-
 				dev_kfree_skb_any(txbuff->skb);
+				txbuff->skb = NULL;
 			}
 
 			adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
@@ -1832,6 +1823,14 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 		}
 		/* remove tx_comp scrq*/
 		next->tx_comp.first = 0;
+				
+		if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <=
+		    (adapter->req_tx_entries_per_subcrq / 2) &&
+		    netif_subqueue_stopped(adapter->netdev, skb)) {
+			netif_wake_subqueue(adapter->netdev, scrq->pool_index);
+			netdev_info(adapter->netdev, "Started queue %d\n",
+				    scrq->pool_index);
+		}
 	}
 
 	enable_scrq_irq(adapter, scrq);

^ permalink raw reply related

* Re: [net-next PATCH V1] samples/bpf: bpf_load.c detect and abort if ELF maps section size is wrong
From: Jesper Dangaard Brouer @ 2017-04-29  7:57 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: kafai, netdev, eric, Daniel Borkmann, brouer
In-Reply-To: <20170429033519.2iyv7zjm7z43liuw@ast-mbp>

On Fri, 28 Apr 2017 20:35:21 -0700
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Fri, Apr 28, 2017 at 04:25:04PM +0200, Jesper Dangaard Brouer wrote:
> > The struct bpf_map_def was extended in commit fb30d4b71214 ("bpf: Add tests
> > for map-in-map") with member unsigned int inner_map_idx.  This changed the size
> > of the maps section in the generated ELF _kern.o files.
> > 
> > Unfortunately the loader in bpf_load.c does not detect or handle this.  Thus,
> > older _kern.o files became incompatible, and caused hard-to-debug errors
> > where the syscall validation rejected BPF_MAP_CREATE request.
> > 
> > This patch only detect the situation and aborts load_bpf_file(). It also
> > add code comments warning people that read this loader for inspiration
> > for these pitfalls.
> > 
> > Fixes: fb30d4b71214 ("bpf: Add tests for map-in-map")
> > Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>  
> 
> Thanks!
> Acked-by: Alexei Starovoitov <ast@kernel.org>

Thanks!

> > Is it worth to implement proper backward-compat loading of older ELF objects
> > with this bpf-loader?  
> 
> probably yes, since it looks like a bunch of code in samples/bpf/ still
> depend on it and some features are missing in tools/lib/bpf,
> so unless we actively work on improving libbpf.a 
> we won't be able to get rid of this 'sample' loader for some time.

Okay, I'll work on that next week. Hoping I can make the merge window,
so we avoid having a non-backward compat bpf_load in a kernel release.

p.s. I'll be presenting about XDP in Sweden Thur+Friday next week:
 https://lundlinuxcon.org/?page=current

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH v2] iov_iter: don't revert iov buffer if csum error
From: Ding Tianhong @ 2017-04-29  9:37 UTC (permalink / raw)
  To: Al Viro
  Cc: David Miller, pabeni, edumazet, hannes, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, LinuxArm, weiyongjun (A)
In-Reply-To: <20170429024647.GO29622@ZenIV.linux.org.uk>



On 2017/4/29 10:46, Al Viro wrote:
> On Sat, Apr 29, 2017 at 10:38:48AM +0800, Ding Tianhong wrote:
>> The patch 327868212381 (make skb_copy_datagram_msg() et.al. preserve
>> ->msg_iter on error) will revert the iov buffer if copy to iter
>> failed, but it didn't copy any datagram if the skb_checksum_complete
>> error, so no need to revert any data at this place.
> 
> The bug is real, but I would suggest a simpler fix:
>                 if (__skb_checksum_complete(skb))
>                         return -EINVAL;
> leaving the rest as-is.
> 
Looks good, if so, we don't need the csum_error any more,

-		if (csum_fold(csum))
+
+		if (csum_fold(csum)) {
+			iov_iter_revert(&msg->msg_iter, chunk);
+ 			return -EINVAL;
+		}
+
 		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
 	}
 	return 0;
- csum_error:
-	iov_iter_revert(&msg->msg_iter, chunk);
- 	return -EINVAL;
 fault:
 	return -EFAULT;

DO you agree this way? :)

Thanks
Ding

> .
> 

^ permalink raw reply

* URGENT
From: KATIE HIGGINS @ 2017-04-29 11:12 UTC (permalink / raw)


Ahoj zlato, Jmenuji se Katie Higginsová, Z Spojené státy americké, mám
o tebe zájem, představím si mé lépe, jakmile obdržím svou poštu,
doufám, že slyším od vás. Díky a polibkům. Katie Higginsová.

^ permalink raw reply

* 54179 netdev
From: kindergartenchaos2 @ 2017-04-29 12:02 UTC (permalink / raw)
  To: netdev

[-- Attachment #1: 93791932238.zip --]
[-- Type: application/zip, Size: 2992 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] lwtunnel: fix error path in lwtunnel_fill_encap()
From: David Ahern @ 2017-04-29 13:03 UTC (permalink / raw)
  To: Dan Carpenter, David S. Miller, Pan Bian
  Cc: Roopa Prabhu, Robert Shearman, Tom Herbert, David Lebrun, netdev,
	kernel-janitors
In-Reply-To: <20170428130347.53hagk77wh6scmcs@mwanda>

On 4/28/17 7:03 AM, Dan Carpenter wrote:
> We recently added a check to see if nla_nest_start() fails.  There are
> two issues with that.  First, if it fails then I don't think we should
> call nla_nest_cancel().  Second, it's slightly convoluted but the
> current code returns success but we should return -EMSGSIZE instead.
> 
> Fixes: a50fe0ffd76f ("lwtunnel: check return value of nla_nest_start")
> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
> 

Acked-by: David Ahern <dsa@cumulusnetworks.com>

^ permalink raw reply

* (unknown), 
From: Dmitry Bazhenov @ 2017-04-29 15:25 UTC (permalink / raw)
  To: netdev

unsubscribe

^ permalink raw reply

* Re: [PATCH net-next 11/11] ibmvnic: Move queue restart
From: kbuild test robot @ 2017-04-29 17:18 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: kbuild-all, netdev, brking, jallen, muvic, tlfalcon
In-Reply-To: <20170429081634.13438.38831.stgit@ltcalpine2-lp23.aus.stglabs.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 4324 bytes --]

Hi Nathan,

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Nathan-Fontenot/ibmvnic-Updated-reset-handler-and-code-fixes/20170430-004245
config: powerpc-allmodconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=powerpc 

Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings

All warnings (new ones prefixed by >>):

   In file included from include/linux/if_ether.h:23:0,
                    from include/uapi/linux/ethtool.h:18,
                    from include/linux/ethtool.h:17,
                    from include/linux/netdevice.h:42,
                    from drivers/net//ethernet/ibm/ibmvnic.c:54:
   drivers/net//ethernet/ibm/ibmvnic.c: In function 'ibmvnic_interrupt_tx':
>> include/linux/skbuff.h:3689:12: warning: 'skb' may be used uninitialized in this function [-Wmaybe-uninitialized]
     return skb->queue_mapping;
            ~~~^~~~~~~~~~~~~~~
   drivers/net//ethernet/ibm/ibmvnic.c:1779:18: note: 'skb' was declared here
     struct sk_buff *skb;
                     ^~~
--
   In file included from include/linux/if_ether.h:23:0,
                    from include/uapi/linux/ethtool.h:18,
                    from include/linux/ethtool.h:17,
                    from include/linux/netdevice.h:42,
                    from drivers/net/ethernet/ibm/ibmvnic.c:54:
   drivers/net/ethernet/ibm/ibmvnic.c: In function 'ibmvnic_interrupt_tx':
>> include/linux/skbuff.h:3689:12: warning: 'skb' may be used uninitialized in this function [-Wmaybe-uninitialized]
     return skb->queue_mapping;
            ~~~^~~~~~~~~~~~~~~
   drivers/net/ethernet/ibm/ibmvnic.c:1779:18: note: 'skb' was declared here
     struct sk_buff *skb;
                     ^~~

vim +/skb +3689 include/linux/skbuff.h

574f7194 Eric W. Biederman     2014-04-01  3673  	return !skb->destructor &&
574f7194 Eric W. Biederman     2014-04-01  3674  #if IS_ENABLED(CONFIG_XFRM)
574f7194 Eric W. Biederman     2014-04-01  3675  		!skb->sp &&
574f7194 Eric W. Biederman     2014-04-01  3676  #endif
cb9c6836 Florian Westphal      2017-01-23  3677  		!skb_nfct(skb) &&
574f7194 Eric W. Biederman     2014-04-01  3678  		!skb->_skb_refdst &&
574f7194 Eric W. Biederman     2014-04-01  3679  		!skb_has_frag_list(skb);
574f7194 Eric W. Biederman     2014-04-01  3680  }
574f7194 Eric W. Biederman     2014-04-01  3681  
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3682  static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3683  {
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3684  	skb->queue_mapping = queue_mapping;
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3685  }
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3686  
9247744e Stephen Hemminger     2009-03-21  3687  static inline u16 skb_get_queue_mapping(const struct sk_buff *skb)
4e3ab47a Pavel Emelyanov       2007-10-21  3688  {
4e3ab47a Pavel Emelyanov       2007-10-21 @3689  	return skb->queue_mapping;
4e3ab47a Pavel Emelyanov       2007-10-21  3690  }
4e3ab47a Pavel Emelyanov       2007-10-21  3691  
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3692  static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3693  {
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3694  	to->queue_mapping = from->queue_mapping;
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3695  }
f25f4e44 Peter P Waskiewicz Jr 2007-07-06  3696  
d5a9e24a David S. Miller       2009-01-27  3697  static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue)

:::::: The code at line 3689 was first introduced by commit
:::::: 4e3ab47a547616e583c7a5458beced6aa34c8ef3 [NET]: Make and use skb_get_queue_mapping

:::::: TO: Pavel Emelyanov <xemul@openvz.org>
:::::: CC: David S. Miller <davem@sunset.davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 52793 bytes --]

^ permalink raw reply

* Re: assembler mnenomics for call/tailcall plus maps...
From: David Miller @ 2017-04-29 18:38 UTC (permalink / raw)
  To: ast; +Cc: daniel, netdev, xdp-newbies
In-Reply-To: <398986a4-ef4b-6631-c6eb-96505e0a5849@fb.com>

From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 27 Apr 2017 19:06:14 -0700

> So in asm the map lookup will look like:
> 	.section        maps,"aw",@progbits
>         .globl  hashmap_def
> hashmap_def:
>         .long   1  # type
>         .long   24 # key_size
>         .long   40 # value_size
>         .long   256 # max_entries
> 
>         .text
>         .section        xdp_tx_iptunnel,"ax",@progbits
>         .globl  _xdp_prog
> _xdp_prog:
>          ldimm64 r1, hashmap_def
>          mov r2, r10
>          add r2, -8
>          call bpf_map_lookup_elem
> 
> this is 64-bit relo for ldimm64 insn
> 
> This is how it's defined in llvm:
> ELF_RELOC(R_BPF_NONE,        0)
> ELF_RELOC(R_BPF_64_64,       1)
> ELF_RELOC(R_BPF_64_32,      10)
> 
> The R_BPF_64_64 is the only relocation against .text
> The other one is used for relo into dwarf sections.

There is another missing element here, we don't use the relocation to
write the actual address of "&hashmap_def" into the ldimm64
instruction.

Instead, we use the relocation to write the file descriptor number for
that map into the instruction.  The only parts of the relocation that
are used are the offset (to find the BPF instruction) and the symbol
reference (to find out which map we are referencing).

The BPF loader also doesn't even check the relocation number to make
sure it's of the correct type.  It just assumes that any relocation it
sees is exactly this kind used for maps.

My relocations in binutils currently look like this:

START_RELOC_NUMBERS (elf_bpf_reloc_type)
  RELOC_NUMBER (R_BPF_NONE, 0)
  RELOC_NUMBER (R_BPF_INSN_16, 1)
  RELOC_NUMBER (R_BPF_INSN_32, 2)
  RELOC_NUMBER (R_BPF_INSN_64, 3)
  RELOC_NUMBER (R_BPF_WDISP16, 4)
  RELOC_NUMBER (R_BPF_DATA_8,  5)
  RELOC_NUMBER (R_BPF_DATA_16, 6)
  RELOC_NUMBER (R_BPF_DATA_32, 7)
  RELOC_NUMBER (R_BPF_DATA_64, 8)
END_RELOC_NUMBERS (R_BPF_max)

There is of course R_BPF_NONE, and then we have 4 relocations for
instructions.  One for the 16-bit offset field (non-pc-relative), one
for the 32-bit immediate field (again, non-pc-relative), one for
ldimm64's immediate field, and finally R_BPF_WDISP16 for doing a
pc-relative relocation to the offset field of a jump instruction.

Then we have the usual data relocations, for 8, 16, 32, and 64-bit.

I guess LLVM's R_BPF_64_64 is the one used for the ldimm64 reference
to a map, and is equivalent to R_BPF_INSN_64 above.

We just need to sort out how we want this semantically to interconnect
etc.

In binutils we can make the MAP reference special and explicit, so we
would for example add:

	RELOC_NUMBER (R_BPF_MAP, 9)

or whatever.  And then for assembler syntax, use something like:

	%map(SYMBOL)

So you would go:

	ldimm64	r1, %map(hash_map)

or, taking it one step further, do the following since we know this
maps to a 32-bit FD:

	mov32	r1, %map(hash_map)

and this way avoid eating up a 16-byte opcode just for this.

In GCC it will be simple to get the backend to emit this, various
options exist.  We can make it a special "__attribute__((map))", or
use address spaces to annotate the map object.  And then when the
ldimm64 or whatever instruction is emitted, and it sees the symbol
referenced has this special type, it will emit "%%map(%s)" instead of
just "%s" for the symbol name in the asembler output.

That in turn will lead to the correct relocation.

But I guess for now what I could do is just make R_BPF_INSN_64 have
the same number as LLVM's R_BPF_64_64 and it should "just work" using
tooling.

I think we should spend serious time properly designing the
relocations and thinking ahead about people perhaps wanting to link
multiple objects together, call functions in other objects, and
perhaps even doing dynamic relocations.  Nothing fundamentally in
eBPF prevents this.

^ permalink raw reply

* Re: [PATCH RFC net-next v4 4/7] net: use skb->csum_not_inet to identify packets needing crc32c
From: Tom Herbert @ 2017-04-29 20:18 UTC (permalink / raw)
  To: Davide Caratti
  Cc: Alexander Duyck, David Laight, David S . Miller,
	Marcelo Ricardo Leitner, Linux Kernel Network Developers,
	linux-sctp @ vger . kernel . org
In-Reply-To: <4f3a01b66bc46d3a2169a8ef2b4d4285e03f5894.1492692976.git.dcaratti@redhat.com>

On Thu, Apr 20, 2017 at 6:38 AM, Davide Caratti <dcaratti@redhat.com> wrote:
> skb->csum_not_inet carries the indication on which algorithm is needed to
> compute checksum on skb in the transmit path, when skb->ip_summed is equal
> to CHECKSUM_PARTIAL. If skb carries a SCTP packet and crc32c hasn't been
> yet written in L4 header, skb->csum_not_inet is assigned to 1; otherwise,
> assume Internet Checksum is needed and thus set skb->csum_not_inet to 0.
>
> Suggested-by: Tom Herbert <tom@herbertland.com>
> Signed-off-by: Davide Caratti <dcaratti@redhat.com>
> ---
>  include/linux/skbuff.h | 16 +++++++++-------
>  net/core/dev.c         |  1 +
>  net/sched/act_csum.c   |  1 +
>  net/sctp/offload.c     |  1 +
>  net/sctp/output.c      |  1 +
>  5 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 927309e..419f4c8 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -189,12 +189,13 @@
>   *
>   *   NETIF_F_SCTP_CRC - This feature indicates that a device is capable of
>   *     offloading the SCTP CRC in a packet. To perform this offload the stack
> - *     will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset
> - *     accordingly. Note the there is no indication in the skbuff that the
> - *     CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports
> - *     both IP checksum offload and SCTP CRC offload must verify which offload
> - *     is configured for a packet presumably by inspecting packet headers; in
> - *     case, skb_crc32c_csum_help is provided to compute CRC on SCTP packets.
> + *     will set set csum_start and csum_offset accordingly, set ip_summed to
> + *     CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in
> + *     the skbuff that the CHECKSUM_PARTIAL refers to CRC32c.
> + *     A driver that supports both IP checksum offload and SCTP CRC32c offload
> + *     must verify which offload is configured for a packet by testing the
> + *     value of skb->csum_not_inet; skb_crc32c_csum_help is provided to resolve
> + *     CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1.
>   *
>   *   NETIF_F_FCOE_CRC - This feature indicates that a device is capable of
>   *     offloading the FCOE CRC in a packet. To perform this offload the stack
> @@ -615,6 +616,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
>   *     @wifi_acked_valid: wifi_acked was set
>   *     @wifi_acked: whether frame was acked on wifi or not
>   *     @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
> + *     @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
>   *     @dst_pending_confirm: need to confirm neighbour
>    *    @napi_id: id of the NAPI struct this skb came from
>   *     @secmark: security marking
> @@ -743,7 +745,7 @@ struct sk_buff {
>         __u8                    csum_valid:1;
>         __u8                    csum_complete_sw:1;
>         __u8                    csum_level:2;
> -       __u8                    __csum_bad_unused:1; /* one bit hole */
> +       __u8                    csum_not_inet:1;
>
>         __u8                    dst_pending_confirm:1;
>  #ifdef CONFIG_IPV6_NDISC_NODETYPE
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 77a2d73..9f56f87 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2642,6 +2642,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
>         }
>         *(__le32 *)(skb->data + offset) = crc32c_csum;
>         skb->ip_summed = CHECKSUM_NONE;
> +       skb->csum_not_inet = 0;
>  out:
>         return ret;
>  }
> diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
> index ab6fdbd..3317a2f 100644
> --- a/net/sched/act_csum.c
> +++ b/net/sched/act_csum.c
> @@ -350,6 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
>         sctph->checksum = sctp_compute_cksum(skb,
>                                              skb_network_offset(skb) + ihl);
>         skb->ip_summed = CHECKSUM_NONE;
> +       skb->csum_not_inet = 0;
>
>         return 1;
>  }
> diff --git a/net/sctp/offload.c b/net/sctp/offload.c
> index 378f462..ef156ac 100644
> --- a/net/sctp/offload.c
> +++ b/net/sctp/offload.c
> @@ -35,6 +35,7 @@
>  static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
>  {
>         skb->ip_summed = CHECKSUM_NONE;
> +       skb->csum_not_inet = 0;
>         return sctp_compute_cksum(skb, skb_transport_offset(skb));
>  }
>
> diff --git a/net/sctp/output.c b/net/sctp/output.c
> index 1409a87..e2edf2e 100644
> --- a/net/sctp/output.c
> +++ b/net/sctp/output.c
> @@ -538,6 +538,7 @@ static int sctp_packet_pack(struct sctp_packet *packet,
>         } else {
>  chksum:
>                 head->ip_summed = CHECKSUM_PARTIAL;
> +               head->csum_not_inet = 1;
>                 head->csum_start = skb_transport_header(head) - head->head;
>                 head->csum_offset = offsetof(struct sctphdr, checksum);
>         }
> --
> 2.7.4
>
Looks great!

Acked-by: Tom Herbert <tom@herbertland.com>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox