Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net 1/7] net/mlx5: Fix driver load bad flow when having fw initializing timeout
From: Saeed Mahameed @ 2017-04-23 10:07 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Mohamad Haj Yahia, Saeed Mahameed, kernel-team
In-Reply-To: <20170423100802.27630-1-saeedm@mellanox.com>

From: Mohamad Haj Yahia <mohamad@mellanox.com>

If FW is stuck in initializing state we will skip the driver load, but
current error handling flow doesn't clean previously allocated command
interface resources.

Fixes: e3297246c2c8 ('net/mlx5_core: Wait for FW readiness on startup')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Cc: kernel-team@fb.com
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 60154a175bd3..0ad66324247f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1029,7 +1029,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	if (err) {
 		dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
 			FW_INIT_TIMEOUT_MILI);
-		goto out_err;
+		goto err_cmd_cleanup;
 	}

 	err = mlx5_core_enable_hca(dev, 0);
-- 
2.11.0

^ permalink raw reply related

* [net 2/7] net/mlx5: E-Switch, Correctly deal with inline mode on ConnectX-5
From: Saeed Mahameed @ 2017-04-23 10:07 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Saeed Mahameed
In-Reply-To: <20170423100802.27630-1-saeedm@mellanox.com>

From: Or Gerlitz <ogerlitz@mellanox.com>

On ConnectX5 the wqe inline mode is "none" and hence the FW
reports MLX5_CAP_INLINE_MODE_NOT_REQUIRED.

Fix our devlink callbacks to deal with that on get and set.

Also fix the tc flow parsing code not to fail anything when
inline isn't required.

Fixes: bffaa916588e ('net/mlx5: E-Switch, Add control for inline mode')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  3 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 36 ++++++++++++++--------
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fade7233dac5..b7c99c38a7c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -639,7 +639,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
 
 	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
 	    rep->vport != FDB_UPLINK_VPORT) {
-		if (min_inline > esw->offloads.inline_mode) {
+		if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
+		    esw->offloads.inline_mode < min_inline) {
 			netdev_warn(priv->netdev,
 				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
 				    min_inline, esw->offloads.inline_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 307ec6c5fd3b..d111cebca9f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -911,8 +911,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	int num_vports = esw->enabled_vports;
-	int err;
-	int vport;
+	int err, vport;
 	u8 mlx5_mode;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
@@ -921,9 +920,17 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+		if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+			return 0;
+		/* fall through */
+	case MLX5_CAP_INLINE_MODE_L2:
+		esw_warn(dev, "Inline mode can't be set\n");
 		return -EOPNOTSUPP;
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		break;
+	}
 
 	if (esw->offloads.num_flows > 0) {
 		esw_warn(dev, "Can't set inline mode when flows are configured\n");
@@ -966,18 +973,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-		return -EOPNOTSUPP;
-
 	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 }
 
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 {
+	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
 	struct mlx5_core_dev *dev = esw->dev;
 	int vport;
-	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
 		return -EOPNOTSUPP;
@@ -985,10 +988,18 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
-	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
-		return -EOPNOTSUPP;
+	switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+		mlx5_mode = MLX5_INLINE_MODE_NONE;
+		goto out;
+	case MLX5_CAP_INLINE_MODE_L2:
+		mlx5_mode = MLX5_INLINE_MODE_L2;
+		goto out;
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		goto query_vports;
+	}
 
+query_vports:
 	for (vport = 1; vport <= nvfs; vport++) {
 		mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
 		if (vport > 1 && prev_mlx5_mode != mlx5_mode)
@@ -996,6 +1007,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 		prev_mlx5_mode = mlx5_mode;
 	}
 
+out:
 	*mode = mlx5_mode;
 	return 0;
 }
-- 
2.11.0

^ permalink raw reply related

* [net 5/7] net/mlx5: Fix UAR memory leak
From: Saeed Mahameed @ 2017-04-23 10:08 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Maor Gottlieb, Saeed Mahameed
In-Reply-To: <20170423100802.27630-1-saeedm@mellanox.com>

From: Maor Gottlieb <maorg@mellanox.com>

When UAR is released, we deallocate the device resource, but
don't unmap the UAR mapping memory.
Fix the leak by unmapping this memory.

Fixes: a6d51b68611e9 ('net/mlx5: Introduce blue flame register allocator')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/uar.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 2e6b0f290ddc..222b25908d01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -87,6 +87,7 @@ static void up_rel_func(struct kref *kref)
 	struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
 
 	list_del(&up->list);
+	iounmap(up->map);
 	if (mlx5_cmd_free_uar(up->mdev, up->index))
 		mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
 	kfree(up->reg_bitmap);
-- 
2.11.0

^ permalink raw reply related

* [net 7/7] net/mlx5e: Fix ETHTOOL_GRXCLSRLALL handling
From: Saeed Mahameed @ 2017-04-23 10:08 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Ilan Tayari, Saeed Mahameed, kernel-team
In-Reply-To: <20170423100802.27630-1-saeedm@mellanox.com>

From: Ilan Tayari <ilant@mellanox.com>

Handler for ETHTOOL_GRXCLSRLALL must set info->data to the size
of the table, regardless of the amount of entries in it.
Existing code does not do that, and this breaks all usage of ethtool -N
or -n without explicit location, with this error:
rmgr: Invalid RX class rules table size: Success

Set info->data to the table size.

Tested:
ethtool -n ens8
ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1
ethtool -N ens8 flow-type ip4 src-ip 1.1.1.1 dst-ip 2.2.2.2 action 1 loc 55
ethtool -n ens8
ethtool -N ens8 delete 1023
ethtool -N ens8 delete 55

Fixes: f913a72aa008 ("net/mlx5e: Add support to get ethtool flow rules")
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Cc: kernel-team@fb.com
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index d55fff0ba388..26fc77e80f7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -564,6 +564,7 @@ int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *i
 	int idx = 0;
 	int err = 0;

+	info->data = MAX_NUM_OF_ETHTOOL_RULES;
 	while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
 		err = mlx5e_ethtool_get_flow(priv, info, location);
 		if (!err)
-- 
2.11.0

^ permalink raw reply related

* [net 6/7] net/mlx5e: Fix small packet threshold
From: Saeed Mahameed @ 2017-04-23 10:08 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Eugenia Emantayev, Saeed Mahameed, kernel-team
In-Reply-To: <20170423100802.27630-1-saeedm@mellanox.com>

From: Eugenia Emantayev <eugenia@mellanox.com>

RX packet headers are meant to be contained in the SKB linear part,
and a threshold of 128 bytes was chosen for that purpose.
It turns out this is not enough, i.e. for IPv6 packet over VxLAN.
In this case, UDP/IPv4 needs 42 bytes, GENEVE header is 8 bytes,
and 86 bytes for TCP/IPv6. In total 136 bytes that is more than
current 128 bytes. In this case expand header flow is reached.
The warning in skb_try_coalesce() caused by a wrong truesize
was already fixed here:
commit 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()").
Still, we prefer to totally avoid the expand header flow for performance reasons.
Tested regular TCP_STREAM with iperf for 1 and 8 streams, no degradation was found.

Fixes: 461017cb006a ("net/mlx5e: Support RX multi-packet WQE (Striding RQ)")
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Cc: kernel-team@fb.com
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index dc52053128bc..3d9490cd2db1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -90,7 +90,7 @@
 #define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)

 #define MLX5_UMR_ALIGN				(2048)
-#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(128)
+#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(256)

 #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
 #define MLX5E_DEFAULT_LRO_TIMEOUT                       32
-- 
2.11.0

^ permalink raw reply related

* [pull request][net 0/7] Mellanox, mlx5 fixes 2017-04-22
From: Saeed Mahameed @ 2017-04-23 10:07 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Saeed Mahameed

Hi Dave,

This series contains some mlx5 fixes for net.

For your convenience, the series doesn't introduce any conflict with
the ongoing net-next pull request.

Please pull and let me know if there's any problem.

For -stable:
("net/mlx5: E-Switch, Correctly deal with inline mode on ConnectX-5") kernels >= 4.10
("net/mlx5e: Fix ETHTOOL_GRXCLSRLALL handling") kernels >= 4.8
("net/mlx5e: Fix small packet threshold")       kernels >= 4.7
("net/mlx5: Fix driver load bad flow when having fw initializing timeout") kernels >= 4.4

Thanks,
Saeed.

The following changes since commit 94836ecf1e7378b64d37624fbb81fe48fbd4c772:

  Merge tag 'nfsd-4.11-2' of git://linux-nfs.org/~bfields/linux (2017-04-21 16:37:48 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-fixes-2017-04-22

for you to fetch changes up to 5e82c9e4ed60beba83f46a1a5a8307b99a23e982:

  net/mlx5e: Fix ETHTOOL_GRXCLSRLALL handling (2017-04-22 21:52:37 +0300)

----------------------------------------------------------------
mlx5-fixes-2017-04-22

----------------------------------------------------------------
Eugenia Emantayev (1):
      net/mlx5e: Fix small packet threshold

Ilan Tayari (1):
      net/mlx5e: Fix ETHTOOL_GRXCLSRLALL handling

Maor Gottlieb (1):
      net/mlx5: Fix UAR memory leak

Mohamad Haj Yahia (1):
      net/mlx5: Fix driver load bad flow when having fw initializing timeout

Or Gerlitz (3):
      net/mlx5: E-Switch, Correctly deal with inline mode on ConnectX-5
      net/mlx5e: Make sure the FW max encap size is enough for ipv4 tunnels
      net/mlx5e: Make sure the FW max encap size is enough for ipv6 tunnels

 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  2 +-
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c    |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 87 ++++++++++++----------
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 36 ++++++---
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/uar.c      |  1 +
 6 files changed, 76 insertions(+), 53 deletions(-)

^ permalink raw reply

* [net 3/7] net/mlx5e: Make sure the FW max encap size is enough for ipv4 tunnels
From: Saeed Mahameed @ 2017-04-23 10:07 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Saeed Mahameed
In-Reply-To: <20170423100802.27630-1-saeedm@mellanox.com>

From: Or Gerlitz <ogerlitz@mellanox.com>

Otherwise the code that fills the ipv4 encapsulation headers could be writing
beyond the allocated headers buffer.

Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 42 ++++++++++++++-----------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index b7c99c38a7c4..fc7c1d30461c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -786,16 +786,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 	return 0;
 }
 
-static int gen_vxlan_header_ipv4(struct net_device *out_dev,
-				 char buf[],
-				 unsigned char h_dest[ETH_ALEN],
-				 int ttl,
-				 __be32 daddr,
-				 __be32 saddr,
-				 __be16 udp_dst_port,
-				 __be32 vx_vni)
+static void gen_vxlan_header_ipv4(struct net_device *out_dev,
+				  char buf[], int encap_size,
+				  unsigned char h_dest[ETH_ALEN],
+				  int ttl,
+				  __be32 daddr,
+				  __be32 saddr,
+				  __be16 udp_dst_port,
+				  __be32 vx_vni)
 {
-	int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
 	struct ethhdr *eth = (struct ethhdr *)buf;
 	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
 	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
@@ -818,8 +817,6 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev,
 	udp->dest = udp_dst_port;
 	vxh->vx_flags = VXLAN_HF_VNI;
 	vxh->vx_vni = vxlan_vni_field(vx_vni);
-
-	return encap_size;
 }
 
 static int gen_vxlan_header_ipv6(struct net_device *out_dev,
@@ -863,13 +860,20 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 					  struct net_device **out_dev)
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
 	struct ip_tunnel_key *tun_key = &e->tun_info.key;
-	int encap_size, ttl, err;
 	struct neighbour *n = NULL;
 	struct flowi4 fl4 = {};
 	char *encap_header;
+	int ttl, err;
 
-	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+	if (max_encap_size < ipv4_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv4_encap_size, max_encap_size);
+		return -EOPNOTSUPP;
+	}
+
+	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
 	if (!encap_header)
 		return -ENOMEM;
 
@@ -904,11 +908,11 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 
 	switch (e->tunnel_type) {
 	case MLX5_HEADER_TYPE_VXLAN:
-		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
-						   e->h_dest, ttl,
-						   fl4.daddr,
-						   fl4.saddr, tun_key->tp_dst,
-						   tunnel_id_to_key32(tun_key->tun_id));
+		gen_vxlan_header_ipv4(*out_dev, encap_header,
+				      ipv4_encap_size, e->h_dest, ttl,
+				      fl4.daddr,
+				      fl4.saddr, tun_key->tp_dst,
+				      tunnel_id_to_key32(tun_key->tun_id));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -916,7 +920,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	}
 
 	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       encap_size, encap_header, &e->encap_id);
+			       ipv4_encap_size, encap_header, &e->encap_id);
 out:
 	if (err && n)
 		neigh_release(n);
-- 
2.11.0

^ permalink raw reply related

* [net 4/7] net/mlx5e: Make sure the FW max encap size is enough for ipv6 tunnels
From: Saeed Mahameed @ 2017-04-23 10:07 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Or Gerlitz, Saeed Mahameed
In-Reply-To: <20170423100802.27630-1-saeedm@mellanox.com>

From: Or Gerlitz <ogerlitz@mellanox.com>

Otherwise the code that fills the ipv6 encapsulation headers could be writing
beyond the allocated headers buffer.

Fixes: ce99f6b97fcd ('net/mlx5e: Support SRIOV TC encapsulation offloads for IPv6 tunnels')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 42 ++++++++++++++-----------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fc7c1d30461c..5436866798f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -819,16 +819,15 @@ static void gen_vxlan_header_ipv4(struct net_device *out_dev,
 	vxh->vx_vni = vxlan_vni_field(vx_vni);
 }
 
-static int gen_vxlan_header_ipv6(struct net_device *out_dev,
-				 char buf[],
-				 unsigned char h_dest[ETH_ALEN],
-				 int ttl,
-				 struct in6_addr *daddr,
-				 struct in6_addr *saddr,
-				 __be16 udp_dst_port,
-				 __be32 vx_vni)
+static void gen_vxlan_header_ipv6(struct net_device *out_dev,
+				  char buf[], int encap_size,
+				  unsigned char h_dest[ETH_ALEN],
+				  int ttl,
+				  struct in6_addr *daddr,
+				  struct in6_addr *saddr,
+				  __be16 udp_dst_port,
+				  __be32 vx_vni)
 {
-	int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN;
 	struct ethhdr *eth = (struct ethhdr *)buf;
 	struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
 	struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
@@ -850,8 +849,6 @@ static int gen_vxlan_header_ipv6(struct net_device *out_dev,
 	udp->dest = udp_dst_port;
 	vxh->vx_flags = VXLAN_HF_VNI;
 	vxh->vx_vni = vxlan_vni_field(vx_vni);
-
-	return encap_size;
 }
 
 static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
@@ -935,13 +932,20 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 
 {
 	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
 	struct ip_tunnel_key *tun_key = &e->tun_info.key;
-	int encap_size, err, ttl = 0;
 	struct neighbour *n = NULL;
 	struct flowi6 fl6 = {};
 	char *encap_header;
+	int err, ttl = 0;
 
-	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+	if (max_encap_size < ipv6_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv6_encap_size, max_encap_size);
+		return -EOPNOTSUPP;
+	}
+
+	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
 	if (!encap_header)
 		return -ENOMEM;
 
@@ -977,11 +981,11 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 
 	switch (e->tunnel_type) {
 	case MLX5_HEADER_TYPE_VXLAN:
-		encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header,
-						   e->h_dest, ttl,
-						   &fl6.daddr,
-						   &fl6.saddr, tun_key->tp_dst,
-						   tunnel_id_to_key32(tun_key->tun_id));
+		gen_vxlan_header_ipv6(*out_dev, encap_header,
+				      ipv6_encap_size, e->h_dest, ttl,
+				      &fl6.daddr,
+				      &fl6.saddr, tun_key->tp_dst,
+				      tunnel_id_to_key32(tun_key->tun_id));
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -989,7 +993,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 	}
 
 	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       encap_size, encap_header, &e->encap_id);
+			       ipv6_encap_size, encap_header, &e->encap_id);
 out:
 	if (err && n)
 		neigh_release(n);
-- 
2.11.0

^ permalink raw reply related

* [PATCH 1/1] net: bcmgenet: fix incorrect return value checks
From: Pan Bian @ 2017-04-23 10:01 UTC (permalink / raw)
  To: Florian Fainelli, netdev; +Cc: linux-kernel, Pan Bian

From: Pan Bian <bianpan2016@163.com>

Function platform_get_irq() will return a negative value on errors.
However, in function bcmgenet_probe(), 0 is considered as a flag of
error. This patch fixes the bug by checking whether the return value of
platform_get_irq() is less than 0.

Signed-off-by: Pan Bian <bianpan2016@163.com>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 365895e..0bccf2b 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3329,7 +3329,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	priv->irq0 = platform_get_irq(pdev, 0);
 	priv->irq1 = platform_get_irq(pdev, 1);
 	priv->wol_irq = platform_get_irq(pdev, 2);
-	if (!priv->irq0 || !priv->irq1) {
+	if (priv->irq0 < 0 || priv->irq1 < 0) {
 		dev_err(&pdev->dev, "can't find IRQs\n");
 		err = -EINVAL;
 		goto err;
-- 
1.9.1

^ permalink raw reply related

* [PATCH 1/1] wan: pc300too: abort path on failure
From: Pan Bian @ 2017-04-23  9:38 UTC (permalink / raw)
  To: Krzysztof Halasa; +Cc: netdev, linux-kernel, Pan Bian

From: Pan Bian <bianpan2016@163.com>

In function pc300_pci_init_one(), on the ioremap error path, function
pc300_pci_remove_one() is called to free the allocated memory. However,
the path is not terminated, and the freed memory will be used later,
resulting in use-after-free bugs. This patch fixes the bug.

Signed-off-by: Pan Bian <bianpan2016@163.com>
---
 drivers/net/wan/pc300too.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c
index e1dd1ec..b9b934b 100644
--- a/drivers/net/wan/pc300too.c
+++ b/drivers/net/wan/pc300too.c
@@ -346,6 +346,7 @@ static int pc300_pci_init_one(struct pci_dev *pdev,
 	    card->rambase == NULL) {
 		pr_err("ioremap() failed\n");
 		pc300_pci_remove_one(pdev);
+		return -ENOMEM;
 	}
 
 	/* PLX PCI 9050 workaround for local configuration register read bug */
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH 1/1] tipc: check return value of nlmsg_new
From: Joe Perches @ 2017-04-23  8:16 UTC (permalink / raw)
  To: PanBian; +Cc: Ying Xue, David S. Miller, netdev, tipc-discussion, linux-kernel
In-Reply-To: <20170423080027.GA843@bp>

On Sun, 2017-04-23 at 16:00 +0800, PanBian wrote:
> On Sun, Apr 23, 2017 at 12:17:16AM -0700, Joe Perches wrote:
> > On Sun, 2017-04-23 at 15:09 +0800, Pan Bian wrote:
> > > Function nlmsg_new() will return a NULL pointer if there is no enough
> > > memory, and its return value should be checked before it is used.
> > > However, in function tipc_nl_node_get_monitor(), the validation of the
> > > return value of function nlmsg_new() is missed. This patch fixes the
> > > bug.
> > 
> > Hello.
> > 
> > Thanks for the patches.
> > 
> > Are you finding these via a tool or inspection?
> > 
> > If a tool is being used, could you please describe it?
> > 
> 
> Yes. I developed a tool to find this kind of bugs.
> 
> The detecting idea is simple. In large systems like the Linux kernel, 
> most implementations are correct, and incorrect ones are rare. Based on
> this observation, we take programs that have different implementations
> with others as bugs. For example, in most cases, the return value of
> nlmsg_new() is validated and it will not be passed to genlmsg_reply() if
> its value is NULL. However, in function tipc_nl_node_get_monitor(), the
> validation is missing. The abnormal behavior leads us to believe that
> there is a bug.

Perhaps adding __must_check to some of the appropriate function
declarations/prototypes would help avoid new future misuses.

^ permalink raw reply

* Re: [PATCH 1/1] tipc: check return value of nlmsg_new
From: PanBian @ 2017-04-23  8:00 UTC (permalink / raw)
  To: Joe Perches
  Cc: Ying Xue, David S. Miller, netdev, tipc-discussion, linux-kernel
In-Reply-To: <1492931836.30293.43.camel@perches.com>

On Sun, Apr 23, 2017 at 12:17:16AM -0700, Joe Perches wrote:
> On Sun, 2017-04-23 at 15:09 +0800, Pan Bian wrote:
> > Function nlmsg_new() will return a NULL pointer if there is no enough
> > memory, and its return value should be checked before it is used.
> > However, in function tipc_nl_node_get_monitor(), the validation of the
> > return value of function nlmsg_new() is missed. This patch fixes the
> > bug.
> 
> Hello.
> 
> Thanks for the patches.
> 
> Are you finding these via a tool or inspection?
> 
> If a tool is being used, could you please describe it?
> 

Yes. I developed a tool to find this kind of bugs.

The detecting idea is simple. In large systems like the Linux kernel, 
most implementations are correct, and incorrect ones are rare. Based on
this observation, we take programs that have different implementations
with others as bugs. For example, in most cases, the return value of
nlmsg_new() is validated and it will not be passed to genlmsg_reply() if
its value is NULL. However, in function tipc_nl_node_get_monitor(), the
validation is missing. The abnormal behavior leads us to believe that
there is a bug.

Thanks for your attention.

^ permalink raw reply

* Re: [RFC] change the default Kconfig value of mlx5_en
From: Matan Barak @ 2017-04-23  7:54 UTC (permalink / raw)
  To: Ian Kumlien, Saeed Mahameed
  Cc: Leon Romanovsky, David Miller, Saeed Mahameed,
	Linux Kernel Network Developers
In-Reply-To: <CAA85sZsH4JNczUzK0EfGC959y=+SK1b=T6_5zb_0e1b99xGtvA@mail.gmail.com>

On 22/04/2017 12:28, Ian Kumlien wrote:
> On Sat, Apr 22, 2017 at 3:07 AM, Saeed Mahameed
> <saeedm@dev.mellanox.co.il> wrote:
>> On Sat, Apr 22, 2017 at 3:47 AM, Ian Kumlien <ian.kumlien@gmail.com> wrote:
>>> On Sat, Apr 22, 2017 at 2:34 AM, Saeed Mahameed
>>> <saeedm@dev.mellanox.co.il> wrote:
>>>> On Sat, Apr 22, 2017 at 2:10 AM, Ian Kumlien <ian.kumlien@gmail.com> wrote:
>>>>> Sorry,
>>>>>
>>>>> Back again, fighting cold, hot whiskey has been consumed...
>>>>>
>>>>> Something like this would perhaps be a better solution:
>>>>>
>>>>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c
>>>>> b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>>>>> index 60154a175bd3..fe192e247601 100644
>>>>> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
>>>>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>>>>> @@ -1139,6 +1139,10 @@ static int mlx5_load_one(struct mlx5_core_dev
>>>>> *dev, struct mlx5_priv *priv,
>>>>>
>>>>>  #ifdef CONFIG_MLX5_CORE_EN
>>>>>         mlx5_eswitch_attach(dev->priv.eswitch);
>>>>> +#else
>>>>> +       if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) {
>>>>> +               dev_info(&pdev->dev, "Ethernet device discovered but
>>>>> support not enabled in kernel.");
>>>>> +       }
>>>>>  #endif
>>>>>
>>>>
>>>> Currently both MLX5_CORE=n and MLX5_CORE_EN=n as a default, the issue
>>>> you are seeing can occur only if you explicitly  set MLX5_CORE=y and
>>>> MLX5_CORE_EN=n, Why would someone do this if he knows he wants Ethernet
>>>> support as well ? IMHO this print is redundant .
>>>
>>> Well, I'm running a prebuilt kernel - which was configured this way,
>>> and since there
>>> is no mlx5_en module and it does state that the link is "Ethernet", it
>>> just looks like the
>>> driver is broken or in some kind of really weird state.
>>>
>>>> Anyway, Are you looking for RDMA support over ethernet (RoCE) ? and
>>>> you are not interested to have ethernet netdev support ?
>>>
>>> ? RDMA is something we'll look at in the future, right now, having the
>>> nics actually
>>> work as nics is a priority ;)
>>>
>>
>> I see, i just wanted to understand your situation :)
>>
>>>> if yes, I think this is something that can be achieved, but the
>>>> question is do we really need this ?
>>>
>>> It's really weird to see the driver load, to see everything register
>>> and have no feedback.
>>>
>>
>> So, in your case you have mlx5 core support without MLX5_CORE_EN which
>> provides the eswitch and netdev functionality in ethernet.
>
> Yes
>
>> But you will still have mlx5_ib register an RDMA interface and
>> theoretically it should work, the only thing you won't see is a
>> netdevice.
>>
>> The weird thing is that you don't see a link up on the RDMA interface,
>> Leon/Matan can you please look into this ? do we really need a netdev
>> to have a functioning RDMA logical link in ethernet ?
>

The RDMA core subsystem listens to netdev events and configures RoCE GIDs
accordingly. It currently relies on a RoCE dev to have an associated 
netdev, as even default GIDs (the equivalent to IPv6 link local GIDs) 
rely on a MAC address that comes from the netdev.
In IB NICs, the case is different. There's no associated netdev for 
that, so compiling MLX5_CORE_EN isn't required.

> The switch we have does support RDMA but the manual is sparse (as in
> nothing really there) wrt enabling/configuring the RDMA bit so something
> might be missing.
>
> I'll try to remember to do the same test when we setup the mellanox switches =)
>
>>> Including no network devices, but if you run the Infiniband commands,
>>> they tell you that
>>> you are connected to Ethernet but that the device is down and disabled.
>>>
>>> To me, down and disabled is not the same as in "Ethernet support is
>>> not included" =)
>>>
>>> Basically, i would hate for someone else to end up in the same
>>> situation since you only
>>> get guides on how to enable infiniband/RDMA but what you really want
>>> to do at that point
>>> is to disable it and see if that gives you your network devices back =)
>>>
>>
>> Yes this is misleading, Maybe your kernel log warning is not so bad
>> after all, but let me dig more into this.
>> I will get back to you next week.
>
> Thanks, I bet that there is better ways to do it, this one was just
> one of the first ones i found =)
>
>>> I have had similar issues with some connectx3 devices while playing at
>>> home but i suspect
>>> that it's just a limitation of OFED packages available for the dist I'm running.

^ permalink raw reply

* [PATCH 2/2] team: fix memory leak
From: Pan Bian @ 2017-04-23  7:29 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, linux-kernel, Pan Bian

In function team_nl_send_options_get(), pointer skb keeps the return
value of function nlmsg_new(). When the call to genlmsg_put() fails, the
control flow directly returns and does not free skb. This will result in
a memory leak bug. This patch fixes it.

Signed-off-by: Pan Bian <bianpan2016@163.com>
---
 drivers/net/team/team.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index dd3a2e9..85c0124 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2361,8 +2361,10 @@ static int team_nl_send_options_get(struct team *team, u32 portid, u32 seq,
 
 	hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
 			  TEAM_CMD_OPTIONS_GET);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
 		goto nla_put_failure;
-- 
1.9.1

^ permalink raw reply related

* [PATCH 1/2] team: fix memory leak
From: Pan Bian @ 2017-04-23  7:29 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, linux-kernel, Pan Bian

In function team_nl_send_port_list_get(), pointer skb keeps the return
value of nlmsg_new(). When the call to genlmsg_put() fails, the memory
is not freed. This will result in a memory leak bug. This patch fixes
it.

Signed-off-by: Pan Bian <bianpan2016@163.com>
---
 drivers/net/team/team.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f8c81f1..dd3a2e9 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2634,8 +2634,10 @@ static int team_nl_send_port_list_get(struct team *team, u32 portid, u32 seq,
 
 	hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
 			  TEAM_CMD_PORT_LIST_GET);
-	if (!hdr)
+	if (!hdr) {
+		nlmsg_free(skb);
 		return -EMSGSIZE;
+	}
 
 	if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex))
 		goto nla_put_failure;
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH 1/1] tipc: check return value of nlmsg_new
From: Joe Perches @ 2017-04-23  7:17 UTC (permalink / raw)
  To: Pan Bian, Jon Maloy, Ying Xue, David S. Miller
  Cc: netdev, tipc-discussion, linux-kernel
In-Reply-To: <1492931359-25004-1-git-send-email-bianpan2016@163.com>

On Sun, 2017-04-23 at 15:09 +0800, Pan Bian wrote:
> Function nlmsg_new() will return a NULL pointer if there is not enough
> memory, and its return value should be checked before it is used.
> However, in function tipc_nl_node_get_monitor(), the validation of the
> return value of function nlmsg_new() is missed. This patch fixes the
> bug.

Hello.

Thanks for the patches.

Are you finding these via a tool or inspection?

If a tool is being used, could you please describe it?

^ permalink raw reply

* [PATCH 1/1] tipc: check return value of nlmsg_new
From: Pan Bian @ 2017-04-23  7:09 UTC (permalink / raw)
  To: Jon Maloy, Ying Xue, David S. Miller
  Cc: netdev, tipc-discussion, linux-kernel, Pan Bian

Function nlmsg_new() will return a NULL pointer if there is not enough
memory, and its return value should be checked before it is used.
However, in function tipc_nl_node_get_monitor(), the validation of the
return value of function nlmsg_new() is missed. This patch fixes the
bug.

Signed-off-by: Pan Bian <bianpan2016@163.com>
---
 net/tipc/node.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4512e83..568e48d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2098,6 +2098,8 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info)
 	int err;

 	msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg.skb)
+		return -ENOMEM;
 	msg.portid = info->snd_portid;
 	msg.seq = info->snd_seq;

-- 
1.9.1

^ permalink raw reply related

* [PATCH 1/1] mt7601u: check return value of alloc_skb
From: Pan Bian @ 2017-04-23  7:00 UTC (permalink / raw)
  To: Jakub Kicinski, Kalle Valo, Matthias Brugger
  Cc: netdev, Pan Bian, linux-wireless, linux-kernel, linux-mediatek,
	linux-arm-kernel

Function alloc_skb() will return a NULL pointer if there is not enough
memory. However, in function mt7601u_mcu_msg_alloc(), its return value
is not validated before it is used. This patch fixes it.

Signed-off-by: Pan Bian <bianpan2016@163.com>
---
 drivers/net/wireless/mediatek/mt7601u/mcu.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c
index dbdfb3f..a9f5f39 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mcu.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c
@@ -66,8 +66,10 @@ static inline void trace_mt_mcu_msg_send_cs(struct mt7601u_dev *dev,
 	WARN_ON(len % 4); /* if length is not divisible by 4 we need to pad */
 
 	skb = alloc_skb(len + MT_DMA_HDR_LEN + 4, GFP_KERNEL);
-	skb_reserve(skb, MT_DMA_HDR_LEN);
-	memcpy(skb_put(skb, len), data, len);
+	if (skb) {
+		skb_reserve(skb, MT_DMA_HDR_LEN);
+		memcpy(skb_put(skb, len), data, len);
+	}
 
 	return skb;
 }
@@ -170,6 +172,8 @@ static int mt7601u_mcu_function_select(struct mt7601u_dev *dev,
 	};
 
 	skb = mt7601u_mcu_msg_alloc(dev, &msg, sizeof(msg));
+	if (!skb)
+		return -ENOMEM;
 	return mt7601u_mcu_msg_send(dev, skb, CMD_FUN_SET_OP, func == 5);
 }
 
@@ -205,6 +209,8 @@ int mt7601u_mcu_tssi_read_kick(struct mt7601u_dev *dev, int use_hvga)
 	};
 
 	skb = mt7601u_mcu_msg_alloc(dev, &msg, sizeof(msg));
+	if (!skb)
+		return -ENOMEM;
 	return mt7601u_mcu_msg_send(dev, skb, CMD_CALIBRATION_OP, true);
 }
 
-- 
1.9.1

^ permalink raw reply related

* Re: [Intel-wired-lan] NFS over NAT causes e1000e transmit hangs
From: Neftin, Sasha @ 2017-04-23  6:46 UTC (permalink / raw)
  To: Florian Fainelli, Eric Dumazet; +Cc: netdev, intel-wired-lan
In-Reply-To: <50732fb0-d2b6-9395-d982-6eca8c09aedd@gmail.com>

On 4/20/2017 00:15, Florian Fainelli wrote:
> On 04/19/2017 01:52 AM, Neftin, Sasha wrote:
>> On 4/18/2017 22:05, Florian Fainelli wrote:
>>> On 04/18/2017 12:03 PM, Eric Dumazet wrote:
>>>> On Tue, 2017-04-18 at 11:18 -0700, Florian Fainelli wrote:
>>>>> Hi,
>>>>>
>>>>> I am using NFS over a NAT with two e1000e adapters and with eth1 being
>>>>> the LAN interface and eth0 the WAN interface. The kernel is Ubuntu's
>>>>> 16.10 kernel: 4.8.0-46-generic. The device doing NAT over NFS is just
>>>>> mounting a remote folder and doing normal execution/file accesses. It's
>>>>> enough to untar a file from this device onto a NFS share to expose the
>>>>> problem.
>>>>>
>>>>> The transmit hangs look like the ones below, doing a rmmod/insmod does
>>>>> not help eliminate the problem, nor does a power cycle. Stopping the
>>>>> NFS over NAT definitively does let the adapter recover.
>>>> Is this NFS over TCP or UDP ?
>>> This is NFS over TCP mounted with the following:
>>>
>>> type nfs
>>> (rw,relatime,vers=3,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,port=2049,timeo=70,retrans=3,sec=sys,local_lock=none,addr=X.X.X.X)
>>>
>>>
>>> Thanks Eric!
>> Please try disabling TCP segmentation offload: ethtool -K <adapter> tso off.
> I am not able to reproduce the hangs with TSO turned off. Is there a
> specific patch you would want me to try?

Please work with TSO turned off, then. There is no patch for this specific
problem.

^ permalink raw reply

* [PATCH 1/1] openvswitch: check return value of nla_nest_start
From: Pan Bian @ 2017-04-23  6:43 UTC (permalink / raw)
  To: Pravin Shelar, David S. Miller, netdev-u79uwXL29TY76Z2rM5mHXA,
	dev-yBygre7rU0TnMu66kgdUjQ, linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: Pan Bian

Function nla_nest_start() will return a NULL pointer on error, and its
return value should be validated before it is used. However, in function
queue_userspace_packet(), its return value is ignored. This may result
in NULL dereference when calling nla_nest_end(). This patch fixes the
bug.

Signed-off-by: Pan Bian <bianpan2016-9Onoh4P/yGk@public.gmane.org>
---
 net/openvswitch/datapath.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9c62b63..34c0fbd 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -489,7 +489,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 		err = ovs_nla_put_tunnel_info(user_skb,
 					      upcall_info->egress_tun_info);
 		BUG_ON(err);
-		nla_nest_end(user_skb, nla);
+		if (nla)
+			nla_nest_end(user_skb, nla);
 	}
 
 	if (upcall_info->actions_len) {
@@ -497,7 +498,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 		err = ovs_nla_put_actions(upcall_info->actions,
 					  upcall_info->actions_len,
 					  user_skb);
-		if (!err)
+		if (!err && nla)
 			nla_nest_end(user_skb, nla);
 		else
 			nla_nest_cancel(user_skb, nla);
-- 
1.9.1

^ permalink raw reply related

* [PATCH 1/1] lwtunnel: check return value of nla_nest_start
From: Pan Bian @ 2017-04-23  6:28 UTC (permalink / raw)
  To: David S. Miller
  Cc: David Ahern, Roopa Prabhu, Alexei Starovoitov, David Lebrun,
	Tom Herbert, Robert Shearman, netdev, linux-kernel, Pan Bian

Function nla_nest_start() may return a NULL pointer on error. However,
in function lwtunnel_fill_encap(), the return value of nla_nest_start()
is not validated before it is used. This patch checks the return value
of nla_nest_start() against NULL.

Signed-off-by: Pan Bian <bianpan2016@163.com>
---
 net/core/lwtunnel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 6df9f8f..3471ce7 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -216,6 +216,8 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
 
 	ret = -EOPNOTSUPP;
 	nest = nla_nest_start(skb, RTA_ENCAP);
+	if (!nest)
+		goto nla_put_failure;
 	rcu_read_lock();
 	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
 	if (likely(ops && ops->fill_encap))
-- 
1.9.1

^ permalink raw reply related

* Re: [net-next 03/11] ixgbe: add support for XDP_TX action
From: Jakub Kicinski @ 2017-04-23  3:52 UTC (permalink / raw)
  To: John Fastabend
  Cc: Jeff Kirsher, davem, netdev, nhorman, sassmann, jogreene,
	John Fastabend
In-Reply-To: <58FC2226.3070207@gmail.com>

On Sat, 22 Apr 2017 20:40:22 -0700, John Fastabend wrote:
> >> @@ -9557,7 +9739,21 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
> >>  			return -EINVAL;
> >>  	}
> >>  
> >> +	if (nr_cpu_ids > MAX_XDP_QUEUES)
> >> +		return -ENOMEM;
> >> +
> >>  	old_prog = xchg(&adapter->xdp_prog, prog);
> >> +
> >> +	/* If transitioning XDP modes reconfigure rings */
> >> +	if (!!prog != !!old_prog) {
> >> +		int err = ixgbe_setup_tc(dev, netdev_get_num_tc(dev));
> >> +
> >> +		if (err) {
> >> +			rcu_assign_pointer(adapter->xdp_prog, old_prog);
> >> +			return -EINVAL;
> >> +		}
> >> +	}
> >> +
> >>  	for (i = 0; i < adapter->num_rx_queues; i++)
> >>  		xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
> >>    
> > 
> > In case of disabling XDP I assume ixgbe_setup_tc() will free the rings
> > before the xdp_prog on the rings is swapped to NULL.  Is there anything
> > preventing TX in that time window?  I think usual ordering would be to
> > install the prog after reconfig but uninstall before.
> >   
> 
> Well in the ixgbe_setup_tc() case we set the rx_ring->xdp_prog in
> ixgbe_setup_rx_resources(), while the dma engine is disabled, so the for
> loop is just doing another set on the rx_ring assigning it to the program
> already set previously.
> 
> It's not really buggy, it's just extra useless work, so I'll change it to this,
> 
> 	if (!!prog != !!old_prog) {
> 		...
> 	} else {
> 		for ( ... )
> 			swap xdp prog
> 	}
> 
> Nice spot, thanks for reviewing. And I missed a build error so I'll roll these
> fixes in a resend.

Ah, thanks for explaining.  No bugs that I can spot then :)

^ permalink raw reply

* Re: [net-next 00/11][pull request] 10GbE Intel Wired LAN Driver Updates 2017-04-20
From: John Fastabend @ 2017-04-23  3:41 UTC (permalink / raw)
  To: David Miller, jeffrey.t.kirsher; +Cc: netdev, nhorman, sassmann, jogreene
In-Reply-To: <20170421.141809.1159225304098548850.davem@davemloft.net>

On 17-04-21 11:18 AM, David Miller wrote:
> From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> Date: Thu, 20 Apr 2017 18:50:18 -0700
> 
>> John adds XDP support (yeah!) for ixgbe.
> 
> As excited and eager as I am about this, I want to see the build regression
> for PAGE_SIZE>=8192 fixed before I pull this.
> 
> Thanks.
> 

Dang :(

Jeff, Alex sent you a fix already for this, but Jakub had a few nice
improvements. I'm thinking the easiest thing to do is for me to merge Alex's fix
and Jakub's comments and resend the patches.

Thanks,
John

^ permalink raw reply

* Re: [net-next 03/11] ixgbe: add support for XDP_TX action
From: John Fastabend @ 2017-04-23  3:40 UTC (permalink / raw)
  To: Jakub Kicinski, Jeff Kirsher
  Cc: davem, netdev, nhorman, sassmann, jogreene, John Fastabend
In-Reply-To: <20170422192411.1d85793a@cakuba.lan>

On 17-04-22 07:24 PM, Jakub Kicinski wrote:
> On Thu, 20 Apr 2017 18:50:21 -0700, Jeff Kirsher wrote:
>> +static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter)
>> +{
>> +	if (nr_cpu_ids > MAX_XDP_QUEUES)
>> +		return 0;
>> +
>> +	return adapter->xdp_prog ? nr_cpu_ids : 0;
>> +}
> 
> Nit: AFAICT ixgbe_xdp_setup() will guarantee xdp_prog is not set if
> there are too many CPU ids.

Sure being a bit paranoid I guess.

> 
>> @@ -6120,10 +6193,21 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
>>  		e_err(probe, "Allocation for Tx Queue %u failed\n", i);
>>  		goto err_setup_tx;
>>  	}
>> +	for (j = 0; j < adapter->num_xdp_queues; j++) {
>> +		err = ixgbe_setup_tx_resources(adapter->xdp_ring[j]);
>> +		if (!err)
>> +			continue;
>> +
>> +		e_err(probe, "Allocation for Tx Queue %u failed\n", j);
>> +		goto err_setup_tx;
>> +	}
>> +
>>  
> 
> Nit: extra line here

OK well I guess we can fix this if we need a respin anyways.

> 
>> @@ -9557,7 +9739,21 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
>>  			return -EINVAL;
>>  	}
>>  
>> +	if (nr_cpu_ids > MAX_XDP_QUEUES)
>> +		return -ENOMEM;
>> +
>>  	old_prog = xchg(&adapter->xdp_prog, prog);
>> +
>> +	/* If transitioning XDP modes reconfigure rings */
>> +	if (!!prog != !!old_prog) {
>> +		int err = ixgbe_setup_tc(dev, netdev_get_num_tc(dev));
>> +
>> +		if (err) {
>> +			rcu_assign_pointer(adapter->xdp_prog, old_prog);
>> +			return -EINVAL;
>> +		}
>> +	}
>> +
>>  	for (i = 0; i < adapter->num_rx_queues; i++)
>>  		xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
>>  
> 
> In case of disabling XDP I assume ixgbe_setup_tc() will free the rings
> before the xdp_prog on the rings is swapped to NULL.  Is there anything
> preventing TX in that time window?  I think usual ordering would be to
> install the prog after reconfig but uninstall before.
> 

Well in the ixgbe_setup_tc() case we set the rx_ring->xdp_prog in
ixgbe_setup_rx_resources(), while the dma engine is disabled, so the for
loop is just doing another set on the rx_ring assigning it to the program
already set previously.

It's not really buggy, it's just extra useless work, so I'll change it to this,

	if (!!prog != !!old_prog) {
		...
	} else {
		for ( ... )
			swap xdp prog
	}

Nice spot, thanks for reviewing. And I missed a build error so I'll roll these
fixes in a resend.

Thanks,
John

^ permalink raw reply

* [PATCH net-next v3 5/5] nfp: remove the refresh of all ports optimization
From: Jakub Kicinski @ 2017-04-23  3:17 UTC (permalink / raw)
  To: netdev; +Cc: kubakici, oss-drivers, Jakub Kicinski
In-Reply-To: <20170423031756.94429-1-jakub.kicinski@netronome.com>

The code refreshing the eth port state was trying to update state
of all ports of the card.  Unfortunately to safely walk the port
list we would have to hold the port lock, which we can't due to
lock ordering constraints against rtnl.

Make the per-port sync refresh and async refresh of all ports
completely separate routines.

Fixes: 172f638c93dd ("nfp: add port state refresh")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  3 +-
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 13 +++--
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  | 67 +++++++++++++++-------
 3 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 8302a2d688da..8f20fdef0754 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -819,7 +819,8 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn);
 int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new);
 
 bool nfp_net_link_changed_read_clear(struct nfp_net *nn);
-void nfp_net_refresh_port_config(struct nfp_net *nn);
+int nfp_net_refresh_eth_port(struct nfp_net *nn);
+void nfp_net_refresh_port_table(struct nfp_net *nn);
 
 #ifdef CONFIG_NFP_DEBUG
 void nfp_net_debugfs_create(void);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 3328041ec290..6e27d1281425 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -211,10 +211,15 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
 		return 0;
 
 	/* Use link speed from ETH table if available, otherwise try the BAR */
-	if (nn->eth_port && nfp_net_link_changed_read_clear(nn))
-		nfp_net_refresh_port_config(nn);
-	/* Separate if - on FW error the port could've disappeared from table */
 	if (nn->eth_port) {
+		int err;
+
+		if (nfp_net_link_changed_read_clear(nn)) {
+			err = nfp_net_refresh_eth_port(nn);
+			if (err)
+				return err;
+		}
+
 		cmd->base.port = nn->eth_port->port_type;
 		cmd->base.speed = nn->eth_port->speed;
 		cmd->base.duplex = DUPLEX_FULL;
@@ -273,7 +278,7 @@ nfp_net_set_link_ksettings(struct net_device *netdev,
 	if (err > 0)
 		return 0; /* no change */
 
-	nfp_net_refresh_port_config(nn);
+	nfp_net_refresh_port_table(nn);
 
 	return err;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 4c6863a072d3..8cb87cbe1120 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -176,13 +176,13 @@ nfp_net_get_mac_addr(struct nfp_net *nn, struct nfp_cpp *cpp, unsigned int id)
 }
 
 static struct nfp_eth_table_port *
-nfp_net_find_port(struct nfp_pf *pf, unsigned int id)
+nfp_net_find_port(struct nfp_eth_table *eth_tbl, unsigned int id)
 {
 	int i;
 
-	for (i = 0; pf->eth_tbl && i < pf->eth_tbl->count; i++)
-		if (pf->eth_tbl->ports[i].eth_index == id)
-			return &pf->eth_tbl->ports[i];
+	for (i = 0; eth_tbl && i < eth_tbl->count; i++)
+		if (eth_tbl->ports[i].eth_index == id)
+			return &eth_tbl->ports[i];
 
 	return NULL;
 }
@@ -367,7 +367,7 @@ nfp_net_pf_alloc_netdevs(struct nfp_pf *pf, void __iomem *ctrl_bar,
 		prev_tx_base = tgt_tx_base;
 		prev_rx_base = tgt_rx_base;
 
-		eth_port = nfp_net_find_port(pf, i);
+		eth_port = nfp_net_find_port(pf->eth_tbl, i);
 		if (eth_port && eth_port->override_changed) {
 			nfp_warn(pf->cpp, "Config changed for port #%d, reboot required before port will be operational\n", i);
 		} else {
@@ -485,6 +485,7 @@ static void nfp_net_refresh_netdevs(struct work_struct *work)
 {
 	struct nfp_pf *pf = container_of(work, struct nfp_pf,
 					 port_refresh_work);
+	struct nfp_eth_table *eth_table;
 	struct nfp_net *nn, *next;
 
 	mutex_lock(&pf->port_lock);
@@ -493,6 +494,27 @@ static void nfp_net_refresh_netdevs(struct work_struct *work)
 	if (list_empty(&pf->ports))
 		goto out;
 
+	list_for_each_entry(nn, &pf->ports, port_list)
+		nfp_net_link_changed_read_clear(nn);
+
+	eth_table = nfp_eth_read_ports(pf->cpp);
+	if (!eth_table) {
+		nfp_err(pf->cpp, "Error refreshing port config!\n");
+		goto out;
+	}
+
+	rtnl_lock();
+	list_for_each_entry(nn, &pf->ports, port_list) {
+		if (!nn->eth_port)
+			continue;
+		nn->eth_port = nfp_net_find_port(eth_table,
+						 nn->eth_port->eth_index);
+	}
+	rtnl_unlock();
+
+	kfree(pf->eth_tbl);
+	pf->eth_tbl = eth_table;
+
 	list_for_each_entry_safe(nn, next, &pf->ports, port_list) {
 		if (!nn->eth_port) {
 			nfp_warn(pf->cpp, "Warning: port not present after reconfig\n");
@@ -517,31 +539,36 @@ static void nfp_net_refresh_netdevs(struct work_struct *work)
 	mutex_unlock(&pf->port_lock);
 }
 
-void nfp_net_refresh_port_config(struct nfp_net *nn)
+void nfp_net_refresh_port_table(struct nfp_net *nn)
 {
 	struct nfp_pf *pf = pci_get_drvdata(nn->pdev);
-	struct nfp_eth_table *old_table;
 
-	ASSERT_RTNL();
+	schedule_work(&pf->port_refresh_work);
+}
 
-	old_table = pf->eth_tbl;
+int nfp_net_refresh_eth_port(struct nfp_net *nn)
+{
+	struct nfp_eth_table_port *eth_port;
+	struct nfp_eth_table *eth_table;
 
-	list_for_each_entry(nn, &pf->ports, port_list)
-		nfp_net_link_changed_read_clear(nn);
+	eth_table = nfp_eth_read_ports(nn->cpp);
+	if (!eth_table) {
+		nn_err(nn, "Error refreshing port state table!\n");
+		return -EIO;
+	}
 
-	pf->eth_tbl = nfp_eth_read_ports(pf->cpp);
-	if (!pf->eth_tbl) {
-		pf->eth_tbl = old_table;
-		nfp_err(pf->cpp, "Error refreshing port config!\n");
-		return;
+	eth_port = nfp_net_find_port(eth_table, nn->eth_port->eth_index);
+	if (!eth_port) {
+		nn_err(nn, "Error finding state of the port!\n");
+		kfree(eth_table);
+		return -EIO;
 	}
 
-	list_for_each_entry(nn, &pf->ports, port_list)
-		nn->eth_port = nfp_net_find_port(pf, nn->eth_port->eth_index);
+	memcpy(nn->eth_port, eth_port, sizeof(*eth_port));
 
-	kfree(old_table);
+	kfree(eth_table);
 
-	schedule_work(&pf->port_refresh_work);
+	return 0;
 }
 
 /*
-- 
2.11.0

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox