Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next 4/5] mlx5: hide unused functions
From: Saeed Mahameed @ 2017-04-22 18:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Or Gerlitz, Roi Dayan, Stephen Hemminger,
	Stephen Hemminger, Saeed Mahameed
In-Reply-To: <20170422184507.26569-1-saeedm@mellanox.com>

From: Stephen Hemminger <stephen@networkplumber.org>

Fix sparse warnings in recent ipoib support.
The RDMA functions are not used yet, hide behind #ifdef.
Based on comment, they will eventually be local so make static.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/ipoib.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
index ec78e637840f..3c84e36af018 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
@@ -178,7 +178,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
 	return 0;
 }
 
-void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
+static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
 {
 	struct mlx5i_priv *ipriv = priv->ppriv;
 
@@ -359,9 +359,10 @@ static int mlx5i_close(struct net_device *netdev)
 	return 0;
 }
 
+#ifdef notusedyet
 /* IPoIB RDMA netdev callbacks */
-int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
-		       union ib_gid *gid, u16 lid, int set_qkey)
+static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
+			      union ib_gid *gid, u16 lid, int set_qkey)
 {
 	struct mlx5e_priv    *epriv = mlx5i_epriv(netdev);
 	struct mlx5_core_dev *mdev  = epriv->mdev;
@@ -377,8 +378,8 @@ int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
 	return err;
 }
 
-int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
-		       union ib_gid *gid, u16 lid)
+static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
+			      union ib_gid *gid, u16 lid)
 {
 	struct mlx5e_priv    *epriv = mlx5i_epriv(netdev);
 	struct mlx5_core_dev *mdev  = epriv->mdev;
@@ -395,7 +396,7 @@ int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
 	return err;
 }
 
-int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
+static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
 	       struct ib_ah *address, u32 dqpn, u32 dqkey)
 {
 	struct mlx5e_priv *epriv = mlx5i_epriv(dev);
@@ -404,6 +405,7 @@ int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
 
 	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, dqkey);
 }
+#endif
 
 static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 {
@@ -418,10 +420,10 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
-struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
-					  struct ib_device *ibdev,
-					  const char *name,
-					  void (*setup)(struct net_device *))
+static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+						 struct ib_device *ibdev,
+						 const char *name,
+						 void (*setup)(struct net_device *))
 {
 	const struct mlx5e_profile *profile = &mlx5i_nic_profile;
 	int nch = profile->max_nch(mdev);
@@ -480,7 +482,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 }
 EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
 
-void mlx5_rdma_netdev_free(struct net_device *netdev)
+static void mlx5_rdma_netdev_free(struct net_device *netdev)
 {
 	struct mlx5e_priv          *priv    = mlx5i_epriv(netdev);
 	const struct mlx5e_profile *profile = priv->profile;
-- 
2.11.0

^ permalink raw reply related

* [pull request][net-next 0/5] Mellanox, mlx5 updates 2017-04-22
From: Saeed Mahameed @ 2017-04-22 18:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Or Gerlitz, Roi Dayan, Stephen Hemminger, Saeed Mahameed

Hi Dave,

This series contains some updates to mlx5 driver.

Sparse and compiler warnings fixes from Stephen Hemminger.

From Roi Dayan and Or Gerlitz: add devlink and mlx5 support for controlling
E-Switch encapsulation mode. This knob will enable HW support for applying
encapsulation/decapsulation to VF traffic as part of SRIOV e-switch offloading.

Please pull and let me know if there's any problem.

Thanks,
Saeed.

---

The following changes since commit fb796707d7a6c9b24fdf80b9b4f24fa5ffcf0ec5:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2017-04-21 20:23:53 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-updates-2017-04-22

for you to fetch changes up to 8bf3198a5e394ed6815aeb8fedaf49722986bbd3:

  mlx5: fix warning about missing prototype (2017-04-22 20:26:42 +0300)

Or Gerlitz (1):
      net/mlx5: E-Switch, Refactor fast path FDB table creation in switchdev mode

Roi Dayan (2):
      net/devlink: Add E-Switch encapsulation control
      net/mlx5: E-Switch, Add control for encapsulation

Stephen Hemminger (2):
      mlx5: hide unused functions
      mlx5: fix warning about missing prototype

 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |   5 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   3 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 132 +++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/ipoib.c    |  24 ++--
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   2 +
 include/net/devlink.h                              |   2 +
 include/uapi/linux/devlink.h                       |   7 ++
 net/core/devlink.c                                 |  26 +++-
 10 files changed, 167 insertions(+), 36 deletions(-)

^ permalink raw reply

* [net-next 2/5] net/mlx5: E-Switch, Refactor fast path FDB table creation in switchdev mode
From: Saeed Mahameed @ 2017-04-22 18:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Or Gerlitz, Roi Dayan, Stephen Hemminger, Saeed Mahameed
In-Reply-To: <20170422184507.26569-1-saeedm@mellanox.com>

From: Or Gerlitz <ogerlitz@mellanox.com>

Refactor the creation of the fast path FDB table that holds the
offloaded rules in SRIOV switchdev mode into its own function.

This will be used in the next patch to be able to re-create the
table under different settings without going through legacy mode.

This patch doesn't change any functionality.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 69 +++++++++++++++-------
 1 file changed, 49 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 992b380d36be..ce3a2c040706 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -426,31 +426,21 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 	return err;
 }
 
-#define MAX_PF_SQ 256
 #define ESW_OFFLOADS_NUM_GROUPS  4
 
-static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
+static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
 {
-	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct mlx5_flow_table_attr ft_attr = {};
-	int table_size, ix, esw_size, err = 0;
 	struct mlx5_core_dev *dev = esw->dev;
 	struct mlx5_flow_namespace *root_ns;
 	struct mlx5_flow_table *fdb = NULL;
-	struct mlx5_flow_group *g;
-	u32 *flow_group_in;
-	void *match_criteria;
+	int esw_size, err = 0;
 	u32 flags = 0;
 
-	flow_group_in = mlx5_vzalloc(inlen);
-	if (!flow_group_in)
-		return -ENOMEM;
-
 	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get FDB flow namespace\n");
 		err = -EOPNOTSUPP;
-		goto ns_err;
+		goto out;
 	}
 
 	esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n",
@@ -471,10 +461,49 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
-		goto fast_fdb_err;
+		goto out;
 	}
 	esw->fdb_table.fdb = fdb;
 
+out:
+	return err;
+}
+
+static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+{
+	mlx5_destroy_flow_table(esw->fdb_table.fdb);
+}
+
+#define MAX_PF_SQ 256
+
+static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *root_ns;
+	struct mlx5_flow_table *fdb = NULL;
+	int table_size, ix, err = 0;
+	struct mlx5_flow_group *g;
+	void *match_criteria;
+	u32 *flow_group_in;
+
+	esw_debug(esw->dev, "Create offloads FDB Tables\n");
+	flow_group_in = mlx5_vzalloc(inlen);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	if (!root_ns) {
+		esw_warn(dev, "Failed to get FDB flow namespace\n");
+		err = -EOPNOTSUPP;
+		goto ns_err;
+	}
+
+	err = esw_create_offloads_fast_fdb_table(esw);
+	if (err)
+		goto fast_fdb_err;
+
 	table_size = nvports + MAX_PF_SQ + 1;
 
 	ft_attr.max_fte = table_size;
@@ -545,18 +574,18 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	return err;
 }
 
-static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
 {
 	if (!esw->fdb_table.fdb)
 		return;
 
-	esw_debug(esw->dev, "Destroy offloads FDB Table\n");
+	esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
 	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
 	mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
-	mlx5_destroy_flow_table(esw->fdb_table.fdb);
+	esw_destroy_offloads_fast_fdb_table(esw);
 }
 
 static int esw_create_offloads_table(struct mlx5_eswitch *esw)
@@ -716,7 +745,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 	mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 	mlx5_dev_list_unlock();
 
-	err = esw_create_offloads_fdb_table(esw, nvports);
+	err = esw_create_offloads_fdb_tables(esw, nvports);
 	if (err)
 		goto create_fdb_err;
 
@@ -753,7 +782,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 	esw_destroy_offloads_table(esw);
 
 create_ft_err:
-	esw_destroy_offloads_fdb_table(esw);
+	esw_destroy_offloads_fdb_tables(esw);
 
 create_fdb_err:
 	/* enable back PF RoCE */
@@ -799,7 +828,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
 
 	esw_destroy_vport_rx_group(esw);
 	esw_destroy_offloads_table(esw);
-	esw_destroy_offloads_fdb_table(esw);
+	esw_destroy_offloads_fdb_tables(esw);
 }
 
 static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
-- 
2.11.0

^ permalink raw reply related

* [net-next 5/5] mlx5: fix warning about missing prototype
From: Saeed Mahameed @ 2017-04-22 18:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Or Gerlitz, Roi Dayan, Stephen Hemminger,
	Stephen Hemminger, Saeed Mahameed
In-Reply-To: <20170422184507.26569-1-saeedm@mellanox.com>

From: Stephen Hemminger <stephen@networkplumber.org>

Fix sparse warning about missing prototypes. The rx/tx code path
defines functions with prototypes in ipoib.h.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 43308243f519..ae66fad98244 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -39,6 +39,7 @@
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
+#include "ipoib.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index dda7db503043..ab3bb026ff9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -33,6 +33,7 @@
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
 #include "en.h"
+#include "ipoib.h"
 
 #define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
 #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
-- 
2.11.0

^ permalink raw reply related

* [net-next 3/5] net/mlx5: E-Switch, Add control for encapsulation
From: Saeed Mahameed @ 2017-04-22 18:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Or Gerlitz, Roi Dayan, Stephen Hemminger, Saeed Mahameed
In-Reply-To: <20170422184507.26569-1-saeedm@mellanox.com>

From: Roi Dayan <roid@mellanox.com>

Implement the devlink e-switch encapsulation control set and get
callbacks. Apply the value set by the user on the switchdev offloads
mode when creating the fast FDB table where offloaded rules will be set.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  5 ++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  3 ++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 63 +++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  2 +
 4 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index b3281d1118b3..21bed3c3334d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1806,6 +1806,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	esw->enabled_vports = 0;
 	esw->mode = SRIOV_NONE;
 	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
+	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
+	    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+	else
+		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 
 	dev->priv.eswitch = esw;
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 1f56ed9f5a6f..1e7f21be1233 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -210,6 +210,7 @@ struct mlx5_esw_offload {
 	DECLARE_HASHTABLE(encap_tbl, 8);
 	u8 inline_mode;
 	u64 num_flows;
+	u8 encap;
 };
 
 struct mlx5_eswitch {
@@ -322,6 +323,8 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
 int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode);
 int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     int vport_index,
 				     struct mlx5_eswitch_rep *rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index ce3a2c040706..189d24dbd3e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -450,8 +450,7 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
 	esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS,
 			 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 
-	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
-	    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
 		flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
 
 	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
@@ -1045,6 +1044,66 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
 	return 0;
 }
 
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
+	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
+	     !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
+		return -EOPNOTSUPP;
+
+	if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_LEGACY) {
+		esw->offloads.encap = encap;
+		return 0;
+	}
+
+	if (esw->offloads.encap == encap)
+		return 0;
+
+	if (esw->offloads.num_flows > 0) {
+		esw_warn(dev, "Can't set encapsulation when flows are configured\n");
+		return -EOPNOTSUPP;
+	}
+
+	esw_destroy_offloads_fast_fdb_table(esw);
+
+	esw->offloads.encap = encap;
+	err = esw_create_offloads_fast_fdb_table(esw);
+	if (err) {
+		esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err);
+		esw->offloads.encap = !encap;
+		(void) esw_create_offloads_fast_fdb_table(esw);
+	}
+	return err;
+}
+
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	*encap = esw->offloads.encap;
+	return 0;
+}
+
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     int vport_index,
 				     struct mlx5_eswitch_rep *__rep)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 9c2bec732af9..bde91a8bec73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1280,6 +1280,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
 	.eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
 	.eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
 	.eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
+	.eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
+	.eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
 #endif
 };
 
-- 
2.11.0

^ permalink raw reply related

* [net-next 1/5] net/devlink: Add E-Switch encapsulation control
From: Saeed Mahameed @ 2017-04-22 18:45 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Or Gerlitz, Roi Dayan, Stephen Hemminger, Saeed Mahameed
In-Reply-To: <20170422184507.26569-1-saeedm@mellanox.com>

From: Roi Dayan <roid@mellanox.com>

This is an e-switch global knob to enable HW support for applying
encapsulation/decapsulation to VF traffic as part of SRIOV e-switch offloading.

The actual encap/decap is carried out (along with the matching and other actions)
per offloaded e-switch rules, e.g. as done when offloading the TC tunnel key action.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/net/devlink.h        |  2 ++
 include/uapi/linux/devlink.h |  7 +++++++
 net/core/devlink.c           | 26 +++++++++++++++++++++++---
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 24de13f8c94f..ed7687bbf5d0 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -268,6 +268,8 @@ struct devlink_ops {
 	int (*eswitch_mode_set)(struct devlink *devlink, u16 mode);
 	int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
 	int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode);
+	int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode);
+	int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index b47bee277347..b0e807ac53bb 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -119,6 +119,11 @@ enum devlink_eswitch_inline_mode {
 	DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT,
 };
 
+enum devlink_eswitch_encap_mode {
+	DEVLINK_ESWITCH_ENCAP_MODE_NONE,
+	DEVLINK_ESWITCH_ENCAP_MODE_BASIC,
+};
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -195,6 +200,8 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_PAD,
 
+	DEVLINK_ATTR_ESWITCH_ENCAP_MODE,	/* u8 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 0afac5800b57..b0b87a292e7c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1397,10 +1397,10 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
 				   u32 seq, int flags)
 {
 	const struct devlink_ops *ops = devlink->ops;
+	u8 inline_mode, encap_mode;
 	void *hdr;
 	int err = 0;
 	u16 mode;
-	u8 inline_mode;
 
 	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
 	if (!hdr)
@@ -1429,6 +1429,15 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
 			goto nla_put_failure;
 	}
 
+	if (ops->eswitch_encap_mode_get) {
+		err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
+		if (err)
+			goto nla_put_failure;
+		err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
+		if (err)
+			goto nla_put_failure;
+	}
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
@@ -1468,9 +1477,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
 {
 	struct devlink *devlink = info->user_ptr[0];
 	const struct devlink_ops *ops = devlink->ops;
-	u16 mode;
-	u8 inline_mode;
+	u8 inline_mode, encap_mode;
 	int err = 0;
+	u16 mode;
 
 	if (!ops)
 		return -EOPNOTSUPP;
@@ -1493,6 +1502,16 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
 		if (err)
 			return err;
 	}
+
+	if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+		if (!ops->eswitch_encap_mode_set)
+			return -EOPNOTSUPP;
+		encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+		err = ops->eswitch_encap_mode_set(devlink, encap_mode);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -2190,6 +2209,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
 	[DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
 	[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
 };
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH v2] net: natsemi: ns83820: add checks for dma mapping error
From: Francois Romieu @ 2017-04-22 19:20 UTC (permalink / raw)
  To: Alexey Khoroshilov; +Cc: David S. Miller, netdev, linux-kernel, ldv-project
In-Reply-To: <1492866365-5422-1-git-send-email-khoroshilov@ispras.ru>

Alexey Khoroshilov <khoroshilov@ispras.ru> :
[...]
> diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
> index 729095db3e08..dfc64e1e31f9 100644
> --- a/drivers/net/ethernet/natsemi/ns83820.c
> +++ b/drivers/net/ethernet/natsemi/ns83820.c
[...]
> @@ -1183,6 +1193,32 @@ static netdev_tx_t ns83820_hard_start_xmit(struct sk_buff *skb,
>  		netif_start_queue(ndev);
>  
>  	return NETDEV_TX_OK;
> +
> +dma_error:
> +	do {
> +		free_idx = (free_idx + NR_TX_DESC - 1) % NR_TX_DESC;
> +		desc = dev->tx_descs + (free_idx * DESC_SIZE);
> +		cmdsts = le32_to_cpu(desc[DESC_CMDSTS]);
> +		len = cmdsts & CMDSTS_LEN_MASK;
> +		buf = desc_addr_get(desc + DESC_BUFPTR);
> +		if (desc == first_desc)
> +			pci_unmap_single(dev->pci_dev,
> +					buf,
> +					len,
> +					PCI_DMA_TODEVICE);
> +		else
> +			pci_unmap_page(dev->pci_dev,
> +					buf,
> +					len,
> +					PCI_DMA_TODEVICE);

(use tabs + spaces to indent: code should line up right after the parenthesis)

(premature line breaks imho)

(nevermind, both can be avoided :o) )

> +		desc[DESC_CMDSTS] = cpu_to_le32(0);
> +		mb();
> +	} while (desc != first_desc);
> +
> +dma_error_first:
> +	dev_kfree_skb_any(skb);
> +	ndev->stats.tx_errors++;
                    ^^^^^^^^^ -> should be tx_dropped
> +	return NETDEV_TX_OK;
>  }

You only need a single test in the error loop if you mimic the map loop.
Something like:

diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 729095d..5e2dbc9 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -1160,9 +1160,11 @@ static netdev_tx_t ns83820_hard_start_xmit(struct sk_buff *skb,
 
 		buf = skb_frag_dma_map(&dev->pci_dev->dev, frag, 0,
 				       skb_frag_size(frag), DMA_TO_DEVICE);
+		if (dma_mapping_error(&dev->pci_dev->dev, buf))
+			goto err_unmap_frags;
 		dprintk("frag: buf=%08Lx  page=%08lx offset=%08lx\n",
 			(long long)buf, (long) page_to_pfn(frag->page),
 			frag->page_offset);
 		len = skb_frag_size(frag);
 		frag++;
 		nr_frags--;
@@ -1181,8 +1184,27 @@ static netdev_tx_t ns83820_hard_start_xmit(struct sk_buff *skb,
 	/* Check again: we may have raced with a tx done irq */
 	if (stopped && (dev->tx_done_idx != tx_done_idx) && start_tx_okay(dev))
 		netif_start_queue(ndev);
-
+out:
 	return NETDEV_TX_OK;
+
+err_unmap_frags:
+	while (1) {
+		buf = desc_addr_get(desc + DESC_BUFPTR);
+		if (!--nr_frags)
+			break;
+
+		pci_unmap_page(dev->pci_dev, buf, len, PCI_DMA_TODEVICE);
+
+		free_idx = (free_idx - 1) % NR_TX_DESC;
+		desc = dev->tx_descs + (free_idx * DESC_SIZE);
+		len = le32_to_cpu(desc + DESC_CMDSTS) & CMDSTS_LEN_MASK;
+	}
+	pci_unmap_single(dev->pci_dev, buf, len, PCI_DMA_TODEVICE);
+
+err_free_skb:
+	dev_kfree_skb_any(skb);
+	ndev->stats.tx_dropped++;
+	goto out;
 }
 
 static void ns83820_update_stats(struct ns83820 *dev)


Thinking more about it, the driver seems rather unsafe if a failing
start_xmit closely follows a succeeding one. The driver should imho
map frags first *then* plug the remaining hole in the descriptor ring.
Until it does, the implicit assumption about descriptor ownership that
the error unroll loop relies on may be wrong.

-- 
Ueimor

^ permalink raw reply related

* [PATCH] bpf: Add sparc support to tools and samples.
From: David Miller @ 2017-04-22 19:38 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: daniel, netdev


Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/bpf_helpers.h      | 19 +++++++++++++++++++
 tools/build/feature/test-bpf.c |  3 +++
 tools/lib/bpf/bpf.c            |  2 ++
 3 files changed, 24 insertions(+)

diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 52de9d8..9a9c95f 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -146,11 +146,30 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
 #define PT_REGS_SP(x) ((x)->sp)
 #define PT_REGS_IP(x) ((x)->nip)
 
+#elif defined(__sparc__)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
 #endif
 
 #ifdef __powerpc__
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; })
 #define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#elif defined(__sparc__)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
 #else
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({				\
 		bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
index e04ab89..ebc6dce 100644
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -9,6 +9,9 @@
 #  define __NR_bpf 321
 # elif defined(__aarch64__)
 #  define __NR_bpf 280
+# elif defined(__sparc__)
+#  define __NR_bpf 349
+# else
 #  error __NR_bpf not defined. libbpf does not support your arch.
 # endif
 #endif
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index f84c398..4fe444b80 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -37,6 +37,8 @@
 #  define __NR_bpf 321
 # elif defined(__aarch64__)
 #  define __NR_bpf 280
+# elif defined(__sparc__)
+#  define __NR_bpf 349
 # else
 #  error __NR_bpf not defined. libbpf does not support your arch.
 # endif
-- 
2.1.2.532.g19b5d50

^ permalink raw reply related

* tools/testing/selftests/bpf/Makefile
From: David Miller @ 2017-04-22 19:45 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: daniel, netdev

Alexei, that unconditional -D__x86_64__ isn't going to work.  It in
fact makes the build break on sparc because the types.h asm headers
explicitly check for things like __sparc__ && __arch64__ etc.

There are other places that want stuff like this, so let's do it
right.

In every

	arch/${ARCH}/Makefile

extract out the "-DXXX" stuff from CHECKFLAGS into a new Makefile
variable, expand that into CHECKFLAGS and use the new variable in
places like

	tools/testing/selftests/bpf/Makefile

and

	tools/testing/selftests/ipc/Makefile

Thanks.

^ permalink raw reply

* Re: [PATCH] bpf: Add sparc support to tools and samples.
From: Daniel Borkmann @ 2017-04-22 19:46 UTC (permalink / raw)
  To: David Miller, alexei.starovoitov; +Cc: netdev
In-Reply-To: <20170422.153846.1171688488318421720.davem@davemloft.net>

On 04/22/2017 09:38 PM, David Miller wrote:
>
> Signed-off-by: David S. Miller <davem@davemloft.net>

LGTM, thanks!

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply

* Re: [PATCH] bpf: Add sparc support to tools and samples.
From: David Miller @ 2017-04-22 20:02 UTC (permalink / raw)
  To: daniel; +Cc: alexei.starovoitov, netdev
In-Reply-To: <58FBB326.5090409@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 22 Apr 2017 21:46:46 +0200

> On 04/22/2017 09:38 PM, David Miller wrote:
>>
>> Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> LGTM, thanks!
> 
> Acked-by: Daniel Borkmann <daniel@iogearbox.net>

Great, this and the sparc64 eBPF JIT are now pushed out to net-next.

^ permalink raw reply

* Re: compile issue in latest iproute2
From: Jamal Hadi Salim @ 2017-04-22 20:06 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Daniel Borkmann, netdev@vger.kernel.org
In-Reply-To: <20170422095434.730035b6@xeon-e3>

On 17-04-22 12:54 PM, Stephen Hemminger wrote:
> On Sat, 22 Apr 2017 12:43:50 -0400
> Jamal Hadi Salim <jhs@mojatatu.com> wrote:
>
>> On 17-04-22 12:18 PM, Daniel Borkmann wrote:
>> [..]
>>>
>>> Anything I'm missing?
>>
>>
>> Let me get back to that machine (couple of hours) and try to see how i
>> created the issue.
>> Shouldve cutnpasted the error msg. Cant create it on this laptop.
>>
>> cheers,
>> jamal
>
> Current tip of iproute2 master compiles fine for me
> both with and without HAVE_ELF


Sorry - I cannot recreate it. I tried from scratch and did the patches
I was testing on and it compiled cleanly. Apologies for the alarm.

cheers,
jamal

^ permalink raw reply

* Re: [PATCH] bpf: Add sparc support to tools and samples.
From: Daniel Borkmann @ 2017-04-22 20:16 UTC (permalink / raw)
  To: David Miller; +Cc: alexei.starovoitov, netdev
In-Reply-To: <20170422.160246.303157283280813189.davem@davemloft.net>

On 04/22/2017 10:02 PM, David Miller wrote:
> From: Daniel Borkmann <daniel@iogearbox.net>
> Date: Sat, 22 Apr 2017 21:46:46 +0200
>
>> On 04/22/2017 09:38 PM, David Miller wrote:
>>>
>>> Signed-off-by: David S. Miller <davem@davemloft.net>
>>
>> LGTM, thanks!
>>
>> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
>
> Great, this and the sparc64 eBPF JIT are now pushed out to net-next.

Awesome, thanks for all the work!

^ permalink raw reply

* [PATCH net-next 0/2] flower: add MPLS matching support
From: Benjamin LaHaise @ 2017-04-22 20:52 UTC (permalink / raw)
  To: netdev; +Cc: Benjamin LaHaise

From: Benjamin LaHaise <bcrl@kvack.org>

This patch series adds support for parsing MPLS flows in the flow dissector
and the flower classifier.  Each of the MPLS TTL, BOS, TC and Label fields
can be used for matching.

v2: incorporate style feedback, move #defines to linux/include/mpls.h
Note: this omits Jiri's request to remove tabs between the type and 
field names in struct declarations.  This would be inconsistent with 
numerous other struct definitions.

Benjamin LaHaise (2):
  flow_dissector: add mpls support (v2)
  cls_flower: add support for matching MPLS fields (v2)

 include/linux/mpls.h         |  5 +++
 include/net/flow_dissector.h |  8 +++++
 include/uapi/linux/pkt_cls.h |  5 +++
 net/core/flow_dissector.c    | 25 +++++++++++++--
 net/sched/cls_flower.c       | 74 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 114 insertions(+), 3 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH net-next 1/2] flow_dissector: add mpls support (v2)
From: Benjamin LaHaise @ 2017-04-22 20:52 UTC (permalink / raw)
  To: netdev
  Cc: Benjamin LaHaise, Benjamin LaHaise, David S. Miller, Simon Horman,
	Jamal Hadi Salim, Cong Wang, Jiri Pirko, Hadar Hen Zion, Gao Feng
In-Reply-To: <1492894367-11637-1-git-send-email-benjamin.lahaise@netronome.com>

Add support for parsing MPLS flows to the flow dissector in preparation for
adding MPLS match support to cls_flower.

Signed-off-by: Benjamin LaHaise <benjamin.lahaise@netronome.com>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Simon Horman <simon.horman@netronome.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Eric Dumazet <jhs@mojatatu.com>
Cc: Hadar Hen Zion <hadarh@mellanox.com>
Cc: Gao Feng <fgao@ikuai8.com>
---
 include/linux/mpls.h         |  5 +++++
 include/net/flow_dissector.h |  8 ++++++++
 net/core/flow_dissector.c    | 25 ++++++++++++++++++++++---
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/include/linux/mpls.h b/include/linux/mpls.h
index 9999145..384fb22 100644
--- a/include/linux/mpls.h
+++ b/include/linux/mpls.h
@@ -3,4 +3,9 @@
 
 #include <uapi/linux/mpls.h>
 
+#define MPLS_TTL_MASK		(MPLS_LS_TTL_MASK >> MPLS_LS_TTL_SHIFT)
+#define MPLS_BOS_MASK		(MPLS_LS_S_MASK >> MPLS_LS_S_SHIFT)
+#define MPLS_TC_MASK		(MPLS_LS_TC_MASK >> MPLS_LS_TC_SHIFT)
+#define MPLS_LABEL_MASK		(MPLS_LS_LABEL_MASK >> MPLS_LS_LABEL_SHIFT)
+
 #endif  /* _LINUX_MPLS_H */
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index ac97030..8d21d44 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -41,6 +41,13 @@ struct flow_dissector_key_vlan {
 	u16	padding;
 };
 
+struct flow_dissector_key_mpls {
+	u32	mpls_ttl:8,
+		mpls_bos:1,
+		mpls_tc:3,
+		mpls_label:20;
+};
+
 struct flow_dissector_key_keyid {
 	__be32	keyid;
 };
@@ -169,6 +176,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
 	FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */
 	FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */
+	FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */
 
 	FLOW_DISSECTOR_KEY_MAX,
 };
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c9cf425..28d94bc 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -126,9 +126,11 @@ __skb_flow_dissect_mpls(const struct sk_buff *skb,
 {
 	struct flow_dissector_key_keyid *key_keyid;
 	struct mpls_label *hdr, _hdr[2];
+	u32 entry, label;
 
 	if (!dissector_uses_key(flow_dissector,
-				FLOW_DISSECTOR_KEY_MPLS_ENTROPY))
+				FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
+	    !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
 		return FLOW_DISSECT_RET_OUT_GOOD;
 
 	hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
@@ -136,8 +138,25 @@ __skb_flow_dissect_mpls(const struct sk_buff *skb,
 	if (!hdr)
 		return FLOW_DISSECT_RET_OUT_BAD;
 
-	if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
-	    MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
+	entry = ntohl(hdr[0].entry);
+	label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+		struct flow_dissector_key_mpls *key_mpls;
+
+		key_mpls = skb_flow_dissector_target(flow_dissector,
+						     FLOW_DISSECTOR_KEY_MPLS,
+						     target_container);
+		key_mpls->mpls_label = label;
+		key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
+					>> MPLS_LS_TTL_SHIFT;
+		key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
+					>> MPLS_LS_TC_SHIFT;
+		key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
+					>> MPLS_LS_S_SHIFT;
+	}
+
+	if (label == MPLS_LABEL_ENTROPY) {
 		key_keyid = skb_flow_dissector_target(flow_dissector,
 						      FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
 						      target_container);
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 2/2] cls_flower: add support for matching MPLS fields (v2)
From: Benjamin LaHaise @ 2017-04-22 20:52 UTC (permalink / raw)
  To: netdev
  Cc: Benjamin LaHaise, Benjamin LaHaise, David S. Miller, Simon Horman,
	Jamal Hadi Salim, Cong Wang, Jiri Pirko, Eric Dumazet,
	Hadar Hen Zion, Gao Feng
In-Reply-To: <1492894367-11637-1-git-send-email-benjamin.lahaise@netronome.com>

Add support to the tc flower classifier to match based on fields in MPLS
labels (TTL, Bottom of Stack, TC field, Label).

Signed-off-by: Benjamin LaHaise <benjamin.lahaise@netronome.com>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Simon Horman <simon.horman@netronome.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Hadar Hen Zion <hadarh@mellanox.com>
Cc: Gao Feng <fgao@ikuai8.com>
---
 include/uapi/linux/pkt_cls.h |  5 +++
 net/sched/cls_flower.c       | 74 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7a69f2a..f1129e3 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -432,6 +432,11 @@ enum {
 	TCA_FLOWER_KEY_ARP_THA,		/* ETH_ALEN */
 	TCA_FLOWER_KEY_ARP_THA_MASK,	/* ETH_ALEN */
 
+	TCA_FLOWER_KEY_MPLS_TTL,	/* u8 - 8 bits */
+	TCA_FLOWER_KEY_MPLS_BOS,	/* u8 - 1 bit */
+	TCA_FLOWER_KEY_MPLS_TC,		/* u8 - 3 bits */
+	TCA_FLOWER_KEY_MPLS_LABEL,	/* be32 - 20 bits */
+
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 31ee340..3ecf076 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -18,6 +18,7 @@
 #include <linux/if_ether.h>
 #include <linux/in6.h>
 #include <linux/ip.h>
+#include <linux/mpls.h>
 
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
@@ -47,6 +48,7 @@ struct fl_flow_key {
 		struct flow_dissector_key_ipv6_addrs enc_ipv6;
 	};
 	struct flow_dissector_key_ports enc_tp;
+	struct flow_dissector_key_mpls mpls;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -418,6 +420,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_ARP_SHA_MASK]	= { .len = ETH_ALEN },
 	[TCA_FLOWER_KEY_ARP_THA]	= { .len = ETH_ALEN },
 	[TCA_FLOWER_KEY_ARP_THA_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_MPLS_TTL]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_BOS]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_TC]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_LABEL]	= { .type = NLA_U32 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -433,6 +439,31 @@ static void fl_set_key_val(struct nlattr **tb,
 		memcpy(mask, nla_data(tb[mask_type]), len);
 }
 
+static void fl_set_key_mpls(struct nlattr **tb,
+			    struct flow_dissector_key_mpls *key_val,
+			    struct flow_dissector_key_mpls *key_mask)
+{
+	if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
+		key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
+		key_mask->mpls_ttl = MPLS_TTL_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
+		key_val->mpls_bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
+		key_mask->mpls_bos = MPLS_BOS_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
+		key_val->mpls_tc =
+			nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]) & MPLS_TC_MASK;
+		key_mask->mpls_tc = MPLS_TC_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
+		key_val->mpls_label =
+			nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]) &
+			MPLS_LABEL_MASK;
+		key_mask->mpls_label = MPLS_LABEL_MASK;
+	}
+}
+
 static void fl_set_key_vlan(struct nlattr **tb,
 			    struct flow_dissector_key_vlan *key_val,
 			    struct flow_dissector_key_vlan *key_mask)
@@ -589,6 +620,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 			       &mask->icmp.code,
 			       TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
 			       sizeof(key->icmp.code));
+	} else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) ||
+		   key->basic.n_proto == htons(ETH_P_MPLS_MC)) {
+		fl_set_key_mpls(tb, &key->mpls, &mask->mpls);
 	} else if (key->basic.n_proto == htons(ETH_P_ARP) ||
 		   key->basic.n_proto == htons(ETH_P_RARP)) {
 		fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
@@ -725,6 +759,8 @@ static void fl_init_dissector(struct cls_fl_head *head,
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ARP, arp);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_MPLS, mpls);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_VLAN, vlan);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
@@ -991,6 +1027,41 @@ static int fl_dump_key_val(struct sk_buff *skb,
 	return 0;
 }
 
+static int fl_dump_key_mpls(struct sk_buff *skb,
+			    struct flow_dissector_key_mpls *mpls_key,
+			    struct flow_dissector_key_mpls *mpls_mask)
+{
+	int err;
+
+	if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
+		return 0;
+	if (mpls_mask->mpls_ttl) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
+				 mpls_key->mpls_ttl);
+		if (err)
+			return err;
+	}
+	if (mpls_mask->mpls_tc) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
+				 mpls_key->mpls_tc);
+		if (err)
+			return err;
+	}
+	if (mpls_mask->mpls_label) {
+		err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
+				  mpls_key->mpls_label);
+		if (err)
+			return err;
+	}
+	if (mpls_mask->mpls_bos) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
+				 mpls_key->mpls_bos);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int fl_dump_key_vlan(struct sk_buff *skb,
 			    struct flow_dissector_key_vlan *vlan_key,
 			    struct flow_dissector_key_vlan *vlan_mask)
@@ -1096,6 +1167,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 			    sizeof(key->basic.n_proto)))
 		goto nla_put_failure;
 
+	if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
+		goto nla_put_failure;
+
 	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
 		goto nla_put_failure;
 
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH v2 net] net: ipv6: regenerate host route if moved to gc list
From: Martin KaFai Lau @ 2017-04-22 22:00 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, dvyukov, andreyknvl, mmanning
In-Reply-To: <1492879237-31566-1-git-send-email-dsa@cumulusnetworks.com>

On Sat, Apr 22, 2017 at 09:40:37AM -0700, David Ahern wrote:
[...]
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 08f9e8ea7a81..97e86158bbcb 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -3303,14 +3303,24 @@ static void addrconf_gre_config(struct net_device *dev)
>  static int fixup_permanent_addr(struct inet6_dev *idev,
>  				struct inet6_ifaddr *ifp)
>  {
> -	if (!ifp->rt) {
> -		struct rt6_info *rt;
> +	/* rt6i_ref == 0 means the host route was removed from the
> +	 * FIB, for example, if 'lo' device is taken down. In that
> +	 * case regenerate the host route.
> +	 */
> +	if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
> +		struct rt6_info *rt, *prev;
>
>  		rt = addrconf_dst_alloc(idev, &ifp->addr, false);
The rt regeneration makes sense.

>  		if (unlikely(IS_ERR(rt)))
>  			return PTR_ERR(rt);
>
> +		spin_lock(&ifp->lock);
> +		prev = ifp->rt;
>  		ifp->rt = rt;
I am still missing something on the new spin_lock:
1) Is there an existing race in the existing
   ifp->rt modification ('ifp->rt = rt') which is
   not related to this bug?
2) If there is a race on ifp->rt, are the above if-checks
   on ifp->rt racy as well, and do they need protection too?
   E.g. 'ifp->rt->rt6i_ref': ifp->rt could be NULL, or
   ifp->rt->rt6i_ref may no longer be zero later, if there is
   a concurrent modification of ifp->rt.

> +		spin_unlock(&ifp->lock);
> +
> +		if (prev)
> +			ip6_rt_put(prev);
Nit. ip6_rt_put() takes NULL.

>  	}
>
>  	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
> --
> 2.1.4
>

^ permalink raw reply

* Re: [PATCH v2 net] net: ipv6: regenerate host route if moved to gc list
From: David Ahern @ 2017-04-23  1:12 UTC (permalink / raw)
  To: Martin KaFai Lau; +Cc: netdev, dvyukov, andreyknvl, mmanning
In-Reply-To: <20170422220041.wha72qa3zuy23hkf@kafai-mba.local>

On 4/22/17 4:00 PM, Martin KaFai Lau wrote:
> On Sat, Apr 22, 2017 at 09:40:37AM -0700, David Ahern wrote:
> [...]
>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>> index 08f9e8ea7a81..97e86158bbcb 100644
>> --- a/net/ipv6/addrconf.c
>> +++ b/net/ipv6/addrconf.c
>> @@ -3303,14 +3303,24 @@ static void addrconf_gre_config(struct net_device *dev)
>>  static int fixup_permanent_addr(struct inet6_dev *idev,
>>  				struct inet6_ifaddr *ifp)
>>  {
>> -	if (!ifp->rt) {
>> -		struct rt6_info *rt;
>> +	/* rt6i_ref == 0 means the host route was removed from the
>> +	 * FIB, for example, if 'lo' device is taken down. In that
>> +	 * case regenerate the host route.
>> +	 */
>> +	if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
>> +		struct rt6_info *rt, *prev;
>>
>>  		rt = addrconf_dst_alloc(idev, &ifp->addr, false);
> The rt regeneration makes sense.
> 
>>  		if (unlikely(IS_ERR(rt)))
>>  			return PTR_ERR(rt);
>>
>> +		spin_lock(&ifp->lock);
>> +		prev = ifp->rt;
>>  		ifp->rt = rt;
> I am still missing something on the new spin_lock:
> 1) Is there an existing race in the existing
>    ifp->rt modification ('ifp->rt = rt') which is
>    not related to this bug?
> 2) If there is a race in ifp->rt, is the above if-checks
>    on ifp->rt racy and need protection also? F.e. 'ifp->rt->rt6i_ref'
>    since ifp->rt could be NULL or ifp->rt->rt6i_ref
>    may not be zero later if there is concurrent
>    modification on ifp->rt?

As I understand it:
- rt6i_ref is modified by the fib code (adding and removing to tree) and
always under RTNL.
- ifp->rt is only *set* under RTNL, but is accessed without it (DAD via
workqueue and sysctl).

The code path to fixup_permanent_addr is under RTNL, so the if check on
ifp->rt and rt6i_ref is ok -- neither can be changed since RTNL is held.

Since ifp->rt can be accessed outside of RTNL, the spinlock is needed to
change its value. Arguably only 'ifp->rt = rt;' needs the spinlock.

Let me know if I am missing something. There are many twists and turns
with the ipv6 code.

> 
>> +		spin_unlock(&ifp->lock);
>> +
>> +		if (prev)
>> +			ip6_rt_put(prev);
> Nit. ip6_rt_put() takes NULL.

ok.

^ permalink raw reply

* Re: [net-next 03/11] ixgbe: add support for XDP_TX action
From: Jakub Kicinski @ 2017-04-23  2:24 UTC (permalink / raw)
  To: Jeff Kirsher
  Cc: davem, John Fastabend, netdev, nhorman, sassmann, jogreene,
	John Fastabend
In-Reply-To: <20170421015029.18994-4-jeffrey.t.kirsher@intel.com>

On Thu, 20 Apr 2017 18:50:21 -0700, Jeff Kirsher wrote:
> +static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter)
> +{
> +	if (nr_cpu_ids > MAX_XDP_QUEUES)
> +		return 0;
> +
> +	return adapter->xdp_prog ? nr_cpu_ids : 0;
> +}

Nit: AFAICT ixgbe_xdp_setup() will guarantee xdp_prog is not set if
there are too many CPU ids.

> @@ -6120,10 +6193,21 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
>  		e_err(probe, "Allocation for Tx Queue %u failed\n", i);
>  		goto err_setup_tx;
>  	}
> +	for (j = 0; j < adapter->num_xdp_queues; j++) {
> +		err = ixgbe_setup_tx_resources(adapter->xdp_ring[j]);
> +		if (!err)
> +			continue;
> +
> +		e_err(probe, "Allocation for Tx Queue %u failed\n", j);
> +		goto err_setup_tx;
> +	}
> +
>  

Nit: extra line here

> @@ -9557,7 +9739,21 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
>  			return -EINVAL;
>  	}
>  
> +	if (nr_cpu_ids > MAX_XDP_QUEUES)
> +		return -ENOMEM;
> +
>  	old_prog = xchg(&adapter->xdp_prog, prog);
> +
> +	/* If transitioning XDP modes reconfigure rings */
> +	if (!!prog != !!old_prog) {
> +		int err = ixgbe_setup_tc(dev, netdev_get_num_tc(dev));
> +
> +		if (err) {
> +			rcu_assign_pointer(adapter->xdp_prog, old_prog);
> +			return -EINVAL;
> +		}
> +	}
> +
>  	for (i = 0; i < adapter->num_rx_queues; i++)
>  		xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
>  

In case of disabling XDP I assume ixgbe_setup_tc() will free the rings
before the xdp_prog on the rings is swapped to NULL.  Is there anything
preventing TX in that time window?  I think usual ordering would be to
install the prog after reconfig but uninstall before.

^ permalink raw reply

* Re: [PATCH v2 net] net: ipv6: regenerate host route if moved to gc list
From: Martin KaFai Lau @ 2017-04-23  2:28 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, dvyukov, andreyknvl, mmanning
In-Reply-To: <072e9af7-2163-ce53-ce74-a288e0934825@cumulusnetworks.com>

On Sat, Apr 22, 2017 at 07:12:34PM -0600, David Ahern wrote:
> On 4/22/17 4:00 PM, Martin KaFai Lau wrote:
> > On Sat, Apr 22, 2017 at 09:40:37AM -0700, David Ahern wrote:
> > [...]
> >> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> >> index 08f9e8ea7a81..97e86158bbcb 100644
> >> --- a/net/ipv6/addrconf.c
> >> +++ b/net/ipv6/addrconf.c
> >> @@ -3303,14 +3303,24 @@ static void addrconf_gre_config(struct net_device *dev)
> >>  static int fixup_permanent_addr(struct inet6_dev *idev,
> >>  				struct inet6_ifaddr *ifp)
> >>  {
> >> -	if (!ifp->rt) {
> >> -		struct rt6_info *rt;
> >> +	/* rt6i_ref == 0 means the host route was removed from the
> >> +	 * FIB, for example, if 'lo' device is taken down. In that
> >> +	 * case regenerate the host route.
> >> +	 */
> >> +	if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
> >> +		struct rt6_info *rt, *prev;
> >>
> >>  		rt = addrconf_dst_alloc(idev, &ifp->addr, false);
> > The rt regeneration makes sense.
> >
> >>  		if (unlikely(IS_ERR(rt)))
> >>  			return PTR_ERR(rt);
> >>
> >> +		spin_lock(&ifp->lock);
> >> +		prev = ifp->rt;
> >>  		ifp->rt = rt;
> > I am still missing something on the new spin_lock:
> > 1) Is there an existing race in the existing
> >    ifp->rt modification ('ifp->rt = rt') which is
> >    not related to this bug?
> > 2) If there is a race in ifp->rt, is the above if-checks
> >    on ifp->rt racy and need protection also? F.e. 'ifp->rt->rt6i_ref'
> >    since ifp->rt could be NULL or ifp->rt->rt6i_ref
> >    may not be zero later if there is concurrent
> >    modification on ifp->rt?
>
> As I understand it:
> - rt6i_ref is modified by the fib code (adding and removing to tree) and
> always under RTNL.
> - ifp->rt is only *set* under RTNL, but is accessed without (dad via
> workqueue and sysctl).
>
> The code path to fixup_permanent_addr is under RTNL, so the if check on
> ifp->rt and rt6i_ref is ok -- neither can be changed since RTNL is held.
>
> Since ifp->rt can be accessed outside of RTNL, the spinlock is needed to
> change its value.
Got it. It is to protect the readers which are not under RTNL.
Many thanks for pointing out what I was missing.  It all makes sense now.

> Arguably only 'ifp->rt = rt;' needs the spinlock.
It still seems like the existing 'ifp->rt = rt;' needs protection
anyway regardless of the rt regeneration change.  It would be nice to
explain it in the commit log or, even better, to separate it out
into another patch.

>
> There are many twists and turns with the ipv6 code.
Nod Nod :)

>
> >
> >> +		spin_unlock(&ifp->lock);
> >> +
> >> +		if (prev)
> >> +			ip6_rt_put(prev);
> > Nit. ip6_rt_put() takes NULL.
>
> ok.
>

^ permalink raw reply

* [PATCH net-next v3 0/5] nfp: DMA flags, adjust head and fixes
From: Jakub Kicinski @ 2017-04-23  3:17 UTC (permalink / raw)
  To: netdev; +Cc: kubakici, oss-drivers, Jakub Kicinski

Hi!

This series takes advantage of Alex's DMA_ATTR_SKIP_CPU_SYNC to make 
XDP packet modifications "correct" from DMA API point of view.  It 
also allows us to parse the metadata before we run XDP at no additional
DMA sync cost.  That way we can get rid of the metadata memcpy, and 
remove the last upstream user of bpf_prog->xdp_adjust_head.

David's patch adds a way to read capabilities from the management
firmware.

There are also two net-next fixes.  Patch 4 fixes what seems to
be a result of a botched rebase on my part.  Patch 5 corrects locking
when the state of ethernet ports is being refreshed.

---
v3: move the sync from alloc func to the actual give to hw func
v2: sync rx buffers before giving them to the card (Alex)


David Brunecz (1):
  nfp: add NSP routine to get static information

Jakub Kicinski (4):
  nfp: make use of the DMA_ATTR_SKIP_CPU_SYNC attr
  nfp: parse metadata prepend before XDP runs
  nfp: fix free list buffer size reporting
  nfp: remove the refresh of all ports optimization

 drivers/net/ethernet/netronome/nfp/Makefile        |   1 +
 drivers/net/ethernet/netronome/nfp/nfp_main.c      |   7 ++
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |   9 +-
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 125 ++++++++++++---------
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   |  13 ++-
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  |  67 +++++++----
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h   |   1 +
 .../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c   |   7 ++
 .../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h   |  24 ++++
 .../ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c  |  89 +++++++++++++++
 10 files changed, 265 insertions(+), 78 deletions(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c

-- 
2.11.0

^ permalink raw reply

* [PATCH net-next v3 1/5] nfp: make use of the DMA_ATTR_SKIP_CPU_SYNC attr
From: Jakub Kicinski @ 2017-04-23  3:17 UTC (permalink / raw)
  To: netdev; +Cc: kubakici, oss-drivers, Jakub Kicinski
In-Reply-To: <20170423031756.94429-1-jakub.kicinski@netronome.com>

DMA unmap may destroy changes CPU made to the buffer.  To make XDP
run correctly on non-x86 platforms we should use the
DMA_ATTR_SKIP_CPU_SYNC attribute.

Thanks to using the attribute we can now move the sync operation out of
the XDP handler and into the common code path.

A little bit of variable name reshuffling is required to bring the
code back to readable state.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 53 ++++++++++++++--------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index e2197160e4dc..f1128d12cd24 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -87,16 +87,31 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
 
 static dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag)
 {
-	return dma_map_single(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM,
-			      dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
-			      dp->rx_dma_dir);
+	return dma_map_single_attrs(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM,
+				    dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
+				    dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void
+nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr)
+{
+	dma_sync_single_for_device(dp->dev, dma_addr,
+				   dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
+				   dp->rx_dma_dir);
 }
 
 static void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr)
 {
-	dma_unmap_single(dp->dev, dma_addr,
-			 dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
-			 dp->rx_dma_dir);
+	dma_unmap_single_attrs(dp->dev, dma_addr,
+			       dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
+			       dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr,
+				    unsigned int len)
+{
+	dma_sync_single_for_cpu(dp->dev, dma_addr - NFP_NET_RX_BUF_HEADROOM,
+				len, dp->rx_dma_dir);
 }
 
 /* Firmware reconfig
@@ -1208,6 +1223,8 @@ static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
 
 	wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1);
 
+	nfp_net_dma_sync_dev_rx(dp, dma_addr);
+
 	/* Stash SKB and DMA address away */
 	rx_ring->rxbufs[wr_idx].frag = frag;
 	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
@@ -1569,7 +1586,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 	tx_ring = r_vec->xdp_ring;
 
 	while (pkts_polled < budget) {
-		unsigned int meta_len, data_len, data_off, pkt_len;
+		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
 		u8 meta_prepend[NFP_NET_MAX_PREPEND];
 		struct nfp_net_rx_buf *rxbuf;
 		struct nfp_net_rx_desc *rxd;
@@ -1608,11 +1625,12 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		data_len = le16_to_cpu(rxd->rxd.data_len);
 		pkt_len = data_len - meta_len;
 
+		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
 		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
-			data_off = NFP_NET_RX_BUF_HEADROOM + meta_len;
+			pkt_off += meta_len;
 		else
-			data_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_offset;
-		data_off += dp->rx_dma_off;
+			pkt_off += dp->rx_offset;
+		meta_off = pkt_off - meta_len;
 
 		/* Stats update */
 		u64_stats_update_begin(&r_vec->rx_sync);
@@ -1621,7 +1639,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		u64_stats_update_end(&r_vec->rx_sync);
 
 		/* Pointer to start of metadata */
-		meta = rxbuf->frag + data_off - meta_len;
+		meta = rxbuf->frag + meta_off;
 
 		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
 			     (dp->rx_offset && meta_len > dp->rx_offset))) {
@@ -1631,6 +1649,9 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 			continue;
 		}
 
+		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
+					data_len);
+
 		if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
 				  dp->bpf_offload_xdp)) {
 			unsigned int dma_off;
@@ -1638,10 +1659,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 			int act;
 
 			hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
-			dma_off = data_off - NFP_NET_RX_BUF_HEADROOM;
-			dma_sync_single_for_cpu(dp->dev, rxbuf->dma_addr,
-						dma_off + pkt_len,
-						DMA_BIDIRECTIONAL);
 
 			/* Move prepend out of the way */
 			if (xdp_prog->xdp_adjust_head) {
@@ -1650,12 +1667,12 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 			}
 
 			act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start,
-					      &data_off, &pkt_len);
+					      &pkt_off, &pkt_len);
 			switch (act) {
 			case XDP_PASS:
 				break;
 			case XDP_TX:
-				dma_off = data_off - NFP_NET_RX_BUF_HEADROOM;
+				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
 				if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring,
 								 tx_ring, rxbuf,
 								 dma_off,
@@ -1689,7 +1706,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 		nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
 
-		skb_reserve(skb, data_off);
+		skb_reserve(skb, pkt_off);
 		skb_put(skb, pkt_len);
 
 		if (!dp->chained_metadata_format) {
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v3 2/5] nfp: parse metadata prepend before XDP runs
From: Jakub Kicinski @ 2017-04-23  3:17 UTC (permalink / raw)
  To: netdev; +Cc: kubakici, oss-drivers, Jakub Kicinski
In-Reply-To: <20170423031756.94429-1-jakub.kicinski@netronome.com>

Calling memcpy to shift metadata out of the way for XDP to run
seems like overkill.  The most common metadata contents are
8 bytes containing type and flow hash.  Simply parse the metadata
before we run XDP.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  6 ++
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 67 +++++++++++-----------
 2 files changed, 40 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 052db9208fbb..8302a2d688da 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -284,6 +284,12 @@ struct nfp_net_rx_desc {
 
 #define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0)
 
+struct nfp_meta_parsed {
+	u32 hash_type;
+	u32 hash;
+	u32 mark;
+};
+
 struct nfp_net_rx_hash {
 	__be32 hash_type;
 	__be32 hash;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index f1128d12cd24..3285053bece0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1402,8 +1402,9 @@ static void nfp_net_rx_csum(struct nfp_net_dp *dp,
 	}
 }
 
-static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
-			     unsigned int type, __be32 *hash)
+static void
+nfp_net_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
+		 unsigned int type, __be32 *hash)
 {
 	if (!(netdev->features & NETIF_F_RXHASH))
 		return;
@@ -1412,16 +1413,18 @@ static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
 	case NFP_NET_RSS_IPV4:
 	case NFP_NET_RSS_IPV6:
 	case NFP_NET_RSS_IPV6_EX:
-		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
+		meta->hash_type = PKT_HASH_TYPE_L3;
 		break;
 	default:
-		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
+		meta->hash_type = PKT_HASH_TYPE_L4;
 		break;
 	}
+
+	meta->hash = get_unaligned_be32(hash);
 }
 
 static void
-nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
+nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
 		      void *data, struct nfp_net_rx_desc *rxd)
 {
 	struct nfp_net_rx_hash *rx_hash = data;
@@ -1429,12 +1432,12 @@ nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
 	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
 		return;
 
-	nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
+	nfp_net_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type),
 			 &rx_hash->hash);
 }
 
 static void *
-nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
+nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
 		   void *data, int meta_len)
 {
 	u32 meta_info;
@@ -1446,13 +1449,13 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
 		switch (meta_info & NFP_NET_META_FIELD_MASK) {
 		case NFP_NET_META_HASH:
 			meta_info >>= NFP_NET_META_FIELD_SIZE;
-			nfp_net_set_hash(netdev, skb,
+			nfp_net_set_hash(netdev, meta,
 					 meta_info & NFP_NET_META_FIELD_MASK,
 					 (__be32 *)data);
 			data += 4;
 			break;
 		case NFP_NET_META_MARK:
-			skb->mark = get_unaligned_be32(data);
+			meta->mark = get_unaligned_be32(data);
 			data += 4;
 			break;
 		default:
@@ -1587,12 +1590,11 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 	while (pkts_polled < budget) {
 		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
-		u8 meta_prepend[NFP_NET_MAX_PREPEND];
 		struct nfp_net_rx_buf *rxbuf;
 		struct nfp_net_rx_desc *rxd;
+		struct nfp_meta_parsed meta;
 		dma_addr_t new_dma_addr;
 		void *new_frag;
-		u8 *meta;
 
 		idx = rx_ring->rd_p & (rx_ring->cnt - 1);
 
@@ -1605,6 +1607,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		 */
 		dma_rmb();
 
+		memset(&meta, 0, sizeof(meta));
+
 		rx_ring->rd_p++;
 		pkts_polled++;
 
@@ -1638,9 +1642,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		r_vec->rx_bytes += pkt_len;
 		u64_stats_update_end(&r_vec->rx_sync);
 
-		/* Pointer to start of metadata */
-		meta = rxbuf->frag + meta_off;
-
 		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
 			     (dp->rx_offset && meta_len > dp->rx_offset))) {
 			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
@@ -1652,6 +1653,23 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
 					data_len);
 
+		if (!dp->chained_metadata_format) {
+			nfp_net_set_hash_desc(dp->netdev, &meta,
+					      rxbuf->frag + meta_off, rxd);
+		} else if (meta_len) {
+			void *end;
+
+			end = nfp_net_parse_meta(dp->netdev, &meta,
+						 rxbuf->frag + meta_off,
+						 meta_len);
+			if (unlikely(end != rxbuf->frag + pkt_off)) {
+				nn_dp_warn(dp, "invalid RX packet metadata\n");
+				nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
+						NULL);
+				continue;
+			}
+		}
+
 		if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
 				  dp->bpf_offload_xdp)) {
 			unsigned int dma_off;
@@ -1660,12 +1678,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 			hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
 
-			/* Move prepend out of the way */
-			if (xdp_prog->xdp_adjust_head) {
-				memcpy(meta_prepend, meta, meta_len);
-				meta = meta_prepend;
-			}
-
 			act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start,
 					      &pkt_off, &pkt_len);
 			switch (act) {
@@ -1709,19 +1721,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		skb_reserve(skb, pkt_off);
 		skb_put(skb, pkt_len);
 
-		if (!dp->chained_metadata_format) {
-			nfp_net_set_hash_desc(dp->netdev, skb, meta, rxd);
-		} else if (meta_len) {
-			void *end;
-
-			end = nfp_net_parse_meta(dp->netdev, skb, meta,
-						 meta_len);
-			if (unlikely(end != meta + meta_len)) {
-				nn_dp_warn(dp, "invalid RX packet metadata\n");
-				nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb);
-				continue;
-			}
-		}
+		skb->mark = meta.mark;
+		skb_set_hash(skb, meta.hash, meta.hash_type);
 
 		skb_record_rx_queue(skb, rx_ring->idx);
 		skb->protocol = eth_type_trans(skb, dp->netdev);
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v3 3/5] nfp: add NSP routine to get static information
From: Jakub Kicinski @ 2017-04-23  3:17 UTC (permalink / raw)
  To: netdev; +Cc: kubakici, oss-drivers, David Brunecz, Jakub Kicinski
In-Reply-To: <20170423031756.94429-1-jakub.kicinski@netronome.com>

From: David Brunecz <david.brunecz@netronome.com>

Retrieve identifying information from the NSP.  For now it only
contains versions of firmware subcomponents.

Signed-off-by: David Brunecz <david.brunecz@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/Makefile        |  1 +
 drivers/net/ethernet/netronome/nfp/nfp_main.c      |  7 ++
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h   |  1 +
 .../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c   |  7 ++
 .../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h   | 24 ++++++
 .../ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c  | 89 ++++++++++++++++++++++
 6 files changed, 129 insertions(+)
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c

diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 4a5d13ef92a4..4b15f0f496aa 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -9,6 +9,7 @@ nfp-objs := \
 	    nfpcore/nfp_mutex.o \
 	    nfpcore/nfp_nffw.o \
 	    nfpcore/nfp_nsp.o \
+	    nfpcore/nfp_nsp_cmds.o \
 	    nfpcore/nfp_nsp_eth.o \
 	    nfpcore/nfp_resource.o \
 	    nfpcore/nfp_rtsym.o \
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index bea2a1a6c211..dde35dae35c5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -253,6 +253,7 @@ nfp_fw_load(struct pci_dev *pdev, struct nfp_pf *pf, struct nfp_nsp *nsp)
 
 static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf)
 {
+	struct nfp_nsp_identify *nspi;
 	struct nfp_nsp *nsp;
 	int err;
 
@@ -269,6 +270,12 @@ static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf)
 
 	pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp);
 
+	nspi = __nfp_nsp_identify(nsp);
+	if (nspi) {
+		dev_info(&pdev->dev, "BSP: %s\n", nspi->version);
+		kfree(nspi);
+	}
+
 	err = nfp_fw_load(pdev, pf, nsp);
 	if (err < 0) {
 		kfree(pf->eth_tbl);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
index 8afef7593f13..4df2ce261b3f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h
@@ -63,6 +63,7 @@ void nfp_nsp_config_clear_state(struct nfp_nsp *state);
 int nfp_nsp_read_eth_table(struct nfp_nsp *state, void *buf, unsigned int size);
 int nfp_nsp_write_eth_table(struct nfp_nsp *state,
 			    const void *buf, unsigned int size);
+int nfp_nsp_read_identify(struct nfp_nsp *state, void *buf, unsigned int size);
 
 /* Implemented in nfp_resource.c */
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
index 4635f42e15b0..61797c98f5fe 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
@@ -93,6 +93,7 @@ enum nfp_nsp_cmd {
 	SPCODE_FW_LOAD		= 6, /* Load fw from buffer, len in option */
 	SPCODE_ETH_RESCAN	= 7, /* Rescan ETHs, write ETH_TABLE to buf */
 	SPCODE_ETH_CONTROL	= 8, /* Update media config from buffer */
+	SPCODE_NSP_IDENTIFY	= 13, /* Read NSP version */
 
 	__MAX_SPCODE,
 };
@@ -493,3 +494,9 @@ int nfp_nsp_write_eth_table(struct nfp_nsp *state,
 	return nfp_nsp_command_buf(state, SPCODE_ETH_CONTROL, size, buf, size,
 				   NULL, 0);
 }
+
+int nfp_nsp_read_identify(struct nfp_nsp *state, void *buf, unsigned int size)
+{
+	return nfp_nsp_command_buf(state, SPCODE_NSP_IDENTIFY, size, NULL, 0,
+				   buf, size);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index 7d34ff145fd7..36b21e4dc56d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -147,4 +147,28 @@ int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode);
 int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed);
 int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes);
 
+/**
+ * struct nfp_nsp_identify - NSP static information
+ * @version:      opaque version string
+ * @flags:        version flags
+ * @br_primary:   branch id of primary bootloader
+ * @br_secondary: branch id of secondary bootloader
+ * @br_nsp:       branch id of NSP
+ * @primary:      version of primary bootloader
+ * @secondary:    version id of secondary bootloader
+ * @nsp:          version id of NSP
+ */
+struct nfp_nsp_identify {
+	char version[40];
+	u8 flags;
+	u8 br_primary;
+	u8 br_secondary;
+	u8 br_nsp;
+	u16 primary;
+	u16 secondary;
+	u16 nsp;
+};
+
+struct nfp_nsp_identify *__nfp_nsp_identify(struct nfp_nsp *nsp);
+
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c
new file mode 100644
index 000000000000..e7a263de3731
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_cmds.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "nfp.h"
+#include "nfp_nsp.h"
+
+struct nsp_identify {
+	u8 version[40];
+	u8 flags;
+	u8 br_primary;
+	u8 br_secondary;
+	u8 br_nsp;
+	__le16 primary;
+	__le16 secondary;
+	__le16 nsp;
+	__le16 reserved;
+};
+
+struct nfp_nsp_identify *__nfp_nsp_identify(struct nfp_nsp *nsp)
+{
+	struct nfp_nsp_identify *nspi = NULL;
+	struct nsp_identify *ni;
+	int ret;
+
+	if (nfp_nsp_get_abi_ver_minor(nsp) < 15)
+		return NULL;
+
+	ni = kzalloc(sizeof(*ni), GFP_KERNEL);
+	if (!ni)
+		return NULL;
+
+	ret = nfp_nsp_read_identify(nsp, ni, sizeof(*ni));
+	if (ret < 0) {
+		nfp_err(nfp_nsp_cpp(nsp), "reading bsp version failed %d\n",
+			ret);
+		goto exit_free;
+	}
+
+	nspi = kzalloc(sizeof(*nspi), GFP_KERNEL);
+	if (!nspi)
+		goto exit_free;
+
+	memcpy(nspi->version, ni->version, sizeof(nspi->version));
+	nspi->version[sizeof(nspi->version) - 1] = '\0';
+	nspi->flags = ni->flags;
+	nspi->br_primary = ni->br_primary;
+	nspi->br_secondary = ni->br_secondary;
+	nspi->br_nsp = ni->br_nsp;
+	nspi->primary = le16_to_cpu(ni->primary);
+	nspi->secondary = le16_to_cpu(ni->secondary);
+	nspi->nsp = le16_to_cpu(ni->nsp);
+
+exit_free:
+	kfree(ni);
+	return nspi;
+}
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v3 4/5] nfp: fix free list buffer size reporting
From: Jakub Kicinski @ 2017-04-23  3:17 UTC (permalink / raw)
  To: netdev; +Cc: kubakici, oss-drivers, Jakub Kicinski
In-Reply-To: <20170423031756.94429-1-jakub.kicinski@netronome.com>

XDP headroom should not be included in free list buffer size.

Fixes: 6fe0c3b43804 ("nfp: add support for xdp_adjust_head()")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 3285053bece0..8a9b74305493 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2165,7 +2165,7 @@ nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
  */
 static int nfp_net_set_config_and_enable(struct nfp_net *nn)
 {
-	u32 new_ctrl, update = 0;
+	u32 bufsz, new_ctrl, update = 0;
 	unsigned int r;
 	int err;
 
@@ -2199,8 +2199,9 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
 	nfp_net_write_mac_addr(nn);
 
 	nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.netdev->mtu);
-	nn_writel(nn, NFP_NET_CFG_FLBUFSZ,
-		  nn->dp.fl_bufsz - NFP_NET_RX_BUF_NON_DATA);
+
+	bufsz = nn->dp.fl_bufsz - nn->dp.rx_dma_off - NFP_NET_RX_BUF_NON_DATA;
+	nn_writel(nn, NFP_NET_CFG_FLBUFSZ, bufsz);
 
 	/* Enable device */
 	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
-- 
2.11.0

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox