Netdev List

Netdev List
 help / color / mirror / Atom feed

* [for-next 05/11] net/mlx5: E-Switch, Create a dedicated send to vport rule deletion function
From: Saeed Mahameed @ 2017-12-19 20:33 UTC (permalink / raw)
  To: David S. Miller, Doug Ledford
  Cc: netdev, linux-rdma, Leon Romanovsky, Mark Bloch, Saeed Mahameed
In-Reply-To: <20171219203340.2600-1-saeedm@mellanox.com>

From: Mark Bloch <markb@mellanox.com>

In order for representors to send packets directly to VFs we use an
E-Switch function which inserts special rules into the HW. For symmetry,
create an E-Switch function that deletes these rules as well.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c           | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h          | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 5 +++++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 19edaa155062..01bf4e3c8afa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -199,7 +199,7 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
 		return;
 
 	list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) {
-		mlx5_del_flow_rules(esw_sq->send_to_vport_rule);
+		mlx5_eswitch_del_send_to_vport_rule(esw_sq->send_to_vport_rule);
 		list_del(&esw_sq->list);
 		kfree(esw_sq);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 21b506fd2b67..9ed401225225 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -225,6 +225,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
 				    u32 sqn);
+void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
 
 struct mlx5_flow_spec;
 struct mlx5_esw_flow_attr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 3e412d609c25..b37f8ac01e43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -339,6 +339,11 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
 	return flow_rule;
 }
 
+void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
+{
+	mlx5_del_flow_rules(rule);
+}
+
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
 	struct mlx5_flow_act flow_act = {0};
-- 
2.14.2

^ permalink raw reply related

* [for-next 06/11] net/mlx5e: Move ethernet representors data into separate struct
From: Saeed Mahameed @ 2017-12-19 20:33 UTC (permalink / raw)
  To: David S. Miller, Doug Ledford
  Cc: netdev, linux-rdma, Leon Romanovsky, Mark Bloch, Saeed Mahameed
In-Reply-To: <20171219203340.2600-1-saeedm@mellanox.com>

From: Mark Bloch <markb@mellanox.com>

Ethernet representors need to store data which is applicable only to
them. Create a void *priv pointer in struct mlx5_eswitch_rep and make
mlx5e store the relevant data there. As part of this change we also
zero-initialize the local rep in mlx5e_rep_register_vf_vports(), as
otherwise the E-Switch code would copy a garbage priv value.

We also rename mlx5_eswitch_get_uplink_netdev() to
mlx5_eswitch_get_uplink_priv() and make it return void *.
This way E-Switch code doesn't need to deal with net devices and
we leave the task of getting it to mlx5e.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 58 ++++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |  9 ++++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 14 ++++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  7 +--
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  7 ++-
 5 files changed, 60 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 01bf4e3c8afa..3c74f0599ad3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -194,11 +194,13 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep)
 {
 	struct mlx5_esw_sq *esw_sq, *tmp;
+	struct mlx5e_rep_priv *rpriv;
 
 	if (esw->mode != SRIOV_OFFLOADS)
 		return;
 
-	list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) {
+	rpriv = mlx5e_rep_to_rep_priv(rep);
+	list_for_each_entry_safe(esw_sq, tmp, &rpriv->vport_sqs_list, list) {
 		mlx5_eswitch_del_send_to_vport_rule(esw_sq->send_to_vport_rule);
 		list_del(&esw_sq->list);
 		kfree(esw_sq);
@@ -210,6 +212,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 				 u16 *sqns_array, int sqns_num)
 {
 	struct mlx5_flow_handle *flow_rule;
+	struct mlx5e_rep_priv *rpriv;
 	struct mlx5_esw_sq *esw_sq;
 	int err;
 	int i;
@@ -217,6 +220,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 	if (esw->mode != SRIOV_OFFLOADS)
 		return 0;
 
+	rpriv = mlx5e_rep_to_rep_priv(rep);
 	for (i = 0; i < sqns_num; i++) {
 		esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL);
 		if (!esw_sq) {
@@ -234,7 +238,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 			goto out_err;
 		}
 		esw_sq->send_to_vport_rule = flow_rule;
-		list_add(&esw_sq->list, &rep->vport_sqs_list);
+		list_add(&esw_sq->list, &rpriv->vport_sqs_list);
 	}
 	return 0;
 
@@ -291,7 +295,7 @@ static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
 #endif
 	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms,
 						DELAY_PROBE_TIME);
-	struct net_device *netdev = rpriv->rep->netdev;
+	struct net_device *netdev = rpriv->netdev;
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
 	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
@@ -312,7 +316,7 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
 {
 	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
 						    neigh_update.neigh_stats_work.work);
-	struct net_device *netdev = rpriv->rep->netdev;
+	struct net_device *netdev = rpriv->netdev;
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_neigh_hash_entry *nhe;
 
@@ -408,7 +412,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
 	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
 						    neigh_update.netevent_nb);
 	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-	struct net_device *netdev = rpriv->rep->netdev;
+	struct net_device *netdev = rpriv->netdev;
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_neigh_hash_entry *nhe = NULL;
 	struct mlx5e_neigh m_neigh = {};
@@ -536,7 +540,7 @@ static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
 static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
 {
 	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-	struct mlx5e_priv *priv = netdev_priv(rpriv->rep->netdev);
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
 
 	unregister_netevent_notifier(&neigh_update->netevent_nb);
 
@@ -957,7 +961,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 		err = PTR_ERR(flow_rule);
 		goto err_destroy_direct_tirs;
 	}
-	rep->vport_rx_rule = flow_rule;
+	rpriv->vport_rx_rule = flow_rule;
 
 	err = mlx5e_tc_init(priv);
 	if (err)
@@ -966,7 +970,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 	return 0;
 
 err_del_flow_rule:
-	mlx5_del_flow_rules(rep->vport_rx_rule);
+	mlx5_del_flow_rules(rpriv->vport_rx_rule);
 err_destroy_direct_tirs:
 	mlx5e_destroy_direct_tirs(priv);
 err_destroy_direct_rqts:
@@ -977,10 +981,9 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 {
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5_eswitch_rep *rep = rpriv->rep;
 
 	mlx5e_tc_cleanup(priv);
-	mlx5_del_flow_rules(rep->vport_rx_rule);
+	mlx5_del_flow_rules(rpriv->vport_rx_rule);
 	mlx5e_destroy_direct_tirs(priv);
 	mlx5e_destroy_direct_rqts(priv);
 }
@@ -1022,8 +1025,8 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
 static int
 mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
-	struct mlx5e_priv *priv = netdev_priv(rep->netdev);
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
 
 	int err;
 
@@ -1047,8 +1050,8 @@ mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 static void
 mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
 {
-	struct mlx5e_priv *priv = netdev_priv(rep->netdev);
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
 
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
 		mlx5e_remove_sqs_fwd_rules(priv);
@@ -1063,6 +1066,7 @@ mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
 static int
 mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
+	struct mlx5e_rep_priv *uplink_rpriv;
 	struct mlx5e_rep_priv *rpriv;
 	struct net_device *netdev;
 	struct mlx5e_priv *upriv;
@@ -1080,8 +1084,10 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 		return -EINVAL;
 	}
 
-	rep->netdev = netdev;
+	rpriv->netdev = netdev;
 	rpriv->rep = rep;
+	rep->priv = rpriv;
+	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 
 	err = mlx5e_attach_netdev(netdev_priv(netdev));
 	if (err) {
@@ -1097,7 +1103,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 		goto err_detach_netdev;
 	}
 
-	upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(dev->priv.eswitch));
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch);
+	upriv = netdev_priv(uplink_rpriv->netdev);
 	err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb,
 					 upriv);
 	if (err)
@@ -1131,14 +1138,16 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 static void
 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 {
-	struct net_device *netdev = rep->netdev;
+	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+	struct net_device *netdev = rpriv->netdev;
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5e_rep_priv *uplink_rpriv;
 	void *ppriv = priv->ppriv;
 	struct mlx5e_priv *upriv;
 
-	unregister_netdev(rep->netdev);
-	upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(priv->mdev->priv.eswitch));
+	unregister_netdev(netdev);
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch);
+	upriv = netdev_priv(uplink_rpriv->netdev);
 	tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
 				     upriv);
 	mlx5e_rep_neigh_cleanup(rpriv);
@@ -1155,7 +1164,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 	int vport;
 
 	for (vport = 1; vport < total_vfs; vport++) {
-		struct mlx5_eswitch_rep rep;
+		struct mlx5_eswitch_rep rep = {};
 
 		rep.load = mlx5e_vport_rep_load;
 		rep.unload = mlx5e_vport_rep_unload;
@@ -1178,11 +1187,16 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_eswitch *esw   = mdev->priv.eswitch;
+	struct mlx5e_rep_priv *rpriv;
 	struct mlx5_eswitch_rep rep;
 
+	rpriv = priv->ppriv;
+	rpriv->netdev = priv->netdev;
+
 	rep.load = mlx5e_nic_rep_load;
 	rep.unload = mlx5e_nic_rep_unload;
-	rep.netdev = priv->netdev;
+	rep.priv = rpriv;
+	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 	mlx5_eswitch_register_vport_rep(esw, 0, &rep); /* UPLINK PF vport*/
 
 	mlx5e_rep_register_vf_vports(priv); /* VFs vports */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 5659ed9f51e6..8db68369367e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -56,8 +56,17 @@ struct mlx5e_neigh_update_table {
 struct mlx5e_rep_priv {
 	struct mlx5_eswitch_rep *rep;
 	struct mlx5e_neigh_update_table neigh_update;
+	struct net_device      *netdev;
+	struct mlx5_flow_handle *vport_rx_rule;
+	struct list_head       vport_sqs_list;
 };
 
+static inline
+struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
+{
+	return (struct mlx5e_rep_priv *)rep->priv;
+}
+
 struct mlx5e_neigh {
 	struct net_device *dev;
 	union {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 55979ec2e88a..f462496cce7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -617,7 +617,8 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 						  FLOW_DISSECTOR_KEY_ENC_PORTS,
 						  f->mask);
 		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-		struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
+		struct mlx5e_rep_priv *uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw);
+		struct net_device *up_dev = uplink_rpriv->netdev;
 		struct mlx5e_priv *up_priv = netdev_priv(up_dev);
 
 		/* Full udp dst port must be given */
@@ -1507,6 +1508,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 				   int *out_ttl)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_rep_priv *uplink_rpriv;
 	struct rtable *rt;
 	struct neighbour *n = NULL;
 
@@ -1520,9 +1522,10 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 #else
 	return -EOPNOTSUPP;
 #endif
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw);
 	/* if the egress device isn't on the same HW e-switch, we use the uplink */
 	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
-		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
+		*out_dev = uplink_rpriv->netdev;
 	else
 		*out_dev = rt->dst.dev;
 
@@ -1543,6 +1546,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 				   struct neighbour **out_n,
 				   int *out_ttl)
 {
+	struct mlx5e_rep_priv *uplink_rpriv;
 	struct neighbour *n = NULL;
 	struct dst_entry *dst;
 
@@ -1557,9 +1561,10 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 
 	*out_ttl = ip6_dst_hoplimit(dst);
 
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw);
 	/* if the egress device isn't on the same HW e-switch, we use the uplink */
 	if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
-		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
+		*out_dev = uplink_rpriv->netdev;
 	else
 		*out_dev = dst->dev;
 #else
@@ -1859,7 +1864,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
+	struct mlx5e_rep_priv *uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw);
+	struct net_device *up_dev = uplink_rpriv->netdev;
 	unsigned short family = ip_tunnel_info_af(tun_info);
 	struct mlx5e_priv *up_priv = netdev_priv(up_dev);
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 9ed401225225..3a21ea4e4d24 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -142,12 +142,9 @@ struct mlx5_eswitch_rep {
 	int		       (*load)(struct mlx5_core_dev *dev,
 				       struct mlx5_eswitch_rep *rep);
 	void		       (*unload)(struct mlx5_eswitch_rep *rep);
+	void			*priv;
 	u16		       vport;
 	u8		       hw_id[ETH_ALEN];
-	struct net_device      *netdev;
-
-	struct mlx5_flow_handle *vport_rx_rule;
-	struct list_head       vport_sqs_list;
 	u16		       vlan;
 	u32		       vlan_refcount;
 	bool		       valid;
@@ -274,7 +271,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     struct mlx5_eswitch_rep *rep);
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 				       int vport_index);
-struct net_device *mlx5_eswitch_get_uplink_netdev(struct mlx5_eswitch *esw);
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 				 struct mlx5_esw_flow_attr *attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index b37f8ac01e43..6560e5943e1e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1131,9 +1131,8 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 
 	rep->load   = __rep->load;
 	rep->unload = __rep->unload;
-	rep->netdev = __rep->netdev;
+	rep->priv = __rep->priv;
 
-	INIT_LIST_HEAD(&rep->vport_sqs_list);
 	rep->valid = true;
 }
 
@@ -1151,12 +1150,12 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 	rep->valid = false;
 }
 
-struct net_device *mlx5_eswitch_get_uplink_netdev(struct mlx5_eswitch *esw)
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw)
 {
 #define UPLINK_REP_INDEX 0
 	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep *rep;
 
 	rep = &offloads->vport_reps[UPLINK_REP_INDEX];
-	return rep->netdev;
+	return rep->priv;
 }
-- 
2.14.2

^ permalink raw reply related

* [for-next 08/11] net/mlx5e: E-Switch, Move send-to-vport rule struct to en_rep
From: Saeed Mahameed @ 2017-12-19 20:33 UTC (permalink / raw)
  To: David S. Miller, Doug Ledford
  Cc: netdev, linux-rdma, Leon Romanovsky, Mark Bloch, Saeed Mahameed
In-Reply-To: <20171219203340.2600-1-saeedm@mellanox.com>

From: Mark Bloch <markb@mellanox.com>

Move struct mlx5_esw_sq, which keeps the send-to-vport rule, from the
eswitch code to mlx5e and rename it to mlx5e_rep_sq to better reflect
where it belongs.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 22 +++++++++++-----------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h  |  5 +++++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  5 -----
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 5b2b673c0b13..c6a77f8e99a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -193,17 +193,17 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
 static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep)
 {
-	struct mlx5_esw_sq *esw_sq, *tmp;
+	struct mlx5e_rep_sq *rep_sq, *tmp;
 	struct mlx5e_rep_priv *rpriv;
 
 	if (esw->mode != SRIOV_OFFLOADS)
 		return;
 
 	rpriv = mlx5e_rep_to_rep_priv(rep);
-	list_for_each_entry_safe(esw_sq, tmp, &rpriv->vport_sqs_list, list) {
-		mlx5_eswitch_del_send_to_vport_rule(esw_sq->send_to_vport_rule);
-		list_del(&esw_sq->list);
-		kfree(esw_sq);
+	list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
+		mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+		list_del(&rep_sq->list);
+		kfree(rep_sq);
 	}
 }
 
@@ -213,7 +213,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 {
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5e_rep_priv *rpriv;
-	struct mlx5_esw_sq *esw_sq;
+	struct mlx5e_rep_sq *rep_sq;
 	int err;
 	int i;
 
@@ -222,8 +222,8 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 
 	rpriv = mlx5e_rep_to_rep_priv(rep);
 	for (i = 0; i < sqns_num; i++) {
-		esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL);
-		if (!esw_sq) {
+		rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
+		if (!rep_sq) {
 			err = -ENOMEM;
 			goto out_err;
 		}
@@ -234,11 +234,11 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 								sqns_array[i]);
 		if (IS_ERR(flow_rule)) {
 			err = PTR_ERR(flow_rule);
-			kfree(esw_sq);
+			kfree(rep_sq);
 			goto out_err;
 		}
-		esw_sq->send_to_vport_rule = flow_rule;
-		list_add(&esw_sq->list, &rpriv->vport_sqs_list);
+		rep_sq->send_to_vport_rule = flow_rule;
+		list_add(&rep_sq->list, &rpriv->vport_sqs_list);
 	}
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index e4473a9ebd50..b9b481f2833a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -133,6 +133,11 @@ struct mlx5e_encap_entry {
 	int encap_size;
 };
 
+struct mlx5e_rep_sq {
+	struct mlx5_flow_handle	*send_to_vport_rule;
+	struct list_head	 list;
+};
+
 void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
 void mlx5e_register_vport_reps(struct mlx5e_priv *priv);
 void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 91175965df7f..3b481182f13a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -138,11 +138,6 @@ struct mlx5_eswitch_fdb {
 	};
 };
 
-struct mlx5_esw_sq {
-	struct mlx5_flow_handle	*send_to_vport_rule;
-	struct list_head	 list;
-};
-
 struct mlx5_eswitch_rep;
 struct mlx5_eswitch_rep_if {
 	int		       (*load)(struct mlx5_core_dev *dev,
-- 
2.14.2

^ permalink raw reply related

* [for-next 07/11] net/mlx5: E-Switch, Create generic header struct to be used by representors
From: Saeed Mahameed @ 2017-12-19 20:33 UTC (permalink / raw)
  To: David S. Miller, Doug Ledford
  Cc: netdev, linux-rdma, Leon Romanovsky, Mark Bloch, Saeed Mahameed
In-Reply-To: <20171219203340.2600-1-saeedm@mellanox.com>

From: Mark Bloch <markb@mellanox.com>

Now that we don't store type-dependent data in struct mlx5_eswitch_rep,
we can create a generic interface and a representor type.

struct mlx5_eswitch_rep will store an array of interfaces, each
interface is used by a different representor type.

Now that we have moved to a more generic interface, RDMA driver
representors can be added and use the same mechanism as the Ethernet
driver representors.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 29 ++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  9 +--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 22 +++++--
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 72 ++++++++++++++++------
 5 files changed, 90 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 3c74f0599ad3..5b2b673c0b13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1086,7 +1086,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 
 	rpriv->netdev = netdev;
 	rpriv->rep = rep;
-	rep->priv = rpriv;
+	rep->rep_if[REP_ETH].priv = rpriv;
 	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 
 	err = mlx5e_attach_netdev(netdev_priv(netdev));
@@ -1103,7 +1103,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 		goto err_detach_netdev;
 	}
 
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch);
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
 	upriv = netdev_priv(uplink_rpriv->netdev);
 	err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb,
 					 upriv);
@@ -1146,7 +1146,8 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	struct mlx5e_priv *upriv;
 
 	unregister_netdev(netdev);
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch);
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
+						    REP_ETH);
 	upriv = netdev_priv(uplink_rpriv->netdev);
 	tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
 				     upriv);
@@ -1164,11 +1165,11 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 	int vport;
 
 	for (vport = 1; vport < total_vfs; vport++) {
-		struct mlx5_eswitch_rep rep = {};
+		struct mlx5_eswitch_rep_if rep_if = {};
 
-		rep.load = mlx5e_vport_rep_load;
-		rep.unload = mlx5e_vport_rep_unload;
-		mlx5_eswitch_register_vport_rep(esw, vport, &rep);
+		rep_if.load = mlx5e_vport_rep_load;
+		rep_if.unload = mlx5e_vport_rep_unload;
+		mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
 	}
 }
 
@@ -1180,24 +1181,24 @@ static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
 	int vport;
 
 	for (vport = 1; vport < total_vfs; vport++)
-		mlx5_eswitch_unregister_vport_rep(esw, vport);
+		mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
 }
 
 void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_eswitch *esw   = mdev->priv.eswitch;
+	struct mlx5_eswitch_rep_if rep_if;
 	struct mlx5e_rep_priv *rpriv;
-	struct mlx5_eswitch_rep rep;
 
 	rpriv = priv->ppriv;
 	rpriv->netdev = priv->netdev;
 
-	rep.load = mlx5e_nic_rep_load;
-	rep.unload = mlx5e_nic_rep_unload;
-	rep.priv = rpriv;
+	rep_if.load = mlx5e_nic_rep_load;
+	rep_if.unload = mlx5e_nic_rep_unload;
+	rep_if.priv = rpriv;
 	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
-	mlx5_eswitch_register_vport_rep(esw, 0, &rep); /* UPLINK PF vport*/
+	mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
 
 	mlx5e_rep_register_vf_vports(priv); /* VFs vports */
 }
@@ -1208,7 +1209,7 @@ void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
 	struct mlx5_eswitch *esw   = mdev->priv.eswitch;
 
 	mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
-	mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/
+	mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/
 }
 
 void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 8db68369367e..e4473a9ebd50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -64,7 +64,7 @@ struct mlx5e_rep_priv {
 static inline
 struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
 {
-	return (struct mlx5e_rep_priv *)rep->priv;
+	return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv;
 }
 
 struct mlx5e_neigh {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f462496cce7a..259e91e2d09a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -617,7 +617,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 						  FLOW_DISSECTOR_KEY_ENC_PORTS,
 						  f->mask);
 		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-		struct mlx5e_rep_priv *uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw);
+		struct mlx5e_rep_priv *uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
 		struct net_device *up_dev = uplink_rpriv->netdev;
 		struct mlx5e_priv *up_priv = netdev_priv(up_dev);
 
@@ -1522,7 +1522,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 #else
 	return -EOPNOTSUPP;
 #endif
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw);
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
 	/* if the egress device isn't on the same HW e-switch, we use the uplink */
 	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
 		*out_dev = uplink_rpriv->netdev;
@@ -1561,7 +1561,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 
 	*out_ttl = ip6_dst_hoplimit(dst);
 
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw);
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
 	/* if the egress device isn't on the same HW e-switch, we use the uplink */
 	if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
 		*out_dev = uplink_rpriv->netdev;
@@ -1864,7 +1864,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mlx5e_rep_priv *uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw);
+	struct mlx5e_rep_priv *uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw,
+									   REP_ETH);
 	struct net_device *up_dev = uplink_rpriv->netdev;
 	unsigned short family = ip_tunnel_info_af(tun_info);
 	struct mlx5e_priv *up_priv = netdev_priv(up_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 3a21ea4e4d24..91175965df7f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -45,6 +45,11 @@ enum {
 	SRIOV_OFFLOADS
 };
 
+enum {
+	REP_ETH,
+	NUM_REP_TYPES,
+};
+
 #ifdef CONFIG_MLX5_ESWITCH
 
 #define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -138,16 +143,21 @@ struct mlx5_esw_sq {
 	struct list_head	 list;
 };
 
-struct mlx5_eswitch_rep {
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
 	int		       (*load)(struct mlx5_core_dev *dev,
 				       struct mlx5_eswitch_rep *rep);
 	void		       (*unload)(struct mlx5_eswitch_rep *rep);
 	void			*priv;
+	bool		       valid;
+};
+
+struct mlx5_eswitch_rep {
+	struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
 	u16		       vport;
 	u8		       hw_id[ETH_ALEN];
 	u16		       vlan;
 	u32		       vlan_refcount;
-	bool		       valid;
 };
 
 struct mlx5_esw_offload {
@@ -268,10 +278,12 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     int vport_index,
-				     struct mlx5_eswitch_rep *rep);
+				     struct mlx5_eswitch_rep_if *rep_if,
+				     u8 rep_type);
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-				       int vport_index);
-void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw);
+				       int vport_index,
+				       u8 rep_type);
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 				 struct mlx5_esw_flow_attr *attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 6560e5943e1e..ee617f7d3e84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -130,7 +130,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
 	esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
 	for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
 		rep = &esw->offloads.vport_reps[vf_vport];
-		if (!rep->valid)
+		if (!rep->rep_if[REP_ETH].valid)
 			continue;
 
 		err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
@@ -720,21 +720,31 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 	return 0;
 }
 
-static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
+static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports,
+					  u8 rep_type)
 {
 	struct mlx5_eswitch_rep *rep;
 	int vport;
 
 	for (vport = nvports - 1; vport >= 0; vport--) {
 		rep = &esw->offloads.vport_reps[vport];
-		if (!rep->valid)
+		if (!rep->rep_if[rep_type].valid)
 			continue;
 
-		rep->unload(rep);
+		rep->rep_if[rep_type].unload(rep);
 	}
 }
 
-static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
+static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
+{
+	u8 rep_type = NUM_REP_TYPES;
+
+	while (rep_type-- > 0)
+		esw_offloads_unload_reps_type(esw, nvports, rep_type);
+}
+
+static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
+				       u8 rep_type)
 {
 	struct mlx5_eswitch_rep *rep;
 	int vport;
@@ -742,10 +752,10 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
 
 	for (vport = 0; vport < nvports; vport++) {
 		rep = &esw->offloads.vport_reps[vport];
-		if (!rep->valid)
+		if (!rep->rep_if[rep_type].valid)
 			continue;
 
-		err = rep->load(esw->dev, rep);
+		err = rep->rep_if[rep_type].load(esw->dev, rep);
 		if (err)
 			goto err_reps;
 	}
@@ -753,7 +763,28 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
 	return 0;
 
 err_reps:
-	esw_offloads_unload_reps(esw, vport);
+	esw_offloads_unload_reps_type(esw, vport, rep_type);
+	return err;
+}
+
+static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
+{
+	u8 rep_type = 0;
+	int err;
+
+	while (rep_type < NUM_REP_TYPES) {
+		err = esw_offloads_load_reps_type(esw, nvports,
+						  rep_type);
+		if (err)
+			goto err_reps;
+		rep_type++;
+	}
+
+	return err;
+
+err_reps:
+	while (rep_type-- > 0)
+		esw_offloads_unload_reps_type(esw, nvports, rep_type);
 	return err;
 }
 
@@ -1122,22 +1153,23 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
 
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     int vport_index,
-				     struct mlx5_eswitch_rep *__rep)
+				     struct mlx5_eswitch_rep_if *__rep_if,
+				     u8 rep_type)
 {
 	struct mlx5_esw_offload *offloads = &esw->offloads;
-	struct mlx5_eswitch_rep *rep;
+	struct mlx5_eswitch_rep_if *rep_if;
 
-	rep = &offloads->vport_reps[vport_index];
+	rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type];
 
-	rep->load   = __rep->load;
-	rep->unload = __rep->unload;
-	rep->priv = __rep->priv;
+	rep_if->load   = __rep_if->load;
+	rep_if->unload = __rep_if->unload;
+	rep_if->priv = __rep_if->priv;
 
-	rep->valid = true;
+	rep_if->valid = true;
 }
 
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-				       int vport_index)
+				       int vport_index, u8 rep_type)
 {
 	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep *rep;
@@ -1145,17 +1177,17 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 	rep = &offloads->vport_reps[vport_index];
 
 	if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
-		rep->unload(rep);
+		rep->rep_if[rep_type].unload(rep);
 
-	rep->valid = false;
+	rep->rep_if[rep_type].valid = false;
 }
 
-void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw)
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
 #define UPLINK_REP_INDEX 0
 	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep *rep;
 
 	rep = &offloads->vport_reps[UPLINK_REP_INDEX];
-	return rep->priv;
+	return rep->rep_if[rep_type].priv;
 }
-- 
2.14.2

^ permalink raw reply related

* [for-next 09/11] net/mlx5e: E-Switch, Use the name of static array instead of its address
From: Saeed Mahameed @ 2017-12-19 20:33 UTC (permalink / raw)
  To: David S. Miller, Doug Ledford
  Cc: netdev, linux-rdma, Leon Romanovsky, Gal Pressman, Saeed Mahameed
In-Reply-To: <20171219203340.2600-1-saeedm@mellanox.com>

From: Gal Pressman <galp@mellanox.com>

Using the address of a static array is the same as using its name (in
this specific use-case), but it's confusing and makes the code less
readable.

Fixes: 1bd27b11c1df ("net/mlx5: Introduce E-switch QoS management")
Fixes: bd77bf1cb595 ("net/mlx5: Add SRIOV VF max rate configuration support")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 26 +++++++++++------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6d4cbdb69823..cdf65ed8714c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1290,7 +1290,7 @@ static int esw_create_tsar(struct mlx5_eswitch *esw)
 
 	err = mlx5_create_scheduling_element_cmd(dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
-						 &tsar_ctx,
+						 tsar_ctx,
 						 &esw->qos.root_tsar_id);
 	if (err) {
 		esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err);
@@ -1333,20 +1333,20 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
 	if (vport->qos.enabled)
 		return -EEXIST;
 
-	MLX5_SET(scheduling_context, &sched_ctx, element_type,
+	MLX5_SET(scheduling_context, sched_ctx, element_type,
 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
-	vport_elem = MLX5_ADDR_OF(scheduling_context, &sched_ctx,
+	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
 				  element_attributes);
 	MLX5_SET(vport_element, vport_elem, vport_number, vport_num);
-	MLX5_SET(scheduling_context, &sched_ctx, parent_element_id,
+	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
 		 esw->qos.root_tsar_id);
-	MLX5_SET(scheduling_context, &sched_ctx, max_average_bw,
+	MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
 		 initial_max_rate);
-	MLX5_SET(scheduling_context, &sched_ctx, bw_share, initial_bw_share);
+	MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share);
 
 	err = mlx5_create_scheduling_element_cmd(dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
-						 &sched_ctx,
+						 sched_ctx,
 						 &vport->qos.esw_tsar_ix);
 	if (err) {
 		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
@@ -1392,22 +1392,22 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num,
 	if (!vport->qos.enabled)
 		return -EIO;
 
-	MLX5_SET(scheduling_context, &sched_ctx, element_type,
+	MLX5_SET(scheduling_context, sched_ctx, element_type,
 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
-	vport_elem = MLX5_ADDR_OF(scheduling_context, &sched_ctx,
+	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
 				  element_attributes);
 	MLX5_SET(vport_element, vport_elem, vport_number, vport_num);
-	MLX5_SET(scheduling_context, &sched_ctx, parent_element_id,
+	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
 		 esw->qos.root_tsar_id);
-	MLX5_SET(scheduling_context, &sched_ctx, max_average_bw,
+	MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
 		 max_rate);
-	MLX5_SET(scheduling_context, &sched_ctx, bw_share, bw_share);
+	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
 	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
 	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
 
 	err = mlx5_modify_scheduling_element_cmd(dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
-						 &sched_ctx,
+						 sched_ctx,
 						 vport->qos.esw_tsar_ix,
 						 bitmask);
 	if (err) {
-- 
2.14.2

^ permalink raw reply related

* [for-next 10/11] net/mlx5: Fix ingress/egress naming mistake
From: Saeed Mahameed @ 2017-12-19 20:33 UTC (permalink / raw)
  To: David S. Miller, Doug Ledford
  Cc: netdev, linux-rdma, Leon Romanovsky, Gal Pressman, Saeed Mahameed
In-Reply-To: <20171219203340.2600-1-saeedm@mellanox.com>

From: Gal Pressman <galp@mellanox.com>

The function names do not represent their actions; switch the mistaken
ingress/egress naming.

Fixes: fba53f7b5719 ("net/mlx5: Introduce mlx5_flow_steering structure")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c70fd663a633..5e786e29f93a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2406,7 +2406,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 	return PTR_ERR(prio);
 }
 
-static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering)
+static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering)
 {
 	struct fs_prio *prio;
 
@@ -2420,7 +2420,7 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering)
 	return PTR_ERR_OR_ZERO(prio);
 }
 
-static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering)
+static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering)
 {
 	struct fs_prio *prio;
 
-- 
2.14.2

^ permalink raw reply related

* [for-next 11/11] net/mlx5: Separate ingress/egress namespaces for each vport
From: Saeed Mahameed @ 2017-12-19 20:33 UTC (permalink / raw)
  To: David S. Miller, Doug Ledford
  Cc: netdev, linux-rdma, Leon Romanovsky, Gal Pressman, Saeed Mahameed
In-Reply-To: <20171219203340.2600-1-saeedm@mellanox.com>

From: Gal Pressman <galp@mellanox.com>

Each vport has its own root flow table for the ACL flow tables, and the
root flow table is per namespace; therefore we should create a namespace
for each vport.

Fixes: efdc810ba39d ("net/mlx5: Flow steering, Add vport ACL support")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c |  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 145 ++++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h |   4 +-
 include/linux/mlx5/fs.h                           |   4 +
 4 files changed, 133 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index cdf65ed8714c..7649e36653d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -867,9 +867,10 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 	esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n",
 		  vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
 
-	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS);
+	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+						    vport->vport);
 	if (!root_ns) {
-		esw_warn(dev, "Failed to get E-Switch egress flow namespace\n");
+		esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
 		return -EOPNOTSUPP;
 	}
 
@@ -984,9 +985,10 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n",
 		  vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
 
-	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS);
+	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+						    vport->vport);
 	if (!root_ns) {
-		esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n");
+		esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
 		return -EOPNOTSUPP;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 5e786e29f93a..45e75b1010f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2014,16 +2014,6 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 			return &steering->fdb_root_ns->ns;
 		else
 			return NULL;
-	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
-		if (steering->esw_egress_root_ns)
-			return &steering->esw_egress_root_ns->ns;
-		else
-			return NULL;
-	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
-		if (steering->esw_ingress_root_ns)
-			return &steering->esw_ingress_root_ns->ns;
-		else
-			return NULL;
 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
 		if (steering->sniffer_rx_root_ns)
 			return &steering->sniffer_rx_root_ns->ns;
@@ -2054,6 +2044,33 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL(mlx5_get_flow_namespace);
 
+struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
+							      enum mlx5_flow_namespace_type type,
+							      int vport)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+
+	if (!steering || vport >= MLX5_TOTAL_VPORTS(dev))
+		return NULL;
+
+	switch (type) {
+	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
+		if (steering->esw_egress_root_ns &&
+		    steering->esw_egress_root_ns[vport])
+			return &steering->esw_egress_root_ns[vport]->ns;
+		else
+			return NULL;
+	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+		if (steering->esw_ingress_root_ns &&
+		    steering->esw_ingress_root_ns[vport])
+			return &steering->esw_ingress_root_ns[vport]->ns;
+		else
+			return NULL;
+	default:
+		return NULL;
+	}
+}
+
 static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
 				      unsigned int prio, int num_levels)
 {
@@ -2331,13 +2348,41 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
 	clean_tree(&root_ns->ns.node);
 }
 
+static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int i;
+
+	if (!steering->esw_egress_root_ns)
+		return;
+
+	for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+		cleanup_root_ns(steering->esw_egress_root_ns[i]);
+
+	kfree(steering->esw_egress_root_ns);
+}
+
+static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int i;
+
+	if (!steering->esw_ingress_root_ns)
+		return;
+
+	for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+
+	kfree(steering->esw_ingress_root_ns);
+}
+
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
 
 	cleanup_root_ns(steering->root_ns);
-	cleanup_root_ns(steering->esw_egress_root_ns);
-	cleanup_root_ns(steering->esw_ingress_root_ns);
+	cleanup_egress_acls_root_ns(dev);
+	cleanup_ingress_acls_root_ns(dev);
 	cleanup_root_ns(steering->fdb_root_ns);
 	cleanup_root_ns(steering->sniffer_rx_root_ns);
 	cleanup_root_ns(steering->sniffer_tx_root_ns);
@@ -2406,34 +2451,86 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 	return PTR_ERR(prio);
 }
 
-static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering)
+static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
 {
 	struct fs_prio *prio;
 
-	steering->esw_egress_root_ns = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
-	if (!steering->esw_egress_root_ns)
+	steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
+	if (!steering->esw_egress_root_ns[vport])
 		return -ENOMEM;
 
 	/* create 1 prio*/
-	prio = fs_create_prio(&steering->esw_egress_root_ns->ns, 0,
-			      MLX5_TOTAL_VPORTS(steering->dev));
+	prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1);
 	return PTR_ERR_OR_ZERO(prio);
 }
 
-static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering)
+static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
 {
 	struct fs_prio *prio;
 
-	steering->esw_ingress_root_ns = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
-	if (!steering->esw_ingress_root_ns)
+	steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
+	if (!steering->esw_ingress_root_ns[vport])
 		return -ENOMEM;
 
 	/* create 1 prio*/
-	prio = fs_create_prio(&steering->esw_ingress_root_ns->ns, 0,
-			      MLX5_TOTAL_VPORTS(steering->dev));
+	prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1);
 	return PTR_ERR_OR_ZERO(prio);
 }
 
+static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int err;
+	int i;
+
+	steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
+					       sizeof(*steering->esw_egress_root_ns),
+					       GFP_KERNEL);
+	if (!steering->esw_egress_root_ns)
+		return -ENOMEM;
+
+	for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+		err = init_egress_acl_root_ns(steering, i);
+		if (err)
+			goto cleanup_root_ns;
+	}
+
+	return 0;
+
+cleanup_root_ns:
+	for (i--; i >= 0; i--)
+		cleanup_root_ns(steering->esw_egress_root_ns[i]);
+	kfree(steering->esw_egress_root_ns);
+	return err;
+}
+
+static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int err;
+	int i;
+
+	steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
+						sizeof(*steering->esw_ingress_root_ns),
+						GFP_KERNEL);
+	if (!steering->esw_ingress_root_ns)
+		return -ENOMEM;
+
+	for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+		err = init_ingress_acl_root_ns(steering, i);
+		if (err)
+			goto cleanup_root_ns;
+	}
+
+	return 0;
+
+cleanup_root_ns:
+	for (i--; i >= 0; i--)
+		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+	kfree(steering->esw_ingress_root_ns);
+	return err;
+}
+
 int mlx5_init_fs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_flow_steering *steering;
@@ -2476,12 +2573,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 				goto err;
 		}
 		if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
-			err = init_egress_acl_root_ns(steering);
+			err = init_egress_acls_root_ns(dev);
 			if (err)
 				goto err;
 		}
 		if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
-			err = init_ingress_acl_root_ns(steering);
+			err = init_ingress_acls_root_ns(dev);
 			if (err)
 				goto err;
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 397d24a621a4..3e571045626f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -71,8 +71,8 @@ struct mlx5_flow_steering {
 	struct kmem_cache               *ftes_cache;
 	struct mlx5_flow_root_namespace *root_ns;
 	struct mlx5_flow_root_namespace *fdb_root_ns;
-	struct mlx5_flow_root_namespace *esw_egress_root_ns;
-	struct mlx5_flow_root_namespace *esw_ingress_root_ns;
+	struct mlx5_flow_root_namespace **esw_egress_root_ns;
+	struct mlx5_flow_root_namespace **esw_ingress_root_ns;
 	struct mlx5_flow_root_namespace	*sniffer_tx_root_ns;
 	struct mlx5_flow_root_namespace	*sniffer_rx_root_ns;
 };
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index b25e7baa273e..a0b48afcb422 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -95,6 +95,10 @@ struct mlx5_flow_destination {
 struct mlx5_flow_namespace *
 mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 			enum mlx5_flow_namespace_type type);
+struct mlx5_flow_namespace *
+mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
+				  enum mlx5_flow_namespace_type type,
+				  int vport);
 
 struct mlx5_flow_table *
 mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
-- 
2.14.2

^ permalink raw reply related

* sparc64 verifier failures..
From: David Miller @ 2017-12-19 20:36 UTC (permalink / raw)
  To: daniel; +Cc: netdev


I'm getting about 100 verifier failures on sparc64.

The vast majority of them seem to be due to misaligned packet
accesses.  Here is a sample of some of the failures.

#32/p invalid fp arithmetic FAIL
Unexpected error message!
0: (b7) r0 = 0
1: (bf) r1 = r10
2: (17) r1 -= 8
3: (7b) *(u64 *)(r1 +0) = r0
misaligned access off (0x0; 0xffffffffffffffff)+0+0 size 8
 ...
#160/p raw_stack: skb_load_bytes, spilled regs corruption FAIL
Unexpected error message!
0: (b7) r2 = 4
1: (bf) r6 = r10
2: (07) r6 += -8
3: (7b) *(u64 *)(r6 +0) = r1
4: (bf) r3 = r6
5: (b7) r4 = 8
6: (85) call bpf_skb_load_bytes#26
7: (79) r0 = *(u64 *)(r6 +0)
8: (61) r0 = *(u32 *)(r0 +8)
misaligned access off (0x0; 0xffffffffffffffff)+0+8 size 4
#161/p raw_stack: skb_load_bytes, spilled regs corruption 2 FAIL
Unexpected error message!
0: (b7) r2 = 4
1: (bf) r6 = r10
2: (07) r6 += -16
3: (7b) *(u64 *)(r6 -8) = r1
4: (7b) *(u64 *)(r6 +0) = r1
5: (7b) *(u64 *)(r6 +8) = r1
6: (bf) r3 = r6
7: (b7) r4 = 8
8: (85) call bpf_skb_load_bytes#26
9: (79) r0 = *(u64 *)(r6 -8)
10: (79) r2 = *(u64 *)(r6 +8)
11: (79) r3 = *(u64 *)(r6 +0)
12: (61) r0 = *(u32 *)(r0 +8)
13: (61) r2 = *(u32 *)(r2 +32)
14: (0f) r0 += r2
15: (61) r3 = *(u32 *)(r3 +4)
misaligned access off (0x0; 0xffffffffffffffff)+0+4 size 4
 ...
#184/p direct packet access: test15 (spill with xadd) FAIL
Unexpected error message!
0: (61) r2 = *(u32 *)(r1 +76)
1: (61) r3 = *(u32 *)(r1 +80)
2: (bf) r0 = r2
3: (07) r0 += 8
4: (2d) if r0 > r3 goto pc+8
 R0=pkt(id=0,off=8,r=8,imm=0) R1=ctx(id=0,off=0,imm=0) R2=pkt(id=0,off=0,r=8,imm=0) R3=pkt_end(id=0,off=0,imm=0) R10=fp0,call_-1
5: (b7) r5 = 4096
6: (bf) r4 = r10
7: (07) r4 += -8
8: (7b) *(u64 *)(r4 +0) = r2
9: (db) lock *(u64 *)(r4 +0) += r5
10: (79) r2 = *(u64 *)(r4 +0)
11: (63) *(u32 *)(r2 +0) = r5
misaligned access off (0x0; 0xffffffffffffffff)+0+0 size 4
 ...
#189/p direct packet access: test20 (x += pkt_ptr, 1) FAIL
Failed to load prog 'Permission denied'!
0: (61) r2 = *(u32 *)(r1 +76)
1: (61) r3 = *(u32 *)(r1 +80)
2: (b7) r0 = -1
3: (7b) *(u64 *)(r10 -8) = r0
4: (79) r0 = *(u64 *)(r10 -8)
5: (57) r0 &= 32767
6: (bf) r4 = r0
7: (0f) r4 += r2
8: (bf) r5 = r4
9: (07) r4 += 32766
10: (2d) if r4 > r3 goto pc+1
 R0=inv(id=0,umax_value=32767,var_off=(0x0; 0x7fff)) R1=ctx(id=0,off=0,imm=0) R2=pkt(id=0,off=0,r=0,imm=0) R3=pkt_end(id=0,off=0,imm=0) R4=pkt(id=1,off=32766,r=32766,umax_value=32767,var_off=(0x0; 0x7fff)) R5=pkt(id=1,off=0,r=32766,umax_value=32767,var_off=(0x0; 0x7fff)) R10=fp0,call_-1
11: (7b) *(u64 *)(r5 +0) = r4
misaligned packet access off 2+(0x0; 0x7fff)+0+0 size 8
#190/p direct packet access: test21 (x += pkt_ptr, 2) FAIL
Failed to load prog 'Permission denied'!
0: (61) r2 = *(u32 *)(r1 +76)
1: (61) r3 = *(u32 *)(r1 +80)
2: (bf) r0 = r2
3: (07) r0 += 8
4: (2d) if r0 > r3 goto pc+9
 R0=pkt(id=0,off=8,r=8,imm=0) R1=ctx(id=0,off=0,imm=0) R2=pkt(id=0,off=0,r=8,imm=0) R3=pkt_end(id=0,off=0,imm=0) R10=fp0,call_-1
5: (b7) r4 = -1
6: (7b) *(u64 *)(r10 -8) = r4
7: (79) r4 = *(u64 *)(r10 -8)
8: (57) r4 &= 32767
9: (0f) r4 += r2
10: (bf) r5 = r4
11: (07) r4 += 32766
12: (2d) if r4 > r3 goto pc+1
 R0=pkt(id=0,off=8,r=8,imm=0) R1=ctx(id=0,off=0,imm=0) R2=pkt(id=0,off=0,r=8,imm=0) R3=pkt_end(id=0,off=0,imm=0) R4=pkt(id=1,off=32766,r=32766,umax_value=32767,var_off=(0x0; 0x7fff)) R5=pkt(id=1,off=0,r=32766,umax_value=32767,var_off=(0x0; 0x7fff)) R10=fp0,call_-1
13: (7b) *(u64 *)(r5 +0) = r4
misaligned packet access off 2+(0x0; 0x7fff)+0+0 size 8
#191/p direct packet access: test22 (x += pkt_ptr, 3) FAIL
Failed to load prog 'Permission denied'!
0: (61) r2 = *(u32 *)(r1 +76)
1: (61) r3 = *(u32 *)(r1 +80)
2: (bf) r0 = r2
3: (07) r0 += 8
4: (7b) *(u64 *)(r10 -8) = r2
5: (7b) *(u64 *)(r10 -16) = r3
6: (79) r3 = *(u64 *)(r10 -16)
7: (2d) if r0 > r3 goto pc+11
 R0=pkt(id=0,off=8,r=8,imm=0) R1=ctx(id=0,off=0,imm=0) R2=pkt(id=0,off=0,r=8,imm=0) R3=pkt_end(id=0,off=0,imm=0) R10=fp0,call_-1 fp-8=pkt fp-16=pkt_end
8: (79) r2 = *(u64 *)(r10 -8)
9: (b7) r4 = -1
10: (db) lock *(u64 *)(r10 -8) += r4
11: (79) r4 = *(u64 *)(r10 -8)
12: (77) r4 >>= 49
13: (0f) r4 += r2
14: (bf) r0 = r4
15: (07) r0 += 2
16: (2d) if r0 > r3 goto pc+2
 R0=pkt(id=1,off=2,r=2,umax_value=32767,var_off=(0x0; 0x7fff)) R1=ctx(id=0,off=0,imm=0) R2=pkt(id=0,off=0,r=8,imm=0) R3=pkt_end(id=0,off=0,imm=0) R4=pkt(id=1,off=0,r=2,umax_value=32767,var_off=(0x0; 0x7fff)) R10=fp0,call_-1 fp-16=pkt_end
17: (b7) r2 = 1
18: (6b) *(u16 *)(r4 +0) = r2
misaligned packet access off 2+(0x0; 0x7fff)+0+0 size 2
#192/p direct packet access: test23 (x += pkt_ptr, 4) FAIL
Unexpected error message!
0: (61) r2 = *(u32 *)(r1 +76)
1: (61) r3 = *(u32 *)(r1 +80)
2: (b7) r0 = -1
3: (7b) *(u64 *)(r10 -8) = r0
4: (79) r0 = *(u64 *)(r10 -8)
5: (57) r0 &= 65535
6: (bf) r4 = r0
7: (b7) r0 = 31
8: (0f) r0 += r4
9: (0f) r0 += r2
10: (bf) r5 = r0
11: (07) r0 += 65534
12: (2d) if r0 > r3 goto pc+1
 R0=pkt(id=1,off=65534,r=0,umin_value=31,umax_value=65566,var_off=(0x0; 0x1ffff)) R1=ctx(id=0,off=0,imm=0) R2=pkt(id=0,off=0,r=0,imm=0) R3=pkt_end(id=0,off=0,imm=0) R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R5=pkt(id=1,off=0,r=0,umin_value=31,umax_value=65566,var_off=(0x0; 0x1ffff)) R10=fp0,call_-1
13: (7b) *(u64 *)(r5 +0) = r0
misaligned packet access off 2+(0x0; 0x1ffff)+0+0 size 8
#193/p direct packet access: test24 (x += pkt_ptr, 5) FAIL
Failed to load prog 'Permission denied'!
0: (61) r2 = *(u32 *)(r1 +76)
1: (61) r3 = *(u32 *)(r1 +80)
2: (b7) r0 = -1
3: (7b) *(u64 *)(r10 -8) = r0
4: (79) r0 = *(u64 *)(r10 -8)
5: (57) r0 &= 255
6: (bf) r4 = r0
7: (b7) r0 = 64
8: (0f) r0 += r4
9: (0f) r0 += r2
10: (bf) r5 = r0
11: (07) r0 += 32766
12: (2d) if r0 > r3 goto pc+1
 R0=pkt(id=1,off=32766,r=32766,umin_value=64,umax_value=319,var_off=(0x0; 0x1ff)) R1=ctx(id=0,off=0,imm=0) R2=pkt(id=0,off=0,r=0,imm=0) R3=pkt_end(id=0,off=0,imm=0) R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R5=pkt(id=1,off=0,r=32766,umin_value=64,umax_value=319,var_off=(0x0; 0x1ff)) R10=fp0,call_-1
13: (7b) *(u64 *)(r5 +0) = r0
misaligned packet access off 2+(0x0; 0x1ff)+0+0 size 8
 ...
#221/u valid map access into an array with a variable FAIL
Unexpected error message!
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (18) r1 = 0x0
5: (85) call bpf_map_lookup_elem#1
6: (15) if r0 == 0x0 goto pc+5
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R10=fp0,call_-1
7: (61) r1 = *(u32 *)(r0 +0)
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R10=fp0,call_-1
8: (35) if r1 >= 0xb goto pc+3
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1=inv(id=0,umax_value=10,var_off=(0x0; 0xf)) R10=fp0,call_-1
9: (67) r1 <<= 2
10: (0f) r0 += r1
11: (7a) *(u64 *)(r0 +0) = 4
misaligned value access off (0x0; 0x3c)+0+0 size 8
#221/p valid map access into an array with a variable FAIL
Failed to load prog 'Permission denied'!
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (18) r1 = 0xfff8003fb5788e00
5: (85) call bpf_map_lookup_elem#1
6: (15) if r0 == 0x0 goto pc+5
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R10=fp0,call_-1
7: (61) r1 = *(u32 *)(r0 +0)
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R10=fp0,call_-1
8: (35) if r1 >= 0xb goto pc+3
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1=inv(id=0,umax_value=10,var_off=(0x0; 0xf)) R10=fp0,call_-1
9: (67) r1 <<= 2
10: (0f) r0 += r1
11: (7a) *(u64 *)(r0 +0) = 4
misaligned value access off (0x0; 0x3c)+0+0 size 8
 ...
#222/p valid map access into an array with a signed variable FAIL
Failed to load prog 'Permission denied'!
0: (7a) *(u64 *)(r10 -8) = 0
1: (bf) r2 = r10
2: (07) r2 += -8
3: (18) r1 = 0xfff8003fb5789200
5: (85) call bpf_map_lookup_elem#1
6: (15) if r0 == 0x0 goto pc+9
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R10=fp0,call_-1
7: (61) r1 = *(u32 *)(r0 +0)
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R10=fp0,call_-1
8: (65) if r1 s> 0xffffffff goto pc+1
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0,call_-1
9: (b4) (u32) r1 = (u32) 0
10: (b4) (u32) r2 = (u32) 11
11: (6d) if r2 s> r1 goto pc+1
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1=inv0 R2=inv11 R10=fp0,call_-1
12: (b4) (u32) r1 = (u32) 0
13: (64) (u32) r1 <<= (u32) 2
14: (0f) r0 += r1
15: (7a) *(u64 *)(r0 +0) = 4
 R0_w=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1_w=inv0 R2=inv11 R10=fp0,call_-1
16: (95) exit

from 11 to 13: safe

from 8 to 10: R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R10=fp0,call_-1
10: (b4) (u32) r2 = (u32) 11
11: (6d) if r2 s> r1 goto pc+1
 R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1=inv(id=0,umin_value=11,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv11 R10=fp0,call_-1
12: (b4) (u32) r1 = (u32) 0
13: safe

from 11 to 13: R0=map_value(id=0,off=0,ks=8,vs=48,imm=0) R1=inv(id=0,umax_value=10,var_off=(0x0; 0xf)) R2=inv11 R10=fp0,call_-1
13: (64) (u32) r1 <<= (u32) 2
14: (0f) r0 += r1
15: (7a) *(u64 *)(r0 +0) = 4
misaligned value access off (0x0; 0x3c)+0+0 size 8
 ...
Summary: 603 PASSED, 100 FAILED

^ permalink raw reply

* Re: [PATCH net] openvswitch: Fix pop_vlan action for double tagged frames
From: Eric Garver @ 2017-12-19 20:42 UTC (permalink / raw)
  To: Jiri Benc; +Cc: netdev, ovs-dev
In-Reply-To: <20171219203929.12c1cb93@redhat.com>

On Tue, Dec 19, 2017 at 08:39:29PM +0100, Jiri Benc wrote:
> On Tue, 19 Dec 2017 13:57:53 -0500, Eric Garver wrote:
> > --- a/net/openvswitch/flow.c
> > +++ b/net/openvswitch/flow.c
> > @@ -559,8 +559,9 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
> >   *      of a correct length, otherwise the same as skb->network_header.
> >   *      For other key->eth.type values it is left untouched.
> >   *
> > - *    - skb->protocol: the type of the data starting at skb->network_header.
> > - *      Equals to key->eth.type.
> > + *    - skb->protocol: For Ethernet, the ethertype or VLAN TPID.
> > + *      For non-Ethernet, the type of the data starting at skb->network_header
> > + *      (also equal to key->eth.type).
> >   */
> >  static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> >  {
> > @@ -579,6 +580,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> >  			return -EINVAL;
> >  
> >  		skb_reset_network_header(skb);
> > +		key->eth.type = skb->protocol;
> >  	} else {
> >  		eth = eth_hdr(skb);
> >  		ether_addr_copy(key->eth.src, eth->h_source);
> > @@ -592,15 +594,14 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> >  		if (unlikely(parse_vlan(skb, key)))
> >  			return -ENOMEM;
> >  
> > -		skb->protocol = parse_ethertype(skb);
> > -		if (unlikely(skb->protocol == htons(0)))
> > +		key->eth.type = parse_ethertype(skb);
> > +		if (unlikely(key->eth.type == htons(0)))
> >  			return -ENOMEM;
> >  
> >  		skb_reset_network_header(skb);
> >  		__skb_push(skb, skb->data - skb_mac_header(skb));
> >  	}
> >  	skb_reset_mac_len(skb);
> > -	key->eth.type = skb->protocol;
> >  
> >  	/* Network layer. */
> >  	if (key->eth.type == htons(ETH_P_IP)) {
> 
> Unfortunately, this does not work. key_extract must set skb->protocol
> even for Ethernet frames that come from a mixed L2/L3 tunnel. Such
> packets will have key->mac_proto set to MAC_PROTO_ETHERNET and
> skb->protocol set to ETH_P_TEB (see key_extract_mac_proto). In
> key_extract, skb->protocol has to be correctly set to the dissected
> value.

AFAICS, it's always overridden to ETH_P_TEB on output by
ovs_vport_send() and that's the sole reason it works today.

For dissecting, the L2 case is currently setting skb->protocol to the
real ethertype (e.g. 0x800) not ETH_P_TEB. For RX from tunnel case it'll
indeed be ETH_P_TEB.

> 
> Which means that we have to check for the existence of inner vlan tag
> (by checking key->eth.cvlan.tci or, perhaps better, by returning it
> from parse_vlan) and set skb->protocol accordingly.
> 
>  Jiri

^ permalink raw reply

* Re: [PATCH net] openvswitch: Fix pop_vlan action for double tagged frames
From: Eric Garver @ 2017-12-19 20:42 UTC (permalink / raw)
  To: Jiri Benc; +Cc: netdev, ovs-dev
In-Reply-To: <20171219203929.12c1cb93@redhat.com>

On Tue, Dec 19, 2017 at 08:39:29PM +0100, Jiri Benc wrote:
> On Tue, 19 Dec 2017 13:57:53 -0500, Eric Garver wrote:
> > --- a/net/openvswitch/flow.c
> > +++ b/net/openvswitch/flow.c
> > @@ -559,8 +559,9 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
> >   *      of a correct length, otherwise the same as skb->network_header.
> >   *      For other key->eth.type values it is left untouched.
> >   *
> > - *    - skb->protocol: the type of the data starting at skb->network_header.
> > - *      Equals to key->eth.type.
> > + *    - skb->protocol: For Ethernet, the ethertype or VLAN TPID.
> > + *      For non-Ethernet, the type of the data starting at skb->network_header
> > + *      (also equal to key->eth.type).
> >   */
> >  static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> >  {
> > @@ -579,6 +580,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> >  			return -EINVAL;
> >  
> >  		skb_reset_network_header(skb);
> > +		key->eth.type = skb->protocol;
> >  	} else {
> >  		eth = eth_hdr(skb);
> >  		ether_addr_copy(key->eth.src, eth->h_source);
> > @@ -592,15 +594,14 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> >  		if (unlikely(parse_vlan(skb, key)))
> >  			return -ENOMEM;
> >  
> > -		skb->protocol = parse_ethertype(skb);
> > -		if (unlikely(skb->protocol == htons(0)))
> > +		key->eth.type = parse_ethertype(skb);
> > +		if (unlikely(key->eth.type == htons(0)))
> >  			return -ENOMEM;
> >  
> >  		skb_reset_network_header(skb);
> >  		__skb_push(skb, skb->data - skb_mac_header(skb));
> >  	}
> >  	skb_reset_mac_len(skb);
> > -	key->eth.type = skb->protocol;
> >  
> >  	/* Network layer. */
> >  	if (key->eth.type == htons(ETH_P_IP)) {
> 
> Unfortunately, this does not work. key_extract must set skb->protocol
> even for Ethernet frames that come from a mixed L2/L3 tunnel. Such
> packets will have key->mac_proto set to MAC_PROTO_ETHERNET and
> skb->protocol set to ETH_P_TEB (see key_extract_mac_proto). In
> key_extract, skb->protocol has to be correctly set to the dissected
> value.

AFAICS, it's always overridden to ETH_P_TEB on output by
ovs_vport_send() and that's the sole reason it works today.

For dissecting, the L2 case is currently setting skb->protocol to the
real ethertype (e.g. 0x800) not ETH_P_TEB. For RX from tunnel case it'll
indeed be ETH_P_TEB.

> 
> Which means that we have to check for the existence of inner vlan tag
> (by checking key->eth.cvlan.tci or, perhaps better, by returning it
> from parse_vlan) and set skb->protocol accordingly.
> 
>  Jiri

^ permalink raw reply

* Re: [PATCH net-next 4/4] sfc: expose CTPIO stats on NICs that support them
From: David Miller @ 2017-12-19 20:42 UTC (permalink / raw)
  To: jakub.kicinski; +Cc: ecree, linux-net-drivers, netdev
In-Reply-To: <20171219122019.7b3ac5d8@cakuba.netronome.com>

From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 19 Dec 2017 12:20:36 -0800

> On Tue, 19 Dec 2017 17:06:36 +0000, Edward Cree wrote:
>> From: Bert Kenward <bkenward@solarflare.com>
>> 
>> While the Linux driver doesn't use CTPIO ('cut-through programmed I/O'),
>>  other drivers on the same port might, so if we're responsible for
>>  reporting per-port stats we need to include the CTPIO stats.
>> 
>> Signed-off-by: Edward Cree <ecree@solarflare.com>
> 
> FWIW this is missing a sign-off from Bert.

Edward, please fix this and resubmit.

^ permalink raw reply

* Re: [RFC] hv_netvsc: automatically name slave VF network device
From: Stephen Hemminger @ 2017-12-19 20:44 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20171219123234.683f9b8d@cakuba.netronome.com>

On Tue, 19 Dec 2017 12:32:34 -0800
Jakub Kicinski <kubakici@wp.pl> wrote:

> On Tue, 19 Dec 2017 11:35:37 -0800, Stephen Hemminger wrote:
> > Rename the VF device to ethX_vf based on the ethX as the
> > synthetic device.  This eliminates the need for delay on setup,
> > and the PCI (udev based) naming is not reproducible on Hyper-V
> > anyway. The name of the VF does not matter since all control
> > operations take place the primary device. It does make the
> > user experience better to associate the names.
> > 
> > Based on feedback from all.systems.go talk.
> > The downside is that it requires exporting a symbol from netdev
> > core which makes it harder to backport.
> > 
> > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>  
> 
> Why do you have to name the devices in the kernel space in the first
> place? :/  Why don't upstream the correct change to biosdevname like
> hardware vendors do?

biosdevname is dead, gone and wouldn't work on Azure (it dumpster dives in /dev/mem).
I assume you mean the modern application is udev, and it works but the name is meaningless
because it is based on synthetic PCI information. The PCI host adapter is simulated
for pass-through devices, which yields names like enp12s0.

Since every passthrough VF device on Hyper-V/Azure has a matching synthetic
network device with the same MAC address, it is best to have the relationship
shown in the name.

> 
> Your VF setup is really _not_ special, I don't understand why we are 
> OK with ignoring the standard practices.  Real enterprise distroes
> are very careful never to break the naming of interfaces and they keep
> the naming policy in user space.  Playing tricks in the kernel has every
> chance of breaking existing user setups.

Actually, Systemd folks said "naming policy is in userspace only because
kernel can't get it right". Also, there is no uniformity in userspace:
there are at least 5 systems trying to do network setup, and most of
them depend on eth0 (yes, still). Fixing userspace is impossible.

^ permalink raw reply

* Re: [PATCH bpf]: Fix tools and testing build.
From: Daniel Borkmann @ 2017-12-19 20:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20171219.152203.371422797348974116.davem@davemloft.net>

On 12/19/2017 09:22 PM, David Miller wrote:
> 
> I'm getting various build failures on sparc64.  The key is
> usually that the userland tools get built 32-bit.
> 
> 1) clock_gettime() is in librt, so that must be added to the link
>    libraries.
> 
> 2) "sizeof(x)" must be printed with "%Z" printf prefix.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>

Applied to bpf tree, thanks David!

^ permalink raw reply

* Re: [net-next: PATCH 0/8] Armada 7k/8k PP2 ACPI support
From: Andrew Lunn @ 2017-12-19 20:46 UTC (permalink / raw)
  To: Marcin Wojtas
  Cc: David Miller, linux-kernel, linux-arm-kernel, netdev,
	Russell King - ARM Linux, Rafael J. Wysocki, Florian Fainelli,
	Antoine Ténart, Thomas Petazzoni, Gregory Clément,
	Ezequiel Garcia, nadavh, Neta Zur Hershkovits, Ard Biesheuvel,
	Grzegorz Jaszczyk, Tomasz Nowicki
In-Reply-To: <CAPv3WKddg82waiapD8JzH0pCsWVTE2iT0LFz7PCYrrDDpXqmbw@mail.gmail.com>

> Of course! v2 will not have such problem, I've been waiting however
> for the feedback about the ACPI representation. Anyway, I'm strongly
> leaning towards using _ADR/_CID objects in PHY's nodes for ACPI, so
> maybe I'll just issue the v2 in order to push the discussion a bit
> forward.
 
Hi Marcin

I know ~0 about ACPI. But what seems to be missing for me is
documentation. You are defining an ABI here, which all future MDIO
busses, PHYs, and to some extent Ethernet switches need to follow. So
I would expect this to be documented somewhere.

How does documentation work in the ACPI world?

    Andrew

^ permalink raw reply

* Re: thunderx sgmii interface hang
From: Andrew Lunn @ 2017-12-19 20:52 UTC (permalink / raw)
  To: Tim Harvey; +Cc: Sunil Goutham, netdev
In-Reply-To: <CAJ+vNU0yYkoAGZ6sU9VGfA6A04KU4s3rzZSb=1uxsqwKHioePQ@mail.gmail.com>

On Mon, Dec 18, 2017 at 01:53:47PM -0800, Tim Harvey wrote:
> On Wed, Dec 13, 2017 at 11:43 AM, Andrew Lunn <andrew@lunn.ch> wrote:
> >> The nic appears to work fine (pings, TCP etc) up until a performance
> >> test is attempted.
> >> When an iperf bandwidth test is attempted the nic ends up in a state
> >> where truncated-ip packets are being sent out (per a tcpdump from
> >> another board):
> >
> > Hi Tim
> >
> > Are pause frames supported? Have you tried turning them off?
> >
> > Can you reproduce the issue with UDP? Or is it TCP only?
> >
> 
> Andrew,
> 
> Pause frames don't appear to be supported yet and the issue occurs
> when using UDP as well as TCP. I'm not clear what the best way to
> troubleshoot this is.

Hi Tim

Is pause being negotiated? In theory, it should not be. The PHY should
not offer it, if the MAC has not enabled it. But some PHY drivers are
probably broken and offer pause when they should not.

Also, can you trigger the issue using UDP at say 75% the maximum
bandwidth. That should be low enough that the peer never even tries to
use pause.

All this pause stuff is just a stab in the dark. Something else to try
is to turn off various forms of acceleration, ethtool -K, and see if
that makes a difference.

     Andrew

^ permalink raw reply

* [PATCH bpf-next] libbpf: Fix build errors.
From: David Miller @ 2017-12-19 20:53 UTC (permalink / raw)
  To: daniel; +Cc: netdev

These elf object pieces are of type Elf64_Xword and therefore could be
"long long" on some builds.

Cast to "long long" and use printf format %lld to deal with this since
we are building with -Werror=format.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5b83875..e9c4b7c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -910,8 +910,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 				   GELF_R_SYM(rel.r_info));
 			return -LIBBPF_ERRNO__FORMAT;
 		}
-		pr_debug("relo for %ld value %ld name %d\n",
-			 rel.r_info >> 32, sym.st_value, sym.st_name);
+		pr_debug("relo for %lld value %lld name %d\n",
+			 (long long) (rel.r_info >> 32),
+			 (long long) sym.st_value, sym.st_name);

 		if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
 			pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",

^ permalink raw reply related

* Re: [PATCH net] openvswitch: Fix pop_vlan action for double tagged frames
From: Eric Garver @ 2017-12-19 20:59 UTC (permalink / raw)
  To: Jiri Benc, netdev, ovs-dev
In-Reply-To: <20171219204247.GD25853@dev-rhel7>

On Tue, Dec 19, 2017 at 03:42:47PM -0500, Eric Garver wrote:
> On Tue, Dec 19, 2017 at 08:39:29PM +0100, Jiri Benc wrote:
> > On Tue, 19 Dec 2017 13:57:53 -0500, Eric Garver wrote:
> > > --- a/net/openvswitch/flow.c
> > > +++ b/net/openvswitch/flow.c
> > > @@ -559,8 +559,9 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
> > >   *      of a correct length, otherwise the same as skb->network_header.
> > >   *      For other key->eth.type values it is left untouched.
> > >   *
> > > - *    - skb->protocol: the type of the data starting at skb->network_header.
> > > - *      Equals to key->eth.type.
> > > + *    - skb->protocol: For Ethernet, the ethertype or VLAN TPID.
> > > + *      For non-Ethernet, the type of the data starting at skb->network_header
> > > + *      (also equal to key->eth.type).
> > >   */
> > >  static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> > >  {
> > > @@ -579,6 +580,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> > >  			return -EINVAL;
> > >  
> > >  		skb_reset_network_header(skb);
> > > +		key->eth.type = skb->protocol;
> > >  	} else {
> > >  		eth = eth_hdr(skb);
> > >  		ether_addr_copy(key->eth.src, eth->h_source);
> > > @@ -592,15 +594,14 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> > >  		if (unlikely(parse_vlan(skb, key)))
> > >  			return -ENOMEM;
> > >  
> > > -		skb->protocol = parse_ethertype(skb);
> > > -		if (unlikely(skb->protocol == htons(0)))
> > > +		key->eth.type = parse_ethertype(skb);
> > > +		if (unlikely(key->eth.type == htons(0)))
> > >  			return -ENOMEM;
> > >  
> > >  		skb_reset_network_header(skb);
> > >  		__skb_push(skb, skb->data - skb_mac_header(skb));
> > >  	}
> > >  	skb_reset_mac_len(skb);
> > > -	key->eth.type = skb->protocol;
> > >  
> > >  	/* Network layer. */
> > >  	if (key->eth.type == htons(ETH_P_IP)) {
> > 
> > Unfortunately, this does not work. key_extract must set skb->protocol
> > even for Ethernet frames that come from a mixed L2/L3 tunnel. Such
> > packets will have key->mac_proto set to MAC_PROTO_ETHERNET and
> > skb->protocol set to ETH_P_TEB (see key_extract_mac_proto). In
> > key_extract, skb->protocol has to be correctly set to the dissected
> > value.
> 
> AFAICS, it's always overridden to ETH_P_TEB on output by
> ovs_vport_send() and that's the sole reason it works today.
> 
> For dissecting, the L2 case is currently setting skb->protocol to the
> real ethertype (e.g. 0x800) not ETH_P_TEB. For RX from tunnel case it'll
> indeed be ETH_P_TEB.

Of course after I hit send I realize what you were saying. I follow now
why skb->protocol needs to be the real ethertype.

> > 
> > Which means that we have to check for the existence of inner vlan tag
> > (by checking key->eth.cvlan.tci or, perhaps better, by returning it
> > from parse_vlan) and set skb->protocol accordingly.

I'll see what we can do here. Thanks Jiri.

^ permalink raw reply

* [PATCH net V2] net: always reevaluate autoflowlabel setting for reset packet
From: Shaohua Li @ 2017-12-19 20:58 UTC (permalink / raw)
  To: netdev, davem; +Cc: Kernel Team, Shaohua Li, Martin KaFai Lau

From: Shaohua Li <shli@fb.com>

ipv6_pinfo.autoflowlabel is set in sock creation. Later if we change
sysctl.ipv6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't changed,
so the sock will keep the old behavior in terms of auto flowlabel. Reset
packet is suffering from this problem, because reset packet is sent
from a special control socket, which is created at boot time. Since
sysctl.ipv6.auto_flowlabels is 2 by default, the control socket will
always have its ipv6_pinfo.autoflowlabel set, even after user set
sysctl.ipv6.auto_flowlabels to 1, so reset packet will always have
flowlabel.

To fix this, we always reevaluate autoflowlabel setting for reset
packet. Normal sock has the same issue too, but since the
sysctl.ipv6.auto_flowlabels is usually set at host startup, this isn't a
big issue for normal sock.

Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 net/ipv6/tcp_ipv6.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7178476..5fba548 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -787,9 +787,11 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
 	unsigned int tot_len = sizeof(struct tcphdr);
+	struct ipv6_pinfo *np = inet6_sk(ctl_sk);
 	struct dst_entry *dst;
 	__be32 *topt;

+	np->autoflowlabel = ip6_default_np_autolabel(net);
 	if (tsecr)
 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
 #ifdef CONFIG_TCP_MD5SIG
-- 
2.9.5

^ permalink raw reply related

* [PATCH net-next] net: Clarify dev_weight documentation for LRO and GRO_HW.
From: Michael Chan @ 2017-12-19 21:12 UTC (permalink / raw)
  To: davem; +Cc: netdev

Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 Documentation/sysctl/net.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index b67044a..35c62f5 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -95,7 +95,9 @@ dev_weight
 --------------
 
 The maximum number of packets that kernel can handle on a NAPI interrupt,
-it's a Per-CPU variable.
+it's a Per-CPU variable. For drivers that support LRO or GRO_HW, a hardware
+aggregated packet is counted as one packet in this context.
+
 Default: 64
 
 dev_weight_rx_bias
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH v1] net: amd-xgbe: Get rid of custom hex_dump_to_buffer()
From: Tom Lendacky @ 2017-12-19 21:16 UTC (permalink / raw)
  To: Andy Shevchenko, David S. Miller, netdev
In-Reply-To: <20171219180231.13347-1-andriy.shevchenko@linux.intel.com>

On 12/19/2017 12:02 PM, Andy Shevchenko wrote:
> Get rid of yet another custom hex_dump_to_buffer().
> 
> The output is slightly changed, i.e. each byte followed by white space.
> 
> Note, we don't use print_hex_dump() here since the original code uses
> netdev_dbg().
> 
> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
>  drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 23 ++++++-----------------
>  1 file changed, 6 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
> index a74a8fbad53a..fc58dc43a5bd 100644
> --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
> +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
> @@ -2930,9 +2930,8 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *pdata, struct xgbe_ring *ring,
>  void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx)
>  {
>  	struct ethhdr *eth = (struct ethhdr *)skb->data;
> -	unsigned char *buf = skb->data;
>  	unsigned char buffer[128];
> -	unsigned int i, j;
> +	unsigned int i;
>  
>  	netdev_dbg(netdev, "\n************** SKB dump ****************\n");
>  
> @@ -2943,22 +2942,12 @@ void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx)
>  	netdev_dbg(netdev, "Src MAC addr: %pM\n", eth->h_source);
>  	netdev_dbg(netdev, "Protocol: %#06hx\n", ntohs(eth->h_proto));
>  
> -	for (i = 0, j = 0; i < skb->len;) {
> -		j += snprintf(buffer + j, sizeof(buffer) - j, "%02hhx",
> -			      buf[i++]);
> -
> -		if ((i % 32) == 0) {
> -			netdev_dbg(netdev, "  %#06x: %s\n", i - 32, buffer);
> -			j = 0;
> -		} else if ((i % 16) == 0) {
> -			buffer[j++] = ' ';
> -			buffer[j++] = ' ';
> -		} else if ((i % 4) == 0) {
> -			buffer[j++] = ' ';
> -		}
> +	for (i = 0; i < skb->len; i += 32) {
> +		unsigned int len = min(skb->len - i, 32U);
> +
> +		hex_dump_to_buffer(&skb->data[i], len, 32, 1, buffer, 128, false);

I'd prefer to see sizeof(buffer) vs the hard-coded 128 here.  Also, this
line exceeds 80 characters, so it should be split.

With those changes:

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>

> +		netdev_dbg(netdev, "  %#06x: %s\n", i, buffer);
>  	}
> -	if (i % 32)
> -		netdev_dbg(netdev, "  %#06x: %s\n", i - (i % 32), buffer);
>  
>  	netdev_dbg(netdev, "\n************** SKB dump ****************\n");
>  }
> 

^ permalink raw reply

* Re: [RFC] hv_netvsc: automatically name slave VF network device
From: Jakub Kicinski @ 2017-12-19 21:18 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Stephen Hemminger, Jiri Pirko
In-Reply-To: <20171219124425.56033614@xeon-e3>

On Tue, 19 Dec 2017 12:44:25 -0800, Stephen Hemminger wrote:
> On Tue, 19 Dec 2017 12:32:34 -0800
> Jakub Kicinski <kubakici@wp.pl> wrote:
> 
> > On Tue, 19 Dec 2017 11:35:37 -0800, Stephen Hemminger wrote:  
> > > Rename the VF device to ethX_vf based on the ethX as the
> > > synthetic device.  This eliminates the need for delay on setup,
> > > and the PCI (udev based) naming is not reproducible on Hyper-V
> > > anyway. The name of the VF does not matter since all control
> > > operations take place the primary device. It does make the
> > > user experience better to associate the names.
> > > 
> > > Based on feedback from all.systems.go talk.
> > > The downside is that it requires exporting a symbol from netdev
> > > core which makes it harder to backport.
> > > 
> > > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>    
> > 
> > Why do you have to name the devices in the kernel space in the first
> > place? :/  Why don't upstream the correct change to biosdevname like
> > hardware vendors do?  
> 
> biosdevname is dead, gone and wouldn't work on Azure (it dumpster dives in /dev/mem).

Hm, I haven't worked on biosdevname myself, but AFAIU it also falls 
back to information from the PCI VPD, which could be populated by 
the hypervisor.

> I assume you mean the modern application is udev, and it works but the name is meaningless
> because it based of synthetic PCI information. The PCI host adapter is simulated
> for pass through devices. Names like enp12s0.
> 
> Since every passthrough VF device on Hyper-V/Azure has a matching synthetic
> network device with same mac address. It is best to have the relationship
> shown in the name.

How about we make the VF drivers expose "vf" as phys_port_name?
Then systemd/udev should glue that onto the name regardless of
how the VF is used.

> > Your VF setup is really _not_ special, I don't understand why we are 
> > OK with ignoring the standard practices.  Real enterprise distroes
> > are very careful never to break the naming of interfaces and they keep
> > the naming policy in user space.  Playing tricks in the kernel has every
> > chance of breaking existing user setups.  
> 
> Actually, Systemd folks said "naming policy is in userspace only because
> kernel can't get it right". Also there is no uniformity in userspace
> there are at least 5 systems trying to do network setup. And most of
> them depend on eth0 (yes still). Fixing userspace is impossible.

^ permalink raw reply

* [PATCH v2] net: amd-xgbe: Get rid of custom hex_dump_to_buffer()
From: Andy Shevchenko @ 2017-12-19 21:22 UTC (permalink / raw)
  To: Tom Lendacky, David S . Miller, netdev; +Cc: Andy Shevchenko

Get rid of yet another custom hex_dump_to_buffer().

The output is slightly changed, i.e. each byte is followed by white space.

Note, we don't use print_hex_dump() here since the original code uses
netdev_dbg().

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index a74a8fbad53a..7a3ebfd236f5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -2930,9 +2930,8 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *pdata, struct xgbe_ring *ring,
 void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx)
 {
 	struct ethhdr *eth = (struct ethhdr *)skb->data;
-	unsigned char *buf = skb->data;
 	unsigned char buffer[128];
-	unsigned int i, j;
+	unsigned int i;
 
 	netdev_dbg(netdev, "\n************** SKB dump ****************\n");
 
@@ -2943,22 +2942,13 @@ void xgbe_print_pkt(struct net_device *netdev, struct sk_buff *skb, bool tx_rx)
 	netdev_dbg(netdev, "Src MAC addr: %pM\n", eth->h_source);
 	netdev_dbg(netdev, "Protocol: %#06hx\n", ntohs(eth->h_proto));
 
-	for (i = 0, j = 0; i < skb->len;) {
-		j += snprintf(buffer + j, sizeof(buffer) - j, "%02hhx",
-			      buf[i++]);
-
-		if ((i % 32) == 0) {
-			netdev_dbg(netdev, "  %#06x: %s\n", i - 32, buffer);
-			j = 0;
-		} else if ((i % 16) == 0) {
-			buffer[j++] = ' ';
-			buffer[j++] = ' ';
-		} else if ((i % 4) == 0) {
-			buffer[j++] = ' ';
-		}
+	for (i = 0; i < skb->len; i += 32) {
+		unsigned int len = min(skb->len - i, 32U);
+
+		hex_dump_to_buffer(&skb->data[i], len, 32, 1,
+				   buffer, sizeof(buffer), false);
+		netdev_dbg(netdev, "  %#06x: %s\n", i, buffer);
 	}
-	if (i % 32)
-		netdev_dbg(netdev, "  %#06x: %s\n", i - (i % 32), buffer);
 
 	netdev_dbg(netdev, "\n************** SKB dump ****************\n");
 }
-- 
2.15.1

^ permalink raw reply related

* Re: [RFC] hv_netvsc: automatically name slave VF network device
From: Stephen Hemminger @ 2017-12-19 21:29 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, Stephen Hemminger, Jiri Pirko
In-Reply-To: <20171219131816.70645a7b@cakuba.netronome.com>

On Tue, 19 Dec 2017 13:18:16 -0800
Jakub Kicinski <kubakici@wp.pl> wrote:

> On Tue, 19 Dec 2017 12:44:25 -0800, Stephen Hemminger wrote:
> > On Tue, 19 Dec 2017 12:32:34 -0800
> > Jakub Kicinski <kubakici@wp.pl> wrote:
> >   
> > > On Tue, 19 Dec 2017 11:35:37 -0800, Stephen Hemminger wrote:    
> > > > Rename the VF device to ethX_vf based on the ethX as the
> > > > synthetic device.  This eliminates the need for delay on setup,
> > > > and the PCI (udev based) naming is not reproducible on Hyper-V
> > > > anyway. The name of the VF does not matter since all control
> > > > operations take place the primary device. It does make the
> > > > user experience better to associate the names.
> > > > 
> > > > Based on feedback from all.systems.go talk.
> > > > The downside is that it requires exporting a symbol from netdev
> > > > core which makes it harder to backport.
> > > > 
> > > > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>      
> > > 
> > > Why do you have to name the devices in the kernel space in the first
> > > place? :/  Why don't upstream the correct change to biosdevname like
> > > hardware vendors do?    
> > 
> > biosdevname is dead, gone and wouldn't work on Azure (it dumpster dives in /dev/mem).  
> 
> Hm, I haven't worked on biosdevname myself, but AFAIU it also falls 
> back to information from the PCI VPD, which could be populated by 
> the hypervisor.

VPD never had any useful standardized info.
The rules used by udev come off sysfs, see:
  https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/

> 
> > I assume you mean the modern application is udev, and it works but the name is meaningless
> > because it based of synthetic PCI information. The PCI host adapter is simulated
> > for pass through devices. Names like enp12s0.
> > 
> > Since every passthrough VF device on Hyper-V/Azure has a matching synthetic
> > network device with same mac address. It is best to have the relationship
> > shown in the name.  
> 
> How about we make the VF drivers expose "vf" as phys_port_name?
> Then systemd/udev should glue that onto the name regardless of
> how the VF is used.

One of the goals was not to modify in any way other drivers (like VF).

> 
> > > Your VF setup is really _not_ special, I don't understand why we are 
> > > OK with ignoring the standard practices.  Real enterprise distroes
> > > are very careful never to break the naming of interfaces and they keep
> > > the naming policy in user space.  Playing tricks in the kernel has every
> > > chance of breaking existing user setups.    
> > 
> > Actually, Systemd folks said "naming policy is in userspace only because
> > kernel can't get it right". Also there is no uniformity in userspace
> > there are at least 5 systems trying to do network setup. And most of
> > them depend on eth0 (yes still). Fixing userspace is impossible.  

^ permalink raw reply

* [PATCH net 0/2] cls_bpf: fix offload state tracking with block callbacks
From: Jakub Kicinski @ 2017-12-19 21:32 UTC (permalink / raw)
  To: netdev; +Cc: daniel, jiri, oss-drivers, Jakub Kicinski

Hi!

After the introduction of block callbacks, classifiers can no longer track
offload state.  cls_bpf used to do that in an attempt to move common
code from drivers to the core.  Remove that functionality and fix
drivers.

The user-visible bug this is fixing is that trying to offload a second
filter would trigger a spurious DESTROY and in turn disable the already
installed one.

Jakub Kicinski (2):
  cls_bpf: fix offload assumptions after callback conversion
  nfp: bpf: keep track of the offloaded program

 drivers/net/ethernet/netronome/nfp/bpf/main.c | 55 ++++++++++++----
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  8 +++
 include/net/pkt_cls.h                         |  5 +-
 net/sched/cls_bpf.c                           | 93 +++++++++++----------------
 4 files changed, 92 insertions(+), 69 deletions(-)

-- 
2.15.1

^ permalink raw reply

* [PATCH net 1/2] cls_bpf: fix offload assumptions after callback conversion
From: Jakub Kicinski @ 2017-12-19 21:32 UTC (permalink / raw)
  To: netdev; +Cc: daniel, jiri, oss-drivers, Jakub Kicinski
In-Reply-To: <20171219213214.1084-1-jakub.kicinski@netronome.com>

cls_bpf used to take care of tracking what offload state a filter
is in, i.e. it would track if offload request succeeded or not.
This information would then be used to issue correct requests to
the driver, e.g. requests for statistics only on offloaded filters,
removing only filters which were offloaded, using add instead of
replace if previous filter was not added etc.

This tracking of offload state no longer functions with the new
callback infrastructure.  There could be multiple entities trying
to offload the same filter.

Throw out all the tracking and corresponding commands and simply
pass to the drivers both old and new bpf program.  Drivers will
have to deal with offload state tracking by themselves.

Fixes: 3f7889c4c79b ("net: sched: cls_bpf: call block callbacks for offload")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c | 12 +---
 include/net/pkt_cls.h                         |  5 +-
 net/sched/cls_bpf.c                           | 93 +++++++++++----------------
 3 files changed, 43 insertions(+), 67 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index e379b78e86ef..a4cf62ba4604 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -110,16 +110,10 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 		return -EOPNOTSUPP;
 	}
 
-	switch (cls_bpf->command) {
-	case TC_CLSBPF_REPLACE:
-		return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
-	case TC_CLSBPF_ADD:
-		return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
-	case TC_CLSBPF_DESTROY:
-		return nfp_net_bpf_offload(nn, NULL, true);
-	default:
+	if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
 		return -EOPNOTSUPP;
-	}
+
+	return nfp_net_bpf_offload(nn, cls_bpf->prog, cls_bpf->oldprog);
 }
 
 static int nfp_bpf_setup_tc_block(struct net_device *netdev,
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0105445cab83..8e08b6da72f3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
 };
 
 enum tc_clsbpf_command {
-	TC_CLSBPF_ADD,
-	TC_CLSBPF_REPLACE,
-	TC_CLSBPF_DESTROY,
+	TC_CLSBPF_OFFLOAD,
 	TC_CLSBPF_STATS,
 };
 
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
 	enum tc_clsbpf_command command;
 	struct tcf_exts *exts;
 	struct bpf_prog *prog;
+	struct bpf_prog *oldprog;
 	const char *name;
 	bool exts_integrated;
 	u32 gen_flags;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6fe798c2df1a..8d78e7f4ecc3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
 	struct list_head link;
 	struct tcf_result res;
 	bool exts_integrated;
-	bool offloaded;
 	u32 gen_flags;
 	struct tcf_exts exts;
 	u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 }
 
 static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
-			       enum tc_clsbpf_command cmd)
+			       struct cls_bpf_prog *oldprog)
 {
-	bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
 	struct tcf_block *block = tp->chain->block;
-	bool skip_sw = tc_skip_sw(prog->gen_flags);
 	struct tc_cls_bpf_offload cls_bpf = {};
+	struct cls_bpf_prog *obj;
+	bool skip_sw;
 	int err;
 
+	skip_sw = prog && tc_skip_sw(prog->gen_flags);
+	obj = prog ?: oldprog;
+
 	tc_cls_common_offload_init(&cls_bpf.common, tp);
-	cls_bpf.command = cmd;
-	cls_bpf.exts = &prog->exts;
-	cls_bpf.prog = prog->filter;
-	cls_bpf.name = prog->bpf_name;
-	cls_bpf.exts_integrated = prog->exts_integrated;
-	cls_bpf.gen_flags = prog->gen_flags;
+	cls_bpf.command = TC_CLSBPF_OFFLOAD;
+	cls_bpf.exts = &obj->exts;
+	cls_bpf.prog = prog ? prog->filter : NULL;
+	cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+	cls_bpf.name = obj->bpf_name;
+	cls_bpf.exts_integrated = obj->exts_integrated;
+	cls_bpf.gen_flags = obj->gen_flags;
 
 	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-	if (addorrep) {
+	if (prog) {
 		if (err < 0) {
-			cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+			cls_bpf_offload_cmd(tp, oldprog, prog);
 			return err;
 		} else if (err > 0) {
 			prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
 		}
 	}
 
-	if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+	if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
 		return -EINVAL;
 
 	return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			   struct cls_bpf_prog *oldprog)
 {
-	struct cls_bpf_prog *obj = prog;
-	enum tc_clsbpf_command cmd;
-	bool skip_sw;
-	int ret;
-
-	skip_sw = tc_skip_sw(prog->gen_flags) ||
-		(oldprog && tc_skip_sw(oldprog->gen_flags));
-
-	if (oldprog && oldprog->offloaded) {
-		if (!tc_skip_hw(prog->gen_flags)) {
-			cmd = TC_CLSBPF_REPLACE;
-		} else if (!tc_skip_sw(prog->gen_flags)) {
-			obj = oldprog;
-			cmd = TC_CLSBPF_DESTROY;
-		} else {
-			return -EINVAL;
-		}
-	} else {
-		if (tc_skip_hw(prog->gen_flags))
-			return skip_sw ? -EINVAL : 0;
-		cmd = TC_CLSBPF_ADD;
-	}
-
-	ret = cls_bpf_offload_cmd(tp, obj, cmd);
-	if (ret)
-		return ret;
+	if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+		return -EINVAL;
 
-	obj->offloaded = true;
-	if (oldprog)
-		oldprog->offloaded = false;
+	if (prog && tc_skip_hw(prog->gen_flags))
+		prog = NULL;
+	if (oldprog && tc_skip_hw(oldprog->gen_flags))
+		oldprog = NULL;
+	if (!prog && !oldprog)
+		return 0;
 
-	return 0;
+	return cls_bpf_offload_cmd(tp, prog, oldprog);
 }
 
 static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
 {
 	int err;
 
-	if (!prog->offloaded)
-		return;
-
-	err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
-	if (err) {
+	err = cls_bpf_offload_cmd(tp, NULL, prog);
+	if (err)
 		pr_err("Stopping hardware offload failed: %d\n", err);
-		return;
-	}
-
-	prog->offloaded = false;
 }
 
 static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
 					 struct cls_bpf_prog *prog)
 {
-	if (!prog->offloaded)
-		return;
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_bpf_offload cls_bpf = {};
+
+	tc_cls_common_offload_init(&cls_bpf.common, tp);
+	cls_bpf.command = TC_CLSBPF_STATS;
+	cls_bpf.exts = &prog->exts;
+	cls_bpf.prog = prog->filter;
+	cls_bpf.name = prog->bpf_name;
+	cls_bpf.exts_integrated = prog->exts_integrated;
+	cls_bpf.gen_flags = prog->gen_flags;
 
-	cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
-- 
2.15.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox