Netdev List
 help / color / mirror / Atom feed
* [net-next 09/15] net/mlx5e: Extend encap entry with reference counter
From: Saeed Mahameed @ 2019-08-09 22:04 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Vlad Buslov, Jianbo Liu, Roi Dayan,
	Saeed Mahameed
In-Reply-To: <20190809220359.11516-1-saeedm@mellanox.com>

From: Vlad Buslov <vladbu@mellanox.com>

List of flows attached to encap entry is used as implicit reference
counter (encap entry is deallocated when list becomes free) and as a
mechanism to obtain encap entry that flow is attached to (through list
head). This is not safe when concurrent modification of list of flows
attached to encap entry is possible. Proper atomic reference counter is
required to support concurrent access.

As a preparation for extending encap with reference counting, extract code
that lookups and deletes encap entry into standalone put/get helpers. In
order to remove this dependency on external locking, extend encap entry
with reference counter to manage its lifetime and extend flow structure
with direct pointer to encap entry that flow is attached to.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  5 ++
 .../net/ethernet/mellanox/mlx5/core/en_rep.h  |  1 +
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 84 ++++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/en_tc.h   |  2 +
 4 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index b7f113e996e5..cd957ff4e207 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -613,12 +613,17 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
 	neigh_connected = (nud_state & NUD_VALID) && !dead;
 
 	list_for_each_entry(e, &nhe->encap_list, encap_list) {
+		if (!mlx5e_encap_take(e))
+			continue;
+
 		encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
 		priv = netdev_priv(e->out_dev);
 
 		if (encap_connected != neigh_connected ||
 		    !ether_addr_equal(e->h_dest, ha))
 			mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+
+		mlx5e_encap_put(priv, e);
 	}
 	mlx5e_rep_neigh_entry_release(nhe);
 	rtnl_unlock();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 43eeebe9c8d2..2e970d0729be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -164,6 +164,7 @@ struct mlx5e_encap_entry {
 	u8 flags;
 	char *encap_header;
 	int encap_size;
+	refcount_t refcnt;
 };
 
 struct mlx5e_rep_sq {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fcaf9ab9e373..4e378200a9d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -103,6 +103,7 @@ enum {
  *        container_of(helper item, containing struct type, helper field[index])
  */
 struct encap_flow_item {
+	struct mlx5e_encap_entry *e; /* attached encap instance */
 	struct list_head list;
 	int index;
 };
@@ -1433,8 +1434,11 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 
 	list_for_each_entry(e, &nhe->encap_list, encap_list) {
 		struct encap_flow_item *efi, *tmp;
-		if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
+
+		if (!(e->flags & MLX5_ENCAP_ENTRY_VALID) ||
+		    !mlx5e_encap_take(e))
 			continue;
+
 		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
 			flow = container_of(efi, struct mlx5e_tc_flow,
 					    encaps[efi->index]);
@@ -1453,6 +1457,8 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 
 			mlx5e_flow_put(netdev_priv(e->out_dev), flow);
 		}
+
+		mlx5e_encap_put(netdev_priv(e->out_dev), e);
 		if (neigh_used)
 			break;
 	}
@@ -1472,29 +1478,33 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 	}
 }
 
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+	if (!refcount_dec_and_test(&e->refcnt))
+		return;
+
+	WARN_ON(!list_empty(&e->flows));
+	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+	if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+		mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
+
+	hash_del_rcu(&e->encap_hlist);
+	kfree(e->encap_header);
+	kfree(e);
+}
+
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 			       struct mlx5e_tc_flow *flow, int out_index)
 {
-	struct list_head *next = flow->encaps[out_index].list.next;
-
 	/* flow wasn't fully initialized */
-	if (list_empty(&flow->encaps[out_index].list))
+	if (!flow->encaps[out_index].e)
 		return;
 
 	list_del(&flow->encaps[out_index].list);
-	if (list_empty(next)) {
-		struct mlx5e_encap_entry *e;
-
-		e = list_entry(next, struct mlx5e_encap_entry, flows);
-		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 
-		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
-			mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
-
-		hash_del_rcu(&e->encap_hlist);
-		kfree(e->encap_header);
-		kfree(e);
-	}
+	mlx5e_encap_put(priv, flow->encaps[out_index].e);
+	flow->encaps[out_index].e = NULL;
 }
 
 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
@@ -2817,6 +2827,31 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
 
 
 
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
+{
+	return refcount_inc_not_zero(&e->refcnt);
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
+		uintptr_t hash_key)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_encap_entry *e;
+	struct encap_key e_key;
+
+	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+				   encap_hlist, hash_key) {
+		e_key.ip_tun_key = &e->tun_info->key;
+		e_key.tc_tunnel = e->tunnel;
+		if (!cmp_encap_info(&e_key, key) &&
+		    mlx5e_encap_take(e))
+			return e;
+	}
+
+	return NULL;
+}
+
 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow,
 			      struct net_device *mirred_dev,
@@ -2829,11 +2864,10 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
 	const struct ip_tunnel_info *tun_info;
-	struct encap_key key, e_key;
+	struct encap_key key;
 	struct mlx5e_encap_entry *e;
 	unsigned short family;
 	uintptr_t hash_key;
-	bool found = false;
 	int err = 0;
 
 	parse_attr = attr->parse_attr;
@@ -2848,24 +2882,17 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 
 	hash_key = hash_encap_info(&key);
 
-	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
-				   encap_hlist, hash_key) {
-		e_key.ip_tun_key = &e->tun_info->key;
-		e_key.tc_tunnel = e->tunnel;
-		if (!cmp_encap_info(&e_key, &key)) {
-			found = true;
-			break;
-		}
-	}
+	e = mlx5e_encap_get(priv, &key, hash_key);
 
 	/* must verify if encap is valid or not */
-	if (found)
+	if (e)
 		goto attach_flow;
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
 	if (!e)
 		return -ENOMEM;
 
+	refcount_set(&e->refcnt, 1);
 	e->tun_info = tun_info;
 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
 	if (err)
@@ -2884,6 +2911,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
 
 attach_flow:
+	flow->encaps[out_index].e = e;
 	list_add(&flow->encaps[out_index].list, &e->flows);
 	flow->encaps[out_index].index = out_index;
 	*encap_dev = e->out_dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 20f045e96c92..ea2072e2fe84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -75,6 +75,8 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 			      struct mlx5e_encap_entry *e);
 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
 			      struct mlx5e_encap_entry *e);
+bool mlx5e_encap_take(struct mlx5e_encap_entry *e);
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e);
 
 struct mlx5e_neigh_hash_entry;
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
-- 
2.21.0


^ permalink raw reply related

* [net-next 15/15] net/mlx5e: Use refcount_t for refcount
From: Saeed Mahameed @ 2019-08-09 22:04 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev@vger.kernel.org, Chuhong Yuan, Saeed Mahameed
In-Reply-To: <20190809220359.11516-1-saeedm@mellanox.com>

From: Chuhong Yuan <hslester96@gmail.com>

refcount_t is better for reference counters since its
implementation can prevent overflows.
So convert atomic_t ref counters to refcount_t.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
index b9d4f4e19ff9..148b55c3db7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -32,6 +32,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/refcount.h>
 #include <linux/mlx5/driver.h>
 #include <net/vxlan.h>
 #include "mlx5_core.h"
@@ -48,7 +49,7 @@ struct mlx5_vxlan {
 
 struct mlx5_vxlan_port {
 	struct hlist_node hlist;
-	atomic_t refcount;
+	refcount_t refcount;
 	u16 udp_port;
 };
 
@@ -113,7 +114,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
 
 	vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
 	if (vxlanp) {
-		atomic_inc(&vxlanp->refcount);
+		refcount_inc(&vxlanp->refcount);
 		return 0;
 	}
 
@@ -137,7 +138,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
 	}
 
 	vxlanp->udp_port = port;
-	atomic_set(&vxlanp->refcount, 1);
+	refcount_set(&vxlanp->refcount, 1);
 
 	spin_lock_bh(&vxlan->lock);
 	hash_add(vxlan->htable, &vxlanp->hlist, port);
@@ -170,7 +171,7 @@ int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
 		goto out_unlock;
 	}
 
-	if (atomic_dec_and_test(&vxlanp->refcount)) {
+	if (refcount_dec_and_test(&vxlanp->refcount)) {
 		hash_del(&vxlanp->hlist);
 		remove = true;
 	}
-- 
2.21.0


^ permalink raw reply related

* [net-next 11/15] net/mlx5e: Allow concurrent creation of encap entries
From: Saeed Mahameed @ 2019-08-09 22:04 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Vlad Buslov, Roi Dayan, Saeed Mahameed
In-Reply-To: <20190809220359.11516-1-saeedm@mellanox.com>

From: Vlad Buslov <vladbu@mellanox.com>

Encap entries creation is fully synchronized by encap_tbl_lock. In order to
allow concurrent allocation of hardware resources used to offload
encapsulation, extend mlx5e_encap_entry with 'res_ready' completion. Move
call to mlx5e_tc_tun_create_header_ipv{4|6}() out of encap_tbl_lock
critical section. Modify code that attaches new flows to existing encap to
wait for 'res_ready' completion before using the entry. Insert encap entry
to table before provisioning it to hardware and modify all users of the
encap table to verify that encap was fully initialized by checking
completion result for non-zero value (and to wait for 'res_ready'
completion, if necessary).

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rep.h  |  2 ++
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 33 +++++++++++++++----
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 2e970d0729be..8ac96727cad8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -165,6 +165,8 @@ struct mlx5e_encap_entry {
 	char *encap_header;
 	int encap_size;
 	refcount_t refcnt;
+	struct completion res_ready;
+	int compl_result;
 };
 
 struct mlx5e_rep_sq {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index c13db9bc1f9b..5be3da621499 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2904,8 +2904,18 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	e = mlx5e_encap_get(priv, &key, hash_key);
 
 	/* must verify if encap is valid or not */
-	if (e)
+	if (e) {
+		mutex_unlock(&esw->offloads.encap_tbl_lock);
+		wait_for_completion(&e->res_ready);
+
+		/* Protect against concurrent neigh update. */
+		mutex_lock(&esw->offloads.encap_tbl_lock);
+		if (e->compl_result) {
+			err = -EREMOTEIO;
+			goto out_err;
+		}
 		goto attach_flow;
+	}
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
 	if (!e) {
@@ -2914,22 +2924,32 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	}
 
 	refcount_set(&e->refcnt, 1);
+	init_completion(&e->res_ready);
+
 	e->tun_info = tun_info;
 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
-	if (err)
+	if (err) {
+		kfree(e);
+		e = NULL;
 		goto out_err;
+	}
 
 	INIT_LIST_HEAD(&e->flows);
+	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
 
 	if (family == AF_INET)
 		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
 	else if (family == AF_INET6)
 		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
 
-	if (err)
+	/* Protect against concurrent neigh update. */
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	complete_all(&e->res_ready);
+	if (err) {
+		e->compl_result = err;
 		goto out_err;
-
-	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+	}
 
 attach_flow:
 	flow->encaps[out_index].e = e;
@@ -2949,7 +2969,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 
 out_err:
 	mutex_unlock(&esw->offloads.encap_tbl_lock);
-	kfree(e);
+	if (e)
+		mlx5e_encap_put(priv, e);
 	return err;
 }
 
-- 
2.21.0


^ permalink raw reply related

* [net-next 03/15] net/mlx5e: Protect hairpin hash table with mutex
From: Saeed Mahameed @ 2019-08-09 22:04 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Vlad Buslov, Roi Dayan, Saeed Mahameed
In-Reply-To: <20190809220359.11516-1-saeedm@mellanox.com>

From: Vlad Buslov <vladbu@mellanox.com>

To remove dependency on rtnl lock, protect hairpin hash table from
concurrent modifications with new "hairpin_tbl_lock" mutex.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/fs.h   |  1 +
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 21 +++++++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 4518ce19112e..100506a3dd58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -17,6 +17,7 @@ struct mlx5e_tc_table {
 	struct rhashtable               ht;
 
 	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+	struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
 	DECLARE_HASHTABLE(hairpin_tbl, 8);
 
 	struct notifier_block     netdevice_nb;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0abfa9b3ec54..a7acb7fcbf5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -652,15 +652,16 @@ static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
 			      struct mlx5e_hairpin_entry *hpe)
 {
 	/* no more hairpin flows for us, release the hairpin pair */
-	if (!refcount_dec_and_test(&hpe->refcnt))
+	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
 		return;
+	hash_del(&hpe->hairpin_hlist);
+	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 
 	netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
 		   dev_name(hpe->hp->pair->peer_mdev->device));
 
 	WARN_ON(!list_empty(&hpe->flows));
 	mlx5e_hairpin_destroy(hpe->hp);
-	hash_del(&hpe->hairpin_hlist);
 	kfree(hpe);
 }
 
@@ -729,13 +730,17 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 				     extack);
 	if (err)
 		return err;
+
+	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
 	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
 	if (hpe)
 		goto attach_flow;
 
 	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
-	if (!hpe)
-		return -ENOMEM;
+	if (!hpe) {
+		err = -ENOMEM;
+		goto create_hairpin_err;
+	}
 
 	spin_lock_init(&hpe->flows_lock);
 	INIT_LIST_HEAD(&hpe->flows);
@@ -784,6 +789,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 	} else {
 		flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
 	}
+	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
+
 	flow->hpe = hpe;
 	spin_lock(&hpe->flows_lock);
 	list_add(&flow->hairpin, &hpe->flows);
@@ -792,6 +799,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 	return 0;
 
 create_hairpin_err:
+	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 	kfree(hpe);
 	return err;
 }
@@ -3768,10 +3776,12 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
 
 	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
 
+	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
 	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
 		if (hpe->peer_vhca_id == peer_vhca_id)
 			hpe->hp->pair->peer_gone = true;
 	}
+	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
 }
 
 static int mlx5e_tc_netdev_event(struct notifier_block *this,
@@ -3808,6 +3818,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
 	mutex_init(&tc->t_lock);
 	hash_init(tc->mod_hdr_tbl);
+	mutex_init(&tc->hairpin_tbl_lock);
 	hash_init(tc->hairpin_tbl);
 
 	err = rhashtable_init(&tc->ht, &tc_ht_params);
@@ -3839,6 +3850,8 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
 	if (tc->netdevice_nb.notifier_call)
 		unregister_netdevice_notifier(&tc->netdevice_nb);
 
+	mutex_destroy(&tc->hairpin_tbl_lock);
+
 	rhashtable_destroy(&tc->ht);
 
 	if (!IS_ERR_OR_NULL(tc->t)) {
-- 
2.21.0


^ permalink raw reply related

* [net-next 10/15] net/mlx5e: Protect encap hash table with mutex
From: Saeed Mahameed @ 2019-08-09 22:04 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev@vger.kernel.org, Vlad Buslov, Roi Dayan, Saeed Mahameed
In-Reply-To: <20190809220359.11516-1-saeedm@mellanox.com>

From: Vlad Buslov <vladbu@mellanox.com>

To remove dependency on rtnl lock, protect encap hash table from concurrent
modifications with new "encap_tbl_lock" mutex. Use the mutex to protect
internal encap entry state from concurrent modification. This is necessary
because a flow can be attached to multiple encap entries simultaneously,
which significantly complicates using finer grained per-entry lock.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 43 ++++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/eswitch.c |  2 +
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  1 +
 3 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 4e378200a9d2..c13db9bc1f9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1478,33 +1478,51 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 	}
 }
 
-void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
 {
-	if (!refcount_dec_and_test(&e->refcnt))
-		return;
-
 	WARN_ON(!list_empty(&e->flows));
 	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 
 	if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 		mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
 
-	hash_del_rcu(&e->encap_hlist);
 	kfree(e->encap_header);
 	kfree(e);
 }
 
+void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
+		return;
+	hash_del_rcu(&e->encap_hlist);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	mlx5e_encap_dealloc(priv, e);
+}
+
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 			       struct mlx5e_tc_flow *flow, int out_index)
 {
+	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
 	/* flow wasn't fully initialized */
-	if (!flow->encaps[out_index].e)
+	if (!e)
 		return;
 
+	mutex_lock(&esw->offloads.encap_tbl_lock);
 	list_del(&flow->encaps[out_index].list);
-
-	mlx5e_encap_put(priv, flow->encaps[out_index].e);
 	flow->encaps[out_index].e = NULL;
+	if (!refcount_dec_and_test(&e->refcnt)) {
+		mutex_unlock(&esw->offloads.encap_tbl_lock);
+		return;
+	}
+	hash_del_rcu(&e->encap_hlist);
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+
+	mlx5e_encap_dealloc(priv, e);
 }
 
 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
@@ -2882,6 +2900,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 
 	hash_key = hash_encap_info(&key);
 
+	mutex_lock(&esw->offloads.encap_tbl_lock);
 	e = mlx5e_encap_get(priv, &key, hash_key);
 
 	/* must verify if encap is valid or not */
@@ -2889,8 +2908,10 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 		goto attach_flow;
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
-	if (!e)
-		return -ENOMEM;
+	if (!e) {
+		err = -ENOMEM;
+		goto out_err;
+	}
 
 	refcount_set(&e->refcnt, 1);
 	e->tun_info = tun_info;
@@ -2922,10 +2943,12 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 	} else {
 		*encap_valid = false;
 	}
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
 
 	return err;
 
 out_err:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
 	kfree(e);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 2d734ecae719..f0692407f617 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1999,6 +1999,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	if (err)
 		goto abort;
 
+	mutex_init(&esw->offloads.encap_tbl_lock);
 	hash_init(esw->offloads.encap_tbl);
 	mutex_init(&esw->offloads.mod_hdr.lock);
 	hash_init(esw->offloads.mod_hdr.hlist);
@@ -2039,6 +2040,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	destroy_workqueue(esw->work_queue);
 	esw_offloads_cleanup_reps(esw);
 	mutex_destroy(&esw->offloads.mod_hdr.lock);
+	mutex_destroy(&esw->offloads.encap_tbl_lock);
 	kfree(esw->vports);
 	kfree(esw);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index fd63ba4ed0da..86db0e9776da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -181,6 +181,7 @@ struct mlx5_esw_offload {
 	struct mlx5_eswitch_rep *vport_reps;
 	struct list_head peer_flows;
 	struct mutex peer_mutex;
+	struct mutex encap_tbl_lock; /* protects encap_tbl */
 	DECLARE_HASHTABLE(encap_tbl, 8);
 	struct mod_hdr_tbl mod_hdr;
 	DECLARE_HASHTABLE(termtbl_tbl, 8);
-- 
2.21.0


^ permalink raw reply related

* Regression in routing table cleanup when VRF support was fixed.
From: Craig Robson @ 2019-08-09 22:01 UTC (permalink / raw)
  To: netdev

This commit caused a regression for me when using policy routing.

5a56a0b3a45d net: Don't delete routes in different VRFs

The regression is that routes are no longer getting deleted when I delete IP
addresses when using non-default routing tables.

For example the following sequence used to delete the routing entry in table
100 when the ip was deleted, but after the above commit is does not delete the
routing table entry.

Setup
# ip link set dummy0 up
# ip addr add 10.10.10.10/24 dev dummy0
# ip addr add 192.168.10.10/24 dev dummy0
# ip route add dev dummy0 192.168.10.0/24 table 100 scope link src 192.168.10.10
# ip route show table 100
192.168.10.0/24 dev dummy0 scope link  src 192.168.10.10

Cleanup
# ip addr del 192.168.10.10/24 dev dummy0

Routing entry still exists and should not.
# ip route show table 100
192.168.10.0/24 dev dummy0 scope link  src 192.168.10.10

This was previously working (deleting the route) since at least 2.6.32.  I am
not using VRFs.  What needs fixed to get the old behavior?

Regards,
Craig Robson

^ permalink raw reply

* Re: [Potential Spoof] Re: [PATCH net-next v6 3/3] net: phy: broadcom: add 1000Base-X support for BCM54616S
From: Tao Ren @ 2019-08-09 22:16 UTC (permalink / raw)
  To: Heiner Kallweit, Andrew Lunn, Florian Fainelli, David S . Miller,
	Arun Parameswaran, Justin Chen, Vladimir Oltean,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	openbmc@lists.ozlabs.org
In-Reply-To: <d398ca57-575c-6e88-49a5-bf49cddfa2f0@gmail.com>

On 8/9/19 2:59 PM, Heiner Kallweit wrote:
> On 09.08.2019 23:13, Tao Ren wrote:
>> On 8/9/19 1:54 PM, Tao Ren wrote:
>>> Hi Heiner,
>>>
>>> On 8/9/19 1:21 PM, Heiner Kallweit wrote:
>>>> On 09.08.2019 07:44, Tao Ren wrote:
>>>>> The BCM54616S PHY cannot work properly in RGMII->1000Base-KX mode (for
>>>>> example, on Facebook CMM BMC platform), mainly because genphy functions
>>>>> are designed for copper links, and 1000Base-X (clause 37) auto negotiation
>>>>> needs to be handled differently.
>>>>>
>>>>> This patch enables 1000Base-X support for BCM54616S by customizing 3
>>>>> driver callbacks:
>>>>>
>>>>>   - probe: probe callback detects PHY's operation mode based on
>>>>>     INTERF_SEL[1:0] pins and 1000X/100FX selection bit in SerDES 100-FX
>>>>>     Control register.
>>>>>
>>>>>   - config_aneg: calls genphy_c37_config_aneg when the PHY is running in
>>>>>     1000Base-X mode; otherwise, genphy_config_aneg will be called.
>>>>>
>>>>>   - read_status: calls genphy_c37_read_status when the PHY is running in
>>>>>     1000Base-X mode; otherwise, genphy_read_status will be called.
>>>>>
>>>>> Signed-off-by: Tao Ren <taoren@fb.com>
>>>>> ---
>>>>>  Changes in v6:
>>>>>   - nothing changed.
>>>>>  Changes in v5:
>>>>>   - include Heiner's patch "net: phy: add support for clause 37
>>>>>     auto-negotiation" into the series.
>>>>>   - use genphy_c37_config_aneg and genphy_c37_read_status in BCM54616S
>>>>>     PHY driver's callback when the PHY is running in 1000Base-X mode.
>>>>>  Changes in v4:
>>>>>   - add bcm54616s_config_aneg_1000bx() to deal with auto negotiation in
>>>>>     1000Base-X mode.
>>>>>  Changes in v3:
>>>>>   - rename bcm5482_read_status to bcm54xx_read_status so the callback can
>>>>>     be shared by BCM5482 and BCM54616S.
>>>>>  Changes in v2:
>>>>>   - Auto-detect PHY operation mode instead of passing DT node.
>>>>>   - move PHY mode auto-detect logic from config_init to probe callback.
>>>>>   - only set speed (not including duplex) in read_status callback.
>>>>>   - update patch description with more background to avoid confusion.
>>>>>   - patch #1 in the series ("net: phy: broadcom: set features explicitly
>>>>>     for BCM54616") is dropped: the fix should go to get_features callback
>>>>>     which may potentially depend on this patch.
>>>>>
>>>>>  drivers/net/phy/broadcom.c | 54 +++++++++++++++++++++++++++++++++++---
>>>>>  include/linux/brcmphy.h    | 10 +++++--
>>>>>  2 files changed, 58 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
>>>>> index 937d0059e8ac..fbd76a31c142 100644
>>>>> --- a/drivers/net/phy/broadcom.c
>>>>> +++ b/drivers/net/phy/broadcom.c
>>>>> @@ -383,9 +383,9 @@ static int bcm5482_config_init(struct phy_device *phydev)
>>>>>  		/*
>>>>>  		 * Select 1000BASE-X register set (primary SerDes)
>>>>>  		 */
>>>>> -		reg = bcm_phy_read_shadow(phydev, BCM5482_SHD_MODE);
>>>>> -		bcm_phy_write_shadow(phydev, BCM5482_SHD_MODE,
>>>>> -				     reg | BCM5482_SHD_MODE_1000BX);
>>>>> +		reg = bcm_phy_read_shadow(phydev, BCM54XX_SHD_MODE);
>>>>> +		bcm_phy_write_shadow(phydev, BCM54XX_SHD_MODE,
>>>>> +				     reg | BCM54XX_SHD_MODE_1000BX);
>>>>>  
>>>>>  		/*
>>>>>  		 * LED1=ACTIVITYLED, LED3=LINKSPD[2]
>>>>> @@ -451,12 +451,44 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
>>>>>  	return ret;
>>>>>  }
>>>>>  
>>>>> +static int bcm54616s_probe(struct phy_device *phydev)
>>>>> +{
>>>>> +	int val, intf_sel;
>>>>> +
>>>>> +	val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_MODE);
>>>>> +	if (val < 0)
>>>>> +		return val;
>>>>> +
>>>>> +	/* The PHY is strapped in RGMII to fiber mode when INTERF_SEL[1:0]
>>>>> +	 * is 01b.
>>>>> +	 */
>>>>> +	intf_sel = (val & BCM54XX_SHD_INTF_SEL_MASK) >> 1;
>>>>> +	if (intf_sel == 1) {
>>>>> +		val = bcm_phy_read_shadow(phydev, BCM54616S_SHD_100FX_CTRL);
>>>>> +		if (val < 0)
>>>>> +			return val;
>>>>> +
>>>>> +		/* Bit 0 of the SerDes 100-FX Control register, when set
>>>>> +		 * to 1, sets the MII/RGMII -> 100BASE-FX configuration.
>>>>> +		 * When this bit is set to 0, it sets the GMII/RGMII ->
>>>>> +		 * 1000BASE-X configuration.
>>>>> +		 */
>>>>> +		if (!(val & BCM54616S_100FX_MODE))
>>>>> +			phydev->dev_flags |= PHY_BCM_FLAGS_MODE_1000BX;
>>>>> +	}
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>>  static int bcm54616s_config_aneg(struct phy_device *phydev)
>>>>>  {
>>>>>  	int ret;
>>>>>  
>>>>>  	/* Aneg firsly. */
>>>>> -	ret = genphy_config_aneg(phydev);
>>>>> +	if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX)
>>>>> +		ret = genphy_c37_config_aneg(phydev);
>>>>> +	else
>>>>> +		ret = genphy_config_aneg(phydev);
>>>>>  
>>>>
>>>> I'm just wondering whether it needs to be considered that 100base-FX
>>>> doesn't support auto-negotiation. I suppose BMSR reports aneg as
>>>> supported, therefore phylib will use aneg per default.
>>>> Not sure who could set 100Base-FX mode when, but maybe at that place
>>>> also phydev->autoneg needs to be cleared. Did you test 100Base-FX mode?
>>>
>>> I'm doubting if 100Base-FX works. Besides auto-negotiation, 100Base-FX Control/Status registers are defined in shadow register instead of MII_BMCR and MII_BMSR.
>>>
>>> Unfortunately I don't have environment to test 100Base-FX and that's why I only make changes when the PHY is working in 1000X mode.
>>
>> I can prepare a patch for 100Base-FX based on my understanding of bcm54616s datasheet, but the patch would be just compile-tested 
>>
> Support for 1000Base-X should be sufficient. Best mention the missing support for
> 100Base-FX in the commit message and at a suited place in the driver code.

Make sense. Will do it soon.


Thanks,

Tao

^ permalink raw reply

* Re: [PATCH v2 net-next 2/2] net: mvpp2: support multiple comphy lanes
From: Matt Pelland @ 2019-08-09 22:20 UTC (permalink / raw)
  To: Antoine Tenart; +Cc: netdev, maxime.chevallier
In-Reply-To: <20190809083250.GB3516@kwain>

On Fri, Aug 09, 2019 at 10:32:50AM +0200, Antoine Tenart wrote:
> Hello Matt,
> 
> On Thu, Aug 08, 2019 at 07:06:06PM -0400, Matt Pelland wrote:
> >  
> >  static void mvpp2_port_enable(struct mvpp2_port *port)
> > @@ -3389,7 +3412,9 @@ static void mvpp2_stop_dev(struct mvpp2_port *port)
> >  
> >  	if (port->phylink)
> >  		phylink_stop(port->phylink);
> > -	phy_power_off(port->comphy);
> > +
> > +	if (port->priv->hw_version == MVPP22)
> > +		mvpp22_comphy_deinit(port);
> 
> You can drop the check on the version here, mvpp22_comphy_deinit will
> return 0 if no comphy was described. (You added other calls to this
> function without the check, which is fine).
> 
> > @@ -5037,20 +5062,18 @@ static int mvpp2_port_probe(struct platform_device *pdev,
> >  			    struct fwnode_handle *port_fwnode,
> >  			    struct mvpp2 *priv)
> >  {
> > -	struct phy *comphy = NULL;
> > -	struct mvpp2_port *port;
> > -	struct mvpp2_port_pcpu *port_pcpu;
> > +	unsigned int ntxqs, nrxqs, ncomphys, nrequired_comphys, thread;
> >  	struct device_node *port_node = to_of_node(port_fwnode);
> > +	struct mvpp2_port_pcpu *port_pcpu;
> >  	netdev_features_t features;
> > -	struct net_device *dev;
> >  	struct phylink *phylink;
> > -	char *mac_from = "";
> > -	unsigned int ntxqs, nrxqs, thread;
> > +	struct mvpp2_port *port;
> >  	unsigned long flags = 0;
> > +	struct net_device *dev;
> > +	int err, i, phy_mode;
> > +	char *mac_from = "";
> >  	bool has_tx_irqs;
> >  	u32 id;
> > -	int phy_mode;
> > -	int err, i;
> >  
> >  	has_tx_irqs = mvpp2_port_has_irqs(priv, port_node, &flags);
> >  	if (!has_tx_irqs && queue_mode == MVPP2_QDIST_MULTI_MODE) {
> > @@ -5084,14 +5107,38 @@ static int mvpp2_port_probe(struct platform_device *pdev,
> >  		goto err_free_netdev;
> >  	}
> >  
> > +	port = netdev_priv(dev);
> > +
> >  	if (port_node) {
> > -		comphy = devm_of_phy_get(&pdev->dev, port_node, NULL);
> > -		if (IS_ERR(comphy)) {
> > -			if (PTR_ERR(comphy) == -EPROBE_DEFER) {
> > -				err = -EPROBE_DEFER;
> > -				goto err_free_netdev;
> > +		for (i = 0, ncomphys = 0; i < ARRAY_SIZE(port->comphys); i++) {
> > +			port->comphys[i] = devm_of_phy_get_by_index(&pdev->dev,
> > +								    port_node,
> > +								    i);
> > +			if (IS_ERR(port->comphys[i])) {
> > +				err = PTR_ERR(port->comphys[i]);
> > +				port->comphys[i] = NULL;
> > +				if (err == -EPROBE_DEFER)
> > +					goto err_free_netdev;
> > +				err = 0;
> > +				break;
> >  			}
> > -			comphy = NULL;
> > +
> > +			++ncomphys;
> > +		}
> > +
> > +		if (phy_mode == PHY_INTERFACE_MODE_XAUI)
> > +			nrequired_comphys = 4;
> > +		else if (phy_mode == PHY_INTERFACE_MODE_RXAUI)
> > +			nrequired_comphys = 2;
> > +		else
> > +			nrequired_comphys = 1;
> > +
> > +		if (ncomphys < nrequired_comphys) {
> > +			dev_err(&pdev->dev,
> > +				"not enough comphys to support %s\n",
> > +				phy_modes(phy_mode));
> > +			err = -EINVAL;
> > +			goto err_free_netdev;
> 
> The comphy is optional and could not be described (some SoC do not have
> a driver for their comphy, and some aren't described at all). In such
> cases we do rely on the bootloader/firmware configuration. Also, I'm not
> sure how that would work with dynamic reconfiguration of the mode if the
> n# of lanes used changes (I'm not sure that is possible though).
> 

I'm new to this space, but, from my limited experience it seems unlikely that
some hardware configuration would require dynamically reconfiguring the number
of comphy lanes used depending on the phy mode. Unless you disagree, instead of
removing this check or making things really complicated to support this
scenario, I propose extending the conditional above to disable sanity checking
if no comphys were parsed out of the device tree. Something like:

if (ncomphys != 0 && ncomphys < nrequired_comphys)

This would cover Maxime's request for sanity checking, which I think is
valuable, while also maintaining compatibility with platforms that have no
comphy drivers or some other issue that prevents properly defining comphy nodes
in the device tree. Does that sound reasonable?

> Thanks!
> Antoine
> 
> -- 
> Antoine Ténart, Bootlin
> Embedded Linux and Kernel engineering
> https://bootlin.com

Thanks,
Matt

^ permalink raw reply

* Re: [PATCH v4 net-next 14/19] ionic: Add Tx and Rx handling
From: Shannon Nelson @ 2019-08-09 22:27 UTC (permalink / raw)
  To: Saeed Mahameed, netdev@vger.kernel.org, davem@davemloft.net
In-Reply-To: <c20a9924af9ce298f1997f57b8b51235726583d8.camel@mellanox.com>

On 7/25/19 4:48 PM, Saeed Mahameed wrote:
> On Mon, 2019-07-22 at 14:40 -0700, Shannon Nelson wrote:
>> Add both the Tx and Rx queue setup and handling.  The related
>> stats display comes later.  Instead of using the generic napi
>> routines used by the slow-path commands, the Tx and Rx paths
>> are simplified and inlined in one file in order to get better
>> compiler optimizations.
>>
>> Signed-off-by: Shannon Nelson<snelson@pensando.io>
>> ---
>>   drivers/net/ethernet/pensando/ionic/Makefile  |   2 +-
>>   .../net/ethernet/pensando/ionic/ionic_lif.c   | 387 ++++++++
>>   .../net/ethernet/pensando/ionic/ionic_lif.h   |  52 ++
>>   .../net/ethernet/pensando/ionic/ionic_txrx.c  | 879
>> ++++++++++++++++++
>>   .../net/ethernet/pensando/ionic/ionic_txrx.h  |  15 +
>>   5 files changed, 1334 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/net/ethernet/pensando/ionic/ionic_txrx.c
>>   create mode 100644 drivers/net/ethernet/pensando/ionic/ionic_txrx.h
>>
>> diff --git a/drivers/net/ethernet/pensando/ionic/Makefile
>> b/drivers/net/ethernet/pensando/ionic/Makefile
>> index 9b19bf57a489..0e2dc53f08d4 100644
>> --- a/drivers/net/ethernet/pensando/ionic/Makefile
>> +++ b/drivers/net/ethernet/pensando/ionic/Makefile
>> @@ -4,4 +4,4 @@
>>   obj-$(CONFIG_IONIC) := ionic.o
>>   
>>   ionic-y := ionic_main.o ionic_bus_pci.o ionic_dev.o ionic_ethtool.o
>> \
>> -	   ionic_lif.o ionic_rx_filter.o ionic_debugfs.o
>> +	   ionic_lif.o ionic_rx_filter.o ionic_txrx.o ionic_debugfs.o
>> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
>> b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
>> index 2bd8ce61c4a0..40d3b1cb362a 100644
>> --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
>> +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
>> @@ -10,6 +10,7 @@
>>   #include "ionic.h"
>>   #include "ionic_bus.h"
>>   #include "ionic_lif.h"
>> +#include "ionic_txrx.h"
>>   #include "ionic_ethtool.h"
>>   #include "ionic_debugfs.h"
>>   
>> @@ -18,6 +19,13 @@ static int ionic_lif_addr_add(struct lif *lif,
>> const u8 *addr);
>>   static int ionic_lif_addr_del(struct lif *lif, const u8 *addr);
>>   static void ionic_link_status_check(struct lif *lif);
>>   
>> +static int ionic_lif_stop(struct lif *lif);
>> +static int ionic_txrx_alloc(struct lif *lif);
>> +static int ionic_txrx_init(struct lif *lif);
>> +static void ionic_qcq_free(struct lif *lif, struct qcq *qcq);
>> +static int ionic_lif_txqs_init(struct lif *lif);
>> +static int ionic_lif_rxqs_init(struct lif *lif);
>> +static void ionic_lif_qcq_deinit(struct lif *lif, struct qcq *qcq);
>>   static int ionic_set_nic_features(struct lif *lif, netdev_features_t
>> features);
>>   static int ionic_notifyq_clean(struct lif *lif, int budget);
>>   
>> @@ -66,12 +74,96 @@ static void ionic_lif_deferred_enqueue(struct
>> ionic_deferred *def,
>>   	schedule_work(&def->work);
>>   }
> Bottom up or top down ? your current design is very mixed and I had to
> to scroll down and up too often just to understand what a function is
> doing, i strongly suggest to pick an use one approach.
>
> [1]
> https://en.wikipedia.org/wiki/Top-down_and_bottom-up_design#Programming

Hi Saeed,

Sorry it has taken me so long to address the comments you had on this 
patch: I've been contemplating them while working some internal issues.  
Part of my hesitance is also because, while correct, these are 
suggesting some significant rework and juggling of stable code. I've 
been aware of these issues since I inherited the code, and have been 
addressing them over time while trying to keep on top of stability, and 
was planning to continue doing so.  It sounds like I need to evolve the 
driver a little further before the next patchset version.  I think I'll 
have time in the next week to do this, so I'm hoping to have a new 
patchset out in another week or so.

Thanks for the inputs,
sln



^ permalink raw reply

* Re: [PATCH net] rxrpc: Fix local refcounting
From: Jeffrey E Altman @ 2019-08-09 22:34 UTC (permalink / raw)
  To: David Howells, netdev; +Cc: linux-afs, linux-kernel
In-Reply-To: <156538726702.16201.13552536596121161945.stgit@warthog.procyon.org.uk>


[-- Attachment #1.1: Type: text/plain, Size: 1932 bytes --]

David,

Looks good to me.

You can add a Reviewed-by line.

Thanks.

Jeffrey Altman

On 8/9/2019 5:47 PM, David Howells wrote:
> Fix rxrpc_unuse_local() to handle a NULL local pointer as it can be called
> on an unbound socket on which rx->local is not yet set.
> 
> The following reproduced (includes omitted):
> 
> 	int main(void)
> 	{
> 		socket(AF_RXRPC, SOCK_DGRAM, AF_INET);
> 		return 0;
> 	}
> 
> causes the following oops to occur:
> 
> 	BUG: kernel NULL pointer dereference, address: 0000000000000010
> 	...
> 	RIP: 0010:rxrpc_unuse_local+0x8/0x1b
> 	...
> 	Call Trace:
> 	 rxrpc_release+0x2b5/0x338
> 	 __sock_release+0x37/0xa1
> 	 sock_close+0x14/0x17
> 	 __fput+0x115/0x1e9
> 	 task_work_run+0x72/0x98
> 	 do_exit+0x51b/0xa7a
> 	 ? __context_tracking_exit+0x4e/0x10e
> 	 do_group_exit+0xab/0xab
> 	 __x64_sys_exit_group+0x14/0x17
> 	 do_syscall_64+0x89/0x1d4
> 	 entry_SYSCALL_64_after_hwframe+0x49/0xbe
> 
> Reported-by: syzbot+20dee719a2e090427b5f@syzkaller.appspotmail.com
> Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting")
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Jeffrey Altman <jaltman@auristor.com>
> ---
> 
>  net/rxrpc/local_object.c |   12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
> index 9798159ee65f..c9db3e762d8d 100644
> --- a/net/rxrpc/local_object.c
> +++ b/net/rxrpc/local_object.c
> @@ -402,11 +402,13 @@ void rxrpc_unuse_local(struct rxrpc_local *local)
>  {
>  	unsigned int au;
>  
> -	au = atomic_dec_return(&local->active_users);
> -	if (au == 0)
> -		rxrpc_queue_local(local);
> -	else
> -		rxrpc_put_local(local);
> +	if (local) {
> +		au = atomic_dec_return(&local->active_users);
> +		if (au == 0)
> +			rxrpc_queue_local(local);
> +		else
> +			rxrpc_put_local(local);
> +	}
>  }
>  
>  /*
> 

[-- Attachment #1.2: jaltman.vcf --]
[-- Type: text/x-vcard, Size: 283 bytes --]

begin:vcard
fn:Jeffrey Altman
n:Altman;Jeffrey
org:AuriStor, Inc.
adr:;;255 W 94TH ST STE 6B;New York;NY;10025-6985;United States
email;internet:jaltman@auristor.com
title:CEO
tel;work:+1-212-769-9018
url:https://www.linkedin.com/in/jeffreyaltman/
version:2.1
end:vcard


[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 4033 bytes --]

^ permalink raw reply

* [PATCH net-next 1/1] tc-testing: added tdc tests for matchall filter
From: Roman Mashak @ 2019-08-09 22:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, kernel, jhs, xiyou.wangcong, jiri, Roman Mashak

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
---
 .../tc-testing/tc-tests/filters/matchall.json      | 391 +++++++++++++++++++++
 1 file changed, 391 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
new file mode 100644
index 000000000000..5f24c0598624
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
@@ -0,0 +1,391 @@
+[
+    {
+        "id": "f62b",
+        "name": "Add ingress matchall filter for protocol ipv4 and action PASS",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ip matchall action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip matchall",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "7f09",
+        "name": "Add egress matchall filter for protocol ipv4 and action PASS",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 root handle 1: prio"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: handle 0x1 prio 1 protocol ip matchall action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent 1: handle 1 prio 1 protocol ip matchall",
+        "matchPattern": "^filter parent 1: protocol ip pref 1 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 root handle 1: prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "0596",
+        "name": "Add ingress matchall filter for protocol ipv6 and action DROP",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv6 matchall action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ipv6 matchall",
+        "matchPattern": "^filter parent ffff: protocol ipv6 pref 1 matchall.*handle 0x1.*gact action drop.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "41df",
+        "name": "Add egress matchall filter for protocol ipv6 and action DROP",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 root handle 1: prio"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: handle 0x1 prio 1 protocol ipv6 matchall action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent 1: handle 1 prio 1 protocol ipv6 matchall",
+        "matchPattern": "^filter parent 1: protocol ipv6 pref 1 matchall.*handle 0x1.*gact action drop.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 root handle 1: prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "e1da",
+        "name": "Add ingress matchall filter for protocol ipv4 and action PASS with priority at 16-bit maximum",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 65535 protocol ipv4 matchall action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65535 protocol ipv4 matchall",
+        "matchPattern": "^filter parent ffff: protocol ip pref 65535 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "3de5",
+        "name": "Add egress matchall filter for protocol ipv4 and action PASS with priority at 16-bit maximum",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 root handle 1: prio"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: handle 0x1 prio 65535 protocol ipv4 matchall action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent 1: handle 1 prio 65535 protocol ipv4 matchall",
+        "matchPattern": "^filter parent 1: protocol ip pref 65535 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 root handle 1: prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "72d7",
+        "name": "Add ingress matchall filter for protocol ipv4 and action PASS with priority exceeding 16-bit maximum",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 655355 protocol ipv4 matchall action pass",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 655355 protocol ipv4 matchall",
+        "matchPattern": "^filter parent ffff: protocol ip pref 655355 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "41d3",
+        "name": "Add egress matchall filter for protocol ipv4 and action PASS with priority exceeding 16-bit maximum",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 root handle 1: prio"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: handle 0x1 prio 655355 protocol ipv4 matchall action pass",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent 1: handle 1 prio 655355 protocol ipv4 matchall",
+        "matchPattern": "^filter parent 1: protocol ip pref 655355 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 root handle 1: prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "f755",
+        "name": "Add ingress matchall filter for all protocols and action CONTINUE with handle at 32-bit maximum",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xffffffff prio 1 protocol all matchall action continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xffffffff prio 1 protocol all matchall",
+        "matchPattern": "^filter parent ffff: protocol all pref 1 matchall.*handle 0xffffffff.*gact action continue.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "2c33",
+        "name": "Add egress matchall filter for all protocols and action CONTINUE with handle at 32-bit maximum",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 root handle 1: prio"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: handle 0xffffffff prio 1 protocol all matchall action continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent 1: handle 0xffffffff prio 1 protocol all matchall",
+        "matchPattern": "^filter parent 1: protocol all pref 1 matchall.*handle 0xffffffff.*gact action continue.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 root handle 1: prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "0e4a",
+        "name": "Add ingress matchall filter for all protocols and action RECLASSIFY with skip_hw flag",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol all matchall skip_hw action reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0x1 prio 1 protocol all matchall",
+        "matchPattern": "^filter parent ffff: protocol all pref 1 matchall.*handle 0x1.*skip_hw.*not_in_hw.*gact action reclassify.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "7f60",
+        "name": "Add egress matchall filter for all protocols and action RECLASSIFY with skip_hw flag",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 root handle 1: prio"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent 1: handle 0x1 prio 1 protocol all matchall skip_hw action reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent 1: handle 0x1 prio 1 protocol all matchall",
+        "matchPattern": "^filter parent 1: protocol all pref 1 matchall.*handle 0x1.*skip_hw.*not_in_hw.*gact action reclassify.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 root handle 1: prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "8bd2",
+        "name": "Add ingress matchall filter for protocol ipv6 and action PASS with classid",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 1:1 action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv6 matchall",
+        "matchPattern": "^filter parent ffff: protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 1:1.*gact action pass.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "2a4a",
+        "name": "Add ingress matchall filter for protocol ipv6 and action PASS with invalid classid",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 6789defg action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv6 matchall",
+        "matchPattern": "^filter protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 6789defg.*gact action pass.*ref 1 bind 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "eaf8",
+        "name": "Delete single ingress matchall filter",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 1:2 action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv6 matchall",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv6 matchall",
+        "matchPattern": "^filter protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 1:2.*gact action pass.*ref 1 bind 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "76ad",
+        "name": "Delete all ingress matchall filters",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol all matchall classid 1:2 action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x2 prio 2 protocol all matchall classid 1:3 action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x3 prio 3 protocol all matchall classid 1:4 action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x4 prio 4 protocol all matchall classid 1:5 action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol all pref.*matchall.*handle.*flowid.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "1eb9",
+        "name": "Delete single ingress matchall filter out of multiple",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol all matchall classid 1:2 action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x2 prio 2 protocol all matchall classid 1:3 action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x3 prio 3 protocol all matchall classid 1:4 action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x4 prio 4 protocol all matchall classid 1:5 action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol all handle 0x2 prio 2 matchall",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol all pref 2 matchall.*handle 0x2 flowid 1:2.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "6d63",
+        "name": "Delete ingress matchall filter by chain ID",
+        "category": [
+            "filter",
+            "matchall"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol all chain 1 matchall classid 1:1 action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 0x1 prio 1 protocol ipv4 chain 2 matchall classid 1:3 action continue"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: chain 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol all pref 1 matchall chain 1 handle 0x1 flowid 1:1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    }
+]
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next 0/7] net: dsa: mv88e6xxx: prepare Wait Bit operation
From: Vivien Didelot @ 2019-08-09 22:47 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, Vivien Didelot

The Remote Management Interface has its own implementation of a Wait
Bit operation, which requires a bit number and a value to wait for.

In order to prepare the introduction of this implementation, rework the
code waiting for bits and masks in mv88e6xxx to match this signature.

This has the benefit to unify the implementation of wait routines while
removing obsolete wait and update functions and also reducing the code.

Vivien Didelot (7):
  net: dsa: mv88e6xxx: wait for 88E6185 PPU disabled
  net: dsa: mv88e6xxx: introduce wait mask routine
  net: dsa: mv88e6xxx: introduce wait bit routine
  net: dsa: mv88e6xxx: wait for AVB Busy bit
  net: dsa: mv88e6xxx: remove wait and update routines
  net: dsa: mv88e6xxx: fix SMI bit checking
  net: dsa: mv88e6xxx: add delay in direct SMI wait

 drivers/net/dsa/mv88e6xxx/chip.c            | 76 ++++++++---------
 drivers/net/dsa/mv88e6xxx/chip.h            |  7 +-
 drivers/net/dsa/mv88e6xxx/global1.c         | 95 ++++++---------------
 drivers/net/dsa/mv88e6xxx/global1.h         |  5 +-
 drivers/net/dsa/mv88e6xxx/global1_atu.c     |  7 +-
 drivers/net/dsa/mv88e6xxx/global1_vtu.c     |  6 +-
 drivers/net/dsa/mv88e6xxx/global2.c         | 72 +++++++++-------
 drivers/net/dsa/mv88e6xxx/global2.h         | 12 +--
 drivers/net/dsa/mv88e6xxx/global2_avb.c     | 29 ++++++-
 drivers/net/dsa/mv88e6xxx/global2_scratch.c |  3 +-
 drivers/net/dsa/mv88e6xxx/smi.c             |  4 +-
 11 files changed, 155 insertions(+), 161 deletions(-)

-- 
2.22.0


^ permalink raw reply

* [PATCH net-next 1/7] net: dsa: mv88e6xxx: wait for 88E6185 PPU disabled
From: Vivien Didelot @ 2019-08-09 22:47 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190809224759.5743-1-vivien.didelot@gmail.com>

The PPU state of 88E6185 can be either "Disabled at Reset" or
"Disabled after Initialization". Because we intentionally clear the
PPU Enabled bit before checking its state, it is safe to wait for the
MV88E6185_G1_STS_PPU_STATE_DISABLED state explicitly instead of waiting
for any state different than MV88E6185_G1_STS_PPU_STATE_POLLING.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/global1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index 1323ef30a5e9..bbd31c9f8b48 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -46,7 +46,7 @@ static int mv88e6185_g1_wait_ppu_disabled(struct mv88e6xxx_chip *chip)
 
 		/* Check the value of the PPUState bits 15:14 */
 		state &= MV88E6185_G1_STS_PPU_STATE_MASK;
-		if (state != MV88E6185_G1_STS_PPU_STATE_POLLING)
+		if (state == MV88E6185_G1_STS_PPU_STATE_DISABLED)
 			return 0;
 
 		usleep_range(1000, 2000);
-- 
2.22.0


^ permalink raw reply related

* [PATCH net-next 2/7] net: dsa: mv88e6xxx: introduce wait mask routine
From: Vivien Didelot @ 2019-08-09 22:47 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190809224759.5743-1-vivien.didelot@gmail.com>

The current mv88e6xxx_wait routine is used to wait for a given mask
to be cleared to zero. However in some cases, the driver may have
to wait for a given mask to be of a certain non-zero value.

Thus provide a generic wait mask routine that will be used to implement
the current mv88e6xxx_wait function, and use it to wait for 88E6185
PPU states.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/chip.c    | 42 +++++++++++++++-----------
 drivers/net/dsa/mv88e6xxx/chip.h    |  2 ++
 drivers/net/dsa/mv88e6xxx/global1.c | 47 ++++++++---------------------
 drivers/net/dsa/mv88e6xxx/global1.h |  2 ++
 4 files changed, 41 insertions(+), 52 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d3804ffd3d2a..bd61d0d3a245 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -80,6 +80,29 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
 	return 0;
 }
 
+int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
+			u16 mask, u16 val)
+{
+	u16 data;
+	int err;
+	int i;
+
+	/* There's no bus specific operation to wait for a mask */
+	for (i = 0; i < 16; i++) {
+		err = mv88e6xxx_read(chip, addr, reg, &data);
+		if (err)
+			return err;
+
+		if ((data & mask) == val)
+			return 0;
+
+		usleep_range(1000, 2000);
+	}
+
+	dev_err(chip->dev, "Timeout while waiting for switch\n");
+	return -ETIMEDOUT;
+}
+
 struct mii_bus *mv88e6xxx_default_mdio_bus(struct mv88e6xxx_chip *chip)
 {
 	struct mv88e6xxx_mdio_bus *mdio_bus;
@@ -365,24 +388,7 @@ static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
 
 int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
 {
-	int i;
-
-	for (i = 0; i < 16; i++) {
-		u16 val;
-		int err;
-
-		err = mv88e6xxx_read(chip, addr, reg, &val);
-		if (err)
-			return err;
-
-		if (!(val & mask))
-			return 0;
-
-		usleep_range(1000, 2000);
-	}
-
-	dev_err(chip->dev, "Timeout while waiting for switch\n");
-	return -ETIMEDOUT;
+	return mv88e6xxx_wait_mask(chip, addr, reg, mask, 0x0000);
 }
 
 /* Indirect write to single pointer-data register with an Update bit */
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 8c6d3c906197..95b44532a282 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -588,6 +588,8 @@ static inline bool mv88e6xxx_is_invalid_port(struct mv88e6xxx_chip *chip, int po
 
 int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
 int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
+int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
+			u16 mask, u16 val);
 int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
 		     u16 update);
 int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask);
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index bbd31c9f8b48..482f9f8465af 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -32,48 +32,27 @@ int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 	return mv88e6xxx_wait(chip, chip->info->global1_addr, reg, mask);
 }
 
+int mv88e6xxx_g1_wait_mask(struct mv88e6xxx_chip *chip, int reg,
+			   u16 mask, u16 val)
+{
+	return mv88e6xxx_wait_mask(chip, chip->info->global1_addr, reg,
+				   mask, val);
+}
+
 /* Offset 0x00: Switch Global Status Register */
 
 static int mv88e6185_g1_wait_ppu_disabled(struct mv88e6xxx_chip *chip)
 {
-	u16 state;
-	int i, err;
-
-	for (i = 0; i < 16; i++) {
-		err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &state);
-		if (err)
-			return err;
-
-		/* Check the value of the PPUState bits 15:14 */
-		state &= MV88E6185_G1_STS_PPU_STATE_MASK;
-		if (state == MV88E6185_G1_STS_PPU_STATE_DISABLED)
-			return 0;
-
-		usleep_range(1000, 2000);
-	}
-
-	return -ETIMEDOUT;
+	return mv88e6xxx_g1_wait_mask(chip, MV88E6XXX_G1_STS,
+				      MV88E6185_G1_STS_PPU_STATE_MASK,
+				      MV88E6185_G1_STS_PPU_STATE_DISABLED);
 }
 
 static int mv88e6185_g1_wait_ppu_polling(struct mv88e6xxx_chip *chip)
 {
-	u16 state;
-	int i, err;
-
-	for (i = 0; i < 16; ++i) {
-		err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &state);
-		if (err)
-			return err;
-
-		/* Check the value of the PPUState bits 15:14 */
-		state &= MV88E6185_G1_STS_PPU_STATE_MASK;
-		if (state == MV88E6185_G1_STS_PPU_STATE_POLLING)
-			return 0;
-
-		usleep_range(1000, 2000);
-	}
-
-	return -ETIMEDOUT;
+	return mv88e6xxx_g1_wait_mask(chip, MV88E6XXX_G1_STS,
+				      MV88E6185_G1_STS_PPU_STATE_MASK,
+				      MV88E6185_G1_STS_PPU_STATE_POLLING);
 }
 
 static int mv88e6352_g1_wait_ppu_polling(struct mv88e6xxx_chip *chip)
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index d444266f7d78..48869d7984f4 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -250,6 +250,8 @@
 int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
 int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
 int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+int mv88e6xxx_g1_wait_mask(struct mv88e6xxx_chip *chip, int reg,
+			   u16 mask, u16 val);
 
 int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr);
 
-- 
2.22.0


^ permalink raw reply related

* [PATCH net-next 4/7] net: dsa: mv88e6xxx: wait for AVB Busy bit
From: Vivien Didelot @ 2019-08-09 22:47 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190809224759.5743-1-vivien.didelot@gmail.com>

The AVB is not an indirect table using an Update bit, but a unit using
a Busy bit. This means that we must ensure that this bit is cleared
before setting it and wait until it gets cleared again after writing
an operation. Reflect that.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/global2_avb.c | 29 +++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global2_avb.c b/drivers/net/dsa/mv88e6xxx/global2_avb.c
index 116b8cf5a6e3..657783e043ff 100644
--- a/drivers/net/dsa/mv88e6xxx/global2_avb.c
+++ b/drivers/net/dsa/mv88e6xxx/global2_avb.c
@@ -11,6 +11,8 @@
  *	Brandon Streiff <brandon.streiff@ni.com>
  */
 
+#include <linux/bitfield.h>
+
 #include "global2.h"
 
 /* Offset 0x16: AVB Command Register
@@ -27,17 +29,33 @@
 /* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words.
  * The hardware supports snapshotting up to four contiguous registers.
  */
+static int mv88e6xxx_g2_avb_wait(struct mv88e6xxx_chip *chip)
+{
+	int bit = __bf_shf(MV88E6352_G2_AVB_CMD_BUSY);
+
+	return mv88e6xxx_g2_wait_bit(chip, MV88E6352_G2_AVB_CMD, bit, 0);
+}
+
 static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop,
 				 u16 *data, int len)
 {
 	int err;
 	int i;
 
+	err = mv88e6xxx_g2_avb_wait(chip);
+	if (err)
+		return err;
+
 	/* Hardware can only snapshot four words. */
 	if (len > 4)
 		return -E2BIG;
 
-	err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop);
+	err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_CMD,
+				 MV88E6352_G2_AVB_CMD_BUSY | readop);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_g2_avb_wait(chip);
 	if (err)
 		return err;
 
@@ -57,11 +75,18 @@ static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop,
 {
 	int err;
 
+	err = mv88e6xxx_g2_avb_wait(chip);
+	if (err)
+		return err;
+
 	err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data);
 	if (err)
 		return err;
 
-	return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop);
+	err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_CMD,
+				 MV88E6352_G2_AVB_CMD_BUSY | writeop);
+
+	return mv88e6xxx_g2_avb_wait(chip);
 }
 
 static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
-- 
2.22.0


^ permalink raw reply related

* [PATCH net-next 5/7] net: dsa: mv88e6xxx: remove wait and update routines
From: Vivien Didelot @ 2019-08-09 22:47 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190809224759.5743-1-vivien.didelot@gmail.com>

Now that we have proper Wait Bit and Wait Mask routines, remove the
unused mv88e6xxx_wait routine and its Global 1 and Global 2 variants.

The indirect tables such as the Device Mapping Table or Priority
Override Table make use of an Update bit to distinguish reading (0)
from writing (1) operations. After a write operation occurs, the bit
self clears right away so there's no need to wait on it. Thus keep
things simple and remove the mv88e6xxx_update helper as well.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/chip.c            | 22 -----------
 drivers/net/dsa/mv88e6xxx/chip.h            |  3 --
 drivers/net/dsa/mv88e6xxx/global1.c         |  5 ---
 drivers/net/dsa/mv88e6xxx/global1.h         |  1 -
 drivers/net/dsa/mv88e6xxx/global2.c         | 43 ++++++++++-----------
 drivers/net/dsa/mv88e6xxx/global2.h         | 12 ------
 drivers/net/dsa/mv88e6xxx/global2_scratch.c |  3 +-
 7 files changed, 22 insertions(+), 67 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index b7e0513c675a..818a83eb2dcb 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -394,28 +394,6 @@ static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
 	mv88e6xxx_reg_unlock(chip);
 }
 
-int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
-{
-	return mv88e6xxx_wait_mask(chip, addr, reg, mask, 0x0000);
-}
-
-/* Indirect write to single pointer-data register with an Update bit */
-int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update)
-{
-	u16 val;
-	int err;
-
-	/* Wait until the previous operation is completed */
-	err = mv88e6xxx_wait(chip, addr, reg, BIT(15));
-	if (err)
-		return err;
-
-	/* Set the Update bit to trigger a write operation */
-	val = BIT(15) | update;
-
-	return mv88e6xxx_write(chip, addr, reg, val);
-}
-
 int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link,
 			     int speed, int duplex, int pause,
 			     phy_interface_t mode)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 9cdb6bfead25..a406be2f5652 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -590,11 +590,8 @@ int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
 int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
 int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
 			u16 mask, u16 val);
-int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
-		     u16 update);
 int mv88e6xxx_wait_bit(struct mv88e6xxx_chip *chip, int addr, int reg,
 		       int bit, int val);
-int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask);
 int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link,
 			     int speed, int duplex, int pause,
 			     phy_interface_t mode);
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index 5ace6490695b..25ec4c0ac589 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -27,11 +27,6 @@ int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
 	return mv88e6xxx_write(chip, addr, reg, val);
 }
 
-int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
-{
-	return mv88e6xxx_wait(chip, chip->info->global1_addr, reg, mask);
-}
-
 int mv88e6xxx_g1_wait_bit(struct mv88e6xxx_chip *chip, int reg, int
 			  bit, int val)
 {
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index ffa11749fecb..78b9ae22d18c 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -249,7 +249,6 @@
 
 int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
 int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
-int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
 int mv88e6xxx_g1_wait_bit(struct mv88e6xxx_chip *chip, int reg, int
 			  bit, int val);
 int mv88e6xxx_g1_wait_mask(struct mv88e6xxx_chip *chip, int reg,
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index b5acf45f30cb..bdbb72fc20ed 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -26,16 +26,6 @@ int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
 	return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
 }
 
-int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
-{
-	return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
-}
-
-int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
-{
-	return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
-}
-
 int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip, int reg, int
 			  bit, int val)
 {
@@ -130,7 +120,8 @@ int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
 	 * but bit 4 is reserved on older chips, so it is safe to use.
 	 */
 
-	return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_DEVICE_MAPPING, val);
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_DEVICE_MAPPING,
+				  MV88E6XXX_G2_DEVICE_MAPPING_UPDATE | val);
 }
 
 /* Offset 0x07: Trunk Mask Table register */
@@ -143,7 +134,8 @@ static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
 	if (hash)
 		val |= MV88E6XXX_G2_TRUNK_MASK_HASH;
 
-	return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_TRUNK_MASK, val);
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_TRUNK_MASK,
+				  MV88E6XXX_G2_TRUNK_MASK_UPDATE | val);
 }
 
 /* Offset 0x08: Trunk Mapping Table register */
@@ -154,7 +146,8 @@ static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
 	const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1;
 	u16 val = (id << 11) | (map & port_mask);
 
-	return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_TRUNK_MAPPING, val);
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_TRUNK_MAPPING,
+				  MV88E6XXX_G2_TRUNK_MAPPING_UPDATE | val);
 }
 
 int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip)
@@ -270,7 +263,8 @@ static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip,
 {
 	u16 val = (pointer << 8) | data;
 
-	return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SWITCH_MAC, val);
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MAC,
+				  MV88E6XXX_G2_SWITCH_MAC_UPDATE | val);
 }
 
 int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
@@ -293,7 +287,8 @@ static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
 {
 	u16 val = (pointer << 8) | (data & 0x7);
 
-	return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_PRIO_OVERRIDE, val);
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_PRIO_OVERRIDE,
+				  MV88E6XXX_G2_PRIO_OVERRIDE_UPDATE | val);
 }
 
 int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
@@ -857,12 +852,13 @@ const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops = {
 
 static int mv88e6390_watchdog_setup(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_g2_update(chip, MV88E6390_G2_WDOG_CTL,
-				   MV88E6390_G2_WDOG_CTL_PTR_INT_ENABLE |
-				   MV88E6390_G2_WDOG_CTL_CUT_THROUGH |
-				   MV88E6390_G2_WDOG_CTL_QUEUE_CONTROLLER |
-				   MV88E6390_G2_WDOG_CTL_EGRESS |
-				   MV88E6390_G2_WDOG_CTL_FORCE_IRQ);
+	return mv88e6xxx_g2_write(chip, MV88E6390_G2_WDOG_CTL,
+				  MV88E6390_G2_WDOG_CTL_UPDATE |
+				  MV88E6390_G2_WDOG_CTL_PTR_INT_ENABLE |
+				  MV88E6390_G2_WDOG_CTL_CUT_THROUGH |
+				  MV88E6390_G2_WDOG_CTL_QUEUE_CONTROLLER |
+				  MV88E6390_G2_WDOG_CTL_EGRESS |
+				  MV88E6390_G2_WDOG_CTL_FORCE_IRQ);
 }
 
 static int mv88e6390_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
@@ -895,8 +891,9 @@ static int mv88e6390_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
 
 static void mv88e6390_watchdog_free(struct mv88e6xxx_chip *chip)
 {
-	mv88e6xxx_g2_update(chip, MV88E6390_G2_WDOG_CTL,
-			    MV88E6390_G2_WDOG_CTL_PTR_INT_ENABLE);
+	mv88e6xxx_g2_write(chip, MV88E6390_G2_WDOG_CTL,
+			   MV88E6390_G2_WDOG_CTL_UPDATE |
+			   MV88E6390_G2_WDOG_CTL_PTR_INT_ENABLE);
 }
 
 const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index f5574e463a92..42da4bca73e8 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -295,8 +295,6 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
 
 int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
 int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
-int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update);
-int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
 int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip, int reg,
 			  int bit, int val);
 
@@ -378,16 +376,6 @@ static inline int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 v
 	return -EOPNOTSUPP;
 }
 
-static inline int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip,
 					int reg, int bit, int val)
 {
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
index baddecadd8be..33b7b9570d29 100644
--- a/drivers/net/dsa/mv88e6xxx/global2_scratch.c
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -37,7 +37,8 @@ static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
 {
 	u16 value = (reg << 8) | data;
 
-	return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value);
+	return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC,
+				  MV88E6XXX_G2_SCRATCH_MISC_UPDATE | value);
 }
 
 /**
-- 
2.22.0


^ permalink raw reply related

* [PATCH net-next 6/7] net: dsa: mv88e6xxx: fix SMI bit checking
From: Vivien Didelot @ 2019-08-09 22:47 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190809224759.5743-1-vivien.didelot@gmail.com>

The current mv88e6xxx_smi_direct_wait function is only used to check
the 16th bit of the (16-bit) SMI Command register. But the bit shift
operation is not enough if we eventually use this function to check
other bits, thus replace it with a mask.

Fixes: e7ba0fad9c53 ("net: dsa: mv88e6xxx: refine SMI support")
Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/smi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/smi.c b/drivers/net/dsa/mv88e6xxx/smi.c
index 5fc78a063843..18e87a5a20a3 100644
--- a/drivers/net/dsa/mv88e6xxx/smi.c
+++ b/drivers/net/dsa/mv88e6xxx/smi.c
@@ -64,7 +64,7 @@ static int mv88e6xxx_smi_direct_wait(struct mv88e6xxx_chip *chip,
 		if (err)
 			return err;
 
-		if (!!(data >> bit) == !!val)
+		if (!!(data & BIT(bit)) == !!val)
 			return 0;
 	}
 
-- 
2.22.0


^ permalink raw reply related

* [PATCH net-next 7/7] net: dsa: mv88e6xxx: add delay in direct SMI wait
From: Vivien Didelot @ 2019-08-09 22:47 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190809224759.5743-1-vivien.didelot@gmail.com>

The mv88e6xxx_smi_direct_wait routine is used to wait on indirect
registers access. It is of no exception and must delay between read
attempts, like other wait routines.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/smi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/smi.c b/drivers/net/dsa/mv88e6xxx/smi.c
index 18e87a5a20a3..282fe08db050 100644
--- a/drivers/net/dsa/mv88e6xxx/smi.c
+++ b/drivers/net/dsa/mv88e6xxx/smi.c
@@ -66,6 +66,8 @@ static int mv88e6xxx_smi_direct_wait(struct mv88e6xxx_chip *chip,
 
 		if (!!(data & BIT(bit)) == !!val)
 			return 0;
+
+		usleep_range(1000, 2000);
 	}
 
 	return -ETIMEDOUT;
-- 
2.22.0


^ permalink raw reply related

* [PATCH net-next 3/7] net: dsa: mv88e6xxx: introduce wait bit routine
From: Vivien Didelot @ 2019-08-09 22:47 UTC (permalink / raw)
  To: netdev; +Cc: davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190809224759.5743-1-vivien.didelot@gmail.com>

Many portions of the driver need to wait until a given bit is set
or cleared. Some busses even have a specific implementation for this
operation. In preparation for such variant, implement a generic Wait
Bit routine that can be used by the driver core functions.

This allows us to get rid of the custom implementations we may find
in the driver. Note that for the EEPROM bits, BUSY and RUNNING bits
are independent, thus it is more efficient to wait independently for
each bit instead of waiting for their mask.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
 drivers/net/dsa/mv88e6xxx/chip.c        | 14 ++++++-
 drivers/net/dsa/mv88e6xxx/chip.h        |  2 +
 drivers/net/dsa/mv88e6xxx/global1.c     | 49 +++++++------------------
 drivers/net/dsa/mv88e6xxx/global1.h     |  2 +
 drivers/net/dsa/mv88e6xxx/global1_atu.c |  7 +++-
 drivers/net/dsa/mv88e6xxx/global1_vtu.c |  6 ++-
 drivers/net/dsa/mv88e6xxx/global2.c     | 35 +++++++++++++-----
 drivers/net/dsa/mv88e6xxx/global2.h     |  8 ++++
 8 files changed, 73 insertions(+), 50 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index bd61d0d3a245..b7e0513c675a 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -10,6 +10,7 @@
  *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/delay.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -103,6 +104,13 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
 	return -ETIMEDOUT;
 }
 
+int mv88e6xxx_wait_bit(struct mv88e6xxx_chip *chip, int addr, int reg,
+		       int bit, int val)
+{
+	return mv88e6xxx_wait_mask(chip, addr, reg, BIT(bit),
+				   val ? BIT(bit) : 0x0000);
+}
+
 struct mii_bus *mv88e6xxx_default_mdio_bus(struct mv88e6xxx_chip *chip)
 {
 	struct mv88e6xxx_mdio_bus *mdio_bus;
@@ -2360,8 +2368,10 @@ static int mv88e6390_hidden_write(struct mv88e6xxx_chip *chip, int port,
 
 static int mv88e6390_hidden_wait(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_wait(chip, PORT_RESERVED_1A_CTRL_PORT,
-			      PORT_RESERVED_1A, PORT_RESERVED_1A_BUSY);
+	int bit = __bf_shf(PORT_RESERVED_1A_BUSY);
+
+	return mv88e6xxx_wait_bit(chip, PORT_RESERVED_1A_CTRL_PORT,
+				  PORT_RESERVED_1A, bit, 0);
 }
 
 
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 95b44532a282..9cdb6bfead25 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -592,6 +592,8 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
 			u16 mask, u16 val);
 int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
 		     u16 update);
+int mv88e6xxx_wait_bit(struct mv88e6xxx_chip *chip, int addr, int reg,
+		       int bit, int val);
 int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask);
 int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link,
 			     int speed, int duplex, int pause,
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index 482f9f8465af..5ace6490695b 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -32,6 +32,13 @@ int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 	return mv88e6xxx_wait(chip, chip->info->global1_addr, reg, mask);
 }
 
+int mv88e6xxx_g1_wait_bit(struct mv88e6xxx_chip *chip, int reg, int
+			  bit, int val)
+{
+	return mv88e6xxx_wait_bit(chip, chip->info->global1_addr, reg,
+				  bit, val);
+}
+
 int mv88e6xxx_g1_wait_mask(struct mv88e6xxx_chip *chip, int reg,
 			   u16 mask, u16 val)
 {
@@ -57,49 +64,20 @@ static int mv88e6185_g1_wait_ppu_polling(struct mv88e6xxx_chip *chip)
 
 static int mv88e6352_g1_wait_ppu_polling(struct mv88e6xxx_chip *chip)
 {
-	u16 state;
-	int i, err;
+	int bit = __bf_shf(MV88E6352_G1_STS_PPU_STATE);
 
-	for (i = 0; i < 16; ++i) {
-		err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &state);
-		if (err)
-			return err;
-
-		/* Check the value of the PPUState (or InitState) bit 15 */
-		if (state & MV88E6352_G1_STS_PPU_STATE)
-			return 0;
-
-		usleep_range(1000, 2000);
-	}
-
-	return -ETIMEDOUT;
+	return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1);
 }
 
 static int mv88e6xxx_g1_wait_init_ready(struct mv88e6xxx_chip *chip)
 {
-	const unsigned long timeout = jiffies + 1 * HZ;
-	u16 val;
-	int err;
+	int bit = __bf_shf(MV88E6XXX_G1_STS_INIT_READY);
 
 	/* Wait up to 1 second for the switch to be ready. The InitReady bit 11
 	 * is set to a one when all units inside the device (ATU, VTU, etc.)
 	 * have finished their initialization and are ready to accept frames.
 	 */
-	while (time_before(jiffies, timeout)) {
-		err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &val);
-		if (err)
-			return err;
-
-		if (val & MV88E6XXX_G1_STS_INIT_READY)
-			break;
-
-		usleep_range(1000, 2000);
-	}
-
-	if (time_after(jiffies, timeout))
-		return -ETIMEDOUT;
-
-	return 0;
+	return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1);
 }
 
 /* Offset 0x01: Switch MAC Address Register Bytes 0 & 1
@@ -455,8 +433,9 @@ int mv88e6xxx_g1_set_device_number(struct mv88e6xxx_chip *chip, int index)
 
 static int mv88e6xxx_g1_stats_wait(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_g1_wait(chip, MV88E6XXX_G1_STATS_OP,
-				 MV88E6XXX_G1_STATS_OP_BUSY);
+	int bit = __bf_shf(MV88E6XXX_G1_STATS_OP_BUSY);
+
+	return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STATS_OP, bit, 0);
 }
 
 int mv88e6095_g1_stats_set_histogram(struct mv88e6xxx_chip *chip)
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 48869d7984f4..ffa11749fecb 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -250,6 +250,8 @@
 int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
 int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
 int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+int mv88e6xxx_g1_wait_bit(struct mv88e6xxx_chip *chip, int reg, int
+			  bit, int val);
 int mv88e6xxx_g1_wait_mask(struct mv88e6xxx_chip *chip, int reg,
 			   u16 mask, u16 val);
 
diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c
index 1cf388e9bd94..18b86515b6bc 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_atu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c
@@ -5,6 +5,8 @@
  * Copyright (c) 2008 Marvell Semiconductor
  * Copyright (c) 2017 Savoir-faire Linux, Inc.
  */
+
+#include <linux/bitfield.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 
@@ -75,8 +77,9 @@ int mv88e6xxx_g1_atu_set_age_time(struct mv88e6xxx_chip *chip,
 
 static int mv88e6xxx_g1_atu_op_wait(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_g1_wait(chip, MV88E6XXX_G1_ATU_OP,
-				 MV88E6XXX_G1_ATU_OP_BUSY);
+	int bit = __bf_shf(MV88E6XXX_G1_ATU_OP_BUSY);
+
+	return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_ATU_OP, bit, 0);
 }
 
 static int mv88e6xxx_g1_atu_op(struct mv88e6xxx_chip *chip, u16 fid, u16 op)
diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
index 6cac997360e8..33056a609e96 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
@@ -7,6 +7,7 @@
  * Copyright (c) 2017 Savoir-faire Linux, Inc.
  */
 
+#include <linux/bitfield.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 
@@ -67,8 +68,9 @@ static int mv88e6xxx_g1_vtu_sid_write(struct mv88e6xxx_chip *chip,
 
 static int mv88e6xxx_g1_vtu_op_wait(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_g1_wait(chip, MV88E6XXX_G1_VTU_OP,
-				 MV88E6XXX_G1_VTU_OP_BUSY);
+	int bit = __bf_shf(MV88E6XXX_G1_VTU_OP_BUSY);
+
+	return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_VTU_OP, bit, 0);
 }
 
 static int mv88e6xxx_g1_vtu_op(struct mv88e6xxx_chip *chip, u16 op)
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 2305b94b3051..b5acf45f30cb 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -36,6 +36,13 @@ int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 	return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
 }
 
+int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip, int reg, int
+			  bit, int val)
+{
+	return mv88e6xxx_wait_bit(chip, chip->info->global2_addr, reg,
+				  bit, val);
+}
+
 /* Offset 0x00: Interrupt Source Register */
 
 static int mv88e6xxx_g2_int_source(struct mv88e6xxx_chip *chip, u16 *src)
@@ -178,8 +185,9 @@ int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip)
 
 static int mv88e6xxx_g2_irl_wait(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_g2_wait(chip, MV88E6XXX_G2_IRL_CMD,
-				 MV88E6XXX_G2_IRL_CMD_BUSY);
+	int bit = __bf_shf(MV88E6XXX_G2_IRL_CMD_BUSY);
+
+	return mv88e6xxx_g2_wait_bit(chip, MV88E6XXX_G2_IRL_CMD, bit, 0);
 }
 
 static int mv88e6xxx_g2_irl_op(struct mv88e6xxx_chip *chip, u16 op, int port,
@@ -214,8 +222,9 @@ int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port)
 
 static int mv88e6xxx_g2_pvt_op_wait(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_g2_wait(chip, MV88E6XXX_G2_PVT_ADDR,
-				 MV88E6XXX_G2_PVT_ADDR_BUSY);
+	int bit = __bf_shf(MV88E6XXX_G2_PVT_ADDR_BUSY);
+
+	return mv88e6xxx_g2_wait_bit(chip, MV88E6XXX_G2_PVT_ADDR, bit, 0);
 }
 
 static int mv88e6xxx_g2_pvt_op(struct mv88e6xxx_chip *chip, int src_dev,
@@ -308,9 +317,16 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
 
 static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_g2_wait(chip, MV88E6XXX_G2_EEPROM_CMD,
-				 MV88E6XXX_G2_EEPROM_CMD_BUSY |
-				 MV88E6XXX_G2_EEPROM_CMD_RUNNING);
+	int bit = __bf_shf(MV88E6XXX_G2_EEPROM_CMD_BUSY);
+	int err;
+
+	err = mv88e6xxx_g2_wait_bit(chip, MV88E6XXX_G2_EEPROM_CMD, bit, 0);
+	if (err)
+		return err;
+
+	bit = __bf_shf(MV88E6XXX_G2_EEPROM_CMD_RUNNING);
+
+	return mv88e6xxx_g2_wait_bit(chip, MV88E6XXX_G2_EEPROM_CMD, bit, 0);
 }
 
 static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
@@ -572,8 +588,9 @@ int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
 
 static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip)
 {
-	return mv88e6xxx_g2_wait(chip, MV88E6XXX_G2_SMI_PHY_CMD,
-				 MV88E6XXX_G2_SMI_PHY_CMD_BUSY);
+	int bit = __bf_shf(MV88E6XXX_G2_SMI_PHY_CMD_BUSY);
+
+	return mv88e6xxx_g2_wait_bit(chip, MV88E6XXX_G2_SMI_PHY_CMD, bit, 0);
 }
 
 static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index a664fc25f132..f5574e463a92 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -297,6 +297,8 @@ int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
 int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
 int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update);
 int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip, int reg,
+			  int bit, int val);
 
 int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
@@ -386,6 +388,12 @@ static inline int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 ma
 	return -EOPNOTSUPP;
 }
 
+static inline int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip,
+					int reg, int bit, int val)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip,
 					    int port)
 {
-- 
2.22.0


^ permalink raw reply related

* Re: [PATCH net-next v2 4/4] net: phy: realtek: add support for the 2.5Gbps PHY in RTL8125
From: Andrew Lunn @ 2019-08-09 22:54 UTC (permalink / raw)
  To: Heiner Kallweit; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <c8e2b3e7-1d0b-eba3-6a36-8808641f3031@gmail.com>

On Fri, Aug 09, 2019 at 09:31:32PM +0200, Heiner Kallweit wrote:
> On 09.08.2019 21:18, Andrew Lunn wrote:
> >> +	}, {
> >> +		PHY_ID_MATCH_EXACT(0x001cca50),
> > 
> > Hi Heiner
> > 
> Hi Andrew,
> 
> > With the Marvell driver, i looked at the range of IDs the PHYs where
> > using. The switch, being MDIO based, also has ID values. The PHY range
> > and the switch range are well separated, and it seems unlikely Marvell
> > would reuse a switch ID in a PHY which was not compatible with the
> > PHY.
> > 
> > Could you explain why you picked this value for the PHY? What makes
> > you think it is not in use by another Realtek PHY? 
> > 
> 0x001cc800 being the Realtek OUI, I've seen only PHY's with ID
> 0x001cc8XX and 0x001cc9XX so far. Realtek doesn't seem to have such
> a clear separation between PHY and switch PHY ID's.
> 
> Example:
> 0x001cc961 (RTL8366, switch)
> 0x001cc916 (RTL8211F, PHY)
> 
> Last digit of the model is used as model number.
> I did the same and used 5 as model number (from RTL8125).
> Revision number is set to 0 because RTL8125 is brand-new.
> 
> I chose a PHY ID in 0x001ccaXX range because it isn't used by
> Realtek AFAIK.

Hi Heiner

O.K.

This should also be something which is internal. If Realtek do happen
to use the ID, we can change both the MAC and the PHY to an new ID to
avoid the collision.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* [PATCH net-next] selftests: Fix detection of nettest command in fcnal-test
From: David Ahern @ 2019-08-09 23:13 UTC (permalink / raw)
  To: davem; +Cc: netdev, David Ahern

From: David Ahern <dsahern@gmail.com>

Most of the tests run by fcnal-test.sh relies on the nettest command.
Rather than trying to cover all of the individual tests, check for the
binary only at the beginning.

Also removes the need for log_error which is undefined.

Fixes: 6f9d5cacfe07 ("selftests: Setup for functional tests for fib and socket lookups")
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/testing/selftests/net/fcnal-test.sh | 38 +++++--------------------------
 1 file changed, 6 insertions(+), 32 deletions(-)

diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index bd6b564382ec..9fd3a0b97f0d 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -998,13 +998,6 @@ ipv4_tcp_vrf()
 ipv4_tcp()
 {
 	log_section "IPv4/TCP"
-
-	which nettest >/dev/null
-	if [ $? -ne 0 ]; then
-		log_error "nettest not found; skipping tests"
-		return
-	fi
-
 	log_subsection "No VRF"
 	setup
 
@@ -1375,12 +1368,6 @@ ipv4_udp_vrf()
 
 ipv4_udp()
 {
-	which nettest >/dev/null
-	if [ $? -ne 0 ]; then
-		log_error "nettest not found; skipping tests"
-		return
-	fi
-
 	log_section "IPv4/UDP"
 	log_subsection "No VRF"
 
@@ -2314,13 +2301,6 @@ ipv6_tcp_vrf()
 ipv6_tcp()
 {
 	log_section "IPv6/TCP"
-
-	which nettest >/dev/null
-	if [ $? -ne 0 ]; then
-		log_error "nettest not found; skipping tests"
-		return
-	fi
-
 	log_subsection "No VRF"
 	setup
 
@@ -3156,12 +3136,6 @@ netfilter_icmp()
 
 ipv4_netfilter()
 {
-	which nettest >/dev/null
-	if [ $? -ne 0 ]; then
-		log_error "nettest not found; skipping tests"
-		return
-	fi
-
 	log_section "IPv4 Netfilter"
 	log_subsection "TCP reset"
 
@@ -3219,12 +3193,6 @@ netfilter_icmp6()
 
 ipv6_netfilter()
 {
-	which nettest >/dev/null
-	if [ $? -ne 0 ]; then
-		log_error "nettest not found; skipping tests"
-		return
-	fi
-
 	log_section "IPv6 Netfilter"
 	log_subsection "TCP reset"
 
@@ -3422,6 +3390,12 @@ elif [ "$TESTS" = "ipv6" ]; then
 	TESTS="$TESTS_IPV6"
 fi
 
+which nettest >/dev/null
+if [ $? -ne 0 ]; then
+	echo "'nettest' command not found; skipping tests"
+	exit 0
+fi
+
 declare -i nfail=0
 declare -i nsuccess=0
 
-- 
2.11.0


^ permalink raw reply related

* Re: [PATCH v3] tools: bpftool: fix reading from /proc/config.gz
From: Peter Wu @ 2019-08-09 23:20 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Stanislav Fomichev, Alexei Starovoitov, Daniel Borkmann, netdev,
	Stanislav Fomichev, Quentin Monnet
In-Reply-To: <20190809145726.2972fa7a@cakuba.netronome.com>

Hi all,

Thanks for the lovely feedback :)

On Fri, Aug 09, 2019 at 02:57:26PM -0700, Jakub Kicinski wrote:
> On Fri, 9 Aug 2019 14:48:31 -0700, Stanislav Fomichev wrote:
> > I'm just being nit picky :-)
> > Because changelog says we already depend on -lz, but then in the patch
> > we explicitly add it.

What I meant by that is that zlib is not a new dependency since it is
already a mandatory dependency of libelf which is currently marked as
mandatory dependency in bpftool. That is why I did not bother with
adding a feature test either since it would be redundant.

Adding an explicit dependency helps if you want to build bpftool as
static binary, or if libelf somehow drops zlib in the future.
-- 
Kind regards,
Peter Wu
https://lekensteyn.nl

^ permalink raw reply

* Re: [PATCH v5 bpf-next] BPF: helpers: New helper to obtain namespacedata from current task
From: Yonghong Song @ 2019-08-09 23:30 UTC (permalink / raw)
  To: Carlos Antonio Neira Bustos
  Cc: Y Song, netdev@vger.kernel.org, ebiederm@xmission.com,
	brouer@redhat.com, bpf, quentin.monnet@netronome.com
In-Reply-To: <20190809210332.3cxftmljxfhwotrz@dev00>



On 8/9/19 2:03 PM, Carlos Antonio Neira Bustos wrote:
> Yonghong,
> 
> I have splitted the patch in 2 :
> 
> - bpf_helper introduction :
>   
> 
>  From 40ec0781525b82d5235c45f5066a7a79dea71065 Mon Sep 17 00:00:00 2001
> From: Carlos <cneirabustos@gmail.com>
> Date: Fri, 9 Aug 2019 12:20:52 -0700
> Subject: [PATCH 1/2] [PATCH v8 bpf-next 1/2] BPF: New helper to obtain
>   namespace data  from current task

Such a submission is not what kernel developer typically do.
You can read through the following docs for more details.
https://www.kernel.org/doc/html/v4.17/process/submitting-patches.html

Typically, I am using
    git format-patch --cover-letter --subject-prefix="PATCH bpf-next 
<version>" ...
to generate the patch set, you need edit patchset 0 with proper contents.

After patch set is properly prepared, you can use
    git send-email --to <...> --to <...> --cc <...> --cc <...> <All your 
patches>
to submit the patch.

I still prefer you to further split the patch into more than two
with my original suggestions. It might be difficult to do if you try
to attach the patches like below.
But it should become easier when you use the above
"git format-patch ..." and "git send-email ..." approach.

> 
> This helper obtains the active namespace from current and returns pid, tgid,
> device and namespace id as seen from that namespace, allowing to instrument
> a process inside a container.
> Device is read from /proc/self/ns/pid, as in the future it's possible that
> different pid_ns files may belong to different devices, according
> to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
> conference.
> Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
> scripts but this helper returns the pid as seen by the root namespace which is
> fine when a bcc script is not executed inside a container.
> When the process of interest is inside a container, pid filtering will not work
> if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
> returning the pid as it's seen by the current namespace where the script is
> executing.
> 
> This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
> used to do pid filtering even inside a container.
> 
> For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):
> 
>          u32 pid = bpf_get_current_pid_tgid() >> 32;
>          if (pid != <pid_arg_passed_in>)
>                  return 0;
> Could be modified to use bpf_get_current_pidns_info() as follows:
> 
>          struct bpf_pidns pidns;
>          bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
>          u32 pid = pidns.tgid;
>          u32 nsid = pidns.nsid;
>          if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
>                  return 0;
> 
> To find out the name PID namespace id of a process, you could use this command:
> 
> $ ps -h -o pidns -p <pid_of_interest>
> 
> Or this other command:
> 
> $ ls -Li /proc/<pid_of_interest>/ns/pid
> 
> Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
> ---
>   fs/internal.h                  |  2 --
>   fs/namei.c                     |  1 -
>   include/linux/bpf.h            |  1 +
>   include/linux/namei.h          |  4 +++
>   include/uapi/linux/bpf.h       | 31 +++++++++++++++++++-
>   kernel/bpf/core.c              |  1 +
>   kernel/bpf/helpers.c           | 64 ++++++++++++++++++++++++++++++++++++++++++
>   kernel/trace/bpf_trace.c       |  2 ++
>   tools/include/uapi/linux/bpf.h | 31 +++++++++++++++++++-
>   9 files changed, 132 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/internal.h b/fs/internal.h
> index 315fcd8d237c..6647e15dd419 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -59,8 +59,6 @@ extern int finish_clean_context(struct fs_context *fc);
>   /*
>    * namei.c
>    */
> -extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
> -			   struct path *path, struct path *root);
>   extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
>   extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
>   			   const char *, unsigned int, struct path *);
> diff --git a/fs/namei.c b/fs/namei.c
> index 209c51a5226c..a89fc72a4a10 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -19,7 +19,6 @@
>   #include <linux/export.h>
>   #include <linux/kernel.h>
>   #include <linux/slab.h>
> -#include <linux/fs.h>
>   #include <linux/namei.h>
>   #include <linux/pagemap.h>
>   #include <linux/fsnotify.h>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f9a506147c8a..e4adf5e05afd 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1050,6 +1050,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
>   extern const struct bpf_func_proto bpf_strtol_proto;
>   extern const struct bpf_func_proto bpf_strtoul_proto;
>   extern const struct bpf_func_proto bpf_tcp_sock_proto;
> +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
>   
>   /* Shared helpers among cBPF and eBPF. */
>   void bpf_user_rnd_init_once(void);
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index 9138b4471dbf..b45c8b6f7cb4 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -6,6 +6,7 @@
>   #include <linux/path.h>
>   #include <linux/fcntl.h>
>   #include <linux/errno.h>
> +#include <linux/fs.h>
>   
>   enum { MAX_NESTED_LINKS = 8 };
>   
> @@ -97,6 +98,9 @@ extern void unlock_rename(struct dentry *, struct dentry *);
>   
>   extern void nd_jump_link(struct path *path);
>   
> +extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
> +			   struct path *path, struct path *root);
> +
>   static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
>   {
>   	((char *) name)[min(len, maxlen)] = '\0';
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4393bd4b2419..db241857ec15 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2741,6 +2741,28 @@ union bpf_attr {
>    *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
>    *
>    *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
> + *
> + * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
> + *	Description
> + *		Copies into *pidns* pid, namespace id and tgid as seen by the
> + *		current namespace and also device from /proc/self/ns/pid.
> + *		*size_of_pidns* must be the size of *pidns*
> + *
> + *		This helper is used when pid filtering is needed inside a
> + *		container as bpf_get_current_tgid() helper returns always the
> + *		pid id as seen by the root namespace.
> + *	Return
> + *		0 on success
> + *
> + *		**-EINVAL** if *size_of_pidns* is not valid or unable to get ns, pid
> + *		or tgid of the current task.
> + *
> + *		**-ECHILD** if /proc/self/ns/pid does not exists.
> + *
> + *		**-ENOTDIR** if /proc/self/ns does not exists.
> + *
> + *		**-ENOMEM**  if allocation fails.
> + *
>    */
>   #define __BPF_FUNC_MAPPER(FN)		\
>   	FN(unspec),			\
> @@ -2853,7 +2875,8 @@ union bpf_attr {
>   	FN(sk_storage_get),		\
>   	FN(sk_storage_delete),		\
>   	FN(send_signal),		\
> -	FN(tcp_gen_syncookie),
> +	FN(tcp_gen_syncookie),		\
> +	FN(get_current_pidns_info),
>   
>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>    * function eBPF program intends to call
> @@ -3604,4 +3627,10 @@ struct bpf_sockopt {
>   	__s32	retval;
>   };
>   
> +struct bpf_pidns_info {
> +	__u32 dev;
> +	__u32 nsid;
> +	__u32 tgid;
> +	__u32 pid;
> +};
>   #endif /* _UAPI__LINUX_BPF_H__ */
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 8191a7db2777..3159f2a0188c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2038,6 +2038,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
>   const struct bpf_func_proto bpf_get_current_comm_proto __weak;
>   const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
>   const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> +const struct bpf_func_proto bpf_get_current_pidns_info __weak;
>   
>   const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
>   {
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 5e28718928ca..41fbf1f28a48 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -11,6 +11,12 @@
>   #include <linux/uidgid.h>
>   #include <linux/filter.h>
>   #include <linux/ctype.h>
> +#include <linux/pid_namespace.h>
> +#include <linux/major.h>
> +#include <linux/stat.h>
> +#include <linux/namei.h>
> +#include <linux/version.h>
> +
>   
>   #include "../../lib/kstrtox.h"
>   
> @@ -312,6 +318,64 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
>   	preempt_enable();
>   }
>   
> +BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
> +	 size)
> +{
> +	const char *pidns_path = "/proc/self/ns/pid";
> +	struct pid_namespace *pidns = NULL;
> +	struct filename *tmp = NULL;
> +	struct inode *inode;
> +	struct path kp;
> +	pid_t tgid = 0;
> +	pid_t pid = 0;
> +	int ret;
> +	int len;
> +
> +	if (unlikely(size != sizeof(struct bpf_pidns_info)))
> +		return -EINVAL;
> +	pidns = task_active_pid_ns(current);
> +	if (unlikely(!pidns))
> +		goto clear;
> +	pidns_info->nsid =  pidns->ns.inum;
> +	pid = task_pid_nr_ns(current, pidns);
> +	if (unlikely(!pid))
> +		goto clear;
> +	tgid = task_tgid_nr_ns(current, pidns);
> +	if (unlikely(!tgid))
> +		goto clear;
> +	pidns_info->tgid = (u32) tgid;
> +	pidns_info->pid = (u32) pid;
> +	tmp = kmem_cache_alloc(names_cachep, GFP_ATOMIC);
> +	if (unlikely(!tmp)) {
> +		memset((void *)pidns_info, 0, (size_t) size);
> +		return -ENOMEM;
> +	}
> +	len = strlen(pidns_path) + 1;
> +	memcpy((char *)tmp->name, pidns_path, len);
> +	tmp->uptr = NULL;
> +	tmp->aname = NULL;
> +	tmp->refcnt = 1;
> +	ret = filename_lookup(AT_FDCWD, tmp, 0, &kp, NULL);
> +	if (ret) {
> +		memset((void *)pidns_info, 0, (size_t) size);
> +		return ret;
> +	}
> +	inode = d_backing_inode(kp.dentry);
> +	pidns_info->dev = inode->i_sb->s_dev;
> +	return 0;
> +clear:
> +	memset((void *)pidns_info, 0, (size_t) size);
> +	return -EINVAL;
> +}
> +
> +const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
> +	.func		= bpf_get_current_pidns_info,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
> +	.arg2_type	= ARG_CONST_SIZE,
> +};
> +
>   #ifdef CONFIG_CGROUPS
>   BPF_CALL_0(bpf_get_current_cgroup_id)
>   {
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index ca1255d14576..5e1dc22765a5 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -709,6 +709,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>   #endif
>   	case BPF_FUNC_send_signal:
>   		return &bpf_send_signal_proto;
> +	case BPF_FUNC_get_current_pidns_info:
> +		return &bpf_get_current_pidns_info_proto;
>   	default:
>   		return NULL;
>   	}
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 4393bd4b2419..db241857ec15 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -2741,6 +2741,28 @@ union bpf_attr {
>    *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
>    *
>    *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
> + *
> + * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
> + *	Description
> + *		Copies into *pidns* pid, namespace id and tgid as seen by the
> + *		current namespace and also device from /proc/self/ns/pid.
> + *		*size_of_pidns* must be the size of *pidns*
> + *
> + *		This helper is used when pid filtering is needed inside a
> + *		container as bpf_get_current_tgid() helper returns always the
> + *		pid id as seen by the root namespace.
> + *	Return
> + *		0 on success
> + *
> + *		**-EINVAL** if *size_of_pidns* is not valid or unable to get ns, pid
> + *		or tgid of the current task.
> + *
> + *		**-ECHILD** if /proc/self/ns/pid does not exists.
> + *
> + *		**-ENOTDIR** if /proc/self/ns does not exists.
> + *
> + *		**-ENOMEM**  if allocation fails.
> + *
>    */
>   #define __BPF_FUNC_MAPPER(FN)		\
>   	FN(unspec),			\
> @@ -2853,7 +2875,8 @@ union bpf_attr {
>   	FN(sk_storage_get),		\
>   	FN(sk_storage_delete),		\
>   	FN(send_signal),		\
> -	FN(tcp_gen_syncookie),
> +	FN(tcp_gen_syncookie),		\
> +	FN(get_current_pidns_info),
>   
>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>    * function eBPF program intends to call
> @@ -3604,4 +3627,10 @@ struct bpf_sockopt {
>   	__s32	retval;
>   };
>   
> +struct bpf_pidns_info {
> +	__u32 dev;
> +	__u32 nsid;
> +	__u32 tgid;
> +	__u32 pid;
> +};
>   #endif /* _UAPI__LINUX_BPF_H__ */
> 

^ permalink raw reply

* [PATCH 0/2] iproute2: Improve usability of `ip nexthop`
From: Donald Sharp @ 2019-08-10  0:18 UTC (permalink / raw)
  To: netdev, dsahern

First patch fixes a spacing issue and the second patch allows 
the user to filter on the specificed protocol.

Donald Sharp (2):
  ip nexthop: Add space to display properly when showing a group
  ip nexthop: Allow flush|list operations to specify a specific protocol

 ip/ipnexthop.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

-- 
2.21.0


^ permalink raw reply

* [PATCH 1/2] ip nexthop: Add space to display properly when showing a group
From: Donald Sharp @ 2019-08-10  0:18 UTC (permalink / raw)
  To: netdev, dsahern

When displaying a nexthop group made up of other nexthops, the display
line shows this when you have additional data at the end:

id 42 group 43/44/45/46/47/48/49/50/51/52/53/54/55/56/57/58/59/60/61/62/63/64/65/66/67/68/69/70/71/72/73/74proto zebra

Modify code so that it shows:

id 42 group 43/44/45/46/47/48/49/50/51/52/53/54/55/56/57/58/59/60/61/62/63/64/65/66/67/68/69/70/71/72/73/74 proto zebra

Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
---
 ip/ipnexthop.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ip/ipnexthop.c b/ip/ipnexthop.c
index 97f09e74..f35aab52 100644
--- a/ip/ipnexthop.c
+++ b/ip/ipnexthop.c
@@ -186,6 +186,7 @@ static void print_nh_group(FILE *fp, const struct rtattr *grps_attr)
 
 		close_json_object();
 	}
+	print_string(PRINT_FP, NULL, "%s", " ");
 	close_json_array(PRINT_JSON, NULL);
 }
 
-- 
2.21.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox