netdev.vger.kernel.org archive mirror
* [PATCH net-next v4 0/6] Improve neigh_flush_dev performance
@ 2024-10-15 16:59 Gilad Naaman
  2024-10-15 16:59 ` [PATCH net-next v4 1/6] Add hlist_node to struct neighbour Gilad Naaman
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Gilad Naaman @ 2024-10-15 16:59 UTC (permalink / raw)
  To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Gilad Naaman, Kuniyuki Iwashima


This patchset improves the performance of neigh_flush_dev.

Currently, the only way to implement it is to traverse
all neighbours known to the kernel, across all network namespaces.

This means that some flows are slowed down as a function of the total
neighbour count, even if the specific link they are handling has few
or no neighbours.

To solve this, this patchset adds a netdev->neighbours list and
converts the hash buckets' singly-linked chains into doubly-linked
hlists, so that a neighbour can be unlinked without traversing its
hash bucket to find the previous entry.
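
As a rough sketch (the authoritative code is in patch 6/6), the flush
path then becomes a walk over the per-device list instead of over the
whole hash table:

	struct hlist_node *tmp;
	struct neighbour *n;

	/* Only this device's neighbours are visited. */
	hlist_for_each_entry_safe(n, tmp, &dev->neighbours[NEIGH_ARP_TABLE],
				  dev_list) {
		/* O(1) unlink thanks to the doubly-linked hlist node. */
		hlist_del_rcu(&n->hash);
		hlist_del_rcu(&n->dev_list);
		/* timer/refcount teardown elided */
	}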

The original use-case we encountered was mass deletion of links (12K
VLANs) while the system held 50K ARP and 50K NDP entries; the same
slowdowns also appear when the links are merely set down.

Changes in v4:

 - Remove usage of RCU-protected accessors when under lock
 - Rename `list` to the more appropriate `hash`
 - Divide the patchset into bite-sized commits
 - Remove references to DECnet

Gilad Naaman (6):
  Add hlist_node to struct neighbour
  Define neigh_for_each
  Convert neigh_* seq_file functions to use hlist
  Convert neighbour iteration to use hlist+macro
  Remove bare neighbour::next pointer
  Create netdev->neighbour association

 .../networking/net_cachelines/net_device.rst  |   1 +
 .../ethernet/mellanox/mlxsw/spectrum_router.c |  25 +-
 include/linux/netdevice.h                     |   7 +
 include/net/neighbour.h                       |  29 +-
 include/net/neighbour_tables.h                |  12 +
 net/core/neighbour.c                          | 271 +++++++-----------
 net/mpls/af_mpls.c                            |   2 +-
 7 files changed, 159 insertions(+), 188 deletions(-)
 create mode 100644 include/net/neighbour_tables.h

-- 
2.46.0


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH net-next v4 1/6] Add hlist_node to struct neighbour
  2024-10-15 16:59 [PATCH net-next v4 0/6] Improve neigh_flush_dev performance Gilad Naaman
@ 2024-10-15 16:59 ` Gilad Naaman
  2024-10-15 22:49   ` Kuniyuki Iwashima
  2024-10-15 16:59 ` [PATCH net-next v4 2/6] Define neigh_for_each Gilad Naaman
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Gilad Naaman @ 2024-10-15 16:59 UTC (permalink / raw)
  To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Gilad Naaman, Kuniyuki Iwashima

Add a doubly-linked list node to struct neighbour, so that entries
can be deleted without iterating the entire bucket they are in.
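
For context, a minimal illustration of why the embedded hlist_node
makes the unlink cheap (not part of the diff below):

	/* With a singly-linked chain, deleting n means re-walking the
	 * bucket to find its predecessor.  With an hlist_node, the
	 * predecessor's link is reachable through n->hash.pprev, so:
	 */
	hlist_del_rcu(&n->hash);	/* O(1) */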

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 include/net/neighbour.h |  2 ++
 net/core/neighbour.c    | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index a44f262a7384..5f2b7249ba02 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -136,6 +136,7 @@ struct neigh_statistics {
 
 struct neighbour {
 	struct neighbour __rcu	*next;
+	struct hlist_node	hash;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
 	unsigned long		confirmed;
@@ -191,6 +192,7 @@ struct pneigh_entry {
 
 struct neigh_hash_table {
 	struct neighbour __rcu	**hash_buckets;
+	struct hlist_head	*hash_heads;
 	unsigned int		hash_shift;
 	__u32			hash_rnd[NEIGH_NUM_HASH_RND];
 	struct rcu_head		rcu;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 77b819cd995b..01987368b6c5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -217,6 +217,7 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
 		neigh = rcu_dereference_protected(n->next,
 						  lockdep_is_held(&tbl->lock));
 		rcu_assign_pointer(*np, neigh);
+		hlist_del_rcu(&n->hash);
 		neigh_mark_dead(n);
 		retval = true;
 	}
@@ -403,6 +404,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 			rcu_assign_pointer(*np,
 				   rcu_dereference_protected(n->next,
 						lockdep_is_held(&tbl->lock)));
+			hlist_del_rcu(&n->hash);
 			write_lock(&n->lock);
 			neigh_del_timer(n);
 			neigh_mark_dead(n);
@@ -531,7 +533,9 @@ static void neigh_get_hash_rnd(u32 *x)
 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 {
 	size_t size = (1 << shift) * sizeof(struct neighbour *);
+	size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head);
 	struct neigh_hash_table *ret;
+	struct hlist_head *hash_heads;
 	struct neighbour __rcu **buckets;
 	int i;
 
@@ -540,17 +544,28 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 		return NULL;
 	if (size <= PAGE_SIZE) {
 		buckets = kzalloc(size, GFP_ATOMIC);
+		hash_heads = kzalloc(hash_heads_size, GFP_ATOMIC);
+		if (!hash_heads)
+			kfree(buckets);
 	} else {
 		buckets = (struct neighbour __rcu **)
 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
 					   get_order(size));
 		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
+
+		hash_heads = (struct hlist_head *)
+			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+					   get_order(hash_heads_size));
+		kmemleak_alloc(hash_heads, hash_heads_size, 1, GFP_ATOMIC);
+		if (!hash_heads)
+			free_pages((unsigned long)buckets, get_order(size));
 	}
-	if (!buckets) {
+	if (!buckets || !hash_heads) {
 		kfree(ret);
 		return NULL;
 	}
 	ret->hash_buckets = buckets;
+	ret->hash_heads = hash_heads;
 	ret->hash_shift = shift;
 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
@@ -564,6 +579,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 						    rcu);
 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
 	struct neighbour __rcu **buckets = nht->hash_buckets;
+	size_t hash_heads_size = (1 << nht->hash_shift) * sizeof(struct hlist_head);
+	struct hlist_head *hash_heads = nht->hash_heads;
 
 	if (size <= PAGE_SIZE) {
 		kfree(buckets);
@@ -571,6 +588,13 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 		kmemleak_free(buckets);
 		free_pages((unsigned long)buckets, get_order(size));
 	}
+
+	if (hash_heads_size < PAGE_SIZE) {
+		kfree(hash_heads);
+	} else {
+		kmemleak_free(hash_heads);
+		free_pages((unsigned long)hash_heads, get_order(hash_heads_size));
+	}
 	kfree(nht);
 }
 
@@ -607,6 +631,8 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
 						new_nht->hash_buckets[hash],
 						lockdep_is_held(&tbl->lock)));
 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
+			hlist_del_rcu(&n->hash);
+			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
 		}
 	}
 
@@ -717,6 +743,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
 						     lockdep_is_held(&tbl->lock)));
 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
+	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
 	write_unlock_bh(&tbl->lock);
 	neigh_dbg(2, "neigh %p is created\n", n);
 	rc = n;
@@ -1002,6 +1029,7 @@ static void neigh_periodic_work(struct work_struct *work)
 				rcu_assign_pointer(*np,
 					rcu_dereference_protected(n->next,
 						lockdep_is_held(&tbl->lock)));
+				hlist_del_rcu(&n->hash);
 				neigh_mark_dead(n);
 				write_unlock(&n->lock);
 				neigh_cleanup_and_release(n);
@@ -3131,6 +3159,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 				rcu_assign_pointer(*np,
 					rcu_dereference_protected(n->next,
 						lockdep_is_held(&tbl->lock)));
+				hlist_del_rcu(&n->hash);
 				neigh_mark_dead(n);
 			} else
 				np = &n->next;
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next v4 2/6] Define neigh_for_each
  2024-10-15 16:59 [PATCH net-next v4 0/6] Improve neigh_flush_dev performance Gilad Naaman
  2024-10-15 16:59 ` [PATCH net-next v4 1/6] Add hlist_node to struct neighbour Gilad Naaman
@ 2024-10-15 16:59 ` Gilad Naaman
  2024-10-15 22:57   ` Kuniyuki Iwashima
  2024-10-15 16:59 ` [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist Gilad Naaman
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Gilad Naaman @ 2024-10-15 16:59 UTC (permalink / raw)
  To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Gilad Naaman, Kuniyuki Iwashima

Define a neigh_for_each() iteration macro in neighbour.h, and move the
old neigh_for_each() function to its only user, the mlxsw driver.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 25 +++++++++++++++++--
 include/net/neighbour.h                       |  4 +--
 net/core/neighbour.c                          | 22 ----------------
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 800dfb64ec83..0bb46aba2502 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3006,6 +3006,27 @@ static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data)
 		rms->err = -ENOMEM;
 }
 
+static void mlxsw_sp_neigh_for_each(struct neigh_table *tbl,
+				    void (*cb)(struct neighbour *, void *),
+				    void *cookie)
+{
+	int chain;
+	struct neigh_hash_table *nht;
+
+	rcu_read_lock();
+	nht = rcu_dereference(tbl->nht);
+
+	read_lock_bh(&tbl->lock); /* avoid resizes */
+	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
+		struct neighbour *n;
+
+		neigh_for_each(n, &nht->hash_heads[chain])
+			cb(n, cookie);
+	}
+	read_unlock_bh(&tbl->lock);
+	rcu_read_unlock();
+}
+
 static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
 					struct mlxsw_sp_rif *rif)
 {
@@ -3014,12 +3035,12 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
 		.rif = rif,
 	};
 
-	neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
+	mlxsw_sp_neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
 	if (rms.err)
 		goto err_arp;
 
 #if IS_ENABLED(CONFIG_IPV6)
-	neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
+	mlxsw_sp_neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
 #endif
 	if (rms.err)
 		goto err_nd;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 5f2b7249ba02..2f4cb9e51364 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -278,6 +278,8 @@ static inline void *neighbour_priv(const struct neighbour *n)
 
 extern const struct nla_policy nda_policy[];
 
+#define neigh_for_each(pos, head) hlist_for_each_entry(pos, head, hash)
+
 static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
 {
 	return *(const u32 *)n->primary_key == *(const u32 *)pkey;
@@ -391,8 +393,6 @@ static inline struct net *pneigh_net(const struct pneigh_entry *pneigh)
 }
 
 void neigh_app_ns(struct neighbour *n);
-void neigh_for_each(struct neigh_table *tbl,
-		    void (*cb)(struct neighbour *, void *), void *cookie);
 void __neigh_for_each_release(struct neigh_table *tbl,
 			      int (*cb)(struct neighbour *));
 int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 01987368b6c5..e91105a4c5ee 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3113,28 +3113,6 @@ static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	return err;
 }
 
-void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
-{
-	int chain;
-	struct neigh_hash_table *nht;
-
-	rcu_read_lock();
-	nht = rcu_dereference(tbl->nht);
-
-	read_lock_bh(&tbl->lock); /* avoid resizes */
-	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
-		struct neighbour *n;
-
-		for (n = rcu_dereference(nht->hash_buckets[chain]);
-		     n != NULL;
-		     n = rcu_dereference(n->next))
-			cb(n, cookie);
-	}
-	read_unlock_bh(&tbl->lock);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(neigh_for_each);
-
 /* The tbl->lock must be held as a writer and BH disabled. */
 void __neigh_for_each_release(struct neigh_table *tbl,
 			      int (*cb)(struct neighbour *))
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist
  2024-10-15 16:59 [PATCH net-next v4 0/6] Improve neigh_flush_dev performance Gilad Naaman
  2024-10-15 16:59 ` [PATCH net-next v4 1/6] Add hlist_node to struct neighbour Gilad Naaman
  2024-10-15 16:59 ` [PATCH net-next v4 2/6] Define neigh_for_each Gilad Naaman
@ 2024-10-15 16:59 ` Gilad Naaman
  2024-10-15 23:25   ` Kuniyuki Iwashima
  2024-10-15 16:59 ` [PATCH net-next v4 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Gilad Naaman @ 2024-10-15 16:59 UTC (permalink / raw)
  To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Gilad Naaman, Kuniyuki Iwashima

Convert seq_file-related neighbour functionality to use neighbour::hash
and the related for_each macro.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 include/net/neighbour.h |  4 ++++
 net/core/neighbour.c    | 26 ++++++++++----------------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 2f4cb9e51364..7dc0d4d6a4a8 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -279,6 +279,10 @@ static inline void *neighbour_priv(const struct neighbour *n)
 extern const struct nla_policy nda_policy[];
 
 #define neigh_for_each(pos, head) hlist_for_each_entry(pos, head, hash)
+#define neigh_hlist_entry(n) hlist_entry_safe(n, struct neighbour, hash)
+#define neigh_first_rcu(bucket) \
+	neigh_hlist_entry(rcu_dereference(hlist_first_rcu(bucket)))
+
 
 static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
 {
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index e91105a4c5ee..4bdf7649ca57 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3207,25 +3207,21 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
 
 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
-		n = rcu_dereference(nht->hash_buckets[bucket]);
-
-		while (n) {
+		neigh_for_each(n, &nht->hash_heads[bucket]) {
 			if (!net_eq(dev_net(n->dev), net))
-				goto next;
+				continue;
 			if (state->neigh_sub_iter) {
 				loff_t fakep = 0;
 				void *v;
 
 				v = state->neigh_sub_iter(state, n, &fakep);
 				if (!v)
-					goto next;
+					continue;
 			}
 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
 				break;
 			if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
 				break;
-next:
-			n = rcu_dereference(n->next);
 		}
 
 		if (n)
@@ -3249,34 +3245,32 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
 		if (v)
 			return n;
 	}
-	n = rcu_dereference(n->next);
 
 	while (1) {
-		while (n) {
+		hlist_for_each_entry_continue_rcu(n, hash) {
 			if (!net_eq(dev_net(n->dev), net))
-				goto next;
+				continue;
 			if (state->neigh_sub_iter) {
 				void *v = state->neigh_sub_iter(state, n, pos);
 				if (v)
 					return n;
-				goto next;
+				continue;
 			}
 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
 				break;
 
 			if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
 				break;
-next:
-			n = rcu_dereference(n->next);
 		}
 
 		if (n)
 			break;
 
-		if (++state->bucket >= (1 << nht->hash_shift))
-			break;
+		while (!n && ++state->bucket < (1 << nht->hash_shift))
+			n = neigh_first_rcu(&nht->hash_heads[state->bucket]);
 
-		n = rcu_dereference(nht->hash_buckets[state->bucket]);
+		if (!n)
+			break;
 	}
 
 	if (n && pos)
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next v4 4/6] Convert neighbour iteration to use hlist+macro
  2024-10-15 16:59 [PATCH net-next v4 0/6] Improve neigh_flush_dev performance Gilad Naaman
                   ` (2 preceding siblings ...)
  2024-10-15 16:59 ` [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist Gilad Naaman
@ 2024-10-15 16:59 ` Gilad Naaman
  2024-10-15 23:38   ` Kuniyuki Iwashima
  2024-10-15 16:59 ` [PATCH net-next v4 5/6] Remove bare neighbour::next pointer Gilad Naaman
  2024-10-15 16:59 ` [PATCH net-next v4 6/6] Create netdev->neighbour association Gilad Naaman
  5 siblings, 1 reply; 16+ messages in thread
From: Gilad Naaman @ 2024-10-15 16:59 UTC (permalink / raw)
  To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Gilad Naaman, Kuniyuki Iwashima

Remove all usages of the bare neighbour::next pointer, replacing them
with neighbour::hash and its for_each macros.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 include/net/neighbour.h |  7 +++----
 net/core/neighbour.c    | 29 +++++++++++------------------
 2 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 7dc0d4d6a4a8..c0c35a15d2ad 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -279,6 +279,8 @@ static inline void *neighbour_priv(const struct neighbour *n)
 extern const struct nla_policy nda_policy[];
 
 #define neigh_for_each(pos, head) hlist_for_each_entry(pos, head, hash)
+#define neigh_for_each_safe(pos, tmp, head) hlist_for_each_entry_safe(pos, tmp, head, hash)
+
 #define neigh_hlist_entry(n) hlist_entry_safe(n, struct neighbour, hash)
 #define neigh_first_rcu(bucket) \
 	neigh_hlist_entry(rcu_dereference(hlist_first_rcu(bucket)))
@@ -312,12 +314,9 @@ static inline struct neighbour *___neigh_lookup_noref(
 	u32 hash_val;
 
 	hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
-	for (n = rcu_dereference(nht->hash_buckets[hash_val]);
-	     n != NULL;
-	     n = rcu_dereference(n->next)) {
+	neigh_for_each(n, &nht->hash_heads[hash_val])
 		if (n->dev == dev && key_eq(n, pkey))
 			return n;
-	}
 
 	return NULL;
 }
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4bdf7649ca57..cca524a55c97 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -391,8 +391,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 		struct neighbour *n;
 		struct neighbour __rcu **np = &nht->hash_buckets[i];
 
-		while ((n = rcu_dereference_protected(*np,
-					lockdep_is_held(&tbl->lock))) != NULL) {
+		neigh_for_each(n, &nht->hash_heads[i]) {
 			if (dev && n->dev != dev) {
 				np = &n->next;
 				continue;
@@ -427,6 +426,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 					n->nud_state = NUD_NONE;
 				neigh_dbg(2, "neigh %p is stray\n", n);
 			}
+			np = &n->next;
 			write_unlock(&n->lock);
 			neigh_cleanup_and_release(n);
 		}
@@ -614,11 +614,9 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
 
 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
 		struct neighbour *n, *next;
+		struct hlist_node *tmp;
 
-		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
-						   lockdep_is_held(&tbl->lock));
-		     n != NULL;
-		     n = next) {
+		neigh_for_each_safe(n, tmp, &old_nht->hash_heads[i]) {
 			hash = tbl->hash(n->primary_key, n->dev,
 					 new_nht->hash_rnd);
 
@@ -719,11 +717,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
 		goto out_tbl_unlock;
 	}
 
-	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
-					    lockdep_is_held(&tbl->lock));
-	     n1 != NULL;
-	     n1 = rcu_dereference_protected(n1->next,
-			lockdep_is_held(&tbl->lock))) {
+	neigh_for_each(n1, &nht->hash_heads[hash_val]) {
 		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
 			if (want_ref)
 				neigh_hold(n1);
@@ -976,6 +970,7 @@ static void neigh_periodic_work(struct work_struct *work)
 {
 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
 	struct neighbour *n;
+	struct hlist_node *tmp;
 	struct neighbour __rcu **np;
 	unsigned int i;
 	struct neigh_hash_table *nht;
@@ -1005,8 +1000,7 @@ static void neigh_periodic_work(struct work_struct *work)
 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
 		np = &nht->hash_buckets[i];
 
-		while ((n = rcu_dereference_protected(*np,
-				lockdep_is_held(&tbl->lock))) != NULL) {
+		neigh_for_each_safe(n, tmp, &nht->hash_heads[i]) {
 			unsigned int state;
 
 			write_lock(&n->lock);
@@ -2756,9 +2750,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
 		if (h > s_h)
 			s_idx = 0;
-		for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0;
-		     n != NULL;
-		     n = rcu_dereference(n->next)) {
+		idx = 0;
+		neigh_for_each(n, &nht->hash_heads[h]) {
 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
 				goto next;
 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
@@ -3124,11 +3117,11 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 					lockdep_is_held(&tbl->lock));
 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
 		struct neighbour *n;
+		struct hlist_node *tmp;
 		struct neighbour __rcu **np;
 
 		np = &nht->hash_buckets[chain];
-		while ((n = rcu_dereference_protected(*np,
-					lockdep_is_held(&tbl->lock))) != NULL) {
+		neigh_for_each_safe(n, tmp, &nht->hash_heads[chain]) {
 			int release;
 
 			write_lock(&n->lock);
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next v4 5/6] Remove bare neighbour::next pointer
  2024-10-15 16:59 [PATCH net-next v4 0/6] Improve neigh_flush_dev performance Gilad Naaman
                   ` (3 preceding siblings ...)
  2024-10-15 16:59 ` [PATCH net-next v4 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
@ 2024-10-15 16:59 ` Gilad Naaman
  2024-10-15 16:59 ` [PATCH net-next v4 6/6] Create netdev->neighbour association Gilad Naaman
  5 siblings, 0 replies; 16+ messages in thread
From: Gilad Naaman @ 2024-10-15 16:59 UTC (permalink / raw)
  To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Gilad Naaman, Kuniyuki Iwashima

Remove the now-unused neighbour::next pointer, leaving struct neighbour
solely with the hlist_node implementation.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 include/net/neighbour.h |   2 -
 net/core/neighbour.c    | 128 ++++++++--------------------------------
 2 files changed, 24 insertions(+), 106 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index c0c35a15d2ad..21c0c20a0ed5 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -135,7 +135,6 @@ struct neigh_statistics {
 #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field)
 
 struct neighbour {
-	struct neighbour __rcu	*next;
 	struct hlist_node	hash;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
@@ -191,7 +190,6 @@ struct pneigh_entry {
 #define NEIGH_NUM_HASH_RND	4
 
 struct neigh_hash_table {
-	struct neighbour __rcu	**hash_buckets;
 	struct hlist_head	*hash_heads;
 	unsigned int		hash_shift;
 	__u32			hash_rnd[NEIGH_NUM_HASH_RND];
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cca524a55c97..61b5f0d4896a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -205,49 +205,24 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
 	}
 }
 
-static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
-		      struct neigh_table *tbl)
-{
-	bool retval = false;
-
-	write_lock(&n->lock);
-	if (refcount_read(&n->refcnt) == 1) {
-		struct neighbour *neigh;
-
-		neigh = rcu_dereference_protected(n->next,
-						  lockdep_is_held(&tbl->lock));
-		rcu_assign_pointer(*np, neigh);
-		hlist_del_rcu(&n->hash);
-		neigh_mark_dead(n);
-		retval = true;
-	}
-	write_unlock(&n->lock);
-	if (retval)
-		neigh_cleanup_and_release(n);
-	return retval;
-}
-
 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
 {
 	struct neigh_hash_table *nht;
-	void *pkey = ndel->primary_key;
-	u32 hash_val;
-	struct neighbour *n;
-	struct neighbour __rcu **np;
+	bool retval = false;
 
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
-	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
-	hash_val = hash_val >> (32 - nht->hash_shift);
 
-	np = &nht->hash_buckets[hash_val];
-	while ((n = rcu_dereference_protected(*np,
-					      lockdep_is_held(&tbl->lock)))) {
-		if (n == ndel)
-			return neigh_del(n, np, tbl);
-		np = &n->next;
+	write_lock(&ndel->lock);
+	if (refcount_read(&ndel->refcnt) == 1) {
+		hlist_del_rcu(&ndel->hash);
+		neigh_mark_dead(ndel);
+		retval = true;
 	}
-	return false;
+	write_unlock(&ndel->lock);
+	if (retval)
+		neigh_cleanup_and_release(ndel);
+	return retval;
 }
 
 static int neigh_forced_gc(struct neigh_table *tbl)
@@ -389,20 +364,13 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 
 	for (i = 0; i < (1 << nht->hash_shift); i++) {
 		struct neighbour *n;
-		struct neighbour __rcu **np = &nht->hash_buckets[i];
 
 		neigh_for_each(n, &nht->hash_heads[i]) {
-			if (dev && n->dev != dev) {
-				np = &n->next;
+			if (dev && n->dev != dev)
 				continue;
-			}
-			if (skip_perm && n->nud_state & NUD_PERMANENT) {
-				np = &n->next;
+			if (skip_perm && n->nud_state & NUD_PERMANENT)
 				continue;
-			}
-			rcu_assign_pointer(*np,
-				   rcu_dereference_protected(n->next,
-						lockdep_is_held(&tbl->lock)));
+
 			hlist_del_rcu(&n->hash);
 			write_lock(&n->lock);
 			neigh_del_timer(n);
@@ -426,7 +394,6 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 					n->nud_state = NUD_NONE;
 				neigh_dbg(2, "neigh %p is stray\n", n);
 			}
-			np = &n->next;
 			write_unlock(&n->lock);
 			neigh_cleanup_and_release(n);
 		}
@@ -532,39 +499,26 @@ static void neigh_get_hash_rnd(u32 *x)
 
 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 {
-	size_t size = (1 << shift) * sizeof(struct neighbour *);
-	size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head);
+	size_t size = (1 << shift) * sizeof(struct hlist_head);
 	struct neigh_hash_table *ret;
 	struct hlist_head *hash_heads;
-	struct neighbour __rcu **buckets;
 	int i;
 
 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 	if (!ret)
 		return NULL;
 	if (size <= PAGE_SIZE) {
-		buckets = kzalloc(size, GFP_ATOMIC);
-		hash_heads = kzalloc(hash_heads_size, GFP_ATOMIC);
-		if (!hash_heads)
-			kfree(buckets);
+		hash_heads = kzalloc(size, GFP_ATOMIC);
 	} else {
-		buckets = (struct neighbour __rcu **)
-			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
-					   get_order(size));
-		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
-
 		hash_heads = (struct hlist_head *)
 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
-					   get_order(hash_heads_size));
-		kmemleak_alloc(hash_heads, hash_heads_size, 1, GFP_ATOMIC);
-		if (!hash_heads)
-			free_pages((unsigned long)buckets, get_order(size));
+					   get_order(size));
+		kmemleak_alloc(hash_heads, size, 1, GFP_ATOMIC);
 	}
-	if (!buckets || !hash_heads) {
+	if (!hash_heads) {
 		kfree(ret);
 		return NULL;
 	}
-	ret->hash_buckets = buckets;
 	ret->hash_heads = hash_heads;
 	ret->hash_shift = shift;
 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
@@ -577,23 +531,14 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 	struct neigh_hash_table *nht = container_of(head,
 						    struct neigh_hash_table,
 						    rcu);
-	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
-	struct neighbour __rcu **buckets = nht->hash_buckets;
-	size_t hash_heads_size = (1 << nht->hash_shift) * sizeof(struct hlist_head);
+	size_t size = (1 << nht->hash_shift) * sizeof(struct hlist_head);
 	struct hlist_head *hash_heads = nht->hash_heads;
 
-	if (size <= PAGE_SIZE) {
-		kfree(buckets);
-	} else {
-		kmemleak_free(buckets);
-		free_pages((unsigned long)buckets, get_order(size));
-	}
-
-	if (hash_heads_size < PAGE_SIZE) {
+	if (size < PAGE_SIZE) {
 		kfree(hash_heads);
 	} else {
 		kmemleak_free(hash_heads);
-		free_pages((unsigned long)hash_heads, get_order(hash_heads_size));
+		free_pages((unsigned long)hash_heads, get_order(size));
 	}
 	kfree(nht);
 }
@@ -613,7 +558,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
 		return old_nht;
 
 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
-		struct neighbour *n, *next;
+		struct neighbour *n;
 		struct hlist_node *tmp;
 
 		neigh_for_each_safe(n, tmp, &old_nht->hash_heads[i]) {
@@ -621,14 +566,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
 					 new_nht->hash_rnd);
 
 			hash >>= (32 - new_nht->hash_shift);
-			next = rcu_dereference_protected(n->next,
-						lockdep_is_held(&tbl->lock));
 
-			rcu_assign_pointer(n->next,
-					   rcu_dereference_protected(
-						new_nht->hash_buckets[hash],
-						lockdep_is_held(&tbl->lock)));
-			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
 			hlist_del_rcu(&n->hash);
 			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
 		}
@@ -733,10 +671,6 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
 		list_add_tail(&n->managed_list, &n->tbl->managed_list);
 	if (want_ref)
 		neigh_hold(n);
-	rcu_assign_pointer(n->next,
-			   rcu_dereference_protected(nht->hash_buckets[hash_val],
-						     lockdep_is_held(&tbl->lock)));
-	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
 	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
 	write_unlock_bh(&tbl->lock);
 	neigh_dbg(2, "neigh %p is created\n", n);
@@ -971,7 +905,6 @@ static void neigh_periodic_work(struct work_struct *work)
 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
 	struct neighbour *n;
 	struct hlist_node *tmp;
-	struct neighbour __rcu **np;
 	unsigned int i;
 	struct neigh_hash_table *nht;
 
@@ -998,7 +931,6 @@ static void neigh_periodic_work(struct work_struct *work)
 		goto out;
 
 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
-		np = &nht->hash_buckets[i];
 
 		neigh_for_each_safe(n, tmp, &nht->hash_heads[i]) {
 			unsigned int state;
@@ -1009,7 +941,7 @@ static void neigh_periodic_work(struct work_struct *work)
 			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
 			    (n->flags & NTF_EXT_LEARNED)) {
 				write_unlock(&n->lock);
-				goto next_elt;
+				continue;
 			}
 
 			if (time_before(n->used, n->confirmed) &&
@@ -1020,9 +952,6 @@ static void neigh_periodic_work(struct work_struct *work)
 			    (state == NUD_FAILED ||
 			     !time_in_range_open(jiffies, n->used,
 						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
-				rcu_assign_pointer(*np,
-					rcu_dereference_protected(n->next,
-						lockdep_is_held(&tbl->lock)));
 				hlist_del_rcu(&n->hash);
 				neigh_mark_dead(n);
 				write_unlock(&n->lock);
@@ -1030,9 +959,6 @@ static void neigh_periodic_work(struct work_struct *work)
 				continue;
 			}
 			write_unlock(&n->lock);
-
-next_elt:
-			np = &n->next;
 		}
 		/*
 		 * It's fine to release lock here, even if hash table
@@ -3118,22 +3044,16 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
 		struct neighbour *n;
 		struct hlist_node *tmp;
-		struct neighbour __rcu **np;
 
-		np = &nht->hash_buckets[chain];
 		neigh_for_each_safe(n, tmp, &nht->hash_heads[chain]) {
 			int release;
 
 			write_lock(&n->lock);
 			release = cb(n);
 			if (release) {
-				rcu_assign_pointer(*np,
-					rcu_dereference_protected(n->next,
-						lockdep_is_held(&tbl->lock)));
 				hlist_del_rcu(&n->hash);
 				neigh_mark_dead(n);
-			} else
-				np = &n->next;
+			}
 			write_unlock(&n->lock);
 			if (release)
 				neigh_cleanup_and_release(n);
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH net-next v4 6/6] Create netdev->neighbour association
  2024-10-15 16:59 [PATCH net-next v4 0/6] Improve neigh_flush_dev performance Gilad Naaman
                   ` (4 preceding siblings ...)
  2024-10-15 16:59 ` [PATCH net-next v4 5/6] Remove bare neighbour::next pointer Gilad Naaman
@ 2024-10-15 16:59 ` Gilad Naaman
  2024-10-15 23:54   ` Kuniyuki Iwashima
  5 siblings, 1 reply; 16+ messages in thread
From: Gilad Naaman @ 2024-10-15 16:59 UTC (permalink / raw)
  To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Gilad Naaman, Kuniyuki Iwashima

Create a mapping between a netdev and its neighbours,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 .../networking/net_cachelines/net_device.rst  |  1 +
 include/linux/netdevice.h                     |  7 ++
 include/net/neighbour.h                       | 10 +--
 include/net/neighbour_tables.h                | 12 +++
 net/core/neighbour.c                          | 85 ++++++++++++-------
 net/mpls/af_mpls.c                            |  2 +-
 6 files changed, 75 insertions(+), 42 deletions(-)
 create mode 100644 include/net/neighbour_tables.h

diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index db6192b2bb50..2edb6ac1cab4 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -189,4 +189,5 @@ u64                                 max_pacing_offload_horizon
 struct_napi_config*                 napi_config
 unsigned_long                       gro_flush_timeout
 u32                                 napi_defer_hard_irqs
+struct hlist_head                   neighbours[2]
 =================================== =========================== =================== =================== ===================================================================================
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8feaca12655e..80bde95cc302 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@
 #include <net/net_trackers.h>
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
+#include <net/neighbour_tables.h>
 
 struct netpoll_info;
 struct device;
@@ -2034,6 +2035,9 @@ enum netdev_reg_state {
  *	@napi_defer_hard_irqs:	If not zero, provides a counter that would
  *				allow to avoid NIC hard IRQ, on busy queues.
  *
+ *	@neighbours:	List heads pointing to this device's neighbours'
+ *			dev_list, one per address-family.
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2443,6 +2447,9 @@ struct net_device {
 	 */
 	struct net_shaper_hierarchy *net_shaper_hierarchy;
 #endif
+
+	struct hlist_head neighbours[NEIGH_NR_TABLES];
+
 	u8			priv[] ____cacheline_aligned
 				       __counted_by(priv_len);
 } ____cacheline_aligned;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 21c0c20a0ed5..d0bc618d4fe2 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -29,6 +29,7 @@
 #include <linux/sysctl.h>
 #include <linux/workqueue.h>
 #include <net/rtnetlink.h>
+#include <net/neighbour_tables.h>
 
 /*
  * NUD stands for "neighbor unreachability detection"
@@ -136,6 +137,7 @@ struct neigh_statistics {
 
 struct neighbour {
 	struct hlist_node	hash;
+	struct hlist_node	dev_list;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
 	unsigned long		confirmed;
@@ -236,14 +238,6 @@ struct neigh_table {
 	struct pneigh_entry	**phash_buckets;
 };
 
-enum {
-	NEIGH_ARP_TABLE = 0,
-	NEIGH_ND_TABLE = 1,
-	NEIGH_DN_TABLE = 2,
-	NEIGH_NR_TABLES,
-	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
-};
-
 static inline int neigh_parms_family(struct neigh_parms *p)
 {
 	return p->tbl->family;
diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h
new file mode 100644
index 000000000000..bcffbe8f7601
--- /dev/null
+++ b/include/net/neighbour_tables.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_NEIGHBOUR_TABLES_H
+#define _NET_NEIGHBOUR_TABLES_H
+
+enum {
+	NEIGH_ARP_TABLE = 0,
+	NEIGH_ND_TABLE = 1,
+	NEIGH_NR_TABLES,
+	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
+};
+
+#endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 61b5f0d4896a..dbfd27f79bb8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -61,6 +61,19 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
 static const struct seq_operations neigh_stat_seq_ops;
 #endif
 
+static int family_to_neightbl_index(int family)
+{
+	switch (family) {
+	case AF_INET:
+		return NEIGH_ARP_TABLE;
+	case AF_INET6:
+		return NEIGH_ND_TABLE;
+	default:
+		DEBUG_NET_WARN_ON_ONCE(1);
+		return 0; /* to avoid panic by null-ptr-deref */
+	}
+}
+
 /*
    Neighbour hash table buckets are protected with rwlock tbl->lock.
 
@@ -216,6 +229,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
 	write_lock(&ndel->lock);
 	if (refcount_read(&ndel->refcnt) == 1) {
 		hlist_del_rcu(&ndel->hash);
+		hlist_del_rcu(&ndel->dev_list);
 		neigh_mark_dead(ndel);
 		retval = true;
 	}
@@ -357,46 +371,45 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 			    bool skip_perm)
 {
 	int i;
+	struct neighbour *n;
+	struct hlist_node *tmp;
 	struct neigh_hash_table *nht;
 
+	i = family_to_neightbl_index(tbl->family);
+
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
 
-	for (i = 0; i < (1 << nht->hash_shift); i++) {
-		struct neighbour *n;
-
-		neigh_for_each(n, &nht->hash_heads[i]) {
-			if (dev && n->dev != dev)
-				continue;
-			if (skip_perm && n->nud_state & NUD_PERMANENT)
-				continue;
+	hlist_for_each_entry_safe(n, tmp, &dev->neighbours[i], dev_list) {
+		if (skip_perm && n->nud_state & NUD_PERMANENT)
+			continue;
 
-			hlist_del_rcu(&n->hash);
-			write_lock(&n->lock);
-			neigh_del_timer(n);
-			neigh_mark_dead(n);
-			if (refcount_read(&n->refcnt) != 1) {
-				/* The most unpleasant situation.
-				   We must destroy neighbour entry,
-				   but someone still uses it.
-
-				   The destroy will be delayed until
-				   the last user releases us, but
-				   we must kill timers etc. and move
-				   it to safe state.
-				 */
-				__skb_queue_purge(&n->arp_queue);
-				n->arp_queue_len_bytes = 0;
-				WRITE_ONCE(n->output, neigh_blackhole);
-				if (n->nud_state & NUD_VALID)
-					n->nud_state = NUD_NOARP;
-				else
-					n->nud_state = NUD_NONE;
-				neigh_dbg(2, "neigh %p is stray\n", n);
-			}
-			write_unlock(&n->lock);
-			neigh_cleanup_and_release(n);
+		hlist_del_rcu(&n->hash);
+		hlist_del_rcu(&n->dev_list);
+		write_lock(&n->lock);
+		neigh_del_timer(n);
+		neigh_mark_dead(n);
+		if (refcount_read(&n->refcnt) != 1) {
+			/* The most unpleasant situation.
+			 * We must destroy neighbour entry,
+			 * but someone still uses it.
+			 *
+			 * The destroy will be delayed until
+			 * the last user releases us, but
+			 * we must kill timers etc. and move
+			 * it to safe state.
+			 */
+			__skb_queue_purge(&n->arp_queue);
+			n->arp_queue_len_bytes = 0;
+			WRITE_ONCE(n->output, neigh_blackhole);
+			if (n->nud_state & NUD_VALID)
+				n->nud_state = NUD_NOARP;
+			else
+				n->nud_state = NUD_NONE;
+			neigh_dbg(2, "neigh %p is stray\n", n);
 		}
+		write_unlock(&n->lock);
+		neigh_cleanup_and_release(n);
 	}
 }
 
@@ -672,6 +685,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
 	if (want_ref)
 		neigh_hold(n);
 	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+	error = family_to_neightbl_index(tbl->family);
+	hlist_add_head_rcu(&n->dev_list, &dev->neighbours[error]);
+
 	write_unlock_bh(&tbl->lock);
 	neigh_dbg(2, "neigh %p is created\n", n);
 	rc = n;
@@ -953,6 +970,7 @@ static void neigh_periodic_work(struct work_struct *work)
 			     !time_in_range_open(jiffies, n->used,
 						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
 				hlist_del_rcu(&n->hash);
+				hlist_del_rcu(&n->dev_list);
 				neigh_mark_dead(n);
 				write_unlock(&n->lock);
 				neigh_cleanup_and_release(n);
@@ -3052,6 +3070,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 			release = cb(n);
 			if (release) {
 				hlist_del_rcu(&n->hash);
+				hlist_del_rcu(&n->dev_list);
 				neigh_mark_dead(n);
 			}
 			write_unlock(&n->lock);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index df62638b6498..a0573847bc55 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1664,7 +1664,7 @@ static int nla_put_via(struct sk_buff *skb,
 		       u8 table, const void *addr, int alen)
 {
 	static const int table_to_family[NEIGH_NR_TABLES + 1] = {
-		AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
+		AF_INET, AF_INET6, AF_PACKET,
 	};
 	struct nlattr *nla;
 	struct rtvia *via;
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 1/6] Add hlist_node to struct neighbour
  2024-10-15 16:59 ` [PATCH net-next v4 1/6] Add hlist_node to struct neighbour Gilad Naaman
@ 2024-10-15 22:49   ` Kuniyuki Iwashima
  0 siblings, 0 replies; 16+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-15 22:49 UTC (permalink / raw)
  To: gnaaman; +Cc: davem, edumazet, kuba, kuniyu, netdev, pabeni

From: Gilad Naaman <gnaaman@drivenets.com>
Date: Tue, 15 Oct 2024 16:59:21 +0000
> @@ -531,7 +533,9 @@ static void neigh_get_hash_rnd(u32 *x)
>  static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
>  {
>  	size_t size = (1 << shift) * sizeof(struct neighbour *);
> +	size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head);
>  	struct neigh_hash_table *ret;
> +	struct hlist_head *hash_heads;
>  	struct neighbour __rcu **buckets;
>  	int i;

nit:

While at it, please sort variables in reverse xmas tree order.
Same for other places.

https://docs.kernel.org/process/maintainer-netdev.html#local-variable-ordering-reverse-xmas-tree-rcs
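
For example, the declarations above would then read (illustration
only, initializers elided):

	struct neighbour __rcu **buckets;
	struct hlist_head *hash_heads;
	struct neigh_hash_table *ret;
	size_t hash_heads_size;
	size_t size;
	int i;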

>  
> @@ -540,17 +544,28 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
>  		return NULL;
>  	if (size <= PAGE_SIZE) {
>  		buckets = kzalloc(size, GFP_ATOMIC);
> +		hash_heads = kzalloc(hash_heads_size, GFP_ATOMIC);
> +		if (!hash_heads)
> +			kfree(buckets);
>  	} else {
>  		buckets = (struct neighbour __rcu **)
>  			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
>  					   get_order(size));
>  		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
> +
> +		hash_heads = (struct hlist_head *)
> +			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
> +					   get_order(hash_heads_size));
> +		kmemleak_alloc(hash_heads, hash_heads_size, 1, GFP_ATOMIC);
> +		if (!hash_heads)
> +			free_pages((unsigned long)buckets, get_order(size));
>  	}
> -	if (!buckets) {
> +	if (!buckets || !hash_heads) {
>  		kfree(ret);
>  		return NULL;

If buckets is NULL and hash_heads isn't, hash_heads is leaked.


>  	}
>  	ret->hash_buckets = buckets;
> +	ret->hash_heads = hash_heads;
>  	ret->hash_shift = shift;
>  	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
>  		neigh_get_hash_rnd(&ret->hash_rnd[i]);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 2/6] Define neigh_for_each
  2024-10-15 16:59 ` [PATCH net-next v4 2/6] Define neigh_for_each Gilad Naaman
@ 2024-10-15 22:57   ` Kuniyuki Iwashima
  0 siblings, 0 replies; 16+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-15 22:57 UTC (permalink / raw)
  To: gnaaman; +Cc: davem, edumazet, kuba, kuniyu, netdev, pabeni

From: Gilad Naaman <gnaaman@drivenets.com>
Date: Tue, 15 Oct 2024 16:59:22 +0000
> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> index 800dfb64ec83..0bb46aba2502 100644
> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> @@ -3006,6 +3006,27 @@ static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data)
>  		rms->err = -ENOMEM;
>  }
>  
> +static void mlxsw_sp_neigh_for_each(struct neigh_table *tbl,
> +				    void (*cb)(struct neighbour *, void *),

The function pointer is no longer needed as cb() is always
mlxsw_sp_neigh_rif_made_sync_each().

Also, please CC this driver's maintainers next time.

  git show --format=email | ./scripts/get_maintainer.pl


> +				    void *cookie)
> +{
> +	int chain;
> +	struct neigh_hash_table *nht;

nit: reverse xmas tree order.


> +
> +	rcu_read_lock();
> +	nht = rcu_dereference(tbl->nht);
> +
> +	read_lock_bh(&tbl->lock); /* avoid resizes */
> +	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
> +		struct neighbour *n;
> +
> +		neigh_for_each(n, &nht->hash_heads[chain])
> +			cb(n, cookie);

This can be a direct call.
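
Something along these lines (untested sketch, keeping the cookie
argument as-is):

static void mlxsw_sp_neigh_for_each(struct neigh_table *tbl, void *cookie)
{
	struct neigh_hash_table *nht;
	struct neighbour *n;
	int chain;

	rcu_read_lock();
	nht = rcu_dereference(tbl->nht);

	read_lock_bh(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++)
		neigh_for_each(n, &nht->hash_heads[chain])
			mlxsw_sp_neigh_rif_made_sync_each(n, cookie);
	read_unlock_bh(&tbl->lock);
	rcu_read_unlock();
}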


> +	}
> +	read_unlock_bh(&tbl->lock);
> +	rcu_read_unlock();
> +}
> +
>  static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
>  					struct mlxsw_sp_rif *rif)
>  {
> @@ -3014,12 +3035,12 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
>  		.rif = rif,
>  	};
>  
> -	neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
> +	mlxsw_sp_neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
>  	if (rms.err)
>  		goto err_arp;
>  
>  #if IS_ENABLED(CONFIG_IPV6)
> -	neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
> +	mlxsw_sp_neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
>  #endif
>  	if (rms.err)
>  		goto err_nd;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist
  2024-10-15 16:59 ` [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist Gilad Naaman
@ 2024-10-15 23:25   ` Kuniyuki Iwashima
  2024-10-16  9:11     ` Gilad Naaman
  0 siblings, 1 reply; 16+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-15 23:25 UTC (permalink / raw)
  To: gnaaman; +Cc: davem, edumazet, kuba, kuniyu, netdev, pabeni

From: Gilad Naaman <gnaaman@drivenets.com>
Date: Tue, 15 Oct 2024 16:59:23 +0000
> Convert seq_file-related neighbour functionality to use neighbour::hash
> and the related for_each macro.
> 
> Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
> ---
>  include/net/neighbour.h |  4 ++++
>  net/core/neighbour.c    | 26 ++++++++++----------------
>  2 files changed, 14 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/neighbour.h b/include/net/neighbour.h
> index 2f4cb9e51364..7dc0d4d6a4a8 100644
> --- a/include/net/neighbour.h
> +++ b/include/net/neighbour.h
> @@ -279,6 +279,10 @@ static inline void *neighbour_priv(const struct neighbour *n)
>  extern const struct nla_policy nda_policy[];
>  
>  #define neigh_for_each(pos, head) hlist_for_each_entry(pos, head, hash)
> +#define neigh_hlist_entry(n) hlist_entry_safe(n, struct neighbour, hash)
> +#define neigh_first_rcu(bucket) \
> +	neigh_hlist_entry(rcu_dereference(hlist_first_rcu(bucket)))

No RCU helpers are needed, seq file is under RCU & tbl->lock

#define neigh_first_entry(bucket)				\
	hlist_entry_safe((bucket)->first, struct neighbour, hash)

> +

nit: unnecessary newline.


>  
>  static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
>  {
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index e91105a4c5ee..4bdf7649ca57 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -3207,25 +3207,21 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
>  
>  	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
>  	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
> -		n = rcu_dereference(nht->hash_buckets[bucket]);
> -
> -		while (n) {
> +		neigh_for_each(n, &nht->hash_heads[bucket]) {
>  			if (!net_eq(dev_net(n->dev), net))
> -				goto next;
> +				continue;
>  			if (state->neigh_sub_iter) {
>  				loff_t fakep = 0;
>  				void *v;
>  
>  				v = state->neigh_sub_iter(state, n, &fakep);
>  				if (!v)
> -					goto next;
> +					continue;
>  			}
>  			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
>  				break;

Let's avoid double-break and use goto just before setting bucket like:

out:
	state->bucket = bucket;


>  			if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
>  				break;

Same here.


> -next:
> -			n = rcu_dereference(n->next);
>  		}
>  
>  		if (n)

Then, this null check & break will be unnecessary.


> @@ -3249,34 +3245,32 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
>  		if (v)
>  			return n;
>  	}
> -	n = rcu_dereference(n->next);
>  
>  	while (1) {
> -		while (n) {
> +		hlist_for_each_entry_continue_rcu(n, hash) {
>  			if (!net_eq(dev_net(n->dev), net))
> -				goto next;
> +				continue;
>  			if (state->neigh_sub_iter) {
>  				void *v = state->neigh_sub_iter(state, n, pos);
>  				if (v)
>  					return n;
> -				goto next;
> +				continue;
>  			}
>  			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
>  				break;
>  
>  			if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
>  				break;

Same remark here.


> -next:
> -			n = rcu_dereference(n->next);
>  		}
>  
>  		if (n)
>  			break;

and here.

>  
> -		if (++state->bucket >= (1 << nht->hash_shift))
> -			break;

Let's keep this,


> +		while (!n && ++state->bucket < (1 << nht->hash_shift))
> +			n = neigh_first_rcu(&nht->hash_heads[state->bucket]);
>  
> -		n = rcu_dereference(nht->hash_buckets[state->bucket]);

and simply fetch neigh_first_entry().

Then, we can let the next loop handle the termination condition
and keep the code simple.


> +		if (!n)
> +			break;
>  	}
>  
>  	if (n && pos)
> -- 
> 2.46.0

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 4/6] Convert neighbour iteration to use hlist+macro
  2024-10-15 16:59 ` [PATCH net-next v4 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
@ 2024-10-15 23:38   ` Kuniyuki Iwashima
  2024-10-16  9:18     ` Gilad Naaman
  0 siblings, 1 reply; 16+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-15 23:38 UTC (permalink / raw)
  To: gnaaman; +Cc: davem, edumazet, kuba, kuniyu, netdev, pabeni

From: Gilad Naaman <gnaaman@drivenets.com>
Date: Tue, 15 Oct 2024 16:59:24 +0000
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 4bdf7649ca57..cca524a55c97 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -391,8 +391,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
>  		struct neighbour *n;
>  		struct neighbour __rcu **np = &nht->hash_buckets[i];
>  
> -		while ((n = rcu_dereference_protected(*np,
> -					lockdep_is_held(&tbl->lock))) != NULL) {
> +		neigh_for_each(n, &nht->hash_heads[i]) {
>  			if (dev && n->dev != dev) {
>  				np = &n->next;
>  				continue;
> @@ -427,6 +426,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
>  					n->nud_state = NUD_NONE;
>  				neigh_dbg(2, "neigh %p is stray\n", n);
>  			}
> +			np = &n->next;
>  			write_unlock(&n->lock);
>  			neigh_cleanup_and_release(n);
>  		}

Is this chunk necessary ?


> @@ -976,6 +970,7 @@ static void neigh_periodic_work(struct work_struct *work)
>  {
>  	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
>  	struct neighbour *n;
> +	struct hlist_node *tmp;
>  	struct neighbour __rcu **np;
>  	unsigned int i;
>  	struct neigh_hash_table *nht;

nit: let's sort variables in reverse xmas tree order.



> @@ -3124,11 +3117,11 @@ void __neigh_for_each_release(struct neigh_table *tbl,
>  					lockdep_is_held(&tbl->lock));
>  	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
>  		struct neighbour *n;
> +		struct hlist_node *tmp;
>  		struct neighbour __rcu **np;

nit: reverse xmas tree order.


>  
>  		np = &nht->hash_buckets[chain];
> -		while ((n = rcu_dereference_protected(*np,
> -					lockdep_is_held(&tbl->lock))) != NULL) {
> +		neigh_for_each_safe(n, tmp, &nht->hash_heads[chain]) {
>  			int release;
>  
>  			write_lock(&n->lock);
> -- 
> 2.46.0

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 6/6] Create netdev->neighbour association
  2024-10-15 16:59 ` [PATCH net-next v4 6/6] Create netdev->neighbour association Gilad Naaman
@ 2024-10-15 23:54   ` Kuniyuki Iwashima
  0 siblings, 0 replies; 16+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-15 23:54 UTC (permalink / raw)
  To: gnaaman; +Cc: davem, edumazet, kuba, kuniyu, netdev, pabeni

From: Gilad Naaman <gnaaman@drivenets.com>
Date: Tue, 15 Oct 2024 16:59:26 +0000
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 61b5f0d4896a..dbfd27f79bb8 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -61,6 +61,19 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
>  static const struct seq_operations neigh_stat_seq_ops;
>  #endif
>  
> +static int family_to_neightbl_index(int family)
> +{

Let's return struct hlist_head * like this:

static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
{
	int i;

	switch (family) {
	default:
		DEBUG_NET_WARN_ON_ONCE(1);
		fallthrough; /* to avoid panic by null-ptr-deref */
	case AF_INET:
		i = NEIGH_ARP_TABLE;
		break;
	case AF_INET6:
		i = NEIGH_ND_TABLE;
		break;
	}

	return &dev->neighbours[i];
}


>  @@ -357,46 +371,45 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
>  			    bool skip_perm)
>  {
>  	int i;
> +	struct neighbour *n;
> +	struct hlist_node *tmp;

nit: reverse xmas tree order.

and cache hlist_head * from neigh_get_dev_table().

>  	struct neigh_hash_table *nht;

nht is no longer used ?


>  
> +	i = family_to_neightbl_index(tbl->family);
> +
>  	nht = rcu_dereference_protected(tbl->nht,
>  					lockdep_is_held(&tbl->lock));
>  
> -	for (i = 0; i < (1 << nht->hash_shift); i++) {
> -		struct neighbour *n;
> -
> -		neigh_for_each(n, &nht->hash_heads[i]) {
> -			if (dev && n->dev != dev)
> -				continue;
> -			if (skip_perm && n->nud_state & NUD_PERMANENT)
> -				continue;
> +	hlist_for_each_entry_safe(n, tmp, &dev->neighbours[i], dev_list) {
> +		if (skip_perm && n->nud_state & NUD_PERMANENT)
> +			continue;
>  
> -			hlist_del_rcu(&n->hash);
> -			write_lock(&n->lock);
> -			neigh_del_timer(n);
> -			neigh_mark_dead(n);
> -			if (refcount_read(&n->refcnt) != 1) {
> -				/* The most unpleasant situation.
> -				   We must destroy neighbour entry,
> -				   but someone still uses it.
> -
> -				   The destroy will be delayed until
> -				   the last user releases us, but
> -				   we must kill timers etc. and move
> -				   it to safe state.
> -				 */
> -				__skb_queue_purge(&n->arp_queue);
> -				n->arp_queue_len_bytes = 0;
> -				WRITE_ONCE(n->output, neigh_blackhole);
> -				if (n->nud_state & NUD_VALID)
> -					n->nud_state = NUD_NOARP;
> -				else
> -					n->nud_state = NUD_NONE;
> -				neigh_dbg(2, "neigh %p is stray\n", n);
> -			}
> -			write_unlock(&n->lock);
> -			neigh_cleanup_and_release(n);
> +		hlist_del_rcu(&n->hash);
> +		hlist_del_rcu(&n->dev_list);
> +		write_lock(&n->lock);
> +		neigh_del_timer(n);
> +		neigh_mark_dead(n);
> +		if (refcount_read(&n->refcnt) != 1) {
> +			/* The most unpleasant situation.
> +			 * We must destroy neighbour entry,
> +			 * but someone still uses it.
> +			 *
> +			 * The destroy will be delayed until
> +			 * the last user releases us, but
> +			 * we must kill timers etc. and move
> +			 * it to safe state.
> +			 */
> +			__skb_queue_purge(&n->arp_queue);
> +			n->arp_queue_len_bytes = 0;
> +			WRITE_ONCE(n->output, neigh_blackhole);
> +			if (n->nud_state & NUD_VALID)
> +				n->nud_state = NUD_NOARP;
> +			else
> +				n->nud_state = NUD_NONE;
> +			neigh_dbg(2, "neigh %p is stray\n", n);
>  		}
> +		write_unlock(&n->lock);
> +		neigh_cleanup_and_release(n);
>  	}
>  }
>  
> @@ -672,6 +685,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
>  	if (want_ref)
>  		neigh_hold(n);
>  	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
> +
> +	error = family_to_neightbl_index(tbl->family);
> +	hlist_add_head_rcu(&n->dev_list, &dev->neighbours[error]);

Let's use neigh_get_dev_table() directly.

> +
>  	write_unlock_bh(&tbl->lock);
>  	neigh_dbg(2, "neigh %p is created\n", n);
>  	rc = n;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist
  2024-10-15 23:25   ` Kuniyuki Iwashima
@ 2024-10-16  9:11     ` Gilad Naaman
  2024-10-16 20:54       ` Kuniyuki Iwashima
  0 siblings, 1 reply; 16+ messages in thread
From: Gilad Naaman @ 2024-10-16  9:11 UTC (permalink / raw)
  To: kuniyu; +Cc: davem, edumazet, gnaaman, kuba, netdev, pabeni

> > -		if (++state->bucket >= (1 << nht->hash_shift))
> > -			break;
> 
> Let's keep this,
> 
> 
> > +		while (!n && ++state->bucket < (1 << nht->hash_shift))
> > +			n = neigh_first_rcu(&nht->hash_heads[state->bucket]);
> >  
> > -		n = rcu_dereference(nht->hash_buckets[state->bucket]);
> 
> and simply fetch neigh_first_entry().
> 
> Then, we can let the next loop handle the termination condition
> and keep the code simple.

Unfortunately `hlist_for_each_entry_continue_rcu` dereferences `n`
first thing using `hlist_next_rcu`, before checking for NULL:

    #define hlist_for_each_entry_continue_rcu(pos, member)			\
    	for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \
    			&(pos)->member)), typeof(*(pos)), member);	\
    	     pos;							\
    	     pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(	\
    			&(pos)->member)), typeof(*(pos)), member))

If I'm using it, I have to add a null-check after calling `neigh_first_entry`.

Another alternative is to use `hlist_for_each_entry_from_rcu`,
but it would require advancing to the next entry once before the loop.

Is this preferable?
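
Roughly (illustration only; assumes `n` is non-NULL at this point):

    /* Step past the current entry once, then iterate from the next one,
     * preserving the "continue" semantics.
     */
    n = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&n->hash)),
                         struct neighbour, hash);

    hlist_for_each_entry_from_rcu(n, hash) {
        /* per-entry checks as before */
    }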

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 4/6] Convert neighbour iteration to use hlist+macro
  2024-10-15 23:38   ` Kuniyuki Iwashima
@ 2024-10-16  9:18     ` Gilad Naaman
  0 siblings, 0 replies; 16+ messages in thread
From: Gilad Naaman @ 2024-10-16  9:18 UTC (permalink / raw)
  To: kuniyu; +Cc: davem, edumazet, gnaaman, kuba, netdev, pabeni


> > @@ -427,6 +426,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
> >  					n->nud_state = NUD_NONE;
> >  				neigh_dbg(2, "neigh %p is stray\n", n);
> >  			}
> > +			np = &n->next;
> >  			write_unlock(&n->lock);
> >  			neigh_cleanup_and_release(n);
> >  		}
> 
> Is this chunk necessary ?

Apologies, you're right.

I mixed up the skip path with regular loop maintenance,
and thought I needed to compensate for removing the original loop logic.
Obviously wrong in retrospect.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist
  2024-10-16  9:11     ` Gilad Naaman
@ 2024-10-16 20:54       ` Kuniyuki Iwashima
  2024-10-17  6:31         ` Gilad Naaman
  0 siblings, 1 reply; 16+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-16 20:54 UTC (permalink / raw)
  To: gnaaman; +Cc: davem, edumazet, kuba, kuniyu, netdev, pabeni

From: Gilad Naaman <gnaaman@drivenets.com>
Date: Wed, 16 Oct 2024 09:11:52 +0000
> > > -		if (++state->bucket >= (1 << nht->hash_shift))
> > > -			break;
> > 
> > Let's keep this,
> > 
> > 
> > > +		while (!n && ++state->bucket < (1 << nht->hash_shift))
> > > +			n = neigh_first_rcu(&nht->hash_heads[state->bucket]);
> > >  
> > > -		n = rcu_dereference(nht->hash_buckets[state->bucket]);
> > 
> > and simply fetch neigh_first_entry().
> > 
> > Then, we can let the next loop handle the termination condition
> > and keep the code simple.
> 
> Unfortunately `hlist_for_each_entry_continue_rcu` dereferences `n`
> first thing using `hlist_next_rcu`, before checking for NULL:

Right, and I noticed we can't even use neigh_first_entry() after
checking it against NULL, because the first entry would then be skipped
by hlist_for_each_entry_continue_rcu().

> 
>     #define hlist_for_each_entry_continue_rcu(pos, member)			\
>     	for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \
>     			&(pos)->member)), typeof(*(pos)), member);	\
>     	     pos;							\
>     	     pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(	\
>     			&(pos)->member)), typeof(*(pos)), member))
> 
> If I'm using it, I have to add a null-check after calling `neigh_first_entry`.
> 
> Another alternative is to use `hlist_for_each_entry_from_rcu`,
> but it'll require calling `next` one time before the loop.
> 
> Is this preferable?

How about factoring the per-entry checks out of the loops, using
hlist_for_each_entry_continue(), and calling neigh_get_first()
from neigh_get_next()?

completely not tested:

---8<---
static struct neighbour *neigh_get_valid(struct seq_file *seq,
					 struct neighbour *n,
					 loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);

	if (!net_eq(dev_net(n->dev), net))
		return NULL;

	if (state->neigh_sub_iter) {
		loff_t fakep = 0;
		void *v;

		v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
		if (!v)
			return NULL;
		if (pos)
			return v;
	}

	if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
		return n;

	if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
		return n;

	return NULL;
}

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n, *tmp;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;

	while (++state->bucket < (1 << nht->hash_shift)) {
		neigh_for_each(n, &nht->hash_heads[state->bucket]) {
			tmp = neigh_get_valid(seq, n, NULL);
			if (tmp)
				return tmp;
		}
	}

	return NULL;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct neighbour *tmp;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);

		if (v)
			return n;
	}

	hlist_for_each_entry_continue(n, hash) {
		tmp = neigh_get_valid(seq, n, pos);
		if (tmp) {
			n = tmp;
			goto out;
		}
	}

	n = neigh_get_first(seq);
out:
	if (n && pos)
		--(*pos);

	return n;
}
---8<---

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist
  2024-10-16 20:54       ` Kuniyuki Iwashima
@ 2024-10-17  6:31         ` Gilad Naaman
  0 siblings, 0 replies; 16+ messages in thread
From: Gilad Naaman @ 2024-10-17  6:31 UTC (permalink / raw)
  To: kuniyu; +Cc: davem, edumazet, gnaaman, kuba, netdev, pabeni


> How about factorising the operations inside loops and use
> hlist_for_each_entry_continue() and call neigh_get_first()
> in neigh_get_next() ?

Yes, this works.
(Just had to initialize `state->bucket` to `-1`, since `neigh_get_first`
always advances it first; otherwise the first bucket would have been skipped.)
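
For reference, roughly (sketch, not the exact diff):

	/* in neigh_seq_start(): pre-set so neigh_get_first()'s
	 * pre-increment lands on bucket 0
	 */
	state->bucket = -1;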

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2024-10-17  6:31 UTC | newest]

Thread overview: 16+ messages
2024-10-15 16:59 [PATCH net-next v4 0/6] Improve neigh_flush_dev performance Gilad Naaman
2024-10-15 16:59 ` [PATCH net-next v4 1/6] Add hlist_node to struct neighbour Gilad Naaman
2024-10-15 22:49   ` Kuniyuki Iwashima
2024-10-15 16:59 ` [PATCH net-next v4 2/6] Define neigh_for_each Gilad Naaman
2024-10-15 22:57   ` Kuniyuki Iwashima
2024-10-15 16:59 ` [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist Gilad Naaman
2024-10-15 23:25   ` Kuniyuki Iwashima
2024-10-16  9:11     ` Gilad Naaman
2024-10-16 20:54       ` Kuniyuki Iwashima
2024-10-17  6:31         ` Gilad Naaman
2024-10-15 16:59 ` [PATCH net-next v4 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
2024-10-15 23:38   ` Kuniyuki Iwashima
2024-10-16  9:18     ` Gilad Naaman
2024-10-15 16:59 ` [PATCH net-next v4 5/6] Remove bare neighbour::next pointer Gilad Naaman
2024-10-15 16:59 ` [PATCH net-next v4 6/6] Create netdev->neighbour association Gilad Naaman
2024-10-15 23:54   ` Kuniyuki Iwashima
