* [PATCH net-next v5 0/6] Improve neigh_flush_dev performance
@ 2024-10-17 7:04 Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 1/6] Add hlist_node to struct neighbour Gilad Naaman
` (5 more replies)
0 siblings, 6 replies; 12+ messages in thread
From: Gilad Naaman @ 2024-10-17 7:04 UTC (permalink / raw)
To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: Gilad Naaman, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
This patchsets improves the performance of neigh_flush_dev.
Currently, the only way to implement it requires traversing
all neighbours known to the kernel, across all network-namespaces.
This means that some flows are slowed down as a function of neighbour-scale,
even if the specific link they're handling has little to no neighbours.
In order to solve this, this patchset adds a netdev->neighbours list,
as well as making the original linked-list doubly-, so that it is
possible to unlink neighbours without traversing the hash-bucket to
obtain the previous neighbour.
The original use-case we encountered was mass-deletion of links (12K
VLANs) while there are 50K ARPs and 50K NDPs in the system; though the
slowdowns would also appear when the links are set down.
Changes in v5:
- Fix leak in neigh_hash_alloc
- Refactor seq_file functions for readability
- Reverse xmasify
Gilad Naaman (6):
Add hlist_node to struct neighbour
Define neigh_for_each
Convert neigh_* seq_file functions to use hlist
Convert neighbour iteration to use hlist+macro
Remove bare neighbour::next pointer
Create netdev->neighbour association
.../networking/net_cachelines/net_device.rst | 1 +
.../ethernet/mellanox/mlxsw/spectrum_router.c | 24 +-
include/linux/netdevice.h | 7 +
include/net/neighbour.h | 26 +-
include/net/neighbour_tables.h | 12 +
net/core/neighbour.c | 365 +++++++-----------
6 files changed, 203 insertions(+), 232 deletions(-)
create mode 100644 include/net/neighbour_tables.h
--
2.46.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH net-next v5 1/6] Add hlist_node to struct neighbour
2024-10-17 7:04 [PATCH net-next v5 0/6] Improve neigh_flush_dev performance Gilad Naaman
@ 2024-10-17 7:04 ` Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 2/6] Define neigh_for_each Gilad Naaman
` (4 subsequent siblings)
5 siblings, 0 replies; 12+ messages in thread
From: Gilad Naaman @ 2024-10-17 7:04 UTC (permalink / raw)
To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: Gilad Naaman, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
Add a doubly-linked node to neighbours, so that they
can be deleted without iterating the entire bucket they're in.
Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
include/net/neighbour.h | 2 ++
net/core/neighbour.c | 40 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 3887ed9e5026..0402447854c7 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -136,6 +136,7 @@ struct neigh_statistics {
struct neighbour {
struct neighbour __rcu *next;
+ struct hlist_node hash;
struct neigh_table *tbl;
struct neigh_parms *parms;
unsigned long confirmed;
@@ -191,6 +192,7 @@ struct pneigh_entry {
struct neigh_hash_table {
struct neighbour __rcu **hash_buckets;
+ struct hlist_head *hash_heads;
unsigned int hash_shift;
__u32 hash_rnd[NEIGH_NUM_HASH_RND];
struct rcu_head rcu;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 395ae1626eef..45c8df801dfb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -217,6 +217,7 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
neigh = rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock));
rcu_assign_pointer(*np, neigh);
+ hlist_del_rcu(&n->hash);
neigh_mark_dead(n);
retval = true;
}
@@ -403,6 +404,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
+ hlist_del_rcu(&n->hash);
write_lock(&n->lock);
neigh_del_timer(n);
neigh_mark_dead(n);
@@ -530,27 +532,47 @@ static void neigh_get_hash_rnd(u32 *x)
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
+ size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head);
size_t size = (1 << shift) * sizeof(struct neighbour *);
- struct neigh_hash_table *ret;
struct neighbour __rcu **buckets;
+ struct hlist_head *hash_heads;
+ struct neigh_hash_table *ret;
int i;
+ hash_heads = NULL;
+
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
return NULL;
if (size <= PAGE_SIZE) {
buckets = kzalloc(size, GFP_ATOMIC);
+
+ if (buckets) {
+ hash_heads = kzalloc(hash_heads_size, GFP_ATOMIC);
+ if (!hash_heads)
+ kfree(buckets);
+ }
} else {
buckets = (struct neighbour __rcu **)
__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
get_order(size));
kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
+
+ if (buckets) {
+ hash_heads = (struct hlist_head *)
+ __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+ get_order(hash_heads_size));
+ kmemleak_alloc(hash_heads, hash_heads_size, 1, GFP_ATOMIC);
+ if (!hash_heads)
+ free_pages((unsigned long)buckets, get_order(size));
+ }
}
- if (!buckets) {
+ if (!buckets || !hash_heads) {
kfree(ret);
return NULL;
}
ret->hash_buckets = buckets;
+ ret->hash_heads = hash_heads;
ret->hash_shift = shift;
for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
neigh_get_hash_rnd(&ret->hash_rnd[i]);
@@ -564,6 +586,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
rcu);
size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
struct neighbour __rcu **buckets = nht->hash_buckets;
+ size_t hash_heads_size = (1 << nht->hash_shift) * sizeof(struct hlist_head);
+ struct hlist_head *hash_heads = nht->hash_heads;
if (size <= PAGE_SIZE) {
kfree(buckets);
@@ -571,6 +595,13 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
kmemleak_free(buckets);
free_pages((unsigned long)buckets, get_order(size));
}
+
+ if (hash_heads_size < PAGE_SIZE) {
+ kfree(hash_heads);
+ } else {
+ kmemleak_free(hash_heads);
+ free_pages((unsigned long)hash_heads, get_order(hash_heads_size));
+ }
kfree(nht);
}
@@ -607,6 +638,8 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
new_nht->hash_buckets[hash],
lockdep_is_held(&tbl->lock)));
rcu_assign_pointer(new_nht->hash_buckets[hash], n);
+ hlist_del_rcu(&n->hash);
+ hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
}
}
@@ -717,6 +750,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
rcu_dereference_protected(nht->hash_buckets[hash_val],
lockdep_is_held(&tbl->lock)));
rcu_assign_pointer(nht->hash_buckets[hash_val], n);
+ hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
@@ -1002,6 +1036,7 @@ static void neigh_periodic_work(struct work_struct *work)
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
+ hlist_del_rcu(&n->hash);
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@@ -3131,6 +3166,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
+ hlist_del_rcu(&n->hash);
neigh_mark_dead(n);
} else
np = &n->next;
--
2.46.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH net-next v5 2/6] Define neigh_for_each
2024-10-17 7:04 [PATCH net-next v5 0/6] Improve neigh_flush_dev performance Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 1/6] Add hlist_node to struct neighbour Gilad Naaman
@ 2024-10-17 7:04 ` Gilad Naaman
2024-10-17 16:26 ` Petr Machata
2024-10-17 7:04 ` [PATCH net-next v5 3/6] Convert neigh_* seq_file functions to use hlist Gilad Naaman
` (3 subsequent siblings)
5 siblings, 1 reply; 12+ messages in thread
From: Gilad Naaman @ 2024-10-17 7:04 UTC (permalink / raw)
To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: Gilad Naaman, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
Define neigh_for_each in neighbour.h and move old definition
to its only point of usage within the mlxsw driver.
Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
.../ethernet/mellanox/mlxsw/spectrum_router.c | 24 +++++++++++++++++--
include/net/neighbour.h | 4 ++--
net/core/neighbour.c | 22 -----------------
3 files changed, 24 insertions(+), 26 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 800dfb64ec83..de62587c5a63 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3006,6 +3006,26 @@ static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data)
rms->err = -ENOMEM;
}
+static void mlxsw_sp_neigh_for_each(struct neigh_table *tbl,
+ void *cookie)
+{
+ struct neigh_hash_table *nht;
+ int chain;
+
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);
+
+ read_lock_bh(&tbl->lock); /* avoid resizes */
+ for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
+ struct neighbour *n;
+
+ neigh_for_each(n, &nht->hash_heads[chain])
+ mlxsw_sp_neigh_rif_made_sync_each(n, cookie);
+ }
+ read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
+}
+
static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif)
{
@@ -3014,12 +3034,12 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
.rif = rif,
};
- neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
+ mlxsw_sp_neigh_for_each(&arp_tbl, &rms);
if (rms.err)
goto err_arp;
#if IS_ENABLED(CONFIG_IPV6)
- neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
+ mlxsw_sp_neigh_for_each(&nd_tbl, &rms);
#endif
if (rms.err)
goto err_nd;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 0402447854c7..37303656ab65 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -277,6 +277,8 @@ static inline void *neighbour_priv(const struct neighbour *n)
extern const struct nla_policy nda_policy[];
+#define neigh_for_each(pos, head) hlist_for_each_entry(pos, head, hash)
+
static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
{
return *(const u32 *)n->primary_key == *(const u32 *)pkey;
@@ -390,8 +392,6 @@ static inline struct net *pneigh_net(const struct pneigh_entry *pneigh)
}
void neigh_app_ns(struct neighbour *n);
-void neigh_for_each(struct neigh_table *tbl,
- void (*cb)(struct neighbour *, void *), void *cookie);
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *));
int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 45c8df801dfb..d9c458e6f627 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3120,28 +3120,6 @@ static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
}
-void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
-{
- int chain;
- struct neigh_hash_table *nht;
-
- rcu_read_lock();
- nht = rcu_dereference(tbl->nht);
-
- read_lock_bh(&tbl->lock); /* avoid resizes */
- for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
- struct neighbour *n;
-
- for (n = rcu_dereference(nht->hash_buckets[chain]);
- n != NULL;
- n = rcu_dereference(n->next))
- cb(n, cookie);
- }
- read_unlock_bh(&tbl->lock);
- rcu_read_unlock();
-}
-EXPORT_SYMBOL(neigh_for_each);
-
/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
--
2.46.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH net-next v5 3/6] Convert neigh_* seq_file functions to use hlist
2024-10-17 7:04 [PATCH net-next v5 0/6] Improve neigh_flush_dev performance Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 1/6] Add hlist_node to struct neighbour Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 2/6] Define neigh_for_each Gilad Naaman
@ 2024-10-17 7:04 ` Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
` (2 subsequent siblings)
5 siblings, 0 replies; 12+ messages in thread
From: Gilad Naaman @ 2024-10-17 7:04 UTC (permalink / raw)
To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: Gilad Naaman, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
Convert seq_file-related neighbour functionality to use neighbour::hash
and the related for_each macro.
Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
include/net/neighbour.h | 2 +
net/core/neighbour.c | 104 +++++++++++++++++++---------------------
2 files changed, 50 insertions(+), 56 deletions(-)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 37303656ab65..14f08a2e4f74 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -278,6 +278,8 @@ static inline void *neighbour_priv(const struct neighbour *n)
extern const struct nla_policy nda_policy[];
#define neigh_for_each(pos, head) hlist_for_each_entry(pos, head, hash)
+#define neigh_first_entry(bucket) \
+ hlist_entry_safe((bucket)->first, struct neighbour, hash)
static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
{
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d9c458e6f627..e4e31d2ca2ea 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3204,43 +3204,53 @@ EXPORT_SYMBOL(neigh_xmit);
#ifdef CONFIG_PROC_FS
-static struct neighbour *neigh_get_first(struct seq_file *seq)
+static struct neighbour *neigh_get_valid(struct seq_file *seq,
+ struct neighbour *n,
+ loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
+
+ if (!net_eq(dev_net(n->dev), net))
+ return NULL;
+
+ if (state->neigh_sub_iter) {
+ loff_t fakep = 0;
+ void *v;
+
+ v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
+ if (!v)
+ return NULL;
+ if (pos)
+ return v;
+ }
+
+ if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+ return n;
+
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
+ return n;
+
+ return NULL;
+}
+
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+ struct neigh_seq_state *state = seq->private;
struct neigh_hash_table *nht = state->nht;
- struct neighbour *n = NULL;
- int bucket;
+ struct neighbour *n, *tmp;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
- n = rcu_dereference(nht->hash_buckets[bucket]);
-
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- loff_t fakep = 0;
- void *v;
- v = state->neigh_sub_iter(state, n, &fakep);
- if (!v)
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference(n->next);
+ while (++state->bucket < (1 << nht->hash_shift)) {
+ neigh_for_each(n, &nht->hash_heads[state->bucket]) {
+ tmp = neigh_get_valid(seq, n, NULL);
+ if (tmp)
+ return tmp;
}
-
- if (n)
- break;
}
- state->bucket = bucket;
- return n;
+ return NULL;
}
static struct neighbour *neigh_get_next(struct seq_file *seq,
@@ -3248,46 +3258,28 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
- struct net *net = seq_file_net(seq);
- struct neigh_hash_table *nht = state->nht;
+ struct neighbour *tmp;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
+
if (v)
return n;
}
- n = rcu_dereference(n->next);
-
- while (1) {
- while (n) {
- if (!net_eq(dev_net(n->dev), net))
- goto next;
- if (state->neigh_sub_iter) {
- void *v = state->neigh_sub_iter(state, n, pos);
- if (v)
- return n;
- goto next;
- }
- if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
- if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
- break;
-next:
- n = rcu_dereference(n->next);
+ hlist_for_each_entry_continue(n, hash) {
+ tmp = neigh_get_valid(seq, n, pos);
+ if (tmp) {
+ n = tmp;
+ goto out;
}
-
- if (n)
- break;
-
- if (++state->bucket >= (1 << nht->hash_shift))
- break;
-
- n = rcu_dereference(nht->hash_buckets[state->bucket]);
}
+ n = neigh_get_first(seq);
+out:
if (n && pos)
--(*pos);
+
return n;
}
@@ -3390,7 +3382,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
struct neigh_seq_state *state = seq->private;
state->tbl = tbl;
- state->bucket = 0;
+ state->bucket = -1;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
rcu_read_lock();
--
2.46.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH net-next v5 4/6] Convert neighbour iteration to use hlist+macro
2024-10-17 7:04 [PATCH net-next v5 0/6] Improve neigh_flush_dev performance Gilad Naaman
` (2 preceding siblings ...)
2024-10-17 7:04 ` [PATCH net-next v5 3/6] Convert neigh_* seq_file functions to use hlist Gilad Naaman
@ 2024-10-17 7:04 ` Gilad Naaman
2024-10-18 11:42 ` Simon Horman
2024-10-17 7:04 ` [PATCH net-next v5 5/6] Remove bare neighbour::next pointer Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 6/6] Create netdev->neighbour association Gilad Naaman
5 siblings, 1 reply; 12+ messages in thread
From: Gilad Naaman @ 2024-10-17 7:04 UTC (permalink / raw)
To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: Gilad Naaman, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
Remove all usage of the bare neighbour::next pointer,
replacing them with neighbour::hash and its for_each macro.
Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
include/net/neighbour.h | 7 +++----
net/core/neighbour.c | 34 +++++++++++++---------------------
2 files changed, 16 insertions(+), 25 deletions(-)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 14f08a2e4f74..96528a6cd74b 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -278,6 +278,8 @@ static inline void *neighbour_priv(const struct neighbour *n)
extern const struct nla_policy nda_policy[];
#define neigh_for_each(pos, head) hlist_for_each_entry(pos, head, hash)
+#define neigh_for_each_safe(pos, tmp, head) \
+ hlist_for_each_entry_safe(pos, tmp, head, hash)
#define neigh_first_entry(bucket) \
hlist_entry_safe((bucket)->first, struct neighbour, hash)
@@ -309,12 +311,9 @@ static inline struct neighbour *___neigh_lookup_noref(
u32 hash_val;
hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
- for (n = rcu_dereference(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference(n->next)) {
+ neigh_for_each(n, &nht->hash_heads[hash_val])
if (n->dev == dev && key_eq(n, pkey))
return n;
- }
return NULL;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index e4e31d2ca2ea..a1dd419655a1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -391,8 +391,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
struct neighbour *n;
struct neighbour __rcu **np = &nht->hash_buckets[i];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each(n, &nht->hash_heads[i]) {
if (dev && n->dev != dev) {
np = &n->next;
continue;
@@ -621,11 +620,9 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
for (i = 0; i < (1 << old_nht->hash_shift); i++) {
struct neighbour *n, *next;
+ struct hlist_node *tmp;
- for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
- lockdep_is_held(&tbl->lock));
- n != NULL;
- n = next) {
+ neigh_for_each_safe(n, tmp, &old_nht->hash_heads[i]) {
hash = tbl->hash(n->primary_key, n->dev,
new_nht->hash_rnd);
@@ -726,11 +723,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
goto out_tbl_unlock;
}
- for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock));
- n1 != NULL;
- n1 = rcu_dereference_protected(n1->next,
- lockdep_is_held(&tbl->lock))) {
+ neigh_for_each(n1, &nht->hash_heads[hash_val]) {
if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
if (want_ref)
neigh_hold(n1);
@@ -982,10 +975,11 @@ static void neigh_connect(struct neighbour *neigh)
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
- struct neighbour *n;
+ struct neigh_hash_table *nht;
struct neighbour __rcu **np;
+ struct hlist_node *tmp;
+ struct neighbour *n;
unsigned int i;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
@@ -1012,8 +1006,7 @@ static void neigh_periodic_work(struct work_struct *work)
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_safe(n, tmp, &nht->hash_heads[i]) {
unsigned int state;
write_lock(&n->lock);
@@ -2763,9 +2756,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
for (h = s_h; h < (1 << nht->hash_shift); h++) {
if (h > s_h)
s_idx = 0;
- for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0;
- n != NULL;
- n = rcu_dereference(n->next)) {
+ idx = 0;
+ neigh_for_each(n, &nht->hash_heads[h]) {
if (idx < s_idx || !net_eq(dev_net(n->dev), net))
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
@@ -3124,18 +3116,18 @@ static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
{
- int chain;
struct neigh_hash_table *nht;
+ int chain;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
+ struct hlist_node *tmp;
struct neighbour __rcu **np;
np = &nht->hash_buckets[chain];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
+ neigh_for_each_safe(n, tmp, &nht->hash_heads[chain]) {
int release;
write_lock(&n->lock);
--
2.46.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH net-next v5 5/6] Remove bare neighbour::next pointer
2024-10-17 7:04 [PATCH net-next v5 0/6] Improve neigh_flush_dev performance Gilad Naaman
` (3 preceding siblings ...)
2024-10-17 7:04 ` [PATCH net-next v5 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
@ 2024-10-17 7:04 ` Gilad Naaman
2024-10-18 4:56 ` kernel test robot
2024-10-17 7:04 ` [PATCH net-next v5 6/6] Create netdev->neighbour association Gilad Naaman
5 siblings, 1 reply; 12+ messages in thread
From: Gilad Naaman @ 2024-10-17 7:04 UTC (permalink / raw)
To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: Gilad Naaman, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
Remove the now-unused neighbour::next pointer, leaving struct neighbour
solely with the hlist_node implementation.
Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
include/net/neighbour.h | 2 -
net/core/neighbour.c | 132 ++++++++--------------------------------
2 files changed, 24 insertions(+), 110 deletions(-)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 96528a6cd74b..61c85f5e1235 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -135,7 +135,6 @@ struct neigh_statistics {
#define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field)
struct neighbour {
- struct neighbour __rcu *next;
struct hlist_node hash;
struct neigh_table *tbl;
struct neigh_parms *parms;
@@ -191,7 +190,6 @@ struct pneigh_entry {
#define NEIGH_NUM_HASH_RND 4
struct neigh_hash_table {
- struct neighbour __rcu **hash_buckets;
struct hlist_head *hash_heads;
unsigned int hash_shift;
__u32 hash_rnd[NEIGH_NUM_HASH_RND];
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a1dd419655a1..1c49515b850b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -205,49 +205,24 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
}
}
-static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
- struct neigh_table *tbl)
-{
- bool retval = false;
-
- write_lock(&n->lock);
- if (refcount_read(&n->refcnt) == 1) {
- struct neighbour *neigh;
-
- neigh = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(*np, neigh);
- hlist_del_rcu(&n->hash);
- neigh_mark_dead(n);
- retval = true;
- }
- write_unlock(&n->lock);
- if (retval)
- neigh_cleanup_and_release(n);
- return retval;
-}
-
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
struct neigh_hash_table *nht;
- void *pkey = ndel->primary_key;
- u32 hash_val;
- struct neighbour *n;
- struct neighbour __rcu **np;
+ bool retval = false;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
- hash_val = hash_val >> (32 - nht->hash_shift);
- np = &nht->hash_buckets[hash_val];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock)))) {
- if (n == ndel)
- return neigh_del(n, np, tbl);
- np = &n->next;
+ write_lock(&ndel->lock);
+ if (refcount_read(&ndel->refcnt) == 1) {
+ hlist_del_rcu(&ndel->hash);
+ neigh_mark_dead(ndel);
+ retval = true;
}
- return false;
+ write_unlock(&ndel->lock);
+ if (retval)
+ neigh_cleanup_and_release(ndel);
+ return retval;
}
static int neigh_forced_gc(struct neigh_table *tbl)
@@ -389,20 +364,13 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
for (i = 0; i < (1 << nht->hash_shift); i++) {
struct neighbour *n;
- struct neighbour __rcu **np = &nht->hash_buckets[i];
neigh_for_each(n, &nht->hash_heads[i]) {
- if (dev && n->dev != dev) {
- np = &n->next;
+ if (dev && n->dev != dev)
continue;
- }
- if (skip_perm && n->nud_state & NUD_PERMANENT) {
- np = &n->next;
+ if (skip_perm && n->nud_state & NUD_PERMANENT)
continue;
- }
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
+
hlist_del_rcu(&n->hash);
write_lock(&n->lock);
neigh_del_timer(n);
@@ -531,9 +499,7 @@ static void neigh_get_hash_rnd(u32 *x)
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
- size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head);
- size_t size = (1 << shift) * sizeof(struct neighbour *);
- struct neighbour __rcu **buckets;
+ size_t size = (1 << shift) * sizeof(struct hlist_head);
struct hlist_head *hash_heads;
struct neigh_hash_table *ret;
int i;
@@ -544,33 +510,17 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
if (!ret)
return NULL;
if (size <= PAGE_SIZE) {
- buckets = kzalloc(size, GFP_ATOMIC);
-
- if (buckets) {
- hash_heads = kzalloc(hash_heads_size, GFP_ATOMIC);
- if (!hash_heads)
- kfree(buckets);
- }
+ hash_heads = kzalloc(size, GFP_ATOMIC);
} else {
- buckets = (struct neighbour __rcu **)
+ hash_heads = (struct hlist_head *)
__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
get_order(size));
- kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
-
- if (buckets) {
- hash_heads = (struct hlist_head *)
- __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
- get_order(hash_heads_size));
- kmemleak_alloc(hash_heads, hash_heads_size, 1, GFP_ATOMIC);
- if (!hash_heads)
- free_pages((unsigned long)buckets, get_order(size));
- }
+ kmemleak_alloc(hash_heads, size, 1, GFP_ATOMIC);
}
- if (!buckets || !hash_heads) {
+ if (!hash_heads) {
kfree(ret);
return NULL;
}
- ret->hash_buckets = buckets;
ret->hash_heads = hash_heads;
ret->hash_shift = shift;
for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
@@ -583,23 +533,14 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table *nht = container_of(head,
struct neigh_hash_table,
rcu);
- size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
- struct neighbour __rcu **buckets = nht->hash_buckets;
- size_t hash_heads_size = (1 << nht->hash_shift) * sizeof(struct hlist_head);
+ size_t size = (1 << nht->hash_shift) * sizeof(struct hlist_head);
struct hlist_head *hash_heads = nht->hash_heads;
- if (size <= PAGE_SIZE) {
- kfree(buckets);
- } else {
- kmemleak_free(buckets);
- free_pages((unsigned long)buckets, get_order(size));
- }
-
- if (hash_heads_size < PAGE_SIZE) {
+ if (size < PAGE_SIZE) {
kfree(hash_heads);
} else {
kmemleak_free(hash_heads);
- free_pages((unsigned long)hash_heads, get_order(hash_heads_size));
+ free_pages((unsigned long)hash_heads, get_order(size));
}
kfree(nht);
}
@@ -619,7 +560,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
return old_nht;
for (i = 0; i < (1 << old_nht->hash_shift); i++) {
- struct neighbour *n, *next;
+ struct neighbour *n;
struct hlist_node *tmp;
neigh_for_each_safe(n, tmp, &old_nht->hash_heads[i]) {
@@ -627,14 +568,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
new_nht->hash_rnd);
hash >>= (32 - new_nht->hash_shift);
- next = rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock));
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(
- new_nht->hash_buckets[hash],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(new_nht->hash_buckets[hash], n);
hlist_del_rcu(&n->hash);
hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
}
@@ -739,10 +673,6 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
list_add_tail(&n->managed_list, &n->tbl->managed_list);
if (want_ref)
neigh_hold(n);
- rcu_assign_pointer(n->next,
- rcu_dereference_protected(nht->hash_buckets[hash_val],
- lockdep_is_held(&tbl->lock)));
- rcu_assign_pointer(nht->hash_buckets[hash_val], n);
hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
@@ -976,7 +906,6 @@ static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
struct neigh_hash_table *nht;
- struct neighbour __rcu **np;
struct hlist_node *tmp;
struct neighbour *n;
unsigned int i;
@@ -1004,7 +933,6 @@ static void neigh_periodic_work(struct work_struct *work)
goto out;
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
- np = &nht->hash_buckets[i];
neigh_for_each_safe(n, tmp, &nht->hash_heads[i]) {
unsigned int state;
@@ -1015,7 +943,7 @@ static void neigh_periodic_work(struct work_struct *work)
if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
(n->flags & NTF_EXT_LEARNED)) {
write_unlock(&n->lock);
- goto next_elt;
+ continue;
}
if (time_before(n->used, n->confirmed) &&
@@ -1026,9 +954,6 @@ static void neigh_periodic_work(struct work_struct *work)
(state == NUD_FAILED ||
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
hlist_del_rcu(&n->hash);
neigh_mark_dead(n);
write_unlock(&n->lock);
@@ -1036,9 +961,6 @@ static void neigh_periodic_work(struct work_struct *work)
continue;
}
write_unlock(&n->lock);
-
-next_elt:
- np = &n->next;
}
/*
* It's fine to release lock here, even if hash table
@@ -3124,22 +3046,16 @@ void __neigh_for_each_release(struct neigh_table *tbl,
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;
struct hlist_node *tmp;
- struct neighbour __rcu **np;
- np = &nht->hash_buckets[chain];
neigh_for_each_safe(n, tmp, &nht->hash_heads[chain]) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- rcu_assign_pointer(*np,
- rcu_dereference_protected(n->next,
- lockdep_is_held(&tbl->lock)));
hlist_del_rcu(&n->hash);
neigh_mark_dead(n);
- } else
- np = &n->next;
+ }
write_unlock(&n->lock);
if (release)
neigh_cleanup_and_release(n);
--
2.46.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH net-next v5 6/6] Create netdev->neighbour association
2024-10-17 7:04 [PATCH net-next v5 0/6] Improve neigh_flush_dev performance Gilad Naaman
` (4 preceding siblings ...)
2024-10-17 7:04 ` [PATCH net-next v5 5/6] Remove bare neighbour::next pointer Gilad Naaman
@ 2024-10-17 7:04 ` Gilad Naaman
5 siblings, 0 replies; 12+ messages in thread
From: Gilad Naaman @ 2024-10-17 7:04 UTC (permalink / raw)
To: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni
Cc: Gilad Naaman, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
Create a mapping between a netdev and its neighoburs,
allowing for much cheaper flushes.
Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
.../networking/net_cachelines/net_device.rst | 1 +
include/linux/netdevice.h | 7 ++
include/net/neighbour.h | 9 +-
include/net/neighbour_tables.h | 12 +++
net/core/neighbour.c | 95 +++++++++++--------
5 files changed, 80 insertions(+), 44 deletions(-)
create mode 100644 include/net/neighbour_tables.h
diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index db6192b2bb50..2edb6ac1cab4 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -189,4 +189,5 @@ u64 max_pacing_offload_horizon
struct_napi_config* napi_config
unsigned_long gro_flush_timeout
u32 napi_defer_hard_irqs
+struct hlist_head neighbours[2]
=================================== =========================== =================== =================== ===================================================================================
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8feaca12655e..80bde95cc302 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@
#include <net/net_trackers.h>
#include <net/net_debug.h>
#include <net/dropreason-core.h>
+#include <net/neighbour_tables.h>
struct netpoll_info;
struct device;
@@ -2034,6 +2035,9 @@ enum netdev_reg_state {
* @napi_defer_hard_irqs: If not zero, provides a counter that would
* allow to avoid NIC hard IRQ, on busy queues.
*
+ * @neighbours: List heads pointing to this device's neighbours'
+ * dev_list, one per address-family.
+ *
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
@@ -2443,6 +2447,9 @@ struct net_device {
*/
struct net_shaper_hierarchy *net_shaper_hierarchy;
#endif
+
+ struct hlist_head neighbours[NEIGH_NR_TABLES];
+
u8 priv[] ____cacheline_aligned
__counted_by(priv_len);
} ____cacheline_aligned;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 61c85f5e1235..ecff9297c116 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -29,6 +29,7 @@
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <net/rtnetlink.h>
+#include <net/neighbour_tables.h>
/*
* NUD stands for "neighbor unreachability detection"
@@ -136,6 +137,7 @@ struct neigh_statistics {
struct neighbour {
struct hlist_node hash;
+ struct hlist_node dev_list;
struct neigh_table *tbl;
struct neigh_parms *parms;
unsigned long confirmed;
@@ -236,13 +238,6 @@ struct neigh_table {
struct pneigh_entry **phash_buckets;
};
-enum {
- NEIGH_ARP_TABLE = 0,
- NEIGH_ND_TABLE = 1,
- NEIGH_NR_TABLES,
- NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
-};
-
static inline int neigh_parms_family(struct neigh_parms *p)
{
return p->tbl->family;
diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h
new file mode 100644
index 000000000000..bcffbe8f7601
--- /dev/null
+++ b/include/net/neighbour_tables.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_NEIGHBOUR_TABLES_H
+#define _NET_NEIGHBOUR_TABLES_H
+
+enum {
+ NEIGH_ARP_TABLE = 0,
+ NEIGH_ND_TABLE = 1,
+ NEIGH_NR_TABLES,
+ NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
+};
+
+#endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1c49515b850b..2683fb68f5b5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -61,6 +61,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static const struct seq_operations neigh_stat_seq_ops;
#endif
+static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
+{
+ int i;
+
+ switch (family) {
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ fallthrough; /* to avoid panic by null-ptr-deref */
+ case AF_INET:
+ i = NEIGH_ARP_TABLE;
+ break;
+ case AF_INET6:
+ i = NEIGH_ND_TABLE;
+ break;
+ }
+
+ return &dev->neighbours[i];
+}
+
/*
Neighbour hash table buckets are protected with rwlock tbl->lock.
@@ -216,6 +235,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
write_lock(&ndel->lock);
if (refcount_read(&ndel->refcnt) == 1) {
hlist_del_rcu(&ndel->hash);
+ hlist_del_rcu(&ndel->dev_list);
neigh_mark_dead(ndel);
retval = true;
}
@@ -356,47 +376,42 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
- int i;
- struct neigh_hash_table *nht;
-
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
+ struct hlist_head *dev_head;
+ struct hlist_node *tmp;
+ struct neighbour *n;
- for (i = 0; i < (1 << nht->hash_shift); i++) {
- struct neighbour *n;
+ dev_head = neigh_get_dev_table(dev, tbl->family);
- neigh_for_each(n, &nht->hash_heads[i]) {
- if (dev && n->dev != dev)
- continue;
- if (skip_perm && n->nud_state & NUD_PERMANENT)
- continue;
+ hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
+ if (skip_perm && n->nud_state & NUD_PERMANENT)
+ continue;
- hlist_del_rcu(&n->hash);
- write_lock(&n->lock);
- neigh_del_timer(n);
- neigh_mark_dead(n);
- if (refcount_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- We must destroy neighbour entry,
- but someone still uses it.
-
- The destroy will be delayed until
- the last user releases us, but
- we must kill timers etc. and move
- it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- WRITE_ONCE(n->output, neigh_blackhole);
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+ write_lock(&n->lock);
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+ neigh_dbg(2, "neigh %p is stray\n", n);
}
+ write_unlock(&n->lock);
+ neigh_cleanup_and_release(n);
}
}
@@ -674,6 +689,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
if (want_ref)
neigh_hold(n);
hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+ hlist_add_head_rcu(&n->dev_list,
+ neigh_get_dev_table(dev, tbl->family));
+
write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
@@ -955,6 +974,7 @@ static void neigh_periodic_work(struct work_struct *work)
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@@ -3054,6 +3074,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
release = cb(n);
if (release) {
hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
}
write_unlock(&n->lock);
--
2.46.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH net-next v5 2/6] Define neigh_for_each
2024-10-17 7:04 ` [PATCH net-next v5 2/6] Define neigh_for_each Gilad Naaman
@ 2024-10-17 16:26 ` Petr Machata
2024-10-20 7:16 ` Gilad Naaman
0 siblings, 1 reply; 12+ messages in thread
From: Petr Machata @ 2024-10-17 16:26 UTC (permalink / raw)
To: Gilad Naaman
Cc: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
Note about subjects: all your patches should have an appropriate subject
prefix. It looks like it could just be "net: neighbour:" for every patch.
Also giving this patch a subject "define neigh_for_each" is odd, that
function already is defined. Below I argue that reusing the name
neigh_for_each for the new helper is inappropriate. If you accept that,
you can add the helper in a separate patch and convert the open-coded
sites right away, which would be in 2/6. Then 3/6 would be the patch
that moves neigh_for_each to mlxsw and renames. (Though below I also
argue that perhaps it would be better to keep it where it is now.)
Gilad Naaman <gnaaman@drivenets.com> writes:
> Define neigh_for_each in neighbour.h and move old definition
> to its only point of usage within the mlxsw driver.
>
> Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
> ---
> .../ethernet/mellanox/mlxsw/spectrum_router.c | 24 +++++++++++++++++--
> include/net/neighbour.h | 4 ++--
> net/core/neighbour.c | 22 -----------------
> 3 files changed, 24 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> index 800dfb64ec83..de62587c5a63 100644
> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
> @@ -3006,6 +3006,26 @@ static void mlxsw_sp_neigh_rif_made_sync_each(struct neighbour *n, void *data)
> rms->err = -ENOMEM;
> }
>
> +static void mlxsw_sp_neigh_for_each(struct neigh_table *tbl,
> + void *cookie)
This is still named as if it were a generic walker, but actually it's
hardcoding the mlxsw_sp_neigh_rif_made_sync_each callback. The name
should reflect that.
E.g. mlxsw_sp_neigh_rif_made_sync_neighs.
Then it would also be good to rename mlxsw_sp_neigh_rif_made_sync_each
to mlxsw_sp_neigh_rif_made_sync_neigh as well.
> +{
> + struct neigh_hash_table *nht;
> + int chain;
> +
> + rcu_read_lock();
> + nht = rcu_dereference(tbl->nht);
> +
> + read_lock_bh(&tbl->lock); /* avoid resizes */
> + for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
> + struct neighbour *n;
> +
> + neigh_for_each(n, &nht->hash_heads[chain])
> + mlxsw_sp_neigh_rif_made_sync_each(n, cookie);
> + }
> + read_unlock_bh(&tbl->lock);
> + rcu_read_unlock();
> +}
> +
All this stuff looks like it's involved in private details of the
neighbor table implementation that IMHO a client of that module
shouldn't (have to) touch.
I'm not really sure why the function cannot stay where it is, under the
existing name, and the new function is not added under a different name.
Reusing neigh_for_each() seems inappropriate anyway, the name says "for
each neighbor", but in fact you are supposed to wrap it in this loop
over individual heads. Wouldn't it make sense to keep the existing
neigh_for_each(), and add a new helper, neigh_chain_for_each() or
something like that? And OK, call this new helper from neigh_for_each()?
Then this patch doesn't even need to exist.
> static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
> struct mlxsw_sp_rif *rif)
> {
> @@ -3014,12 +3034,12 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp,
> .rif = rif,
> };
>
> - neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
> + mlxsw_sp_neigh_for_each(&arp_tbl, &rms);
> if (rms.err)
> goto err_arp;
>
> #if IS_ENABLED(CONFIG_IPV6)
> - neigh_for_each(&nd_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms);
> + mlxsw_sp_neigh_for_each(&nd_tbl, &rms);
> #endif
> if (rms.err)
> goto err_nd;
> diff --git a/include/net/neighbour.h b/include/net/neighbour.h
> index 0402447854c7..37303656ab65 100644
> --- a/include/net/neighbour.h
> +++ b/include/net/neighbour.h
> @@ -277,6 +277,8 @@ static inline void *neighbour_priv(const struct neighbour *n)
>
> extern const struct nla_policy nda_policy[];
>
> +#define neigh_for_each(pos, head) hlist_for_each_entry(pos, head, hash)
> +
> static inline bool neigh_key_eq32(const struct neighbour *n, const void *pkey)
> {
> return *(const u32 *)n->primary_key == *(const u32 *)pkey;
> @@ -390,8 +392,6 @@ static inline struct net *pneigh_net(const struct pneigh_entry *pneigh)
> }
>
> void neigh_app_ns(struct neighbour *n);
> -void neigh_for_each(struct neigh_table *tbl,
> - void (*cb)(struct neighbour *, void *), void *cookie);
> void __neigh_for_each_release(struct neigh_table *tbl,
> int (*cb)(struct neighbour *));
> int neigh_xmit(int fam, struct net_device *, const void *, struct sk_buff *);
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 45c8df801dfb..d9c458e6f627 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -3120,28 +3120,6 @@ static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
> return err;
> }
>
> -void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
> -{
> - int chain;
> - struct neigh_hash_table *nht;
> -
> - rcu_read_lock();
> - nht = rcu_dereference(tbl->nht);
> -
> - read_lock_bh(&tbl->lock); /* avoid resizes */
> - for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
> - struct neighbour *n;
> -
> - for (n = rcu_dereference(nht->hash_buckets[chain]);
> - n != NULL;
> - n = rcu_dereference(n->next))
> - cb(n, cookie);
> - }
> - read_unlock_bh(&tbl->lock);
> - rcu_read_unlock();
> -}
> -EXPORT_SYMBOL(neigh_for_each);
> -
> /* The tbl->lock must be held as a writer and BH disabled. */
> void __neigh_for_each_release(struct neigh_table *tbl,
> int (*cb)(struct neighbour *))
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH net-next v5 5/6] Remove bare neighbour::next pointer
2024-10-17 7:04 ` [PATCH net-next v5 5/6] Remove bare neighbour::next pointer Gilad Naaman
@ 2024-10-18 4:56 ` kernel test robot
0 siblings, 0 replies; 12+ messages in thread
From: kernel test robot @ 2024-10-18 4:56 UTC (permalink / raw)
To: Gilad Naaman, netdev, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni
Cc: oe-kbuild-all, Gilad Naaman, Kuniyuki Iwashima, Ido Schimmel,
Petr Machata
Hi Gilad,
kernel test robot noticed the following build warnings:
[auto build test WARNING on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Gilad-Naaman/Add-hlist_node-to-struct-neighbour/20241017-150629
base: net-next/main
patch link: https://lore.kernel.org/r/20241017070445.4013745-6-gnaaman%40drivenets.com
patch subject: [PATCH net-next v5 5/6] Remove bare neighbour::next pointer
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20241018/202410181237.GEcWZ6xL-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241018/202410181237.GEcWZ6xL-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410181237.GEcWZ6xL-lkp@intel.com/
All warnings (new ones prefixed by >>):
net/core/neighbour.c: In function 'neigh_remove_one':
>> net/core/neighbour.c:210:34: warning: variable 'nht' set but not used [-Wunused-but-set-variable]
210 | struct neigh_hash_table *nht;
| ^~~
vim +/nht +210 net/core/neighbour.c
207
208 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
209 {
> 210 struct neigh_hash_table *nht;
211 bool retval = false;
212
213 nht = rcu_dereference_protected(tbl->nht,
214 lockdep_is_held(&tbl->lock));
215
216 write_lock(&ndel->lock);
217 if (refcount_read(&ndel->refcnt) == 1) {
218 hlist_del_rcu(&ndel->hash);
219 neigh_mark_dead(ndel);
220 retval = true;
221 }
222 write_unlock(&ndel->lock);
223 if (retval)
224 neigh_cleanup_and_release(ndel);
225 return retval;
226 }
227
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH net-next v5 4/6] Convert neighbour iteration to use hlist+macro
2024-10-17 7:04 ` [PATCH net-next v5 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
@ 2024-10-18 11:42 ` Simon Horman
0 siblings, 0 replies; 12+ messages in thread
From: Simon Horman @ 2024-10-18 11:42 UTC (permalink / raw)
To: Gilad Naaman
Cc: netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Kuniyuki Iwashima, Ido Schimmel, Petr Machata
On Thu, Oct 17, 2024 at 07:04:39AM +0000, Gilad Naaman wrote:
> Remove all usage of the bare neighbour::next pointer,
> replacing them with neighbour::hash and its for_each macro.
>
> Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
...
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
...
> @@ -621,11 +620,9 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
>
> for (i = 0; i < (1 << old_nht->hash_shift); i++) {
> struct neighbour *n, *next;
> + struct hlist_node *tmp;
>
> - for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
> - lockdep_is_held(&tbl->lock));
> - n != NULL;
> - n = next) {
> + neigh_for_each_safe(n, tmp, &old_nht->hash_heads[i]) {
> hash = tbl->hash(n->primary_key, n->dev,
> new_nht->hash_rnd);
>
Hi Gilad,
With this change next is now set but otherwise unused in this function.
Probably it can be removed.
Flagged by W=1 x86_64 allmodconfig builds with gcc-14 and clang-18.
...
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH net-next v5 2/6] Define neigh_for_each
2024-10-17 16:26 ` Petr Machata
@ 2024-10-20 7:16 ` Gilad Naaman
2024-10-21 9:07 ` Petr Machata
0 siblings, 1 reply; 12+ messages in thread
From: Gilad Naaman @ 2024-10-20 7:16 UTC (permalink / raw)
To: petrm; +Cc: davem, edumazet, gnaaman, idosch, kuba, kuniyu, netdev, pabeni
> Note about subjects: all your patches should have an appropriate subject
> prefix. It looks like it could just be "net: neighbour:" for every patch.
As in:
git --subject-prefix="PATCH net-next: neighbour v5"
Or independently, after the prefix, as in:
[PATCH net-next vX A/B] neighbour: REST OF PATCH
> Also giving this patch a subject "define neigh_for_each" is odd, that
> function already is defined. Below I argue that reusing the name
> neigh_for_each for the new helper is inappropriate. If you accept that,
> you can add the helper in a separate patch and convert the open-coded
> sites right away, which would be in 2/6. Then 3/6 would be the patch
> that moves neigh_for_each to mlxsw and renames. (Though below I also
> argue that perhaps it would be better to keep it where it is now.)
Noted, will revert this change and rename the added macro.
Thank you for your time.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH net-next v5 2/6] Define neigh_for_each
2024-10-20 7:16 ` Gilad Naaman
@ 2024-10-21 9:07 ` Petr Machata
0 siblings, 0 replies; 12+ messages in thread
From: Petr Machata @ 2024-10-21 9:07 UTC (permalink / raw)
To: Gilad Naaman; +Cc: petrm, davem, edumazet, idosch, kuba, kuniyu, netdev, pabeni
Gilad Naaman <gnaaman@drivenets.com> writes:
>> Note about subjects: all your patches should have an appropriate subject
>> prefix. It looks like it could just be "net: neighbour:" for every patch.
>
> Or independently, after the prefix, as in:
>
> [PATCH net-next vX A/B] neighbour: REST OF PATCH
This one.
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2024-10-21 9:08 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-17 7:04 [PATCH net-next v5 0/6] Improve neigh_flush_dev performance Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 1/6] Add hlist_node to struct neighbour Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 2/6] Define neigh_for_each Gilad Naaman
2024-10-17 16:26 ` Petr Machata
2024-10-20 7:16 ` Gilad Naaman
2024-10-21 9:07 ` Petr Machata
2024-10-17 7:04 ` [PATCH net-next v5 3/6] Convert neigh_* seq_file functions to use hlist Gilad Naaman
2024-10-17 7:04 ` [PATCH net-next v5 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
2024-10-18 11:42 ` Simon Horman
2024-10-17 7:04 ` [PATCH net-next v5 5/6] Remove bare neighbour::next pointer Gilad Naaman
2024-10-18 4:56 ` kernel test robot
2024-10-17 7:04 ` [PATCH net-next v5 6/6] Create netdev->neighbour association Gilad Naaman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).