From: Gilad Naaman <gnaaman@drivenets.com>
To: netdev <netdev@vger.kernel.org>,
"David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: Gilad Naaman <gnaaman@drivenets.com>,
Kuniyuki Iwashima <kuniyu@amazon.com>
Subject: [PATCH net-next v4 6/6] Create netdev->neighbour association
Date: Tue, 15 Oct 2024 16:59:26 +0000 [thread overview]
Message-ID: <20241015165929.3203216-7-gnaaman@drivenets.com> (raw)
In-Reply-To: <20241015165929.3203216-1-gnaaman@drivenets.com>
Create a mapping between a netdev and its neighoburs,
allowing for much cheaper flushes.
Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
.../networking/net_cachelines/net_device.rst | 1 +
include/linux/netdevice.h | 7 ++
include/net/neighbour.h | 10 +--
include/net/neighbour_tables.h | 12 +++
net/core/neighbour.c | 85 ++++++++++++-------
net/mpls/af_mpls.c | 2 +-
6 files changed, 75 insertions(+), 42 deletions(-)
create mode 100644 include/net/neighbour_tables.h
diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index db6192b2bb50..2edb6ac1cab4 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -189,4 +189,5 @@ u64 max_pacing_offload_horizon
struct_napi_config* napi_config
unsigned_long gro_flush_timeout
u32 napi_defer_hard_irqs
+struct hlist_head neighbours[2]
=================================== =========================== =================== =================== ===================================================================================
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8feaca12655e..80bde95cc302 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@
#include <net/net_trackers.h>
#include <net/net_debug.h>
#include <net/dropreason-core.h>
+#include <net/neighbour_tables.h>
struct netpoll_info;
struct device;
@@ -2034,6 +2035,9 @@ enum netdev_reg_state {
* @napi_defer_hard_irqs: If not zero, provides a counter that would
* allow to avoid NIC hard IRQ, on busy queues.
*
+ * @neighbours: List heads pointing to this device's neighbours'
+ * dev_list, one per address-family.
+ *
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
@@ -2443,6 +2447,9 @@ struct net_device {
*/
struct net_shaper_hierarchy *net_shaper_hierarchy;
#endif
+
+ struct hlist_head neighbours[NEIGH_NR_TABLES];
+
u8 priv[] ____cacheline_aligned
__counted_by(priv_len);
} ____cacheline_aligned;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 21c0c20a0ed5..d0bc618d4fe2 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -29,6 +29,7 @@
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <net/rtnetlink.h>
+#include <net/neighbour_tables.h>
/*
* NUD stands for "neighbor unreachability detection"
@@ -136,6 +137,7 @@ struct neigh_statistics {
struct neighbour {
struct hlist_node hash;
+ struct hlist_node dev_list;
struct neigh_table *tbl;
struct neigh_parms *parms;
unsigned long confirmed;
@@ -236,14 +238,6 @@ struct neigh_table {
struct pneigh_entry **phash_buckets;
};
-enum {
- NEIGH_ARP_TABLE = 0,
- NEIGH_ND_TABLE = 1,
- NEIGH_DN_TABLE = 2,
- NEIGH_NR_TABLES,
- NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
-};
-
static inline int neigh_parms_family(struct neigh_parms *p)
{
return p->tbl->family;
diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h
new file mode 100644
index 000000000000..bcffbe8f7601
--- /dev/null
+++ b/include/net/neighbour_tables.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_NEIGHBOUR_TABLES_H
+#define _NET_NEIGHBOUR_TABLES_H
+
+enum {
+ NEIGH_ARP_TABLE = 0,
+ NEIGH_ND_TABLE = 1,
+ NEIGH_NR_TABLES,
+ NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
+};
+
+#endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 61b5f0d4896a..dbfd27f79bb8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -61,6 +61,19 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static const struct seq_operations neigh_stat_seq_ops;
#endif
+static int family_to_neightbl_index(int family)
+{
+ switch (family) {
+ case AF_INET:
+ return NEIGH_ARP_TABLE;
+ case AF_INET6:
+ return NEIGH_ND_TABLE;
+ default:
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return 0; /* to avoid panic by null-ptr-deref */
+ }
+}
+
/*
Neighbour hash table buckets are protected with rwlock tbl->lock.
@@ -216,6 +229,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
write_lock(&ndel->lock);
if (refcount_read(&ndel->refcnt) == 1) {
hlist_del_rcu(&ndel->hash);
+ hlist_del_rcu(&ndel->dev_list);
neigh_mark_dead(ndel);
retval = true;
}
@@ -357,46 +371,45 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
int i;
+ struct neighbour *n;
+ struct hlist_node *tmp;
struct neigh_hash_table *nht;
+ i = family_to_neightbl_index(tbl->family);
+
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- for (i = 0; i < (1 << nht->hash_shift); i++) {
- struct neighbour *n;
-
- neigh_for_each(n, &nht->hash_heads[i]) {
- if (dev && n->dev != dev)
- continue;
- if (skip_perm && n->nud_state & NUD_PERMANENT)
- continue;
+ hlist_for_each_entry_safe(n, tmp, &dev->neighbours[i], dev_list) {
+ if (skip_perm && n->nud_state & NUD_PERMANENT)
+ continue;
- hlist_del_rcu(&n->hash);
- write_lock(&n->lock);
- neigh_del_timer(n);
- neigh_mark_dead(n);
- if (refcount_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- We must destroy neighbour entry,
- but someone still uses it.
-
- The destroy will be delayed until
- the last user releases us, but
- we must kill timers etc. and move
- it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- WRITE_ONCE(n->output, neigh_blackhole);
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+ write_lock(&n->lock);
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+ neigh_dbg(2, "neigh %p is stray\n", n);
}
+ write_unlock(&n->lock);
+ neigh_cleanup_and_release(n);
}
}
@@ -672,6 +685,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
if (want_ref)
neigh_hold(n);
hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+ error = family_to_neightbl_index(tbl->family);
+ hlist_add_head_rcu(&n->dev_list, &dev->neighbours[error]);
+
write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
@@ -953,6 +970,7 @@ static void neigh_periodic_work(struct work_struct *work)
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@@ -3052,6 +3070,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
release = cb(n);
if (release) {
hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
}
write_unlock(&n->lock);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index df62638b6498..a0573847bc55 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1664,7 +1664,7 @@ static int nla_put_via(struct sk_buff *skb,
u8 table, const void *addr, int alen)
{
static const int table_to_family[NEIGH_NR_TABLES + 1] = {
- AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
+ AF_INET, AF_INET6, AF_PACKET,
};
struct nlattr *nla;
struct rtvia *via;
--
2.46.0
next prev parent reply other threads:[~2024-10-15 16:59 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-15 16:59 [PATCH net-next v4 0/6] Improve neigh_flush_dev performance Gilad Naaman
2024-10-15 16:59 ` [PATCH net-next v4 1/6] Add hlist_node to struct neighbour Gilad Naaman
2024-10-15 22:49 ` Kuniyuki Iwashima
2024-10-15 16:59 ` [PATCH net-next v4 2/6] Define neigh_for_each Gilad Naaman
2024-10-15 22:57 ` Kuniyuki Iwashima
2024-10-15 16:59 ` [PATCH net-next v4 3/6] Convert neigh_* seq_file functions to use hlist Gilad Naaman
2024-10-15 23:25 ` Kuniyuki Iwashima
2024-10-16 9:11 ` Gilad Naaman
2024-10-16 20:54 ` Kuniyuki Iwashima
2024-10-17 6:31 ` Gilad Naaman
2024-10-15 16:59 ` [PATCH net-next v4 4/6] Convert neighbour iteration to use hlist+macro Gilad Naaman
2024-10-15 23:38 ` Kuniyuki Iwashima
2024-10-16 9:18 ` Gilad Naaman
2024-10-15 16:59 ` [PATCH net-next v4 5/6] Remove bare neighbour::next pointer Gilad Naaman
2024-10-15 16:59 ` Gilad Naaman [this message]
2024-10-15 23:54 ` [PATCH net-next v4 6/6] Create netdev->neighbour association Kuniyuki Iwashima
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241015165929.3203216-7-gnaaman@drivenets.com \
--to=gnaaman@drivenets.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=kuniyu@amazon.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).