From: Ido Schimmel <idosch@nvidia.com>
To: <netdev@vger.kernel.org>
Cc: <davem@davemloft.net>, <kuba@kernel.org>, <pabeni@redhat.com>,
<edumazet@google.com>, <andrew+netdev@lunn.ch>,
<horms@kernel.org>, <petrm@nvidia.com>, <razor@blackwall.org>,
Ido Schimmel <idosch@nvidia.com>
Subject: [PATCH net-next 06/15] vxlan: Use a single lock to protect the FDB table
Date: Tue, 15 Apr 2025 15:11:34 +0300 [thread overview]
Message-ID: <20250415121143.345227-7-idosch@nvidia.com> (raw)
In-Reply-To: <20250415121143.345227-1-idosch@nvidia.com>
Currently, the VXLAN driver stores FDB entries in a hash table with a
fixed number of buckets (256). Subsequent patches are going to convert
this table to rhashtable with a linked list for entry traversal, as
rhashtable is more scalable.
In preparation for this conversion, move from a per-bucket spin lock to
a single spin lock that protects the entire FDB table.
The per-bucket spin locks were introduced by commit fe1e0713bbe8
("vxlan: Use FDB_HASH_SIZE hash_locks to reduce contention") citing
"huge contention when inserting/deleting vxlan_fdbs into the fdb_head".
It is not clear from the commit message which code path was holding the
spin lock for long periods of time, but the obvious suspect is the FDB
cleanup routine (vxlan_cleanup()) that periodically traverses the entire
table in order to delete aged-out entries.
This will be solved by subsequent patches that will convert the FDB
cleanup routine to traverse the linked list of FDB entries using RCU,
only acquiring the spin lock when deleting an aged-out entry.
The change reduces the size of the VXLAN device structure from 3600
bytes to 2576 bytes.
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
drivers/net/vxlan/vxlan_core.c | 82 +++++++++++------------------
drivers/net/vxlan/vxlan_vnifilter.c | 8 ++-
include/net/vxlan.h | 2 +-
3 files changed, 34 insertions(+), 58 deletions(-)
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index d45b63180714..b8edd8afda28 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -524,8 +524,8 @@ int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
return -EINVAL;
vxlan = netdev_priv(dev);
+ spin_lock_bh(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
- spin_lock_bh(&vxlan->hash_lock[h]);
hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
if (f->vni == vni) {
list_for_each_entry(rdst, &f->remotes, list) {
@@ -537,12 +537,12 @@ int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
}
}
}
- spin_unlock_bh(&vxlan->hash_lock[h]);
}
+ spin_unlock_bh(&vxlan->hash_lock);
return 0;
unlock:
- spin_unlock_bh(&vxlan->hash_lock[h]);
+ spin_unlock_bh(&vxlan->hash_lock);
return rc;
}
EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
@@ -558,14 +558,14 @@ void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
return;
vxlan = netdev_priv(dev);
+ spin_lock_bh(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
- spin_lock_bh(&vxlan->hash_lock[h]);
hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
if (f->vni == vni)
list_for_each_entry(rdst, &f->remotes, list)
rdst->offloaded = false;
- spin_unlock_bh(&vxlan->hash_lock[h]);
}
+ spin_unlock_bh(&vxlan->hash_lock);
}
EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
@@ -1248,7 +1248,6 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
__be16 port;
__be32 src_vni, vni;
u32 ifindex, nhid;
- u32 hash_index;
int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
@@ -1268,13 +1267,12 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
return -EAFNOSUPPORT;
- hash_index = fdb_head_index(vxlan, addr, src_vni);
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
port, src_vni, vni, ifindex,
ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
nhid, true, extack);
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
if (!err)
*notified = true;
@@ -1325,7 +1323,6 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
union vxlan_addr ip;
__be32 src_vni, vni;
u32 ifindex, nhid;
- u32 hash_index;
__be16 port;
int err;
@@ -1334,11 +1331,10 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
if (err)
return err;
- hash_index = fdb_head_index(vxlan, addr, src_vni);
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
true);
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
if (!err)
*notified = true;
@@ -1486,10 +1482,8 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
rdst->remote_ip = *src_ip;
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
} else {
- u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
-
/* learned new entry */
- spin_lock(&vxlan->hash_lock[hash_index]);
+ spin_lock(&vxlan->hash_lock);
/* close off race between vxlan_flush and incoming packets */
if (netif_running(dev))
@@ -1500,7 +1494,7 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
vni,
vxlan->default_dst.remote_vni,
ifindex, NTF_SELF, 0, true, NULL);
- spin_unlock(&vxlan->hash_lock[hash_index]);
+ spin_unlock(&vxlan->hash_lock);
}
return SKB_NOT_DROPPED_YET;
@@ -2839,10 +2833,10 @@ static void vxlan_cleanup(struct timer_list *t)
if (!netif_running(vxlan->dev))
return;
+ spin_lock(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
struct hlist_node *p, *n;
- spin_lock(&vxlan->hash_lock[h]);
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
@@ -2864,8 +2858,8 @@ static void vxlan_cleanup(struct timer_list *t)
} else if (time_before(timeout, next_timer))
next_timer = timeout;
}
- spin_unlock(&vxlan->hash_lock[h]);
}
+ spin_unlock(&vxlan->hash_lock);
mod_timer(&vxlan->age_timer, next_timer);
}
@@ -3056,10 +3050,10 @@ static void vxlan_flush(struct vxlan_dev *vxlan,
bool match_remotes = vxlan_fdb_flush_should_match_remotes(desc);
unsigned int h;
+ spin_lock_bh(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
struct hlist_node *p, *n;
- spin_lock_bh(&vxlan->hash_lock[h]);
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
@@ -3079,8 +3073,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan,
vxlan_fdb_destroy(vxlan, f, true, true);
}
- spin_unlock_bh(&vxlan->hash_lock[h]);
}
+ spin_unlock_bh(&vxlan->hash_lock);
}
static const struct nla_policy vxlan_del_bulk_policy[NDA_MAX + 1] = {
@@ -3358,15 +3352,14 @@ static void vxlan_setup(struct net_device *dev)
dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
INIT_LIST_HEAD(&vxlan->next);
+ spin_lock_init(&vxlan->hash_lock);
timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
vxlan->dev = dev;
- for (h = 0; h < FDB_HASH_SIZE; ++h) {
- spin_lock_init(&vxlan->hash_lock[h]);
+ for (h = 0; h < FDB_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
- }
}
static void vxlan_ether_setup(struct net_device *dev)
@@ -3977,10 +3970,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
/* create an fdb entry for a valid default destination */
if (!vxlan_addr_any(&dst->remote_ip)) {
- u32 hash_index = fdb_head_index(vxlan, all_zeros_mac,
- dst->remote_vni);
-
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_update(vxlan, all_zeros_mac,
&dst->remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
@@ -3990,7 +3980,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
dst->remote_vni,
dst->remote_ifindex,
NTF_SELF, 0, true, extack);
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
if (err)
goto unlink;
}
@@ -4420,9 +4410,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
/* handle default dst entry */
if (rem_ip_changed) {
- u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
-
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
if (!vxlan_addr_any(&conf.remote_ip)) {
err = vxlan_fdb_update(vxlan, all_zeros_mac,
&conf.remote_ip,
@@ -4433,7 +4421,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
conf.remote_ifindex,
NTF_SELF, 0, true, extack);
if (err) {
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
netdev_adjacent_change_abort(dst->remote_dev,
lowerdev, dev);
return err;
@@ -4447,7 +4435,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
dst->remote_vni,
dst->remote_ifindex,
true);
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
/* If vni filtering device, also update fdb entries of
* all vnis that were using default remote ip
@@ -4747,11 +4735,8 @@ vxlan_fdb_offloaded_set(struct net_device *dev,
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *rdst;
struct vxlan_fdb *f;
- u32 hash_index;
-
- hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
@@ -4767,7 +4752,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev,
rdst->offloaded = fdb_info->offloaded;
out:
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
}
static int
@@ -4776,13 +4761,11 @@ vxlan_fdb_external_learn_add(struct net_device *dev,
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct netlink_ext_ack *extack;
- u32 hash_index;
int err;
- hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
extack = switchdev_notifier_info_to_extack(&fdb_info->info);
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
NUD_REACHABLE,
NLM_F_CREATE | NLM_F_REPLACE,
@@ -4792,7 +4775,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev,
fdb_info->remote_ifindex,
NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
0, false, extack);
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
return err;
}
@@ -4803,11 +4786,9 @@ vxlan_fdb_external_learn_del(struct net_device *dev,
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
- u32 hash_index;
int err = 0;
- hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
if (!f)
@@ -4821,7 +4802,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev,
fdb_info->remote_ifindex,
false);
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
return err;
}
@@ -4870,18 +4851,15 @@ static void vxlan_fdb_nh_flush(struct nexthop *nh)
{
struct vxlan_fdb *fdb;
struct vxlan_dev *vxlan;
- u32 hash_index;
rcu_read_lock();
list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) {
vxlan = rcu_dereference(fdb->vdev);
WARN_ON(!vxlan);
- hash_index = fdb_head_index(vxlan, fdb->eth_addr,
- vxlan->default_dst.remote_vni);
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
if (!hlist_unhashed(&fdb->hlist))
vxlan_fdb_destroy(vxlan, fdb, false, false);
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
}
rcu_read_unlock();
}
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
index 6e6e9f05509a..e137c5bb4478 100644
--- a/drivers/net/vxlan/vxlan_vnifilter.c
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -483,11 +483,9 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
struct netlink_ext_ack *extack)
{
struct vxlan_rdst *dst = &vxlan->default_dst;
- u32 hash_index;
int err = 0;
- hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
- spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ spin_lock_bh(&vxlan->hash_lock);
if (remote_ip && !vxlan_addr_any(remote_ip)) {
err = vxlan_fdb_update(vxlan, all_zeros_mac,
remote_ip,
@@ -499,7 +497,7 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
dst->remote_ifindex,
NTF_SELF, 0, true, extack);
if (err) {
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
return err;
}
}
@@ -512,7 +510,7 @@ static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
dst->remote_ifindex,
true);
}
- spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ spin_unlock_bh(&vxlan->hash_lock);
return err;
}
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 2dd23ee2bacd..272e11708a33 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -296,7 +296,7 @@ struct vxlan_dev {
struct vxlan_rdst default_dst; /* default destination */
struct timer_list age_timer;
- spinlock_t hash_lock[FDB_HASH_SIZE];
+ spinlock_t hash_lock;
unsigned int addrcnt;
struct gro_cells gro_cells;
--
2.49.0
next prev parent reply other threads:[~2025-04-15 12:12 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-15 12:11 [PATCH net-next 00/15] vxlan: Convert FDB table to rhashtable Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 01/15] vxlan: Add RCU read-side critical sections in the Tx path Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 02/15] vxlan: Simplify creation of default FDB entry Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 03/15] vxlan: Insert FDB into hash table in vxlan_fdb_create() Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 04/15] vxlan: Unsplit default FDB entry creation and notification Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 05/15] vxlan: Relocate assignment of default remote device Ido Schimmel
2025-04-15 12:11 ` Ido Schimmel [this message]
2025-04-15 12:11 ` [PATCH net-next 07/15] vxlan: Add a linked list of FDB entries Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 08/15] vxlan: Use linked list to traverse " Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 09/15] vxlan: Convert FDB garbage collection to RCU Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 10/15] vxlan: Convert FDB flushing " Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 11/15] vxlan: Rename FDB Tx lookup function Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 12/15] vxlan: Create wrappers for FDB lookup Ido Schimmel
2025-04-22 8:46 ` Paolo Abeni
2025-04-23 12:21 ` Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 13/15] vxlan: Do not treat dst cache initialization errors as fatal Ido Schimmel
2025-04-22 8:49 ` Paolo Abeni
2025-04-24 8:18 ` Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 14/15] vxlan: Introduce FDB key structure Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 15/15] vxlan: Convert FDB table to rhashtable Ido Schimmel
2025-04-15 14:15 ` [PATCH net-next 00/15] " Nikolay Aleksandrov
2025-04-22 9:38 ` patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250415121143.345227-7-idosch@nvidia.com \
--to=idosch@nvidia.com \
--cc=andrew+netdev@lunn.ch \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=petrm@nvidia.com \
--cc=razor@blackwall.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).