netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ido Schimmel <idosch@nvidia.com>
To: <netdev@vger.kernel.org>
Cc: <davem@davemloft.net>, <kuba@kernel.org>, <pabeni@redhat.com>,
	<edumazet@google.com>, <andrew+netdev@lunn.ch>,
	<horms@kernel.org>, <petrm@nvidia.com>, <razor@blackwall.org>,
	Ido Schimmel <idosch@nvidia.com>
Subject: [PATCH net-next 15/15] vxlan: Convert FDB table to rhashtable
Date: Tue, 15 Apr 2025 15:11:43 +0300	[thread overview]
Message-ID: <20250415121143.345227-16-idosch@nvidia.com> (raw)
In-Reply-To: <20250415121143.345227-1-idosch@nvidia.com>

FDB entries are currently stored in a hash table with a fixed number of
buckets (256), resulting in performance degradation as the number of
entries grows. Solve this by converting the driver to use rhashtable,
which maintains more or less constant performance regardless of the
number of entries.

The table below shows the improvement in transmitted packets per second,
measured using a single pktgen thread with a varying number of entries,
when the transmitted packet always hits the default entry (worst case):

Number of entries | Improvement
------------------|------------
1k                | +1.12%
4k                | +9.22%
16k               | +55%
64k               | +585%
256k              | +2460%

In addition, the change reduces the size of the VXLAN device structure
from 2584 bytes to 672 bytes.

Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 drivers/net/vxlan/vxlan_core.c    | 102 ++++++++++++------------------
 drivers/net/vxlan/vxlan_private.h |   2 +-
 include/net/vxlan.h               |   2 +-
 3 files changed, 43 insertions(+), 63 deletions(-)

diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 8e359cf8dbbd..a56d7239b127 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -15,6 +15,7 @@
 #include <linux/igmp.h>
 #include <linux/if_ether.h>
 #include <linux/ethtool.h>
+#include <linux/rhashtable.h>
 #include <net/arp.h>
 #include <net/ndisc.h>
 #include <net/gro.h>
@@ -63,8 +64,12 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan);
 
 static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
 
-/* salt for hash table */
-static u32 vxlan_salt __read_mostly;
+static const struct rhashtable_params vxlan_fdb_rht_params = {
+	.head_offset = offsetof(struct vxlan_fdb, rhnode),
+	.key_offset = offsetof(struct vxlan_fdb, key),
+	.key_len = sizeof(struct vxlan_fdb_key),
+	.automatic_shrinking = true,
+};
 
 static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
 {
@@ -371,62 +376,21 @@ static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
 	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
 }
 
-/* Hash Ethernet address */
-static u32 eth_hash(const unsigned char *addr)
-{
-	u64 value = get_unaligned((u64 *)addr);
-
-	/* only want 6 bytes */
-#ifdef __BIG_ENDIAN
-	value >>= 16;
-#else
-	value <<= 16;
-#endif
-	return hash_64(value, FDB_HASH_BITS);
-}
-
-u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
-{
-	/* use 1 byte of OUI and 3 bytes of NIC */
-	u32 key = get_unaligned((u32 *)(addr + 2));
-
-	return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
-}
-
-u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
-{
-	if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
-		return eth_vni_hash(mac, vni);
-	else
-		return eth_hash(mac);
-}
-
-/* Hash chain to use given mac address */
-static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
-						const u8 *mac, __be32 vni)
-{
-	return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
-}
-
 /* Look up Ethernet address in forwarding table */
 static struct vxlan_fdb *vxlan_find_mac_rcu(struct vxlan_dev *vxlan,
 					    const u8 *mac, __be32 vni)
 {
-	struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni);
-	struct vxlan_fdb *f;
+	struct vxlan_fdb_key key;
 
-	hlist_for_each_entry_rcu(f, head, hlist) {
-		if (ether_addr_equal(mac, f->key.eth_addr)) {
-			if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
-				if (vni == f->key.vni)
-					return f;
-			} else {
-				return f;
-			}
-		}
-	}
+	memset(&key, 0, sizeof(key));
+	memcpy(key.eth_addr, mac, sizeof(key.eth_addr));
+	if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA))
+		key.vni = vxlan->default_dst.remote_vni;
+	else
+		key.vni = vni;
 
-	return NULL;
+	return rhashtable_lookup(&vxlan->fdb_hash_tbl, &key,
+				 vxlan_fdb_rht_params);
 }
 
 static struct vxlan_fdb *vxlan_find_mac_tx(struct vxlan_dev *vxlan,
@@ -915,15 +879,27 @@ int vxlan_fdb_create(struct vxlan_dev *vxlan,
 	if (rc < 0)
 		goto errout;
 
+	rc = rhashtable_lookup_insert_fast(&vxlan->fdb_hash_tbl, &f->rhnode,
+					   vxlan_fdb_rht_params);
+	if (rc)
+		goto destroy_remote;
+
 	++vxlan->addrcnt;
-	hlist_add_head_rcu(&f->hlist,
-			   vxlan_fdb_head(vxlan, mac, src_vni));
 	hlist_add_head_rcu(&f->fdb_node, &vxlan->fdb_list);
 
 	*fdb = f;
 
 	return 0;
 
+destroy_remote:
+	if (rcu_access_pointer(f->nh)) {
+		list_del_rcu(&f->nh_list);
+		nexthop_put(rtnl_dereference(f->nh));
+	} else {
+		list_del(&rd->list);
+		dst_cache_destroy(&rd->dst_cache);
+		kfree(rd);
+	}
 errout:
 	kfree(f);
 	return rc;
@@ -974,7 +950,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
 	}
 
 	hlist_del_init_rcu(&f->fdb_node);
-	hlist_del_rcu(&f->hlist);
+	rhashtable_remove_fast(&vxlan->fdb_hash_tbl, &f->rhnode,
+			       vxlan_fdb_rht_params);
 	list_del_rcu(&f->nh_list);
 	call_rcu(&f->rcu, vxlan_fdb_free);
 }
@@ -2898,10 +2875,14 @@ static int vxlan_init(struct net_device *dev)
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	int err;
 
+	err = rhashtable_init(&vxlan->fdb_hash_tbl, &vxlan_fdb_rht_params);
+	if (err)
+		return err;
+
 	if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
 		err = vxlan_vnigroup_init(vxlan);
 		if (err)
-			return err;
+			goto err_rhashtable_destroy;
 	}
 
 	err = gro_cells_init(&vxlan->gro_cells, dev);
@@ -2920,6 +2901,8 @@ static int vxlan_init(struct net_device *dev)
 err_vnigroup_uninit:
 	if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
 		vxlan_vnigroup_uninit(vxlan);
+err_rhashtable_destroy:
+	rhashtable_destroy(&vxlan->fdb_hash_tbl);
 	return err;
 }
 
@@ -2933,6 +2916,8 @@ static void vxlan_uninit(struct net_device *dev)
 		vxlan_vnigroup_uninit(vxlan);
 
 	gro_cells_destroy(&vxlan->gro_cells);
+
+	rhashtable_destroy(&vxlan->fdb_hash_tbl);
 }
 
 /* Start ageing timer and join group when device is brought up */
@@ -3329,7 +3314,6 @@ static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
 static void vxlan_setup(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
-	unsigned int h;
 
 	eth_hw_addr_random(dev);
 	ether_setup(dev);
@@ -3362,8 +3346,6 @@ static void vxlan_setup(struct net_device *dev)
 
 	vxlan->dev = dev;
 
-	for (h = 0; h < FDB_HASH_SIZE; ++h)
-		INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
 	INIT_HLIST_HEAD(&vxlan->fdb_list);
 }
 
@@ -4944,8 +4926,6 @@ static int __init vxlan_init_module(void)
 {
 	int rc;
 
-	get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
-
 	rc = register_pernet_subsys(&vxlan_net_ops);
 	if (rc)
 		goto out1;
diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h
index 3ca19e7167c9..d328aed9feef 100644
--- a/drivers/net/vxlan/vxlan_private.h
+++ b/drivers/net/vxlan/vxlan_private.h
@@ -31,7 +31,7 @@ struct vxlan_fdb_key {
 
 /* Forwarding table entry */
 struct vxlan_fdb {
-	struct hlist_node hlist;	/* linked list of entries */
+	struct rhash_head rhnode;
 	struct rcu_head	  rcu;
 	unsigned long	  updated;	/* jiffies */
 	unsigned long	  used;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 96a6c6f45c2e..e2f7ca045d3e 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -304,7 +304,7 @@ struct vxlan_dev {
 
 	struct vxlan_vni_group  __rcu *vnigrp;
 
-	struct hlist_head fdb_head[FDB_HASH_SIZE];
+	struct rhashtable fdb_hash_tbl;
 
 	struct rhashtable mdb_tbl;
 	struct hlist_head fdb_list;
-- 
2.49.0


  parent reply	other threads:[~2025-04-15 12:13 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-15 12:11 [PATCH net-next 00/15] vxlan: Convert FDB table to rhashtable Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 01/15] vxlan: Add RCU read-side critical sections in the Tx path Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 02/15] vxlan: Simplify creation of default FDB entry Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 03/15] vxlan: Insert FDB into hash table in vxlan_fdb_create() Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 04/15] vxlan: Unsplit default FDB entry creation and notification Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 05/15] vxlan: Relocate assignment of default remote device Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 06/15] vxlan: Use a single lock to protect the FDB table Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 07/15] vxlan: Add a linked list of FDB entries Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 08/15] vxlan: Use linked list to traverse " Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 09/15] vxlan: Convert FDB garbage collection to RCU Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 10/15] vxlan: Convert FDB flushing " Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 11/15] vxlan: Rename FDB Tx lookup function Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 12/15] vxlan: Create wrappers for FDB lookup Ido Schimmel
2025-04-22  8:46   ` Paolo Abeni
2025-04-23 12:21     ` Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 13/15] vxlan: Do not treat dst cache initialization errors as fatal Ido Schimmel
2025-04-22  8:49   ` Paolo Abeni
2025-04-24  8:18     ` Ido Schimmel
2025-04-15 12:11 ` [PATCH net-next 14/15] vxlan: Introduce FDB key structure Ido Schimmel
2025-04-15 12:11 ` Ido Schimmel [this message]
2025-04-15 14:15 ` [PATCH net-next 00/15] vxlan: Convert FDB table to rhashtable Nikolay Aleksandrov
2025-04-22  9:38 ` patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250415121143.345227-16-idosch@nvidia.com \
    --to=idosch@nvidia.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=petrm@nvidia.com \
    --cc=razor@blackwall.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).