public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Vitaliy Guschin <guschin108@gmail.com>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, kuba@kernel.org, dsahern@kernel.org,
	edumazet@google.com, pabeni@redhat.com, guschin108@gmail.com
Subject: [PATCH net-next v2] net: ipv4: add lwtunnel hash to fib_info_hash to fix mpls collisions
Date: Sun, 22 Feb 2026 01:40:01 +0000	[thread overview]
Message-ID: <20260222014033.20276-1-guschin108@gmail.com> (raw)
In-Reply-To: <20260222010820.8994-1-guschin108@gmail.com>

Currently, fib_info_hash_bucket does not account for MPLS labels
(lwtunnel state) when calculating the hash for fib_info objects. This leads
to massive hash collisions when many routes are configured with the same
gateway but different MPLS labels.

To resolve this, introduce lwtunnel_get_encap_hash() helper which calls a
new .get_encap_hash callback in lwtunnel_encap_ops. Implement this callback
for mpls_iptunnel to provide a hash of the MPLS label set.

This ensures proper distribution in the fib_info_hash table, improving
route installation and deletion performance by avoiding massive hash
collisions. In a test case with 100,000 MPLS routes, this changes the
algorithmic complexity from O(N) lookup in a single bucket to a
well-distributed hash table lookup.

Performance test (Batch installation of 100,000 routes with MPLS labels):
CPU: Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz

- Before patch: 6m 0.258s (sys 5m 56.895s)
- After patch:  0m 0.879s (sys 0m 0.468s)

Signed-off-by: Vitaliy Guschin <guschin108@gmail.com>
---

Changes in v2:
  - Removed unnecessary nla_total_size() call in lwtunnel_get_encap_hash logic.

 include/net/lwtunnel.h   |  7 +++++++
 net/core/lwtunnel.c      | 22 ++++++++++++++++++++++
 net/ipv4/fib_semantics.c | 12 +++++++++++-
 net/mpls/mpls_iptunnel.c | 13 +++++++++++++
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 26232f603e33..c91e4d4fa08b 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -47,6 +47,7 @@ struct lwtunnel_encap_ops {
 	int (*fill_encap)(struct sk_buff *skb,
 			  struct lwtunnel_state *lwtstate);
 	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
+	unsigned int (*get_encap_hash)(struct lwtunnel_state *lwtstate);
 	int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
 	int (*xmit)(struct sk_buff *skb);
 
@@ -127,6 +128,7 @@ int lwtunnel_build_state(struct net *net, u16 encap_type,
 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
 			int encap_attr, int encap_type_attr);
 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
+unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate);
 struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -237,6 +239,11 @@ static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
 	return 0;
 }
 
+static inline unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate)
+{
+	return 0;	/* stub variant: always 0, so XOR-ing it into a hash is a no-op */
+}
+
 static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
 {
 	return NULL;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index f9d76d85d04f..07b01a0c1895 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -289,6 +289,28 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
 }
 EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
 
+/* Hash @lwtstate through its encap type's ->get_encap_hash(); 0 if absent. */
+unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *encap_ops;
+	unsigned int result = 0;
+
+	/* Nothing to hash without state or with an out-of-range type. */
+	if (!lwtstate || lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
+		return result;
+
+	/* lwtun_encaps[] is read via rcu_dereference(); call the op in-section. */
+	rcu_read_lock();
+	encap_ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(encap_ops && encap_ops->get_encap_hash))
+		result = encap_ops->get_encap_hash(lwtstate);
+	rcu_read_unlock();
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(lwtunnel_get_encap_hash);
+
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
 {
 	const struct lwtunnel_encap_ops *ops;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0caf38e44c73..775582537561 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -325,6 +325,16 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
 	return val;
 }
 
+/* Return @val with @nh's oif and (if any) lwtunnel encap hash XORed in. */
+static unsigned int fib_info_hashfn_nh(unsigned int val, const struct fib_nh *nh)
+{
+	unsigned int nh_bits = nh->fib_nh_oif;
+
+	if (nh->fib_nh_lws)
+		nh_bits ^= lwtunnel_get_encap_hash(nh->fib_nh_lws);
+	return val ^ nh_bits;
+}
+
 static unsigned int fib_info_hashfn_result(const struct net *net,
 					   unsigned int val)
 {
@@ -344,7 +354,7 @@ static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
 		val ^= fi->nh->id;
 	} else {
 		for_nexthops(fi) {
-			val ^= nh->fib_nh_oif;
+			val = fib_info_hashfn_nh(val, nh);
 		} endfor_nexthops(fi)
 	}
 
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 1a1a0eb5b787..0960dfb3d633 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -259,6 +259,18 @@ static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
 	return nlsize;
 }
 
+/*
+ * ->get_encap_hash() for MPLS: jhash2() over the encap's label[] array,
+ * using its labels count as the length and 0 as the initial value.
+ */
+static unsigned int mpls_encap_hash(struct lwtunnel_state *lwtstate)
+{
+	struct mpls_iptunnel_encap *encap = mpls_lwtunnel_encap(lwtstate);
+
+	/* jhash2() length counts u32 words; assumes label[] is u32 - confirm. */
+	return jhash2(encap->label, encap->labels, 0);
+}
+
 static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 {
 	struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a);
@@ -281,6 +293,7 @@ static const struct lwtunnel_encap_ops mpls_iptun_ops = {
 	.xmit = mpls_xmit,
 	.fill_encap = mpls_fill_encap_info,
 	.get_encap_size = mpls_encap_nlsize,
+	.get_encap_hash = mpls_encap_hash,
 	.cmp_encap = mpls_encap_cmp,
 	.owner = THIS_MODULE,
 };
-- 
2.53.0


  reply	other threads:[~2026-02-22  1:40 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-22  1:05 [PATCH net-next] net: ipv4: add lwtunnel hash to fib_info_hash to fix mpls collisions Vitaliy Guschin
2026-02-22  1:40 ` Vitaliy Guschin [this message]
2026-02-22 13:11 ` Ido Schimmel
2026-02-22 15:46   ` Vitaliy Guschin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260222014033.20276-1-guschin108@gmail.com \
    --to=guschin108@gmail.com \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox