From: Vitaliy Guschin <guschin108@gmail.com>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, kuba@kernel.org, dsahern@kernel.org,
edumazet@google.com, pabeni@redhat.com, guschin108@gmail.com
Subject: [PATCH net-next v2] net: ipv4: add lwtunnel hash to fib_info_hash to fix mpls collisions
Date: Sun, 22 Feb 2026 01:40:01 +0000 [thread overview]
Message-ID: <20260222014033.20276-1-guschin108@gmail.com> (raw)
In-Reply-To: <20260222010820.8994-1-guschin108@gmail.com>
Currently, fib_info_hash_bucket() does not account for the lwtunnel state
(e.g. MPLS labels) when calculating the hash for fib_info objects: for each
nexthop it only mixes in fib_nh_oif. This leads to massive hash collisions
when many routes are configured with the same gateway but different MPLS
labels.
To resolve this, introduce lwtunnel_get_encap_hash() helper which calls a
new .get_encap_hash callback in lwtunnel_encap_ops. Implement this callback
for mpls_iptunnel to provide a hash of the MPLS label set.
This ensures proper distribution in the fib_info_hash table, improving
route installation and deletion performance by avoiding massive hash
collisions. In a test case with 100,000 MPLS routes, each lookup goes
from an O(N) walk of a single bucket to a lookup in a well-distributed
hash table.
Performance test (Batch installation of 100,000 routes with MPLS labels):
CPU: Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz
- Before patch: 6m 0.258s (sys 5m 56.895s)
- After patch: 0m 0.879s (sys 0m 0.468s)
Signed-off-by: Vitaliy Guschin <guschin108@gmail.com>
---
Changes in v2:
- Removed unnecessary nla_total_size() call in lwtunnel_get_encap_hash logic.
include/net/lwtunnel.h | 7 +++++++
net/core/lwtunnel.c | 22 ++++++++++++++++++++++
net/ipv4/fib_semantics.c | 12 +++++++++++-
net/mpls/mpls_iptunnel.c | 13 +++++++++++++
4 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 26232f603e33..c91e4d4fa08b 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -47,6 +47,7 @@ struct lwtunnel_encap_ops {
int (*fill_encap)(struct sk_buff *skb,
struct lwtunnel_state *lwtstate);
int (*get_encap_size)(struct lwtunnel_state *lwtstate);
+ unsigned int (*get_encap_hash)(struct lwtunnel_state *lwtstate);
int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
int (*xmit)(struct sk_buff *skb);
@@ -127,6 +128,7 @@ int lwtunnel_build_state(struct net *net, u16 encap_type,
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
int encap_attr, int encap_type_attr);
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
+unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate);
struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -237,6 +239,11 @@ static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
return 0;
}
+/* Stub variant of lwtunnel_get_encap_hash(): ignores @lwtstate and always
+ * returns 0, so XORing the result into a hash leaves that hash unchanged.
+ * NOTE(review): presumably this sits in the !CONFIG_LWTUNNEL branch of the
+ * header next to the other stubs; the #ifdef is outside this hunk - confirm.
+ */
+static inline unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate)
+{
+ return 0;
+}
+
static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
{
return NULL;
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index f9d76d85d04f..07b01a0c1895 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -289,6 +289,28 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
}
EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
+/*
+ * Ask the encap type that owns @lwtstate for a hash of its encapsulation
+ * parameters.
+ *
+ * Returns 0 when @lwtstate is NULL, when its type is LWTUNNEL_ENCAP_NONE or
+ * above LWTUNNEL_ENCAP_MAX, or when the ops registered for that type are
+ * missing or do not provide a ->get_encap_hash() callback. Otherwise
+ * returns whatever the callback computes.
+ */
+unsigned int lwtunnel_get_encap_hash(struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *encap_ops;
+	unsigned int ret = 0;
+
+	if (!lwtstate || lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
+		return 0;
+
+	/* The ops table entry is read under RCU, and the callback runs
+	 * inside the same read-side critical section.
+	 */
+	rcu_read_lock();
+	encap_ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(encap_ops && encap_ops->get_encap_hash))
+		ret = encap_ops->get_encap_hash(lwtstate);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(lwtunnel_get_encap_hash);
+
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
const struct lwtunnel_encap_ops *ops;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0caf38e44c73..775582537561 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -325,6 +325,16 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
return val;
}
+/*
+ * Per-nexthop contribution to the fib_info hash: the output interface
+ * index XORed with the hash of the nexthop's lwtunnel encap state.
+ *
+ * @val: the caller's running hash. Kept for signature compatibility but
+ *       deliberately NOT folded into the result: fib_info_hash_bucket()
+ *       combines the return value with "val ^= ...", so returning
+ *       "val ^ x" from here would cancel val (val ^ val == 0) and throw
+ *       away everything hashed so far, including earlier nexthops.
+ * @nh:  nexthop whose fib_nh_oif and fib_nh_lws are hashed.
+ *
+ * Returns the value the caller should XOR into its running hash.
+ */
+static unsigned int fib_info_hashfn_nh(unsigned int val, const struct fib_nh *nh)
+{
+	/* lwtunnel_get_encap_hash() returns 0 for a NULL state, so no
+	 * separate fib_nh_lws check is needed here.
+	 */
+	return nh->fib_nh_oif ^ lwtunnel_get_encap_hash(nh->fib_nh_lws);
+}
+
static unsigned int fib_info_hashfn_result(const struct net *net,
unsigned int val)
{
@@ -344,7 +354,7 @@ static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
val ^= fi->nh->id;
} else {
for_nexthops(fi) {
- val ^= nh->fib_nh_oif;
+ val ^= fib_info_hashfn_nh(val, nh);
} endfor_nexthops(fi)
}
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 1a1a0eb5b787..0960dfb3d633 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -259,6 +259,18 @@ static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
return nlsize;
}
+/*
+ * ->get_encap_hash() callback for the MPLS ip tunnel encap: hash the label
+ * array of @lwtstate with jhash2(), using the label count as the length
+ * and 0 as the initial value.
+ */
+static unsigned int mpls_encap_hash(struct lwtunnel_state *lwtstate)
+{
+	const struct mpls_iptunnel_encap *encap = mpls_lwtunnel_encap(lwtstate);
+
+	return jhash2(encap->label, encap->labels, 0);
+}
+
static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a);
@@ -281,6 +293,7 @@ static const struct lwtunnel_encap_ops mpls_iptun_ops = {
.xmit = mpls_xmit,
.fill_encap = mpls_fill_encap_info,
.get_encap_size = mpls_encap_nlsize,
+ .get_encap_hash = mpls_encap_hash,
.cmp_encap = mpls_encap_cmp,
.owner = THIS_MODULE,
};
--
2.53.0
next prev parent reply other threads:[~2026-02-22 1:40 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-22 1:05 [PATCH net-next] net: ipv4: add lwtunnel hash to fib_info_hash to fix mpls collisions Vitaliy Guschin
2026-02-22 1:40 ` Vitaliy Guschin [this message]
2026-02-22 13:11 ` Ido Schimmel
2026-02-22 15:46 ` Vitaliy Guschin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260222014033.20276-1-guschin108@gmail.com \
--to=guschin108@gmail.com \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox