* [PATCH v1 net-next 01/12] ipv4: fib: Use cached net in fib_inetaddr_event().
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 10:39 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 02/12] ipv4: fib: Allocate fib_info_hash[] and fib_info_laddrhash[] by kvmalloc_array() Kuniyuki Iwashima
` (11 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
net is available in fib_inetaddr_event(), let's use it.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_frontend.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 272e42d81323..6730e2034cf8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1450,7 +1450,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
fib_sync_up(dev, RTNH_F_DEAD);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
- rt_cache_flush(dev_net(dev));
+ rt_cache_flush(net);
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa, NULL);
@@ -1461,7 +1461,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
*/
fib_disable_ip(dev, event, true);
} else {
- rt_cache_flush(dev_net(dev));
+ rt_cache_flush(net);
}
break;
}
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 01/12] ipv4: fib: Use cached net in fib_inetaddr_event().
2025-02-25 18:22 ` [PATCH v1 net-next 01/12] ipv4: fib: Use cached net in fib_inetaddr_event() Kuniyuki Iwashima
@ 2025-02-26 10:39 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 10:39 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:23 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> net is available in fib_inetaddr_event(), let's use it.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 02/12] ipv4: fib: Allocate fib_info_hash[] and fib_info_laddrhash[] by kvmalloc_array().
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
2025-02-25 18:22 ` [PATCH v1 net-next 01/12] ipv4: fib: Use cached net in fib_inetaddr_event() Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 11:08 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation Kuniyuki Iwashima
` (10 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
Both fib_info_hash[] and fib_info_laddrhash[] are hash tables for
struct fib_info and are allocated by kvzmalloc() separately.
Let's replace the two kvzmalloc() calls with kvmalloc_array() to
remove the fib_info_laddrhash pointer later.
Note that fib_info_hash_alloc() allocates a new hash table based on
fib_info_hash_bits because we will remove fib_info_hash_size later.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_semantics.c | 44 ++++++++++++++++++++++++----------------
1 file changed, 26 insertions(+), 18 deletions(-)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d2cee5c314f5..a68a4eb5e0d1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -357,6 +357,19 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi)
return fib_info_hashfn_result(fi->fib_net, val);
}
+static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits)
+{
+ /* The second half is used for prefsrc */
+ return kvmalloc_array((1 << hash_bits) * 2,
+ sizeof(struct hlist_head *),
+ GFP_KERNEL | __GFP_ZERO);
+}
+
+static void fib_info_hash_free(struct hlist_head *head)
+{
+ kvfree(head);
+}
+
/* no metrics, only nexthop id */
static struct fib_info *fib_find_info_nh(struct net *net,
const struct fib_config *cfg)
@@ -1249,9 +1262,9 @@ fib_info_laddrhash_bucket(const struct net *net, __be32 val)
}
static void fib_info_hash_move(struct hlist_head *new_info_hash,
- struct hlist_head *new_laddrhash,
unsigned int new_size)
{
+ struct hlist_head *new_laddrhash = new_info_hash + new_size;
struct hlist_head *old_info_hash, *old_laddrhash;
unsigned int old_size = fib_info_hash_size;
unsigned int i;
@@ -1293,8 +1306,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
}
}
- kvfree(old_info_hash);
- kvfree(old_laddrhash);
+ fib_info_hash_free(old_info_hash);
}
__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
@@ -1412,22 +1424,18 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
err = -ENOBUFS;
if (fib_info_cnt >= fib_info_hash_size) {
- unsigned int new_size = fib_info_hash_size << 1;
struct hlist_head *new_info_hash;
- struct hlist_head *new_laddrhash;
- size_t bytes;
-
- if (!new_size)
- new_size = 16;
- bytes = (size_t)new_size * sizeof(struct hlist_head *);
- new_info_hash = kvzalloc(bytes, GFP_KERNEL);
- new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
- if (!new_info_hash || !new_laddrhash) {
- kvfree(new_info_hash);
- kvfree(new_laddrhash);
- } else {
- fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
- }
+ unsigned int new_hash_bits;
+
+ if (!fib_info_hash_bits)
+ new_hash_bits = 4;
+ else
+ new_hash_bits = fib_info_hash_bits + 1;
+
+ new_info_hash = fib_info_hash_alloc(new_hash_bits);
+ if (new_info_hash)
+ fib_info_hash_move(new_info_hash, 1 << new_hash_bits);
+
if (!fib_info_hash_size)
goto failure;
}
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 02/12] ipv4: fib: Allocate fib_info_hash[] and fib_info_laddrhash[] by kvmalloc_array().
2025-02-25 18:22 ` [PATCH v1 net-next 02/12] ipv4: fib: Allocate fib_info_hash[] and fib_info_laddrhash[] by kvmalloc_array() Kuniyuki Iwashima
@ 2025-02-26 11:08 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 11:08 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:24 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Both fib_info_hash[] and fib_info_laddrhash[] are hash tables for
> struct fib_info and are allocated by kvzmalloc() separately.
>
> Let's replace the two kvzmalloc() calls with kvmalloc_array() to
> remove the fib_info_laddrhash pointer later.
>
> Note that fib_info_hash_alloc() allocates a new hash table based on
> fib_info_hash_bits because we will remove fib_info_hash_size later.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation.
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
2025-02-25 18:22 ` [PATCH v1 net-next 01/12] ipv4: fib: Use cached net in fib_inetaddr_event() Kuniyuki Iwashima
2025-02-25 18:22 ` [PATCH v1 net-next 02/12] ipv4: fib: Allocate fib_info_hash[] and fib_info_laddrhash[] by kvmalloc_array() Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 11:10 ` Eric Dumazet
2025-02-26 17:48 ` David Ahern
2025-02-25 18:22 ` [PATCH v1 net-next 04/12] ipv4: fib: Make fib_info_hashfn() return struct hlist_head Kuniyuki Iwashima
` (9 subsequent siblings)
12 siblings, 2 replies; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
We will allocate fib_info_hash[] and fib_info_laddrhash[] for each netns.
Currently, fib_info_hash[] is allocated when the first route is added.
Let's move the first allocation to a new __net_init function.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/net/ip_fib.h | 2 ++
net/ipv4/fib_frontend.c | 9 ++++++++
net/ipv4/fib_semantics.c | 45 ++++++++++++++++++++++++++++------------
3 files changed, 43 insertions(+), 13 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a113c11ab56b..e3864b74e92a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -162,6 +162,8 @@ struct fib_info {
struct fib_nh fib_nh[] __counted_by(fib_nhs);
};
+int __net_init fib4_semantics_init(struct net *net);
+void __net_exit fib4_semantics_exit(struct net *net);
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rule;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6730e2034cf8..dbf84c23ca09 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1615,9 +1615,15 @@ static int __net_init fib_net_init(struct net *net)
error = ip_fib_net_init(net);
if (error < 0)
goto out;
+
+ error = fib4_semantics_init(net);
+ if (error)
+ goto out_semantics;
+
error = nl_fib_lookup_init(net);
if (error < 0)
goto out_nlfl;
+
error = fib_proc_init(net);
if (error < 0)
goto out_proc;
@@ -1627,6 +1633,8 @@ static int __net_init fib_net_init(struct net *net)
out_proc:
nl_fib_lookup_exit(net);
out_nlfl:
+ fib4_semantics_init(net);
+out_semantics:
rtnl_lock();
ip_fib_net_exit(net);
rtnl_unlock();
@@ -1637,6 +1645,7 @@ static void __net_exit fib_net_exit(struct net *net)
{
fib_proc_exit(net);
nl_fib_lookup_exit(net);
+ fib4_semantics_init(net);
}
static void __net_exit fib_net_exit_batch(struct list_head *net_list)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a68a4eb5e0d1..f00ac983861a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1421,28 +1421,21 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
#endif
- err = -ENOBUFS;
-
if (fib_info_cnt >= fib_info_hash_size) {
+ unsigned int new_hash_bits = fib_info_hash_bits + 1;
struct hlist_head *new_info_hash;
- unsigned int new_hash_bits;
-
- if (!fib_info_hash_bits)
- new_hash_bits = 4;
- else
- new_hash_bits = fib_info_hash_bits + 1;
new_info_hash = fib_info_hash_alloc(new_hash_bits);
if (new_info_hash)
fib_info_hash_move(new_info_hash, 1 << new_hash_bits);
-
- if (!fib_info_hash_size)
- goto failure;
}
fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
- if (!fi)
+ if (!fi) {
+ err = -ENOBUFS;
goto failure;
+ }
+
fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack);
if (IS_ERR(fi->fib_metrics)) {
err = PTR_ERR(fi->fib_metrics);
@@ -1863,7 +1856,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
struct fib_info *fi;
int ret = 0;
- if (!fib_info_laddrhash || local == 0)
+ if (!local)
return 0;
head = fib_info_laddrhash_bucket(net, local);
@@ -2265,3 +2258,29 @@ void fib_select_path(struct net *net, struct fib_result *res,
fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
}
}
+
+int __net_init fib4_semantics_init(struct net *net)
+{
+ unsigned int hash_bits = 4;
+
+ if (!net_eq(net, &init_net))
+ return 0;
+
+ fib_info_hash = fib_info_hash_alloc(hash_bits);
+ if (!fib_info_hash)
+ return -ENOMEM;
+
+ fib_info_hash_bits = hash_bits;
+ fib_info_hash_size = 1 << hash_bits;
+ fib_info_laddrhash = fib_info_hash + fib_info_hash_size;
+
+ return 0;
+}
+
+void __net_exit fib4_semantics_exit(struct net *net)
+{
+ if (!net_eq(net, &init_net))
+ return;
+
+ fib_info_hash_free(fib_info_hash);
+}
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation.
2025-02-25 18:22 ` [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation Kuniyuki Iwashima
@ 2025-02-26 11:10 ` Eric Dumazet
2025-02-26 17:48 ` David Ahern
1 sibling, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 11:10 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:24 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> We will allocate fib_info_hash[] and fib_info_laddrhash[] for each netns.
>
> Currently, fib_info_hash[] is allocated when the first route is added.
>
> Let's move the first allocation to a new __net_init function.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation.
2025-02-25 18:22 ` [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation Kuniyuki Iwashima
2025-02-26 11:10 ` Eric Dumazet
@ 2025-02-26 17:48 ` David Ahern
2025-02-26 18:09 ` Eric Dumazet
1 sibling, 1 reply; 31+ messages in thread
From: David Ahern @ 2025-02-26 17:48 UTC (permalink / raw)
To: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, netdev
On 2/25/25 11:22 AM, Kuniyuki Iwashima wrote:
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 6730e2034cf8..dbf84c23ca09 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -1615,9 +1615,15 @@ static int __net_init fib_net_init(struct net *net)
> error = ip_fib_net_init(net);
> if (error < 0)
> goto out;
> +
> + error = fib4_semantics_init(net);
> + if (error)
> + goto out_semantics;
> +
> error = nl_fib_lookup_init(net);
> if (error < 0)
> goto out_nlfl;
> +
> error = fib_proc_init(net);
> if (error < 0)
> goto out_proc;
> @@ -1627,6 +1633,8 @@ static int __net_init fib_net_init(struct net *net)
> out_proc:
> nl_fib_lookup_exit(net);
> out_nlfl:
> + fib4_semantics_init(net);
_exit?
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation.
2025-02-26 17:48 ` David Ahern
@ 2025-02-26 18:09 ` Eric Dumazet
2025-02-26 18:12 ` David Ahern
0 siblings, 1 reply; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 18:09 UTC (permalink / raw)
To: David Ahern
Cc: Kuniyuki Iwashima, David S. Miller, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Wed, Feb 26, 2025 at 6:48 PM David Ahern <dsahern@kernel.org> wrote:
>
> On 2/25/25 11:22 AM, Kuniyuki Iwashima wrote:
> > diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> > index 6730e2034cf8..dbf84c23ca09 100644
> > --- a/net/ipv4/fib_frontend.c
> > +++ b/net/ipv4/fib_frontend.c
> > @@ -1615,9 +1615,15 @@ static int __net_init fib_net_init(struct net *net)
> > error = ip_fib_net_init(net);
> > if (error < 0)
> > goto out;
> > +
> > + error = fib4_semantics_init(net);
> > + if (error)
> > + goto out_semantics;
> > +
> > error = nl_fib_lookup_init(net);
> > if (error < 0)
> > goto out_nlfl;
> > +
> > error = fib_proc_init(net);
> > if (error < 0)
> > goto out_proc;
> > @@ -1627,6 +1633,8 @@ static int __net_init fib_net_init(struct net *net)
> > out_proc:
> > nl_fib_lookup_exit(net);
> > out_nlfl:
> > + fib4_semantics_init(net);
>
> _exit?
Yes, this was mentioned yesterday.
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation.
2025-02-26 18:09 ` Eric Dumazet
@ 2025-02-26 18:12 ` David Ahern
2025-02-26 18:20 ` Kuniyuki Iwashima
0 siblings, 1 reply; 31+ messages in thread
From: David Ahern @ 2025-02-26 18:12 UTC (permalink / raw)
To: Eric Dumazet
Cc: Kuniyuki Iwashima, David S. Miller, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On 2/26/25 11:09 AM, Eric Dumazet wrote:
>>> @@ -1627,6 +1633,8 @@ static int __net_init fib_net_init(struct net *net)
>>> out_proc:
>>> nl_fib_lookup_exit(net);
>>> out_nlfl:
>>> + fib4_semantics_init(net);
>>
>> _exit?
>
> Yes, this was mentioned yesterday.
not in this code path but rather fib_net_exit. Hence me pointing out
this one only
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation.
2025-02-26 18:12 ` David Ahern
@ 2025-02-26 18:20 ` Kuniyuki Iwashima
0 siblings, 0 replies; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-26 18:20 UTC (permalink / raw)
To: dsahern; +Cc: davem, edumazet, horms, kuba, kuni1840, kuniyu, netdev, pabeni
From: David Ahern <dsahern@kernel.org>
Date: Wed, 26 Feb 2025 11:12:13 -0700
> On 2/26/25 11:09 AM, Eric Dumazet wrote:
> >>> @@ -1627,6 +1633,8 @@ static int __net_init fib_net_init(struct net *net)
> >>> out_proc:
> >>> nl_fib_lookup_exit(net);
> >>> out_nlfl:
> >>> + fib4_semantics_init(net);
> >>
> >> _exit?
> >
> > Yes, this was mentioned yesterday.
>
> not in this code path but rather fib_net_exit. Hence me pointing out
> this one only
Sorry, actually I realised the path and fixed just after sending the
diff yesterday.
Will send v2 shortly.
Thank you both for the review !
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 04/12] ipv4: fib: Make fib_info_hashfn() return struct hlist_head.
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (2 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 03/12] ipv4: fib: Allocate fib_info_hash[] during netns initialisation Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 11:13 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 05/12] ipv4: fib: Remove fib_info_laddrhash pointer Kuniyuki Iwashima
` (8 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
Every time fib_info_hashfn() returns a hash value, we fetch
&fib_info_hash[hash].
Let's return the hlist_head pointer from fib_info_hashfn() and
rename it to fib_info_hash_bucket() to match a similar function,
fib_info_laddrhash_bucket().
Note that we need to move the fib_info_hash assignment earlier in
fib_info_hash_move() to use fib_info_hash_bucket() in the for loop.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_semantics.c | 28 +++++++++-------------------
1 file changed, 9 insertions(+), 19 deletions(-)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f00ac983861a..18bec34645ec 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -338,7 +338,7 @@ static unsigned int fib_info_hashfn_result(const struct net *net,
return hash_32(val ^ net_hash_mix(net), fib_info_hash_bits);
}
-static inline unsigned int fib_info_hashfn(struct fib_info *fi)
+static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
{
unsigned int val;
@@ -354,7 +354,7 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi)
} endfor_nexthops(fi)
}
- return fib_info_hashfn_result(fi->fib_net, val);
+ return &fib_info_hash[fib_info_hashfn_result(fi->fib_net, val)];
}
static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits)
@@ -405,12 +405,8 @@ static struct fib_info *fib_find_info_nh(struct net *net,
static struct fib_info *fib_find_info(struct fib_info *nfi)
{
- struct hlist_head *head;
+ struct hlist_head *head = fib_info_hash_bucket(nfi);
struct fib_info *fi;
- unsigned int hash;
-
- hash = fib_info_hashfn(nfi);
- head = &fib_info_hash[hash];
hlist_for_each_entry(fi, head, fib_hash) {
if (!net_eq(fi->fib_net, nfi->fib_net))
@@ -1274,22 +1270,16 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
fib_info_hash_bits = ilog2(new_size);
+ fib_info_hash = new_info_hash;
for (i = 0; i < old_size; i++) {
- struct hlist_head *head = &fib_info_hash[i];
+ struct hlist_head *head = &old_info_hash[i];
struct hlist_node *n;
struct fib_info *fi;
- hlist_for_each_entry_safe(fi, n, head, fib_hash) {
- struct hlist_head *dest;
- unsigned int new_hash;
-
- new_hash = fib_info_hashfn(fi);
- dest = &new_info_hash[new_hash];
- hlist_add_head(&fi->fib_hash, dest);
- }
+ hlist_for_each_entry_safe(fi, n, head, fib_hash)
+ hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
}
- fib_info_hash = new_info_hash;
fib_info_laddrhash = new_laddrhash;
for (i = 0; i < old_size; i++) {
@@ -1573,8 +1563,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
refcount_set(&fi->fib_clntref, 1);
fib_info_cnt++;
- hlist_add_head(&fi->fib_hash,
- &fib_info_hash[fib_info_hashfn(fi)]);
+ hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
+
if (fi->fib_prefsrc) {
struct hlist_head *head;
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 04/12] ipv4: fib: Make fib_info_hashfn() return struct hlist_head.
2025-02-25 18:22 ` [PATCH v1 net-next 04/12] ipv4: fib: Make fib_info_hashfn() return struct hlist_head Kuniyuki Iwashima
@ 2025-02-26 11:13 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 11:13 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:24 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Every time fib_info_hashfn() returns a hash value, we fetch
> &fib_info_hash[hash].
>
> Let's return the hlist_head pointer from fib_info_hashfn() and
> rename it to fib_info_hash_bucket() to match a similar function,
> fib_info_laddrhash_bucket().
>
> Note that we need to move the fib_info_hash assignment earlier in
> fib_info_hash_move() to use fib_info_hash_bucket() in the for loop.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Nice !
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 05/12] ipv4: fib: Remove fib_info_laddrhash pointer.
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (3 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 04/12] ipv4: fib: Make fib_info_hashfn() return struct hlist_head Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 11:15 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 06/12] ipv4: fib: Remove fib_info_hash_size Kuniyuki Iwashima
` (7 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
We will allocate the fib_info hash tables per netns.
There are 5 global variables for fib_info hash tables:
fib_info_hash, fib_info_laddrhash, fib_info_hash_size,
fib_info_hash_bits, fib_info_cnt.
However, fib_info_laddrhash and fib_info_hash_size can be
easily calculated from fib_info_hash and fib_info_hash_bits.
Let's remove the fib_info_laddrhash pointer and instead use
fib_info_hash + (1 << fib_info_hash_bits).
While at it, fib_info_laddrhash_bucket() is moved near other
hash-table-specific functions.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_semantics.c | 27 +++++++++++----------------
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 18bec34645ec..c57173516de5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -51,7 +51,6 @@
#include "fib_lookup.h"
static struct hlist_head *fib_info_hash;
-static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
@@ -357,6 +356,15 @@ static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
return &fib_info_hash[fib_info_hashfn_result(fi->fib_net, val)];
}
+static struct hlist_head *fib_info_laddrhash_bucket(const struct net *net,
+ __be32 val)
+{
+ u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
+ fib_info_hash_bits);
+
+ return &fib_info_hash[(1 << fib_info_hash_bits) + slot];
+}
+
static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits)
{
/* The second half is used for prefsrc */
@@ -1248,26 +1256,15 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
-static struct hlist_head *
-fib_info_laddrhash_bucket(const struct net *net, __be32 val)
-{
- u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
- fib_info_hash_bits);
-
- return &fib_info_laddrhash[slot];
-}
-
static void fib_info_hash_move(struct hlist_head *new_info_hash,
unsigned int new_size)
{
- struct hlist_head *new_laddrhash = new_info_hash + new_size;
- struct hlist_head *old_info_hash, *old_laddrhash;
unsigned int old_size = fib_info_hash_size;
+ struct hlist_head *old_info_hash;
unsigned int i;
ASSERT_RTNL();
old_info_hash = fib_info_hash;
- old_laddrhash = fib_info_laddrhash;
fib_info_hash_size = new_size;
fib_info_hash_bits = ilog2(new_size);
fib_info_hash = new_info_hash;
@@ -1281,9 +1278,8 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
}
- fib_info_laddrhash = new_laddrhash;
for (i = 0; i < old_size; i++) {
- struct hlist_head *lhead = &old_laddrhash[i];
+ struct hlist_head *lhead = &old_info_hash[old_size + i];
struct hlist_node *n;
struct fib_info *fi;
@@ -2262,7 +2258,6 @@ int __net_init fib4_semantics_init(struct net *net)
fib_info_hash_bits = hash_bits;
fib_info_hash_size = 1 << hash_bits;
- fib_info_laddrhash = fib_info_hash + fib_info_hash_size;
return 0;
}
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 05/12] ipv4: fib: Remove fib_info_laddrhash pointer.
2025-02-25 18:22 ` [PATCH v1 net-next 05/12] ipv4: fib: Remove fib_info_laddrhash pointer Kuniyuki Iwashima
@ 2025-02-26 11:15 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 11:15 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:25 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> We will allocate the fib_info hash tables per netns.
>
> There are 5 global variables for fib_info hash tables:
> fib_info_hash, fib_info_laddrhash, fib_info_hash_size,
> fib_info_hash_bits, fib_info_cnt.
>
> However, fib_info_laddrhash and fib_info_hash_size can be
> easily calculated from fib_info_hash and fib_info_hash_bits.
>
> Let's remove the fib_info_laddrhash pointer and instead use
> fib_info_hash + (1 << fib_info_hash_bits).
>
> While at it, fib_info_laddrhash_bucket() is moved near other
> hash-table-specific functions.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 06/12] ipv4: fib: Remove fib_info_hash_size.
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (4 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 05/12] ipv4: fib: Remove fib_info_laddrhash pointer Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 11:16 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 07/12] ipv4: fib: Add fib_info_hash_grow() Kuniyuki Iwashima
` (6 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
We will allocate the fib_info hash tables per netns.
There are 5 global variables for fib_info hash tables:
fib_info_hash, fib_info_laddrhash, fib_info_hash_size,
fib_info_hash_bits, fib_info_cnt.
However, fib_info_laddrhash and fib_info_hash_size can be
easily calculated from fib_info_hash and fib_info_hash_bits.
Let's remove fib_info_hash_size and use (1 << fib_info_hash_bits)
instead.
Now we need not pass the new hash table size to fib_info_hash_move().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_semantics.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c57173516de5..cf45e35a603f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -51,7 +51,6 @@
#include "fib_lookup.h"
static struct hlist_head *fib_info_hash;
-static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;
@@ -1256,17 +1255,15 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
-static void fib_info_hash_move(struct hlist_head *new_info_hash,
- unsigned int new_size)
+static void fib_info_hash_move(struct hlist_head *new_info_hash)
{
- unsigned int old_size = fib_info_hash_size;
+ unsigned int old_size = 1 << fib_info_hash_bits;
struct hlist_head *old_info_hash;
unsigned int i;
ASSERT_RTNL();
old_info_hash = fib_info_hash;
- fib_info_hash_size = new_size;
- fib_info_hash_bits = ilog2(new_size);
+ fib_info_hash_bits += 1;
fib_info_hash = new_info_hash;
for (i = 0; i < old_size; i++) {
@@ -1407,13 +1404,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
#endif
- if (fib_info_cnt >= fib_info_hash_size) {
- unsigned int new_hash_bits = fib_info_hash_bits + 1;
+ if (fib_info_cnt >= (1 << fib_info_hash_bits)) {
struct hlist_head *new_info_hash;
- new_info_hash = fib_info_hash_alloc(new_hash_bits);
+ new_info_hash = fib_info_hash_alloc(fib_info_hash_bits + 1);
if (new_info_hash)
- fib_info_hash_move(new_info_hash, 1 << new_hash_bits);
+ fib_info_hash_move(new_info_hash);
}
fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
@@ -2257,7 +2253,6 @@ int __net_init fib4_semantics_init(struct net *net)
return -ENOMEM;
fib_info_hash_bits = hash_bits;
- fib_info_hash_size = 1 << hash_bits;
return 0;
}
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 06/12] ipv4: fib: Remove fib_info_hash_size.
2025-02-25 18:22 ` [PATCH v1 net-next 06/12] ipv4: fib: Remove fib_info_hash_size Kuniyuki Iwashima
@ 2025-02-26 11:16 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 11:16 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:25 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> We will allocate the fib_info hash tables per netns.
>
> There are 5 global variables for fib_info hash tables:
> fib_info_hash, fib_info_laddrhash, fib_info_hash_size,
> fib_info_hash_bits, fib_info_cnt.
>
> However, fib_info_laddrhash and fib_info_hash_size can be
> easily calculated from fib_info_hash and fib_info_hash_bits.
>
> Let's remove fib_info_hash_size and use (1 << fib_info_hash_bits)
> instead.
>
> Now we need not pass the new hash table size to fib_info_hash_move().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 07/12] ipv4: fib: Add fib_info_hash_grow().
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (5 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 06/12] ipv4: fib: Remove fib_info_hash_size Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 11:18 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 08/12] ipv4: fib: Namespacify fib_info hash tables Kuniyuki Iwashima
` (5 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
When the number of struct fib_info exceeds the hash table size in
fib_create_info(), we try to allocate a new hash table with the
doubled size.
The allocation is done in fib_create_info(), and if successful, each
struct fib_info is moved to the new hash table by fib_info_hash_move().
Let's integrate the allocation and fib_info_hash_move() as
fib_info_hash_grow() to make the following change cleaner.
While at it, fib_info_hash_grow() is placed near other hash-table-specific
functions.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_semantics.c | 85 +++++++++++++++++++---------------------
1 file changed, 41 insertions(+), 44 deletions(-)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index cf45e35a603f..0eb583a7d772 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -377,6 +377,46 @@ static void fib_info_hash_free(struct hlist_head *head)
kvfree(head);
}
+static void fib_info_hash_grow(void)
+{
+ struct hlist_head *new_info_hash, *old_info_hash;
+ unsigned int old_size = 1 << fib_info_hash_bits;
+ unsigned int i;
+
+ if (fib_info_cnt < old_size)
+ return;
+
+ new_info_hash = fib_info_hash_alloc(fib_info_hash_bits + 1);
+ if (!new_info_hash)
+ return;
+
+ old_info_hash = fib_info_hash;
+ fib_info_hash = new_info_hash;
+ fib_info_hash_bits += 1;
+
+ for (i = 0; i < old_size; i++) {
+ struct hlist_head *head = &old_info_hash[i];
+ struct hlist_node *n;
+ struct fib_info *fi;
+
+ hlist_for_each_entry_safe(fi, n, head, fib_hash)
+ hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
+ }
+
+ for (i = 0; i < old_size; i++) {
+ struct hlist_head *lhead = &old_info_hash[old_size + i];
+ struct hlist_node *n;
+ struct fib_info *fi;
+
+ hlist_for_each_entry_safe(fi, n, lhead, fib_lhash)
+ hlist_add_head(&fi->fib_lhash,
+ fib_info_laddrhash_bucket(fi->fib_net,
+ fi->fib_prefsrc));
+ }
+
+ fib_info_hash_free(old_info_hash);
+}
+
/* no metrics, only nexthop id */
static struct fib_info *fib_find_info_nh(struct net *net,
const struct fib_config *cfg)
@@ -1255,43 +1295,6 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
-static void fib_info_hash_move(struct hlist_head *new_info_hash)
-{
- unsigned int old_size = 1 << fib_info_hash_bits;
- struct hlist_head *old_info_hash;
- unsigned int i;
-
- ASSERT_RTNL();
- old_info_hash = fib_info_hash;
- fib_info_hash_bits += 1;
- fib_info_hash = new_info_hash;
-
- for (i = 0; i < old_size; i++) {
- struct hlist_head *head = &old_info_hash[i];
- struct hlist_node *n;
- struct fib_info *fi;
-
- hlist_for_each_entry_safe(fi, n, head, fib_hash)
- hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
- }
-
- for (i = 0; i < old_size; i++) {
- struct hlist_head *lhead = &old_info_hash[old_size + i];
- struct hlist_node *n;
- struct fib_info *fi;
-
- hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
- struct hlist_head *ldest;
-
- ldest = fib_info_laddrhash_bucket(fi->fib_net,
- fi->fib_prefsrc);
- hlist_add_head(&fi->fib_lhash, ldest);
- }
- }
-
- fib_info_hash_free(old_info_hash);
-}
-
__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
unsigned char scope)
{
@@ -1404,13 +1407,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
#endif
- if (fib_info_cnt >= (1 << fib_info_hash_bits)) {
- struct hlist_head *new_info_hash;
-
- new_info_hash = fib_info_hash_alloc(fib_info_hash_bits + 1);
- if (new_info_hash)
- fib_info_hash_move(new_info_hash);
- }
+ fib_info_hash_grow();
fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
if (!fi) {
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 07/12] ipv4: fib: Add fib_info_hash_grow().
2025-02-25 18:22 ` [PATCH v1 net-next 07/12] ipv4: fib: Add fib_info_hash_grow() Kuniyuki Iwashima
@ 2025-02-26 11:18 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 11:18 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:26 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> When the number of struct fib_info exceeds the hash table size in
> fib_create_info(), we try to allocate a new hash table with the
> doubled size.
>
> The allocation is done in fib_create_info(), and if successful, each
> struct fib_info is moved to the new hash table by fib_info_hash_move().
>
> Let's integrate the allocation and fib_info_hash_move() as
> fib_info_hash_grow() to make the following change cleaner.
>
> While at it, fib_info_hash_grow() is placed near other hash-table-specific
> functions.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 08/12] ipv4: fib: Namespacify fib_info hash tables.
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (6 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 07/12] ipv4: fib: Add fib_info_hash_grow() Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 11:22 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 09/12] ipv4: fib: Hold rtnl_net_lock() for ip_fib_net_exit() Kuniyuki Iwashima
` (4 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
We will convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL.
Then, we need to have per-netns hash tables for struct fib_info.
Let's allocate the hash tables per netns.
fib_info_hash, fib_info_hash_bits, and fib_info_cnt are now moved
to struct netns_ipv4 and accessed with net->ipv4.fib_XXX.
Also, the netns checks are removed from fib_find_info_nh() and
fib_find_info().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/net/netns/ipv4.h | 3 ++
net/ipv4/fib_semantics.c | 61 +++++++++++++++++-----------------------
2 files changed, 29 insertions(+), 35 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 45ac125e8aeb..650b2dc9199f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -111,6 +111,9 @@ struct netns_ipv4 {
#endif
struct hlist_head *fib_table_hash;
struct sock *fibnl;
+ struct hlist_head *fib_info_hash;
+ unsigned int fib_info_hash_bits;
+ unsigned int fib_info_cnt;
struct sock *mc_autojoin_sk;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0eb583a7d772..dd80d2e291e4 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -50,10 +50,6 @@
#include "fib_lookup.h"
-static struct hlist_head *fib_info_hash;
-static unsigned int fib_info_hash_bits;
-static unsigned int fib_info_cnt;
-
/* for_nexthops and change_nexthops only used when nexthop object
* is not set in a fib_info. The logic within can reference fib_nh.
*/
@@ -256,8 +252,7 @@ void fib_release_info(struct fib_info *fi)
ASSERT_RTNL();
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
-
- fib_info_cnt--;
+ fi->fib_net->ipv4.fib_info_cnt--;
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
@@ -333,11 +328,12 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
static unsigned int fib_info_hashfn_result(const struct net *net,
unsigned int val)
{
- return hash_32(val ^ net_hash_mix(net), fib_info_hash_bits);
+ return hash_32(val ^ net_hash_mix(net), net->ipv4.fib_info_hash_bits);
}
static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
{
+ struct net *net = fi->fib_net;
unsigned int val;
val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
@@ -352,16 +348,18 @@ static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
} endfor_nexthops(fi)
}
- return &fib_info_hash[fib_info_hashfn_result(fi->fib_net, val)];
+ return &net->ipv4.fib_info_hash[fib_info_hashfn_result(net, val)];
}
static struct hlist_head *fib_info_laddrhash_bucket(const struct net *net,
__be32 val)
{
- u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
- fib_info_hash_bits);
+ unsigned int hash_bits = net->ipv4.fib_info_hash_bits;
+ u32 slot;
- return &fib_info_hash[(1 << fib_info_hash_bits) + slot];
+ slot = hash_32(net_hash_mix(net) ^ (__force u32)val, hash_bits);
+
+ return &net->ipv4.fib_info_hash[(1 << hash_bits) + slot];
}
static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits)
@@ -377,22 +375,22 @@ static void fib_info_hash_free(struct hlist_head *head)
kvfree(head);
}
-static void fib_info_hash_grow(void)
+static void fib_info_hash_grow(struct net *net)
{
+ unsigned int old_size = 1 << net->ipv4.fib_info_hash_bits;
struct hlist_head *new_info_hash, *old_info_hash;
- unsigned int old_size = 1 << fib_info_hash_bits;
unsigned int i;
- if (fib_info_cnt < old_size)
+ if (net->ipv4.fib_info_cnt < old_size)
return;
- new_info_hash = fib_info_hash_alloc(fib_info_hash_bits + 1);
+ new_info_hash = fib_info_hash_alloc(net->ipv4.fib_info_hash_bits + 1);
if (!new_info_hash)
return;
- old_info_hash = fib_info_hash;
- fib_info_hash = new_info_hash;
- fib_info_hash_bits += 1;
+ old_info_hash = net->ipv4.fib_info_hash;
+ net->ipv4.fib_info_hash = new_info_hash;
+ net->ipv4.fib_info_hash_bits += 1;
for (i = 0; i < old_size; i++) {
struct hlist_head *head = &old_info_hash[i];
@@ -430,13 +428,12 @@ static struct fib_info *fib_find_info_nh(struct net *net,
(__force u32)cfg->fc_prefsrc,
cfg->fc_priority);
hash = fib_info_hashfn_result(net, hash);
- head = &fib_info_hash[hash];
+ head = &net->ipv4.fib_info_hash[hash];
hlist_for_each_entry(fi, head, fib_hash) {
- if (!net_eq(fi->fib_net, net))
- continue;
if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
continue;
+
if (cfg->fc_protocol == fi->fib_protocol &&
cfg->fc_scope == fi->fib_scope &&
cfg->fc_prefsrc == fi->fib_prefsrc &&
@@ -456,10 +453,9 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
struct fib_info *fi;
hlist_for_each_entry(fi, head, fib_hash) {
- if (!net_eq(fi->fib_net, nfi->fib_net))
- continue;
if (fi->fib_nhs != nfi->fib_nhs)
continue;
+
if (nfi->fib_protocol == fi->fib_protocol &&
nfi->fib_scope == fi->fib_scope &&
nfi->fib_prefsrc == fi->fib_prefsrc &&
@@ -1407,7 +1403,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
#endif
- fib_info_hash_grow();
+ fib_info_hash_grow(net);
fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
if (!fi) {
@@ -1551,7 +1547,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
- fib_info_cnt++;
+ net->ipv4.fib_info_cnt++;
hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
if (fi->fib_prefsrc) {
@@ -2242,22 +2238,17 @@ int __net_init fib4_semantics_init(struct net *net)
{
unsigned int hash_bits = 4;
- if (!net_eq(net, &init_net))
- return 0;
-
- fib_info_hash = fib_info_hash_alloc(hash_bits);
- if (!fib_info_hash)
+ net->ipv4.fib_info_hash = fib_info_hash_alloc(hash_bits);
+ if (!net->ipv4.fib_info_hash)
return -ENOMEM;
- fib_info_hash_bits = hash_bits;
+ net->ipv4.fib_info_hash_bits = hash_bits;
+ net->ipv4.fib_info_cnt = 0;
return 0;
}
void __net_exit fib4_semantics_exit(struct net *net)
{
- if (!net_eq(net, &init_net))
- return;
-
- fib_info_hash_free(fib_info_hash);
+ fib_info_hash_free(net->ipv4.fib_info_hash);
}
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 08/12] ipv4: fib: Namespacify fib_info hash tables.
2025-02-25 18:22 ` [PATCH v1 net-next 08/12] ipv4: fib: Namespacify fib_info hash tables Kuniyuki Iwashima
@ 2025-02-26 11:22 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 11:22 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:26 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> We will convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL.
> Then, we need to have per-netns hash tables for struct fib_info.
>
> Let's allocate the hash tables per netns.
>
> fib_info_hash, fib_info_hash_bits, and fib_info_cnt are now moved
> to struct netns_ipv4 and accessed with net->ipv4.fib_XXX.
>
> Also, the netns checks are removed from fib_find_info_nh() and
> fib_find_info().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 09/12] ipv4: fib: Hold rtnl_net_lock() for ip_fib_net_exit().
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (7 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 08/12] ipv4: fib: Namespacify fib_info hash tables Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 11:23 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 10/12] ipv4: fib: Hold rtnl_net_lock() in ip_rt_ioctl() Kuniyuki Iwashima
` (3 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
ip_fib_net_exit() requires RTNL and is called from fib_net_init()
and fib_net_exit_batch().
Let's hold rtnl_net_lock() before ip_fib_net_exit().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_frontend.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index dbf84c23ca09..ad3a36bc928b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1575,7 +1575,7 @@ static void ip_fib_net_exit(struct net *net)
{
int i;
- ASSERT_RTNL();
+ ASSERT_RTNL_NET(net);
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
@@ -1635,9 +1635,9 @@ static int __net_init fib_net_init(struct net *net)
out_nlfl:
fib4_semantics_init(net);
out_semantics:
- rtnl_lock();
+ rtnl_net_lock(net);
ip_fib_net_exit(net);
- rtnl_unlock();
+ rtnl_net_unlock(net);
goto out;
}
@@ -1653,9 +1653,11 @@ static void __net_exit fib_net_exit_batch(struct list_head *net_list)
struct net *net;
rtnl_lock();
- list_for_each_entry(net, net_list, exit_list)
+ list_for_each_entry(net, net_list, exit_list) {
+ __rtnl_net_lock(net);
ip_fib_net_exit(net);
-
+ __rtnl_net_unlock(net);
+ }
rtnl_unlock();
}
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* [PATCH v1 net-next 10/12] ipv4: fib: Hold rtnl_net_lock() in ip_rt_ioctl().
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (8 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 09/12] ipv4: fib: Hold rtnl_net_lock() for ip_fib_net_exit() Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 14:42 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 11/12] ipv4: fib: Move fib_valid_key_len() to rtm_to_fib_config() Kuniyuki Iwashima
` (2 subsequent siblings)
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
ioctl(SIOCADDRT/SIOCDELRT) calls ip_rt_ioctl() to add/remove a route in
the netns of the specified socket.
Let's hold rtnl_net_lock() there.
Note that rtentry_to_fib_config() can be called without rtnl_net_lock()
if we convert rtentry.dev handling to RCU later.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_frontend.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ad3a36bc928b..180c1944c064 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -553,18 +553,16 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
const struct in_ifaddr *ifa;
struct in_device *in_dev;
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
if (!in_dev)
return -ENODEV;
*colon = ':';
- rcu_read_lock();
- in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) {
if (strcmp(ifa->ifa_label, devname) == 0)
break;
}
- rcu_read_unlock();
if (!ifa)
return -ENODEV;
@@ -635,7 +633,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- rtnl_lock();
+ rtnl_net_lock(net);
err = rtentry_to_fib_config(net, cmd, rt, &cfg);
if (err == 0) {
struct fib_table *tb;
@@ -659,7 +657,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
/* allocated by rtentry_to_fib_config() */
kfree(cfg.fc_mx);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
return -EINVAL;
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 10/12] ipv4: fib: Hold rtnl_net_lock() in ip_rt_ioctl().
2025-02-25 18:22 ` [PATCH v1 net-next 10/12] ipv4: fib: Hold rtnl_net_lock() in ip_rt_ioctl() Kuniyuki Iwashima
@ 2025-02-26 14:42 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 14:42 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:27 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> ioctl(SIOCADDRT/SIOCDELRT) calls ip_rt_ioctl() to add/remove a route in
> the netns of the specified socket.
>
> Let's hold rtnl_net_lock() there.
>
> Note that rtentry_to_fib_config() can be called without rtnl_net_lock()
> if we convert rtentry.dev handling to RCU later.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 11/12] ipv4: fib: Move fib_valid_key_len() to rtm_to_fib_config().
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (9 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 10/12] ipv4: fib: Hold rtnl_net_lock() in ip_rt_ioctl() Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 14:43 ` Eric Dumazet
2025-02-25 18:22 ` [PATCH v1 net-next 12/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
2025-02-26 0:24 ` [PATCH v1 net-next 00/12] " Jakub Kicinski
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
fib_valid_key_len() is called in the beginning of fib_table_insert()
or fib_table_delete() to check if the prefix length is valid.
fib_table_insert() and fib_table_delete() are called from 3 paths
- ip_rt_ioctl()
- inet_rtm_newroute() / inet_rtm_delroute()
- fib_magic()
In the first ioctl() path, rtentry_to_fib_config() checks the prefix
length with bad_mask(). Also, fib_magic() always passes the correct
prefix: 32 or ifa->ifa_prefixlen, which is already validated.
Let's move fib_valid_key_len() to the rtnetlink path, rtm_to_fib_config().
While at it, 2 direct returns in rtm_to_fib_config() are changed to
goto to match other places in the same function
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_frontend.c | 18 ++++++++++++++++--
net/ipv4/fib_trie.c | 22 ----------------------
2 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 180c1944c064..00c20dc021ce 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -835,19 +835,33 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
}
}
+ if (cfg->fc_dst_len > 32) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (cfg->fc_dst_len < 32 && (ntohl(cfg->fc_dst) << cfg->fc_dst_len)) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
if (cfg->fc_nh_id) {
if (cfg->fc_oif || cfg->fc_gw_family ||
cfg->fc_encap || cfg->fc_mp) {
NL_SET_ERR_MSG(extack,
"Nexthop specification and nexthop id are mutually exclusive");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
}
if (has_gw && has_via) {
NL_SET_ERR_MSG(extack,
"Nexthop configuration can not contain both GATEWAY and VIA");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
if (!cfg->fc_table)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d6411ac81096..59a6f0a9638f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1187,22 +1187,6 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
return 0;
}
-static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
-{
- if (plen > KEYLENGTH) {
- NL_SET_ERR_MSG(extack, "Invalid prefix length");
- return false;
- }
-
- if ((plen < KEYLENGTH) && (key << plen)) {
- NL_SET_ERR_MSG(extack,
- "Invalid prefix for given prefix length");
- return false;
- }
-
- return true;
-}
-
static void fib_remove_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *old);
@@ -1223,9 +1207,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
fi = fib_create_info(cfg, extack);
@@ -1717,9 +1698,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
l = fib_find_node(t, &tp, key);
if (!l)
return -ESRCH;
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 11/12] ipv4: fib: Move fib_valid_key_len() to rtm_to_fib_config().
2025-02-25 18:22 ` [PATCH v1 net-next 11/12] ipv4: fib: Move fib_valid_key_len() to rtm_to_fib_config() Kuniyuki Iwashima
@ 2025-02-26 14:43 ` Eric Dumazet
0 siblings, 0 replies; 31+ messages in thread
From: Eric Dumazet @ 2025-02-26 14:43 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Jakub Kicinski, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, Feb 25, 2025 at 7:27 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> fib_valid_key_len() is called in the beginning of fib_table_insert()
> or fib_table_delete() to check if the prefix length is valid.
>
> fib_table_insert() and fib_table_delete() are called from 3 paths
>
> - ip_rt_ioctl()
> - inet_rtm_newroute() / inet_rtm_delroute()
> - fib_magic()
>
> In the first ioctl() path, rtentry_to_fib_config() checks the prefix
> length with bad_mask(). Also, fib_magic() always passes the correct
> prefix: 32 or ifa->ifa_prefixlen, which is already validated.
>
> Let's move fib_valid_key_len() to the rtnetlink path, rtm_to_fib_config().
>
> While at it, 2 direct returns in rtm_to_fib_config() are changed to
> goto to match other places in the same function
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v1 net-next 12/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL.
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (10 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 11/12] ipv4: fib: Move fib_valid_key_len() to rtm_to_fib_config() Kuniyuki Iwashima
@ 2025-02-25 18:22 ` Kuniyuki Iwashima
2025-02-26 14:45 ` Eric Dumazet
2025-02-26 0:24 ` [PATCH v1 net-next 00/12] " Jakub Kicinski
12 siblings, 1 reply; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-25 18:22 UTC (permalink / raw)
To: David S. Miller, David Ahern, Eric Dumazet, Jakub Kicinski,
Paolo Abeni, Simon Horman
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
We converted fib_info hash tables to per-netns one and now ready to
convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL.
Let's hold rtnl_net_lock() in inet_rtm_newroute() and inet_rtm_delroute().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/ipv4/fib_frontend.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 00c20dc021ce..b9ead0257340 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -884,20 +884,24 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ rtnl_net_lock(net);
+
if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) {
NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
err = -EINVAL;
- goto errout;
+ goto unlock;
}
tb = fib_get_table(net, cfg.fc_table);
if (!tb) {
NL_SET_ERR_MSG(extack, "FIB table does not exist");
err = -ESRCH;
- goto errout;
+ goto unlock;
}
err = fib_table_delete(net, tb, &cfg, extack);
+unlock:
+ rtnl_net_unlock(net);
errout:
return err;
}
@@ -914,15 +918,20 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ rtnl_net_lock(net);
+
tb = fib_new_table(net, cfg.fc_table);
if (!tb) {
err = -ENOBUFS;
- goto errout;
+ goto unlock;
}
err = fib_table_insert(net, tb, &cfg, extack);
if (!err && cfg.fc_type == RTN_LOCAL)
net->ipv4.fib_has_custom_local_routes = true;
+
+unlock:
+ rtnl_net_unlock(net);
errout:
return err;
}
@@ -1681,9 +1690,9 @@ static struct pernet_operations fib_net_ops = {
static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = {
{.protocol = PF_INET, .msgtype = RTM_NEWROUTE,
- .doit = inet_rtm_newroute},
+ .doit = inet_rtm_newroute, .flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET, .msgtype = RTM_DELROUTE,
- .doit = inet_rtm_delroute},
+ .doit = inet_rtm_delroute, .flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib,
.flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
};
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL.
2025-02-25 18:22 [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
` (11 preceding siblings ...)
2025-02-25 18:22 ` [PATCH v1 net-next 12/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL Kuniyuki Iwashima
@ 2025-02-26 0:24 ` Jakub Kicinski
2025-02-26 0:46 ` Kuniyuki Iwashima
12 siblings, 1 reply; 31+ messages in thread
From: Jakub Kicinski @ 2025-02-26 0:24 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, David Ahern, Eric Dumazet, Paolo Abeni,
Simon Horman, Kuniyuki Iwashima, netdev
On Tue, 25 Feb 2025 10:22:38 -0800 Kuniyuki Iwashima wrote:
> Patch 1 is a misc cleanup.
> Patch 2 ~ 8 converts two fib_info hash tables to per-netns.
> Patch 9 ~ 12 converts rtnl_lock() to rtnl_net_lcok().
Breaks quite a few tests :(
unreferenced object 0xffff88800bfc6800 (size 256):
comm "ip", pid 577, jiffies 4294699578
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace (crc 0):
__kmalloc_node_noprof+0x35d/0x4a0
fib4_semantics_init+0x25/0xf0
fib_net_init+0x17e/0x340
ops_init+0x189/0x550
setup_net+0x189/0x750
copy_net_ns+0x1f7/0x340
create_new_namespaces+0x35f/0x920
unshare_nsproxy_namespaces+0x8d/0x130
ksys_unshare+0x2a9/0x660
__x64_sys_unshare+0x31/0x40
do_syscall_64+0xc1/0x1d0
entry_SYSCALL_64_after_hwframe+0x77/0x7f
--
pw-bot: cr
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v1 net-next 00/12] ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL.
2025-02-26 0:24 ` [PATCH v1 net-next 00/12] " Jakub Kicinski
@ 2025-02-26 0:46 ` Kuniyuki Iwashima
0 siblings, 0 replies; 31+ messages in thread
From: Kuniyuki Iwashima @ 2025-02-26 0:46 UTC (permalink / raw)
To: kuba; +Cc: davem, dsahern, edumazet, horms, kuni1840, kuniyu, netdev, pabeni
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 25 Feb 2025 16:24:48 -0800
> On Tue, 25 Feb 2025 10:22:38 -0800 Kuniyuki Iwashima wrote:
> > Patch 1 is a misc cleanup.
> > Patch 2 ~ 8 converts two fib_info hash tables to per-netns.
> > Patch 9 ~ 12 converts rtnl_lock() to rtnl_net_lcok().
>
> Breaks quite a few tests :(
Oh, sorry... why I didn't notice this silly mistake :/
I enabled kmemleak on my debug config.
Will fix in v2.
---8<---
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b9ead0257340..34cfea5c127b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1666,7 +1666,7 @@ static void __net_exit fib_net_exit(struct net *net)
{
fib_proc_exit(net);
nl_fib_lookup_exit(net);
- fib4_semantics_init(net);
+ fib4_semantics_exit(net);
}
static void __net_exit fib_net_exit_batch(struct list_head *net_list)
---8<---
>
> unreferenced object 0xffff88800bfc6800 (size 256):
> comm "ip", pid 577, jiffies 4294699578
> hex dump (first 32 bytes):
> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
> backtrace (crc 0):
> __kmalloc_node_noprof+0x35d/0x4a0
> fib4_semantics_init+0x25/0xf0
> fib_net_init+0x17e/0x340
^ permalink raw reply related [flat|nested] 31+ messages in thread