* [PATCH v2 net-next 1/4] ipv4: Link IPv4 address to per-netns hash table.
2024-10-04 19:59 [PATCH v2 net-next 0/4] ipv4: Namespacify IPv4 address hash table Kuniyuki Iwashima
@ 2024-10-04 19:59 ` Kuniyuki Iwashima
2024-10-04 19:59 ` [PATCH v2 net-next 2/4] ipv4: Use per-netns hash table in inet_lookup_ifaddr_rcu() Kuniyuki Iwashima
` (2 subsequent siblings)
3 siblings, 0 replies; 8+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-04 19:59 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
David Ahern
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
As a prep for per-netns RTNL conversion, we want to namespacify
the IPv4 address hash table and the GC work.
Let's allocate the per-netns IPv4 address hash table to
net->ipv4.inet_addr_lst and link IPv4 addresses into it.
The actual users will be converted later.
Note that the IPv6 address hash table is already namespacified.
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/linux/inetdevice.h | 1 +
include/net/netns/ipv4.h | 1 +
net/ipv4/devinet.c | 22 +++++++++++++++++++---
3 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index cb5280e6cc21..d0c2bf67a9b0 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -142,6 +142,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
struct in_ifaddr {
struct hlist_node hash;
+ struct hlist_node addr_lst;
struct in_ifaddr __rcu *ifa_next;
struct in_device *ifa_dev;
struct rcu_head rcu_head;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 276f622f3516..29eba2eaaa26 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -270,5 +270,6 @@ struct netns_ipv4 {
atomic_t rt_genid;
siphash_key_t ip_id_key;
+ struct hlist_head *inet_addr_lst;
};
#endif
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ab76744383cf..059807a627a6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -134,11 +134,13 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
ASSERT_RTNL();
hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
+ hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
}
static void inet_hash_remove(struct in_ifaddr *ifa)
{
ASSERT_RTNL();
+ hlist_del_init_rcu(&ifa->addr_lst);
hlist_del_init_rcu(&ifa->hash);
}
@@ -228,6 +230,7 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
ifa->ifa_dev = in_dev;
INIT_HLIST_NODE(&ifa->hash);
+ INIT_HLIST_NODE(&ifa->addr_lst);
return ifa;
}
@@ -2663,14 +2666,21 @@ static struct ctl_table ctl_forward_entry[] = {
static __net_init int devinet_init_net(struct net *net)
{
- int err;
- struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
- struct ctl_table *tbl;
struct ctl_table_header *forw_hdr;
+ struct ctl_table *tbl;
#endif
+ struct ipv4_devconf *all, *dflt;
+ int err;
+ int i;
err = -ENOMEM;
+ net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!net->ipv4.inet_addr_lst)
+ goto err_alloc_hash;
+
all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
if (!all)
goto err_alloc_all;
@@ -2731,6 +2741,9 @@ static __net_init int devinet_init_net(struct net *net)
net->ipv4.forw_hdr = forw_hdr;
#endif
+ for (i = 0; i < IN4_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);
+
net->ipv4.devconf_all = all;
net->ipv4.devconf_dflt = dflt;
return 0;
@@ -2748,6 +2761,8 @@ static __net_init int devinet_init_net(struct net *net)
err_alloc_dflt:
kfree(all);
err_alloc_all:
+ kfree(net->ipv4.inet_addr_lst);
+err_alloc_hash:
return err;
}
@@ -2766,6 +2781,7 @@ static __net_exit void devinet_exit_net(struct net *net)
#endif
kfree(net->ipv4.devconf_dflt);
kfree(net->ipv4.devconf_all);
+ kfree(net->ipv4.inet_addr_lst);
}
static __net_initdata struct pernet_operations devinet_ops = {
--
2.30.2
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v2 net-next 3/4] ipv4: Namespacify IPv4 address GC.
2024-10-04 19:59 [PATCH v2 net-next 0/4] ipv4: Namespacify IPv4 address hash table Kuniyuki Iwashima
2024-10-04 19:59 ` [PATCH v2 net-next 1/4] ipv4: Link IPv4 address to per-netns " Kuniyuki Iwashima
2024-10-04 19:59 ` [PATCH v2 net-next 2/4] ipv4: Use per-netns hash table in inet_lookup_ifaddr_rcu() Kuniyuki Iwashima
@ 2024-10-04 19:59 ` Kuniyuki Iwashima
2024-10-04 19:59 ` [PATCH v2 net-next 4/4] ipv4: Retire global IPv4 hash table inet_addr_lst Kuniyuki Iwashima
3 siblings, 0 replies; 8+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-04 19:59 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
David Ahern
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
Each IPv4 address could have a lifetime, which is useful for DHCP,
and GC is periodically executed as check_lifetime_work.
check_lifetime() does the actual GC under RTNL.
1. Acquire RTNL
2. Iterate inet_addr_lst
3. Remove IPv4 address if expired
4. Release RTNL
Namespacifying the GC is required for per-netns RTNL, but using the
per-netns hash table will shorten the time on the hash bucket iteration
under RTNL.
Let's add per-netns GC work and use the per-netns hash table.
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/net/netns/ipv4.h | 1 +
net/ipv4/devinet.c | 32 ++++++++++++++++++--------------
2 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 29eba2eaaa26..66a4cffc44ee 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -271,5 +271,6 @@ struct netns_ipv4 {
atomic_t rt_genid;
siphash_key_t ip_id_key;
struct hlist_head *inet_addr_lst;
+ struct delayed_work addr_chk_work;
};
#endif
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cf47b5ac061f..ac245944e89e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -486,15 +486,12 @@ static void inet_del_ifa(struct in_device *in_dev,
__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
-static void check_lifetime(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
-
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
u32 portid, struct netlink_ext_ack *extack)
{
struct in_ifaddr __rcu **last_primary, **ifap;
struct in_device *in_dev = ifa->ifa_dev;
+ struct net *net = dev_net(in_dev->dev);
struct in_validator_info ivi;
struct in_ifaddr *ifa1;
int ret;
@@ -563,8 +560,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
inet_hash_insert(dev_net(in_dev->dev), ifa);
- cancel_delayed_work(&check_lifetime_work);
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
+ cancel_delayed_work(&net->ipv4.addr_chk_work);
+ queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
@@ -710,16 +707,19 @@ static void check_lifetime(struct work_struct *work)
unsigned long now, next, next_sec, next_sched;
struct in_ifaddr *ifa;
struct hlist_node *n;
+ struct net *net;
int i;
+ net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
for (i = 0; i < IN4_ADDR_HSIZE; i++) {
+ struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
bool change_needed = false;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
+ hlist_for_each_entry_rcu(ifa, head, addr_lst) {
unsigned long age, tstamp;
u32 preferred_lft;
u32 valid_lft;
@@ -757,7 +757,7 @@ static void check_lifetime(struct work_struct *work)
if (!change_needed)
continue;
rtnl_lock();
- hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
+ hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
unsigned long age;
if (ifa->ifa_flags & IFA_F_PERMANENT)
@@ -806,8 +806,8 @@ static void check_lifetime(struct work_struct *work)
if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
- next_sched - now);
+ queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
+ next_sched - now);
}
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
@@ -1004,9 +1004,9 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifa->ifa_proto = new_proto;
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
- cancel_delayed_work(&check_lifetime_work);
+ cancel_delayed_work(&net->ipv4.addr_chk_work);
queue_delayed_work(system_power_efficient_wq,
- &check_lifetime_work, 0);
+ &net->ipv4.addr_chk_work, 0);
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
}
return 0;
@@ -2743,6 +2743,8 @@ static __net_init int devinet_init_net(struct net *net)
for (i = 0; i < IN4_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);
+ INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);
+
net->ipv4.devconf_all = all;
net->ipv4.devconf_dflt = dflt;
return 0;
@@ -2769,7 +2771,11 @@ static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
const struct ctl_table *tbl;
+#endif
+
+ cancel_delayed_work_sync(&net->ipv4.addr_chk_work);
+#ifdef CONFIG_SYSCTL
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
@@ -2806,8 +2812,6 @@ void __init devinet_init(void)
register_pernet_subsys(&devinet_ops);
register_netdevice_notifier(&ip_netdev_notifier);
- queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
-
rtnl_af_register(&inet_af_ops);
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
--
2.30.2
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v2 net-next 4/4] ipv4: Retire global IPv4 hash table inet_addr_lst.
2024-10-04 19:59 [PATCH v2 net-next 0/4] ipv4: Namespacify IPv4 address hash table Kuniyuki Iwashima
` (2 preceding siblings ...)
2024-10-04 19:59 ` [PATCH v2 net-next 3/4] ipv4: Namespacify IPv4 address GC Kuniyuki Iwashima
@ 2024-10-04 19:59 ` Kuniyuki Iwashima
2024-10-08 11:10 ` Paolo Abeni
3 siblings, 1 reply; 8+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-04 19:59 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
David Ahern
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
No one uses inet_addr_lst anymore, so let's remove it.
While at it, we can remove net_hash_mix() from the hash calculation.
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/linux/inetdevice.h | 1 -
net/ipv4/devinet.c | 14 +-------------
2 files changed, 1 insertion(+), 14 deletions(-)
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index d0c2bf67a9b0..d9c690c8c80b 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -141,7 +141,6 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
ARP_EVICT_NOCARRIER)
struct in_ifaddr {
- struct hlist_node hash;
struct hlist_node addr_lst;
struct in_ifaddr __rcu *ifa_next;
struct in_device *ifa_dev;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac245944e89e..6cdecee96cf5 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -119,13 +119,9 @@ struct inet_fill_args {
#define IN4_ADDR_HSIZE_SHIFT 8
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
-static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
-
static u32 inet_addr_hash(const struct net *net, __be32 addr)
{
- u32 val = (__force u32) addr ^ net_hash_mix(net);
-
- return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
+ return hash_32((__force u32)addr, IN4_ADDR_HSIZE_SHIFT);
}
static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
@@ -133,7 +129,6 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
u32 hash = inet_addr_hash(net, ifa->ifa_local);
ASSERT_RTNL();
- hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
}
@@ -141,7 +136,6 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
{
ASSERT_RTNL();
hlist_del_init_rcu(&ifa->addr_lst);
- hlist_del_init_rcu(&ifa->hash);
}
/**
@@ -228,7 +222,6 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
in_dev_hold(in_dev);
ifa->ifa_dev = in_dev;
- INIT_HLIST_NODE(&ifa->hash);
INIT_HLIST_NODE(&ifa->addr_lst);
return ifa;
@@ -2804,11 +2797,6 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = {
void __init devinet_init(void)
{
- int i;
-
- for (i = 0; i < IN4_ADDR_HSIZE; i++)
- INIT_HLIST_HEAD(&inet_addr_lst[i]);
-
register_pernet_subsys(&devinet_ops);
register_netdevice_notifier(&ip_netdev_notifier);
--
2.30.2
^ permalink raw reply related [flat|nested] 8+ messages in thread