* [PATCH v2 net-next 1/4] Revert "rtnetlink: add guard for RTNL"
2024-10-02 15:12 [PATCH v2 net-next 0/4] rtnetlink: Per-netns RTNL Kuniyuki Iwashima
@ 2024-10-02 15:12 ` Kuniyuki Iwashima
2024-10-02 15:14 ` Eric Dumazet
2024-10-02 15:12 ` [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL Kuniyuki Iwashima
` (2 subsequent siblings)
3 siblings, 1 reply; 14+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-02 15:12 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
This reverts commit 464eb03c4a7cfb32cb3324249193cf6bb5b35152.
Once we have a per-netns RTNL, we won't use guard(rtnl).
Also, there's no users for now.
$ grep -rnI "guard(rtnl" || true
$
Suggested-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/netdev/CANn89i+KoYzUH+VPLdGmLABYf5y4TW0hrM4UAeQQJ9AREty0iw@mail.gmail.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/linux/rtnetlink.h | 3 ---
1 file changed, 3 deletions(-)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index a7da7dfc06a2..cdfc897f1e3c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -7,7 +7,6 @@
#include <linux/netdevice.h>
#include <linux/wait.h>
#include <linux/refcount.h>
-#include <linux/cleanup.h>
#include <uapi/linux/rtnetlink.h>
extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
@@ -47,8 +46,6 @@ extern int rtnl_is_locked(void);
extern int rtnl_lock_killable(void);
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
-DEFINE_LOCK_GUARD_0(rtnl, rtnl_lock(), rtnl_unlock())
-
extern wait_queue_head_t netdev_unregistering_wq;
extern atomic_t dev_unreg_count;
extern struct rw_semaphore pernet_ops_rwsem;
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL.
2024-10-02 15:12 [PATCH v2 net-next 0/4] rtnetlink: Per-netns RTNL Kuniyuki Iwashima
2024-10-02 15:12 ` [PATCH v2 net-next 1/4] Revert "rtnetlink: add guard for RTNL" Kuniyuki Iwashima
@ 2024-10-02 15:12 ` Kuniyuki Iwashima
2024-10-04 11:08 ` Eric Dumazet
2024-10-04 20:21 ` Jakub Kicinski
2024-10-02 15:12 ` [PATCH v2 net-next 3/4] rtnetlink: Add assertion helpers for " Kuniyuki Iwashima
2024-10-02 15:12 ` [PATCH v2 net-next 4/4] rtnetlink: Add ASSERT_RTNL_NET() placeholder for netdev notifier Kuniyuki Iwashima
3 siblings, 2 replies; 14+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-02 15:12 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
The goal is to break RTNL down into per-netns mutex.
This patch adds per-netns mutex and its helper functions, rtnl_net_lock()
and rtnl_net_unlock().
rtnl_net_lock() acquires the global RTNL and per-netns RTNL mutex, and
rtnl_net_unlock() releases them.
We will replace 800+ rtnl_lock() with rtnl_net_lock() and finally removes
rtnl_lock() in rtnl_net_lock().
When we need to nest per-netns RTNL mutex, we will use __rtnl_net_lock(),
and its locking order is defined by rtnl_net_lock_cmp_fn() as follows:
1. init_net is first
2. netns address ascending order
Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL
with LOCKDEP so that we can carefully add the extra mutex without slowing
down RTNL operations during conversion.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/linux/rtnetlink.h | 13 +++++++++
include/net/net_namespace.h | 4 +++
net/Kconfig.debug | 15 ++++++++++
net/core/net_namespace.c | 6 ++++
net/core/rtnetlink.c | 58 +++++++++++++++++++++++++++++++++++++
5 files changed, 96 insertions(+)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index cdfc897f1e3c..f743c4f678bf 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -46,6 +46,19 @@ extern int rtnl_is_locked(void);
extern int rtnl_lock_killable(void);
extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+void __rtnl_net_lock(struct net *net);
+void __rtnl_net_unlock(struct net *net);
+void rtnl_net_lock(struct net *net);
+void rtnl_net_unlock(struct net *net);
+int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
+#else
+#define __rtnl_net_lock(net)
+#define __rtnl_net_unlock(net)
+#define rtnl_net_lock(net) rtnl_lock()
+#define rtnl_net_unlock(net) rtnl_unlock()
+#endif
+
extern wait_queue_head_t netdev_unregistering_wq;
extern atomic_t dev_unreg_count;
extern struct rw_semaphore pernet_ops_rwsem;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index e67b483cc8bb..873c0f9fdac6 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -188,6 +188,10 @@ struct net {
#if IS_ENABLED(CONFIG_SMC)
struct netns_smc smc;
#endif
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+ /* Move to a better place when the config guard is removed. */
+ struct mutex rtnl_mutex;
+#endif
} __randomize_layout;
#include <linux/seq_file_net.h>
diff --git a/net/Kconfig.debug b/net/Kconfig.debug
index 5e3fffe707dd..277fab8c4d77 100644
--- a/net/Kconfig.debug
+++ b/net/Kconfig.debug
@@ -24,3 +24,18 @@ config DEBUG_NET
help
Enable extra sanity checks in networking.
This is mostly used by fuzzers, but is safe to select.
+
+config DEBUG_NET_SMALL_RTNL
+ bool "Add extra per-netns mutex inside RTNL"
+ depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT
+ select PROVE_LOCKING
+ default n
+ help
+ rtnl_lock() is being replaced with rtnl_net_lock() that
+ acquires the global RTNL and a small per-netns RTNL mutex.
+
+ During the conversion, rtnl_net_lock() just adds an extra
+ mutex in every RTNL scope and slows down the operations.
+
+ Once the conversion completes, rtnl_lock() will be removed
+ and rtnetlink will gain per-netns scalability.
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e39479f1c9a4..105e3cd26763 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -334,6 +334,12 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
mutex_init(&net->ipv4.ra_mutex);
+
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+ mutex_init(&net->rtnl_mutex);
+ lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL);
+#endif
+
preinit_net_sysctl(net);
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f0a520987085..edf530441b65 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -179,6 +179,64 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
+#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
+void __rtnl_net_lock(struct net *net)
+{
+ ASSERT_RTNL();
+
+ mutex_lock(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(__rtnl_net_lock);
+
+void __rtnl_net_unlock(struct net *net)
+{
+ ASSERT_RTNL();
+
+ mutex_unlock(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(__rtnl_net_unlock);
+
+void rtnl_net_lock(struct net *net)
+{
+ rtnl_lock();
+ __rtnl_net_lock(net);
+}
+EXPORT_SYMBOL(rtnl_net_lock);
+
+void rtnl_net_unlock(struct net *net)
+{
+ __rtnl_net_unlock(net);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL(rtnl_net_unlock);
+
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+ if (net_eq(net_a, net_b))
+ return 0;
+
+ /* always init_net first */
+ if (net_eq(net_a, &init_net))
+ return -1;
+
+ if (net_eq(net_b, &init_net))
+ return 1;
+
+ /* otherwise lock in ascending order */
+ return net_a < net_b ? -1 : 1;
+}
+
+int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b)
+{
+ const struct net *net_a, *net_b;
+
+ net_a = container_of(a, struct net, rtnl_mutex.dep_map);
+ net_b = container_of(b, struct net, rtnl_mutex.dep_map);
+
+ return rtnl_net_cmp_locks(net_a, net_b);
+}
+#endif
+
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL.
2024-10-02 15:12 ` [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL Kuniyuki Iwashima
@ 2024-10-04 11:08 ` Eric Dumazet
2024-10-04 20:21 ` Jakub Kicinski
1 sibling, 0 replies; 14+ messages in thread
From: Eric Dumazet @ 2024-10-04 11:08 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Kuniyuki Iwashima,
netdev
On Wed, Oct 2, 2024 at 5:13 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> The goal is to break RTNL down into per-netns mutex.
>
> This patch adds per-netns mutex and its helper functions, rtnl_net_lock()
> and rtnl_net_unlock().
>
> rtnl_net_lock() acquires the global RTNL and per-netns RTNL mutex, and
> rtnl_net_unlock() releases them.
>
> We will replace 800+ rtnl_lock() with rtnl_net_lock() and finally removes
> rtnl_lock() in rtnl_net_lock().
>
> When we need to nest per-netns RTNL mutex, we will use __rtnl_net_lock(),
> and its locking order is defined by rtnl_net_lock_cmp_fn() as follows:
>
> 1. init_net is first
> 2. netns address ascending order
>
> Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL
> with LOCKDEP so that we can carefully add the extra mutex without slowing
> down RTNL operations during conversion.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL.
2024-10-02 15:12 ` [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL Kuniyuki Iwashima
2024-10-04 11:08 ` Eric Dumazet
@ 2024-10-04 20:21 ` Jakub Kicinski
2024-10-04 20:45 ` Kuniyuki Iwashima
1 sibling, 1 reply; 14+ messages in thread
From: Jakub Kicinski @ 2024-10-04 20:21 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Eric Dumazet, Paolo Abeni, Kuniyuki Iwashima,
netdev
On Wed, 2 Oct 2024 08:12:38 -0700 Kuniyuki Iwashima wrote:
> +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
> +void __rtnl_net_lock(struct net *net);
> +void __rtnl_net_unlock(struct net *net);
> +void rtnl_net_lock(struct net *net);
> +void rtnl_net_unlock(struct net *net);
> +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
> +#else
> +#define __rtnl_net_lock(net)
> +#define __rtnl_net_unlock(net)
> +#define rtnl_net_lock(net) rtnl_lock()
> +#define rtnl_net_unlock(net) rtnl_unlock()
Let's make sure net is always evaluated?
At the very least make sure the preprocessor doesn't eat it completely
otherwise we may end up with config-dependent "unused variable"
warnings down the line.
> +#endif
--
pw-bot: cr
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL.
2024-10-04 20:21 ` Jakub Kicinski
@ 2024-10-04 20:45 ` Kuniyuki Iwashima
2024-10-04 20:51 ` Kuniyuki Iwashima
0 siblings, 1 reply; 14+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-04 20:45 UTC (permalink / raw)
To: kuba; +Cc: davem, edumazet, kuni1840, kuniyu, netdev, pabeni
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 4 Oct 2024 13:21:45 -0700
> On Wed, 2 Oct 2024 08:12:38 -0700 Kuniyuki Iwashima wrote:
> > +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
> > +void __rtnl_net_lock(struct net *net);
> > +void __rtnl_net_unlock(struct net *net);
> > +void rtnl_net_lock(struct net *net);
> > +void rtnl_net_unlock(struct net *net);
> > +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
> > +#else
> > +#define __rtnl_net_lock(net)
> > +#define __rtnl_net_unlock(net)
> > +#define rtnl_net_lock(net) rtnl_lock()
> > +#define rtnl_net_unlock(net) rtnl_unlock()
>
> Let's make sure net is always evaluated?
> At the very least make sure the preprocessor doesn't eat it completely
> otherwise we may end up with config-dependent "unused variable"
> warnings down the line.
Sure, what comes to mind is void casting, which I guess is old-school
style ? Do you have any other idea or is this acceptable ?
#define __rtnl_net_lock(net) (void)(net)
#define __rtnl_net_unlock(net) (void)(net)
#define rtnl_net_lock(net) \
do { \
(void)(net); \
rtnl_lock(); \
} while (0)
#define rtnl_net_unlock(net) \
do { \
(void)(net); \
rtnl_unlock(); \
} while (0)
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL.
2024-10-04 20:45 ` Kuniyuki Iwashima
@ 2024-10-04 20:51 ` Kuniyuki Iwashima
2024-10-04 20:59 ` Eric Dumazet
0 siblings, 1 reply; 14+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-04 20:51 UTC (permalink / raw)
To: kuniyu; +Cc: davem, edumazet, kuba, kuni1840, netdev, pabeni
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Fri, 4 Oct 2024 13:45:26 -0700
> From: Jakub Kicinski <kuba@kernel.org>
> Date: Fri, 4 Oct 2024 13:21:45 -0700
> > On Wed, 2 Oct 2024 08:12:38 -0700 Kuniyuki Iwashima wrote:
> > > +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
> > > +void __rtnl_net_lock(struct net *net);
> > > +void __rtnl_net_unlock(struct net *net);
> > > +void rtnl_net_lock(struct net *net);
> > > +void rtnl_net_unlock(struct net *net);
> > > +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
> > > +#else
> > > +#define __rtnl_net_lock(net)
> > > +#define __rtnl_net_unlock(net)
> > > +#define rtnl_net_lock(net) rtnl_lock()
> > > +#define rtnl_net_unlock(net) rtnl_unlock()
> >
> > Let's make sure net is always evaluated?
> > At the very least make sure the preprocessor doesn't eat it completely
> > otherwise we may end up with config-dependent "unused variable"
> > warnings down the line.
>
> Sure, what comes to mind is void casting, which I guess is old-school
> style ? Do you have any other idea or is this acceptable ?
>
> #define __rtnl_net_lock(net) (void)(net)
> #define __rtnl_net_unlock(net) (void)(net)
> #define rtnl_net_lock(net) \
> do { \
> (void)(net); \
> rtnl_lock(); \
> } while (0)
> #define rtnl_net_unlock(net) \
> do { \
> (void)(net); \
> rtnl_unlock(); \
> } while (0)
or simply define these as static inline functions and
probably this is more preferable ?
static inline void __rtnl_net_lock(struct net *net) {}
static inline void __rtnl_net_unlock(struct net *net) {}
static inline void rtnl_net_lock(struct net *net)
{
rtnl_lock();
}
static inline void rtnl_net_unlock(struct net *net)
{
rtnl_unlock();
}
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL.
2024-10-04 20:51 ` Kuniyuki Iwashima
@ 2024-10-04 20:59 ` Eric Dumazet
2024-10-04 21:22 ` Kuniyuki Iwashima
0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2024-10-04 20:59 UTC (permalink / raw)
To: Kuniyuki Iwashima; +Cc: davem, kuba, kuni1840, netdev, pabeni
On Fri, Oct 4, 2024 at 10:51 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> From: Kuniyuki Iwashima <kuniyu@amazon.com>
> Date: Fri, 4 Oct 2024 13:45:26 -0700
> > From: Jakub Kicinski <kuba@kernel.org>
> > Date: Fri, 4 Oct 2024 13:21:45 -0700
> > > On Wed, 2 Oct 2024 08:12:38 -0700 Kuniyuki Iwashima wrote:
> > > > +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
> > > > +void __rtnl_net_lock(struct net *net);
> > > > +void __rtnl_net_unlock(struct net *net);
> > > > +void rtnl_net_lock(struct net *net);
> > > > +void rtnl_net_unlock(struct net *net);
> > > > +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
> > > > +#else
> > > > +#define __rtnl_net_lock(net)
> > > > +#define __rtnl_net_unlock(net)
> > > > +#define rtnl_net_lock(net) rtnl_lock()
> > > > +#define rtnl_net_unlock(net) rtnl_unlock()
> > >
> > > Let's make sure net is always evaluated?
> > > At the very least make sure the preprocessor doesn't eat it completely
> > > otherwise we may end up with config-dependent "unused variable"
> > > warnings down the line.
> >
> > Sure, what comes to mind is void casting, which I guess is old-school
> > style ? Do you have any other idea or is this acceptable ?
> >
> > #define __rtnl_net_lock(net) (void)(net)
> > #define __rtnl_net_unlock(net) (void)(net)
> > #define rtnl_net_lock(net) \
> > do { \
> > (void)(net); \
> > rtnl_lock(); \
> > } while (0)
> > #define rtnl_net_unlock(net) \
> > do { \
> > (void)(net); \
> > rtnl_unlock(); \
> > } while (0)
>
> or simply define these as static inline functions and
> probably this is more preferable ?
>
> static inline void __rtnl_net_lock(struct net *net) {}
> static inline void __rtnl_net_unlock(struct net *net) {}
> static inline void rtnl_net_lock(struct net *net)
> {
> rtnl_lock();
> }
> static inline void rtnl_net_unlock(struct net *net)
> {
> rtnl_unlock();
> }
static inline functions seem better to me.
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL.
2024-10-04 20:59 ` Eric Dumazet
@ 2024-10-04 21:22 ` Kuniyuki Iwashima
0 siblings, 0 replies; 14+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-04 21:22 UTC (permalink / raw)
To: edumazet; +Cc: davem, kuba, kuni1840, kuniyu, netdev, pabeni
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 4 Oct 2024 22:59:32 +0200
> On Fri, Oct 4, 2024 at 10:51 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > From: Kuniyuki Iwashima <kuniyu@amazon.com>
> > Date: Fri, 4 Oct 2024 13:45:26 -0700
> > > From: Jakub Kicinski <kuba@kernel.org>
> > > Date: Fri, 4 Oct 2024 13:21:45 -0700
> > > > On Wed, 2 Oct 2024 08:12:38 -0700 Kuniyuki Iwashima wrote:
> > > > > +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
> > > > > +void __rtnl_net_lock(struct net *net);
> > > > > +void __rtnl_net_unlock(struct net *net);
> > > > > +void rtnl_net_lock(struct net *net);
> > > > > +void rtnl_net_unlock(struct net *net);
> > > > > +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
> > > > > +#else
> > > > > +#define __rtnl_net_lock(net)
> > > > > +#define __rtnl_net_unlock(net)
> > > > > +#define rtnl_net_lock(net) rtnl_lock()
> > > > > +#define rtnl_net_unlock(net) rtnl_unlock()
> > > >
> > > > Let's make sure net is always evaluated?
> > > > At the very least make sure the preprocessor doesn't eat it completely
> > > > otherwise we may end up with config-dependent "unused variable"
> > > > warnings down the line.
> > >
> > > Sure, what comes to mind is void casting, which I guess is old-school
> > > style ? Do you have any other idea or is this acceptable ?
> > >
> > > #define __rtnl_net_lock(net) (void)(net)
> > > #define __rtnl_net_unlock(net) (void)(net)
> > > #define rtnl_net_lock(net) \
> > > do { \
> > > (void)(net); \
> > > rtnl_lock(); \
> > > } while (0)
> > > #define rtnl_net_unlock(net) \
> > > do { \
> > > (void)(net); \
> > > rtnl_unlock(); \
> > > } while (0)
> >
> > or simply define these as static inline functions and
> > probably this is more preferable ?
> >
> > static inline void __rtnl_net_lock(struct net *net) {}
> > static inline void __rtnl_net_unlock(struct net *net) {}
> > static inline void rtnl_net_lock(struct net *net)
> > {
> > rtnl_lock();
> > }
> > static inline void rtnl_net_unlock(struct net *net)
> > {
> > rtnl_unlock();
> > }
>
> static inline functions seem better to me.
Will use them.
Thanks !
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2 net-next 3/4] rtnetlink: Add assertion helpers for per-netns RTNL.
2024-10-02 15:12 [PATCH v2 net-next 0/4] rtnetlink: Per-netns RTNL Kuniyuki Iwashima
2024-10-02 15:12 ` [PATCH v2 net-next 1/4] Revert "rtnetlink: add guard for RTNL" Kuniyuki Iwashima
2024-10-02 15:12 ` [PATCH v2 net-next 2/4] rtnetlink: Add per-netns RTNL Kuniyuki Iwashima
@ 2024-10-02 15:12 ` Kuniyuki Iwashima
2024-10-04 11:10 ` Eric Dumazet
2024-10-02 15:12 ` [PATCH v2 net-next 4/4] rtnetlink: Add ASSERT_RTNL_NET() placeholder for netdev notifier Kuniyuki Iwashima
3 siblings, 1 reply; 14+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-02 15:12 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
Once an RTNL scope is converted with rtnl_net_lock(), we will replace
RTNL helper functions inside the scope with the following per-netns
alternatives:
ASSERT_RTNL() -> ASSERT_RTNL_NET(net)
rcu_dereference_rtnl(p) -> rcu_dereference_rtnl_net(net, p)
Note that the per-netns helpers are equivalent to the conventional
helpers unless CONFIG_DEBUG_NET_SMALL_RTNL is enabled.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/linux/rtnetlink.h | 25 +++++++++++++++++++++++++
net/core/rtnetlink.c | 12 ++++++++++++
2 files changed, 37 insertions(+)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index f743c4f678bf..ec2fc946cd8a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -52,11 +52,36 @@ void __rtnl_net_unlock(struct net *net);
void rtnl_net_lock(struct net *net);
void rtnl_net_unlock(struct net *net);
int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
+
+bool rtnl_net_is_locked(struct net *net);
+
+#define ASSERT_RTNL_NET(net) \
+ WARN_ONCE(!rtnl_net_is_locked(net), \
+ "RTNL_NET: assertion failed at %s (%d)\n", \
+ __FILE__, __LINE__)
+
+bool lockdep_rtnl_net_is_held(struct net *net);
+
+#define rcu_dereference_rtnl_net(net, p) \
+ rcu_dereference_check(p, lockdep_rtnl_net_is_held(net))
+#define rtnl_net_dereference(net, p) \
+ rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net))
+#define rcu_replace_pointer_rtnl_net(net, rp, p) \
+ rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net))
#else
#define __rtnl_net_lock(net)
#define __rtnl_net_unlock(net)
#define rtnl_net_lock(net) rtnl_lock()
#define rtnl_net_unlock(net) rtnl_unlock()
+
+#define ASSERT_RTNL_NET(net) ASSERT_RTNL()
+
+#define rcu_dereference_rtnl_net(net, p) \
+ rcu_dereference_rtnl(p)
+#define rtnl_net_dereference(net, p) \
+ rtnl_dereference(p)
+#define rcu_replace_pointer_rtnl_net(net, rp, p) \
+ rcu_replace_pointer_rtnl(rp, p)
#endif
extern wait_queue_head_t netdev_unregistering_wq;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index edf530441b65..2b44ec690780 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -235,6 +235,18 @@ int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *
return rtnl_net_cmp_locks(net_a, net_b);
}
+
+bool rtnl_net_is_locked(struct net *net)
+{
+ return rtnl_is_locked() && mutex_is_locked(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_net_is_locked);
+
+bool lockdep_rtnl_net_is_held(struct net *net)
+{
+ return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex);
+}
+EXPORT_SYMBOL(lockdep_rtnl_net_is_held);
#endif
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH v2 net-next 3/4] rtnetlink: Add assertion helpers for per-netns RTNL.
2024-10-02 15:12 ` [PATCH v2 net-next 3/4] rtnetlink: Add assertion helpers for " Kuniyuki Iwashima
@ 2024-10-04 11:10 ` Eric Dumazet
0 siblings, 0 replies; 14+ messages in thread
From: Eric Dumazet @ 2024-10-04 11:10 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Kuniyuki Iwashima,
netdev
On Wed, Oct 2, 2024 at 5:14 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Once an RTNL scope is converted with rtnl_net_lock(), we will replace
> RTNL helper functions inside the scope with the following per-netns
> alternatives:
>
> ASSERT_RTNL() -> ASSERT_RTNL_NET(net)
> rcu_dereference_rtnl(p) -> rcu_dereference_rtnl_net(net, p)
>
> Note that the per-netns helpers are equivalent to the conventional
> helpers unless CONFIG_DEBUG_NET_SMALL_RTNL is enabled.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v2 net-next 4/4] rtnetlink: Add ASSERT_RTNL_NET() placeholder for netdev notifier.
2024-10-02 15:12 [PATCH v2 net-next 0/4] rtnetlink: Per-netns RTNL Kuniyuki Iwashima
` (2 preceding siblings ...)
2024-10-02 15:12 ` [PATCH v2 net-next 3/4] rtnetlink: Add assertion helpers for " Kuniyuki Iwashima
@ 2024-10-02 15:12 ` Kuniyuki Iwashima
2024-10-04 11:14 ` Eric Dumazet
3 siblings, 1 reply; 14+ messages in thread
From: Kuniyuki Iwashima @ 2024-10-02 15:12 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
The global and per-netns netdev notifier depend on RTNL, and its
dependency is not so clear due to nested calls.
Let's add a placeholder to place ASSERT_RTNL_NET() for each event.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/core/Makefile | 1 +
net/core/rtnl_net_debug.c | 131 ++++++++++++++++++++++++++++++++++++++
2 files changed, 132 insertions(+)
create mode 100644 net/core/rtnl_net_debug.c
diff --git a/net/core/Makefile b/net/core/Makefile
index c3ebbaf9c81e..5a72a87ee0f1 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -45,3 +45,4 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
obj-$(CONFIG_NET_TEST) += net_test.o
obj-$(CONFIG_NET_DEVMEM) += devmem.o
+obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o
diff --git a/net/core/rtnl_net_debug.c b/net/core/rtnl_net_debug.c
new file mode 100644
index 000000000000..e90a32242e22
--- /dev/null
+++ b/net/core/rtnl_net_debug.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+static int rtnl_net_debug_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ enum netdev_cmd cmd = event;
+
+ /* Keep enum and don't add default to trigger -Werror=switch */
+ switch (cmd) {
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_REBOOT:
+ case NETDEV_CHANGE:
+ case NETDEV_REGISTER:
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGEMTU:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_PRE_CHANGEADDR:
+ case NETDEV_GOING_DOWN:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
+ case NETDEV_PRE_UP:
+ case NETDEV_PRE_TYPE_CHANGE:
+ case NETDEV_POST_TYPE_CHANGE:
+ case NETDEV_POST_INIT:
+ case NETDEV_PRE_UNINIT:
+ case NETDEV_RELEASE:
+ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_JOIN:
+ case NETDEV_CHANGEUPPER:
+ case NETDEV_RESEND_IGMP:
+ case NETDEV_PRECHANGEMTU:
+ case NETDEV_CHANGEINFODATA:
+ case NETDEV_BONDING_INFO:
+ case NETDEV_PRECHANGEUPPER:
+ case NETDEV_CHANGELOWERSTATE:
+ case NETDEV_UDP_TUNNEL_PUSH_INFO:
+ case NETDEV_UDP_TUNNEL_DROP_INFO:
+ case NETDEV_CHANGE_TX_QUEUE_LEN:
+ case NETDEV_CVLAN_FILTER_PUSH_INFO:
+ case NETDEV_CVLAN_FILTER_DROP_INFO:
+ case NETDEV_SVLAN_FILTER_PUSH_INFO:
+ case NETDEV_SVLAN_FILTER_DROP_INFO:
+ case NETDEV_OFFLOAD_XSTATS_ENABLE:
+ case NETDEV_OFFLOAD_XSTATS_DISABLE:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
+ case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
+ case NETDEV_XDP_FEAT_CHANGE:
+ ASSERT_RTNL();
+ break;
+
+ /* Once an event fully supports RTNL_NET, move it here
+ * and remove "if (0)" below.
+ *
+ * case NETDEV_XXX:
+ * ASSERT_RTNL_NET(net);
+ * break;
+ */
+ }
+
+ /* Just to avoid unused-variable error for dev and net. */
+ if (0)
+ ASSERT_RTNL_NET(net);
+
+ return NOTIFY_DONE;
+}
+
+static int rtnl_net_debug_net_id;
+
+static int __net_init rtnl_net_debug_net_init(struct net *net)
+{
+ struct notifier_block *nb;
+
+ nb = net_generic(net, rtnl_net_debug_net_id);
+ nb->notifier_call = rtnl_net_debug_event;
+
+ return register_netdevice_notifier_net(net, nb);
+}
+
+static void __net_exit rtnl_net_debug_net_exit(struct net *net)
+{
+ struct notifier_block *nb;
+
+ nb = net_generic(net, rtnl_net_debug_net_id);
+ unregister_netdevice_notifier_net(net, nb);
+}
+
+static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = {
+ .init = rtnl_net_debug_net_init,
+ .exit = rtnl_net_debug_net_exit,
+ .id = &rtnl_net_debug_net_id,
+ .size = sizeof(struct notifier_block),
+};
+
+static struct notifier_block rtnl_net_debug_block = {
+ .notifier_call = rtnl_net_debug_event,
+};
+
+static int __init rtnl_net_debug_init(void)
+{
+ int ret;
+
+ ret = register_pernet_device(&rtnl_net_debug_net_ops);
+ if (ret)
+ return ret;
+
+ ret = register_netdevice_notifier(&rtnl_net_debug_block);
+ if (ret)
+ unregister_pernet_subsys(&rtnl_net_debug_net_ops);
+
+ return ret;
+}
+
+static void __exit rtnl_net_debug_exit(void)
+{
+ unregister_netdevice_notifier(&rtnl_net_debug_block);
+ unregister_pernet_device(&rtnl_net_debug_net_ops);
+}
+
+subsys_initcall(rtnl_net_debug_init);
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread