From: Kuniyuki Iwashima <kuniyu@amazon.com>
To: "David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Simon Horman <horms@kernel.org>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>,
Marc Kleine-Budde <mkl@pengutronix.de>,
Vincent Mailhol <mailhol.vincent@wanadoo.fr>,
"Daniel Borkmann" <daniel@iogearbox.net>,
Nikolay Aleksandrov <razor@blackwall.org>,
Kuniyuki Iwashima <kuniyu@amazon.com>,
Kuniyuki Iwashima <kuni1840@gmail.com>, <netdev@vger.kernel.org>
Subject: [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers.
Date: Mon, 4 Nov 2024 18:05:07 -0800 [thread overview]
Message-ID: <20241105020514.41963-2-kuniyu@amazon.com> (raw)
In-Reply-To: <20241105020514.41963-1-kuniyu@amazon.com>
rtnl_newlink() needs to hold 3 per-netns RTNL: 2 for a new device
and 1 for its peer.
We will add rtnl_nets_lock() later, which performs the nested locking
based on struct rtnl_nets, which has an array of struct net pointers.
rtnl_nets_add() adds a net pointer to the array and sorts it so that
rtnl_nets_lock() can simply acquire per-netns RTNL from array[0] to [2].
Before calling rtnl_nets_add(), get_net() must be called for the net,
and rtnl_nets_destroy() will call put_net() for each.
Let's apply the helpers to rtnl_newlink().
When CONFIG_DEBUG_NET_SMALL_RTNL is disabled, we do not call
rtnl_net_lock() thus do not care about the array order, so
rtnl_net_cmp_locks() returns -1 so that the loop in rtnl_nets_add()
can be optimised to NOP.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/net/rtnetlink.h | 10 +++++++
net/core/rtnetlink.c | 63 +++++++++++++++++++++++++++++++++++++++--
2 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index b260c0cc9671..814364367dd7 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -68,6 +68,16 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
return AF_UNSPEC;
}
+struct rtnl_nets {
+ /* ->newlink() needs to freeze 3 netns at most;
+ * 2 for the new device, 1 for its peer.
+ */
+ struct net *net[3];
+ unsigned char len;
+};
+
+void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net);
+
/**
* struct rtnl_link_ops - rtnetlink link operations
*
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3b33810d92a8..f98706ad390a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -258,8 +258,60 @@ bool lockdep_rtnl_net_is_held(struct net *net)
return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex);
}
EXPORT_SYMBOL(lockdep_rtnl_net_is_held);
+#else
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+ /* No need to swap */
+ return -1;
+}
#endif
+static void rtnl_nets_init(struct rtnl_nets *rtnl_nets)
+{
+ memset(rtnl_nets, 0, sizeof(*rtnl_nets));
+}
+
+static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ put_net(rtnl_nets->net[i]);
+ rtnl_nets->net[i] = NULL;
+ }
+
+ rtnl_nets->len = 0;
+}
+
+/**
+ * rtnl_nets_add - Add netns to be locked before ->newlink().
+ *
+ * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net().
+ * @net: netns pointer with an extra refcnt held.
+ *
+ * The extra refcnt is released in rtnl_nets_destroy().
+ */
+void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net)
+{
+ int i;
+
+ DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net));
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) {
+ case 0:
+ put_net(net);
+ return;
+ case 1:
+ swap(rtnl_nets->net[i], net);
+ }
+ }
+
+ rtnl_nets->net[i] = net;
+ rtnl_nets->len++;
+}
+EXPORT_SYMBOL(rtnl_nets_add);
+
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
@@ -3796,6 +3848,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *tgt_net, *link_net = NULL;
struct rtnl_link_ops *ops = NULL;
struct rtnl_newlink_tbs *tbs;
+ struct rtnl_nets rtnl_nets;
int ops_srcu_index;
int ret;
@@ -3839,6 +3892,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
#endif
}
+ rtnl_nets_init(&rtnl_nets);
+
if (ops) {
if (ops->maxtype > RTNL_MAX_TYPE) {
ret = -EINVAL;
@@ -3868,6 +3923,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto put_ops;
}
+ rtnl_nets_add(&rtnl_nets, tgt_net);
+
if (tb[IFLA_LINK_NETNSID]) {
int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
@@ -3878,6 +3935,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto put_net;
}
+ rtnl_nets_add(&rtnl_nets, link_net);
+
if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
goto put_net;
@@ -3887,9 +3946,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack);
put_net:
- if (link_net)
- put_net(link_net);
- put_net(tgt_net);
+ rtnl_nets_destroy(&rtnl_nets);
put_ops:
if (ops)
rtnl_link_ops_put(ops, ops_srcu_index);
--
2.39.5 (Apple Git-154)
next prev parent reply other threads:[~2024-11-05 2:05 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
2024-11-05 2:05 ` Kuniyuki Iwashima [this message]
2024-11-05 10:16 ` [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers Eric Dumazet
2024-11-06 0:35 ` Jakub Kicinski
2024-11-06 0:41 ` Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb() Kuniyuki Iwashima
2024-11-05 10:18 ` Eric Dumazet
2024-11-06 0:37 ` Jakub Kicinski
2024-11-06 0:44 ` Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops Kuniyuki Iwashima
2024-11-05 10:23 ` Eric Dumazet
2024-11-06 0:39 ` Jakub Kicinski
2024-11-06 0:39 ` Jakub Kicinski
2024-11-06 0:52 ` Kuniyuki Iwashima
2024-11-06 0:58 ` Kuniyuki Iwashima
2024-11-06 1:04 ` Jakub Kicinski
2024-11-06 1:22 ` Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 4/8] veth: Set VETH_INFO_PEER to veth_link_ops.peer_type Kuniyuki Iwashima
2024-11-05 10:24 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 5/8] vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type Kuniyuki Iwashima
2024-11-05 10:37 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 6/8] netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type Kuniyuki Iwashima
2024-11-05 10:39 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL Kuniyuki Iwashima
2024-11-05 10:40 ` Eric Dumazet
2024-11-05 16:22 ` Paolo Abeni
2024-11-05 17:11 ` Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 8/8] rtnetlink: Register rtnl_dellink() and rtnl_setlink() with RTNL_FLAG_DOIT_PERNET_WIP Kuniyuki Iwashima
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241105020514.41963-2-kuniyu@amazon.com \
--to=kuniyu@amazon.com \
--cc=andrew+netdev@lunn.ch \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=kuni1840@gmail.com \
--cc=mailhol.vincent@wanadoo.fr \
--cc=mkl@pengutronix.de \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=razor@blackwall.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.