* [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL.
@ 2024-11-05 2:05 Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers Kuniyuki Iwashima
` (7 more replies)
0 siblings, 8 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
Patch 1 introduces struct rtnl_nets and helper functions to acquire
multiple per-netns RTNL in rtnl_newlink().
Patch 2 - 6 are to prefetch the peer device's netns in rtnl_newlink().
Patch 7 converts rtnl_newlink() to per-netns RTNL.
Patch 8 pushes RTNL down to rtnl_dellink() and rtnl_setlink(), but
the conversion will not be completed unless we support cases with
peer/upper/lower devices.
Kuniyuki Iwashima (8):
rtnetlink: Introduce struct rtnl_nets and helpers.
rtnetlink: Factorise rtnl_link_get_net_tb().
rtnetlink: Add peer_type in struct rtnl_link_ops.
veth: Set VETH_INFO_PEER to veth_link_ops.peer_type.
vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type.
netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type.
rtnetlink: Convert RTM_NEWLINK to per-netns RTNL.
rtnetlink: Register rtnl_dellink() and rtnl_setlink() with
RTNL_FLAG_DOIT_PERNET_WIP.
drivers/net/can/vxcan.c | 12 +--
drivers/net/netkit.c | 11 +--
drivers/net/veth.c | 18 +----
include/net/rtnetlink.h | 14 ++++
net/core/rtnetlink.c | 163 +++++++++++++++++++++++++++++++++++++---
5 files changed, 175 insertions(+), 43 deletions(-)
--
2.39.5 (Apple Git-154)
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers.
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
@ 2024-11-05 2:05 ` Kuniyuki Iwashima
2024-11-05 10:16 ` Eric Dumazet
2024-11-06 0:35 ` Jakub Kicinski
2024-11-05 2:05 ` [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb() Kuniyuki Iwashima
` (6 subsequent siblings)
7 siblings, 2 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
rtnl_newlink() needs to hold 3 per-netns RTNL: 2 for a new device
and 1 for its peer.
We will add rtnl_nets_lock() later, which performs the nested locking
based on struct rtnl_nets, which has an array of struct net pointers.
rtnl_nets_add() adds a net pointer to the array and sorts it so that
rtnl_nets_lock() can simply acquire per-netns RTNL from array[0] to [2].
Before calling rtnl_nets_add(), get_net() must be called for the net,
and rtnl_nets_destroy() will call put_net() for each.
Let's apply the helpers to rtnl_newlink().
When CONFIG_DEBUG_NET_SMALL_RTNL is disabled, we do not call
rtnl_net_lock() thus do not care about the array order, so
rtnl_net_cmp_locks() returns -1 so that the loop in rtnl_nets_add()
can be optimised to NOP.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/net/rtnetlink.h | 10 +++++++
net/core/rtnetlink.c | 63 +++++++++++++++++++++++++++++++++++++++--
2 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index b260c0cc9671..814364367dd7 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -68,6 +68,16 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
return AF_UNSPEC;
}
+struct rtnl_nets {
+ /* ->newlink() needs to freeze 3 netns at most;
+ * 2 for the new device, 1 for its peer.
+ */
+ struct net *net[3];
+ unsigned char len;
+};
+
+void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net);
+
/**
* struct rtnl_link_ops - rtnetlink link operations
*
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3b33810d92a8..f98706ad390a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -258,8 +258,60 @@ bool lockdep_rtnl_net_is_held(struct net *net)
return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex);
}
EXPORT_SYMBOL(lockdep_rtnl_net_is_held);
+#else
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+ /* No need to swap */
+ return -1;
+}
#endif
+static void rtnl_nets_init(struct rtnl_nets *rtnl_nets)
+{
+ memset(rtnl_nets, 0, sizeof(*rtnl_nets));
+}
+
+static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ put_net(rtnl_nets->net[i]);
+ rtnl_nets->net[i] = NULL;
+ }
+
+ rtnl_nets->len = 0;
+}
+
+/**
+ * rtnl_nets_add - Add netns to be locked before ->newlink().
+ *
+ * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net().
+ * @net: netns pointer with an extra refcnt held.
+ *
+ * The extra refcnt is released in rtnl_nets_destroy().
+ */
+void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net)
+{
+ int i;
+
+ DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net));
+
+ for (i = 0; i < rtnl_nets->len; i++) {
+ switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) {
+ case 0:
+ put_net(net);
+ return;
+ case 1:
+ swap(rtnl_nets->net[i], net);
+ }
+ }
+
+ rtnl_nets->net[i] = net;
+ rtnl_nets->len++;
+}
+EXPORT_SYMBOL(rtnl_nets_add);
+
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
@@ -3796,6 +3848,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *tgt_net, *link_net = NULL;
struct rtnl_link_ops *ops = NULL;
struct rtnl_newlink_tbs *tbs;
+ struct rtnl_nets rtnl_nets;
int ops_srcu_index;
int ret;
@@ -3839,6 +3892,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
#endif
}
+ rtnl_nets_init(&rtnl_nets);
+
if (ops) {
if (ops->maxtype > RTNL_MAX_TYPE) {
ret = -EINVAL;
@@ -3868,6 +3923,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto put_ops;
}
+ rtnl_nets_add(&rtnl_nets, tgt_net);
+
if (tb[IFLA_LINK_NETNSID]) {
int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
@@ -3878,6 +3935,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto put_net;
}
+ rtnl_nets_add(&rtnl_nets, link_net);
+
if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
goto put_net;
@@ -3887,9 +3946,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack);
put_net:
- if (link_net)
- put_net(link_net);
- put_net(tgt_net);
+ rtnl_nets_destroy(&rtnl_nets);
put_ops:
if (ops)
rtnl_link_ops_put(ops, ops_srcu_index);
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb().
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers Kuniyuki Iwashima
@ 2024-11-05 2:05 ` Kuniyuki Iwashima
2024-11-05 10:18 ` Eric Dumazet
2024-11-06 0:37 ` Jakub Kicinski
2024-11-05 2:05 ` [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops Kuniyuki Iwashima
` (5 subsequent siblings)
7 siblings, 2 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
In ops->newlink(), veth, vxcan, and netkit call rtnl_link_get_net() with
a net pointer, which is the first argument of ->newlink().
rtnl_link_get_net() could return another netns based on IFLA_NET_NS_PID
and IFLA_NET_NS_FD in the peer device's attributes.
We want to get it and fill rtnl_nets->nets[] in advance.
Let's factorise the peer netns part from rtnl_link_get_net().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/net/rtnetlink.h | 1 +
net/core/rtnetlink.c | 17 ++++++++++++++---
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 814364367dd7..b9ed44b2d056 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -221,6 +221,7 @@ struct rtnl_af_ops {
int rtnl_af_register(struct rtnl_af_ops *ops);
void rtnl_af_unregister(struct rtnl_af_ops *ops);
+struct net *rtnl_link_get_net_tb(struct nlattr *tb[]);
struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]);
struct net_device *rtnl_create_link(struct net *net, const char *ifname,
unsigned char name_assign_type,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f98706ad390a..1bc8afcefc1e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2511,9 +2511,10 @@ int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
}
EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
-struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+struct net *rtnl_link_get_net_tb(struct nlattr *tb[])
{
- struct net *net;
+ struct net *net = NULL;
+
/* Examine the link attributes and figure out which
* network namespace we are talking about.
*/
@@ -2521,8 +2522,18 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
else if (tb[IFLA_NET_NS_FD])
net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
- else
+
+ return net;
+}
+EXPORT_SYMBOL(rtnl_link_get_net_tb);
+
+struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
+{
+ struct net *net = rtnl_link_get_net_tb(tb);
+
+ if (!net)
net = get_net(src_net);
+
return net;
}
EXPORT_SYMBOL(rtnl_link_get_net);
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops.
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb() Kuniyuki Iwashima
@ 2024-11-05 2:05 ` Kuniyuki Iwashima
2024-11-05 10:23 ` Eric Dumazet
2024-11-06 0:39 ` Jakub Kicinski
2024-11-05 2:05 ` [PATCH v1 net-next 4/8] veth: Set VETH_INFO_PEER to veth_link_ops.peer_type Kuniyuki Iwashima
` (4 subsequent siblings)
7 siblings, 2 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
For veth, vxcan, and netkit, we need to prefetch the peer device's
netns in rtnl_newlink() for per-netns RTNL.
All of the three get the netns in the same way peer netlink attr tb:
1. Call rtnl_nla_parse_ifinfomsg()
2. Call ops->validate() (vxcan doesn't have)
3. Call rtnl_link_get_net_tb()
Let's add a new field peer_type to struct rtnl_link_ops and fetch
netns in peer attrbutes to add it to rtnl_nets in rtnl_newlink().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/net/rtnetlink.h | 2 ++
net/core/rtnetlink.c | 39 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index b9ed44b2d056..c3548da95ffa 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -85,6 +85,7 @@ void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net);
* @srcu: Used internally
* @kind: Identifier
* @netns_refund: Physical device, move to init_net on netns exit
+ * @peer_type: Peer device specific netlink attribute number (e.g. VETH_INFO_PEER)
* @maxtype: Highest device specific netlink attribute number
* @policy: Netlink policy for device specific attribute validation
* @validate: Optional validation function for netlink/changelink parameters
@@ -126,6 +127,7 @@ struct rtnl_link_ops {
void (*setup)(struct net_device *dev);
bool netns_refund;
+ const unsigned char peer_type;
unsigned int maxtype;
const struct nla_policy *policy;
int (*validate)(struct nlattr *tb[],
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1bc8afcefc1e..48bd9e062550 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3798,6 +3798,37 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
goto out;
}
+static int rtnl_add_peer_net(struct rtnl_nets *rtnl_nets,
+ const struct rtnl_link_ops *ops,
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net *net;
+ int err;
+
+ if (!data || !data[ops->peer_type])
+ return 0;
+
+ err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack);
+ if (err < 0)
+ return err;
+
+ if (ops->validate) {
+ err = ops->validate(tb, NULL, extack);
+ if (err < 0)
+ return err;
+ }
+
+ net = rtnl_link_get_net_tb(tb);
+ if (IS_ERR(net))
+ return PTR_ERR(net);
+ if (net)
+ rtnl_nets_add(rtnl_nets, net);
+
+ return 0;
+}
+
static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct rtnl_link_ops *ops,
struct net *tgt_net, struct net *link_net,
@@ -3926,12 +3957,18 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ret < 0)
goto put_ops;
}
+
+ if (ops->peer_type) {
+ ret = rtnl_add_peer_net(&rtnl_nets, ops, data, extack);
+ if (ret < 0)
+ goto put_ops;
+ }
}
tgt_net = rtnl_link_get_net_capable(skb, sock_net(skb->sk), tb, CAP_NET_ADMIN);
if (IS_ERR(tgt_net)) {
ret = PTR_ERR(tgt_net);
- goto put_ops;
+ goto put_net;
}
rtnl_nets_add(&rtnl_nets, tgt_net);
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH v1 net-next 4/8] veth: Set VETH_INFO_PEER to veth_link_ops.peer_type.
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
` (2 preceding siblings ...)
2024-11-05 2:05 ` [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops Kuniyuki Iwashima
@ 2024-11-05 2:05 ` Kuniyuki Iwashima
2024-11-05 10:24 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 5/8] vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type Kuniyuki Iwashima
` (3 subsequent siblings)
7 siblings, 1 reply; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
For per-netns RTNL, we need to prefetch the peer device's netns.
Let's set rtnl_link_ops.peer_type and accordingly remove duplicated
validation in ->newlink().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
drivers/net/veth.c | 18 ++++--------------
1 file changed, 4 insertions(+), 14 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 18148e068aa0..0d6d0d749d44 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1781,19 +1781,11 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
/*
* create and register peer first
*/
- if (data != NULL && data[VETH_INFO_PEER] != NULL) {
- struct nlattr *nla_peer;
+ if (data && data[VETH_INFO_PEER]) {
+ struct nlattr *nla_peer = data[VETH_INFO_PEER];
- nla_peer = data[VETH_INFO_PEER];
ifmp = nla_data(nla_peer);
- err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
- if (err < 0)
- return err;
-
- err = veth_validate(peer_tb, NULL, extack);
- if (err < 0)
- return err;
-
+ rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
tbp = peer_tb;
} else {
ifmp = NULL;
@@ -1809,9 +1801,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
}
net = rtnl_link_get_net(src_net, tbp);
- if (IS_ERR(net))
- return PTR_ERR(net);
-
peer = rtnl_create_link(net, ifname, name_assign_type,
&veth_link_ops, tbp, extack);
if (IS_ERR(peer)) {
@@ -1952,6 +1941,7 @@ static struct rtnl_link_ops veth_link_ops = {
.newlink = veth_newlink,
.dellink = veth_dellink,
.policy = veth_policy,
+ .peer_type = VETH_INFO_PEER,
.maxtype = VETH_INFO_MAX,
.get_link_net = veth_get_link_net,
.get_num_tx_queues = veth_get_num_queues,
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH v1 net-next 5/8] vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type.
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
` (3 preceding siblings ...)
2024-11-05 2:05 ` [PATCH v1 net-next 4/8] veth: Set VETH_INFO_PEER to veth_link_ops.peer_type Kuniyuki Iwashima
@ 2024-11-05 2:05 ` Kuniyuki Iwashima
2024-11-05 10:37 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 6/8] netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type Kuniyuki Iwashima
` (2 subsequent siblings)
7 siblings, 1 reply; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
For per-netns RTNL, we need to prefetch the peer device's netns.
Let's set rtnl_link_ops.peer_type and accordingly remove duplicated
validation in ->newlink().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
Note for CAN maintainers, this patch needs to go through net-next
directly as the later patch depends on this.
---
drivers/net/can/vxcan.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 9e1b7d41005f..da7c72105fb6 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -188,14 +188,10 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
/* register peer device */
if (data && data[VXCAN_INFO_PEER]) {
- struct nlattr *nla_peer;
+ struct nlattr *nla_peer = data[VXCAN_INFO_PEER];
- nla_peer = data[VXCAN_INFO_PEER];
ifmp = nla_data(nla_peer);
- err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
- if (err < 0)
- return err;
-
+ rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
tbp = peer_tb;
}
@@ -208,9 +204,6 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
}
peer_net = rtnl_link_get_net(net, tbp);
- if (IS_ERR(peer_net))
- return PTR_ERR(peer_net);
-
peer = rtnl_create_link(peer_net, ifname, name_assign_type,
&vxcan_link_ops, tbp, extack);
if (IS_ERR(peer)) {
@@ -302,6 +295,7 @@ static struct rtnl_link_ops vxcan_link_ops = {
.newlink = vxcan_newlink,
.dellink = vxcan_dellink,
.policy = vxcan_policy,
+ .peer_type = VXCAN_INFO_PEER,
.maxtype = VXCAN_INFO_MAX,
.get_link_net = vxcan_get_link_net,
};
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH v1 net-next 6/8] netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type.
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
` (4 preceding siblings ...)
2024-11-05 2:05 ` [PATCH v1 net-next 5/8] vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type Kuniyuki Iwashima
@ 2024-11-05 2:05 ` Kuniyuki Iwashima
2024-11-05 10:39 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 8/8] rtnetlink: Register rtnl_dellink() and rtnl_setlink() with RTNL_FLAG_DOIT_PERNET_WIP Kuniyuki Iwashima
7 siblings, 1 reply; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
For per-netns RTNL, we need to prefetch the peer device's netns.
Let's set rtnl_link_ops.peer_type and accordingly remove duplicated
validation in ->newlink().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
drivers/net/netkit.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index cd8360b9bbde..bb07725d1c72 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -351,12 +351,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
if (data[IFLA_NETKIT_PEER_INFO]) {
attr = data[IFLA_NETKIT_PEER_INFO];
ifmp = nla_data(attr);
- err = rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack);
- if (err < 0)
- return err;
- err = netkit_validate(peer_tb, NULL, extack);
- if (err < 0)
- return err;
+ rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack);
tbp = peer_tb;
}
if (data[IFLA_NETKIT_SCRUB])
@@ -391,9 +386,6 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
return -EOPNOTSUPP;
net = rtnl_link_get_net(src_net, tbp);
- if (IS_ERR(net))
- return PTR_ERR(net);
-
peer = rtnl_create_link(net, ifname, ifname_assign_type,
&netkit_link_ops, tbp, extack);
if (IS_ERR(peer)) {
@@ -978,6 +970,7 @@ static struct rtnl_link_ops netkit_link_ops = {
.fill_info = netkit_fill_info,
.policy = netkit_policy,
.validate = netkit_validate,
+ .peer_type = IFLA_NETKIT_PEER_INFO,
.maxtype = IFLA_NETKIT_MAX,
};
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL.
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
` (5 preceding siblings ...)
2024-11-05 2:05 ` [PATCH v1 net-next 6/8] netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type Kuniyuki Iwashima
@ 2024-11-05 2:05 ` Kuniyuki Iwashima
2024-11-05 10:40 ` Eric Dumazet
2024-11-05 16:22 ` Paolo Abeni
2024-11-05 2:05 ` [PATCH v1 net-next 8/8] rtnetlink: Register rtnl_dellink() and rtnl_setlink() with RTNL_FLAG_DOIT_PERNET_WIP Kuniyuki Iwashima
7 siblings, 2 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
Now, we are ready to convert rtnl_newlink() to per-netns RTNL;
rtnl_link_ops is protected by SRCU and netns is prefetched in
rtnl_newlink().
Let's register rtnl_newlink() with RTNL_FLAG_DOIT_PERNET and
push RTNL down as rtnl_nets_lock().
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/core/rtnetlink.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 48bd9e062550..0df0cba0a700 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -312,6 +312,26 @@ void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net)
}
EXPORT_SYMBOL(rtnl_nets_add);
+static void rtnl_nets_lock(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ rtnl_lock();
+
+ for (i = 0; i < rtnl_nets->len; i++)
+ __rtnl_net_lock(rtnl_nets->net[i]);
+}
+
+static void rtnl_nets_unlock(struct rtnl_nets *rtnl_nets)
+{
+ int i;
+
+ for (i = 0; i < rtnl_nets->len; i++)
+ __rtnl_net_unlock(rtnl_nets->net[i]);
+
+ rtnl_unlock();
+}
+
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
@@ -3991,7 +4011,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
+ rtnl_nets_lock(&rtnl_nets);
ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack);
+ rtnl_nets_unlock(&rtnl_nets);
put_net:
rtnl_nets_destroy(&rtnl_nets);
@@ -6995,7 +7017,8 @@ static struct pernet_operations rtnetlink_net_ops = {
};
static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = {
- {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink},
+ {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink,
+ .flags = RTNL_FLAG_DOIT_PERNET},
{.msgtype = RTM_DELLINK, .doit = rtnl_dellink},
{.msgtype = RTM_GETLINK, .doit = rtnl_getlink,
.dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH v1 net-next 8/8] rtnetlink: Register rtnl_dellink() and rtnl_setlink() with RTNL_FLAG_DOIT_PERNET_WIP.
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
` (6 preceding siblings ...)
2024-11-05 2:05 ` [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL Kuniyuki Iwashima
@ 2024-11-05 2:05 ` Kuniyuki Iwashima
7 siblings, 0 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 2:05 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, Kuniyuki Iwashima, netdev
Currently, rtnl_setlink() and rtnl_dellink() cannot be fully converted
to per-netns RTNL due to a lack of handling peer/lower/upper devices in
different netns.
For example, when we change a device in rtnl_setlink() and need to
propagate that to its upper devices, we want to avoid acquiring all netns
locks, for which we do not know the upper limit.
The same situation happens when we remove a device.
rtnl_dellink() could be transformed to remove a single device in the
requested netns and delegate other devices to per-netns work, and
rtnl_setlink() might be ?
Until we come up with a better idea, let's use a new flag
RTNL_FLAG_DOIT_PERNET_WIP for rtnl_dellink() and rtnl_setlink().
This will unblock converting RTNL users where such devices are not related.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
include/net/rtnetlink.h | 1 +
net/core/rtnetlink.c | 19 ++++++++++++++++---
2 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index c3548da95ffa..4b0f53891f48 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,6 +13,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
#define RTNL_FLAG_DOIT_PERNET RTNL_FLAG_DOIT_UNLOCKED
+#define RTNL_FLAG_DOIT_PERNET_WIP RTNL_FLAG_DOIT_UNLOCKED
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0df0cba0a700..36bafc105bd5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3402,6 +3402,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
+ struct rtnl_nets rtnl_nets;
struct net *tgt_net;
int err;
@@ -3420,6 +3421,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
+ rtnl_nets_init(&rtnl_nets);
+ rtnl_nets_add(&rtnl_nets, get_net(net));
+ rtnl_nets_add(&rtnl_nets, tgt_net);
+
+ rtnl_nets_lock(&rtnl_nets);
+
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
@@ -3432,7 +3439,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
else if (!err)
err = -ENODEV;
- put_net(tgt_net);
+ rtnl_nets_unlock(&rtnl_nets);
errout:
return err;
}
@@ -3517,6 +3524,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(tgt_net);
}
+ rtnl_net_lock(tgt_net);
+
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
@@ -3531,6 +3540,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
else
err = -EINVAL;
+ rtnl_net_unlock(tgt_net);
+
if (netnsid >= 0)
put_net(tgt_net);
@@ -7019,10 +7030,12 @@ static struct pernet_operations rtnetlink_net_ops = {
static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = {
{.msgtype = RTM_NEWLINK, .doit = rtnl_newlink,
.flags = RTNL_FLAG_DOIT_PERNET},
- {.msgtype = RTM_DELLINK, .doit = rtnl_dellink},
+ {.msgtype = RTM_DELLINK, .doit = rtnl_dellink,
+ .flags = RTNL_FLAG_DOIT_PERNET_WIP},
{.msgtype = RTM_GETLINK, .doit = rtnl_getlink,
.dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
- {.msgtype = RTM_SETLINK, .doit = rtnl_setlink},
+ {.msgtype = RTM_SETLINK, .doit = rtnl_setlink,
+ .flags = RTNL_FLAG_DOIT_PERNET_WIP},
{.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all},
{.msgtype = RTM_GETROUTE, .dumpit = rtnl_dump_all},
{.msgtype = RTM_GETNETCONF, .dumpit = rtnl_dump_all},
--
2.39.5 (Apple Git-154)
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers.
2024-11-05 2:05 ` [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers Kuniyuki Iwashima
@ 2024-11-05 10:16 ` Eric Dumazet
2024-11-06 0:35 ` Jakub Kicinski
1 sibling, 0 replies; 28+ messages in thread
From: Eric Dumazet @ 2024-11-05 10:16 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Tue, Nov 5, 2024 at 3:05 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> rtnl_newlink() needs to hold 3 per-netns RTNL: 2 for a new device
> and 1 for its peer.
>
> We will add rtnl_nets_lock() later, which performs the nested locking
> based on struct rtnl_nets, which has an array of struct net pointers.
>
> rtnl_nets_add() adds a net pointer to the array and sorts it so that
> rtnl_nets_lock() can simply acquire per-netns RTNL from array[0] to [2].
>
> Before calling rtnl_nets_add(), get_net() must be called for the net,
> and rtnl_nets_destroy() will call put_net() for each.
>
> Let's apply the helpers to rtnl_newlink().
>
> When CONFIG_DEBUG_NET_SMALL_RTNL is disabled, we do not call
> rtnl_net_lock() thus do not care about the array order, so
> rtnl_net_cmp_locks() returns -1 so that the loop in rtnl_nets_add()
> can be optimised to NOP.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb().
2024-11-05 2:05 ` [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb() Kuniyuki Iwashima
@ 2024-11-05 10:18 ` Eric Dumazet
2024-11-06 0:37 ` Jakub Kicinski
1 sibling, 0 replies; 28+ messages in thread
From: Eric Dumazet @ 2024-11-05 10:18 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Tue, Nov 5, 2024 at 3:06 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> In ops->newlink(), veth, vxcan, and netkit call rtnl_link_get_net() with
> a net pointer, which is the first argument of ->newlink().
>
> rtnl_link_get_net() could return another netns based on IFLA_NET_NS_PID
> and IFLA_NET_NS_FD in the peer device's attributes.
>
> We want to get it and fill rtnl_nets->nets[] in advance.
>
> Let's factorise the peer netns part from rtnl_link_get_net().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops.
2024-11-05 2:05 ` [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops Kuniyuki Iwashima
@ 2024-11-05 10:23 ` Eric Dumazet
2024-11-06 0:39 ` Jakub Kicinski
1 sibling, 0 replies; 28+ messages in thread
From: Eric Dumazet @ 2024-11-05 10:23 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Tue, Nov 5, 2024 at 3:06 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> For veth, vxcan, and netkit, we need to prefetch the peer device's
> netns in rtnl_newlink() for per-netns RTNL.
>
> All of the three get the netns in the same way peer netlink attr tb:
>
> 1. Call rtnl_nla_parse_ifinfomsg()
> 2. Call ops->validate() (vxcan doesn't have)
> 3. Call rtnl_link_get_net_tb()
>
> Let's add a new field peer_type to struct rtnl_link_ops and fetch
> netns in peer attrbutes to add it to rtnl_nets in rtnl_newlink().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 4/8] veth: Set VETH_INFO_PEER to veth_link_ops.peer_type.
2024-11-05 2:05 ` [PATCH v1 net-next 4/8] veth: Set VETH_INFO_PEER to veth_link_ops.peer_type Kuniyuki Iwashima
@ 2024-11-05 10:24 ` Eric Dumazet
0 siblings, 0 replies; 28+ messages in thread
From: Eric Dumazet @ 2024-11-05 10:24 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Tue, Nov 5, 2024 at 3:06 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> For per-netns RTNL, we need to prefetch the peer device's netns.
>
> Let's set rtnl_link_ops.peer_type and accordingly remove duplicated
> validation in ->newlink().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 5/8] vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type.
2024-11-05 2:05 ` [PATCH v1 net-next 5/8] vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type Kuniyuki Iwashima
@ 2024-11-05 10:37 ` Eric Dumazet
0 siblings, 0 replies; 28+ messages in thread
From: Eric Dumazet @ 2024-11-05 10:37 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Tue, Nov 5, 2024 at 3:07 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> For per-netns RTNL, we need to prefetch the peer device's netns.
>
> Let's set rtnl_link_ops.peer_type and accordingly remove duplicated
> validation in ->newlink().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 6/8] netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type.
2024-11-05 2:05 ` [PATCH v1 net-next 6/8] netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type Kuniyuki Iwashima
@ 2024-11-05 10:39 ` Eric Dumazet
0 siblings, 0 replies; 28+ messages in thread
From: Eric Dumazet @ 2024-11-05 10:39 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Tue, Nov 5, 2024 at 3:07 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> For per-netns RTNL, we need to prefetch the peer device's netns.
>
> Let's set rtnl_link_ops.peer_type and accordingly remove duplicated
> validation in ->newlink().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL.
2024-11-05 2:05 ` [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL Kuniyuki Iwashima
@ 2024-11-05 10:40 ` Eric Dumazet
2024-11-05 16:22 ` Paolo Abeni
1 sibling, 0 replies; 28+ messages in thread
From: Eric Dumazet @ 2024-11-05 10:40 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Tue, Nov 5, 2024 at 3:07 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Now, we are ready to convert rtnl_newlink() to per-netns RTNL;
> rtnl_link_ops is protected by SRCU and netns is prefetched in
> rtnl_newlink().
>
> Let's register rtnl_newlink() with RTNL_FLAG_DOIT_PERNET and
> push RTNL down as rtnl_nets_lock().
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
Reviewed-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL.
2024-11-05 2:05 ` [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL Kuniyuki Iwashima
2024-11-05 10:40 ` Eric Dumazet
@ 2024-11-05 16:22 ` Paolo Abeni
2024-11-05 17:11 ` Kuniyuki Iwashima
1 sibling, 1 reply; 28+ messages in thread
From: Paolo Abeni @ 2024-11-05 16:22 UTC (permalink / raw)
To: Kuniyuki Iwashima, David S. Miller, Eric Dumazet, Jakub Kicinski,
Simon Horman
Cc: Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On 11/5/24 03:05, Kuniyuki Iwashima wrote:
> @@ -6995,7 +7017,8 @@ static struct pernet_operations rtnetlink_net_ops = {
> };
>
> static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = {
> - {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink},
> + {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink,
> + .flags = RTNL_FLAG_DOIT_PERNET},
The above causes a lockdep splat in many selftests:
https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/846801/12-bareudp-sh/stderr
the problem is in rtnl_newlink():
#ifdef CONFIG_MODULES
if (!ops) {
__rtnl_unlock();
// we no more under the rtnlock
request_module("rtnl-link-%s", kind);
rtnl_lock();
ops = rtnl_link_ops_get(kind, &ops_srcu_index);
}
#endif
Cheers,
Paolo
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL.
2024-11-05 16:22 ` Paolo Abeni
@ 2024-11-05 17:11 ` Kuniyuki Iwashima
0 siblings, 0 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-05 17:11 UTC (permalink / raw)
To: pabeni
Cc: andrew+netdev, daniel, davem, edumazet, horms, kuba, kuni1840,
kuniyu, mailhol.vincent, mkl, netdev, razor
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 5 Nov 2024 17:22:54 +0100
> On 11/5/24 03:05, Kuniyuki Iwashima wrote:
> > @@ -6995,7 +7017,8 @@ static struct pernet_operations rtnetlink_net_ops = {
> > };
> >
> > static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = {
> > - {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink},
> > + {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink,
> > + .flags = RTNL_FLAG_DOIT_PERNET},
>
> The above causes a lockdep splat in many selftests:
>
> https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/846801/12-bareudp-sh/stderr
>
> the problem is in rtnl_newlink():
>
> #ifdef CONFIG_MODULES
> if (!ops) {
> __rtnl_unlock();
> // we no more under the rtnlock
> request_module("rtnl-link-%s", kind);
> rtnl_lock();
> ops = rtnl_link_ops_get(kind, &ops_srcu_index);
> }
> #endif
Oh, somehow I dropped this part of change while rebasing the RFC version...
https://github.com/q2ven/linux/commit/fea4b46993e8802e6dd7341d6e7dd49396e378d1
Will remove the unlock dance in v2.
Thanks!
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers.
2024-11-05 2:05 ` [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers Kuniyuki Iwashima
2024-11-05 10:16 ` Eric Dumazet
@ 2024-11-06 0:35 ` Jakub Kicinski
2024-11-06 0:41 ` Kuniyuki Iwashima
1 sibling, 1 reply; 28+ messages in thread
From: Jakub Kicinski @ 2024-11-06 0:35 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Eric Dumazet, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Mon, 4 Nov 2024 18:05:07 -0800 Kuniyuki Iwashima wrote:
> +EXPORT_SYMBOL(rtnl_nets_add);
Will you need the export later?
Current series doesn't use it in modules.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb().
2024-11-05 2:05 ` [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb() Kuniyuki Iwashima
2024-11-05 10:18 ` Eric Dumazet
@ 2024-11-06 0:37 ` Jakub Kicinski
2024-11-06 0:44 ` Kuniyuki Iwashima
1 sibling, 1 reply; 28+ messages in thread
From: Jakub Kicinski @ 2024-11-06 0:37 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Eric Dumazet, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Mon, 4 Nov 2024 18:05:08 -0800 Kuniyuki Iwashima wrote:
> +struct net *rtnl_link_get_net_tb(struct nlattr *tb[])
"tb" stands for "table", AFAIU, it's not a very meaningful suffix.
I'd suggestrtnl_link_get_net_ifla or rtnl_get_net_ifla
ifla == if_link attribute
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops.
2024-11-05 2:05 ` [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops Kuniyuki Iwashima
2024-11-05 10:23 ` Eric Dumazet
@ 2024-11-06 0:39 ` Jakub Kicinski
2024-11-06 0:39 ` Jakub Kicinski
1 sibling, 1 reply; 28+ messages in thread
From: Jakub Kicinski @ 2024-11-06 0:39 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Eric Dumazet, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Mon, 4 Nov 2024 18:05:09 -0800 Kuniyuki Iwashima wrote:
> + const unsigned char peer_type;
technically netlink attr types are 14b wide or some such
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops.
2024-11-06 0:39 ` Jakub Kicinski
@ 2024-11-06 0:39 ` Jakub Kicinski
2024-11-06 0:52 ` Kuniyuki Iwashima
0 siblings, 1 reply; 28+ messages in thread
From: Jakub Kicinski @ 2024-11-06 0:39 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Eric Dumazet, Paolo Abeni, Simon Horman,
Andrew Lunn, Marc Kleine-Budde, Vincent Mailhol, Daniel Borkmann,
Nikolay Aleksandrov, Kuniyuki Iwashima, netdev
On Tue, 5 Nov 2024 16:39:11 -0800 Jakub Kicinski wrote:
> On Mon, 4 Nov 2024 18:05:09 -0800 Kuniyuki Iwashima wrote:
> > + const unsigned char peer_type;
>
> technically netlink attr types are 14b wide or some such
I guess compiler will warn if someone tries to use < 255
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers.
2024-11-06 0:35 ` Jakub Kicinski
@ 2024-11-06 0:41 ` Kuniyuki Iwashima
0 siblings, 0 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-06 0:41 UTC (permalink / raw)
To: kuba
Cc: andrew+netdev, daniel, davem, edumazet, horms, kuni1840, kuniyu,
mailhol.vincent, mkl, netdev, pabeni, razor
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 5 Nov 2024 16:35:06 -0800
> On Mon, 4 Nov 2024 18:05:07 -0800 Kuniyuki Iwashima wrote:
> > +EXPORT_SYMBOL(rtnl_nets_add);
>
> Will you need the export later?
> Current series doesn't use it in modules.
Ah, good catch. I used it in each drivers in v0 but factorised
it to core, so the symbol no longer need to be exported.
Will remove in v2.
Thanks!
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb().
2024-11-06 0:37 ` Jakub Kicinski
@ 2024-11-06 0:44 ` Kuniyuki Iwashima
0 siblings, 0 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-06 0:44 UTC (permalink / raw)
To: kuba
Cc: andrew+netdev, daniel, davem, edumazet, horms, kuni1840, kuniyu,
mailhol.vincent, mkl, netdev, pabeni, razor
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 5 Nov 2024 16:37:07 -0800
> On Mon, 4 Nov 2024 18:05:08 -0800 Kuniyuki Iwashima wrote:
> > +struct net *rtnl_link_get_net_tb(struct nlattr *tb[])
>
> "tb" stands for "table", AFAIU, it's not a very meaningful suffix.
> I'd suggestrtnl_link_get_net_ifla or rtnl_get_net_ifla
> ifla == if_link attribute
Will use rtnl_get_net_ifla().
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops.
2024-11-06 0:39 ` Jakub Kicinski
@ 2024-11-06 0:52 ` Kuniyuki Iwashima
2024-11-06 0:58 ` Kuniyuki Iwashima
0 siblings, 1 reply; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-06 0:52 UTC (permalink / raw)
To: kuba
Cc: andrew+netdev, daniel, davem, edumazet, horms, kuni1840, kuniyu,
mailhol.vincent, mkl, netdev, pabeni, razor
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 5 Nov 2024 16:39:57 -0800
> On Tue, 5 Nov 2024 16:39:11 -0800 Jakub Kicinski wrote:
> > On Mon, 4 Nov 2024 18:05:09 -0800 Kuniyuki Iwashima wrote:
> > > + const unsigned char peer_type;
> >
> > technically netlink attr types are 14b wide or some such
>
> I guess compiler will warn if someone tries to use < 255
I chose 1 just because all of the three peer attr types were 1.
Should peer_type be u16 or extend when a future device use >255 for
peer ifla ?
VETH_INFO_PEER
VXCAN_INFO_PEER
IFLA_NETKIT_PEER_INFO
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops.
2024-11-06 0:52 ` Kuniyuki Iwashima
@ 2024-11-06 0:58 ` Kuniyuki Iwashima
2024-11-06 1:04 ` Jakub Kicinski
0 siblings, 1 reply; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-06 0:58 UTC (permalink / raw)
To: kuniyu
Cc: andrew+netdev, daniel, davem, edumazet, horms, kuba, kuni1840,
mailhol.vincent, mkl, netdev, pabeni, razor
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 5 Nov 2024 16:52:37 -0800
> From: Jakub Kicinski <kuba@kernel.org>
> Date: Tue, 5 Nov 2024 16:39:57 -0800
> > On Tue, 5 Nov 2024 16:39:11 -0800 Jakub Kicinski wrote:
> > > On Mon, 4 Nov 2024 18:05:09 -0800 Kuniyuki Iwashima wrote:
> > > > + const unsigned char peer_type;
> > >
> > > technically netlink attr types are 14b wide or some such
> >
> > I guess compiler will warn if someone tries to use < 255
>
> I chose 1 just because all of the three peer attr types were 1.
s/chose 1/chose u8/ :)
> Should peer_type be u16 or extend when a future device use >255 for
> peer ifla ?
>
> VETH_INFO_PEER
> VXCAN_INFO_PEER
> IFLA_NETKIT_PEER_INFO
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops.
2024-11-06 0:58 ` Kuniyuki Iwashima
@ 2024-11-06 1:04 ` Jakub Kicinski
2024-11-06 1:22 ` Kuniyuki Iwashima
0 siblings, 1 reply; 28+ messages in thread
From: Jakub Kicinski @ 2024-11-06 1:04 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: andrew+netdev, daniel, davem, edumazet, horms, kuni1840,
mailhol.vincent, mkl, netdev, pabeni, razor
On Tue, 5 Nov 2024 16:58:25 -0800 Kuniyuki Iwashima wrote:
> > > I guess compiler will warn if someone tries to use < 255
> >
> > I chose 1 just because all of the three peer attr types were 1.
>
> s/chose 1/chose u8/ :)
>
>
> > Should peer_type be u16 or extend when a future device use >255 for
> > peer ifla ?
I think we can extend in the future if you're doing this for packing
reasons. Barely any family has more attrs than 256 and as I replied to
myself we will assign a constant so compiler will warn us.
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops.
2024-11-06 1:04 ` Jakub Kicinski
@ 2024-11-06 1:22 ` Kuniyuki Iwashima
0 siblings, 0 replies; 28+ messages in thread
From: Kuniyuki Iwashima @ 2024-11-06 1:22 UTC (permalink / raw)
To: kuba
Cc: andrew+netdev, daniel, davem, edumazet, horms, kuni1840, kuniyu,
mailhol.vincent, mkl, netdev, pabeni, razor
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 5 Nov 2024 17:04:47 -0800
> On Tue, 5 Nov 2024 16:58:25 -0800 Kuniyuki Iwashima wrote:
> > > > I guess compiler will warn if someone tries to use < 255
> > >
> > > I chose 1 just because all of the three peer attr types were 1.
> >
> > s/chose 1/chose u8/ :)
> >
> >
> > > Should peer_type be u16 or extend when a future device use >255 for
> > > peer ifla ?
>
> I think we can extend in the future if you're doing this for packing
> reasons. Barely any family has more attrs than 256 and as I replied to
> myself we will assign a constant so compiler will warn us.
I have no strong preference so will use u16.
Also, there was a 3-bytes hole.
Thanks!
^ permalink raw reply [flat|nested] 28+ messages in thread
end of thread, other threads:[~2024-11-06 1:22 UTC | newest]
Thread overview: 28+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-05 2:05 [PATCH v1 net-next 0/8] rtnetlink: Convert rtnl_newlink() to per-netns RTNL Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 1/8] rtnetlink: Introduce struct rtnl_nets and helpers Kuniyuki Iwashima
2024-11-05 10:16 ` Eric Dumazet
2024-11-06 0:35 ` Jakub Kicinski
2024-11-06 0:41 ` Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 2/8] rtnetlink: Factorise rtnl_link_get_net_tb() Kuniyuki Iwashima
2024-11-05 10:18 ` Eric Dumazet
2024-11-06 0:37 ` Jakub Kicinski
2024-11-06 0:44 ` Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 3/8] rtnetlink: Add peer_type in struct rtnl_link_ops Kuniyuki Iwashima
2024-11-05 10:23 ` Eric Dumazet
2024-11-06 0:39 ` Jakub Kicinski
2024-11-06 0:39 ` Jakub Kicinski
2024-11-06 0:52 ` Kuniyuki Iwashima
2024-11-06 0:58 ` Kuniyuki Iwashima
2024-11-06 1:04 ` Jakub Kicinski
2024-11-06 1:22 ` Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 4/8] veth: Set VETH_INFO_PEER to veth_link_ops.peer_type Kuniyuki Iwashima
2024-11-05 10:24 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 5/8] vxcan: Set VXCAN_INFO_PEER to vxcan_link_ops.peer_type Kuniyuki Iwashima
2024-11-05 10:37 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 6/8] netkit: Set IFLA_NETKIT_PEER_INFO to netkit_link_ops.peer_type Kuniyuki Iwashima
2024-11-05 10:39 ` Eric Dumazet
2024-11-05 2:05 ` [PATCH v1 net-next 7/8] rtnetlink: Convert RTM_NEWLINK to per-netns RTNL Kuniyuki Iwashima
2024-11-05 10:40 ` Eric Dumazet
2024-11-05 16:22 ` Paolo Abeni
2024-11-05 17:11 ` Kuniyuki Iwashima
2024-11-05 2:05 ` [PATCH v1 net-next 8/8] rtnetlink: Register rtnl_dellink() and rtnl_setlink() with RTNL_FLAG_DOIT_PERNET_WIP Kuniyuki Iwashima
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).