From: Kuniyuki Iwashima <kuniyu@amazon.com>
To: "David S. Miller" <davem@davemloft.net>,
Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: Kuniyuki Iwashima <kuniyu@amazon.com>,
Kuniyuki Iwashima <kuni1840@gmail.com>, <netdev@vger.kernel.org>
Subject: [PATCH v2 net-next 07/14] rtnetlink: Protect struct rtnl_link_ops with SRCU.
Date: Wed, 16 Oct 2024 11:53:50 -0700 [thread overview]
Message-ID: <20241016185357.83849-8-kuniyu@amazon.com> (raw)
In-Reply-To: <20241016185357.83849-1-kuniyu@amazon.com>
Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to
guarantee that rtnl_link_ops is alive during inflight RTM_NEWLINK
even when its module is being unloaded.
Let's use SRCU to protect ops.
rtnl_link_ops_get() now iterates link_ops under RCU and returns
SRCU-protected ops pointer. The caller must call rtnl_link_ops_put()
to release the pointer after the use.
Also, __rtnl_link_unregister() unlinks the ops first and calls
synchronize_srcu() to wait for inflight RTM_NEWLINK requests to
complete.
Note that link_ops needs to be protected by its dedicated lock
when RTNL is removed.
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
v2:
* Handle error of init_srcu_struct().
* Call cleanup_srcu_struct() after synchronize_srcu().
---
include/net/rtnetlink.h | 5 ++-
net/core/rtnetlink.c | 83 ++++++++++++++++++++++++++++++-----------
2 files changed, 65 insertions(+), 23 deletions(-)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index bb49c5708ce7..1a6aa5ca74f3 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -3,6 +3,7 @@
#define __NET_RTNETLINK_H
#include <linux/rtnetlink.h>
+#include <linux/srcu.h>
#include <net/netlink.h>
typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
@@ -69,7 +70,8 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
/**
* struct rtnl_link_ops - rtnetlink link operations
*
- * @list: Used internally
+ * @list: Used internally, protected by RTNL and SRCU
+ * @srcu: Used internally
* @kind: Identifier
* @netns_refund: Physical device, move to init_net on netns exit
* @maxtype: Highest device specific netlink attribute number
@@ -100,6 +102,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
*/
struct rtnl_link_ops {
struct list_head list;
+ struct srcu_struct srcu;
const char *kind;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9c9290a6c271..31b105b3a834 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -457,15 +457,29 @@ EXPORT_SYMBOL_GPL(__rtnl_unregister_many);
static LIST_HEAD(link_ops);
-static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
+static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index)
{
- const struct rtnl_link_ops *ops;
+ struct rtnl_link_ops *ops;
- list_for_each_entry(ops, &link_ops, list) {
- if (!strcmp(ops->kind, kind))
- return ops;
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(ops, &link_ops, list) {
+ if (!strcmp(ops->kind, kind)) {
+ *srcu_index = srcu_read_lock(&ops->srcu);
+ goto unlock;
+ }
}
- return NULL;
+
+ ops = NULL;
+unlock:
+ rcu_read_unlock();
+
+ return ops;
+}
+
+static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index)
+{
+ srcu_read_unlock(&ops->srcu, srcu_index);
}
/**
@@ -480,8 +494,16 @@ static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
*/
int __rtnl_link_register(struct rtnl_link_ops *ops)
{
- if (rtnl_link_ops_get(ops->kind))
- return -EEXIST;
+ struct rtnl_link_ops *tmp;
+ int err;
+
+ /* When RTNL is removed, add lock for link_ops. */
+ ASSERT_RTNL();
+
+ list_for_each_entry(tmp, &link_ops, list) {
+ if (!strcmp(ops->kind, tmp->kind))
+ return -EEXIST;
+ }
/* The check for alloc/setup is here because if ops
* does not have that filled up, it is not possible
@@ -491,7 +513,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
if ((ops->alloc || ops->setup) && !ops->dellink)
ops->dellink = unregister_netdevice_queue;
- list_add_tail(&ops->list, &link_ops);
+ err = init_srcu_struct(&ops->srcu);
+ if (err)
+ return err;
+
+ list_add_tail_rcu(&ops->list, &link_ops);
+
return 0;
}
EXPORT_SYMBOL_GPL(__rtnl_link_register);
@@ -542,10 +569,12 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
struct net *net;
- for_each_net(net) {
+ list_del_rcu(&ops->list);
+ synchronize_srcu(&ops->srcu);
+ cleanup_srcu_struct(&ops->srcu);
+
+ for_each_net(net)
__rtnl_kill_links(net, ops);
- }
- list_del(&ops->list);
}
EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
@@ -2158,10 +2187,11 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
[IFLA_XDP_PROG_ID] = { .type = NLA_U32 },
};
-static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
+static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla,
+ int *ops_srcu_index)
{
- const struct rtnl_link_ops *ops = NULL;
struct nlattr *linfo[IFLA_INFO_MAX + 1];
+ struct rtnl_link_ops *ops = NULL;
if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0)
return NULL;
@@ -2170,7 +2200,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
char kind[MODULE_NAME_LEN];
nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
- ops = rtnl_link_ops_get(kind);
+ ops = rtnl_link_ops_get(kind, ops_srcu_index);
}
return ops;
@@ -2290,8 +2320,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct rtnl_link_ops *kind_ops = NULL;
struct netlink_ext_ack *extack = cb->extack;
+ struct rtnl_link_ops *kind_ops = NULL;
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
unsigned int flags = NLM_F_MULTI;
@@ -2302,6 +2332,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
struct net *tgt_net = net;
u32 ext_filter_mask = 0;
struct net_device *dev;
+ int ops_srcu_index;
int master_idx = 0;
int netnsid = -1;
int err, i;
@@ -2335,7 +2366,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
master_idx = nla_get_u32(tb[i]);
break;
case IFLA_LINKINFO:
- kind_ops = linkinfo_to_kind_ops(tb[i]);
+ kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index);
break;
default:
if (cb->strict_check) {
@@ -2361,6 +2392,10 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
break;
}
+
+ if (kind_ops)
+ rtnl_link_ops_put(kind_ops, ops_srcu_index);
+
cb->seq = tgt_net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
if (netnsid >= 0)
@@ -3747,8 +3782,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr **tb, **linkinfo, **data = NULL;
- const struct rtnl_link_ops *ops = NULL;
+ struct rtnl_link_ops *ops = NULL;
struct rtnl_newlink_tbs *tbs;
+ int ops_srcu_index;
int ret;
tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
@@ -3780,13 +3816,13 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
char kind[MODULE_NAME_LEN];
nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
- ops = rtnl_link_ops_get(kind);
+ ops = rtnl_link_ops_get(kind, &ops_srcu_index);
#ifdef CONFIG_MODULES
if (!ops) {
__rtnl_unlock();
request_module("rtnl-link-%s", kind);
rtnl_lock();
- ops = rtnl_link_ops_get(kind);
+ ops = rtnl_link_ops_get(kind, &ops_srcu_index);
}
#endif
}
@@ -3800,7 +3836,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
linkinfo[IFLA_INFO_DATA],
ops->policy, extack);
if (ret < 0)
- goto free;
+ goto put_ops;
data = tbs->attr;
}
@@ -3808,12 +3844,15 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ops->validate) {
ret = ops->validate(tb, data, extack);
if (ret < 0)
- goto free;
+ goto put_ops;
}
}
ret = __rtnl_newlink(skb, nlh, ops, tbs, data, extack);
+put_ops:
+ if (ops)
+ rtnl_link_ops_put(ops, ops_srcu_index);
free:
kfree(tbs);
return ret;
--
2.39.5 (Apple Git-154)
next prev parent reply other threads:[~2024-10-16 18:56 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-16 18:53 [PATCH v2 net-next 00/14] rtnetlink: Refactor rtnl_{new,del,set}link() for per-netns RTNL Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 01/14] rtnetlink: Allocate linkinfo[] as struct rtnl_newlink_tbs Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 02/14] rtnetlink: Call validate_linkmsg() in do_setlink() Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 03/14] rtnetlink: Factorise do_setlink() path from __rtnl_newlink() Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 04/14] rtnetlink: Move simple validation from __rtnl_newlink() to rtnl_newlink() Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 05/14] rtnetlink: Move rtnl_link_ops_get() and retry " Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 06/14] rtnetlink: Move ops->validate " Kuniyuki Iwashima
2024-10-16 18:53 ` Kuniyuki Iwashima [this message]
2024-10-22 8:35 ` [PATCH v2 net-next 07/14] rtnetlink: Protect struct rtnl_link_ops with SRCU Paolo Abeni
2024-10-22 8:42 ` Paolo Abeni
2024-10-16 18:53 ` [PATCH v2 net-next 08/14] rtnetlink: Call rtnl_link_get_net_capable() in rtnl_newlink() Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 09/14] rtnetlink: Fetch IFLA_LINK_NETNSID " Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 10/14] rtnetlink: Clean up rtnl_dellink() Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 11/14] rtnetlink: Clean up rtnl_setlink() Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 12/14] rtnetlink: Call rtnl_link_get_net_capable() in do_setlink() Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 13/14] rtnetlink: Return int from rtnl_af_register() Kuniyuki Iwashima
2024-10-16 19:09 ` David Ahern
2024-10-16 23:11 ` Kuniyuki Iwashima
2024-10-18 4:10 ` Matt Johnston
2024-10-22 8:53 ` Paolo Abeni
2024-10-22 8:59 ` Simon Horman
2024-10-22 19:42 ` Kuniyuki Iwashima
2024-10-16 18:53 ` [PATCH v2 net-next 14/14] rtnetlink: Protect struct rtnl_af_ops with SRCU Kuniyuki Iwashima
2024-10-22 9:10 ` [PATCH v2 net-next 00/14] rtnetlink: Refactor rtnl_{new,del,set}link() for per-netns RTNL patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241016185357.83849-8-kuniyu@amazon.com \
--to=kuniyu@amazon.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=kuni1840@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).