From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
Jakub Kicinski <kuba@kernel.org>,
Jaroslav Pulchart <jaroslav.pulchart@gooddata.com>,
dsahern@kernel.org
Subject: [PATCH net v3] rtnetlink: make the "split" NLM_DONE handling generic
Date: Mon, 3 Jun 2024 11:48:26 -0700 [thread overview]
Message-ID: <20240603184826.1087245-1-kuba@kernel.org> (raw)
Jaroslav reports Dell's OMSA Systems Management Data Engine
expects NLM_DONE in a separate recvmsg(), both for rtnl_dump_ifinfo()
and inet_dump_ifaddr(). We already added a similar fix previously in
commit 460b0d33cf10 ("inet: bring NLM_DONE out to a separate recv() again")
Instead of modifying all the dump handlers, and making them look
different than modern for_each_netdev_dump()-based dump handlers -
put the workaround in rtnetlink code. This will also help us move
the custom rtnl-locking from af_netlink in the future (in net-next).
Note that this change is not touching rtnl_dump_all(). rtnl_dump_all()
is different kettle of fish and a potential problem. We now mix families
in a single recvmsg(), but NLM_DONE is not coalesced.
Tested:
./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_addr.yaml \
--dump getaddr --json '{"ifa-family": 2}'
./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_route.yaml \
--dump getroute --json '{"rtm-family": 2}'
./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_link.yaml \
--dump getlink
Fixes: 3e41af90767d ("rtnetlink: use xarray iterator to implement rtnl_dump_ifinfo()")
Fixes: cdb2f80f1c10 ("inet: use xa_array iterator to implement inet_dump_ifaddr()")
Reported-by: Jaroslav Pulchart <jaroslav.pulchart@gooddata.com>
Link: https://lore.kernel.org/all/CAK8fFZ7MKoFSEzMBDAOjoUt+vTZRRQgLDNXEOfdCCXSoXXKE0g@mail.gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
v3:
- change the approach to centralized handling + flag
v2:
- adjust the comment in the kdoc
- add getlink
CC: dsahern@kernel.org
---
include/net/rtnetlink.h | 1 +
net/core/rtnetlink.c | 44 +++++++++++++++++++++++++++++++++++++++--
net/ipv4/devinet.c | 2 +-
net/ipv4/fib_frontend.c | 7 +------
4 files changed, 45 insertions(+), 9 deletions(-)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 3bfb80bad173..b45d57b5968a 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,6 +13,7 @@ enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
+ RTNL_FLAG_DUMP_SPLIT_NLM_DONE = BIT(3), /* legacy behavior */
};
enum rtnl_kinds {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b86b0a87367d..4668d6718040 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -6484,6 +6484,46 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Process one rtnetlink message. */
+static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ rtnl_dumpit_func dumpit = cb->data;
+ int err;
+
+ /* Previous iteration have already finished, avoid calling->dumpit()
+ * again, it may not expect to be called after it reached the end.
+ */
+ if (!dumpit)
+ return 0;
+
+ err = dumpit(skb, cb);
+
+ /* Old dump handlers used to send NLM_DONE as in a separate recvmsg().
+ * Some applications which parse netlink manually depend on this.
+ */
+ if (cb->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
+ if (err < 0 && err != -EMSGSIZE)
+ return err;
+ if (!err)
+ cb->data = NULL;
+
+ return skb->len;
+ }
+ return err;
+}
+
+static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *control)
+{
+ if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
+ WARN_ON(control->data);
+ control->data = control->dump;
+ control->dump = rtnl_dumpit;
+ }
+
+ return netlink_dump_start(ssk, skb, nlh, control);
+}
+
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -6548,7 +6588,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.module = owner,
.flags = flags,
};
- err = netlink_dump_start(rtnl, skb, nlh, &c);
+ err = rtnetlink_dump_start(rtnl, skb, nlh, &c);
/* netlink_dump_start() will keep a reference on
* module if dump is still in progress.
*/
@@ -6694,7 +6734,7 @@ void __init rtnetlink_init(void)
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
- rtnl_dump_ifinfo, 0);
+ rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f3892ee9dfb3..d09f557eaa77 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2805,7 +2805,7 @@ void __init devinet_init(void)
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
- RTNL_FLAG_DUMP_UNLOCKED);
+ RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
inet_netconf_dump_devconf,
RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c484b1c0fc00..7ad2cafb9276 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1050,11 +1050,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
e++;
}
}
-
- /* Don't let NLM_DONE coalesce into a message, even if it could.
- * Some user space expects NLM_DONE in a separate recv().
- */
- err = skb->len;
out:
cb->args[1] = e;
@@ -1665,5 +1660,5 @@ void __init ip_fib_init(void)
rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
- RTNL_FLAG_DUMP_UNLOCKED);
+ RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
}
--
2.45.1
next reply other threads:[~2024-06-03 18:48 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-03 18:48 Jakub Kicinski [this message]
2024-06-03 19:49 ` [PATCH net v3] rtnetlink: make the "split" NLM_DONE handling generic Eric Dumazet
2024-06-04 2:33 ` David Ahern
2024-06-04 14:08 ` Jakub Kicinski
2024-06-05 11:40 ` patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240603184826.1087245-1-kuba@kernel.org \
--to=kuba@kernel.org \
--cc=davem@davemloft.net \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=jaroslav.pulchart@gooddata.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.