* [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations
@ 2026-04-28 17:04 Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error Eric Dumazet
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
Eric Dumazet
Before converting tc_dump_qdisc() to RCU, we make the following changes:
- Use for_each_netdev_dump() instead of for_each_netdev()
- Only dump qdiscs of a single device at user space request.
v2: addressed Jakub feedback (thanks!)
Eric Dumazet (4):
net/sched: propagate tc_fill_tclass() error
net/sched: tc_dump_qdisc_root() refactor
net/sched: switch tc_dump_qdisc() to for_each_netdev_dump()
net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided
net/sched/sch_api.c | 129 +++++++++++++++++++++++---------------------
1 file changed, 69 insertions(+), 60 deletions(-)
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error
2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
@ 2026-04-28 17:04 ` Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 2/4] net/sched: tc_dump_qdisc_root() refactor Eric Dumazet
` (2 subsequent siblings)
3 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
Eric Dumazet
Change tc_fill_tclass() to return -EMSGSIZE when skb is too small.
Change its callers to propagate this error (instead of -EINVAL).
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/sched/sch_api.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ed869a5ffc7377b7c19e66ae5fc9788e709488da..32ccd4672083aa19340520155aeba6d8b6ff546c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1987,15 +1987,16 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
out_nlmsg_trim:
nla_put_failure:
nlmsg_trim(skb, b);
- return -1;
+ return -EMSGSIZE;
}
static int tclass_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct Qdisc *q,
unsigned long cl, int event, struct netlink_ext_ack *extack)
{
- struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct sk_buff *skb;
+ int ret;
if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
return 0;
@@ -2004,9 +2005,10 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
+ ret = tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack);
+ if (ret < 0) {
kfree_skb(skb);
- return -EINVAL;
+ return ret;
}
return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
@@ -2017,17 +2019,19 @@ static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct Qdisc *q,
unsigned long cl, struct netlink_ext_ack *extack)
{
- struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct sk_buff *skb;
+ int ret;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
- extack) < 0) {
+ ret = tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+ RTM_NEWTCLASS, extack);
+ if (ret < 0) {
kfree_skb(skb);
- return -EINVAL;
+ return ret;
}
return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
@@ -2041,7 +2045,7 @@ static int tclass_del_notify(struct net *net,
struct netlink_ext_ack *extack)
{
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
int err = 0;
if (!cops->delete)
@@ -2052,13 +2056,12 @@ static int tclass_del_notify(struct net *net,
if (!skb)
return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
- RTM_DELTCLASS, extack) < 0) {
+ err = tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+ RTM_DELTCLASS, extack);
+ if (err < 0) {
kfree_skb(skb);
- return -EINVAL;
+ return err;
}
- } else {
- skb = NULL;
}
err = cops->delete(q, cl, extack);
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v2 net-next 2/4] net/sched: tc_dump_qdisc_root() refactor
2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error Eric Dumazet
@ 2026-04-28 17:04 ` Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump() Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 4/4] net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided Eric Dumazet
3 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
Eric Dumazet
Change tc_fill_qdisc() to return -EMSGSIZE when skb is too small.
Change tc_dump_qdisc_root() to propagate tc_fill_qdisc() error to its callers.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/sched/sch_api.c | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 32ccd4672083aa19340520155aeba6d8b6ff546c..029e0f87ea9c61cc757432a07a6af92c90c551ef 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -976,7 +976,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
out_nlmsg_trim:
nla_put_failure:
nlmsg_trim(skb, b);
- return -1;
+ return -EMSGSIZE;
}
static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
@@ -1836,11 +1836,13 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (q_idx < s_q_idx) {
q_idx++;
} else {
- if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
- tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWQDISC, NULL) <= 0)
- goto done;
+ if (!tc_qdisc_dump_ignore(q, dump_invisible))
+ ret = tc_fill_qdisc(skb, q, q->parent,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWQDISC, NULL);
+ if (ret < 0)
+ goto out;
q_idx++;
}
@@ -1858,20 +1860,19 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
q_idx++;
continue;
}
- if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
- tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWQDISC, NULL) <= 0)
- goto done;
+ if (!tc_qdisc_dump_ignore(q, dump_invisible))
+ ret = tc_fill_qdisc(skb, q, q->parent,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWQDISC, NULL);
+ if (ret < 0)
+ goto out;
q_idx++;
}
out:
*q_idx_p = q_idx;
return ret;
-done:
- ret = -1;
- goto out;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump()
2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 2/4] net/sched: tc_dump_qdisc_root() refactor Eric Dumazet
@ 2026-04-28 17:04 ` Eric Dumazet
2026-04-30 2:00 ` Jakub Kicinski
2026-04-28 17:04 ` [PATCH v2 net-next 4/4] net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided Eric Dumazet
3 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
Eric Dumazet
Use for_each_netdev_dump() instead of for_each_netdev().
This is more scalable, and will ease RCU conversion.
This also offers better behavior when other threads
are adding or deleting netdevices concurrently.
This enables dumping qdiscs for a single device
at user space request in the following patch.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/sched/sch_api.c | 62 ++++++++++++++++++++++-----------------------
1 file changed, 30 insertions(+), 32 deletions(-)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 029e0f87ea9c61cc757432a07a6af92c90c551ef..641f5a01aca167dd230173078f2db5801dca58da 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1877,18 +1877,18 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- int idx, q_idx;
- int s_idx, s_q_idx;
- struct net_device *dev;
const struct nlmsghdr *nlh = cb->nlh;
+ struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
+ struct {
+ unsigned long ifindex;
+ int q_idx;
+ } *ctx = (void *)cb->ctx;
+ unsigned long s_ifindex;
+ struct net_device *dev;
+ int s_q_idx, q_idx;
int err;
- s_idx = cb->args[0];
- s_q_idx = q_idx = cb->args[1];
-
- idx = 0;
ASSERT_RTNL();
err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
@@ -1896,42 +1896,40 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- for_each_netdev(net, dev) {
+ s_ifindex = ctx->ifindex;
+ s_q_idx = ctx->q_idx;
+
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
struct netdev_queue *dev_queue;
+ struct Qdisc *q;
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- s_q_idx = 0;
q_idx = 0;
netdev_lock_ops(dev);
- if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
- skb, cb, &q_idx, s_q_idx,
- true, tca[TCA_DUMP_INVISIBLE]) < 0) {
- netdev_unlock_ops(dev);
- goto done;
- }
+ q = rtnl_dereference(dev->qdisc);
+ err = tc_dump_qdisc_root(q, skb, cb, &q_idx, s_q_idx,
+ true, tca[TCA_DUMP_INVISIBLE]);
+ if (err < 0)
+ goto error_unlock;
dev_queue = dev_ingress_queue(dev);
- if (dev_queue &&
- tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
- skb, cb, &q_idx, s_q_idx, false,
- tca[TCA_DUMP_INVISIBLE]) < 0) {
- netdev_unlock_ops(dev);
- goto done;
+ if (dev_queue) {
+ q = rtnl_dereference(dev_queue->qdisc_sleeping);
+ err = tc_dump_qdisc_root(q, skb, cb, &q_idx, s_q_idx,
+ false, tca[TCA_DUMP_INVISIBLE]);
+ if (err < 0)
+ goto error_unlock;
}
netdev_unlock_ops(dev);
-
-cont:
- idx++;
+ s_q_idx = 0;
}
+ return skb->len;
-done:
- cb->args[0] = idx;
- cb->args[1] = q_idx;
+error_unlock:
+ netdev_unlock_ops(dev);
+ ctx->q_idx = q_idx;
- return skb->len;
+ return err;
}
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v2 net-next 4/4] net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided
2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
` (2 preceding siblings ...)
2026-04-28 17:04 ` [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump() Eric Dumazet
@ 2026-04-28 17:04 ` Eric Dumazet
3 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
Eric Dumazet
There is no point dumping qdiscs for all devices when user space
wants them for a single device:
tc -s -d qdisc show dev eth1
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/sched/sch_api.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 641f5a01aca167dd230173078f2db5801dca58da..80827ef8f4f6b0b27391da7c866f5da15f830b2d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1885,6 +1885,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
int q_idx;
} *ctx = (void *)cb->ctx;
unsigned long s_ifindex;
+ const struct tcmsg *tcm;
struct net_device *dev;
int s_q_idx, q_idx;
int err;
@@ -1895,6 +1896,9 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
rtm_tca_policy, cb->extack);
if (err < 0)
return err;
+ tcm = nlmsg_data(nlh);
+ if (tcm->tcm_ifindex && !ctx->ifindex)
+ ctx->ifindex = tcm->tcm_ifindex;
s_ifindex = ctx->ifindex;
s_q_idx = ctx->q_idx;
@@ -1903,6 +1907,9 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
struct netdev_queue *dev_queue;
struct Qdisc *q;
+ if (tcm->tcm_ifindex && ctx->ifindex != tcm->tcm_ifindex)
+ break;
+
q_idx = 0;
netdev_lock_ops(dev);
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump()
2026-04-28 17:04 ` [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump() Eric Dumazet
@ 2026-04-30 2:00 ` Jakub Kicinski
2026-04-30 2:16 ` Eric Dumazet
0 siblings, 1 reply; 7+ messages in thread
From: Jakub Kicinski @ 2026-04-30 2:00 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Paolo Abeni, Simon Horman, Jamal Hadi Salim,
Jiri Pirko, netdev, eric.dumazet
On Tue, 28 Apr 2026 17:04:13 +0000 Eric Dumazet wrote:
> static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
> {
> + struct {
> + unsigned long ifindex;
> + int q_idx;
> + } *ctx = (void *)cb->ctx;
> + unsigned long s_ifindex;
> + struct net_device *dev;
> + int s_q_idx, q_idx;
> int err;
>
> - s_idx = cb->args[0];
> - s_q_idx = q_idx = cb->args[1];
> -
> - idx = 0;
> ASSERT_RTNL();
>
> err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
> @@ -1896,42 +1896,40 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
> if (err < 0)
> return err;
>
> - for_each_netdev(net, dev) {
> + s_ifindex = ctx->ifindex;
> + s_q_idx = ctx->q_idx;
> +
> + for_each_netdev_dump(net, dev, ctx->ifindex) {
compilers point out that s_ifindex is set but not used since we iterate
directly on ctx->ifindex
--
pw-bot: cr
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump()
2026-04-30 2:00 ` Jakub Kicinski
@ 2026-04-30 2:16 ` Eric Dumazet
0 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-30 2:16 UTC (permalink / raw)
To: Jakub Kicinski
Cc: David S . Miller, Paolo Abeni, Simon Horman, Jamal Hadi Salim,
Jiri Pirko, netdev, eric.dumazet
On Wed, Apr 29, 2026 at 7:00 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 28 Apr 2026 17:04:13 +0000 Eric Dumazet wrote:
> > static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
> > {
>
> > + struct {
> > + unsigned long ifindex;
> > + int q_idx;
> > + } *ctx = (void *)cb->ctx;
> > + unsigned long s_ifindex;
> > + struct net_device *dev;
> > + int s_q_idx, q_idx;
> > int err;
> >
> > - s_idx = cb->args[0];
> > - s_q_idx = q_idx = cb->args[1];
> > -
> > - idx = 0;
> > ASSERT_RTNL();
> >
> > err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
> > @@ -1896,42 +1896,40 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
> > if (err < 0)
> > return err;
> >
> > - for_each_netdev(net, dev) {
> > + s_ifindex = ctx->ifindex;
> > + s_q_idx = ctx->q_idx;
> > +
> > + for_each_netdev_dump(net, dev, ctx->ifindex) {
>
> compilers point out that s_ifindex is set but not used since we iterate
> directly on ctx->ifindex
Ah right, I need to remember to add W=1 to my builds :/
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2026-04-30 2:16 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 2/4] net/sched: tc_dump_qdisc_root() refactor Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump() Eric Dumazet
2026-04-30 2:00 ` Jakub Kicinski
2026-04-30 2:16 ` Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 4/4] net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox