public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations
@ 2026-04-28 17:04 Eric Dumazet
  2026-04-28 17:04 ` [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error Eric Dumazet
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Before converting tc_dump_qdisc() to RCU, we make the following changes:

- Use for_each_netdev_dump() instead of for_each_netdev()

- Only dump qdiscs of a single device at user space request.

v2: addressed Jakub's feedback (thanks!)

Eric Dumazet (4):
  net/sched: propagate tc_fill_tclass() error
  net/sched: tc_dump_qdisc_root() refactor
  net/sched: switch tc_dump_qdisc() to for_each_netdev_dump()
  net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided

 net/sched/sch_api.c | 129 +++++++++++++++++++++++---------------------
 1 file changed, 69 insertions(+), 60 deletions(-)

-- 
2.54.0.545.g6539524ca2-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error
  2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
@ 2026-04-28 17:04 ` Eric Dumazet
  2026-04-28 17:04 ` [PATCH v2 net-next 2/4] net/sched: tc_dump_qdisc_root() refactor Eric Dumazet
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Change tc_fill_tclass() to return -EMSGSIZE when skb is too small.

Change its callers to propagate this error (instead of -EINVAL).

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_api.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ed869a5ffc7377b7c19e66ae5fc9788e709488da..32ccd4672083aa19340520155aeba6d8b6ff546c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1987,15 +1987,16 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 out_nlmsg_trim:
 nla_put_failure:
 	nlmsg_trim(skb, b);
-	return -1;
+	return -EMSGSIZE;
 }
 
 static int tclass_notify(struct net *net, struct sk_buff *oskb,
 			 struct nlmsghdr *n, struct Qdisc *q,
 			 unsigned long cl, int event, struct netlink_ext_ack *extack)
 {
-	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct sk_buff *skb;
+	int ret;
 
 	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
 		return 0;
@@ -2004,9 +2005,10 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 	if (!skb)
 		return -ENOBUFS;
 
-	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
+	ret = tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack);
+	if (ret < 0) {
 		kfree_skb(skb);
-		return -EINVAL;
+		return ret;
 	}
 
 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
@@ -2017,17 +2019,19 @@ static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
 			     struct nlmsghdr *n, struct Qdisc *q,
 			     unsigned long cl, struct netlink_ext_ack *extack)
 {
-	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct sk_buff *skb;
+	int ret;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
 
-	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
-			   extack) < 0) {
+	ret = tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+			     RTM_NEWTCLASS, extack);
+	if (ret < 0) {
 		kfree_skb(skb);
-		return -EINVAL;
+		return ret;
 	}
 
 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
@@ -2041,7 +2045,7 @@ static int tclass_del_notify(struct net *net,
 			     struct netlink_ext_ack *extack)
 {
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	int err = 0;
 
 	if (!cops->delete)
@@ -2052,13 +2056,12 @@ static int tclass_del_notify(struct net *net,
 		if (!skb)
 			return -ENOBUFS;
 
-		if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
-				   RTM_DELTCLASS, extack) < 0) {
+		err = tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+				     RTM_DELTCLASS, extack);
+		if (err < 0) {
 			kfree_skb(skb);
-			return -EINVAL;
+			return err;
 		}
-	} else {
-		skb = NULL;
 	}
 
 	err = cops->delete(q, cl, extack);
-- 
2.54.0.545.g6539524ca2-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 net-next 2/4] net/sched: tc_dump_qdisc_root() refactor
  2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
  2026-04-28 17:04 ` [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error Eric Dumazet
@ 2026-04-28 17:04 ` Eric Dumazet
  2026-04-28 17:04 ` [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump() Eric Dumazet
  2026-04-28 17:04 ` [PATCH v2 net-next 4/4] net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided Eric Dumazet
  3 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Change tc_fill_qdisc() to return -EMSGSIZE when skb is too small.

Change tc_dump_qdisc_root() to propagate tc_fill_qdisc() error to its callers.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_api.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 32ccd4672083aa19340520155aeba6d8b6ff546c..029e0f87ea9c61cc757432a07a6af92c90c551ef 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -976,7 +976,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 out_nlmsg_trim:
 nla_put_failure:
 	nlmsg_trim(skb, b);
-	return -1;
+	return -EMSGSIZE;
 }
 
 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
@@ -1836,11 +1836,13 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 	if (q_idx < s_q_idx) {
 		q_idx++;
 	} else {
-		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
-		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				  RTM_NEWQDISC, NULL) <= 0)
-			goto done;
+		if (!tc_qdisc_dump_ignore(q, dump_invisible))
+		    ret = tc_fill_qdisc(skb, q, q->parent,
+					NETLINK_CB(cb->skb).portid,
+					cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					RTM_NEWQDISC, NULL);
+		if (ret < 0)
+			goto out;
 		q_idx++;
 	}
 
@@ -1858,20 +1860,19 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 			q_idx++;
 			continue;
 		}
-		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
-		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				  RTM_NEWQDISC, NULL) <= 0)
-			goto done;
+		if (!tc_qdisc_dump_ignore(q, dump_invisible))
+			ret = tc_fill_qdisc(skb, q, q->parent,
+					    NETLINK_CB(cb->skb).portid,
+					    cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					    RTM_NEWQDISC, NULL);
+		if (ret < 0)
+			goto out;
 		q_idx++;
 	}
 
 out:
 	*q_idx_p = q_idx;
 	return ret;
-done:
-	ret = -1;
-	goto out;
 }
 
 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
-- 
2.54.0.545.g6539524ca2-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump()
  2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
  2026-04-28 17:04 ` [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error Eric Dumazet
  2026-04-28 17:04 ` [PATCH v2 net-next 2/4] net/sched: tc_dump_qdisc_root() refactor Eric Dumazet
@ 2026-04-28 17:04 ` Eric Dumazet
  2026-04-30  2:00   ` Jakub Kicinski
  2026-04-28 17:04 ` [PATCH v2 net-next 4/4] net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided Eric Dumazet
  3 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Use for_each_netdev_dump() instead of for_each_netdev().

This is more scalable, and will ease RCU conversion.

This also offers better behavior when other threads
are adding or deleting netdevices concurrently.

This enables dumping qdiscs for a single device
at user space request in the following patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_api.c | 62 ++++++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 029e0f87ea9c61cc757432a07a6af92c90c551ef..641f5a01aca167dd230173078f2db5801dca58da 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1877,18 +1877,18 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 
 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = sock_net(skb->sk);
-	int idx, q_idx;
-	int s_idx, s_q_idx;
-	struct net_device *dev;
 	const struct nlmsghdr *nlh = cb->nlh;
+	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
+	struct {
+		unsigned long ifindex;
+		int q_idx;
+	} *ctx = (void *)cb->ctx;
+	unsigned long s_ifindex;
+	struct net_device *dev;
+	int s_q_idx, q_idx;
 	int err;
 
-	s_idx = cb->args[0];
-	s_q_idx = q_idx = cb->args[1];
-
-	idx = 0;
 	ASSERT_RTNL();
 
 	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
@@ -1896,42 +1896,40 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	if (err < 0)
 		return err;
 
-	for_each_netdev(net, dev) {
+	s_ifindex = ctx->ifindex;
+	s_q_idx = ctx->q_idx;
+
+	for_each_netdev_dump(net, dev, ctx->ifindex) {
 		struct netdev_queue *dev_queue;
+		struct Qdisc *q;
 
-		if (idx < s_idx)
-			goto cont;
-		if (idx > s_idx)
-			s_q_idx = 0;
 		q_idx = 0;
 
 		netdev_lock_ops(dev);
-		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
-				       skb, cb, &q_idx, s_q_idx,
-				       true, tca[TCA_DUMP_INVISIBLE]) < 0) {
-			netdev_unlock_ops(dev);
-			goto done;
-		}
+		q = rtnl_dereference(dev->qdisc);
+		err = tc_dump_qdisc_root(q, skb, cb, &q_idx, s_q_idx,
+					 true, tca[TCA_DUMP_INVISIBLE]);
+		if (err < 0)
+			goto error_unlock;
 
 		dev_queue = dev_ingress_queue(dev);
-		if (dev_queue &&
-		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
-				       skb, cb, &q_idx, s_q_idx, false,
-				       tca[TCA_DUMP_INVISIBLE]) < 0) {
-			netdev_unlock_ops(dev);
-			goto done;
+		if (dev_queue) {
+			q = rtnl_dereference(dev_queue->qdisc_sleeping);
+			err = tc_dump_qdisc_root(q, skb, cb, &q_idx, s_q_idx,
+						 false, tca[TCA_DUMP_INVISIBLE]);
+			if (err < 0)
+				goto error_unlock;
 		}
 		netdev_unlock_ops(dev);
-
-cont:
-		idx++;
+		s_q_idx = 0;
 	}
+	return skb->len;
 
-done:
-	cb->args[0] = idx;
-	cb->args[1] = q_idx;
+error_unlock:
+	netdev_unlock_ops(dev);
+	ctx->q_idx = q_idx;
 
-	return skb->len;
+	return err;
 }
 
 
-- 
2.54.0.545.g6539524ca2-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 net-next 4/4] net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided
  2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
                   ` (2 preceding siblings ...)
  2026-04-28 17:04 ` [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump() Eric Dumazet
@ 2026-04-28 17:04 ` Eric Dumazet
  3 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-28 17:04 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

There is no point dumping qdiscs for all devices when user space
wants them for a single device:

tc -s -d qdisc show dev eth1

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_api.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 641f5a01aca167dd230173078f2db5801dca58da..80827ef8f4f6b0b27391da7c866f5da15f830b2d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1885,6 +1885,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 		int q_idx;
 	} *ctx = (void *)cb->ctx;
 	unsigned long s_ifindex;
+	const struct tcmsg *tcm;
 	struct net_device *dev;
 	int s_q_idx, q_idx;
 	int err;
@@ -1895,6 +1896,9 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 				     rtm_tca_policy, cb->extack);
 	if (err < 0)
 		return err;
+	tcm = nlmsg_data(nlh);
+	if (tcm->tcm_ifindex && !ctx->ifindex)
+		ctx->ifindex = tcm->tcm_ifindex;
 
 	s_ifindex = ctx->ifindex;
 	s_q_idx = ctx->q_idx;
@@ -1903,6 +1907,9 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 		struct netdev_queue *dev_queue;
 		struct Qdisc *q;
 
+		if (tcm->tcm_ifindex && ctx->ifindex != tcm->tcm_ifindex)
+			break;
+
 		q_idx = 0;
 
 		netdev_lock_ops(dev);
-- 
2.54.0.545.g6539524ca2-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump()
  2026-04-28 17:04 ` [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump() Eric Dumazet
@ 2026-04-30  2:00   ` Jakub Kicinski
  2026-04-30  2:16     ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Kicinski @ 2026-04-30  2:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Paolo Abeni, Simon Horman, Jamal Hadi Salim,
	Jiri Pirko, netdev, eric.dumazet

On Tue, 28 Apr 2026 17:04:13 +0000 Eric Dumazet wrote:
>  static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
>  {

> +	struct {
> +		unsigned long ifindex;
> +		int q_idx;
> +	} *ctx = (void *)cb->ctx;
> +	unsigned long s_ifindex;
> +	struct net_device *dev;
> +	int s_q_idx, q_idx;
>  	int err;
>  
> -	s_idx = cb->args[0];
> -	s_q_idx = q_idx = cb->args[1];
> -
> -	idx = 0;
>  	ASSERT_RTNL();
>  
>  	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
> @@ -1896,42 +1896,40 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
>  	if (err < 0)
>  		return err;
>  
> -	for_each_netdev(net, dev) {
> +	s_ifindex = ctx->ifindex;
> +	s_q_idx = ctx->q_idx;
> +
> +	for_each_netdev_dump(net, dev, ctx->ifindex) {

compilers point out that s_ifindex is set but not used since we iterate
directly on ctx->ifindex
-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump()
  2026-04-30  2:00   ` Jakub Kicinski
@ 2026-04-30  2:16     ` Eric Dumazet
  0 siblings, 0 replies; 7+ messages in thread
From: Eric Dumazet @ 2026-04-30  2:16 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: David S . Miller, Paolo Abeni, Simon Horman, Jamal Hadi Salim,
	Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 29, 2026 at 7:00 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Tue, 28 Apr 2026 17:04:13 +0000 Eric Dumazet wrote:
> >  static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
> >  {
>
> > +     struct {
> > +             unsigned long ifindex;
> > +             int q_idx;
> > +     } *ctx = (void *)cb->ctx;
> > +     unsigned long s_ifindex;
> > +     struct net_device *dev;
> > +     int s_q_idx, q_idx;
> >       int err;
> >
> > -     s_idx = cb->args[0];
> > -     s_q_idx = q_idx = cb->args[1];
> > -
> > -     idx = 0;
> >       ASSERT_RTNL();
> >
> >       err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
> > @@ -1896,42 +1896,40 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
> >       if (err < 0)
> >               return err;
> >
> > -     for_each_netdev(net, dev) {
> > +     s_ifindex = ctx->ifindex;
> > +     s_q_idx = ctx->q_idx;
> > +
> > +     for_each_netdev_dump(net, dev, ctx->ifindex) {
>
> compilers point out that s_ifindex is set but not used since we iterate
> directly on ctx->ifindex

Ah right, I need to remember to add W=1 to my builds :/

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-04-30  2:16 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-28 17:04 [PATCH v2 net-next 0/4] net/sched: tc_dump_qdisc() optimizations Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 1/4] net/sched: propagate tc_fill_tclass() error Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 2/4] net/sched: tc_dump_qdisc_root() refactor Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 3/4] net/sched: switch tc_dump_qdisc() to for_each_netdev_dump() Eric Dumazet
2026-04-30  2:00   ` Jakub Kicinski
2026-04-30  2:16     ` Eric Dumazet
2026-04-28 17:04 ` [PATCH v2 net-next 4/4] net/sched: speedup tc_dump_qdisc() when tcm_ifindex is provided Eric Dumazet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox