Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next] net/sched: introduce TCQ_F_IN_HW to track successful qdisc offload
@ 2026-05-09 12:22 David Yang
  0 siblings, 0 replies; only message in thread
From: David Yang @ 2026-05-09 12:22 UTC (permalink / raw)
  To: netdev
  Cc: David Yang, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Jamal Hadi Salim, Jiri Pirko,
	linux-kernel

qdisc_offload_dump_helper(), introduced for RED in commit 602f3baf2218
("net_sch: red: Add offload ability to RED qdisc"), re-evaluates offload
liveness on every dump because a parent qdisc change can alter the
offload state without any qdisc-specific callback being invoked.  In
this path -EOPNOTSUPP means "I do not offload this qdisc currently",
not "I do not support statistics at all".

Only mlxsw and nfp handle this correctly by maintaining internal qdisc
tracking (they need it for RED offload).  Every other qdisc type that
supports offload (ETS, PRIO, TBF, FIFO, MQ) calls TC_*_DESTROY
unconditionally in its destroy path regardless of whether a previous
TC_*_REPLACE succeeded.  Moreover, without a flag recording whether
REPLACE was accepted, a simple driver that queries hardware directly
rather than maintaining a software shadow has no way to answer STATS
correctly: it must either always return -EOPNOTSUPP (hiding real
offloads from stats) or always return 0 (false positive, claiming a
qdisc is offloaded when it never was).

Gating TC_*_STATS or TC_*_DESTROY on prior TC_*_REPLACE / TC_*_GRAFT
success is not an option: sch_gred relies on receiving STATS even after
offload ends in order to adjust backlog counters, and silently skipping
DESTROY for a qdisc that was previously offloaded would leak hardware
state.

Introduce TCQ_F_IN_HW to indicate that a qdisc has been successfully
loaded to hardware via TC_*_REPLACE.  The flag is set by the new
qdisc_offload_change_helper() on success and consumed by the new
qdisc_offload_destroy_helper() which skips the driver call entirely
when it is clear.  This is orthogonal to TCQ_F_OFFLOADED, which remains
the liveness indicator re-evaluated on every dump.

With this a simple driver can be stateless: on STATS it queries
hardware by handle and returns -EOPNOTSUPP when the entry is absent (the
core will clear TCQ_F_OFFLOADED but TCQ_F_IN_HW remains, so a subsequent
destroy still cleans up); on DESTROY it need not defend against "was
this ever offloaded?" because the core already verified TCQ_F_IN_HW.
The driver no longer needs to maintain its own registry of offloaded
qdisc handles.

Signed-off-by: David Yang <mmyangfl@gmail.com>
---
 include/net/sch_generic.h | 26 +++++++++++++++++++++++---
 net/sched/sch_api.c       | 38 ++++++++++++++++++++++++++++++++++++++
 net/sched/sch_ets.c       |  9 +++------
 net/sched/sch_fifo.c      | 14 ++++----------
 net/sched/sch_gred.c      |  5 +++--
 net/sched/sch_mq.c        |  8 ++++----
 net/sched/sch_prio.c      |  7 ++++---
 net/sched/sch_red.c       |  7 ++++---
 net/sched/sch_tbf.c       | 13 +++----------
 9 files changed, 86 insertions(+), 41 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ccfabfac674e..e69a0d29fd01 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -92,6 +92,7 @@ struct Qdisc {
 #define TCQ_F_NOLOCK		0x100 /* qdisc does not require locking */
 #define TCQ_F_OFFLOADED		0x200 /* qdisc is offloaded to HW */
 #define TCQ_F_DEQUEUE_DROPS	0x400 /* ->dequeue() can drop packets in q->to_free */
+#define TCQ_F_IN_HW		0x800 /* qdisc offloading has been accepted by HW */
 
 	u32			limit;
 	const struct Qdisc_ops	*ops;
@@ -748,18 +749,37 @@ static inline void dev_reset_queue(struct net_device *dev,
 }
 
 #ifdef CONFIG_NET_SCHED
-int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
+void qdisc_offload_destroy_helper(struct Qdisc *sch, enum tc_setup_type type,
+				  void *type_data);
+int qdisc_offload_change_helper(struct Qdisc *sch, enum tc_setup_type type,
+				void *type_data);
+int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
 			      void *type_data);
 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
 				struct Qdisc *new, struct Qdisc *old,
 				enum tc_setup_type type, void *type_data,
 				struct netlink_ext_ack *extack);
 #else
+static inline void
+qdisc_offload_destroy_helper(struct Qdisc *sch, enum tc_setup_type type,
+			     void *type_data)
+{
+	sch->flags &= ~TCQ_F_IN_HW;
+}
+
+static inline int
+qdisc_offload_change_helper(struct Qdisc *sch, enum tc_setup_type type,
+			     void *type_data)
+{
+	sch->flags &= ~TCQ_F_IN_HW;
+	return -EOPNOTSUPP;
+}
+
 static inline int
-qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
+qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
 			  void *type_data)
 {
-	q->flags &= ~TCQ_F_OFFLOADED;
+	sch->flags &= ~TCQ_F_OFFLOADED;
 	return 0;
 }
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6f7847c5536f..05c8b2097cb7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -813,6 +813,44 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
 }
 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
 
+void qdisc_offload_destroy_helper(struct Qdisc *sch, enum tc_setup_type type,
+				  void *type_data)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	int err;
+
+	if (!(sch->flags & TCQ_F_IN_HW))
+		return;
+
+	sch->flags &= ~TCQ_F_IN_HW;
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return;
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
+	if (err)
+		netdev_warn(dev, "Error when destroying offloaded qdisc: %d",
+			    err);
+}
+EXPORT_SYMBOL(qdisc_offload_destroy_helper);
+
+int qdisc_offload_change_helper(struct Qdisc *sch, enum tc_setup_type type,
+				void *type_data)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	int err;
+
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
+
+	if (!err)
+		sch->flags |= TCQ_F_IN_HW;
+
+	return err;
+}
+EXPORT_SYMBOL(qdisc_offload_change_helper);
+
 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
 			      void *type_data)
 {
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index a4b07b661b77..848fdc8608ac 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -151,21 +151,18 @@ static void ets_offload_change(struct Qdisc *sch)
 		qopt.replace_params.weights[i] = weight;
 	}
 
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
+	qdisc_offload_change_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
 }
 
 static void ets_offload_destroy(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_ets_qopt_offload qopt;
 
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return;
-
 	qopt.command = TC_ETS_DESTROY;
 	qopt.handle = sch->handle;
 	qopt.parent = sch->parent;
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
+
+	qdisc_offload_destroy_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
 }
 
 static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index e6bfd39ff339..c6435df37c31 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -58,30 +58,24 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 static void fifo_offload_init(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_fifo_qopt_offload qopt;
 
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return;
-
 	qopt.command = TC_FIFO_REPLACE;
 	qopt.handle = sch->handle;
 	qopt.parent = sch->parent;
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt);
+
+	qdisc_offload_change_helper(sch, TC_SETUP_QDISC_FIFO, &qopt);
 }
 
 static void fifo_offload_destroy(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_fifo_qopt_offload qopt;
 
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return;
-
 	qopt.command = TC_FIFO_DESTROY;
 	qopt.handle = sch->handle;
 	qopt.parent = sch->parent;
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt);
+
+	qdisc_offload_destroy_helper(sch, TC_SETUP_QDISC_FIFO, &qopt);
 }
 
 static int fifo_offload_dump(struct Qdisc *sch)
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8ae65572162c..65688dc49f42 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -346,9 +346,10 @@ static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
 			opt->set.tab[i].backlog = &q->backlog;
 		}
 		opt->set.qstats = &sch->qstats;
+		qdisc_offload_change_helper(sch, TC_SETUP_QDISC_GRED, opt);
+	} else {
+		qdisc_offload_destroy_helper(sch, TC_SETUP_QDISC_GRED, opt);
 	}
-
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
 }
 
 static int gred_offload_dump_stats(struct Qdisc *sch)
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index a0133a7b9d3b..7fedb562f734 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -19,16 +19,16 @@
 
 static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd)
 {
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_mq_qopt_offload opt = {
 		.command = cmd,
 		.handle = sch->handle,
 	};
 
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return -EOPNOTSUPP;
+	if (cmd == TC_MQ_CREATE)
+		return qdisc_offload_change_helper(sch, TC_SETUP_QDISC_MQ, &opt);
 
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
+	qdisc_offload_destroy_helper(sch, TC_SETUP_QDISC_MQ, &opt);
+	return 0;
 }
 
 static int mq_offload_stats(struct Qdisc *sch)
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 9e2b9a490db2..470a4d9c0ac5 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -154,11 +154,12 @@ static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
 		memcpy(&opt.replace_params.priomap, qopt->priomap,
 		       TC_PRIO_MAX + 1);
 		opt.replace_params.qstats = &sch->qstats;
-	} else {
-		opt.command = TC_PRIO_DESTROY;
+		return qdisc_offload_change_helper(sch, TC_SETUP_QDISC_PRIO, &opt);
 	}
 
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt);
+	opt.command = TC_PRIO_DESTROY;
+	qdisc_offload_destroy_helper(sch, TC_SETUP_QDISC_PRIO, &opt);
+	return 0;
 }
 
 static void
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 4d0e44a2e7c6..e64a29c27ca0 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -212,11 +212,12 @@ static int red_offload(struct Qdisc *sch, bool enable)
 		opt.set.is_harddrop = red_use_harddrop(q);
 		opt.set.is_nodrop = red_use_nodrop(q);
 		opt.set.qstats = &sch->qstats;
-	} else {
-		opt.command = TC_RED_DESTROY;
+		return qdisc_offload_change_helper(sch, TC_SETUP_QDISC_RED, &opt);
 	}
 
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+	opt.command = TC_RED_DESTROY;
+	qdisc_offload_destroy_helper(sch, TC_SETUP_QDISC_RED, &opt);
+	return 0;
 }
 
 static void red_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index f2340164f579..c8f315e03e3e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -142,12 +142,8 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
 static void tbf_offload_change(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_tbf_qopt_offload qopt;
 
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return;
-
 	qopt.command = TC_TBF_REPLACE;
 	qopt.handle = sch->handle;
 	qopt.parent = sch->parent;
@@ -155,21 +151,18 @@ static void tbf_offload_change(struct Qdisc *sch)
 	qopt.replace_params.max_size = q->max_size;
 	qopt.replace_params.qstats = &sch->qstats;
 
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
+	qdisc_offload_change_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
 }
 
 static void tbf_offload_destroy(struct Qdisc *sch)
 {
-	struct net_device *dev = qdisc_dev(sch);
 	struct tc_tbf_qopt_offload qopt;
 
-	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
-		return;
-
 	qopt.command = TC_TBF_DESTROY;
 	qopt.handle = sch->handle;
 	qopt.parent = sch->parent;
-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
+
+	qdisc_offload_destroy_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
 }
 
 static int tbf_offload_dump(struct Qdisc *sch)
-- 
2.53.0


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-05-09 12:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-09 12:22 [PATCH net-next] net/sched: introduce TCQ_F_IN_HW to track successful qdisc offload David Yang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox