public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <edumazet@google.com>
To: "David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	 Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Jiri Pirko <jiri@resnulli.us>,
	 netdev@vger.kernel.org, eric.dumazet@gmail.com,
	 Eric Dumazet <edumazet@google.com>
Subject: [PATCH net-next] codel: annotate data-races in codel_dump_stats()
Date: Tue,  7 Apr 2026 14:30:53 +0000	[thread overview]
Message-ID: <20260407143053.1570620-1-edumazet@google.com> (raw)

codel_dump_stats() only runs with RTNL held,
while it reads fields that can be changed in the qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

Alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_codel_xstats fields don't need to be latched atomically;
if they did, this bug would have been caught earlier.

No change in kernel size:

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/1 up/down: 3/-1 (2)
Function                                     old     new   delta
codel_qdisc_dequeue                         2462    2465      +3
codel_dump_stats                             250     249      -1
Total: Before=29739919, After=29739921, chg +0.00%

Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net-next tree has been chosen to avoid a merge conflict.
I will handle stable submission, this bug is minor.

 include/net/codel_impl.h | 45 +++++++++++++++++++++-------------------
 net/sched/sch_codel.c    | 22 ++++++++++----------
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
index b2c359c6dd1b84e3aeb77834272641bac73f8e6a..2c1f0ec309e9fe02e79d377129da6c5c0cbce19b 100644
--- a/include/net/codel_impl.h
+++ b/include/net/codel_impl.h
@@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb,
 	}
 
 	skb_len = skb_len_func(skb);
-	vars->ldelay = now - skb_time_func(skb);
+	WRITE_ONCE(vars->ldelay, now - skb_time_func(skb));
 
 	if (unlikely(skb_len > stats->maxpacket))
-		stats->maxpacket = skb_len;
+		WRITE_ONCE(stats->maxpacket, skb_len);
 
 	if (codel_time_before(vars->ldelay, params->target) ||
 	    *backlog <= params->mtu) {
@@ -159,7 +159,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
 
 	if (!skb) {
 		vars->first_above_time = 0;
-		vars->dropping = false;
+		WRITE_ONCE(vars->dropping, false);
 		return skb;
 	}
 	now = codel_get_time();
@@ -168,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
 	if (vars->dropping) {
 		if (!drop) {
 			/* sojourn time below target - leave dropping state */
-			vars->dropping = false;
+			WRITE_ONCE(vars->dropping, false);
 		} else if (codel_time_after_eq(now, vars->drop_next)) {
 			/* It's time for the next drop. Drop the current
 			 * packet and dequeue the next. The dequeue might
@@ -180,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx,
 			 */
 			while (vars->dropping &&
 			       codel_time_after_eq(now, vars->drop_next)) {
-				vars->count++; /* dont care of possible wrap
-						* since there is no more divide
-						*/
+				/* dont care of possible wrap
+				 * since there is no more divide.
+				 */
+				WRITE_ONCE(vars->count, vars->count + 1);
 				codel_Newton_step(vars);
 				if (params->ecn && INET_ECN_set_ce(skb)) {
-					stats->ecn_mark++;
-					vars->drop_next =
+					WRITE_ONCE(stats->ecn_mark,
+						   stats->ecn_mark + 1);
+					WRITE_ONCE(vars->drop_next,
 						codel_control_law(vars->drop_next,
 								  params->interval,
-								  vars->rec_inv_sqrt);
+								  vars->rec_inv_sqrt));
 					goto end;
 				}
 				stats->drop_len += skb_len_func(skb);
@@ -202,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx,
 						       skb_time_func,
 						       backlog, now)) {
 					/* leave dropping state */
-					vars->dropping = false;
+					WRITE_ONCE(vars->dropping, false);
 				} else {
 					/* and schedule the next drop */
-					vars->drop_next =
+					WRITE_ONCE(vars->drop_next,
 						codel_control_law(vars->drop_next,
 								  params->interval,
-								  vars->rec_inv_sqrt);
+								  vars->rec_inv_sqrt));
 				}
 			}
 		}
@@ -216,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
 		u32 delta;
 
 		if (params->ecn && INET_ECN_set_ce(skb)) {
-			stats->ecn_mark++;
+			WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1);
 		} else {
 			stats->drop_len += skb_len_func(skb);
 			drop_func(skb, ctx);
@@ -227,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
 						 stats, skb_len_func,
 						 skb_time_func, backlog, now);
 		}
-		vars->dropping = true;
+		WRITE_ONCE(vars->dropping, true);
 		/* if min went above target close to when we last went below it
 		 * assume that the drop rate that controlled the queue on the
 		 * last cycle is a good starting point to control it now.
@@ -236,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx,
 		if (delta > 1 &&
 		    codel_time_before(now - vars->drop_next,
 				      16 * params->interval)) {
-			vars->count = delta;
+			WRITE_ONCE(vars->count, delta);
 			/* we dont care if rec_inv_sqrt approximation
 			 * is not very precise :
 			 * Next Newton steps will correct it quadratically.
 			 */
 			codel_Newton_step(vars);
 		} else {
-			vars->count = 1;
+			WRITE_ONCE(vars->count, 1);
 			vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
 		}
-		vars->lastcount = vars->count;
-		vars->drop_next = codel_control_law(now, params->interval,
-						    vars->rec_inv_sqrt);
+		WRITE_ONCE(vars->lastcount, vars->count);
+		WRITE_ONCE(vars->drop_next,
+			   codel_control_law(now, params->interval,
+					     vars->rec_inv_sqrt));
 	}
 end:
 	if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
@@ -262,7 +265,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
 				   params->ce_threshold_selector));
 		}
 		if (set_ce && INET_ECN_set_ce(skb))
-			stats->ce_mark++;
+			WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1);
 	}
 	return skb;
 }
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index dc2be90666ffbd715550800790a0091acd28701d..317aae0ec7bd6aedb4bae09b18423c981fed16e7 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -85,7 +85,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		return qdisc_enqueue_tail(skb, sch);
 	}
 	q = qdisc_priv(sch);
-	q->drop_overlimit++;
+	WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1);
 	return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT);
 }
 
@@ -221,18 +221,18 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	const struct codel_sched_data *q = qdisc_priv(sch);
 	struct tc_codel_xstats st = {
-		.maxpacket	= q->stats.maxpacket,
-		.count		= q->vars.count,
-		.lastcount	= q->vars.lastcount,
-		.drop_overlimit = q->drop_overlimit,
-		.ldelay		= codel_time_to_us(q->vars.ldelay),
-		.dropping	= q->vars.dropping,
-		.ecn_mark	= q->stats.ecn_mark,
-		.ce_mark	= q->stats.ce_mark,
+		.maxpacket	= READ_ONCE(q->stats.maxpacket),
+		.count		= READ_ONCE(q->vars.count),
+		.lastcount	= READ_ONCE(q->vars.lastcount),
+		.drop_overlimit = READ_ONCE(q->drop_overlimit),
+		.ldelay		= codel_time_to_us(READ_ONCE(q->vars.ldelay)),
+		.dropping	= READ_ONCE(q->vars.dropping),
+		.ecn_mark	= READ_ONCE(q->stats.ecn_mark),
+		.ce_mark	= READ_ONCE(q->stats.ce_mark),
 	};
 
-	if (q->vars.dropping) {
-		codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
+	if (st.dropping) {
+		codel_tdiff_t delta = READ_ONCE(q->vars.drop_next) - codel_get_time();
 
 		if (delta >= 0)
 			st.drop_next = codel_time_to_us(delta);
-- 
2.53.0.1213.gd9a14994de-goog


                 reply	other threads:[~2026-04-07 14:31 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260407143053.1570620-1-edumazet@google.com \
    --to=edumazet@google.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=horms@kernel.org \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox