* [PATCH net-next] codel: annotate data-races in codel_dump_stats()
@ 2026-04-07 14:30 Eric Dumazet
0 siblings, 0 replies; only message in thread
From: Eric Dumazet @ 2026-04-07 14:30 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski, Paolo Abeni
Cc: Simon Horman, Jamal Hadi Salim, Jiri Pirko, netdev, eric.dumazet,
Eric Dumazet
codel_dump_stats() only runs with RTNL held,
reading fields that can be changed in qdisc fast path.
Add READ_ONCE()/WRITE_ONCE() annotations.
Alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.
tc_codel_xstats fields don't need to be latched atomically;
otherwise this bug would have been caught earlier.
No change in kernel size:
$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/1 up/down: 3/-1 (2)
Function old new delta
codel_qdisc_dequeue 2462 2465 +3
codel_dump_stats 250 249 -1
Total: Before=29739919, After=29739921, chg +0.00%
Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net-next tree has been chosen to avoid a merge conflict.
I will handle stable submission, this bug is minor.
include/net/codel_impl.h | 45 +++++++++++++++++++++-------------------
net/sched/sch_codel.c | 22 ++++++++++----------
2 files changed, 35 insertions(+), 32 deletions(-)
diff --git a/include/net/codel_impl.h b/include/net/codel_impl.h
index b2c359c6dd1b84e3aeb77834272641bac73f8e6a..2c1f0ec309e9fe02e79d377129da6c5c0cbce19b 100644
--- a/include/net/codel_impl.h
+++ b/include/net/codel_impl.h
@@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb,
}
skb_len = skb_len_func(skb);
- vars->ldelay = now - skb_time_func(skb);
+ WRITE_ONCE(vars->ldelay, now - skb_time_func(skb));
if (unlikely(skb_len > stats->maxpacket))
- stats->maxpacket = skb_len;
+ WRITE_ONCE(stats->maxpacket, skb_len);
if (codel_time_before(vars->ldelay, params->target) ||
*backlog <= params->mtu) {
@@ -159,7 +159,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (!skb) {
vars->first_above_time = 0;
- vars->dropping = false;
+ WRITE_ONCE(vars->dropping, false);
return skb;
}
now = codel_get_time();
@@ -168,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (vars->dropping) {
if (!drop) {
/* sojourn time below target - leave dropping state */
- vars->dropping = false;
+ WRITE_ONCE(vars->dropping, false);
} else if (codel_time_after_eq(now, vars->drop_next)) {
/* It's time for the next drop. Drop the current
* packet and dequeue the next. The dequeue might
@@ -180,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx,
*/
while (vars->dropping &&
codel_time_after_eq(now, vars->drop_next)) {
- vars->count++; /* dont care of possible wrap
- * since there is no more divide
- */
+ /* dont care of possible wrap
+ * since there is no more divide.
+ */
+ WRITE_ONCE(vars->count, vars->count + 1);
codel_Newton_step(vars);
if (params->ecn && INET_ECN_set_ce(skb)) {
- stats->ecn_mark++;
- vars->drop_next =
+ WRITE_ONCE(stats->ecn_mark,
+ stats->ecn_mark + 1);
+ WRITE_ONCE(vars->drop_next,
codel_control_law(vars->drop_next,
params->interval,
- vars->rec_inv_sqrt);
+ vars->rec_inv_sqrt));
goto end;
}
stats->drop_len += skb_len_func(skb);
@@ -202,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx,
skb_time_func,
backlog, now)) {
/* leave dropping state */
- vars->dropping = false;
+ WRITE_ONCE(vars->dropping, false);
} else {
/* and schedule the next drop */
- vars->drop_next =
+ WRITE_ONCE(vars->drop_next,
codel_control_law(vars->drop_next,
params->interval,
- vars->rec_inv_sqrt);
+ vars->rec_inv_sqrt));
}
}
}
@@ -216,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
u32 delta;
if (params->ecn && INET_ECN_set_ce(skb)) {
- stats->ecn_mark++;
+ WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1);
} else {
stats->drop_len += skb_len_func(skb);
drop_func(skb, ctx);
@@ -227,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
stats, skb_len_func,
skb_time_func, backlog, now);
}
- vars->dropping = true;
+ WRITE_ONCE(vars->dropping, true);
/* if min went above target close to when we last went below it
* assume that the drop rate that controlled the queue on the
* last cycle is a good starting point to control it now.
@@ -236,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (delta > 1 &&
codel_time_before(now - vars->drop_next,
16 * params->interval)) {
- vars->count = delta;
+ WRITE_ONCE(vars->count, delta);
/* we dont care if rec_inv_sqrt approximation
* is not very precise :
* Next Newton steps will correct it quadratically.
*/
codel_Newton_step(vars);
} else {
- vars->count = 1;
+ WRITE_ONCE(vars->count, 1);
vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
}
- vars->lastcount = vars->count;
- vars->drop_next = codel_control_law(now, params->interval,
- vars->rec_inv_sqrt);
+ WRITE_ONCE(vars->lastcount, vars->count);
+ WRITE_ONCE(vars->drop_next,
+ codel_control_law(now, params->interval,
+ vars->rec_inv_sqrt));
}
end:
if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
@@ -262,7 +265,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
params->ce_threshold_selector));
}
if (set_ce && INET_ECN_set_ce(skb))
- stats->ce_mark++;
+ WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1);
}
return skb;
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index dc2be90666ffbd715550800790a0091acd28701d..317aae0ec7bd6aedb4bae09b18423c981fed16e7 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -85,7 +85,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_enqueue_tail(skb, sch);
}
q = qdisc_priv(sch);
- q->drop_overlimit++;
+ WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1);
return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT);
}
@@ -221,18 +221,18 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
const struct codel_sched_data *q = qdisc_priv(sch);
struct tc_codel_xstats st = {
- .maxpacket = q->stats.maxpacket,
- .count = q->vars.count,
- .lastcount = q->vars.lastcount,
- .drop_overlimit = q->drop_overlimit,
- .ldelay = codel_time_to_us(q->vars.ldelay),
- .dropping = q->vars.dropping,
- .ecn_mark = q->stats.ecn_mark,
- .ce_mark = q->stats.ce_mark,
+ .maxpacket = READ_ONCE(q->stats.maxpacket),
+ .count = READ_ONCE(q->vars.count),
+ .lastcount = READ_ONCE(q->vars.lastcount),
+ .drop_overlimit = READ_ONCE(q->drop_overlimit),
+ .ldelay = codel_time_to_us(READ_ONCE(q->vars.ldelay)),
+ .dropping = READ_ONCE(q->vars.dropping),
+ .ecn_mark = READ_ONCE(q->stats.ecn_mark),
+ .ce_mark = READ_ONCE(q->stats.ce_mark),
};
- if (q->vars.dropping) {
- codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
+ if (st.dropping) {
+ codel_tdiff_t delta = READ_ONCE(q->vars.drop_next) - codel_get_time();
if (delta >= 0)
st.drop_next = codel_time_to_us(delta);
--
2.53.0.1213.gd9a14994de-goog
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2026-04-07 14:31 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-07 14:30 [PATCH net-next] codel: annotate data-races in codel_dump_stats() Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox