Netdev List
 help / color / mirror / Atom feed
* [PATCH net 1/1] net/sched: sch_teql: Introduce slaves_lock to avoid race condition and UAF
@ 2026-06-23 18:42 Jamal Hadi Salim
  0 siblings, 0 replies; only message in thread
From: Jamal Hadi Salim @ 2026-06-23 18:42 UTC (permalink / raw)
  To: netdev
  Cc: davem, edumazet, kuba, pabeni, horms, victor, andrew+netdev,
	zdi-disclosures, security, stable, Jamal Hadi Salim

The teql master->slaves singly linked list is not protected against multiple
writes. It can be mod'ed concurently from teql_master_xmit(), teql_dequeue(),
teql_init() and teql_destroy() without holding any list lock or RCU protection.

zdi-disclosures@trendmicro.com has demonstrated that the qdisc is freed
after an RCU grace period, but teql_master_xmit() running on another
CPU can still hold a stale pointer into the list, resulting in a
slab-use-after-free:

BUG: KASAN: slab-use-after-free in teql_destroy+0x3ca/0x440 linux/net/sched/sch_teql.c:142
Read of size 8 at addr ffff88802923aa80 by task ip/10024

CPU: 1 UID: 0 PID: 10024 Comm: ip Not tainted 7.1.0-rc5 #1 PREEMPT(lazy)
Hardware name: QEMU Ubuntu 25.10 PC v2 (i440FX + PIIX, + 10.1 machine, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
Call Trace:
 <TASK>
 __dump_stack linux/lib/dump_stack.c:94
 dump_stack_lvl+0x100/0x190 linux/lib/dump_stack.c:120
 print_address_description linux/mm/kasan/report.c:378
 print_report+0x139/0x4ad linux/mm/kasan/report.c:482
 kasan_report+0xe4/0x1d0 linux/mm/kasan/report.c:595
 teql_destroy+0x3ca/0x440 linux/net/sched/sch_teql.c:142
 __qdisc_destroy+0x109/0x540 linux/net/sched/sch_generic.c:1100
 qdisc_put+0xad/0xf0 linux/net/sched/sch_generic.c:1128
 dev_shutdown+0x1cd/0x450 linux/net/sched/sch_generic.c:1493
 unregister_netdevice_many_notify+0xd30/0x24b0 linux/net/core/dev.c:12409
 rtnl_delete_link linux/net/core/rtnetlink.c:3552
 rtnl_dellink+0x476/0xb50 linux/net/core/rtnetlink.c:3594
 rtnetlink_rcv_msg+0x954/0xe80 linux/net/core/rtnetlink.c:6997
 netlink_rcv_skb+0x156/0x420 linux/net/netlink/af_netlink.c:2550
 netlink_unicast_kernel linux/net/netlink/af_netlink.c:1318
 netlink_unicast+0x58d/0x860 linux/net/netlink/af_netlink.c:1344
 netlink_sendmsg+0x89a/0xd80 linux/net/netlink/af_netlink.c:1894
 sock_sendmsg_nosec linux/net/socket.c:787
 __sock_sendmsg linux/net/socket.c:802
 ____sys_sendmsg+0x9d9/0xb70 linux/net/socket.c:2698
 ___sys_sendmsg+0x194/0x1e0 linux/net/socket.c:2752
 __sys_sendmsg+0x171/0x220 linux/net/socket.c:2784
 do_syscall_x64 linux/arch/x86/entry/syscall_64.c:63
 do_syscall_64+0xff/0x890 linux/arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f linux/arch/x86/entry/entry_64.S:121
[..]

The zdi-disclosures@trendmicro.com repro created concurrent AF_PACKET senders
on a teql device against a thread that repeatedly adds/deletes the slave qdisc,
together with a SLUB spray that reclaims the freed slot; the resulting
UAF is controllable enough to be turned into a read/write primitive against the
freed qdisc object.

The fix?
Add a per-master slaves_lock spinlock that serializes all mutations of
master->slaves and the NEXT_SLAVE() links in teql_destroy() and
teql_qdisc_init(). teql_master_xmit() also takes the same slaves_lock around
those updates.
Pair this with READ_ONCE()/WRITE_ONCE() on the shared pointers and
rcu_read_lock_bh()/rcu_read_unlock_bh() around the list traversal in
teql_master_xmit() and teql_dequeue(), so that readers either observe a fully
linked list or are deferred until the in-flight mutation completes. The two
early-return paths in teql_master_xmit() are updated to release the RCU-bh
read-side critical section before returning, since leaving it held would disable
BH on that CPU for good.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: zdi-disclosures@trendmicro.com
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 net/sched/sch_teql.c | 71 +++++++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 24 deletions(-)

diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index e7bbc9e5174d..dacdc46637df 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,6 +53,7 @@ struct teql_master {
 	struct Qdisc_ops qops;
 	struct net_device *dev;
 	struct Qdisc *slaves;
+	spinlock_t		slaves_lock;	/* serializes writes to ->slaves */
 	struct list_head master_list;
 	unsigned long	tx_bytes;
 	unsigned long	tx_packets;
@@ -101,7 +102,9 @@ teql_dequeue(struct Qdisc *sch)
 	if (skb == NULL) {
 		struct net_device *m = qdisc_dev(q);
 		if (m) {
-			dat->m->slaves = sch;
+			spin_lock_bh(&dat->m->slaves_lock);
+			rcu_assign_pointer(dat->m->slaves, sch);
+			spin_unlock_bh(&dat->m->slaves_lock);
 			netif_wake_queue(m);
 		}
 	} else {
@@ -132,34 +135,37 @@ teql_destroy(struct Qdisc *sch)
 	struct Qdisc *q, *prev;
 	struct teql_sched_data *dat = qdisc_priv(sch);
 	struct teql_master *master = dat->m;
+	struct netdev_queue *txq = NULL;
+	bool reset_master_queue = false;
 
 	if (!master)
 		return;
 
-	prev = master->slaves;
+	spin_lock_bh(&master->slaves_lock);
+	prev = READ_ONCE(master->slaves);
 	if (prev) {
 		do {
-			q = NEXT_SLAVE(prev);
+			q = READ_ONCE(NEXT_SLAVE(prev));
 			if (q == sch) {
-				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
-				if (q == master->slaves) {
-					master->slaves = NEXT_SLAVE(q);
-					if (q == master->slaves) {
-						struct netdev_queue *txq;
-
+				WRITE_ONCE(NEXT_SLAVE(prev), READ_ONCE(NEXT_SLAVE(q)));
+				if (q == READ_ONCE(master->slaves)) {
+					WRITE_ONCE(master->slaves, READ_ONCE(NEXT_SLAVE(q)));
+					if (q == READ_ONCE(master->slaves)) {
 						txq = netdev_get_tx_queue(master->dev, 0);
-						master->slaves = NULL;
-
-						dev_reset_queue(master->dev,
-								txq, NULL);
+						WRITE_ONCE(master->slaves, NULL);
+						reset_master_queue = true;
 					}
 				}
 				skb_queue_purge(&dat->q);
 				break;
 			}
 
-		} while ((prev = q) != master->slaves);
+		} while ((prev = q) != READ_ONCE(master->slaves));
 	}
+	spin_unlock_bh(&master->slaves_lock);
+
+	if (reset_master_queue)
+		dev_reset_queue(master->dev, txq, NULL);
 }
 
 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
@@ -184,7 +190,8 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
 
 	skb_queue_head_init(&q->q);
 
-	if (m->slaves) {
+	spin_lock_bh(&m->slaves_lock);
+	if (READ_ONCE(m->slaves)) {
 		if (m->dev->flags & IFF_UP) {
 			if ((m->dev->flags & IFF_POINTOPOINT &&
 			     !(dev->flags & IFF_POINTOPOINT)) ||
@@ -192,8 +199,10 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
 			     !(dev->flags & IFF_BROADCAST)) ||
 			    (m->dev->flags & IFF_MULTICAST &&
 			     !(dev->flags & IFF_MULTICAST)) ||
-			    dev->mtu < m->dev->mtu)
+			    dev->mtu < m->dev->mtu) {
+				spin_unlock_bh(&m->slaves_lock);
 				return -EINVAL;
+			}
 		} else {
 			if (!(dev->flags&IFF_POINTOPOINT))
 				m->dev->flags &= ~IFF_POINTOPOINT;
@@ -204,14 +213,15 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
 			if (dev->mtu < m->dev->mtu)
 				m->dev->mtu = dev->mtu;
 		}
-		q->next = NEXT_SLAVE(m->slaves);
-		NEXT_SLAVE(m->slaves) = sch;
+		WRITE_ONCE(q->next, READ_ONCE(NEXT_SLAVE(m->slaves)));
+		rcu_assign_pointer(NEXT_SLAVE(m->slaves), sch);
 	} else {
-		q->next = sch;
-		m->slaves = sch;
+		WRITE_ONCE(q->next, sch);
+		rcu_assign_pointer(m->slaves, sch);
 		m->dev->mtu = dev->mtu;
 		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 	}
+	spin_unlock_bh(&m->slaves_lock);
 	return 0;
 }
 
@@ -285,7 +295,9 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 	int subq = skb_get_queue_mapping(skb);
 	struct sk_buff *skb_res = NULL;
 
-	start = master->slaves;
+	rcu_read_lock_bh();
+
+	start = rcu_dereference_bh(master->slaves);
 
 restart:
 	nores = 0;
@@ -317,10 +329,14 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 				    netdev_start_xmit(skb, slave, slave_txq, false) ==
 				    NETDEV_TX_OK) {
 					__netif_tx_unlock(slave_txq);
-					master->slaves = NEXT_SLAVE(q);
+					spin_lock_bh(&master->slaves_lock);
+					rcu_assign_pointer(master->slaves,
+							   rcu_dereference_bh(NEXT_SLAVE(q)));
+					spin_unlock_bh(&master->slaves_lock);
 					netif_wake_queue(dev);
 					master->tx_packets++;
 					master->tx_bytes += length;
+					rcu_read_unlock_bh();
 					return NETDEV_TX_OK;
 				}
 				__netif_tx_unlock(slave_txq);
@@ -329,14 +345,18 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 				busy = 1;
 			break;
 		case 1:
-			master->slaves = NEXT_SLAVE(q);
+			spin_lock_bh(&master->slaves_lock);
+			rcu_assign_pointer(master->slaves,
+					   rcu_dereference_bh(NEXT_SLAVE(q)));
+			spin_unlock_bh(&master->slaves_lock);
+			rcu_read_unlock_bh();
 			return NETDEV_TX_OK;
 		default:
 			nores = 1;
 			break;
 		}
 		__skb_pull(skb, skb_network_offset(skb));
-	} while ((q = NEXT_SLAVE(q)) != start);
+	} while ((q = rcu_dereference_bh(NEXT_SLAVE(q))) != start);
 
 	if (nores && skb_res == NULL) {
 		skb_res = skb;
@@ -345,12 +365,14 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (busy) {
 		netif_stop_queue(dev);
+		rcu_read_unlock_bh();
 		return NETDEV_TX_BUSY;
 	}
 	master->tx_errors++;
 
 drop:
 	master->tx_dropped++;
+	rcu_read_unlock_bh();
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
@@ -444,6 +466,7 @@ static __init void teql_master_setup(struct net_device *dev)
 	struct teql_master *master = netdev_priv(dev);
 	struct Qdisc_ops *ops = &master->qops;
 
+	spin_lock_init(&master->slaves_lock);
 	master->dev	= dev;
 	ops->priv_size  = sizeof(struct teql_sched_data);
 
-- 
2.54.0


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-06-23 18:43 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-23 18:42 [PATCH net 1/1] net/sched: sch_teql: Introduce slaves_lock to avoid race condition and UAF Jamal Hadi Salim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox