All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] net: udp_tunnel: fix use-after-free by refcounting udp_tunnel_nic
@ 2026-06-24 17:10 Eric Dumazet
  2026-06-24 21:57 ` Jakub Kicinski
  0 siblings, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2026-06-24 17:10 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Ido Schimmel, David Ahern, netdev, eric.dumazet,
	Eric Dumazet, Yue Sun

Yue Sun reported a use-after-free and debugobjects warning in
udp_tunnel_nic_device_sync_work() during concurrent device operations.

The state flags of struct udp_tunnel_nic were originally bitfields
sharing a byte, modified concurrently without locking (RCU vs worker).
Even after converting to atomic bitops, a single WORK_PENDING flag
races: the workqueue core clears the pending bit before running the
worker. A concurrent queueing sets the flag, but the running worker
clears it, leading to premature freeing in unregister() while the
re-queued work is still active.

Fix this introducing reference counting for struct
udp_tunnel_nic. Increment the refcount on successful queue_work(),
and decrement it at the end of the worker. Defer the dev_put() call
for the last device to the free path to ensure the net_device remains
valid as long as the structure is alive.

Additionally, convert concurrent modifications of the 'missed' bitmap
to atomic operations (set_bit, bitmap_zero) to prevent data races there.

Fixes: cc4e3835eff4 ("udp_tunnel: add central NIC RX port offload infrastructure")
Reported-by: Yue Sun <samsun1006219@gmail.com>
Closes: https://lore.kernel.org/netdev/20260624090135.95763-1-samsun1006219@gmail.com/
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/udp_tunnel_nic.c | 75 +++++++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 9944ed923ddfd10f9adf6ad788c0740daeaf2adb..884b5d93b7b39f7f20855ff8ca2ec4d7ef5a9ef6 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -30,9 +30,8 @@ struct udp_tunnel_nic_table_entry {
  * @work:	async work for talking to hardware from process context
  * @dev:	netdev pointer
  * @lock:	protects all fields
- * @need_sync:	at least one port start changed
- * @need_replay: space was freed, we need a replay of all ports
- * @work_pending: @work is currently scheduled
+ * @flags:	sync, replay flags
+ * @refcnt:	reference count
  * @n_tables:	number of tables under @entries
  * @missed:	bitmap of tables which overflown
  * @entries:	table of tables of ports currently offloaded
@@ -44,9 +43,11 @@ struct udp_tunnel_nic {
 
 	struct mutex lock;
 
-	u8 need_sync:1;
-	u8 need_replay:1;
-	u8 work_pending:1;
+	unsigned long flags;
+#define UDP_TUNNEL_NIC_NEED_SYNC	0
+#define UDP_TUNNEL_NIC_NEED_REPLAY	1
+
+	refcount_t refcnt;
 
 	unsigned int n_tables;
 	unsigned long missed;
@@ -116,7 +117,7 @@ udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn,
 			   unsigned int flag)
 {
 	entry->flags |= flag;
-	utn->need_sync = 1;
+	set_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
 }
 
 static void
@@ -283,7 +284,7 @@ udp_tunnel_nic_device_sync_by_table(struct net_device *dev,
 static void
 __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
-	if (!utn->need_sync)
+	if (!test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags))
 		return;
 
 	if (dev->udp_tunnel_nic_info->sync_table)
@@ -291,21 +292,24 @@ __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
 	else
 		udp_tunnel_nic_device_sync_by_port(dev, utn);
 
-	utn->need_sync = 0;
+	clear_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
 	/* Can't replay directly here, in case we come from the tunnel driver's
 	 * notification - trying to replay may deadlock inside tunnel driver.
 	 */
-	utn->need_replay = udp_tunnel_nic_should_replay(dev, utn);
+	if (udp_tunnel_nic_should_replay(dev, utn))
+		set_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
+	else
+		clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
 }
 
 static void
 udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
-	if (!utn->need_sync)
+	if (!test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags))
 		return;
 
-	queue_work(udp_tunnel_nic_workqueue, &utn->work);
-	utn->work_pending = 1;
+	if (queue_work(udp_tunnel_nic_workqueue, &utn->work))
+		refcount_inc(&utn->refcnt);
 }
 
 static bool
@@ -348,7 +352,7 @@ udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn,
 			if (!udp_tunnel_nic_entry_is_free(entry) &&
 			    entry->port == ti->port &&
 			    entry->type != ti->type) {
-				__set_bit(i, &utn->missed);
+				set_bit(i, &utn->missed);
 				return true;
 			}
 		}
@@ -483,7 +487,7 @@ udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn,
 		 * are no devices currently which have multiple tables accepting
 		 * the same tunnel type, and false positives are okay.
 		 */
-		__set_bit(i, &utn->missed);
+		set_bit(i, &utn->missed);
 	}
 
 	return false;
@@ -552,7 +556,7 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
 
 	mutex_lock(&utn->lock);
 
-	utn->need_sync = false;
+	clear_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags);
 	for (i = 0; i < utn->n_tables; i++)
 		for (j = 0; j < info->tables[i].n_entries; j++) {
 			struct udp_tunnel_nic_table_entry *entry;
@@ -696,8 +700,8 @@ udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn)
 	for (i = 0; i < utn->n_tables; i++)
 		memset(utn->entries[i], 0, array_size(info->tables[i].n_entries,
 						      sizeof(**utn->entries)));
-	WARN_ON(utn->need_sync);
-	utn->need_replay = 0;
+	WARN_ON(test_bit(UDP_TUNNEL_NIC_NEED_SYNC, &utn->flags));
+	clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
 }
 
 static void
@@ -713,8 +717,8 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
 	for (i = 0; i < utn->n_tables; i++)
 		for (j = 0; j < info->tables[i].n_entries; j++)
 			udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]);
-	utn->missed = 0;
-	utn->need_replay = 0;
+	bitmap_zero(&utn->missed, UDP_TUNNEL_NIC_MAX_TABLES);
+	clear_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags);
 
 	if (!info->shared) {
 		udp_tunnel_get_rx_info(dev);
@@ -728,6 +732,8 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
 			udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]);
 }
 
+static void udp_tunnel_nic_put(struct udp_tunnel_nic *utn);
+
 static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
 {
 	struct udp_tunnel_nic *utn =
@@ -736,14 +742,15 @@ static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
 	rtnl_lock();
 	mutex_lock(&utn->lock);
 
-	utn->work_pending = 0;
 	__udp_tunnel_nic_device_sync(utn->dev, utn);
 
-	if (utn->need_replay)
+	if (test_bit(UDP_TUNNEL_NIC_NEED_REPLAY, &utn->flags))
 		udp_tunnel_nic_replay(utn->dev, utn);
 
 	mutex_unlock(&utn->lock);
 	rtnl_unlock();
+
+	udp_tunnel_nic_put(utn);
 }
 
 static struct udp_tunnel_nic *
@@ -759,6 +766,7 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
 	utn->n_tables = n_tables;
 	INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
 	mutex_init(&utn->lock);
+	refcount_set(&utn->refcnt, 1);
 
 	for (i = 0; i < n_tables; i++) {
 		utn->entries[i] = kzalloc_objs(*utn->entries[i],
@@ -782,9 +790,19 @@ static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
 
 	for (i = 0; i < utn->n_tables; i++)
 		kfree(utn->entries[i]);
+
+	if (utn->dev)
+		dev_put(utn->dev);
+
 	kfree(utn);
 }
 
+static void udp_tunnel_nic_put(struct udp_tunnel_nic *utn)
+{
+	if (refcount_dec_and_test(&utn->refcnt))
+		udp_tunnel_nic_free(utn);
+}
+
 static int udp_tunnel_nic_register(struct net_device *dev)
 {
 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
@@ -863,6 +881,7 @@ static void
 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
 	const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+	bool last = true;
 
 	udp_tunnel_nic_lock(dev);
 
@@ -889,6 +908,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 			udp_tunnel_drop_rx_info(dev);
 			utn->dev = first->dev;
 			udp_tunnel_nic_unlock(dev);
+			last = false;
 			goto release_dev;
 		}
 
@@ -901,16 +921,11 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 	udp_tunnel_nic_flush(dev, utn);
 	udp_tunnel_nic_unlock(dev);
 
-	/* Wait for the work to be done using the state, netdev core will
-	 * retry unregister until we give up our reference on this device.
-	 */
-	if (utn->work_pending)
-		return;
-
-	udp_tunnel_nic_free(utn);
+	udp_tunnel_nic_put(utn);
 release_dev:
 	dev->udp_tunnel_nic = NULL;
-	dev_put(dev);
+	if (!last)
+		dev_put(dev);
 }
 
 static int
-- 
2.55.0.rc0.799.gd6f94ed593-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net] net: udp_tunnel: fix use-after-free by refcounting udp_tunnel_nic
  2026-06-24 17:10 [PATCH net] net: udp_tunnel: fix use-after-free by refcounting udp_tunnel_nic Eric Dumazet
@ 2026-06-24 21:57 ` Jakub Kicinski
  2026-06-24 22:31   ` Jakub Kicinski
  2026-06-25  2:47   ` Jiayuan Chen
  0 siblings, 2 replies; 5+ messages in thread
From: Jakub Kicinski @ 2026-06-24 21:57 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Paolo Abeni, Simon Horman, Ido Schimmel,
	David Ahern, netdev, eric.dumazet, Yue Sun

On Wed, 24 Jun 2026 17:10:34 +0000 Eric Dumazet wrote:
> Yue Sun reported a use-after-free and debugobjects warning in
> udp_tunnel_nic_device_sync_work() during concurrent device operations.
> 
> The state flags of struct udp_tunnel_nic were originally bitfields
> sharing a byte, modified concurrently without locking (RCU vs worker).

Can you clarify the path where the bits are modified without locks??
My mental model is that this is basically all under rtnl_lock, and 
Stan added _another_ lock so that drivers can call "sync" / reply
without needing rtnl lock, but any changes are still under rtnl_lock.

The gap seems to be that we don't check pending under Stan's new lock,
since commit 1ead7501094c6 ("udp_tunnel: remove rtnl_lock dependency")
did:

+++ b/drivers/net/netdevsim/udp_tunnels.c
@@ -112,12 +112,10 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data,
        struct net_device *dev = file->private_data;
        struct netdevsim *ns = netdev_priv(dev);
 
-       rtnl_lock();
        if (dev->reg_state == NETREG_REGISTERED) {
                memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports));
                udp_tunnel_nic_reset_ntf(dev);
        }
-       rtnl_unlock();


so we just need:

diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 9944ed923ddf..d7db89a222f8 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -863,6 +863,7 @@ static void
 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
        const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+       bool pending;
 
        udp_tunnel_nic_lock(dev);
 
@@ -899,12 +900,14 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
         * from the work which we will boot immediately.
         */
        udp_tunnel_nic_flush(dev, utn);
+
+       pending = utn->work_pending;
        udp_tunnel_nic_unlock(dev);
 
        /* Wait for the work to be done using the state, netdev core will
         * retry unregister until we give up our reference on this device.
         */
-       if (utn->work_pending)
+       if (pending)
                return;
 
        udp_tunnel_nic_free(utn);



> Even after converting to atomic bitops, a single WORK_PENDING flag
> races: the workqueue core clears the pending bit before running the
> worker. A concurrent queueing sets the flag, but the running worker
> clears it, leading to premature freeing in unregister() while the
> re-queued work is still active.

> +	if (utn->dev)
> +		dev_put(utn->dev);

nit: cocci complains that null check is not needed here.

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net] net: udp_tunnel: fix use-after-free by refcounting udp_tunnel_nic
  2026-06-24 21:57 ` Jakub Kicinski
@ 2026-06-24 22:31   ` Jakub Kicinski
  2026-06-25  2:47   ` Jiayuan Chen
  1 sibling, 0 replies; 5+ messages in thread
From: Jakub Kicinski @ 2026-06-24 22:31 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Paolo Abeni, Simon Horman, Ido Schimmel,
	David Ahern, netdev, eric.dumazet, Yue Sun

On Wed, 24 Jun 2026 14:57:22 -0700 Jakub Kicinski wrote:
> so we just need:
> 
> diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
> index 9944ed923ddf..d7db89a222f8 100644
> --- a/net/ipv4/udp_tunnel_nic.c
> +++ b/net/ipv4/udp_tunnel_nic.c
> @@ -863,6 +863,7 @@ static void
>  udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
>  {
>         const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
> +       bool pending;
>  
>         udp_tunnel_nic_lock(dev);
>  
> @@ -899,12 +900,14 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
>          * from the work which we will boot immediately.
>          */
>         udp_tunnel_nic_flush(dev, utn);
> +
> +       pending = utn->work_pending;
>         udp_tunnel_nic_unlock(dev);
>  
>         /* Wait for the work to be done using the state, netdev core will
>          * retry unregister until we give up our reference on this device.
>          */
> -       if (utn->work_pending)
> +       if (pending)
>                 return;
>  
>         udp_tunnel_nic_free(utn);

Maybe not even.. by definition the driver should not race with its own
netdev's unregister? I don't get what the race is..

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] net: udp_tunnel: fix use-after-free by refcounting udp_tunnel_nic
  2026-06-24 21:57 ` Jakub Kicinski
  2026-06-24 22:31   ` Jakub Kicinski
@ 2026-06-25  2:47   ` Jiayuan Chen
  2026-06-25  2:55     ` Jakub Kicinski
  1 sibling, 1 reply; 5+ messages in thread
From: Jiayuan Chen @ 2026-06-25  2:47 UTC (permalink / raw)
  To: Jakub Kicinski, Eric Dumazet
  Cc: David S . Miller, Paolo Abeni, Simon Horman, Ido Schimmel,
	David Ahern, netdev, eric.dumazet, Yue Sun


On 6/25/26 5:57 AM, Jakub Kicinski wrote:
> On Wed, 24 Jun 2026 17:10:34 +0000 Eric Dumazet wrote:
>> Yue Sun reported a use-after-free and debugobjects warning in
>> udp_tunnel_nic_device_sync_work() during concurrent device operations.
>>
>> The state flags of struct udp_tunnel_nic were originally bitfields
>> sharing a byte, modified concurrently without locking (RCU vs worker).
> Can you clarify the path where the bits are modified without locks??
> My mental model is that this is basically all under rtnl_lock, and
> Stan added _another_ lock so that drivers can call "sync" / reply
> without needing rtnl lock, but any changes are still under rtnl_lock.
>
> The gap seems to be that we don't check pending under Stan's new lock,
> since commit 1ead7501094c6 ("udp_tunnel: remove rtnl_lock dependency")
> did:


I think the real problem is that a single work_pending flag can't track 
the work being queued twice:

1. Thread A calls queue_work() -> work_pending = 1.
2. The worker gets picked up; workqueue clears the PENDING(internal work 
queue flag) bit before running the work function.
    The worker then blocks on rtnl/utn->lock.
3. Thread B calls queue_work() again. Since PENDING was already cleared, 
it enqueues a second
    instance and sets work_pending = 1.
4. A's worker finally gets the lock and does work_pending = 0, runs, 
returns.
5. Now work_pending == 0 but B's instance is still queued. unregister 
sees 0, frees utn.



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] net: udp_tunnel: fix use-after-free by refcounting udp_tunnel_nic
  2026-06-25  2:47   ` Jiayuan Chen
@ 2026-06-25  2:55     ` Jakub Kicinski
  0 siblings, 0 replies; 5+ messages in thread
From: Jakub Kicinski @ 2026-06-25  2:55 UTC (permalink / raw)
  To: Jiayuan Chen
  Cc: Eric Dumazet, David S . Miller, Paolo Abeni, Simon Horman,
	Ido Schimmel, David Ahern, netdev, eric.dumazet, Yue Sun

On Thu, 25 Jun 2026 10:47:09 +0800 Jiayuan Chen wrote:
> On 6/25/26 5:57 AM, Jakub Kicinski wrote:
> > On Wed, 24 Jun 2026 17:10:34 +0000 Eric Dumazet wrote:  
> >> Yue Sun reported a use-after-free and debugobjects warning in
> >> udp_tunnel_nic_device_sync_work() during concurrent device operations.
> >>
> >> The state flags of struct udp_tunnel_nic were originally bitfields
> >> sharing a byte, modified concurrently without locking (RCU vs worker).  
> > Can you clarify the path where the bits are modified without locks??
> > My mental model is that this is basically all under rtnl_lock, and
> > Stan added _another_ lock so that drivers can call "sync" / reply
> > without needing rtnl lock, but any changes are still under rtnl_lock.
> >
> > The gap seems to be that we don't check pending under Stan's new lock,
> > since commit 1ead7501094c6 ("udp_tunnel: remove rtnl_lock dependency")
> > did:  
> 
> 
> I think the real problem is that a single work_pending flag can't track 
> the work being queued twice:
> 
> 1. Thread A calls queue_work() -> work_pending = 1.
> 2. The worker gets picked up; workqueue clears the PENDING(internal work 
> queue flag) bit before running the work function.
>     The worker then blocks on rtnl/utn->lock.
> 3. Thread B calls queue_work() again. Since PENDING was already cleared, 
> it enqueues a second
>     instance and sets work_pending = 1.
> 4. A's worker finally gets the lock and does work_pending = 0, runs, 
> returns.
> 5. Now work_pending == 0 but B's instance is still queued. unregister 
> sees 0, frees utn.

Ah, thanks, now I get it. Claude told me the same thing but in 10,000
words and I lost the thread before reading 'til the end... 

In that case:

diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 9944ed923ddf..3b32a0afa979 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -301,7 +301,7 @@ __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
 static void
 udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
 {
-       if (!utn->need_sync)
+       if (!utn->need_sync || utn->work_pending)
                return;
 
        queue_work(udp_tunnel_nic_workqueue, &utn->work);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-06-25  2:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-24 17:10 [PATCH net] net: udp_tunnel: fix use-after-free by refcounting udp_tunnel_nic Eric Dumazet
2026-06-24 21:57 ` Jakub Kicinski
2026-06-24 22:31   ` Jakub Kicinski
2026-06-25  2:47   ` Jiayuan Chen
2026-06-25  2:55     ` Jakub Kicinski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.