[PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace

public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
@ 2026-04-02 10:17 Eric Dumazet
  2026-04-02 18:10 ` Justin Iurman
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Eric Dumazet @ 2026-04-02 10:17 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, David Ahern, netdev, eric.dumazet, Eric Dumazet,
	Yiming Qian, Justin Iurman

We need to check __in6_dev_get() for possible NULL value, as
suggested by Yiming Qian.

Also add skb_dst_dev_rcu() instead of skb_dst_dev(),
and two missing READ_ONCE().

Note that @dev can't be NULL.

Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace")
Reported-by: Yiming Qian <yimingqian591@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Justin Iurman <justin.iurman@gmail.com>
---
 net/ipv6/ioam6.c | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 3978773bec424890cd18db78cf7cac9d3d652130..05a0b7d7e2aac35f634641fc4a791d1965dc85fd 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -710,7 +710,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 				    struct ioam6_schema *sc,
 				    unsigned int sclen, bool is_input)
 {
-	struct net_device *dev = skb_dst_dev(skb);
+	/* Note: skb_dst_dev_rcu() can't be NULL at this point. */
+	struct net_device *dev = skb_dst_dev_rcu(skb);
+	struct inet6_dev *i_skb_dev, *idev;
 	struct timespec64 ts;
 	ktime_t tstamp;
 	u64 raw64;
@@ -721,13 +723,16 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 
 	data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4;
 
+	i_skb_dev = skb->dev ? __in6_dev_get(skb->dev) : NULL;
+	idev = __in6_dev_get(dev);
+
 	/* hop_lim and node_id */
 	if (trace->type.bit0) {
 		byte = ipv6_hdr(skb)->hop_limit;
 		if (is_input)
 			byte--;
 
-		raw32 = dev_net(dev)->ipv6.sysctl.ioam6_id;
+		raw32 = READ_ONCE(dev_net(dev)->ipv6.sysctl.ioam6_id);
 
 		*(__be32 *)data = cpu_to_be32((byte << 24) | raw32);
 		data += sizeof(__be32);
@@ -735,18 +740,18 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 
 	/* ingress_if_id and egress_if_id */
 	if (trace->type.bit1) {
-		if (!skb->dev)
+		if (!i_skb_dev)
 			raw16 = IOAM6_U16_UNAVAILABLE;
 		else
-			raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id);
+			raw16 = (__force u16)READ_ONCE(i_skb_dev->cnf.ioam6_id);
 
 		*(__be16 *)data = cpu_to_be16(raw16);
 		data += sizeof(__be16);
 
-		if (dev->flags & IFF_LOOPBACK)
+		if ((dev->flags & IFF_LOOPBACK) || !idev)
 			raw16 = IOAM6_U16_UNAVAILABLE;
 		else
-			raw16 = (__force u16)READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id);
+			raw16 = (__force u16)READ_ONCE(idev->cnf.ioam6_id);
 
 		*(__be16 *)data = cpu_to_be16(raw16);
 		data += sizeof(__be16);
@@ -822,7 +827,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 		if (is_input)
 			byte--;
 
-		raw64 = dev_net(dev)->ipv6.sysctl.ioam6_id_wide;
+		raw64 = READ_ONCE(dev_net(dev)->ipv6.sysctl.ioam6_id_wide);
 
 		*(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64);
 		data += sizeof(__be64);
@@ -830,18 +835,18 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
 
 	/* ingress_if_id and egress_if_id (wide) */
 	if (trace->type.bit9) {
-		if (!skb->dev)
+		if (!i_skb_dev)
 			raw32 = IOAM6_U32_UNAVAILABLE;
 		else
-			raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide);
+			raw32 = READ_ONCE(i_skb_dev->cnf.ioam6_id_wide);
 
 		*(__be32 *)data = cpu_to_be32(raw32);
 		data += sizeof(__be32);
 
-		if (dev->flags & IFF_LOOPBACK)
+		if ((dev->flags & IFF_LOOPBACK) || !idev)
 			raw32 = IOAM6_U32_UNAVAILABLE;
 		else
-			raw32 = READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id_wide);
+			raw32 = READ_ONCE(idev->cnf.ioam6_id_wide);
 
 		*(__be32 *)data = cpu_to_be32(raw32);
 		data += sizeof(__be32);
-- 
2.53.0.1185.g05d4b7b318-goog


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-02 10:17 [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data() Eric Dumazet
@ 2026-04-02 18:10 ` Justin Iurman
  2026-04-03 21:44 ` Jakub Kicinski
  2026-04-03 22:00 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 10+ messages in thread
From: Justin Iurman @ 2026-04-02 18:10 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, David Ahern, netdev, eric.dumazet, Yiming Qian

On 4/2/26 12:17, Eric Dumazet wrote:
> We need to check __in6_dev_get() for possible NULL value, as
> suggested by Yiming Qian.
> 
> Also add skb_dst_dev_rcu() instead of skb_dst_dev(),
> and two missing READ_ONCE().
> 
> Note that @dev can't be NULL.
> 
> Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace")
> Reported-by: Yiming Qian <yimingqian591@gmail.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Justin Iurman <justin.iurman@gmail.com>
> ---
>   net/ipv6/ioam6.c | 27 ++++++++++++++++-----------
>   1 file changed, 16 insertions(+), 11 deletions(-)
> 
> diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
> index 3978773bec424890cd18db78cf7cac9d3d652130..05a0b7d7e2aac35f634641fc4a791d1965dc85fd 100644
> --- a/net/ipv6/ioam6.c
> +++ b/net/ipv6/ioam6.c
> @@ -710,7 +710,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
>   				    struct ioam6_schema *sc,
>   				    unsigned int sclen, bool is_input)
>   {
> -	struct net_device *dev = skb_dst_dev(skb);
> +	/* Note: skb_dst_dev_rcu() can't be NULL at this point. */
> +	struct net_device *dev = skb_dst_dev_rcu(skb);
> +	struct inet6_dev *i_skb_dev, *idev;
>   	struct timespec64 ts;
>   	ktime_t tstamp;
>   	u64 raw64;
> @@ -721,13 +723,16 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
>   
>   	data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4;
>   
> +	i_skb_dev = skb->dev ? __in6_dev_get(skb->dev) : NULL;
> +	idev = __in6_dev_get(dev);
> +
>   	/* hop_lim and node_id */
>   	if (trace->type.bit0) {
>   		byte = ipv6_hdr(skb)->hop_limit;
>   		if (is_input)
>   			byte--;
>   
> -		raw32 = dev_net(dev)->ipv6.sysctl.ioam6_id;
> +		raw32 = READ_ONCE(dev_net(dev)->ipv6.sysctl.ioam6_id);
>   
>   		*(__be32 *)data = cpu_to_be32((byte << 24) | raw32);
>   		data += sizeof(__be32);
> @@ -735,18 +740,18 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
>   
>   	/* ingress_if_id and egress_if_id */
>   	if (trace->type.bit1) {
> -		if (!skb->dev)
> +		if (!i_skb_dev)
>   			raw16 = IOAM6_U16_UNAVAILABLE;
>   		else
> -			raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id);
> +			raw16 = (__force u16)READ_ONCE(i_skb_dev->cnf.ioam6_id);
>   
>   		*(__be16 *)data = cpu_to_be16(raw16);
>   		data += sizeof(__be16);
>   
> -		if (dev->flags & IFF_LOOPBACK)
> +		if ((dev->flags & IFF_LOOPBACK) || !idev)
>   			raw16 = IOAM6_U16_UNAVAILABLE;
>   		else
> -			raw16 = (__force u16)READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id);
> +			raw16 = (__force u16)READ_ONCE(idev->cnf.ioam6_id);
>   
>   		*(__be16 *)data = cpu_to_be16(raw16);
>   		data += sizeof(__be16);
> @@ -822,7 +827,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
>   		if (is_input)
>   			byte--;
>   
> -		raw64 = dev_net(dev)->ipv6.sysctl.ioam6_id_wide;
> +		raw64 = READ_ONCE(dev_net(dev)->ipv6.sysctl.ioam6_id_wide);
>   
>   		*(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64);
>   		data += sizeof(__be64);
> @@ -830,18 +835,18 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
>   
>   	/* ingress_if_id and egress_if_id (wide) */
>   	if (trace->type.bit9) {
> -		if (!skb->dev)
> +		if (!i_skb_dev)
>   			raw32 = IOAM6_U32_UNAVAILABLE;
>   		else
> -			raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide);
> +			raw32 = READ_ONCE(i_skb_dev->cnf.ioam6_id_wide);
>   
>   		*(__be32 *)data = cpu_to_be32(raw32);
>   		data += sizeof(__be32);
>   
> -		if (dev->flags & IFF_LOOPBACK)
> +		if ((dev->flags & IFF_LOOPBACK) || !idev)
>   			raw32 = IOAM6_U32_UNAVAILABLE;
>   		else
> -			raw32 = READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id_wide);
> +			raw32 = READ_ONCE(idev->cnf.ioam6_id_wide);
>   
>   		*(__be32 *)data = cpu_to_be32(raw32);
>   		data += sizeof(__be32);

LGTM, thanks Eric.

Reviewed-by: Justin Iurman <justin.iurman@gmail.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-02 10:17 [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data() Eric Dumazet
  2026-04-02 18:10 ` Justin Iurman
@ 2026-04-03 21:44 ` Jakub Kicinski
  2026-04-03 21:47   ` Eric Dumazet
  2026-04-03 22:00 ` patchwork-bot+netdevbpf
  2 siblings, 1 reply; 10+ messages in thread
From: Jakub Kicinski @ 2026-04-03 21:44 UTC (permalink / raw)
  To: edumazet
  Cc: Jakub Kicinski, davem, pabeni, horms, dsahern, netdev,
	eric.dumazet, yimingqian591, justin.iurman

This is an AI-generated review of your patch. The human sending this
email says "more bugs to fix.."
---
ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()

This commit fixes potential NULL pointer dereferences in
__ioam6_fill_trace_data() by checking __in6_dev_get() and using
skb_dst_dev_rcu(). It also adds missing READ_ONCE() annotations.

> diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
> index 3978773bec42..05a0b7d7e2aa 100644
> --- a/net/ipv6/ioam6.c
> +++ b/net/ipv6/ioam6.c
> @@ -710,7 +710,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
>  				    struct ioam6_schema *sc,
>  				    unsigned int sclen, bool is_input)
>  {
> -	struct net_device *dev = skb_dst_dev(skb);
> +	/* Note: skb_dst_dev_rcu() can't be NULL at this point. */
> +	struct net_device *dev = skb_dst_dev_rcu(skb);

Could this code lead to an out-of-bounds access of the dev->_tx[] array
further down in this function?

While reviewing this patch, a pre-existing regression was noticed when
trace->type.bit6 is set:

    if (trace->type.bit6) {
        ...
        queue = skb_get_tx_queue(dev, skb);
        qdisc = rcu_dereference(queue->qdisc);

When is_input is true, the packet is on the RX path and skb->queue_mapping
contains the RX queue index of the ingress device.

If the ingress device has more RX queues than the egress device (dev) has
TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues.

Since skb_get_tx_queue() does not clamp the index, will it return an
out-of-bounds pointer into the egress device's dev->_tx array, causing
dereferencing queue->qdisc to read invalid memory?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-03 21:44 ` Jakub Kicinski
@ 2026-04-03 21:47   ` Eric Dumazet
  2026-04-03 21:50     ` Jakub Kicinski
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2026-04-03 21:47 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, pabeni, horms, dsahern, netdev, eric.dumazet,
	yimingqian591, justin.iurman

On Fri, Apr 3, 2026 at 2:44 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> This is an AI-generated review of your patch. The human sending this
> email says "more bugs to fix.."
> ---
> ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
>
> This commit fixes potential NULL pointer dereferences in
> __ioam6_fill_trace_data() by checking __in6_dev_get() and using
> skb_dst_dev_rcu(). It also adds missing READ_ONCE() annotations.
>
> > diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
> > index 3978773bec42..05a0b7d7e2aa 100644
> > --- a/net/ipv6/ioam6.c
> > +++ b/net/ipv6/ioam6.c
> > @@ -710,7 +710,9 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
> >                                   struct ioam6_schema *sc,
> >                                   unsigned int sclen, bool is_input)
> >  {
> > -     struct net_device *dev = skb_dst_dev(skb);
> > +     /* Note: skb_dst_dev_rcu() can't be NULL at this point. */
> > +     struct net_device *dev = skb_dst_dev_rcu(skb);
>
> Could this code lead to an out-of-bounds access of the dev->_tx[] array
> further down in this function?
>
> While reviewing this patch, a pre-existing regression was noticed when
> trace->type.bit6 is set:
>
>     if (trace->type.bit6) {
>         ...
>         queue = skb_get_tx_queue(dev, skb);
>         qdisc = rcu_dereference(queue->qdisc);
>
> When is_input is true, the packet is on the RX path and skb->queue_mapping
> contains the RX queue index of the ingress device.
>
> If the ingress device has more RX queues than the egress device (dev) has
> TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues.
>
> Since skb_get_tx_queue() does not clamp the index, will it return an
> out-of-bounds pointer into the egress device's dev->_tx array, causing
> dereferencing queue->qdisc to read invalid memory?

This seems a different bug ?

I mean, do we need to fix all bug in a single patch ?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-03 21:47   ` Eric Dumazet
@ 2026-04-03 21:50     ` Jakub Kicinski
  2026-04-03 21:51       ` Eric Dumazet
  2026-04-04 10:13       ` Justin Iurman
  0 siblings, 2 replies; 10+ messages in thread
From: Jakub Kicinski @ 2026-04-03 21:50 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, pabeni, horms, dsahern, netdev, eric.dumazet,
	yimingqian591, justin.iurman

On Fri, 3 Apr 2026 14:47:32 -0700 Eric Dumazet wrote:
> > If the ingress device has more RX queues than the egress device (dev) has
> > TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues.
> >
> > Since skb_get_tx_queue() does not clamp the index, will it return an
> > out-of-bounds pointer into the egress device's dev->_tx array, causing
> > dereferencing queue->qdisc to read invalid memory?  
> 
> This seems a different bug ?
> 
> I mean, do we need to fix all bug in a single patch ?

no no, sorry, i'm just sending this out for Justin or you as a separate
thing.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-03 21:50     ` Jakub Kicinski
@ 2026-04-03 21:51       ` Eric Dumazet
  2026-04-04 10:13       ` Justin Iurman
  1 sibling, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2026-04-03 21:51 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, pabeni, horms, dsahern, netdev, eric.dumazet,
	yimingqian591, justin.iurman

On Fri, Apr 3, 2026 at 2:50 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Fri, 3 Apr 2026 14:47:32 -0700 Eric Dumazet wrote:
> > > If the ingress device has more RX queues than the egress device (dev) has
> > > TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues.
> > >
> > > Since skb_get_tx_queue() does not clamp the index, will it return an
> > > out-of-bounds pointer into the egress device's dev->_tx array, causing
> > > dereferencing queue->qdisc to read invalid memory?
> >
> > This seems a different bug ?
> >
> > I mean, do we need to fix all bug in a single patch ?
>
> no no, sorry, i'm just sending this out for Justin or you as a separate
> thing.

Ah ok, thanks for the clarification :)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-02 10:17 [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data() Eric Dumazet
  2026-04-02 18:10 ` Justin Iurman
  2026-04-03 21:44 ` Jakub Kicinski
@ 2026-04-03 22:00 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 10+ messages in thread
From: patchwork-bot+netdevbpf @ 2026-04-03 22:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, kuba, pabeni, horms, dsahern, netdev, eric.dumazet,
	yimingqian591, justin.iurman

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Thu,  2 Apr 2026 10:17:32 +0000 you wrote:
> We need to check __in6_dev_get() for possible NULL value, as
> suggested by Yiming Qian.
> 
> Also add skb_dst_dev_rcu() instead of skb_dst_dev(),
> and two missing READ_ONCE().
> 
> Note that @dev can't be NULL.
> 
> [...]

Here is the summary with links:
  - [net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
    https://git.kernel.org/netdev/net/c/4e65a8b8daa1

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-03 21:50     ` Jakub Kicinski
  2026-04-03 21:51       ` Eric Dumazet
@ 2026-04-04 10:13       ` Justin Iurman
  2026-04-04 11:44         ` Eric Dumazet
  1 sibling, 1 reply; 10+ messages in thread
From: Justin Iurman @ 2026-04-04 10:13 UTC (permalink / raw)
  To: Jakub Kicinski, Eric Dumazet
  Cc: davem, pabeni, horms, dsahern, netdev, eric.dumazet,
	yimingqian591

On 4/3/26 23:50, Jakub Kicinski wrote:
> On Fri, 3 Apr 2026 14:47:32 -0700 Eric Dumazet wrote:
>>> If the ingress device has more RX queues than the egress device (dev) has
>>> TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues.
>>>
>>> Since skb_get_tx_queue() does not clamp the index, will it return an
>>> out-of-bounds pointer into the egress device's dev->_tx array, causing
>>> dereferencing queue->qdisc to read invalid memory?
>>
>> This seems a different bug ?
>>
>> I mean, do we need to fix all bug in a single patch ?
> 
> no no, sorry, i'm just sending this out for Justin or you as a separate
> thing.

Thanks Jakub, I don't know how I missed that. It's not just about a 
possible OOB: it's actually incorrect for IOAM to do that because we're 
relying on a lucky assumption at best (i.e., if queue_mapping on RX == 
queue_mapping on TX). This unfortunately rules out the possibility to 
have an accurate per queue visibility (which I originally provided for 
flexibility). I've had a local patch for a long time to aggregate the 
queue depth (i.e., sum of all TX queues). It is more expensive(***), but 
it would kill two birds with one stone and solve both problems 
simultaneously.

Jakub, Eric, please let me know if the following plan works for both of you:
- (net) fix the OOB reported by Jakub (and, while at it, add missing 
locks around qdisc_qstats_qlen_backlog() -> also included in my local patch)
- (net-next) after the merge window, add the new feature

After that, the per queue visibility would be considered 
legacy/deprecated and not the default behavior. Documentation would be 
updated accordingly to explain why per queue visibility cannot be trusted.

  (***) the advise to operators in such a case is (obviously) to reduce 
the IOAM insertion (with the frequency parameter), especially at line rate.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-04 10:13       ` Justin Iurman
@ 2026-04-04 11:44         ` Eric Dumazet
  2026-04-04 17:32           ` Justin Iurman
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2026-04-04 11:44 UTC (permalink / raw)
  To: Justin Iurman
  Cc: Jakub Kicinski, davem, pabeni, horms, dsahern, netdev,
	eric.dumazet, yimingqian591

On Sat, Apr 4, 2026 at 3:13 AM Justin Iurman <justin.iurman@gmail.com> wrote:
>
> On 4/3/26 23:50, Jakub Kicinski wrote:
> > On Fri, 3 Apr 2026 14:47:32 -0700 Eric Dumazet wrote:
> >>> If the ingress device has more RX queues than the egress device (dev) has
> >>> TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues.
> >>>
> >>> Since skb_get_tx_queue() does not clamp the index, will it return an
> >>> out-of-bounds pointer into the egress device's dev->_tx array, causing
> >>> dereferencing queue->qdisc to read invalid memory?
> >>
> >> This seems a different bug ?
> >>
> >> I mean, do we need to fix all bug in a single patch ?
> >
> > no no, sorry, i'm just sending this out for Justin or you as a separate
> > thing.
>
> Thanks Jakub, I don't know how I missed that. It's not just about a
> possible OOB: it's actually incorrect for IOAM to do that because we're
> relying on a lucky assumption at best (i.e., if queue_mapping on RX ==
> queue_mapping on TX). This unfortunately rules out the possibility to
> have an accurate per queue visibility (which I originally provided for
> flexibility). I've had a local patch for a long time to aggregate the
> queue depth (i.e., sum of all TX queues). It is more expensive(***), but
> it would kill two birds with one stone and solve both problems
> simultaneously.
>
> Jakub, Eric, please let me know if the following plan works for both of you:
> - (net) fix the OOB reported by Jakub (and, while at it, add missing
> locks around qdisc_qstats_qlen_backlog() -> also included in my local patch)
> - (net-next) after the merge window, add the new feature
>
> After that, the per queue visibility would be considered
> legacy/deprecated and not the default behavior. Documentation would be
> updated accordingly to explain why per queue visibility cannot be trusted.
>
>   (***) the advise to operators in such a case is (obviously) to reduce
> the IOAM insertion (with the frequency parameter), especially at line rate.

Getting qdisc stats can be expensive for qdisc with per-cpu storage
for counters like pfifo-fast.

If your plan is to sum MQ stats, this could have quadratic behavior :
O(nr_cpus ^ 2)

I suggest rate-limiting this very expensive operation, or OAM will
become a nice DOS tool.
(Ie not depend on operators being nice, enforce this on the hosts)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data()
  2026-04-04 11:44         ` Eric Dumazet
@ 2026-04-04 17:32           ` Justin Iurman
  0 siblings, 0 replies; 10+ messages in thread
From: Justin Iurman @ 2026-04-04 17:32 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jakub Kicinski, davem, pabeni, horms, dsahern, netdev,
	eric.dumazet, yimingqian591

On 4/4/26 13:44, Eric Dumazet wrote:
> On Sat, Apr 4, 2026 at 3:13 AM Justin Iurman <justin.iurman@gmail.com> wrote:
>>
>> On 4/3/26 23:50, Jakub Kicinski wrote:
>>> On Fri, 3 Apr 2026 14:47:32 -0700 Eric Dumazet wrote:
>>>>> If the ingress device has more RX queues than the egress device (dev) has
>>>>> TX queues, skb_get_queue_mapping(skb) will exceed dev->num_tx_queues.
>>>>>
>>>>> Since skb_get_tx_queue() does not clamp the index, will it return an
>>>>> out-of-bounds pointer into the egress device's dev->_tx array, causing
>>>>> dereferencing queue->qdisc to read invalid memory?
>>>>
>>>> This seems a different bug ?
>>>>
>>>> I mean, do we need to fix all bug in a single patch ?
>>>
>>> no no, sorry, i'm just sending this out for Justin or you as a separate
>>> thing.
>>
>> Thanks Jakub, I don't know how I missed that. It's not just about a
>> possible OOB: it's actually incorrect for IOAM to do that because we're
>> relying on a lucky assumption at best (i.e., if queue_mapping on RX ==
>> queue_mapping on TX). This unfortunately rules out the possibility to
>> have an accurate per queue visibility (which I originally provided for
>> flexibility). I've had a local patch for a long time to aggregate the
>> queue depth (i.e., sum of all TX queues). It is more expensive(***), but
>> it would kill two birds with one stone and solve both problems
>> simultaneously.
>>
>> Jakub, Eric, please let me know if the following plan works for both of you:
>> - (net) fix the OOB reported by Jakub (and, while at it, add missing
>> locks around qdisc_qstats_qlen_backlog() -> also included in my local patch)
>> - (net-next) after the merge window, add the new feature
>>
>> After that, the per queue visibility would be considered
>> legacy/deprecated and not the default behavior. Documentation would be
>> updated accordingly to explain why per queue visibility cannot be trusted.
>>
>>    (***) the advise to operators in such a case is (obviously) to reduce
>> the IOAM insertion (with the frequency parameter), especially at line rate.
> 
> Getting qdisc stats can be expensive for qdisc with per-cpu storage
> for counters like pfifo-fast.
> 
> If your plan is to sum MQ stats, this could have quadratic behavior :
> O(nr_cpus ^ 2)
> 
> I suggest rate-limiting this very expensive operation, or OAM will
> become a nice DOS tool.
> (Ie not depend on operators being nice, enforce this on the hosts)

Makes sense, I agree. But then rate-limiting would be redundant with the 
frequency of IOAM insertion, which acts as a rate-limit on its own 
already. IOAM is not (never ever) enabled by default, it must be enabled 
on ingress interface(s) explicitly, and configured accordingly. So it 
rules out the need to enforce anything on "hosts", as it is assumed you 
know what you're doing when you enable something (***). Also, IOAM only 
runs inside a limited domain (i.e., private/administrative domain 
operated by the same person/entity). Traffic must be filtered on the 
ingress of the domain (i.e., no IOAM injection from the outside), on the 
egress of the domain (i.e., prevent potential data leaking due to, e.g., 
misconfiguration), and the frequency of IOAM insertion actually is the 
rate-limit inside the domain (to each their own configuration, depending 
on performance). This is why I believe that documenting the whole thing 
would be enough in this case, since we already have rate-limiting thanks 
to the frequency of IOAM insertion.

(***) well, we may add another security barrier, just in case someone 
enables IOAM by mistake, by disabling the queue depth by default.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2026-04-04 17:32 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-02 10:17 [PATCH net] ipv6: ioam: fix potential NULL dereferences in __ioam6_fill_trace_data() Eric Dumazet
2026-04-02 18:10 ` Justin Iurman
2026-04-03 21:44 ` Jakub Kicinski
2026-04-03 21:47   ` Eric Dumazet
2026-04-03 21:50     ` Jakub Kicinski
2026-04-03 21:51       ` Eric Dumazet
2026-04-04 10:13       ` Justin Iurman
2026-04-04 11:44         ` Eric Dumazet
2026-04-04 17:32           ` Justin Iurman
2026-04-03 22:00 ` patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox