Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH net v4 1/2] flow_dissector: do not dissect PPPoE PFC frames
From: Simon Horman @ 2026-04-10 17:10 UTC (permalink / raw)
  To: Qingfang Deng
  Cc: linux-ppp, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Guillaume Nault, Wojciech Drewek, Tony Nguyen,
	linux-kernel, netdev, Paul Mackerras, Jaco Kroon, James Carlson,
	Marcin Szycik
In-Reply-To: <20260410033627.93786-1-qingfang.deng@linux.dev>

On Fri, Apr 10, 2026 at 11:36:20AM +0800, Qingfang Deng wrote:
> RFC 2516 Section 7 states that Protocol Field Compression (PFC) is NOT
> RECOMMENDED for PPPoE. In practice, pppd does not support negotiating
> PFC for PPPoE sessions, and the flow dissector driver has assumed an
> uncompressed frame until the blamed commit.
> 
> During the review process of that commit [1], support for PFC is
> suggested. However, having a compressed (1-byte) protocol field means
> the subsequent PPP payload is shifted by one byte, causing 4-byte
> misalignment for the network header and an unaligned access exception
> on some architectures.
> 
> The exception can be reproduced by sending a PPPoE PFC frame to an
> ethernet interface of a MIPS board, with RPS enabled, even if no PPPoE
> session is active on that interface:
> 
> $ 0   : 00000000 80c40000 00000000 85144817
> $ 4   : 00000008 00000100 80a75758 81dc9bb8
> $ 8   : 00000010 8087ae2c 0000003d 00000000
> $12   : 000000e0 00000039 00000000 00000000
> $16   : 85043240 80a75758 81dc9bb8 00006488
> $20   : 0000002f 00000007 85144810 80a70000
> $24   : 81d1bda0 00000000
> $28   : 81dc8000 81dc9aa8 00000000 805ead08
> Hi    : 00009d51
> Lo    : 2163358a
> epc   : 805e91f0 __skb_flow_dissect+0x1b0/0x1b50
> ra    : 805ead08 __skb_get_hash_net+0x74/0x12c
> Status: 11000403        KERNEL EXL IE
> Cause : 40800010 (ExcCode 04)
> BadVA : 85144817
> PrId  : 0001992f (MIPS 1004Kc)
> Call Trace:
> [<805e91f0>] __skb_flow_dissect+0x1b0/0x1b50
> [<805ead08>] __skb_get_hash_net+0x74/0x12c
> [<805ef330>] get_rps_cpu+0x1b8/0x3fc
> [<805fca70>] netif_receive_skb_list_internal+0x324/0x364
> [<805fd120>] napi_complete_done+0x68/0x2a4
> [<8058de5c>] mtk_napi_rx+0x228/0xfec
> [<805fd398>] __napi_poll+0x3c/0x1c4
> [<805fd754>] napi_threaded_poll_loop+0x234/0x29c
> [<805fd848>] napi_threaded_poll+0x8c/0xb0
> [<80053544>] kthread+0x104/0x12c
> [<80002bd8>] ret_from_kernel_thread+0x14/0x1c
> 
> Code: 02d51821  1060045b  00000000 <8c640000> 3084000f  2c820005  144001a2  00042080  8e220000
> 
> To reduce the attack surface and maintain performance, do not process
> PPPoE PFC frames. While at it, avoid byte-swapping at runtime, restoring
> the original behavior.
> 
> [1] https://patch.msgid.link/20220630231016.GA392@debian.home
> Fixes: 46126db9c861 ("flow_dissector: Add PPPoE dissectors")
> Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
> ---
> Changes in v4: no new changes
>  Link to v3: https://lore.kernel.org/r/20260409031107.616630-1-qingfang.deng@linux.dev
> Changes in v3:
>  Make ppp_proto_is_valid() private and fix kdoc warning, avoiding
>  gotchas if some out-of-tree modules use this function.
>  Link to v1: https://lore.kernel.org/r/20260407045743.174446-1-qingfang.deng@linux.dev
> 
>  include/linux/ppp_defs.h  | 13 -------------
>  net/core/flow_dissector.c | 39 +++++++++++++++++++++++----------------
>  2 files changed, 23 insertions(+), 29 deletions(-)
> 
> diff --git a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h
> index b7e57fdbd413..45c0947fa404 100644
> --- a/include/linux/ppp_defs.h
> +++ b/include/linux/ppp_defs.h
> @@ -12,17 +12,4 @@
>  
>  #define PPP_FCS(fcs, c) crc_ccitt_byte(fcs, c)
>  
> -/**
> - * ppp_proto_is_valid - checks if PPP protocol is valid
> - * @proto: PPP protocol
> - *
> - * Assumes proto is not compressed.
> - * Protocol is valid if the value is odd and the least significant bit of the
> - * most significant octet is 0 (see RFC 1661, section 2).
> - */
> -static inline bool ppp_proto_is_valid(u16 proto)
> -{
> -	return !!((proto & 0x0101) == 0x0001);
> -}
> -
>  #endif /* _PPP_DEFS_H_ */
> diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
> index 1b61bb25ba0e..64b843800370 100644
> --- a/net/core/flow_dissector.c
> +++ b/net/core/flow_dissector.c
> @@ -1035,6 +1035,21 @@ static bool is_pppoe_ses_hdr_valid(const struct pppoe_hdr *hdr)
>  	return hdr->ver == 1 && hdr->type == 1 && hdr->code == 0;
>  }
>  
> +/**
> + * ppp_proto_is_valid - checks if PPP protocol is valid
> + * @proto: PPP protocol
> + *
> + * Assumes proto is not compressed.
> + * Protocol is valid if the value is odd and the least significant bit of the
> + * most significant octet is 0 (see RFC 1661, section 2).
> + *
> + * Return: Whether the PPP protocol is valid.
> + */
> +static bool ppp_proto_is_valid(__be16 proto)
> +{
> +	return (proto & htons(0x0101)) == htons(0x0001);
> +}
> +
>  /**
>   * __skb_flow_dissect - extract the flow_keys struct and return it
>   * @net: associated network namespace, derived from @skb if NULL
> @@ -1361,7 +1376,7 @@ bool __skb_flow_dissect(const struct net *net,
>  			struct pppoe_hdr hdr;
>  			__be16 proto;
>  		} *hdr, _hdr;
> -		u16 ppp_proto;
> +		__be16 ppp_proto;

I'm unclear of the relationship between changing the type of ppp_proto
and the problem described in the patch description. And it
is creating a log of churn in this patch. I suggest dropping it.

>  
>  		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
>  		if (!hdr) {
> @@ -1374,27 +1389,19 @@ bool __skb_flow_dissect(const struct net *net,
>  			break;
>  		}
>  
> -		/* least significant bit of the most significant octet
> -		 * indicates if protocol field was compressed
> -		 */
> -		ppp_proto = ntohs(hdr->proto);
> -		if (ppp_proto & 0x0100) {
> -			ppp_proto = ppp_proto >> 8;
> -			nhoff += PPPOE_SES_HLEN - 1;
> -		} else {
> -			nhoff += PPPOE_SES_HLEN;
> -		}

Could we go for something like this?

		ppp_proto = ntohs(hdr->proto);
		nhoff += PPPOE_SES_HLEN;

		/* Explanation of what is going on */
		if (ppp_proto & 0x0100)
			ppp_proto = some invalid value like 0

> +		ppp_proto = hdr->proto;
> +		nhoff += PPPOE_SES_HLEN;
>  
> -		if (ppp_proto == PPP_IP) {
> +		if (ppp_proto == htons(PPP_IP)) {
>  			proto = htons(ETH_P_IP);
>  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> -		} else if (ppp_proto == PPP_IPV6) {
> +		} else if (ppp_proto == htons(PPP_IPV6)) {
>  			proto = htons(ETH_P_IPV6);
>  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> -		} else if (ppp_proto == PPP_MPLS_UC) {
> +		} else if (ppp_proto == htons(PPP_MPLS_UC)) {
>  			proto = htons(ETH_P_MPLS_UC);
>  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
> -		} else if (ppp_proto == PPP_MPLS_MC) {
> +		} else if (ppp_proto == htons(PPP_MPLS_MC)) {
>  			proto = htons(ETH_P_MPLS_MC);
>  			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
>  		} else if (ppp_proto_is_valid(ppp_proto)) {
> @@ -1412,7 +1419,7 @@ bool __skb_flow_dissect(const struct net *net,
>  							      FLOW_DISSECTOR_KEY_PPPOE,
>  							      target_container);
>  			key_pppoe->session_id = hdr->hdr.sid;
> -			key_pppoe->ppp_proto = htons(ppp_proto);
> +			key_pppoe->ppp_proto = ppp_proto;
>  			key_pppoe->type = htons(ETH_P_PPP_SES);
>  		}
>  		break;
> -- 
> 2.43.0
> 

^ permalink raw reply

* [PATCH net] net: phy: qcom: at803x: Use the correct bit to disable extended next page
From: Maxime Chevallier @ 2026-04-10 17:10 UTC (permalink / raw)
  To: Andrew Lunn, Jakub Kicinski, davem, Eric Dumazet, Paolo Abeni,
	Simon Horman, Russell King
  Cc: Maxime Chevallier, thomas.petazzoni, netdev, linux-kernel,
	linux-arm-msm

As noted in the blamed commit, the AR8035 and other PHYs from this
family advertise the Extended Next Page support by default, which may be
understood by some partners as this PHY being multi-gig capable.

The fix is to disable XNP advertising, which is done by setting bit 12
of the Auto-Negotiation Advertisement Register (MII_ADVERTISE).

The blamed commit incorrectly uses MDIO_AN_CTRL1_XNP, which is bit 13 as per
802.3 : 45.2.7.1 AN control register (Register 7.0)

BIT 12 in MII_ADVERTISE is wrapped by ADVERTISE_RESV, used by some
drivers such as the aquantia one. 802.3 Clause 28 defines bit 12 as
Extended Next Page ability, at least in recent versions of the standard.

Let's add a define for it and use it in the at803x driver.

Fixes: 3c51fa5d2afe ("net: phy: ar803x: disable extended next page bit")
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
---
Some further driver cleanup can be done to remove the use of
ADVERTISE_RESV, but the macro is uapi now so it'll have to stay.

 drivers/net/phy/qcom/at803x.c | 2 +-
 include/uapi/linux/mii.h      | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c
index 338acd11a9b6..023c1fe0cd14 100644
--- a/drivers/net/phy/qcom/at803x.c
+++ b/drivers/net/phy/qcom/at803x.c
@@ -524,7 +524,7 @@ static int at803x_config_init(struct phy_device *phydev)
 	 * behaviour but we still need to accommodate it. XNP is only needed
 	 * for 10Gbps support, so disable XNP.
 	 */
-	return phy_modify(phydev, MII_ADVERTISE, MDIO_AN_CTRL1_XNP, 0);
+	return phy_modify(phydev, MII_ADVERTISE, ADVERTISE_XNP, 0);
 }
 
 static void at803x_link_change_notify(struct phy_device *phydev)
diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h
index 39f7c44baf53..61d6edad4b94 100644
--- a/include/uapi/linux/mii.h
+++ b/include/uapi/linux/mii.h
@@ -82,7 +82,8 @@
 #define ADVERTISE_100BASE4	0x0200	/* Try for 100mbps 4k packets  */
 #define ADVERTISE_PAUSE_CAP	0x0400	/* Try for pause               */
 #define ADVERTISE_PAUSE_ASYM	0x0800	/* Try for asymetric pause     */
-#define ADVERTISE_RESV		0x1000	/* Unused...                   */
+#define ADVERTISE_XNP		0x1000  /* Extended Next Page */
+#define ADVERTISE_RESV		ADVERTISE_XNP /* Used to be reserved */
 #define ADVERTISE_RFAULT	0x2000	/* Say we can detect faults    */
 #define ADVERTISE_LPACK		0x4000	/* Ack link partners response  */
 #define ADVERTISE_NPAGE		0x8000	/* Next page bit               */
-- 
2.49.0


^ permalink raw reply related

* Re: [PATCH net-next] net: phy: call phy_init_hw() in phy resume path
From: Florian Fainelli @ 2026-04-10 17:00 UTC (permalink / raw)
  To: Russell King (Oracle), Biju
  Cc: Andrew Lunn, Heiner Kallweit, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Ovidiu Panait, netdev, linux-kernel,
	Geert Uytterhoeven, Prabhakar Mahadev Lad, linux-renesas-soc,
	Biju Das
In-Reply-To: <adkOZl4gt5UoGv-0@shell.armlinux.org.uk>

On 4/10/26 07:51, Russell King (Oracle) wrote:
> On Fri, Apr 10, 2026 at 03:29:01PM +0100, Biju wrote:
>> From: Ovidiu Panait <ovidiu.panait.rb@renesas.com>
>>
>> When mac_managed_pm flag is set, mdio_bus_phy_resume() is skipped, so
>> phy_init_hw(), which performs soft_reset and config_init, is not called
>> during resume.
>>
>> This is inconsistent with the non-mac_managed_pm path, where
>> mdio_bus_phy_resume() calls phy_init_hw() before phy_resume() on every
>> resume.
>>
>> To align both paths, add a phy_init_hw() call at the top of
>> __phy_resume(), before invoking the driver's resume callback. This
>> guarantees the PHY undergoes soft reset and re-initialization regardless
>> of whether PM is managed by the MAC or the MDIO bus.
>>
>> Signed-off-by: Ovidiu Panait <ovidiu.panait.rb@renesas.com>
>> Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
>> ---
>>   drivers/net/phy/phy_device.c | 4 ++++
>>   1 file changed, 4 insertions(+)
>>
>> diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
>> index 0edff47478c2..8255f4208d66 100644
>> --- a/drivers/net/phy/phy_device.c
>> +++ b/drivers/net/phy/phy_device.c
>> @@ -2008,6 +2008,10 @@ int __phy_resume(struct phy_device *phydev)
>>   	if (!phydrv || !phydrv->resume)
>>   		return 0;
>>   
>> +	ret = phy_init_hw(phydev);
>> +	if (ret)
>> +		return ret;
> 
> Do we want to do this even when phydrv->resume is NULL?

Seems to me we would want that, but this gets into the territory of 
potentially creating many soft-reset of the PHYs, something that I 
regret having introduced years ago.

> 
> Apart from that, looks fine to me - it seems some paths call
> phy_init_hw() can be called with or without phydev->lock held, and
> this one will call it with the lock held which seems to be okay.
> 

Agreed.
-- 
Florian


^ permalink raw reply

* Re: [PATCH net v2] pppoe: drop PFC frames
From: Simon Horman @ 2026-04-10 16:49 UTC (permalink / raw)
  To: Qingfang Deng
  Cc: linux-ppp, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Michal Ostrowski, Yue Haibing,
	Breno Leitao, Kuniyuki Iwashima, Sebastian Andrzej Siewior,
	Kees Cook, netdev, linux-kernel, Paul Mackerras, Jaco Kroon,
	James Carlson, Wojciech Drewek, Guillaume Nault
In-Reply-To: <20260410164442.GB469338@kernel.org>

On Fri, Apr 10, 2026 at 05:44:42PM +0100, Simon Horman wrote:
> On Wed, Apr 08, 2026 at 10:42:39AM +0800, Qingfang Deng wrote:
> > RFC 2516 Section 7 states that Protocol Field Compression (PFC) is NOT
> > RECOMMENDED for PPPoE. In practice, pppd does not support negotiating
> > PFC for PPPoE sessions, and the current PPPoE driver assumes an
> > uncompressed (2-byte) protocol field. However, the generic PPP layer
> > function ppp_input() is not aware of the negotiation result, and still
> > accepts PFC frames.
> > 
> > If a peer with a broken implementation or an attacker sends a frame with
> > a compressed (1-byte) protocol field, the subsequent PPP payload is
> > shifted by one byte. This causes the network header to be 4-byte
> > misaligned, which may trigger unaligned access exceptions on some
> > architectures.
> > 
> > To reduce the attack surface, drop PPPoE PFC frames. Introduce
> > ppp_skb_is_compressed_proto() helper function to be used in both
> > ppp_generic.c and pppoe.c to avoid open-coding.
> > 
> > Fixes: 224cf5ad14c0 ("ppp: Move the PPP drivers")
> 
> AI generated review points out that the commit cited above only moves code
> around. And thus while it may show up in git annotate as the source
> of the buggy lines of code, actually it predates that commit.
> 
> AI generated code suggests an earlier commit that adds
> the length check. But I believe that the bug predates that too.
> And that the bug is present, in some form, since the beginning of
> git history, because at that time fields of the ppp header was read without
> taking PFC into account.
> 
> So I suggest:
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> 
> Unless there is other review you probably don't need to repost just
> to address this.

Sorry, I now see there is a v3 and v4.
I will follow-up there.

-- 
pw-bot: superseded

^ permalink raw reply

* Re: [PATCH net v2] pppoe: drop PFC frames
From: Simon Horman @ 2026-04-10 16:44 UTC (permalink / raw)
  To: Qingfang Deng
  Cc: linux-ppp, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Michal Ostrowski, Yue Haibing,
	Breno Leitao, Kuniyuki Iwashima, Sebastian Andrzej Siewior,
	Kees Cook, netdev, linux-kernel, Paul Mackerras, Jaco Kroon,
	James Carlson, Wojciech Drewek, Guillaume Nault
In-Reply-To: <20260408024245.312732-1-qingfang.deng@linux.dev>

On Wed, Apr 08, 2026 at 10:42:39AM +0800, Qingfang Deng wrote:
> RFC 2516 Section 7 states that Protocol Field Compression (PFC) is NOT
> RECOMMENDED for PPPoE. In practice, pppd does not support negotiating
> PFC for PPPoE sessions, and the current PPPoE driver assumes an
> uncompressed (2-byte) protocol field. However, the generic PPP layer
> function ppp_input() is not aware of the negotiation result, and still
> accepts PFC frames.
> 
> If a peer with a broken implementation or an attacker sends a frame with
> a compressed (1-byte) protocol field, the subsequent PPP payload is
> shifted by one byte. This causes the network header to be 4-byte
> misaligned, which may trigger unaligned access exceptions on some
> architectures.
> 
> To reduce the attack surface, drop PPPoE PFC frames. Introduce
> ppp_skb_is_compressed_proto() helper function to be used in both
> ppp_generic.c and pppoe.c to avoid open-coding.
> 
> Fixes: 224cf5ad14c0 ("ppp: Move the PPP drivers")

AI generated review points out that the commit cited above only moves code
around. And thus while it may show up in git annotate as the source
of the buggy lines of code, actually it predates that commit.

AI generated code suggests an earlier commit that adds
the length check. But I believe that the bug predates that too.
And that the bug is present, in some form, since the beginning of
git history, because at that time fields of the ppp header was read without
taking PFC into account.

So I suggest:

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")

Unless there is other review you probably don't need to repost just
to address this.

> Signed-off-by: Qingfang Deng <qingfang.deng@linux.dev>
> ---
> v2: retarget to net, and add a helper function
>  https://lore.kernel.org/netdev/20260403083926.68320-1-qingfang.deng@linux.dev/

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply

* RE: [PATCH net-next] net: phy: call phy_init_hw() in phy resume path
From: Biju Das @ 2026-04-10 16:41 UTC (permalink / raw)
  To: Andrew Lunn, Russell King (Oracle)
  Cc: biju.das.au, Heiner Kallweit, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Ovidiu Panait,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Geert Uytterhoeven, Prabhakar Mahadev Lad,
	linux-renesas-soc@vger.kernel.org
In-Reply-To: <839fec66-5ec0-4cc0-a0c4-ae2de6902188@lunn.ch>

Hi Andrew,

> -----Original Message-----
> From: Andrew Lunn <andrew@lunn.ch>
> Sent: 10 April 2026 16:15
> Subject: Re: [PATCH net-next] net: phy: call phy_init_hw() in phy resume path
> 
> > Apart from that, looks fine to me - it seems some paths call
> > phy_init_hw() can be called with or without phydev->lock held, and
> > this one will call it with the lock held which seems to be okay.
> 
> Haven't we had deadlocks in this area before?
> 
> Please test with CONFIG_PROVE_LOCKING enabled.

I have n't faced any issue with micrel phy. But my collegue
got the below issue with Microsemi phy. It doesn't finish the boot.

drivers/net/phy/mscc/mscc_main.c 


[    5.125699] ============================================
[    5.131849] WARNING: possible recursive locking detected
[    5.137996] 7.0.0-rc7-next-20260409+ #614 Not tainted
[    5.143847] --------------------------------------------
[    5.149991] swapper/0/1 is trying to acquire lock:
[    5.155333] ffff000182bbe7c0 (&dev->lock#2){+.+.}-{4:4}, at: vsc85xx_config_init+0x68/0x344
[    5.164771] 
[    5.164771] but task is already holding lock:
[    5.171520] ffff000182bbe7c0 (&dev->lock#2){+.+.}-{4:4}, at: phy_attach_direct+0x19c/0x3d0
[    5.180984] 
[    5.180984] other info that might help us debug this:
[    5.188237]  Possible unsafe locking scenario:
[    5.188237] 
[    5.195089]        CPU0
[    5.197914]        ----
[    5.200670]   lock(&dev->lock#2);
[    5.204425]   lock(&dev->lock#2);
[    5.208273] 
[    5.208273]  *** DEADLOCK ***
[    5.208273] 
[    5.214920]  May be due to missing lock nesting notation
[    5.214920] 
[    5.222677] 2 locks held by swapper/0/1:
[    5.227217]  #0: ffff800082f051f0 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock+0x1c/0x28
[    5.236041]  #1: ffff000182bbe7c0 (&dev->lock#2){+.+.}-{4:4}, at: phy_attach_direct+0x19c/0x3d0
[    5.245880] 
[    5.245880] stack backtrace:
[    5.250824] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 7.0.0-rc7-next-20260409+ #614 PREEMPT 
[    5.250840] Hardware name: Renesas RZ/T2H EVK Board based on r9a09g077m44 (DT)
[    5.250847] Call trace:
[    5.250853]  show_stack+0x18/0x30 (C)
[    5.250876]  dump_stack_lvl+0x70/0x98
[    5.250894]  dump_stack+0x18/0x24
[    5.250910]  print_deadlock_bug+0x220/0x234
[    5.250931]  __lock_acquire+0xe10/0x1594
[    5.250950]  lock_acquire+0x284/0x400
[    5.250967]  __mutex_lock+0xa8/0x804
[    5.250983]  mutex_lock_nested+0x24/0x3c
[    5.250998]  vsc85xx_config_init+0x68/0x344
[    5.251013]  phy_init_hw+0x68/0xa8
[    5.251030]  __phy_resume+0x3c/0x98
[    5.251047]  phy_attach_direct+0x1a4/0x3d0
[    5.251064]  phylink_fwnode_phy_connect+0x98/0x140
[    5.251085]  stmmac_open+0x120/0x38c
[    5.251103]  __dev_open+0x13c/0x280
[    5.251117]  __dev_change_flags+0x19c/0x21c
[    5.251131]  netif_change_flags+0x24/0x6c
[    5.251144]  dev_change_flags+0x48/0x80
[    5.251159]  ip_auto_config+0x264/0xec0
[    5.251176]  do_one_initcall+0x7c/0x4d0
[    5.251194]  kernel_init_freeable+0x2b4/0x33c
[    5.251212]  kernel_init+0x24/0x140
[    5.251231]  ret_from_fork+0x10/0x20


Cheers,
Biju

^ permalink raw reply

* Re: [PATCH net-next v6 13/14] selftests: net: add team_bridge_macvlan rx_mode test
From: Breno Leitao @ 2026-04-10 16:35 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, davem, edumazet, kuba, pabeni
In-Reply-To: <20260407153101.3694714-14-sdf@fomichev.me>

On Tue, Apr 07, 2026 at 08:31:00AM -0700, Stanislav Fomichev wrote:
> Add a test that exercises the ndo_change_rx_flags path through a
> macvlan -> bridge -> team -> dummy stack. This triggers dev_uc_add
> under addr_list_lock which flips promiscuity on the lower device.
> With the new work queue approach, this must not deadlock.
> 
> Link: https://lore.kernel.org/netdev/20260214033859.43857-1-jiayuan.chen@linux.dev/
> Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>

Reviewed-by: Breno Leitao <leitao@debian.org>
>
> ---
>  tools/testing/selftests/net/config       |  3 ++
>  tools/testing/selftests/net/rtnetlink.sh | 44 ++++++++++++++++++++++++
>  2 files changed, 47 insertions(+)
> 
> diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
> index 2a390cae41bf..38611ea11c6b 100644
> --- a/tools/testing/selftests/net/config
> +++ b/tools/testing/selftests/net/config
> @@ -101,6 +101,9 @@ CONFIG_NET_SCH_HTB=m
>  CONFIG_NET_SCH_INGRESS=m
>  CONFIG_NET_SCH_NETEM=y
>  CONFIG_NET_SCH_PRIO=m
> +CONFIG_NET_TEAM=y
> +CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y
> +CONFIG_NET_TEAM_MODE_LOADBALANCE=y

Why do you need LOADBALANCE enabled for the test?

^ permalink raw reply

* Re: [PATCH net v6 2/2] selftests/bpf: add test for xdp_master_redirect with bond not up
From: Daniel Borkmann @ 2026-04-10 16:28 UTC (permalink / raw)
  To: Jiayuan Chen, netdev
  Cc: Martin KaFai Lau, John Fastabend, Stanislav Fomichev,
	Alexei Starovoitov, Andrii Nakryiko, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Hao Luo, Jiri Olsa, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Jesper Dangaard Brouer, Shuah Khan, Jussi Maki, bpf, linux-kernel,
	linux-kselftest, Nikolay Aleksandrov
In-Reply-To: <20260410113726.368111-3-jiayuan.chen@linux.dev>

On 4/10/26 1:37 PM, Jiayuan Chen wrote:
> Add a selftest that reproduces the null-ptr-deref in
> bond_rr_gen_slave_id() when XDP redirect targets a bond device in
> round-robin mode that was never brought up. The test verifies the fix
> by ensuring no crash occurs.
> 
> Test setup:
> - bond0: active-backup mode, UP, with native XDP (enables
>    bpf_master_redirect_enabled_key globally)
> - bond1: round-robin mode, never UP
> - veth1: slave of bond1, with generic XDP (XDP_TX)
> - BPF_PROG_TEST_RUN with live frames triggers the redirect path
> 
> Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>

I checked locally that this XDP test passes fine and triggers a NULL
pointer deref without the fix.

[...]
> +	/* Attach generic XDP (XDP_TX) to veth1.
> +	 * When packets arrive at veth1 via netif_receive_skb, do_xdp_generic()
> +	 * runs this program. XDP_TX + bond slave triggers xdp_master_redirect().
> +	 */
> +	xdp_tx_fd = bpf_program__fd(skeletons->xdp_tx->progs.xdp_tx);
> +	if (!ASSERT_GE(xdp_tx_fd, 0, "xdp_tx prog_fd"))
> +		goto out;

nit: no need for the ASSERT_GE given the skeleton loaded, see also the various
other tests gathering bpf_program__fd().

> +	err = bpf_xdp_attach(veth1_ifindex, xdp_tx_fd,
> +			     XDP_FLAGS_SKB_MODE, NULL);
> +	if (!ASSERT_OK(err, "attach generic XDP to veth1"))
> +		goto out;
> +
> +	/* Run BPF_PROG_TEST_RUN with XDP_PASS live frames on veth1.
> +	 * XDP_PASS frames become SKBs with skb->dev = veth1, entering
> +	 * netif_receive_skb -> do_xdp_generic -> xdp_master_redirect.
> +	 * Without the fix, bond_rr_gen_slave_id() dereferences NULL
> +	 * rr_tx_counter and crashes.
> +	 */
> +	xdp_pass_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog);
> +	if (!ASSERT_GE(xdp_pass_fd, 0, "xdp_pass prog_fd"))
> +		goto out;

ditto, can be simplified a bit into:

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index 0d4ec1e5b401..c42488e445c2 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -506,7 +506,7 @@ static void test_xdp_bonding_nested(struct skeletons *skeletons)
  static void test_xdp_bonding_redirect_no_up(struct skeletons *skeletons)
  {
  	struct nstoken *nstoken = NULL;
-	int xdp_pass_fd, xdp_tx_fd;
+	int xdp_pass_fd;
  	int veth1_ifindex;
  	int err;
  	char pkt[ETH_HLEN + 1];
@@ -555,11 +555,8 @@ static void test_xdp_bonding_redirect_no_up(struct skeletons *skeletons)
  	 * When packets arrive at veth1 via netif_receive_skb, do_xdp_generic()
  	 * runs this program. XDP_TX + bond slave triggers xdp_master_redirect().
  	 */
-	xdp_tx_fd = bpf_program__fd(skeletons->xdp_tx->progs.xdp_tx);
-	if (!ASSERT_GE(xdp_tx_fd, 0, "xdp_tx prog_fd"))
-		goto out;
-
-	err = bpf_xdp_attach(veth1_ifindex, xdp_tx_fd,
+	err = bpf_xdp_attach(veth1_ifindex,
+			     bpf_program__fd(skeletons->xdp_tx->progs.xdp_tx),
  			     XDP_FLAGS_SKB_MODE, NULL);
  	if (!ASSERT_OK(err, "attach generic XDP to veth1"))
  		goto out;
@@ -571,8 +568,6 @@ static void test_xdp_bonding_redirect_no_up(struct skeletons *skeletons)
  	 * rr_tx_counter and crashes.
  	 */
  	xdp_pass_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog);
-	if (!ASSERT_GE(xdp_pass_fd, 0, "xdp_pass prog_fd"))
-		goto out;
  
  	memset(pkt, 0, sizeof(pkt));
  	ctx_in.data_end = sizeof(pkt);


^ permalink raw reply related

* Re: [Intel-wired-lan] [PATCH net v2 3/4] iavf: send MAC change request synchronously
From: Jose Ignacio Tornos Martinez @ 2026-04-10 16:25 UTC (permalink / raw)
  To: przemyslaw.kitszel
  Cc: anthony.l.nguyen, davem, edumazet, intel-wired-lan,
	jacob.e.keller, jtornosm, kohei.enju, kuba, netdev, pabeni,
	stable
In-Reply-To: <30d48647-8c1d-4683-ae9d-becb33cf8d4f@intel.com>

Hello Przemek,

Thank you again for your comments.
I will try to include the new ones too in the next version.
In addition, I will analyze the suggested integration with existing
iavf_poll_virtchnl_msg() and possibly iavf_virtchnl_completion, but maybe
better in a new patch of the series as a refactoring.

Best regards
Jose Ignacio

^ permalink raw reply

* [PATCH v3 0/2] net: fix skb_ext BUILD_BUG_ON failures with GCOV
From: Konstantin Khorenko @ 2026-04-10 16:21 UTC (permalink / raw)
  To: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Thomas Weißschuh, Arnd Bergmann,
	Peter Oberparleiter, Mikhail Zaslonko, netdev, linux-kernel,
	Pavel Tikhomirov, Vasileios Almpanis, Konstantin Khorenko
In-Reply-To: <20260409214736.2651198-1-khorenko@virtuozzo.com>

This mini-series fixes build failures in net/core/skbuff.c when the
kernel is built with CONFIG_GCOV_PROFILE_ALL=y.

This is part of a larger effort to add -fprofile-update=atomic to
global CFLAGS_GCOV (posted earlier as a combined series):
  https://lore.kernel.org/lkml/20260401142020.1434243-1-khorenko@virtuozzo.com/T/#t

That combined series was split per subsystem as requested by Jakub.
The companion patches are:

 - iommu: use __always_inline for amdv1pt_install_leaf_entry()
   (sent to iommu maintainers)
 - gcov: add -fprofile-update=atomic globally (sent to gcov/kbuild
   maintainers, depends on this series and the iommu patch)

Patch 1/2 fixes a pre-existing build failure with CONFIG_GCOV_PROFILE_ALL:
GCOV counters prevent GCC from constant-folding the skb_ext_total_length()
loop.  It also removes the CONFIG_KCOV_INSTRUMENT_ALL preprocessor guard
from d6e5794b06c0: that guard was a precaution in case KCOV instrumentation
also prevented constant folding, but KCOV's -fsanitize-coverage=trace-pc
does not interfere with GCC's constant folding (verified experimentally
with GCC 14.2 and GCC 16.0.1), so the guard is unnecessary.

Patch 2/2 is an additional fix needed when -fprofile-update=atomic is
added to CFLAGS_GCOV: __no_profile on the __always_inline function alone
is insufficient because after inlining, the code resides in the caller's
profiled body.  The caller (skb_extensions_init) needs __no_profile and
noinline to prevent re-exposure to GCOV instrumentation.

Changes v2 -> v3:
 - Patch 2/2: added __init since skb_extensions_init() is only called
   from __init skb_init() - without it noinline would keep the function
   in permanent .text, wasting memory after boot.

Changes v1 -> v2:
 - Patch 1/2: expanded the commit message to explain why removing the
   CONFIG_KCOV_INSTRUMENT_ALL guard is safe (KCOV's
   -fsanitize-coverage=trace-pc does not inhibit constant folding,
   unlike GCOV's -fprofile-arcs + -fno-tree-loop-im).
   Changed Fixes tag to point at the commit that introduced the 5th
   SKB extension type which triggered the failure.
   Removed empty lines in tags area.
 - Patch 2/2: added noinline to skb_extensions_init() to prevent
   the compiler from inlining it into skb_init(), which would
   re-expose the function body to GCOV instrumentation.

v2: https://lore.kernel.org/lkml/20260402140558.1437002-1-khorenko@virtuozzo.com/T/#t
v1: https://lore.kernel.org/lkml/20260401142020.1434243-1-khorenko@virtuozzo.com/T/#t

Tested with:
 - GCC 14.2.1, CONFIG_GCOV_PROFILE_ALL=y
 - GCC 14.2.1, CONFIG_KCOV_INSTRUMENT_ALL=y (GCOV disabled)
 - GCC 16.0.1 20260327 (experimental), CONFIG_GCOV_PROFILE_ALL=y

Konstantin Khorenko (2):
  net: fix skb_ext_total_length() BUILD_BUG_ON with
    CONFIG_GCOV_PROFILE_ALL
  net: add noinline __init __no_profile to skb_extensions_init() for
    GCOV compatibility

 net/core/skbuff.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

-- 
2.43.5

^ permalink raw reply

* [PATCH v3 1/2] net: fix skb_ext_total_length() BUILD_BUG_ON with CONFIG_GCOV_PROFILE_ALL
From: Konstantin Khorenko @ 2026-04-10 16:21 UTC (permalink / raw)
  To: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Thomas Weißschuh, Arnd Bergmann,
	Peter Oberparleiter, Mikhail Zaslonko, netdev, linux-kernel,
	Pavel Tikhomirov, Vasileios Almpanis, Konstantin Khorenko
In-Reply-To: <20260410162150.3105738-1-khorenko@virtuozzo.com>

When CONFIG_GCOV_PROFILE_ALL=y is enabled, the kernel fails to build:

  In file included from <command-line>:
  In function 'skb_extensions_init',
      inlined from 'skb_init' at net/core/skbuff.c:5214:2:
  ././include/linux/compiler_types.h:706:45: error: call to
    '__compiletime_assert_1490' declared with attribute error:
    BUILD_BUG_ON failed: skb_ext_total_length() > 255

CONFIG_GCOV_PROFILE_ALL adds -fprofile-arcs -ftest-coverage
-fno-tree-loop-im to CFLAGS globally. GCC inserts branch profiling
counters into the skb_ext_total_length() loop and, combined with
-fno-tree-loop-im (which disables loop invariant motion), cannot
constant-fold the result.
BUILD_BUG_ON requires a compile-time constant and fails.

The issue manifests in kernels with 5+ SKB extension types enabled
(e.g., after addition of SKB_EXT_CAN, SKB_EXT_PSP). With 4 extensions
GCC can still unroll and fold the loop despite GCOV instrumentation;
with 5+ it gives up.

Mark skb_ext_total_length() with __no_profile to prevent GCOV from
inserting counters into this function. Without counters the loop is
"clean" and GCC can constant-fold it even with -fno-tree-loop-im active.
This allows BUILD_BUG_ON to work correctly while keeping GCOV profiling
for the rest of the kernel.

This also removes the CONFIG_KCOV_INSTRUMENT_ALL preprocessor guard
introduced by d6e5794b06c0. That guard was added as a precaution because
KCOV instrumentation was also suspected of inhibiting constant folding.
However, KCOV uses -fsanitize-coverage=trace-pc, which inserts
lightweight trace callbacks that do not interfere with GCC's constant
folding or loop optimization passes. Only GCOV's -fprofile-arcs combined
with -fno-tree-loop-im actually prevents the compiler from evaluating
the loop at compile time. The guard is therefore unnecessary and can be
safely removed.

Fixes: 96ea3a1e2d31 ("can: add CAN skb extension infrastructure")
Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
Reviewed-by: Thomas Weissschuh <linux@weissschuh.net>
---
 net/core/skbuff.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 43ee86dcf2ea..59fb4b2bb821 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5142,7 +5142,7 @@ static const u8 skb_ext_type_len[] = {
 #endif
 };

-static __always_inline unsigned int skb_ext_total_length(void)
+static __always_inline __no_profile unsigned int skb_ext_total_length(void)
 {
 	unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext);
 	int i;
@@ -5156,9 +5156,7 @@ static __always_inline unsigned int skb_ext_total_length(void)
 static void skb_extensions_init(void)
 {
 	BUILD_BUG_ON(SKB_EXT_NUM > 8);
-#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL)
 	BUILD_BUG_ON(skb_ext_total_length() > 255);
-#endif

 	skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
 					     SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
-- 
2.43.5

^ permalink raw reply related

* [PATCH v3 2/2] net: add noinline __init __no_profile to skb_extensions_init() for GCOV compatibility
From: Konstantin Khorenko @ 2026-04-10 16:21 UTC (permalink / raw)
  To: David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, Thomas Weißschuh, Arnd Bergmann,
	Peter Oberparleiter, Mikhail Zaslonko, netdev, linux-kernel,
	Pavel Tikhomirov, Vasileios Almpanis, Konstantin Khorenko
In-Reply-To: <20260410162150.3105738-1-khorenko@virtuozzo.com>

With -fprofile-update=atomic in global CFLAGS_GCOV, GCC still cannot
constant-fold the skb_ext_total_length() loop when it is inlined into a
profiled caller.  The existing __no_profile on skb_ext_total_length()
itself is insufficient because after __always_inline expansion the code
resides in the caller's body, which still carries GCOV instrumentation.

Mark skb_extensions_init() with __no_profile so the BUILD_BUG_ON checks
can be evaluated at compile time.  Also mark it noinline to prevent the
compiler from inlining it into skb_init() (which lacks __no_profile),
which would re-expose the function body to GCOV instrumentation.

Add __init since skb_extensions_init() is only called from __init
skb_init().  Previously it was implicitly inlined into the .init.text
section; with noinline it would otherwise remain in permanent .text,
wasting memory after boot.

Build-tested with both CONFIG_GCOV_PROFILE_ALL=y and
CONFIG_KCOV_INSTRUMENT_ALL=y.

Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 59fb4b2bb821..0978f526acf4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5153,7 +5153,7 @@ static __always_inline __no_profile unsigned int skb_ext_total_length(void)
 	return l;
 }

-static void skb_extensions_init(void)
+static noinline void __init __no_profile skb_extensions_init(void)
 {
 	BUILD_BUG_ON(SKB_EXT_NUM > 8);
 	BUILD_BUG_ON(skb_ext_total_length() > 255);
-- 
2.43.5

^ permalink raw reply related

* Re: [PATCH v2] net: hamradio: 6pack: fix uninit-value in sixpack_receive_buf
From: Simon Horman @ 2026-04-10 16:13 UTC (permalink / raw)
  To: Mashiro Chen
  Cc: netdev, davem, edumazet, kuba, pabeni, linux-hams, stable,
	syzbot+ecdb8c9878a81eb21e54
In-Reply-To: <20260407173101.107352-1-mashiro.chen@mailbox.org>

On Wed, Apr 08, 2026 at 01:31:01AM +0800, Mashiro Chen wrote:
> sixpack_receive_buf() does not properly skip bytes with TTY error flags.
> The while loop iterates through the flags buffer but never advances the
> data pointer (cp), and passes the original count (including error bytes)
> to sixpack_decode(). This causes sixpack_decode() to process bytes that
> should have been skipped due to TTY errors.  The TTY layer does not
> guarantee that cp[i] holds a meaningful value when fp[i] is set, so
> passing those positions to sixpack_decode() results in KMSAN reporting
> an uninit-value read.
> 
> Fix this by processing bytes one at a time, advancing cp on each
> iteration, and only passing valid (non-error) bytes to sixpack_decode().
> This matches the pattern used by slip_receive_buf() and
> mkiss_receive_buf() for the same purpose.
> 
> Reported-by: syzbot+ecdb8c9878a81eb21e54@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=ecdb8c9878a81eb21e54
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Mashiro Chen <mashiro.chen@mailbox.org>

Thanks for the updates.

Reviewed-by: Simon Horman <horms@kernel.org>

^ permalink raw reply

* Re: [PATCH net-next v6 10/14] netdevsim: convert to ndo_set_rx_mode_async
From: Breno Leitao @ 2026-04-10 16:11 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, davem, edumazet, kuba, pabeni
In-Reply-To: <20260407153101.3694714-11-sdf@fomichev.me>

On Tue, Apr 07, 2026 at 08:30:57AM -0700, Stanislav Fomichev wrote:
> Convert netdevsim from ndo_set_rx_mode to ndo_set_rx_mode_async.
> The callback is a no-op stub so just update the signature and
> ops struct wiring.
> 
> Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>

Reviewed-by: Breno Leitao <leitao@debian.org>

^ permalink raw reply

* Re: [PATCH 3/3] net: rose: fix out-of-bounds read in rose_parse_ccitt()
From: Simon Horman @ 2026-04-10 16:10 UTC (permalink / raw)
  To: mashiro.chen
  Cc: 'Simon Horman', netdev, davem, edumazet, kuba, pabeni,
	gregkh, ben, linux-hams, linux-kernel, stable
In-Reply-To: <20260407171600.102988-4-mashiro.chen@mailbox.org>

From: 'Simon Horman' <horms@kernel.org>

This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
Full review at: https://sashiko.dev
---
net: rose: fix out-of-bounds read in rose_parse_ccitt()

This commit adds bounds checking to rose_parse_ccitt() for 0xC0-class
facilities to prevent an out-of-bounds read when parsing call request
frames. It ensures the remaining buffer contains enough bytes before
accessing the facility data.

Cc: stable@vger.kernel.org

> diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
> index 4dbc437a9e229..a902ddeddc5bd 100644
> --- a/net/rose/rose_subr.c
> +++ b/net/rose/rose_subr.c
> @@ -370,6 +370,9 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
>  			if (l < 10 || l > 20)
>  				return -1;
>  
> +			if (len < 2 + l)
> +				return -1;
> +
>  			if (*p == FAC_CCITT_DEST_NSAP) {
>  				memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN);
>  				memcpy(callsign, p + 12,   l - 10);

Can asc2ax() read uninitialized stack memory here?

If a facility length of l = 17 is provided, memcpy() copies 7 bytes to the
callsign array, and the 8th byte is set to null, leaving the remaining
bytes uninitialized.

When asc2ax() processes this, it consumes the first 6 bytes in its loop.
The pointer then advances past the null byte, and the check if (*s != '\0')
will read the 9th byte, which contains uninitialized stack garbage.

Would zero-initializing the callsign array prevent this?

This regression wasn't introduced by this patch, but exists in the modified
function's loop termination logic:

net/rose/rose_subr.c:rose_parse_ccitt() {
    ...
    } while (*p != 0x00 && len > 0);
}

Due to short-circuit evaluation, if a facility consumes the exact remaining
bytes so that len becomes 0, will the loop dereference the pointer p before
checking if len > 0?

This would cause an out-of-bounds read past the valid facility data.

Should the condition be swapped to while (len > 0 && *p != 0x00)?

This regression wasn't introduced by this patch, but resides in the same
parsing sequence:

net/rose/rose_subr.c:rose_parse_facilities() {
    ...
    facilities_len = *p++;
    if (facilities_len == 0 || (unsigned int)facilities_len > packet_len)
    ...
}

Since packet_len represents the total remaining bytes including the 1-byte
length prefix itself, and the length byte has already been consumed, only
packet_len - 1 bytes actually remain for the facility data.

If an attacker crafts a packet where facilities_len == packet_len, will
this validation incorrectly pass?

The parsing loops will then expect packet_len bytes but only have
packet_len - 1 available, leading to an out-of-bounds read.

Could this check be updated to use facilities_len >= packet_len?

This regression wasn't introduced by this patch, but violates networking
subsystem safety rules regarding socket buffers.

Does the rose subsystem safely linearize socket buffers before dereferencing
packet headers?

Throughout the subsystem, including the parsing functions and routing logic,
packet offsets are accessed directly from skb->data based only on skb->len.

Since skb->len includes paged fragments, if a packet is fragmented such
that skb_headlen(skb) is smaller than the accessed offset, could
dereferencing skb->data trigger a page fault or read garbage memory?

Would adding pskb_may_pull() checks before accessing headers resolve this?

^ permalink raw reply

* Re: [PATCH 2/3] net: netrom: validate source address in nr_find_socket()
From: Simon Horman @ 2026-04-10 16:09 UTC (permalink / raw)
  To: mashiro.chen
  Cc: 'Simon Horman', netdev, davem, edumazet, kuba, pabeni,
	gregkh, ben, linux-hams, linux-kernel, stable
In-Reply-To: <20260407171600.102988-3-mashiro.chen@mailbox.org>

From: 'Simon Horman' <horms@kernel.org>

This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
Full review at: https://sashiko.dev
---
net: netrom: validate source address in nr_find_socket()

This commit adds a source address parameter to nr_find_socket() to validate
incoming frames against the socket's dest_addr. This prevents unauthenticated
injection of crafted NR_INFO frames into existing connections.

> diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
> index b816c56124ab8..1da32c52a1931 100644
> --- a/net/netrom/af_netrom.c
> +++ b/net/netrom/af_netrom.c

[ ... ]

> @@ -219,7 +221,8 @@ static unsigned short nr_find_next_circuit(void)
>  		j = id % 256;
>  
>  		if (i != 0 && j != 0) {
> -			if ((sk=nr_find_socket(i, j)) == NULL)
> +			sk = nr_find_socket(i, j, NULL);
> +			if (!sk)
>  				break;
>  			sock_put(sk);
>  		}

This isn't a bug introduced by this patch, but does the loop in
nr_find_next_circuit() lack a termination condition?

If all 65,025 valid (i, j) circuit IDs are in use, nr_find_socket() will
always return a socket. The loop will increment id indefinitely, wrap around
65535, and never break, potentially triggering a CPU lockup in softirq
context.

Additionally, since the circuit ID is managed globally, is there a race
condition during its allocation? Multiple threads could concurrently execute
nr_find_next_circuit() and circuit++, leading to identical circuit IDs being
assigned to different connections.

[ ... ]

> @@ -923,7 +926,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
>  		if (frametype == NR_CONNREQ)
>  			sk = nr_find_peer(circuit_index, circuit_id, src);
>  		else
> -			sk = nr_find_socket(circuit_index, circuit_id);
> +			sk = nr_find_socket(circuit_index, circuit_id, src);
>  	}
>  
>  	if (sk != NULL) {

This isn't a bug introduced by this patch, but does nr_rx_frame() safely
access the socket buffer data?

It unconditionally accesses data up to skb->data[19], and for CONNREQ frames,
it copies 7 bytes from skb->data + 21. If the packet is shorter than 28 bytes,
could this cause an out-of-bounds read and leak adjacent kernel memory?

Furthermore, pskb_may_pull() is not called before these accesses.

Additionally, if an IP-over-NET/ROM packet is smaller than 20 bytes,
skb_pull() fails and is ignored, erroneously passing the unmodified packet
to the IP stack.

^ permalink raw reply

* Re: [PATCH 1/3] net: netrom: fix integer overflow in nr_queue_rx_frame()
From: Simon Horman @ 2026-04-10 16:04 UTC (permalink / raw)
  To: mashiro.chen
  Cc: 'Simon Horman', netdev, davem, edumazet, kuba, pabeni,
	gregkh, ben, linux-hams, linux-kernel, stable
In-Reply-To: <20260407171600.102988-2-mashiro.chen@mailbox.org>

From: 'Simon Horman' <horms@kernel.org>

This is an AI-generated review of your patch. The human sending this
email has considered the AI review valid, or at least plausible.
Full review at: https://sashiko.dev

Simon says: This review combines a review of the patch with a report
of an existing problem. And you will see a similar pattern in
the AI generated reviews I am forwarding for the other patches in this
series.

I would suggest not growing the scope of this series too much.
But rather noting (other) existing problems as areas for further work.

Frankly, I suspect fixing all such problems is a significant body of work.

---
    net: netrom: fix integer overflow in nr_queue_rx_frame()

This commit addresses an integer overflow in nr_queue_rx_frame() where
accumulating fragment lengths could wrap around a 16-bit integer. It attempts
to fix this by checking if adding the incoming skb length to the accumulated
fraglen exceeds USHRT_MAX, purging the queue and returning an error if so.

> diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
> index 97944db6b5ac6..0b7cdb99ae501 100644
> --- a/net/netrom/nr_in.c
> +++ b/net/netrom/nr_in.c
> @@ -36,12 +36,22 @@ static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
>  	nr_start_idletimer(sk);
>  
>  	if (more) {
> +		if ((unsigned int)nr->fraglen + skb->len > USHRT_MAX) {
> +			skb_queue_purge(&nr->frag_queue);
> +			nr->fraglen = 0;
> +			return 1;
> +		}

If we return 1 here, does the caller (nr_state3_machine) treat it as a
transient busy condition?

If the caller retries the skb later, the skb will have already been modified
by the unconditional skb_pull() at the start of nr_queue_rx_frame(). Does
this mean the next time it is processed, it will read from the wrong offset
and potentially process attacker-controlled payload?

Furthermore, since nr->fraglen is reset to 0, wouldn't the oversized fragment
now pass the USHRT_MAX check on retry, bypassing the overflow mitigation
entirely?

When we purge the fragment queue here, are we also silently dropping
fragments that the protocol stack has already acknowledged to the sender?

Since nr_queue_rx_frame() previously returned 0 for those earlier fragments,
the expected sequence number has already advanced. If the sender never
retransmits them and fraglen is reset, will the receiver accept the next
fragment as the start of a new sequence, leading to a truncated message being
delivered to the application?

[ ... ]

While reviewing this patch, I also noticed a few pre-existing issues in
nr_queue_rx_frame() that might be worth addressing.

Looking at the start of the function:

static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
{
	struct sk_buff *skbo, *skbn = skb;
	struct nr_sock *nr = nr_sk(sk);

	skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);

Is it possible for skb_pull() to be called on a malformed packet where the
total length is sufficient but the linear region is too small? If so, would
this trigger a BUG_ON() panic inside __skb_pull()? Should there be a length
validation check using pskb_may_pull() beforehand?

Further down, during fragment reassembly:

	while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
		skb_copy_from_linear_data(skbo,
					  skb_put(skbn, skbo->len),
					  skbo->len);
		kfree_skb(skbo);
	}

If skbo is a non-linear (paged) skb, skbo->len includes the paged data length.
Since skb_copy_from_linear_data() acts as a direct memory copy, does this
result in reading past the end of the linear allocation (skb_headlen()) into
adjacent kernel memory?

Finally, looking at the error paths:

	if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL)
		return 1;
	...
	return sock_queue_rcv_skb(sk, skbn);

If sock_queue_rcv_skb() fails, it returns a non-zero error which propagates
to the caller. Does the caller then try to re-enqueue the original fragment
pointer into a retry queue?

Since the original fragments were already dequeued from frag_queue and freed
via kfree_skb() in the loop above, could enqueuing the freed skb lead to a
use-after-free?

Additionally, if alloc_skb() fails and returns 1, does the caller enqueue the
original skb while it is still linked in frag_queue, potentially corrupting
the doubly-linked list?

^ permalink raw reply

* Re: [GIT PULL] bluetooth 2026-04-01
From: patchwork-bot+bluetooth @ 2026-04-10 16:06 UTC (permalink / raw)
  To: Luiz Augusto von Dentz; +Cc: davem, kuba, linux-bluetooth, netdev
In-Reply-To: <20260401205834.2189162-1-luiz.dentz@gmail.com>

Hello:

This pull request was applied to bluetooth/bluetooth-next.git (master)
by Jakub Kicinski <kuba@kernel.org>:

On Wed,  1 Apr 2026 16:58:34 -0400 you wrote:
> The following changes since commit 48b3cd69265f346f64b93064723492da46206e9b:
> 
>   net: stmmac: skip VLAN restore when VLAN hash ops are missing (2026-03-31 19:45:26 -0700)
> 
> are available in the Git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git tags/for-net-2026-04-01
> 
> [...]

Here is the summary with links:
  - [GIT,PULL] bluetooth 2026-04-01
    https://git.kernel.org/bluetooth/bluetooth-next/c/6d6be7070e90

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [GIT PULL] bluetooth 2026-03-25
From: patchwork-bot+bluetooth @ 2026-04-10 16:06 UTC (permalink / raw)
  To: Luiz Augusto von Dentz; +Cc: davem, kuba, linux-bluetooth, netdev
In-Reply-To: <20260325194358.618892-1-luiz.dentz@gmail.com>

Hello:

This pull request was applied to bluetooth/bluetooth-next.git (master)
by Paolo Abeni <pabeni@redhat.com>:

On Wed, 25 Mar 2026 15:43:58 -0400 you wrote:
> The following changes since commit c4ea7d8907cf72b259bf70bd8c2e791e1c4ff70f:
> 
>   net: mana: fix use-after-free in add_adev() error path (2026-03-24 21:07:58 -0700)
> 
> are available in the Git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git tags/for-net-2026-03-25
> 
> [...]

Here is the summary with links:
  - [GIT,PULL] bluetooth 2026-03-25
    https://git.kernel.org/bluetooth/bluetooth-next/c/aa637b2cf303

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [GIT PULL] bluetooth 2026-03-12
From: patchwork-bot+bluetooth @ 2026-04-10 16:06 UTC (permalink / raw)
  To: Luiz Augusto von Dentz; +Cc: davem, kuba, linux-bluetooth, netdev
In-Reply-To: <20260312200655.1215688-1-luiz.dentz@gmail.com>

Hello:

This pull request was applied to bluetooth/bluetooth-next.git (master)
by Jakub Kicinski <kuba@kernel.org>:

On Thu, 12 Mar 2026 16:06:54 -0400 you wrote:
> The following changes since commit c38b8f5f791ecce13ab77e2257f8fd2444ba80f6:
> 
>   net: prevent NULL deref in ip[6]tunnel_xmit() (2026-03-12 16:03:41 +0100)
> 
> are available in the Git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git tags/for-net-2026-03-12
> 
> [...]

Here is the summary with links:
  - [GIT,PULL] bluetooth 2026-03-12
    https://git.kernel.org/bluetooth/bluetooth-next/c/74c1e2737bd5

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [GIT PULL] bluetooth 2026-03-19
From: patchwork-bot+bluetooth @ 2026-04-10 16:06 UTC (permalink / raw)
  To: Luiz Augusto von Dentz; +Cc: davem, kuba, linux-bluetooth, netdev
In-Reply-To: <20260319190455.135302-1-luiz.dentz@gmail.com>

Hello:

This pull request was applied to bluetooth/bluetooth-next.git (master)
by Jakub Kicinski <kuba@kernel.org>:

On Thu, 19 Mar 2026 15:04:55 -0400 you wrote:
> The following changes since commit 7ab4a7c5d969642782b8a5b608da0dd02aa9f229:
> 
>   MPTCP: fix lock class name family in pm_nl_create_listen_socket (2026-03-19 09:37:48 -0700)
> 
> are available in the Git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git tags/for-net-2026-03-19
> 
> [...]

Here is the summary with links:
  - [GIT,PULL] bluetooth 2026-03-19
    https://git.kernel.org/bluetooth/bluetooth-next/c/57ce3b2e9cda

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net-next] net: fix reference tracker mismanagement in netdev_put_lock()
From: Daniel Borkmann @ 2026-04-10 16:04 UTC (permalink / raw)
  To: Jakub Kicinski, davem
  Cc: netdev, edumazet, pabeni, andrew+netdev, horms, dw, skhawaja,
	bestswngs, razor
In-Reply-To: <20260410153600.1984522-1-kuba@kernel.org>

On 4/10/26 5:36 PM, Jakub Kicinski wrote:
> dev_put() releases a reference which didn't have a tracker.
> References without a tracker are accounted in the tracking
> code as "no_tracker". We can't free the tracker and then
> call dev_put(). The references themselves will be fine
> but the tracking code will think it's a double-release:
> 
>    refcount_t: decrement hit 0; leaking memory.
> 
> IOW commit under fixes confused dev_put() (release never tracked
> reference) with __dev_put() (just release the reference, skipping
> the reference tracking infra).
> 
> Since __netdev_put_lock() uses dev_put() we can't feed a previously
> tracked netdev ref into it. Let's flip things around.
> netdev_put(dev, NULL) is the same as dev_put(dev) so make
> netdev_put_lock() the real function and have __netdev_put_lock()
> feed it a NULL tracker for all the cases that were untracked.
> 
> Fixes: d04686d9bc86 ("net: Implement netdev_nl_queue_create_doit")
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Thanks!

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply

* Re: [PATCH] udp: Force compute_score to always inline
From: Gabriel Krisman Bertazi @ 2026-04-10 16:01 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: davem, dsahern, edumazet, kuba, pabeni, kuniyu, horms, netdev
In-Reply-To: <willemdebruijn.kernel.242540011e53c@gmail.com>

Willem de Bruijn <willemdebruijn.kernel@gmail.com> writes:

> Gabriel Krisman Bertazi wrote:
>
>> Back in 2024 I reported a 7-12% regression on an iperf3 UDP loopback
>> thoughput test that we traced to the extra overhead of calling
>> compute_score on two places, introduced by commit f0ea27e7bfe1 ("udp:
>> re-score reuseport groups when connected sockets are present").  At the
>> time, I pointed out the overhead was caused by the multiple calls,
>> associated with cpu-specific mitigations, and merged commit
>> 50aee97d1511 ("udp: Avoid call to compute_score on multiple sites") to
>> jump back explicitly, to force the rescore call in a single place.
>> 
>> Recently though, we got another regression report against a newer distro
>> version, which a team colleague traced back to the same root-cause.
>> Turns out that once we updated to gcc-13, the compiler got smart enough
>> to unroll the loop, undoing my previous mitigation.  Let's bite the
>> bullet and __always_inline compute_score on both ipv4 and ipv6 to
>> prevent gcc from de-optimizing it again in the future.  These functions
>> are only called in two places each, udpX_lib_lookup1 and
>> udpX_lib_lookup2, so the extra size shouldn't be a problem and it is hot
>> enough to be very visible in profilings.  In fact, with gcc13, forcing
>> the inline will prevent gcc from unrolling the fix from commit
>> 50aee97d1511, so we don't end up increasing udpX_lib_lookup2 at all.
>> 
>> I haven't recollected the results myself, as I don't have access to the
>> machine at the moment.  But the same colleague reported 4.67%
>> inprovement with this patch in the loopback benchmark, solving the
>> regression report within noise margins.
>> 
>> Fixes: 50aee97d1511 ("udp: Avoid call to compute_score on multiple sites")
>> Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
>
> Spotted this a tad late: should the comment udp4_lib_lookup2 be
> updated: "compute_score is too long of a function to be inline .."

Thanks for noticing.  I send a v2 just with this fixed and adding
bloat-o-meter data to the commit message, but preserved your ack.
Please review the updated comment for the ack.

-- 
Gabriel Krisman Bertazi

^ permalink raw reply

* Re: [PATCH net-next] net: phy: call phy_init_hw() in phy resume path
From: Andrew Lunn @ 2026-04-10 16:00 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Biju, Heiner Kallweit, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Ovidiu Panait, netdev, linux-kernel,
	Geert Uytterhoeven, Prabhakar Mahadev Lad, linux-renesas-soc,
	Biju Das
In-Reply-To: <adkVr0mMzDsXile1@shell.armlinux.org.uk>

On Fri, Apr 10, 2026 at 04:22:23PM +0100, Russell King (Oracle) wrote:
> On Fri, Apr 10, 2026 at 05:15:21PM +0200, Andrew Lunn wrote:
> > > Apart from that, looks fine to me - it seems some paths call
> > > phy_init_hw() can be called with or without phydev->lock held, and
> > > this one will call it with the lock held which seems to be okay.
> > 
> > Haven't we had deadlocks in this area before?
> 
> If we have a problem calling phy_init_hw() with phydev->lock held, then:

I thought it was an AB-BA with RTNL?

Lets see what PROVE_LOCKING actually says. I could be remembering
wrongly.

	Andrew

^ permalink raw reply

* [PATCH v2] udp: Force compute_score to always inline
From: Gabriel Krisman Bertazi @ 2026-04-10 15:59 UTC (permalink / raw)
  To: willemdebruijn.kernel, davem, dsahern, edumazet, kuba, pabeni,
	kuniyu
  Cc: horms, netdev, Gabriel Krisman Bertazi, Willem de Bruijn

Back in 2024 I reported a 7-12% regression on an iperf3 UDP loopback
thoughput test that we traced to the extra overhead of calling
compute_score on two places, introduced by commit f0ea27e7bfe1 ("udp:
re-score reuseport groups when connected sockets are present").  At the
time, I pointed out the overhead was caused by the multiple calls,
associated with cpu-specific mitigations, and merged commit
50aee97d1511 ("udp: Avoid call to compute_score on multiple sites") to
jump back explicitly, to force the rescore call in a single place.

Recently though, we got another regression report against a newer distro
version, which a team colleague traced back to the same root-cause.
Turns out that once we updated to gcc-13, the compiler got smart enough
to unroll the loop, undoing my previous mitigation.  Let's bite the
bullet and __always_inline compute_score on both ipv4 and ipv6 to
prevent gcc from de-optimizing it again in the future.  These functions
are only called in two places each, udpX_lib_lookup1 and
udpX_lib_lookup2, so the extra size shouldn't be a problem and it is hot
enough to be very visible in profilings.  In fact, with gcc13, forcing
the inline will prevent gcc from unrolling the fix from commit
50aee97d1511, so we don't end up increasing udpX_lib_lookup2 at all.

I haven't recollected the results myself, as I don't have access to the
machine at the moment.  But the same colleague reported 4.67%
inprovement with this patch in the loopback benchmark, solving the
regression report within noise margins.

Eric Dumazet reported no size change to vmlinux when built with clang.
I report the same also with gcc-13:

scripts/bloat-o-meter vmlinux vmlinux-inline
add/remove: 0/2 grow/shrink: 4/0 up/down: 616/-416 (200)
Function                                     old     new   delta
udp6_lib_lookup2                             762     949    +187
__udp6_lib_lookup                            810     975    +165
udp4_lib_lookup2                             757     906    +149
__udp4_lib_lookup                            871     986    +115
__pfx_compute_score                           32       -     -32
compute_score                                384       -    -384
Total: Before=35011784, After=35011984, chg +0.00%

Fixes: 50aee97d1511 ("udp: Avoid call to compute_score on multiple sites")
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>

---
v2:
  - update comment in udpX_lib_lookup2(Willem)
  - add bloat-o-meter information (Eric)
---
 net/ipv4/udp.c | 12 ++++++------
 net/ipv6/udp.c | 13 +++++++------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6c6b68a66dcd..74b621b20e83 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -365,10 +365,10 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
 	return udp_lib_get_port(sk, snum, hash2_nulladdr);
 }

-static int compute_score(struct sock *sk, const struct net *net,
-			 __be32 saddr, __be16 sport,
-			 __be32 daddr, unsigned short hnum,
-			 int dif, int sdif)
+static __always_inline int
+compute_score(struct sock *sk, const struct net *net,
+	      __be32 saddr, __be16 sport, __be32 daddr,
+	      unsigned short hnum, int dif, int sdif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -508,8 +508,8 @@ static struct sock *udp4_lib_lookup2(const struct net *net,
 				continue;

 			/* compute_score is too long of a function to be
-			 * inlined, and calling it again here yields
-			 * measurable overhead for some
+			 * inlined twice here, and calling it uninlined
+			 * here yields measurable overhead for some
 			 * workloads. Work around it by jumping
 			 * backwards to rescore 'result'.
 			 */
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 010b909275dd..301649a63e8a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -127,10 +127,11 @@ void udp_v6_rehash(struct sock *sk)
 	udp_lib_rehash(sk, new_hash, new_hash4);
 }

-static int compute_score(struct sock *sk, const struct net *net,
-			 const struct in6_addr *saddr, __be16 sport,
-			 const struct in6_addr *daddr, unsigned short hnum,
-			 int dif, int sdif)
+static __always_inline int
+compute_score(struct sock *sk, const struct net *net,
+	      const struct in6_addr *saddr, __be16 sport,
+	      const struct in6_addr *daddr, unsigned short hnum,
+	      int dif, int sdif)
 {
 	int bound_dev_if, score;
 	struct inet_sock *inet;
@@ -260,8 +261,8 @@ static struct sock *udp6_lib_lookup2(const struct net *net,
 				continue;

 			/* compute_score is too long of a function to be
-			 * inlined, and calling it again here yields
-			 * measurable overhead for some
+			 * inlined twice here, and calling it uninlined
+			 * here yields measurable overhead for some
 			 * workloads. Work around it by jumping
 			 * backwards to rescore 'result'.
 			 */
-- 
2.52.0

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox