Netdev List

Netdev List
 help / color / mirror / Atom feed

* RE: [Intel-wired-lan] [PATCH iwl-next v2 2/3] igc: move autoneg-enabled settings into igc_handle_autoneg_enabled()
From: Loktionov, Aleksandr @ 2026-04-16  9:05 UTC (permalink / raw)
  To: KhaiWenTan, Nguyen, Anthony L, Kitszel, Przemyslaw,
	andrew+netdev@lunn.ch, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com
  Cc: intel-wired-lan@lists.osuosl.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, Abdul Rahim, Faizal, Looi, Hong Aun,
	Tan, Khai Wen, Faizal Rahim, Looi, Alan Chia Wei
In-Reply-To: <20260416015520.6090-3-khai.wen.tan@linux.intel.com>



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> Of KhaiWenTan
> Sent: Thursday, April 16, 2026 3:55 AM
> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel,
> Przemyslaw <przemyslaw.kitszel@intel.com>; andrew+netdev@lunn.ch;
> davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com
> Cc: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; Abdul Rahim, Faizal
> <faizal.abdul.rahim@intel.com>; Looi, Hong Aun
> <hong.aun.looi@intel.com>; Tan, Khai Wen <khai.wen.tan@intel.com>;
> Faizal Rahim <faizal.abdul.rahim@linux.intel.com>; Looi; KhaiWenTan
> <khai.wen.tan@linux.intel.com>
> Subject: [Intel-wired-lan] [PATCH iwl-next v2 2/3] igc: move autoneg-
> enabled settings into igc_handle_autoneg_enabled()
> 
> From: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
> 
> Move the advertised link modes and flow control configuration from
> igc_ethtool_set_link_ksettings() into igc_handle_autoneg_enabled().
> 
> No functional change.
> 
> Reviewed-by: Looi, Hong Aun <hong.aun.looi@intel.com>
> Signed-off-by: Faizal Rahim <faizal.abdul.rahim@linux.intel.com>
> Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>
> ---
>  drivers/net/ethernet/intel/igc/igc_ethtool.c | 72 ++++++++++++-------
> -
>  1 file changed, 44 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c
> b/drivers/net/ethernet/intel/igc/igc_ethtool.c
> index 0122009bedd0..cfcbf2fdad6e 100644
> --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
> +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
> @@ -2000,6 +2000,49 @@ static int
> igc_ethtool_get_link_ksettings(struct net_device *netdev,
>  	return 0;
>  }
> 
> +/**
> + * igc_handle_autoneg_enabled - Configure autonegotiation
> advertisement
> + * @adapter: private driver structure
> + * @cmd: ethtool link ksettings from user
> + *
> + * Records advertised speeds and flow control settings when autoneg
> + * is enabled.
> + */
> +static void igc_handle_autoneg_enabled(struct igc_adapter *adapter,
> +				       const struct ethtool_link_ksettings
> *cmd) {
> +	struct igc_hw *hw = &adapter->hw;
> +	u16 advertised = 0;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  2500baseT_Full))
> +		advertised |= ADVERTISE_2500_FULL;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  1000baseT_Full))
> +		advertised |= ADVERTISE_1000_FULL;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  100baseT_Full))
> +		advertised |= ADVERTISE_100_FULL;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  100baseT_Half))
> +		advertised |= ADVERTISE_100_HALF;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  10baseT_Full))
> +		advertised |= ADVERTISE_10_FULL;
> +
> +	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> +						  10baseT_Half))
> +		advertised |= ADVERTISE_10_HALF;
> +
> +	hw->phy.autoneg_advertised = advertised;
> +	if (adapter->fc_autoneg)
> +		hw->fc.requested_mode = igc_fc_default; }
> +
>  static int
>  igc_ethtool_set_link_ksettings(struct net_device *netdev,
>  			       const struct ethtool_link_ksettings *cmd)
> @@ -2007,7 +2050,6 @@ igc_ethtool_set_link_ksettings(struct net_device
> *netdev,
>  	struct igc_adapter *adapter = netdev_priv(netdev);
>  	struct net_device *dev = adapter->netdev;
>  	struct igc_hw *hw = &adapter->hw;
> -	u16 advertised = 0;
> 
>  	/* When adapter in resetting mode, autoneg/speed/duplex
>  	 * cannot be changed
> @@ -2032,34 +2074,8 @@ igc_ethtool_set_link_ksettings(struct
> net_device *netdev,
>  	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
>  		usleep_range(1000, 2000);
> 
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  2500baseT_Full))
> -		advertised |= ADVERTISE_2500_FULL;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  1000baseT_Full))
> -		advertised |= ADVERTISE_1000_FULL;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  100baseT_Full))
> -		advertised |= ADVERTISE_100_FULL;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  100baseT_Half))
> -		advertised |= ADVERTISE_100_HALF;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  10baseT_Full))
> -		advertised |= ADVERTISE_10_FULL;
> -
> -	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
> -						  10baseT_Half))
> -		advertised |= ADVERTISE_10_HALF;
> -
>  	if (cmd->base.autoneg == AUTONEG_ENABLE) {
> -		hw->phy.autoneg_advertised = advertised;
> -		if (adapter->fc_autoneg)
> -			hw->fc.requested_mode = igc_fc_default;
> +		igc_handle_autoneg_enabled(adapter, cmd);
>  	} else {
>  		netdev_info(dev, "Force mode currently not
> supported\n");
>  	}
> --
> 2.43.0


Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>

^ permalink raw reply

* Re: [PATCH v3 net] ax25: fix OOB read after address header strip in ax25_rcv()
From: David Laight @ 2026-04-16  9:21 UTC (permalink / raw)
  To: Ashutosh Desai
  Cc: netdev, linux-hams, jreuter, davem, edumazet, kuba, pabeni, horms,
	stable, linux-kernel
In-Reply-To: <69e07601.c80a0220.2f9024.1e0b@mx.google.com>

On Wed, 15 Apr 2026 22:39:13 -0700 (PDT)
Ashutosh Desai <ashutoshdesai993@gmail.com> wrote:

> On Wed, 15 Apr 2026 08:59:21 +0100, David Laight wrote:
> > Is it just worth linearising the skb on entry to all this code?  
> 
> Thanks for the feedback, David.
> 
> skb_linearize() on entry is a nice idea for simplifying sanity checks
> overall, but it wouldn't fix this particular bug on its own - the issue
> is skb->len dropping to zero after skb_pull(), not non-linear data. We'd
> still need a length check regardless. pskb_may_pull(skb, 2) handles both
> in one call.

The skb->len >= 2 check will be a lot cheaper/smaller.

> That said, linearizing on entry to ax25_rcv() as a cleanup to simplify
> future checks sounds worthwhile - happy to send that as a separate
> net-next patch.

I think you proposed just checking skb->len in an earlier version
and it was pointed out that the skb may not be linear.
So perhaps linearize as part of this fix and leave the simplifcation
of any other checks to later.

	David

^ permalink raw reply

* RE: [Intel-wired-lan] [PATCH net] ixgbe: only access vfinfo and mv_list under RCU lock
From: Loktionov, Aleksandr @ 2026-04-16  9:23 UTC (permalink / raw)
  To: Vinschen, Corinna, intel-wired-lan@lists.osuosl.org,
	netdev@vger.kernel.org
  Cc: Vinschen, Corinna
In-Reply-To: <20260416084227.3787828-1-vinschen@redhat.com>



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> Of Corinna Vinschen
> Sent: Thursday, April 16, 2026 10:42 AM
> To: intel-wired-lan@lists.osuosl.org; netdev@vger.kernel.org
> Cc: Vinschen, Corinna <vinschen@redhat.com>
> Subject: [Intel-wired-lan] [PATCH net] ixgbe: only access vfinfo and
> mv_list under RCU lock
> 
> Commit 1e53834ce541d ("ixgbe: Add locking to prevent panic when
> setting
> sriov_numvfs to zero") added a spinlock to the adapter info.  The
> reason
> at the time was an observed crash when ixgbe_disable_sriov() freed the
> adapter->vfinfo array while the interrupt driven function
> ixgbe_msg_task()
> was handling VF messages.
> 
> Recent stability testing turned up another crash, which is very easily
> reproducible:
> 
>   while true
>   do
>     for numvfs in 5 0
>     do
>       echo $numvfs > /sys/class/net/eth0/device/sriov_numvfs
>     done
>   done
> 
> This crashed almost always within the first two hundred runs with
> a NULL pointer deref while running the ixgbe_service_task() workqueue:
> 
> [ 5052.036491] BUG: kernel NULL pointer dereference, address:
> 0000000000000258
> [ 5052.043454] #PF: supervisor read access in kernel mode
> [ 5052.048594] #PF: error_code(0x0000) - not-present page
> [ 5052.053734] PGD 0 P4D 0
> [ 5052.056272] Oops: Oops: 0000 #1 SMP NOPTI
> [ 5052.060459] CPU: 2 UID: 0 PID: 132253 Comm: kworker/u96:0 Kdump:
> loaded Not tainted 6.12.0-180.el10.x86_64 #1 PREEMPT(voluntary)
> [ 5052.072100] Hardware name: Dell Inc. PowerEdge R740/0DY2X0, BIOS
> 2.12.2 07/09/2021
> [ 5052.079664] Workqueue: ixgbe ixgbe_service_task [ixgbe]
> [ 5052.084907] RIP: 0010:ixgbe_update_stats+0x8b1/0xb40 [ixgbe]
> [ 5052.090585] Code: 21 56 50 49 8b b6 18 26 00 00 4c 01 fe 48 09 46
> 50 42 8d 34 a5 00 83 00 00 e8 cb 7a ff ff 49 8b b6 18 26 00 00 89 c0
> 4c 01 fe <48> 3b 86 88 00 00 00 73 18 48 b9 00 00 00 00 01 00 00 00 48
> 01 4e
> [ 5052.109331] RSP: 0018:ffffd5f1e8a6bd88 EFLAGS: 00010202
> [ 5052.114558] RAX: 0000000000000000 RBX: ffff8f49b22b14a0 RCX:
> 000000000000023c
> [ 5052.121689] RDX: ffffffff00000000 RSI: 00000000000001d0 RDI:
> ffff8f49b22b14a0
> [ 5052.128823] RBP: 000000000000109c R08: 0000000000000000 R09:
> 0000000000000000
> [ 5052.135955] R10: 0000000000000000 R11: 0000000000000000 R12:
> 0000000000000002
> [ 5052.143086] R13: 0000000000008410 R14: ffff8f49b22b01a0 R15:
> 00000000000001d0
> [ 5052.150221] FS:  0000000000000000(0000) GS:ffff8f58bfc80000(0000)
> knlGS:0000000000000000
> [ 5052.158307] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 5052.164054] CR2: 0000000000000258 CR3: 0000000bf2624006 CR4:
> 00000000007726f0
> [ 5052.171187] PKRU: 55555554
> [ 5052.173898] Call Trace:
> [ 5052.176351]  <TASK>
> [ 5052.178457]  ? show_trace_log_lvl+0x1b0/0x2f0
> [ 5052.182816]  ? show_trace_log_lvl+0x1b0/0x2f0
> [ 5052.187177]  ? ixgbe_watchdog_subtask+0x1a1/0x230 [ixgbe]
> [ 5052.192591]  ? __die_body.cold+0x8/0x12
> [ 5052.196433]  ? page_fault_oops+0x148/0x160
> [ 5052.200532]  ? exc_page_fault+0x7f/0x150
> [ 5052.204458]  ? asm_exc_page_fault+0x26/0x30
> [ 5052.208643]  ? ixgbe_update_stats+0x8b1/0xb40 [ixgbe]
> [ 5052.213714]  ? ixgbe_update_stats+0x8a5/0xb40 [ixgbe]
> [ 5052.218784]  ixgbe_watchdog_subtask+0x1a1/0x230 [ixgbe]
> [ 5052.224026]  ixgbe_service_task+0x15a/0x3f0 [ixgbe]
> [ 5052.228916]  process_one_work+0x177/0x330
> [ 5052.232928]  worker_thread+0x256/0x3a0
> [ 5052.236681]  ? __pfx_worker_thread+0x10/0x10
> [ 5052.240952]  kthread+0xfa/0x240
> [ 5052.244099]  ? __pfx_kthread+0x10/0x10
> [ 5052.247852]  ret_from_fork+0x34/0x50
> [ 5052.251429]  ? __pfx_kthread+0x10/0x10
> [ 5052.255185]  ret_from_fork_asm+0x1a/0x30
> [ 5052.259112]  </TASK>
> 
> The first simple patch, just adding spinlocking to
> ixgbe_update_stats()
> while reading from adapter->vfinfo, did not fix the problem, it just
> moved it elsewhere: I could now reproduce the same kind of crash in
> ixgbe_restore_vf_multicasts().
> 
> But adding more spinlocking doesn't really cut it.  One reason is that
> ixgbe_restore_vf_multicasts() is called from within ixgbe_msg_task()
> with active spinlock, as well as from outside without locking.
> 
> Additionally, given that ixgbe_disable_sriov() is the only call
> changing
> adapter->vfinfo, and given ixgbe_disable_sriov() is called very
> seldom compared to other actions in the driver, just adding more
> spinlocks would unnecessarily occupy the driver with spinning when
> multiple functions accessing adapter->vfinfo are running in parallel.
> 
> So this patch drops the spinlock in favor of RCU and uses it
> throughout
> the driver.
> 
> While changing this, it seems prudent to do the same for the
> adapter->mv_list array, which is allocated and freed at the same time
> as
> adapter->vfinfo, albeit there was no crash observed.
> 
> Fixes: 1e53834ce541d ("ixgbe: Add locking to prevent panic when
> setting sriov_numvfs to zero")
> Signed-off-by: Corinna Vinschen <vinschen@redhat.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h      |   7 +-
>  .../net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c   |  36 +-
>  .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c  |  44 +-
>  .../net/ethernet/intel/ixgbe/ixgbe_ipsec.c    |  17 +-
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 229 +++++---
>  .../net/ethernet/intel/ixgbe/ixgbe_sriov.c    | 547 ++++++++++++-----
> -
>  6 files changed, 593 insertions(+), 287 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> index 9b8217523fd2..8849b9f42bf6 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> @@ -210,6 +210,7 @@ struct vf_stats {
>  };
> 
>  struct vf_data_storage {
> +	struct rcu_head rcu_head;
>  	struct pci_dev *vfdev;
>  	unsigned char vf_mac_addresses[ETH_ALEN];
>  	u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
> @@ -240,6 +241,7 @@ enum ixgbevf_xcast_modes {
>  };
> 
>  struct vf_macvlans {
> +	struct rcu_head rcu_head;
>  	struct list_head l;
>  	int vf;
>  	bool free;
> @@ -808,10 +810,10 @@ struct ixgbe_adapter {
>  	/* SR-IOV */
>  	DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
>  	unsigned int num_vfs;

...

>  		if (!vfdev)
>  			continue;
>  		pci_read_config_word(vfdev, PCI_STATUS, &status_reg);
> @@ -9744,17 +9781,23 @@ static int ixgbe_ndo_get_vf_stats(struct
> net_device *netdev, int vf,
>  				  struct ifla_vf_stats *vf_stats)
>  {
>  	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
> +	struct vf_data_storage *vfinfo;
> 
>  	if (vf < 0 || vf >= adapter->num_vfs)
>  		return -EINVAL;
> 
> -	vf_stats->rx_packets = adapter->vfinfo[vf].vfstats.gprc;
> -	vf_stats->rx_bytes   = adapter->vfinfo[vf].vfstats.gorc;
> -	vf_stats->tx_packets = adapter->vfinfo[vf].vfstats.gptc;
> -	vf_stats->tx_bytes   = adapter->vfinfo[vf].vfstats.gotc;
> -	vf_stats->multicast  = adapter->vfinfo[vf].vfstats.mprc;
> +	rcu_read_lock();
> +	vfinfo = rcu_dereference(adapter->vfinfo);
> +	if (vfinfo) {
> +		vf_stats->rx_packets = vfinfo[vf].vfstats.gprc;
> +		vf_stats->rx_bytes   = vfinfo[vf].vfstats.gorc;
> +		vf_stats->tx_packets = vfinfo[vf].vfstats.gptc;
> +		vf_stats->tx_bytes   = vfinfo[vf].vfstats.gotc;
> +		vf_stats->multicast  = vfinfo[vf].vfstats.mprc;
> +	}
> +	rcu_read_unlock();
> 
> -	return 0;
> +	return vfinfo ? 0 : -EINVAL;
Before it returned always success, but now it will break 'ip link show dev' in short window when SR-IOV is being torn down.
For me it looks like UAPI regression.

>  }
> 
>  #ifdef CONFIG_IXGBE_DCB
> @@ -10071,20 +10114,26 @@ static int handle_redirect_action(struct
> ixgbe_adapter *adapter, int ifindex,
>  {
>  	struct ixgbe_ring_feature *vmdq = &adapter-
> >ring_feature[RING_F_VMDQ];
>  	unsigned int num_vfs = adapter->num_vfs, vf;

...

>  	return 0;
>  }
> --
> 2.53.0


^ permalink raw reply

* Re: [net,PATCH v3 1/2] net: ks8851: Reinstate disabling of BHs around IRQ handler
From: Marek Vasut @ 2026-04-16  9:26 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: netdev, stable, David S. Miller, Andrew Lunn, Eric Dumazet,
	Jakub Kicinski, Nicolai Buchwitz, Paolo Abeni, Ronald Wahl,
	Yicong Hui, linux-kernel
In-Reply-To: <20260416062159.fPxqc52X@linutronix.de>

On 4/16/26 8:21 AM, Sebastian Andrzej Siewior wrote:
> On 2026-04-16 01:14:35 [+0200], Marek Vasut wrote:
>>> spin_unlock_bh(&ks->statelock)? After that unlock, the softirq must be
>>> processed and __netdev_alloc_skb() _could_ observe pending softirqs but
>>> not from ks8851.
>> Because __netdev_alloc_skb() also enables/disables BH , see the "else"
> 
> Yes. But there is no softirq raised in that part. That softirq is raised
> by netif_wake_queue() within a bh disabled section. Therefore upon the
> unlock the softirq must be invoked.
> After that, rhe allocation later on may invoke softirqs which were
> raised but I don't see how ks8851 can be part of it.
> Before commit 0913ec336a6c0 ("net: ks8851: Fix deadlock with the SPI
> chip variant") there was no _bh around it meaning the softirq was raised
> but not invoked immediately. This happened on the bh unlock during
> memory allocation. Therefore I am saying this backtrace is from an older
> kernel.

I actually did update the backtrace in V3 with the one from next 
20260413 that contained b44596ffe1b4 ("ARM: Allow to enable RT") from 
stable-rt/v6.12-rt-rebase branch [1] .

I think I misunderstood the usage of "softirq is raised" vs. "softirq is 
invoked" above . Is it possible that there was an already raised softirq 
before the threaded IRQ handler was invoked, and __netdev_alloc_skb() is 
what invoked that softirq ?

> If there is a flaw in my the theory please explain _how_ you managed
> that get that backtrace. I am sure it must have from an older kernel and
> _now_ this lockup also happens on !RT kernels (except for the SPI
> platform).
I used [1] , with PREEMPT_RT enabled , on stm32mp157c SoC . I ran iperf3 
-s on the stm32 side, iperf3 -c 192.168.1.2 -t 0 --bidir on the hostpc 
side. The backtrace happened shortly after.

^ permalink raw reply

* Re: [PATCH] net/sched: act_mirred: fix wrong device for mac_header_xmit check in tcf_blockcast_redir
From: patchwork-bot+netdevbpf @ 2026-04-16  9:40 UTC (permalink / raw)
  To: Dudu Lu; +Cc: netdev, jhs, jiri
In-Reply-To: <20260413084927.71353-1-phx0fer@gmail.com>

Hello:

This patch was applied to netdev/net.git (main)
by Paolo Abeni <pabeni@redhat.com>:

On Mon, 13 Apr 2026 16:49:27 +0800 you wrote:
> In tcf_blockcast_redir(), when iterating block ports to redirect
> packets to multiple devices, the mac_header_xmit flag is queried
> from the wrong device. The loop sends to dev_prev but queries
> dev_is_mac_header_xmit(dev) — which is the NEXT device in the
> iteration, not the one being sent to.
> 
> This causes tcf_mirred_to_dev() to make incorrect decisions about
> whether to push or pull the MAC header. When the block contains
> mixed device types (e.g., an ethernet veth and a tunnel device),
> intermediate devices get the wrong mac_header_xmit flag, leading to
> skb header corruption. In the worst case, skb_push_rcsum with an
> incorrect mac_len can exhaust headroom and panic.
> 
> [...]

Here is the summary with links:
  - net/sched: act_mirred: fix wrong device for mac_header_xmit check in tcf_blockcast_redir
    https://git.kernel.org/netdev/net/c/4510d140524c

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH] net: dsa: sja1105: fix division by zero in sja1105_tas_set_runtime_params()
From: Paolo Abeni @ 2026-04-16 10:09 UTC (permalink / raw)
  To: Alexander.Chesnokov, olteanv
  Cc: lvc-project, Oleg.Kazakov, Pavel.Zhigulin, stable, Andrew Lunn,
	Florian Fainelli, David S. Miller, Eric Dumazet, Jakub Kicinski,
	linux-kernel, netdev
In-Reply-To: <20260413085140.33138-1-Alexander.Chesnokov@kaspersky.com>

On 4/13/26 10:51 AM, Alexander.Chesnokov@kaspersky.com wrote:
> From: Alexander Chesnokov <Alexander.Chesnokov@kaspersky.com>
> 
> If taprio offload is configured such that none of the ports' base_time
> is less than S64_MAX (the initial value of earliest_base_time), then
> its_cycle_time remains zero and is passed to future_base_time() as
> cycle_time, causing division by zero in div_s64().
> 
> Add a check for its_cycle_time being zero before calling
> future_base_time() and return -EINVAL.
> 
> Found by Linux Verification Center (linuxtesting.org) with SVACE.
> 
> Fixes: 86db36a347b4 ("net: dsa: sja1105: Implement state machine for TAS with PTP clock source")
> Cc: stable@vger.kernel.org
> 

No empty lines in the tag area.

> Signed-off-by: Alexander Chesnokov <Alexander.Chesnokov@kaspersky.com>
> ---
>  drivers/net/dsa/sja1105/sja1105_tas.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c
> index e6153848a950..ce4b544a2b9c 100644
> --- a/drivers/net/dsa/sja1105/sja1105_tas.c
> +++ b/drivers/net/dsa/sja1105/sja1105_tas.c
> @@ -62,6 +62,9 @@ static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
>  	if (!tas_data->enabled)
>  		return 0;
>  
> +	if (!its_cycle_time)
> +		return -EINVAL;

Sashiko says:

Is this division by zero reachable without this check?
When all ports have base_time == S64_MAX, earliest_base_time and
latest_base_time are both S64_MAX. When future_base_time(S64_MAX, 0,
S64_MAX) is called, it returns early because base_time >= now (S64_MAX
>= S64_MAX), avoiding the division.
Could this new error path cause an actual division by zero later?
When returning -EINVAL here, tas_data->enabled is already set to true,
but tas_data->max_cycle_time is left uninitialized (0).
If sja1105_tas_state_machine() runs later, it will pass this
max_cycle_time as the cycle_time argument to future_base_time(). Since 0
>= now + 1s is false, it proceeds to call div_s64() with a zero divisor.

/P


^ permalink raw reply

* Re: [PATCH] macvlan: fix macvlan_get_size() not reserving space for IFLA_MACVLAN_BC_CUTOFF
From: patchwork-bot+netdevbpf @ 2026-04-16 10:20 UTC (permalink / raw)
  To: Dudu Lu; +Cc: netdev, andrew+netdev, davem, edumazet, kuba, pabeni
In-Reply-To: <20260413085349.73977-1-phx0fer@gmail.com>

Hello:

This patch was applied to netdev/net.git (main)
by Paolo Abeni <pabeni@redhat.com>:

On Mon, 13 Apr 2026 16:53:49 +0800 you wrote:
> macvlan_get_size() does not account for IFLA_MACVLAN_BC_CUTOFF, but
> macvlan_fill_info() conditionally includes it when port->bc_cutoff != 1.
> This causes nla_put_s32() to fail with -EMSGSIZE when the netlink skb
> runs out of space, triggering a WARN_ON in rtnetlink and preventing the
> interface from being dumped.
> 
> The bug can be reproduced with:
> 
> [...]

Here is the summary with links:
  - macvlan: fix macvlan_get_size() not reserving space for IFLA_MACVLAN_BC_CUTOFF
    https://git.kernel.org/netdev/net/c/fa92a77b0ed4

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net 00/14] Netfilter/IPVS fixes for net
From: Pablo Neira Ayuso @ 2026-04-16 10:20 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, horms
In-Reply-To: <aeCPB1_WaFOX-Xos@chamomile>

Hi,

On Thu, Apr 16, 2026 at 09:25:59AM +0200, Pablo Neira Ayuso wrote:
> Hi,
> 
> I am preparing a v2 to address so AI generated comment, I should be
> ready in a few hours.

Just a quick follow up.

I cannot send a batch before 16h my local time, I need a bit more
time.

Sorry.

> Thanks.
> 
> On Thu, Apr 16, 2026 at 03:30:47AM +0200, Pablo Neira Ayuso wrote:
> > Hi,
> > 
> > The following patchset contains Netfilter/IPVS fixes for net: Mostly
> > addressing very old bugs in the SIP conntrack helper string parser,
> > unsafe arp_tables match support with legacy IEEE1394, restrict xt_realm
> > to IPv4 and incorrect use of RCU lists in nat core and nftables. This
> > batch also includes one IPVS MTU fix. The exception is a fix for a
> > recent issue related to broken double-tagged vlan in the flowtable.
> > 
> > 1) Fix possible stack recursion in nft_fwd_netdev from egress path,
> >    from Weiming Shi.
> > 
> > 2) Fix unsafe port parser in SIP helper, from Jenny Guanni Qu.
> > 
> > 3) Fix arp_tables match with IEEE1394 ARP payload, allowing to
> >    reach bytes off the skb boundary, from Weiming Shi.
> > 
> > 4) Reject unsafe nfnetlink_osf configurations from control plane,
> >    this is addressing a possible division by zero, from Xiang Mei.
> > 
> > 5) nft_osf actually only supports IPv4, restrict it.
> > 
> > 6) Fix double-tagged-vlan support (again) in the flowtable, from
> >    Eric Woudstra.
> > 
> > 7) Remove unsafe use of sprintf to fix possible buffer overflow
> >    in the SIP NAT helper, from Florian Westphal.
> > 
> > 8) Restrict xt_mac, xt_owner and xt_physdev to inet families only;
> >    xt_realm is only for ipv4, otherwise null-pointer-deref is possible.
> > 
> > 9) Use kfree_rcu() in nat core to release hooks, this can be an issue
> >    once nfnetlink_hook gets support to dump NAT hook information,
> >    not currently a real issue but better fix it now.
> > 
> > 10) Fix MTU checks in IPVS, from Yingnan Zhang.
> > 
> > 11) Use list_del_rcu() in chain and flowtable hook unregistration,
> >     concurrent RCU reader could be walking over the hook list,
> >     from Florian Westphal.
> > 
> > 12) Add list_splice_rcu(), this is required to fix unsafe
> >     splice to RCU protected hook list. Reviewed by Paul McKenney.
> > 
> > 13) Use list_splice_rcu() to splice new chain and flowtable hooks.
> > 
> > 14) Add shim nft_trans_hook object to track chain and flowtable
> >     hook deletions and flag them as removed, instead of unsafely
> >     moving around hooks in the RCU-protected hook list. This allows
> >     to restore the previous state from the abort path.
> > 
> > Please, pull these changes from:
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git nf-26-04-16
> > 
> > Thanks.
> > 
> > ----------------------------------------------------------------
> > 
> > The following changes since commit 2dddb34dd0d07b01fa770eca89480a4da4f13153:
> > 
> >   net: ethernet: mtk_eth_soc: initialize PPE per-tag-layer MTU registers (2026-04-12 15:22:58 -0700)
> > 
> > are available in the Git repository at:
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git tags/nf-26-04-16
> > 
> > for you to fetch changes up to e349f90da812aeddd22c3914a2cc639b51e4eb48:
> > 
> >   netfilter: nf_tables: add hook transactions for device deletions (2026-04-16 02:47:58 +0200)
> > 
> > ----------------------------------------------------------------
> > netfilter pull request 26-04-16
> > 
> > ----------------------------------------------------------------
> > Eric Woudstra (1):
> >       netfilter: nf_flow_table_ip: Introduce nf_flow_vlan_push()
> > 
> > Florian Westphal (2):
> >       netfilter: conntrack: remove sprintf usage
> >       netfilter: nf_tables: use list_del_rcu for netlink hooks
> > 
> > Jenny Guanni Qu (1):
> >       netfilter: nf_conntrack_sip: add bounds-checked port parsing helper
> > 
> > Pablo Neira Ayuso (6):
> >       netfilter: nft_osf: restrict it to ipv4
> >       netfilter: xtables: restrict several matches to inet family
> >       netfilter: nat: use kfree_rcu to release ops
> >       rculist: add list_splice_rcu() for private lists
> >       netfilter: nf_tables: join hook list via splice_list_rcu() in commit phase
> >       netfilter: nf_tables: add hook transactions for device deletions
> > 
> > Weiming Shi (2):
> >       netfilter: nft_fwd_netdev: use recursion counter in neigh egress path
> >       netfilter: arp_tables: fix IEEE1394 ARP payload parsing in arp_packet_match()
> > 
> > Xiang Mei (1):
> >       netfilter: nfnetlink_osf: fix divide-by-zero in OSF_WSS_MODULO
> > 
> > Yingnan Zhang (1):
> >       ipvs: fix MTU check for GSO packets in tunnel mode
> > 
> >  include/linux/rculist.h               |  29 ++++++
> >  include/net/netfilter/nf_dup_netdev.h |  13 +++
> >  include/net/netfilter/nf_tables.h     |  13 +++
> >  net/ipv4/netfilter/arp_tables.c       |  14 ++-
> >  net/ipv4/netfilter/iptable_nat.c      |   2 +-
> >  net/ipv6/netfilter/ip6table_nat.c     |   2 +-
> >  net/netfilter/ipvs/ip_vs_xmit.c       |  19 +++-
> >  net/netfilter/nf_conntrack_sip.c      |  80 +++++++++++-----
> >  net/netfilter/nf_dup_netdev.c         |  16 ----
> >  net/netfilter/nf_flow_table_ip.c      |  25 ++++-
> >  net/netfilter/nf_nat_amanda.c         |   2 +-
> >  net/netfilter/nf_nat_core.c           |  10 +-
> >  net/netfilter/nf_nat_sip.c            |  33 ++++---
> >  net/netfilter/nf_tables_api.c         | 168 ++++++++++++++++++++++++----------
> >  net/netfilter/nfnetlink_osf.c         |   4 +
> >  net/netfilter/nft_fwd_netdev.c        |   7 ++
> >  net/netfilter/nft_osf.c               |   6 +-
> >  net/netfilter/xt_mac.c                |  34 ++++---
> >  net/netfilter/xt_owner.c              |  37 +++++---
> >  net/netfilter/xt_physdev.c            |  29 ++++--
> >  net/netfilter/xt_realm.c              |   2 +-
> >  21 files changed, 393 insertions(+), 152 deletions(-)
> > 
> 

^ permalink raw reply

* Re: [patch 18/38] lib/tests: Replace get_cycles() with ktime_get()
From: Geert Uytterhoeven @ 2026-04-16 10:24 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Andrew Morton, Uladzislau Rezki, linux-mm, Arnd Bergmann,
	x86, Lu Baolu, iommu, Michael Grzeschik, netdev, linux-wireless,
	Herbert Xu, linux-crypto, Vlastimil Babka, David Woodhouse,
	Bernie Thompson, linux-fbdev, Theodore Tso, linux-ext4,
	Marco Elver, Dmitry Vyukov, kasan-dev, Andrey Ryabinin,
	Thomas Sailer, linux-hams, Jason A. Donenfeld, Richard Henderson,
	linux-alpha, Russell King, linux-arm-kernel, Catalin Marinas,
	Huacai Chen, loongarch, linux-m68k, Dinh Nguyen, Jonas Bonn,
	linux-openrisc, Helge Deller, linux-parisc, Michael Ellerman,
	linuxppc-dev, Paul Walmsley, linux-riscv, Heiko Carstens,
	linux-s390, David S. Miller, sparclinux
In-Reply-To: <20260410120318.794680738@kernel.org>

Hi Thomas,

On Fri, 10 Apr 2026 at 14:20, Thomas Gleixner <tglx@kernel.org> wrote:
> get_cycles() is the historical access to a fine grained time source, but it
> is a suboptimal choice for two reasons:
>
>    - get_cycles() is not guaranteed to be supported and functional on all
>      systems/platforms. If not supported or not functional it returns 0,
>      which makes benchmarking moot.
>
>    - get_cycles() returns the raw counter value of whatever the
>      architecture platform provides. The original x86 Time Stamp Counter
>      (TSC) was despite its name tied to the actual CPU core frequency.
>      That's not longer the case. So the counter value is only meaningful
>      when the CPU operates at the same frequency as the TSC or the value is
>      adjusted to the actual CPU frequency. Other architectures and
>      platforms provide similar disjunct counters via get_cycles(), so the
>      result is operations per BOGO-cycles, which is not really meaningful.
>
> Use ktime_get() instead which provides nanosecond timestamps with the
> granularity of the underlying hardware counter, which is not different to
> the variety of get_cycles() implementations.
>
> This provides at least understandable metrics, i.e. operations/nanoseconds,
> and is available on all platforms. As with get_cycles() the result might
> have to be put into relation with the CPU operating frequency, but that's
> not any different.
>
> This is part of a larger effort to remove get_cycles() usage from
> non-architecture code.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>

Thanks for your patch!

> --- a/lib/interval_tree_test.c
> +++ b/lib/interval_tree_test.c
> @@ -65,13 +65,13 @@ static void init(void)
>  static int basic_check(void)
>  {
>         int i, j;
> -       cycles_t time1, time2, time;
> +       ktime_t time1, time2, time;
>
>         printk(KERN_ALERT "interval tree insert/remove");
>
>         init();
>
> -       time1 = get_cycles();
> +       time1 = ktime_get();
>
>         for (i = 0; i < perf_loops; i++) {
>                 for (j = 0; j < nnodes; j++)
> @@ -80,11 +80,11 @@ static int basic_check(void)
>                         interval_tree_remove(nodes + j, &root);
>         }
>
> -       time2 = get_cycles();
> +       time2 = ktime_get();
>         time = time2 - time1;
>
>         time = div_u64(time, perf_loops);
> -       printk(" -> %llu cycles\n", (unsigned long long)time);
> +       printk(" -> %llu nsecs\n", (unsigned long long)time);

While cycles_t was unsigned long or long long, ktime_t is always s64,
so "%lld", and the cast can be dropped (everywhere).

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* [PATCH net] slip: bound decode() reads against the compressed packet length
From: Weiming Shi @ 2026-04-16 10:01 UTC (permalink / raw)
  To: Andrew Lunn, David S . Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni
  Cc: Simon Horman, netdev, Weiming Shi

slhc_uncompress() parses a VJ-compressed TCP header by advancing a
pointer through the packet via decode() and pull16(). Neither helper
bounds-checks against isize, and decode() masks its return with
& 0xffff so it can never return the -1 that callers test for -- those
error paths are dead code.

A short compressed frame whose change byte requests optional fields
lets decode() read past the end of the packet. The over-read bytes
are folded into the cached cstate and reflected into subsequent
reconstructed packets.

Make decode() and pull16() take the packet end pointer and return -1
when exhausted. Add a bounds check before the TCP-checksum read.
The existing == -1 tests now do what they were always meant to.

Fixes: b5451d783ade ("slip: Move the SLIP drivers")
Reported-by: Simon Horman <horms@kernel.org>
Closes: https://lore.kernel.org/netdev/20260414134126.758795-2-horms@kernel.org/
Signed-off-by: Weiming Shi <bestswngs@gmail.com>
---
 drivers/net/slip/slhc.c | 43 ++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
index e3c785da3eef3..d1c0523085d3a 100644
--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -80,9 +80,9 @@
 #include <linux/unaligned.h>
 
 static unsigned char *encode(unsigned char *cp, unsigned short n);
-static long decode(unsigned char **cpp);
+static long decode(unsigned char **cpp, const unsigned char *end);
 static unsigned char * put16(unsigned char *cp, unsigned short x);
-static unsigned short pull16(unsigned char **cpp);
+static long pull16(unsigned char **cpp, const unsigned char *end);
 
 /* Allocate compression data structure
  *	slots must be in range 0 to 255 (zero meaning no compression)
@@ -190,30 +190,34 @@ encode(unsigned char *cp, unsigned short n)
 	return cp;
 }
 
-/* Pull a 16-bit integer in host order from buffer in network byte order */
-static unsigned short
-pull16(unsigned char **cpp)
+/* Pull a 16-bit integer in host order from buffer in network byte order.
+ * Returns -1 if the buffer is exhausted, otherwise the 16-bit value.
+ */
+static long
+pull16(unsigned char **cpp, const unsigned char *end)
 {
-	short rval;
+	long rval;
 
+	if (*cpp + 2 > end)
+		return -1;
 	rval = *(*cpp)++;
 	rval <<= 8;
 	rval |= *(*cpp)++;
 	return rval;
 }
 
-/* Decode a number */
+/* Decode a number. Returns -1 if the buffer is exhausted. */
 static long
-decode(unsigned char **cpp)
+decode(unsigned char **cpp, const unsigned char *end)
 {
 	int x;
 
+	if (*cpp >= end)
+		return -1;
 	x = *(*cpp)++;
-	if(x == 0){
-		return pull16(cpp) & 0xffff;	/* pull16 returns -1 on error */
-	} else {
-		return x & 0xff;		/* -1 if PULLCHAR returned error */
-	}
+	if (x == 0)
+		return pull16(cpp, end);
+	return x & 0xff;
 }
 
 /*
@@ -499,6 +503,7 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
 	struct cstate *cs;
 	int len, hdrlen;
 	unsigned char *cp = icp;
+	const unsigned char *end = icp + isize;
 
 	/* We've got a compressed packet; read the change byte */
 	comp->sls_i_compressed++;
@@ -534,6 +539,8 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
 	thp = &cs->cs_tcp;
 	ip = &cs->cs_ip;
 
+	if (cp + 2 > end)
+		goto bad;
 	thp->check = *(__sum16 *)cp;
 	cp += 2;
 
@@ -564,26 +571,26 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
 	default:
 		if(changes & NEW_U){
 			thp->urg = 1;
-			if((x = decode(&cp)) == -1) {
+			if((x = decode(&cp, end)) == -1) {
 				goto bad;
 			}
 			thp->urg_ptr = htons(x);
 		} else
 			thp->urg = 0;
 		if(changes & NEW_W){
-			if((x = decode(&cp)) == -1) {
+			if((x = decode(&cp, end)) == -1) {
 				goto bad;
 			}
 			thp->window = htons( ntohs(thp->window) + x);
 		}
 		if(changes & NEW_A){
-			if((x = decode(&cp)) == -1) {
+			if((x = decode(&cp, end)) == -1) {
 				goto bad;
 			}
 			thp->ack_seq = htonl( ntohl(thp->ack_seq) + x);
 		}
 		if(changes & NEW_S){
-			if((x = decode(&cp)) == -1) {
+			if((x = decode(&cp, end)) == -1) {
 				goto bad;
 			}
 			thp->seq = htonl( ntohl(thp->seq) + x);
@@ -591,7 +598,7 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
 		break;
 	}
 	if(changes & NEW_I){
-		if((x = decode(&cp)) == -1) {
+		if((x = decode(&cp, end)) == -1) {
 			goto bad;
 		}
 		ip->id = htons (ntohs (ip->id) + x);
-- 
2.43.0


^ permalink raw reply related

* [PATCH net v2] net: airoha: Fix possible TX queue stall in airoha_qdma_tx_napi_poll()
From: Lorenzo Bianconi @ 2026-04-16 10:30 UTC (permalink / raw)
  To: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Lorenzo Bianconi
  Cc: linux-arm-kernel, linux-mediatek, netdev

Since multiple net_device TX queues can share the same hw QDMA TX queue,
there is no guarantee we have inflight packets queued in hw belonging to a
net_device TX queue stopped in the xmit path because hw QDMA TX queue
can be full. In this corner case the net_device TX queue will never be
re-activated. In order to avoid any potential net_device TX queue stall,
we need to wake all the net_device TX queues feeding the same hw QDMA TX
queue in airoha_qdma_tx_napi_poll routine.

Fixes: 23020f0493270 ("net: airoha: Introduce ethernet support for EN7581 SoC")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
Changes in v2:
- Add txq_stopped parameter to avoid any possible corner cases where the
  netdev queue stalls.
- Link to v1: https://lore.kernel.org/r/20260413-airoha-txq-potential-stall-v1-1-7830363b1543@kernel.org
---
 drivers/net/ethernet/airoha/airoha_eth.c | 37 +++++++++++++++++++++++++++-----
 drivers/net/ethernet/airoha/airoha_eth.h |  1 +
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index e1ab15f1ee7d..19f67c7dd8e1 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -843,6 +843,21 @@ static int airoha_qdma_init_rx(struct airoha_qdma *qdma)
 	return 0;
 }
 
+static void airoha_qdma_wake_netdev_txqs(struct airoha_queue *q)
+{
+	struct airoha_qdma *qdma = q->qdma;
+	struct airoha_eth *eth = qdma->eth;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(eth->ports); i++) {
+		struct airoha_gdm_port *port = eth->ports[i];
+
+		if (port && port->qdma == qdma)
+			netif_tx_wake_all_queues(port->dev);
+	}
+	q->txq_stopped = false;
+}
+
 static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct airoha_tx_irq_queue *irq_q;
@@ -919,12 +934,21 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget)
 
 			txq = netdev_get_tx_queue(skb->dev, queue);
 			netdev_tx_completed_queue(txq, 1, skb->len);
-			if (netif_tx_queue_stopped(txq) &&
-			    q->ndesc - q->queued >= q->free_thr)
-				netif_tx_wake_queue(txq);
-
 			dev_kfree_skb_any(skb);
 		}
+
+		if (q->txq_stopped && q->ndesc - q->queued >= q->free_thr) {
+			/* Since multiple net_device TX queues can share the
+			 * same hw QDMA TX queue, there is no guarantee we have
+			 * inflight packets queued in hw belonging to a
+			 * net_device TX queue stopped in the xmit path.
+			 * In order to avoid any potential net_device TX queue
+			 * stall, we need to wake all the net_device TX queues
+			 * feeding the same hw QDMA TX queue.
+			 */
+			airoha_qdma_wake_netdev_txqs(q);
+		}
+
 unlock:
 		spin_unlock_bh(&q->lock);
 	}
@@ -1984,6 +2008,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
 	if (q->queued + nr_frags >= q->ndesc) {
 		/* not enough space in the queue */
 		netif_tx_stop_queue(txq);
+		q->txq_stopped = true;
 		spin_unlock_bh(&q->lock);
 		return NETDEV_TX_BUSY;
 	}
@@ -2039,8 +2064,10 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
 				TX_RING_CPU_IDX_MASK,
 				FIELD_PREP(TX_RING_CPU_IDX_MASK, index));
 
-	if (q->ndesc - q->queued < q->free_thr)
+	if (q->ndesc - q->queued < q->free_thr) {
 		netif_tx_stop_queue(txq);
+		q->txq_stopped = true;
+	}
 
 	spin_unlock_bh(&q->lock);
 
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index 95e557638617..87b328cfefb0 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -193,6 +193,7 @@ struct airoha_queue {
 	int ndesc;
 	int free_thr;
 	int buf_size;
+	bool txq_stopped;
 
 	struct napi_struct napi;
 	struct page_pool *page_pool;

---
base-commit: 1f5ffc672165ff851063a5fd044b727ab2517ae3
change-id: 20260407-airoha-txq-potential-stall-ad52c53094e8

Best regards,
-- 
Lorenzo Bianconi <lorenzo@kernel.org>


^ permalink raw reply related

* Re: [RFC PATCH 1/2] kernel/notifier: replace single-linked list with double-linked list for reverse traversal
From: Petr Mladek @ 2026-04-16 10:33 UTC (permalink / raw)
  To: chensong_2000
  Cc: rafael, lenb, mturquette, sboyd, viresh.kumar, agk, snitzer,
	mpatocka, bmarzins, song, yukuai, linan122, jason.wessel, danielt,
	dianders, horms, davem, edumazet, kuba, pabeni, paulmck, frederic,
	mcgrof, petr.pavlu, da.gomez, samitolvanen, atomlin, jpoimboe,
	jikos, mbenes, joe.lawrence, rostedt, mhiramat, mark.rutland,
	mathieu.desnoyers, linux-modules, linux-kernel,
	linux-trace-kernel, linux-acpi, linux-clk, linux-pm,
	live-patching, dm-devel, linux-raid, kgdb-bugreport, netdev
In-Reply-To: <20260415070137.17860-1-chensong_2000@189.cn>

On Wed 2026-04-15 15:01:37, chensong_2000@189.cn wrote:
> From: Song Chen <chensong_2000@189.cn>
> 
> The current notifier chain implementation uses a single-linked list
> (struct notifier_block *next), which only supports forward traversal
> in priority order. This makes it difficult to handle cleanup/teardown
> scenarios that require notifiers to be called in reverse priority order.
> 
> A concrete example is the ordering dependency between ftrace and
> livepatch during module load/unload. see the detail here [1].
> 
> This patch replaces the single-linked list in struct notifier_block
> with a struct list_head, converting the notifier chain into a
> doubly-linked list sorted in descending priority order. Based on
> this, a new function notifier_call_chain_reverse() is introduced,
> which traverses the chain in reverse (ascending priority order).
> The corresponding blocking_notifier_call_chain_reverse() is also
> added as the locking wrapper for blocking notifier chains.
> 
> The internal notifier_call_chain_robust() is updated to use
> notifier_call_chain_reverse() for rollback: on error, it records
> the failing notifier (last_nb) and the count of successfully called
> notifiers (nr), then rolls back exactly those nr-1 notifiers in
> reverse order starting from last_nb's predecessor, without needing
> to know the total length of the chain.
> 
> With this change, subsystems with symmetric setup/teardown ordering
> requirements can register a single notifier_block with one priority
> value, and rely on blocking_notifier_call_chain() for forward
> traversal and blocking_notifier_call_chain_reverse() for reverse
> traversal, without needing hard-coded call sequences or separate
> notifier registrations for each direction.
> 
> [1]:https://lore.kernel.org/all
> 	/alpine.LNX.2.00.1602172216491.22700@cbobk.fhfr.pm/
> 
> --- a/include/linux/notifier.h
> +++ b/include/linux/notifier.h
> @@ -53,41 +53,41 @@ typedef	int (*notifier_fn_t)(struct notifier_block *nb,
[...]
>  struct notifier_block {
>  	notifier_fn_t notifier_call;
> -	struct notifier_block __rcu *next;
> +	struct list_head __rcu entry;
>  	int priority;
>  };
[...]
>  #define ATOMIC_INIT_NOTIFIER_HEAD(name) do {	\
>  		spin_lock_init(&(name)->lock);	\
> -		(name)->head = NULL;		\
> +		INIT_LIST_HEAD(&(name)->head);		\

I would expect the RCU variant here, aka INIT_LIST_HEAD_RCU().

> --- a/kernel/notifier.c
> +++ b/kernel/notifier.c
> @@ -14,39 +14,47 @@
>   *	are layered on top of these, with appropriate locking added.
>   */
>  
> -static int notifier_chain_register(struct notifier_block **nl,
> +static int notifier_chain_register(struct list_head *nl,
>  				   struct notifier_block *n,
>  				   bool unique_priority)
>  {
> -	while ((*nl) != NULL) {
> -		if (unlikely((*nl) == n)) {
> +	struct notifier_block *cur;
> +
> +	list_for_each_entry(cur, nl, entry) {
> +		if (unlikely(cur == n)) {
>  			WARN(1, "notifier callback %ps already registered",
>  			     n->notifier_call);
>  			return -EEXIST;
>  		}
> -		if (n->priority > (*nl)->priority)
> -			break;
> -		if (n->priority == (*nl)->priority && unique_priority)
> +
> +		if (n->priority == cur->priority && unique_priority)
>  			return -EBUSY;
> -		nl = &((*nl)->next);
> +
> +		if (n->priority > cur->priority) {
> +			list_add_tail(&n->entry, &cur->entry);
> +			goto out;
> +		}
>  	}
> -	n->next = *nl;
> -	rcu_assign_pointer(*nl, n);
> +
> +	list_add_tail(&n->entry, nl);

I would expect list_add_tail_rcu() here.

> @@ -59,25 +67,25 @@ static int notifier_chain_unregister(struct notifier_block **nl,
>   *			value of this parameter is -1.
>   *	@nr_calls:	Records the number of notifications sent. Don't care
>   *			value of this field is NULL.
> + *	@last_nb:  Records the last called notifier block for rolling back
>   *	Return:		notifier_call_chain returns the value returned by the
>   *			last notifier function called.
>   */
> -static int notifier_call_chain(struct notifier_block **nl,
> +static int notifier_call_chain(struct list_head *nl,
>  			       unsigned long val, void *v,
> -			       int nr_to_call, int *nr_calls)
> +			       int nr_to_call, int *nr_calls,
> +				   struct notifier_block **last_nb)
>  {
>  	int ret = NOTIFY_DONE;
> -	struct notifier_block *nb, *next_nb;
> -
> -	nb = rcu_dereference_raw(*nl);
> +	struct notifier_block *nb;
>  
> -	while (nb && nr_to_call) {
> -		next_nb = rcu_dereference_raw(nb->next);
> +	if (!nr_to_call)
> +		return ret;
>  
> +	list_for_each_entry(nb, nl, entry) {

I would expect the RCU variant here, aka list_for_each_rcu()

These are just two random examples which I found by a quick look.

I guess that the notifier API is very old and it does not use all
the RCU API features which allow to track safety when
CONFIG_PROVE_RCU and CONFIG_PROVE_RCU_LIST are enabled.

It actually might be worth to audit the code and make it right.

>  #ifdef CONFIG_DEBUG_NOTIFIERS
>  		if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
>  			WARN(1, "Invalid notifier called!");
> -			nb = next_nb;
>  			continue;
>  		}
>  #endif

That said, I am not sure if the ftrace/livepatching handlers are
the right motivation for this. Especially when I see the
complexity of the 2nd patch [*]

After thinking more about it. I am not even sure that the ftrace and
livepatching callbacks are good candidates for generic notifiers.
They are too special. It is not only about ordering them against
each other. But it is also about ordering them against other
notifiers. The ftrace/livepatching callbacks must be the first/last
during module load/release.

[*] The 2nd patch is not archived by lore for some reason.
    I have found only a review but it gives a good picture, see
    https://lore.kernel.org/all/1191caf5-6a61-4622-a15e-854d3701f4fc@suse.com/

Best Regards,
Petr

^ permalink raw reply

* [PATCH net] ipv6: fix possible UAF in icmpv6_rcv()
From: Eric Dumazet @ 2026-04-16 10:35 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Simon Horman, David Ahern, Ido Schimmel, netdev, eric.dumazet,
	Eric Dumazet

Caching saddr and daddr before pskb_pull() is problematic
since skb->head can change.

Remove these temporary variables:

- We only access &ipv6_hdr(skb)->saddr and &ipv6_hdr(skb)->daddr
  when net_dbg_ratelimited() is called in the slow path.

- Avoid potential future misuse after pskb_pull() call.

Fixes: 4b3418fba0fe ("ipv6: icmp: include addresses in debug messages")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/icmp.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 799d9e9ac45d11f7b460da7d8a7aeeaf0eb50f2f..efb23807a0262e8d68aa1afc8d96ee94eab89d50 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1104,7 +1104,6 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	struct net *net = dev_net_rcu(skb->dev);
 	struct net_device *dev = icmp6_dev(skb);
 	struct inet6_dev *idev = __in6_dev_get(dev);
-	const struct in6_addr *saddr, *daddr;
 	struct icmp6hdr *hdr;
 	u8 type;
 
@@ -1135,12 +1134,10 @@ static int icmpv6_rcv(struct sk_buff *skb)
 
 	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);
 
-	saddr = &ipv6_hdr(skb)->saddr;
-	daddr = &ipv6_hdr(skb)->daddr;
-
 	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
 		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
-				    saddr, daddr);
+				    &ipv6_hdr(skb)->saddr,
+				    &ipv6_hdr(skb)->daddr);
 		goto csum_error;
 	}
 
@@ -1220,7 +1217,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 			break;
 
 		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
-				    saddr, daddr);
+				    &ipv6_hdr(skb)->saddr,
+				    &ipv6_hdr(skb)->daddr);
 
 		/*
 		 * error of unknown type.
-- 
2.54.0.rc1.513.gad8abe7a5a-goog


^ permalink raw reply related

* Re: [PATCH v25 06/11] cxl/hdm: Add support for getting region from committed decoder
From: Alejandro Lucero Palau @ 2026-04-16 10:37 UTC (permalink / raw)
  To: Dan Williams, alejandro.lucero-palau, linux-cxl, netdev,
	dave.jiang, edward.cree, davem, kuba, pabeni, edumazet
In-Reply-To: <69ccaabf8d9cc_1b0cc6100fd@dwillia2-mobl4.notmuch>


On 4/1/26 06:18, Dan Williams wrote:
> alejandro.lucero-palau@ wrote:
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> A Type2 device configured by the BIOS can have its HDM committed and
>> a cxl region linked by auto discovery when the device memdev is created.
>>
>> Add a function for a Type2 driver to obtain such a region.
>>
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> [..]
>> +/**
>> + * cxl_get_region_from_committed_decoder - obtain a pointer to a region
>> + * @cxlmd: CXL memdev from an endpoint device
>> + *
>> + * An accelerator decoder can be set up by the firmware/BIOS and the auto
>> + * discovery region creation triggered by the memdev object initialization.
>> + * Using this function the related driver can obtain such a region.
>> + *
>> + * Only one committed HDM is expected, returning the first one found.
>> + *
>> + * Return pointer to a region or NULL
>> + */
>> +struct cxl_region *cxl_get_region_from_committed_decoder(struct cxl_memdev *cxlmd)
>> +{
>> +	struct cxl_port *endpoint = cxlmd->endpoint;
>> +	struct cxl_endpoint_decoder *cxled;
>> +
>> +	if (!endpoint)
>> +		return NULL;
>> +
>> +	guard(rwsem_read)(&cxl_rwsem.dpa);
>> +	struct device *cxled_dev __free(put_device) =
>> +		device_find_child(&endpoint->dev, NULL,
>> +				  find_committed_endpoint_decoder);
>> +
>> +	if (!cxled_dev)
>> +		return NULL;
>> +
>> +	cxled = to_cxl_endpoint_decoder(cxled_dev);
>> +
>> +	return cxled->cxld.region;
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_get_region_from_committed_decoder, "CXL");
> As soon as this function returns there is no guarantee that the region
> is still valid.


 From my comment on your patch trying to solve this problem:


https://lore.kernel.org/linux-cxl/20260403210050.1058650-1-dan.j.williams@intel.com/T/#m467dc88199645865a05b5fef858a67f3a608895b


I think the protection for getting the region needs to cover the use 
from the type2 driver, what is likely the ioremapping and some internal 
updates based on the success of these "atomic" actions. If the region 
disappears after that point, the same type2 driver should get an event 
for stopping that usage, what can be implemented as a callback/action 
for the cxl region device removal as done in v17.


If we agree on this, the main issue is how to implement it. I do not 
think adding specific functions or params covering the different 
possibilities (ioremap, ioremap_wc, ...) is desirable, and it will lack 
the flexibility needs for syncing things internally by the type2 driver 
against the region removal race. I wonder if we could have something 
like this:


cxl_get_locked_region_from_committed_decoder()   --> implying locking on 
cxlrd->dev, if regions does exist


cxl_release_region_lock  --> implying unlocking cxlrd->dev


  to be used by the type2 driver allowing the safe ioremap and any 
internal variable updates between them.


^ permalink raw reply

* Re: [PATCH net 00/14] Netfilter/IPVS fixes for net
From: Florian Westphal @ 2026-04-16 10:40 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: netfilter-devel, davem, netdev, kuba, pabeni, edumazet, horms
In-Reply-To: <aeC4A75gYD9qT5OR@chamomile>

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> I cannot send a batch before 16h my local time, I need a bit more
> time.
>
> Sorry.

No problem.  Alternative is to drop patches, this is what I did in the
past.  Some LLM comment indicates problem, remove patch from v2
and defer to next week.

But that was before LLM reviews flagged 50% of patches.
I'll pick up on anything left behind for next weeks batch(es).

^ permalink raw reply

* Re: [PATCH net 1/1] net: bridge: use a stable FDB dst snapshot in RCU readers
From: Paolo Abeni @ 2026-04-16 10:41 UTC (permalink / raw)
  To: Ido Schimmel, Ren Wei
  Cc: bridge, netdev, razor, davem, edumazet, kuba, horms,
	makita.toshiaki, vyasevic, yifanwucs, tomapufckgml, yuantan098,
	bird, enjou1224z, zcliangcn
In-Reply-To: <20260414074722.GA321402@shredder>

On 4/14/26 10:05 AM, Ido Schimmel wrote:
> On Mon, Apr 13, 2026 at 05:08:46PM +0800, Ren Wei wrote:
>> From: Zhengchuan Liang <zcliangcn@gmail.com>
>>
>> Local FDB entries can be rewritten in place by `fdb_delete_local()`, which
>> updates `f->dst` to another port or to `NULL` while keeping the entry
>> alive. Several bridge RCU readers inspect `f->dst`, including
>> `br_fdb_fillbuf()` through the `brforward_read()` sysfs path.
>>
>> These readers currently load `f->dst` multiple times and can therefore
>> observe inconsistent values across the check and later dereference.
>> In `br_fdb_fillbuf()`, this means a concurrent local-FDB update can change
>> `f->dst` after the NULL check and before the `port_no` dereference,
>> leading to a NULL-ptr-deref.
>>
>> Fix this by taking a single `READ_ONCE()` snapshot of `f->dst` in each
>> affected RCU reader and using that snapshot for the rest of the access
>> sequence. Also publish the in-place `f->dst` updates in `fdb_delete_local()`
>> with `WRITE_ONCE()` so the readers and writer use matching access patterns.
> 
> Sashiko is complaining [1] about missing READ_ONCE() annotations in some
> places, but I can handle them in net-next in a similar fashion to commit
> 3e19ae7c6fd6 ("net: bridge: use READ_ONCE() and WRITE_ONCE() compiler
> barriers for fdb->dst").

I agree they can be handled separately, because they don't look harmful.
I think a 'net' patch could be used for such follow-up (data race)

/P


^ permalink raw reply

* Re: [Intel-wired-lan] [PATCH net] ixgbe: only access vfinfo and mv_list under RCU lock
From: Corinna Vinschen @ 2026-04-16 10:42 UTC (permalink / raw)
  To: Loktionov, Aleksandr
  Cc: intel-wired-lan@lists.osuosl.org, netdev@vger.kernel.org,
	Corinna Vinschen
In-Reply-To: <IA3PR11MB898633AB0B2D010F495944E8E5232@IA3PR11MB8986.namprd11.prod.outlook.com>

On Apr 16 09:23, Loktionov, Aleksandr wrote:
> > From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> > [...]
> > @@ -9744,17 +9781,23 @@ static int ixgbe_ndo_get_vf_stats(struct
> > net_device *netdev, int vf,
> >  				  struct ifla_vf_stats *vf_stats)
> >  {
> >  	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
> > +	struct vf_data_storage *vfinfo;
> > 
> >  	if (vf < 0 || vf >= adapter->num_vfs)
> >  		return -EINVAL;
> > 
> > -	vf_stats->rx_packets = adapter->vfinfo[vf].vfstats.gprc;
> > -	vf_stats->rx_bytes   = adapter->vfinfo[vf].vfstats.gorc;
> > -	vf_stats->tx_packets = adapter->vfinfo[vf].vfstats.gptc;
> > -	vf_stats->tx_bytes   = adapter->vfinfo[vf].vfstats.gotc;
> > -	vf_stats->multicast  = adapter->vfinfo[vf].vfstats.mprc;
> > +	rcu_read_lock();
> > +	vfinfo = rcu_dereference(adapter->vfinfo);
> > +	if (vfinfo) {
> > +		vf_stats->rx_packets = vfinfo[vf].vfstats.gprc;
> > +		vf_stats->rx_bytes   = vfinfo[vf].vfstats.gorc;
> > +		vf_stats->tx_packets = vfinfo[vf].vfstats.gptc;
> > +		vf_stats->tx_bytes   = vfinfo[vf].vfstats.gotc;
> > +		vf_stats->multicast  = vfinfo[vf].vfstats.mprc;
> > +	}
> > +	rcu_read_unlock();
> > 
> > -	return 0;
> > +	return vfinfo ? 0 : -EINVAL;
> Before it returned always success, but now it will break 'ip link show dev' in short window when SR-IOV is being torn down.
> For me it looks like UAPI regression.

Good point.  I'll change that back for a v2, just waiting for more
feedback.


Thanks,
Corinna


^ permalink raw reply

* Re: [net,PATCH v3 1/2] net: ks8851: Reinstate disabling of BHs around IRQ handler
From: Sebastian Andrzej Siewior @ 2026-04-16 10:48 UTC (permalink / raw)
  To: Marek Vasut
  Cc: netdev, stable, David S. Miller, Andrew Lunn, Eric Dumazet,
	Jakub Kicinski, Nicolai Buchwitz, Paolo Abeni, Ronald Wahl,
	Yicong Hui, linux-kernel
In-Reply-To: <afe7ed2c-4434-4394-9d87-a4bdf5a15ec1@nabladev.com>

On 2026-04-16 11:26:00 [+0200], Marek Vasut wrote:
> > memory allocation. Therefore I am saying this backtrace is from an older
> > kernel.
> 
> I actually did update the backtrace in V3 with the one from next 20260413
> that contained b44596ffe1b4 ("ARM: Allow to enable RT") from
> stable-rt/v6.12-rt-rebase branch [1] .
> 
> I think I misunderstood the usage of "softirq is raised" vs. "softirq is
> invoked" above . Is it possible that there was an already raised softirq
> before the threaded IRQ handler was invoked, and __netdev_alloc_skb() is
> what invoked that softirq ?

It is not impossible. Something needs to netif_wake_queue() and
ks8851_irq() must only report IRQ_RXI (not IRQ_TXI). Then it can happen.
But usually the driver "stops" the queue if it can't process any new
packets and resumes it once a packet has been sent so it has room again.

> > If there is a flaw in my the theory please explain _how_ you managed
> > that get that backtrace. I am sure it must have from an older kernel and
> > _now_ this lockup also happens on !RT kernels (except for the SPI
> > platform).
> I used [1] , with PREEMPT_RT enabled , on stm32mp157c SoC . I ran iperf3 -s
> on the stm32 side, iperf3 -c 192.168.1.2 -t 0 --bidir on the hostpc side.
> The backtrace happened shortly after.

Hmm. Let me accept it then.

Sebastian

^ permalink raw reply

* [PATCH net,v3 1/1] net: stmmac: Update default_an_inband before passing value to phylink_config
From: KhaiWenTan @ 2026-04-16 10:26 UTC (permalink / raw)
  To: andrew+netdev, davem, edumazet, kuba, pabeni, mcoquelin.stm32,
	alexandre.torgue, rmk+kernel, maxime.chevallier
  Cc: netdev, linux-stm32, linux-arm-kernel, linux-kernel,
	yoong.siang.song, hong.aun.looi, khai.wen.tan, KhaiWenTan

From: KhaiWenTan <khai.wen.tan@linux.intel.com>

get_interfaces() will update both the plat->phy_interfaces and
mdio_bus_data->default_an_inband based on reading a SERDES register. As
get_interfaces() will be called after default_an_inband had already been
read, dwmac-intel regressed as a result with incorrect default_an_inband
value in phylink_config.

Therefore, we moved the priv->plat->get_interfaces() to be executed first
before assigning priv->plat->default_an_inband to config->default_an_inband
to ensure default_an_inband is in correct value.

Fixes: d3836052fe09 ("net: stmmac: intel: convert speed_mode_2500() to get_interfaces()")
Signed-off-by: KhaiWenTan <khai.wen.tan@linux.intel.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
v3:
  - rebase on the latest net tree (Paolo Abeni)

v2: https://patchwork.kernel.org/project/netdevbpf/patch/20260413020339.68426-1-khai.wen.tan@linux.intel.com/
  - update commit message for better understanding (Russell King)
  - corrected the blamed commit (Russell King)

v1: https://patchwork.kernel.org/project/netdevbpf/patch/20260410020735.327590-1-khai.wen.tan@linux.intel.com/
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 01a983001ab4..ca68248dbc78 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1410,8 +1410,6 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv)
 	priv->tx_lpi_clk_stop = priv->plat->flags &
 				STMMAC_FLAG_EN_TX_LPI_CLOCKGATING;
 
-	config->default_an_inband = priv->plat->default_an_inband;
-
 	/* Get the PHY interface modes (at the PHY end of the link) that
 	 * are supported by the platform.
 	 */
@@ -1419,6 +1417,8 @@ static int stmmac_phylink_setup(struct stmmac_priv *priv)
 		priv->plat->get_interfaces(priv, priv->plat->bsp_priv,
 					   config->supported_interfaces);
 
+	config->default_an_inband = priv->plat->default_an_inband;
+
 	/* Set the platform/firmware specified interface mode if the
 	 * supported interfaces have not already been provided using
 	 * phy_interface as a last resort.
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH net-next 5/6] net: stmmac: move PHY handling out of __stmmac_open()/release()
From: Russell King (Oracle) @ 2026-04-16 10:49 UTC (permalink / raw)
  To: Alexander Stein
  Cc: Andrew Lunn, Heiner Kallweit, Alexandre Torgue, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, linux-arm-kernel,
	linux-stm32, Maxime Coquelin, netdev, Paolo Abeni
In-Reply-To: <5987484.DvuYhMxLoT@steina-w>

On Thu, Apr 16, 2026 at 08:20:13AM +0200, Alexander Stein wrote:
> Am Mittwoch, 15. April 2026, 14:59:32 CEST schrieb Russell King (Oracle):
> > On Wed, Apr 15, 2026 at 08:08:40AM +0200, Alexander Stein wrote:
> > > Hi,
> > > 
> > > Am Dienstag, 23. September 2025, 13:26:19 CEST schrieb Russell King (Oracle):
> > > > Move the PHY attachment/detachment from the network driver out of
> > > > __stmmac_open() and __stmmac_release() into stmmac_open() and
> > > > stmmac_release() where these actions will only happen when the
> > > > interface is administratively brought up or down. It does not make
> > > > sense to detach and re-attach the PHY during a change of MTU.
> > > 
> > > Sorry for coming up now. But I recently noticed this commit breaks changing
> > > the MTU on i.MX8MP. Once I simply change the MTU I run into some DMA error:
> > > $ ip link set dev end1 mtu 1400
> > > imx-dwmac 30bf0000.ethernet end1: Register MEM_TYPE_PAGE_POOL RxQ-0
> > > imx-dwmac 30bf0000.ethernet end1: Register MEM_TYPE_PAGE_POOL RxQ-1
> > > imx-dwmac 30bf0000.ethernet end1: Register MEM_TYPE_PAGE_POOL RxQ-2
> > > imx-dwmac 30bf0000.ethernet end1: Register MEM_TYPE_PAGE_POOL RxQ-3
> > > imx-dwmac 30bf0000.ethernet end1: Register MEM_TYPE_PAGE_POOL RxQ-4
> > > imx-dwmac 30bf0000.ethernet end1: Link is Down
> > > imx-dwmac 30bf0000.ethernet end1: Failed to reset the dma
> > > imx-dwmac 30bf0000.ethernet end1: stmmac_hw_setup: DMA engine initialization failed
> > 
> > This basically means that a clock is missing. Please provide more
> > information:
> > 
> > - what kernel version are you using?
> 
> Currently I am using v6.18.22.
> $ ethtool -i end1
> driver: st_gmac
> version: 6.18.22
> firmware-version: 
> expansion-rom-version: 
> bus-info: 30bf0000.ethernet
> supports-statistics: yes
> supports-test: no
> supports-eeprom-access: no
> supports-register-dump: yes
> supports-priv-flags: no
> 
> > - has EEE been negotiated?
> 
> No. It is marked as not supported
> 
> $ ethtool --show-eee end1
> EEE settings for end1:
>         EEE status: not supported
> 
> > - does the problem persist when EEE is disabled?
> 
> As EEE is not supported the problem occurs even with EEE disabled.
> 
> > - which PHY is attached to stmmac?
> 
> It is a TI DP83867.
> 
> imx-dwmac 30bf0000.ethernet eth1: PHY [stmmac-1:03] driver [TI DP83867] (irq=136)
> 
> > - which PHY interface mode is being used to connect the PHY to stmmac?
> 
> For this interface
> > phy-mode = "rgmii-id";
> is set.
> 
> In case it is helpful. My platform is arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts
> Thanks for assisting. If there a further questions, don't hesitate to ask.

Thanks.

So, as best I can determine at the moment, we end up with the following
sequence:

stmmac_change_mtu()
 __stmmac_release()
  phylink_stop()
   phy_stop()
    phy->state = PHY_HALTED
    _phy_state_machine() returns PHY_STATE_WORK_SUSPEND
    _phy_state_machine_post_work()
     phy_suspend()
      genphy_suspend()
       phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN)

With the DP83867, this causes most of the PHY to be powered down, thus
stopping the clocks, and this causes the stmmac reset to time out.

Prior to this commit, we would have called phylink_disconnect_phy()
immediately after phylink_stop(), but I can see nothing that would
be affected by this change there (since that also calls
phy_suspend(), but as the PHY is already suspended, this becomes a
no-op.)

However, __stmmac_open() would have called stmmac_init_phy(), which
would reattach the PHY. This would have called phy_init_hw(), 
resetting the PHY, and phy_resume() which would ensure that the
PDOWN bit is clear - thus clocks would be running.

As a hack, please can you try calling phylink_prepare_resume()
between the __stmmac_release() and __stmmac_open() in
stmmac_change_mtu(). This should resume the PHY, thus restoring the
clocks necessary for stmmac to reset.

Thanks.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH net-next v8 4/4] tun/tap & vhost-net: avoid ptr_ring tail-drop when a qdisc is present
From: Michael S. Tsirkin @ 2026-04-16 10:51 UTC (permalink / raw)
  To: Simon Schippers
  Cc: Jason Wang, willemdebruijn.kernel, andrew+netdev, davem, edumazet,
	kuba, pabeni, eperezma, leiyang, stephen, jon, tim.gebauer,
	netdev, linux-kernel, kvm, virtualization
In-Reply-To: <73033bfc-4da3-4057-9480-bd977a66ecef@tu-dortmund.de>

On Thu, Apr 16, 2026 at 10:54:45AM +0200, Simon Schippers wrote:
> To summarize the discussion from my POV:
> 
> Open point: __ptr_ring_zero_tail() is only called after
>             consuming ring.batch elements.
> 1) Consumer wakes up the producer but the slot is not cleaned.
> --> I disagree, the consumer only wakes after consuming ring.size/2.
>     Then __ptr_ring_zero_tail() was called at least once.
> 2) Producer is woken up but see the ring is full, so it need to
>    drop the packet.
> --> I disagree, because then NETDEV_TX_BUSY is returned. This is
>     noticeable as qdisc requeue and only happens very rarely.
> 
> Points I will address:
> - Minor nit on patch 2 by MST.
> - Rebase patch 3 because of commit d748047
>   ("ptr_ring: disable KCSAN warnings").
> - Document the pair of the smp_mb__after_atomic() in tun_net_xmit
>   with tun_ring_consume().
> - Use 1 ptr_ring spinlock instead of 2 (currently used for consume
>   and empty check), not sure how to implement it pretty rn.
> - Run pktgen benchmarks with pg_set SHARED.

Thanks! The plan makes sense to me.

-- 
MST


^ permalink raw reply

* Re: [PATCH net V2 1/3] net/mlx5: SD: Serialize init/cleanup
From: Paolo Abeni @ 2026-04-16 11:00 UTC (permalink / raw)
  To: Tariq Toukan, Eric Dumazet, Jakub Kicinski, Andrew Lunn,
	David S. Miller
  Cc: Saeed Mahameed, Mark Bloch, Leon Romanovsky, Shay Drory,
	Simon Horman, Kees Cook, Parav Pandit, Patrisious Haddad,
	Gal Pressman, netdev, linux-rdma, linux-kernel, Dragos Tatulea
In-Reply-To: <20260413105323.186411-2-tariqt@nvidia.com>

On 4/13/26 12:53 PM, Tariq Toukan wrote:
> @@ -491,23 +508,35 @@ void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
>  {
>  	struct mlx5_sd *sd = mlx5_get_sd(dev);
>  	struct mlx5_core_dev *primary, *pos;
> +	struct mlx5_sd *primary_sd = NULL;
>  	int i;
>  
>  	if (!sd)
>  		return;
>  
> +	mlx5_devcom_comp_lock(sd->devcom);
>  	if (!mlx5_devcom_comp_is_ready(sd->devcom))
> -		goto out;
> +		goto out_unlock;
>  
>  	primary = mlx5_sd_get_primary(dev);
> +	primary_sd = mlx5_get_sd(primary);
> +
> +	if (primary_sd->state != MLX5_SD_STATE_UP)
> +		goto out_unlock;
> +
>  	mlx5_sd_for_each_secondary(i, primary, pos)
>  		sd_cmd_unset_secondary(pos);
>  	sd_cmd_unset_primary(primary);
>  	debugfs_remove_recursive(sd->dfs);
>  
>  	sd_info(primary, "group id %#x, uncombined\n", sd->group_id);
> -out:
> +	primary_sd->state = MLX5_SD_STATE_DESTROYING;
> +out_unlock:
> +	mlx5_devcom_comp_unlock(sd->devcom);
>  	sd_unregister(dev);
> +	if (primary_sd)
> +		/* devcom isn't ready, reset the state */
> +		primary_sd->state = MLX5_SD_STATE_DOWN;

Sashiko says:

---
Since primary_sd is only populated if devcom is ready, this condition will
never trigger when devcom isn't ready, contrary to the comment. Instead,
it triggers on the normal path after devcom is ready, immediately
overwriting
MLX5_SD_STATE_DESTROYING with MLX5_SD_STATE_DOWN outside the lock.
Could this allow concurrent mlx5_sd_init() calls to see the down state and
attempt hardware re-initialization while the group is still being torn down?
Also, can this race and cause a use-after-free regression?
During a concurrent Socket-Direct group teardown, the primary PF and
secondary
PF can execute mlx5_sd_cleanup() in parallel.
If the primary PF completes its cleanup first, it will call
sd_cleanup(primary)
which calls kfree() on the sd structure, freeing the primary_sd memory.
If the secondary PF is preempted just after releasing the devcom lock,
it will resume, evaluate its local non-NULL primary_sd pointer, and
locklessly
write to primary_sd->state. Does this dereference the freed memory of the
primary PF?
---


^ permalink raw reply

* Re: [PATCH net 1/1] net: bridge: use a stable FDB dst snapshot in RCU readers
From: patchwork-bot+netdevbpf @ 2026-04-16 11:00 UTC (permalink / raw)
  To: Ren Wei
  Cc: bridge, netdev, razor, idosch, davem, edumazet, kuba, pabeni,
	horms, makita.toshiaki, vyasevic, yifanwucs, tomapufckgml,
	yuantan098, bird, enjou1224z, zcliangcn
In-Reply-To: <6570fabb85ecadb8baaf019efe856f407711c7b9.1776043229.git.zcliangcn@gmail.com>

Hello:

This patch was applied to netdev/net.git (main)
by Paolo Abeni <pabeni@redhat.com>:

On Mon, 13 Apr 2026 17:08:46 +0800 you wrote:
> From: Zhengchuan Liang <zcliangcn@gmail.com>
> 
> Local FDB entries can be rewritten in place by `fdb_delete_local()`, which
> updates `f->dst` to another port or to `NULL` while keeping the entry
> alive. Several bridge RCU readers inspect `f->dst`, including
> `br_fdb_fillbuf()` through the `brforward_read()` sysfs path.
> 
> [...]

Here is the summary with links:
  - [net,1/1] net: bridge: use a stable FDB dst snapshot in RCU readers
    https://git.kernel.org/netdev/net/c/df4601653201

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply

* Re: [PATCH net V2 3/3] net/mlx5e: SD, Fix race condition in secondary device probe/remove
From: Paolo Abeni @ 2026-04-16 11:07 UTC (permalink / raw)
  To: Tariq Toukan, Eric Dumazet, Jakub Kicinski, Andrew Lunn,
	David S. Miller
  Cc: Saeed Mahameed, Mark Bloch, Leon Romanovsky, Shay Drory,
	Simon Horman, Kees Cook, Parav Pandit, Patrisious Haddad,
	Gal Pressman, netdev, linux-rdma, linux-kernel, Dragos Tatulea
In-Reply-To: <20260413105323.186411-4-tariqt@nvidia.com>

On 4/13/26 12:53 PM, Tariq Toukan wrote:
> From: Shay Drory <shayd@nvidia.com>
> 
> When utilizing Socket-Direct single netdev functionality the driver
> resolves the actual auxiliary device using mlx5_sd_get_adev(). However,
> the current implementation returns the primary ETH auxiliary device
> without holding the device lock, leading to a potential race condition
> where the ETH device could be unbound or removed concurrently during
> probe, suspend, resume, or remove operations.[1]
> 
> Fix this by introducing mlx5_sd_put_adev() and updating
> mlx5_sd_get_adev() so that secondaries devices would acquire the device
> lock of the returned auxiliary device. After the lock is acquired, a
> second devcom check is needed[2].
> In addition, update The callers to pair the get operation with the new
> put operation, ensuring the lock is held while the auxiliary device is
> being operated on and released afterwards.
> 
> The "primary" designation is determined once in sd_register(). It's set
> before devcom is marked ready, and it never changes after that.
> In Addition, The primary path never locks a secondary: When the primary
> device invoke mlx5_sd_get_adev(), it sees dev == primary and returns.
> no additional lock is taken.
> Therefore lock ordering is always: secondary_lock -> primary_lock. The
> reverse never happens, so ABBA deadlock is impossible.
> 
> [1]
> for example:
> BUG: kernel NULL pointer dereference, address: 0000000000000370
> PGD 0 P4D 0
> Oops: Oops: 0000 [#1] SMP
> CPU: 4 UID: 0 PID: 3945 Comm: bash Not tainted 6.19.0-rc3+ #1 NONE
> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
> RIP: 0010:mlx5e_dcbnl_dscp_app+0x23/0x100 [mlx5_core]
> Call Trace:
>  <TASK>
>  mlx5e_remove+0x82/0x12a [mlx5_core]
>  device_release_driver_internal+0x194/0x1f0
>  bus_remove_device+0xc6/0x140
>  device_del+0x159/0x3c0
>  ? devl_param_driverinit_value_get+0x29/0x80
>  mlx5_rescan_drivers_locked+0x92/0x160 [mlx5_core]
>  mlx5_unregister_device+0x34/0x50 [mlx5_core]
>  mlx5_uninit_one+0x43/0xb0 [mlx5_core]
>  remove_one+0x4e/0xc0 [mlx5_core]
>  pci_device_remove+0x39/0xa0
>  device_release_driver_internal+0x194/0x1f0
>  unbind_store+0x99/0xa0
>  kernfs_fop_write_iter+0x12e/0x1e0
>  vfs_write+0x215/0x3d0
>  ksys_write+0x5f/0xd0
>  do_syscall_64+0x55/0xe90
>  entry_SYSCALL_64_after_hwframe+0x4b/0x53
> 
> [2]
>     CPU0 (primary)                     CPU1 (secondary)
> ==========================================================================
> mlx5e_remove() (device_lock held)
>                                      mlx5e_remove() (2nd device_lock held)
>                                       mlx5_sd_get_adev()
>                                        mlx5_devcom_comp_is_ready() => true
>                                        device_lock(primary)
>  mlx5_sd_get_adev() ==> ret adev
>  _mlx5e_remove()
>  mlx5_sd_cleanup()
>  // mlx5e_remove finished
>  // releasing device_lock
>                                        //need another check here...
>                                        mlx5_devcom_comp_is_ready() => false
> 
> Fixes: 381978d28317 ("net/mlx5e: Create single netdev per SD group")
> Signed-off-by: Shay Drory <shayd@nvidia.com>
> Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/en_main.c  | 18 ++++++++++++++----
>  .../net/ethernet/mellanox/mlx5/core/lib/sd.c   | 17 +++++++++++++++++
>  .../net/ethernet/mellanox/mlx5/core/lib/sd.h   |  2 ++
>  3 files changed, 33 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 0b8b44bbcb9e..11f80158e107 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -6657,8 +6657,11 @@ static int mlx5e_resume(struct auxiliary_device *adev)
>  		return err;
>  
>  	actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
> -	if (actual_adev)
> -		return _mlx5e_resume(actual_adev);
> +	if (actual_adev) {
> +		err = _mlx5e_resume(actual_adev);
> +		mlx5_sd_put_adev(actual_adev, adev);
> +		return err;
> +	}
>  	return 0;
>  }
>  
> @@ -6698,6 +6701,8 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
>  		err = _mlx5e_suspend(actual_adev, false);
>  
>  	mlx5_sd_cleanup(mdev);
> +	if (actual_adev)
> +		mlx5_sd_put_adev(actual_adev, adev);
>  	return err;
>  }
>  
> @@ -6795,8 +6800,11 @@ static int mlx5e_probe(struct auxiliary_device *adev,
>  		return err;
>  
>  	actual_adev = mlx5_sd_get_adev(mdev, adev, edev->idx);
> -	if (actual_adev)
> -		return _mlx5e_probe(actual_adev);
> +	if (actual_adev) {
> +		err = _mlx5e_probe(actual_adev);
> +		mlx5_sd_put_adev(actual_adev, adev);

Sashiko says:

---
If _mlx5e_probe(actual_adev) fails, it frees mlx5e_dev but leaves the
auxiliary device's drvdata pointing to it. Furthermore, mlx5e_probe()
returns the error without calling mlx5_sd_cleanup(mdev), leaving devcom
incorrectly marked as ready.
If the primary device is later unbound, mlx5e_remove() will see that
devcom is ready, call _mlx5e_remove(), and blindly dereference the
dangling mlx5e_dev pointer.
Is there a missing cleanup step here to clear drvdata or reset the sd
state on failure?
---

Please try to address AI comments (i.e. explaining why not relevant)
proactively.

Thanks,

Paolo


^ permalink raw reply

* Re: [PATCH iwl-net] i40e: keep q_vectors array in sync with channel count changes
From: Maciej Fijalkowski @ 2026-04-16 11:12 UTC (permalink / raw)
  To: intel-wired-lan
  Cc: netdev, magnus.karlsson, kuba, pabeni, horms, przemyslaw.kitszel,
	jacob.e.keller
In-Reply-To: <20260414121405.631092-1-maciej.fijalkowski@intel.com>

On Tue, Apr 14, 2026 at 02:14:05PM +0200, Maciej Fijalkowski wrote:
> For the main VSI, i40e_set_num_rings_in_vsi() always derives
> num_q_vectors from pf->num_lan_msix. At the same time, ethtool -L stores
> the user requested channel count in vsi->req_queue_pairs and the queue
> setup path uses that value for the effective number of queue pairs.
> 
> This leaves queue and vector counts out of sync after shrinking channel
> count via ethtool -L. The active queue configuration is reduced, but the
> VSI still keeps the full PF-sized q_vector topology.
> 
> That mismatch breaks reconfiguration flows which rely on vector/NAPI
> state matching the effective channel configuration. In particular,
> toggling /sys/class/net/<dev>/threaded after reducing the channel count
> can hang, and later channel-count changes can fail because VSI reinit
> does not rebuild q_vectors to match the new vector count.
> 
> Fix this by making the main VSI num_q_vectors follow the effective
> requested channel count, capped by the available MSI-X vectors. Update
> i40e_vsi_reinit_setup() to rebuild q_vectors during VSI reinit so the
> vector topology is refreshed together with the ring arrays when channel
> count changes.
> 
> Keep alloc_queue_pairs unchanged and based on pf->num_lan_qps so the VSI
> retains its full queue capacity.
> 
> Selftest napi_threaded.py was originally used when Jakub reported hang
> on /sys/class/net/<dev>/threaded toggle. In order to make it pass on
> i40e, use persistent NAPI configuration for q_vector NAPIs so NAPI
> identity and threaded settings survive q_vector reallocation across
> channel-count changes. This is achieved by using netif_napi_add_config()
> when configuring q_vectors.
> 
> $ export NETIF=ens259f1np1
> $ sudo -E env PATH="$PATH" ./tools/testing/selftests/drivers/net/napi_threaded.py
> TAP version 13
> 1..3
> ok 1 napi_threaded.napi_init
> ok 2 napi_threaded.change_num_queues
> ok 3 napi_threaded.enable_dev_threaded_disable_napi_threaded
> Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
> 
> Reported-by: Jakub Kicinski <kuba@kernel.org>
> Closes: https://lore.kernel.org/intel-wired-lan/20260316133100.6054a11f@kernel.org/
> Fixes: d2a69fefd756 ("i40e: Fix changing previously set num_queue_pairs for PFs")
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> ---
>  drivers/net/ethernet/intel/i40e/i40e_main.c | 34 +++++++++++++++++----
>  1 file changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index 926d001b2150..5636ad71f940 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -11403,10 +11403,14 @@ static void i40e_service_timer(struct timer_list *t)
>  static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
>  {
>  	struct i40e_pf *pf = vsi->back;
> +	u16 qps;
>  
>  	switch (vsi->type) {
>  	case I40E_VSI_MAIN:
>  		vsi->alloc_queue_pairs = pf->num_lan_qps;
> +		qps = vsi->req_queue_pairs ?
> +		      min_t(u16, vsi->req_queue_pairs, pf->num_lan_qps) :
> +		      pf->num_lan_qps;
>  		if (!vsi->num_tx_desc)
>  			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
>  						 I40E_REQ_DESCRIPTOR_MULTIPLE);
> @@ -11414,7 +11418,8 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
>  			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
>  						 I40E_REQ_DESCRIPTOR_MULTIPLE);
>  		if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
> -			vsi->num_q_vectors = pf->num_lan_msix;
> +			vsi->num_q_vectors = max_t(int, 1,
> +						   min_t(int, qps, pf->num_lan_msix));
>  		else
>  			vsi->num_q_vectors = 1;
>  
> @@ -12043,7 +12048,8 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
>  	cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
>  
>  	if (vsi->netdev)
> -		netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll);
> +		netif_napi_add_config(vsi->netdev, &q_vector->napi,
> +				      i40e_napi_poll, v_idx);
>  
>  	/* tie q_vector and vsi together */
>  	vsi->q_vectors[v_idx] = q_vector;
> @@ -14265,12 +14271,27 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
>  
>  	pf = vsi->back;
>  
> +	if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
> +		i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
> +		vsi->base_vector = 0;
> +	}
> +
>  	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
>  	i40e_vsi_clear_rings(vsi);
>  
> -	i40e_vsi_free_arrays(vsi, false);
> +	i40e_vsi_free_q_vectors(vsi);
> +	i40e_vsi_free_arrays(vsi, true);
>  	i40e_set_num_rings_in_vsi(vsi);
> -	ret = i40e_vsi_alloc_arrays(vsi, false);
> +
> +	ret = i40e_vsi_alloc_arrays(vsi, true);
> +	if (ret)
> +		goto err_vsi;

Sashiko warns about potential double-free on vsi->tx_rings. I will send a
v2 where I include NULLing this ptr in i40e_vsi_alloc_arrays().

Thanks,
Maciej

> +
> +	/* Rebuild q_vectors during VSI reinit because the effective channel
> +	 * count may change num_q_vectors. Keep vector topology aligned with the
> +	 * queue configuration after ethtool's .set_channels() callback.
> +	 */
> +	ret = i40e_vsi_setup_vectors(vsi);
>  	if (ret)
>  		goto err_vsi;
>  
> @@ -14282,7 +14303,7 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
>  		dev_info(&pf->pdev->dev,
>  			 "failed to get tracking for %d queues for VSI %d err %d\n",
>  			 alloc_queue_pairs, vsi->seid, ret);
> -		goto err_vsi;
> +		goto err_lump;
>  	}
>  	vsi->base_queue = ret;
>  
> @@ -14306,7 +14327,6 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
>  	return vsi;
>  
>  err_rings:
> -	i40e_vsi_free_q_vectors(vsi);
>  	if (vsi->netdev_registered) {
>  		vsi->netdev_registered = false;
>  		unregister_netdev(vsi->netdev);
> @@ -14316,6 +14336,8 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
>  	if (vsi->type == I40E_VSI_MAIN)
>  		i40e_devlink_destroy_port(pf);
>  	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
> +err_lump:
> +	i40e_vsi_free_q_vectors(vsi);
>  err_vsi:
>  	i40e_vsi_clear(vsi);
>  	return NULL;
> -- 
> 2.43.0
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox